The following logic is a stereo biquad IIR filter to calculate low-pass, high-pass, notch, band-pass or peak-filters with a very low latency. It uses 13 clock-cycles to calculate a stereo-signal and at 100MHz the block is ready after 130 nanoseconds.
The calculation of the coefficients can be found down below. Have a look at my X/FBAPE-project at GitHub: https://www.github.com/xn--nding-jua/xfbape
VHDL
-- Stereo biquad IIR filter
-- Christian Noeding, christian@noeding-online.de
-- https://chrisdevblog.com | https://github.com/xn--nding-jua
--
-- Released under GNU General Public License v3
library IEEE;
use IEEE.STD_LOGIC_1164.all;
use ieee.numeric_std.all;
entity filter_iir_stereo is
generic(
bit_width : natural range 24 to 48 := 24;
coeff_bits : natural range 16 to 48 := 40; -- for a Qfract_bits-coefficient: signed-bit + integer-bits + Qfract_bits-bits = 1 + 4 + fract_bits = 40 bit
fract_bits : natural range 16 to 48 := 35
);
port (
clk : in std_logic := '0';
input_l : in signed(bit_width - 1 downto 0) := (others=>'0');
input_r : in signed(bit_width - 1 downto 0) := (others=>'0');
sync_in : in std_logic := '0';
rst : in std_logic := '0';
bypass : in std_logic := '0';
-- coefficients have to be multiplied with 2^fract_bits before
a0 : in signed(coeff_bits - 1 downto 0);
a1 : in signed(coeff_bits - 1 downto 0);
a2 : in signed(coeff_bits - 1 downto 0);
-- coefficients have to be multiplied with 2^fract_bits before
b1 : in signed(coeff_bits - 1 downto 0);
b2 : in signed(coeff_bits - 1 downto 0);
output_l : out signed(bit_width - 1 downto 0) := (others=>'0');
output_r : out signed(bit_width - 1 downto 0) := (others=>'0');
sync_out : out std_logic := '0'
);
end filter_iir_stereo;
architecture Behavioral of filter_iir_stereo is
signal state : natural range 0 to 13 := 0;
--signals for multiplier
signal mult_in_a : signed(coeff_bits - 1 downto 0) := (others=>'0');
signal mult_in_b : signed(coeff_bits + bit_width - 1 downto 0) := (others=>'0');
signal mult_out : signed((coeff_bits + coeff_bits + bit_width - 1) downto 0) := (others=>'0');
--temp regs and delay regs
signal temp_in_l, in_z1_l, in_z2_l : signed(bit_width - 1 downto 0):= (others=>'0');
signal temp_in_r, in_z1_r, in_z2_r : signed(bit_width - 1 downto 0):= (others=>'0');
signal out_z1_l, out_z2_l : signed(coeff_bits + bit_width - 1 downto 0):= (others=>'0');
signal out_z1_r, out_z2_r : signed(coeff_bits + bit_width - 1 downto 0):= (others=>'0');
signal temp : signed(coeff_bits + bit_width - 1 + 8 downto 0):= (others=>'0');
begin
-- multiplier
process(mult_in_a, mult_in_b)
begin
mult_out <= mult_in_a * mult_in_b;
end process;
--// calculate filter
process(clk, rst)
begin
if (rst = '1') then
-- reset internal signals
temp_in_l <= ( others => '0');
temp_in_r <= ( others => '0');
in_z1_l <= ( others => '0');
in_z2_l <= ( others => '0');
in_z1_r <= ( others => '0');
in_z2_r <= ( others => '0');
out_z1_l <= ( others => '0');
out_z2_l <= ( others => '0');
out_z1_r <= ( others => '0');
out_z2_r <= ( others => '0');
temp <= ( others => '0');
-- set output to zero
output_l <= ( others => '0');
output_r <= ( others => '0');
sync_out <= '1';
-- call last state to reset filter-states for next calculation
state <= 13;
else
if rising_edge(clk) then
if (sync_in = '1' and state = 0) then
mult_in_a <= a0;
mult_in_b <= resize(input_l, coeff_bits + bit_width);
temp_in_l <= input_l;
temp_in_r <= input_r;
state <= 1; -- start of state-machine
elsif (state = 1) then
temp <= resize(mult_out, temp'length);
mult_in_a <= a1;
mult_in_b <= resize(in_z1_l, coeff_bits + bit_width);
state <= state + 1;
elsif (state = 2) then
-- save and sum up result of (in_z1*a1) to temp and load multiplier with in_z2 and a2
temp <= temp + resize(mult_out, temp'length);
mult_in_a <= a2;
mult_in_b <= resize(in_z2_l, coeff_bits + bit_width);
state <= state + 1;
elsif (state = 3) then
-- save and sum up result of (in_z2*a2) to temp and load multiplier with out_z1 and b1
temp <= temp + resize(mult_out, temp'length);
mult_in_a <= b1;
mult_in_b <= out_z1_l;
state <= state + 1;
elsif (state = 4) then
-- save and sum up (negative) result of (out_z1*b1) and load multiplier with out_z2 and b2
temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
mult_in_a <= b2;
mult_in_b <= out_z2_l;
state <= state + 1;
elsif (state = 5) then
-- save and sum up (negative) result of (out_z2*b2)
temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
state <= state + 1;
elsif (state = 6) then
-- save all delay registers and save result to output
in_z2_l <= in_z1_l;
in_z1_l <= temp_in_l;
out_z2_l <= out_z1_l;
out_z1_l <= resize(temp, out_z1_l'length); -- save value with fractions to gain higher resolution for this filter
if (bypass = '1') then
output_l <= input_l;
else
output_l <= resize(shift_right(temp, fract_bits), bit_width); -- resize to 24-bit audio
end if;
-- load multiplier with a0 * input
mult_in_a <= a0;
mult_in_b <= resize(temp_in_r, coeff_bits + bit_width);
state <= state + 1;
elsif (state = 7) then
-- save result of (samplein*a0) to temp and load multiplier with in_z1 and a1
temp <= resize(mult_out, temp'length);
mult_in_a <= a1;
mult_in_b <= resize(in_z1_r, coeff_bits + bit_width);
state <= state + 1;
elsif (state = 8) then
-- save and sum up result of (in_z1*a1) to temp and load multiplier with in_z2 and a2
temp <= temp + resize(mult_out, temp'length);
mult_in_a <= a2;
mult_in_b <= resize(in_z2_r, coeff_bits + bit_width);
state <= state + 1;
elsif (state = 9) then
-- save and sum up result of (in_z2*a2) to temp and load multiplier with out_z1 and b1
temp <= temp + resize(mult_out, temp'length);
mult_in_a <= b1;
mult_in_b <= out_z1_r;
state <= state + 1;
elsif (state = 10) then
-- save and sum up (negative) result of (out_z1*b1) and load multiplier with out_z2 and b2
temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
mult_in_a <= b2;
mult_in_b <= out_z2_r;
state <= state + 1;
elsif (state = 11) then
-- save and sum up (negative) result of (out_z2*b2)
temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
state <= state + 1;
elsif (state = 12) then
-- save all delay registers, save result to output and apply ouput-valid signal
in_z2_r <= in_z1_r;
in_z1_r <= temp_in_r;
out_z2_r <= out_z1_r;
out_z1_r <= resize(temp, out_z1_r'length); -- save value with fractions to gain higher resolution for this filter
if (bypass = '1') then
output_r <= input_r;
else
output_r <= resize(shift_right(temp, fract_bits), bit_width); -- resize to 24-bit audio
end if;
sync_out <= '1';
state <= state + 1;
elsif (state = 13) then
sync_out <= '0';
state <= 0;
end if;
end if;
end if;
end process;
end Behavioral;The coefficients can be calculated like this:
C
typedef union
{
uint64_t u64;
int64_t s64;
uint32_t u32[2];
int32_t s32[2];
uint16_t u16[4];
int16_t s16[4];
uint8_t u8[8];
int8_t s8[8];
double d;
}data_64b;
struct sPEQ {
// user-settings
uint8_t type = 1; // 0=allpass, 1=peak, 2=low-shelf, 3=high-shelf, 4=bandpass, 5=notch, 6=lowpass, 7=highpass
float fc = 400; // center-frequency of PEQ
float Q = 1; // Quality of PEQ (bandwidth)
float gain = 0; // gain of PEQ
data_64b a[3];
data_64b b[3];
};
void recalcFilterCoefficients_PEQ(struct sPEQ *peq) {
// Online-Calculator: https://www.earlevel.com/main/2021/09/02/biquad-calculator-v3
// Source: https://www.earlevel.com/main/2012/11/26/biquad-c-source-code
double V = pow(10.0, fabs(peq->gain)/20.0);
double K = tan(PI * peq->fc / audiomixer.sampleRate);
double norm;
double coeff_a[3];
double coeff_b[3];
switch (peq->type) {
case 0: // allpass
norm = 1.0 / (1.0 + K * 1.0/peq->Q + K * K);
coeff_a[0] = (1.0 - K * 1.0/peq->Q + K * K) * norm;
coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
coeff_a[2] = 1.0;
coeff_b[1] = coeff_a[1];
coeff_b[2] = coeff_a[0];
break;
case 1: // peak-filter
if (peq->gain >= 0) {
norm = 1.0 / (1.0 + 1.0/peq->Q * K + K * K);
coeff_a[0] = (1.0 + V/peq->Q * K + K * K) * norm;
coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
coeff_a[2] = (1.0 - V/peq->Q * K + K * K) * norm;
coeff_b[1] = coeff_a[1];
coeff_b[2] = (1.0 - 1.0/peq->Q * K + K * K) * norm;
}else{
norm = 1.0 / (1.0 + V/peq->Q * K + K * K);
coeff_a[0] = (1.0 + 1.0/peq->Q * K + K * K) * norm;
coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
coeff_a[2] = (1.0 - 1.0/peq->Q * K + K * K) * norm;
coeff_b[1] = coeff_a[1];
coeff_b[2] = (1.0 - V/peq->Q * K + K * K) * norm;
}
break;
case 2: // low-shelf
if (peq->gain >= 0) { // boost
norm = 1.0 / (1.0 + sqrt(2.0) * K + K * K);
coeff_a[0] = (1.0 + sqrt(2.0*V) * K + V * K * K) * norm;
coeff_a[1] = 2.0 * (V * K * K - 1.0) * norm;
coeff_a[2] = (1.0 - sqrt(2.0*V) * K + V * K * K) * norm;
coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
coeff_b[2] = (1.0 - sqrt(2.0) * K + K * K) * norm;
}
else { // cut
norm = 1.0 / (1.0 + sqrt(2.0*V) * K + V * K * K);
coeff_a[0] = (1.0 + sqrt(2.0) * K + K * K) * norm;
coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
coeff_a[2] = (1.0 - sqrt(2) * K + K * K) * norm;
coeff_b[1] = 2.0 * (V * K * K - 1.0) * norm;
coeff_b[2] = (1.0 - sqrt(2.0*V) * K + V * K * K) * norm;
}
break;
case 3: // high-shelf
if (peq->gain >= 0) { // boost
norm = 1.0 / (1.0 + sqrt(2.0) * K + K * K);
coeff_a[0] = (V + sqrt(2.0*V) * K + K * K) * norm;
coeff_a[1] = 2.0 * (K * K - V) * norm;
coeff_a[2] = (V - sqrt(2.0*V) * K + K * K) * norm;
coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
coeff_b[2] = (1.0 - sqrt(2.0) * K + K * K) * norm;
}
else { // cut
norm = 1.0 / (V + sqrt(2.0*V) * K + K * K);
coeff_a[0] = (1.0 + sqrt(2.0) * K + K * K) * norm;
coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
coeff_a[2] = (1.0 - sqrt(2.0) * K + K * K) * norm;
coeff_b[1] = 2.0 * (K * K - V) * norm;
coeff_b[2] = (V - sqrt(2.0*V) * K + K * K) * norm;
}
break;
case 4: // bandpass
norm = 1.0 / (1.0 + K / peq->Q + K * K);
coeff_a[0] = (K / peq->Q) * norm;
coeff_a[1] = 0;
coeff_a[2] = -coeff_a[0];
coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
break;
case 5: // notch
norm = 1.0 / (1.0 + K / peq->Q + K * K);
coeff_a[0] = (1.0 + K * K) * norm;
coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
coeff_a[2] = coeff_a[0];
coeff_b[1] = coeff_a[1];
coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
break;
case 6: // lowpass
norm = 1.0 / (1.0 + K / peq->Q + K * K);
coeff_a[0] = K * K * norm;
coeff_a[1] = 2.0 * coeff_a[0];
coeff_a[2] = coeff_a[0];
coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
break;
case 7: // highpass
norm = 1.0 / (1.0 + K / peq->Q + K * K);
coeff_a[0] = 1.0 * norm;
coeff_a[1] = -2.0 * coeff_a[0];
coeff_a[2] = coeff_a[0];
coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
break;
}
// convert to Q34-format
for (int i=0; i<3; i++) {
peq->a[i].s64 = coeff_a[i] * 17179869183; // convert to Q34
peq->b[i].s64 = coeff_b[i] * 17179869183; // convert to Q34
}
}