The following logic is a stereo biquad IIR filter to calculate low-pass, high-pass, notch, band-pass or peak-filters with a very low latency. It uses 13 clock-cycles to calculate a stereo-signal and at 100MHz the block is ready after 130 nanoseconds.

The calculation of the coefficients can be found down below. Have a look at my X/FBAPE-project at GitHub: https://www.github.com/xn--nding-jua/xfbape

VHDL
-- Stereo biquad IIR filter
-- Christian Noeding, christian@noeding-online.de
-- https://chrisdevblog.com | https://github.com/xn--nding-jua
--
-- Released under GNU General Public License v3

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use ieee.numeric_std.all;

entity filter_iir_stereo is
  generic(
    bit_width	:	natural range 24 to 48 := 24;
    coeff_bits	:	natural range 16 to 48 := 40; -- for a Qfract_bits-coefficient: signed-bit + integer-bits + Qfract_bits-bits = 1 + 4 + fract_bits = 40 bit
    fract_bits	:	natural range 16 to 48 := 35
    );
  port (
	clk			:	in std_logic := '0';
	input_l		:	in signed(bit_width - 1 downto 0) := (others=>'0');
	input_r		:	in signed(bit_width - 1 downto 0) := (others=>'0');
	sync_in		:	in std_logic := '0';
	rst			:	in std_logic := '0';
	bypass		:	in std_logic := '0';

	-- coefficients have to be multiplied with 2^fract_bits before
	a0 		:	in signed(coeff_bits - 1 downto 0);
	a1 		:	in signed(coeff_bits - 1 downto 0);
	a2 		:	in signed(coeff_bits - 1 downto 0);

	-- coefficients have to be multiplied with 2^fract_bits before
	b1 		:	in signed(coeff_bits - 1 downto 0);
	b2 		:	in signed(coeff_bits - 1 downto 0);

	output_l		:	out signed(bit_width - 1 downto 0) := (others=>'0');
	output_r		:	out signed(bit_width - 1 downto 0) := (others=>'0');
	sync_out		:	out std_logic := '0'
	);
end filter_iir_stereo;

architecture Behavioral of filter_iir_stereo is
	signal state		:	natural range 0 to 13 := 0;
	
	--signals for multiplier
	signal mult_in_a	:	signed(coeff_bits - 1 downto 0) := (others=>'0');
	signal mult_in_b	:	signed(coeff_bits + bit_width - 1 downto 0) := (others=>'0');
	signal mult_out	:	signed((coeff_bits + coeff_bits + bit_width - 1) downto 0) := (others=>'0');

	--temp regs and delay regs
	signal temp_in_l, in_z1_l, in_z2_l			:	signed(bit_width - 1 downto 0):= (others=>'0');	
	signal temp_in_r, in_z1_r, in_z2_r			:	signed(bit_width - 1 downto 0):= (others=>'0');	
	signal out_z1_l, out_z2_l						:	signed(coeff_bits + bit_width - 1 downto 0):= (others=>'0');	
	signal out_z1_r, out_z2_r						:	signed(coeff_bits + bit_width - 1 downto 0):= (others=>'0');	
	signal temp			:	signed(coeff_bits + bit_width - 1 + 8 downto 0):= (others=>'0');
begin
	-- multiplier
	process(mult_in_a, mult_in_b)
	begin
		mult_out <= mult_in_a * mult_in_b;
	end process;

	--// calculate filter
	process(clk, rst)
	begin
		if (rst = '1') then
			-- reset internal signals
			temp_in_l <= ( others => '0');
			temp_in_r <= ( others => '0');
			in_z1_l <= ( others => '0');
			in_z2_l <= ( others => '0');
			in_z1_r <= ( others => '0');
			in_z2_r <= ( others => '0');
			out_z1_l <= ( others => '0');
			out_z2_l <= ( others => '0');
			out_z1_r <= ( others => '0');
			out_z2_r <= ( others => '0');
			temp <= ( others => '0');
			
			-- set output to zero
			output_l <= ( others => '0');
			output_r <= ( others => '0');
			sync_out <= '1';
			
			-- call last state to reset filter-states for next calculation
			state <= 13;
		else
			if rising_edge(clk) then
				if (sync_in = '1' and state = 0) then
					mult_in_a <= a0;
					mult_in_b <= resize(input_l, coeff_bits + bit_width);
					temp_in_l <= input_l;
					temp_in_r <= input_r;
					state <= 1; -- start of state-machine
				elsif (state = 1) then
					temp <= resize(mult_out, temp'length);
					mult_in_a <= a1;
					mult_in_b <= resize(in_z1_l, coeff_bits + bit_width);
					state <= state + 1;
				elsif (state = 2) then
					-- save and sum up result of (in_z1*a1) to temp and load multiplier with in_z2 and a2
					temp <= temp + resize(mult_out, temp'length);
					mult_in_a <= a2;
					mult_in_b <= resize(in_z2_l, coeff_bits + bit_width);
					state <= state + 1;
								
				elsif (state = 3) then
					-- save and sum up result of (in_z2*a2) to temp and load multiplier with out_z1 and b1
					temp <= temp + resize(mult_out, temp'length);
					mult_in_a <= b1;
					mult_in_b <= out_z1_l;
					state <= state + 1;
					
				elsif (state = 4) then
					-- save and sum up (negative) result of (out_z1*b1) and load multiplier with out_z2 and b2
					temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
					mult_in_a <= b2;
					mult_in_b <= out_z2_l;
					state <= state + 1;
									
				elsif (state = 5) then
					-- save and sum up (negative) result of (out_z2*b2)
					temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
					state <= state + 1;
					
				elsif (state = 6) then
					-- save all delay registers and save result to output
					in_z2_l <= in_z1_l;
					in_z1_l <= temp_in_l;

					out_z2_l <= out_z1_l;
					out_z1_l <= resize(temp, out_z1_l'length); -- save value with fractions to gain higher resolution for this filter
					
					if (bypass = '1') then
						output_l <= input_l;
					else
						output_l <= resize(shift_right(temp, fract_bits), bit_width); -- resize to 24-bit audio
					end if;

					-- load multiplier with a0 * input
					mult_in_a <= a0;
					mult_in_b <= resize(temp_in_r, coeff_bits + bit_width);

					state <= state + 1;
					
				elsif (state = 7) then
					-- save result of (samplein*a0) to temp and load multiplier with in_z1 and a1
					temp <= resize(mult_out, temp'length);
					mult_in_a <= a1;
					mult_in_b <= resize(in_z1_r, coeff_bits + bit_width);
					state <= state + 1;
					
				elsif (state = 8) then
					-- save and sum up result of (in_z1*a1) to temp and load multiplier with in_z2 and a2
					temp <= temp + resize(mult_out, temp'length);
					mult_in_a <= a2;
					mult_in_b <= resize(in_z2_r, coeff_bits + bit_width);
					state <= state + 1;
					
				elsif (state = 9) then
					-- save and sum up result of (in_z2*a2) to temp and load multiplier with out_z1 and b1
					temp <= temp + resize(mult_out, temp'length);
					mult_in_a <= b1;
					mult_in_b <= out_z1_r;
					state <= state + 1;
					
				elsif (state = 10) then
					-- save and sum up (negative) result of (out_z1*b1) and load multiplier with out_z2 and b2
					temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
					mult_in_a <= b2;
					mult_in_b <= out_z2_r;
					state <= state + 1;
					
				elsif (state = 11) then
					-- save and sum up (negative) result of (out_z2*b2)
					temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
					state <= state + 1;
					
				elsif (state = 12) then
					-- save all delay registers, save result to output and apply ouput-valid signal
					in_z2_r <= in_z1_r;
					in_z1_r <= temp_in_r;

					out_z2_r <= out_z1_r;
					out_z1_r <= resize(temp, out_z1_r'length); -- save value with fractions to gain higher resolution for this filter
					
					if (bypass = '1') then
						output_r <= input_r;
					else
						output_r <= resize(shift_right(temp, fract_bits), bit_width); -- resize to 24-bit audio
					end if;

					sync_out <= '1';
					state <= state + 1;
					
				elsif (state = 13) then
					sync_out <= '0';
					state <= 0;
				end if;
			end if;
		end if;
	end process;
end Behavioral;

The coefficients can be calculated like this:

C
typedef union 
{
    uint64_t u64;
    int64_t s64;
    uint32_t u32[2];
    int32_t s32[2];
    uint16_t u16[4];
    int16_t s16[4];
    uint8_t u8[8];
    int8_t s8[8];
    double d;
}data_64b;

struct sPEQ {
  // user-settings
  uint8_t type = 1; // 0=allpass, 1=peak, 2=low-shelf, 3=high-shelf, 4=bandpass, 5=notch, 6=lowpass, 7=highpass
  float fc = 400; // center-frequency of PEQ
  float Q = 1; // Quality of PEQ (bandwidth)
  float gain = 0; // gain of PEQ

  data_64b a[3];
  data_64b b[3];
};

void recalcFilterCoefficients_PEQ(struct sPEQ *peq) {
  // Online-Calculator: https://www.earlevel.com/main/2021/09/02/biquad-calculator-v3
  // Source: https://www.earlevel.com/main/2012/11/26/biquad-c-source-code

  double V = pow(10.0, fabs(peq->gain)/20.0);
  double K = tan(PI * peq->fc / audiomixer.sampleRate);
  double norm;
  double coeff_a[3];
  double coeff_b[3];

  switch (peq->type) {
    case 0: // allpass
      norm = 1.0 / (1.0 + K * 1.0/peq->Q + K * K);
      coeff_a[0] = (1.0 - K * 1.0/peq->Q + K * K) * norm;
      coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
      coeff_a[2] = 1.0;
      coeff_b[1] = coeff_a[1];
      coeff_b[2] = coeff_a[0];
      break;
    case 1: // peak-filter
      if (peq->gain >= 0) {
        norm = 1.0 / (1.0 + 1.0/peq->Q * K + K * K);
        coeff_a[0] = (1.0 + V/peq->Q * K + K * K) * norm;
        coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_a[2] = (1.0 - V/peq->Q * K + K * K) * norm;
        coeff_b[1] = coeff_a[1];
        coeff_b[2] = (1.0 - 1.0/peq->Q * K + K * K) * norm;
      }else{
        norm = 1.0 / (1.0 + V/peq->Q * K + K * K);
        coeff_a[0] = (1.0 + 1.0/peq->Q * K + K * K) * norm;
        coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_a[2] = (1.0 - 1.0/peq->Q * K + K * K) * norm;
        coeff_b[1] = coeff_a[1];
        coeff_b[2] = (1.0 - V/peq->Q * K + K * K) * norm;
      }
      break;
    case 2: // low-shelf
      if (peq->gain >= 0) {    // boost
        norm = 1.0 / (1.0 + sqrt(2.0) * K + K * K);
        coeff_a[0] = (1.0 + sqrt(2.0*V) * K + V * K * K) * norm;
        coeff_a[1] = 2.0 * (V * K * K - 1.0) * norm;
        coeff_a[2] = (1.0 - sqrt(2.0*V) * K + V * K * K) * norm;
        coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_b[2] = (1.0 - sqrt(2.0) * K + K * K) * norm;
      }
      else {    // cut
        norm = 1.0 / (1.0 + sqrt(2.0*V) * K + V * K * K);
        coeff_a[0] = (1.0 + sqrt(2.0) * K + K * K) * norm;
        coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_a[2] = (1.0 - sqrt(2) * K + K * K) * norm;
        coeff_b[1] = 2.0 * (V * K * K - 1.0) * norm;
        coeff_b[2] = (1.0 - sqrt(2.0*V) * K + V * K * K) * norm;
      }
      break;
    case 3: // high-shelf
        if (peq->gain >= 0) {    // boost
          norm = 1.0 / (1.0 + sqrt(2.0) * K + K * K);
          coeff_a[0] = (V + sqrt(2.0*V) * K + K * K) * norm;
          coeff_a[1] = 2.0 * (K * K - V) * norm;
          coeff_a[2] = (V - sqrt(2.0*V) * K + K * K) * norm;
          coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
          coeff_b[2] = (1.0 - sqrt(2.0) * K + K * K) * norm;
        }
        else {    // cut
          norm = 1.0 / (V + sqrt(2.0*V) * K + K * K);
          coeff_a[0] = (1.0 + sqrt(2.0) * K + K * K) * norm;
          coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
          coeff_a[2] = (1.0 - sqrt(2.0) * K + K * K) * norm;
          coeff_b[1] = 2.0 * (K * K - V) * norm;
          coeff_b[2] = (V - sqrt(2.0*V) * K + K * K) * norm;
        }
      break;
    case 4: // bandpass
        norm = 1.0 / (1.0 + K / peq->Q + K * K);
        coeff_a[0] = (K / peq->Q) * norm;
        coeff_a[1] = 0;
        coeff_a[2] = -coeff_a[0];
        coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
      break;
    case 5: // notch
        norm = 1.0 / (1.0 + K / peq->Q + K * K);
        coeff_a[0] = (1.0 + K * K) * norm;
        coeff_a[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_a[2] = coeff_a[0];
        coeff_b[1] = coeff_a[1];
        coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
      break;
    case 6: // lowpass
        norm = 1.0 / (1.0 + K / peq->Q + K * K);
        coeff_a[0] = K * K * norm;
        coeff_a[1] = 2.0 * coeff_a[0];
        coeff_a[2] = coeff_a[0];
        coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
      break;
    case 7: // highpass
        norm = 1.0 / (1.0 + K / peq->Q + K * K);
        coeff_a[0] = 1.0 * norm;
        coeff_a[1] = -2.0 * coeff_a[0];
        coeff_a[2] = coeff_a[0];
        coeff_b[1] = 2.0 * (K * K - 1.0) * norm;
        coeff_b[2] = (1.0 - K / peq->Q + K * K) * norm;
      break;
  }

  // convert to Q34-format
  for (int i=0; i<3; i++) {
    peq->a[i].s64 = coeff_a[i] * 17179869183; // convert to Q34
    peq->b[i].s64 = coeff_b[i] * 17179869183; // convert to Q34
  }
}

Leave a comment

Your email address will not be published. Required fields are marked *