Stereo Linkwitz-Riley crossover

The following code processes an incoming stereo audio signal and a prepared mono sub signal. Each channel is run through a 4th-order Linkwitz-Riley filter (24 dB/octave) to produce the top and sub outputs of the crossover.

The calculation of the coefficients is shown further below. Please have a look at my X/FBAPE project on GitHub: https://www.github.com/xn--nding-jua/xfbape

VHDL

-- Stereo Linkwitz-Riley crossover with 24dB/oct
-- Christian Noeding, christian@noeding-online.de
-- https://chrisdevblog.com | https://github.com/xn--nding-jua
--
-- Released under GNU General Public License v3

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use ieee.numeric_std.all;

-- Time-multiplexed 4th-order IIR filter (Linkwitz-Riley, 24 dB/oct) for three
-- 24-bit audio channels (left, right, sub). All three channels are filtered
-- with the same coefficient set a0..a4 / b1..b4 supplied through the ports.
entity filter_lr24_threeway is
  generic(
    -- total width of one coefficient
    coeff_bits	:	natural range 16 to 48 := 40; -- for a Qfract_bits-coefficient: sign-bit + integer-bits + fractional bits = 1 + 4 + fract_bits = 40 bit
    -- number of fractional bits of the fixed-point coefficients
    fract_bits	:	natural range 16 to 48 := 35
    );
  port (
	clk			:	in std_logic := '0'; -- all registers update on the rising edge
	input_l		:	in signed(23 downto 0) := (others=>'0'); -- left input sample
	input_r		:	in signed(23 downto 0) := (others=>'0'); -- right input sample
	input_sub	:	in signed(23 downto 0) := (others=>'0'); -- sub input sample
	sync_in		:	in std_logic := '0'; -- '1' while the filter is idle latches the three inputs and starts one calculation run
	rst			:	in std_logic := '0'; -- asynchronous, active-high reset of all delay registers and outputs

	-- feed-forward coefficients (applied to the current and the four previous input samples)
	-- coefficients have to be multiplied with 2^fract_bits before
	a0_lp 		:	in signed(coeff_bits - 1 downto 0);
	a1_lp 		:	in signed(coeff_bits - 1 downto 0);
	a2_lp 		:	in signed(coeff_bits - 1 downto 0);
	a3_lp 		:	in signed(coeff_bits - 1 downto 0);
	a4_lp 		:	in signed(coeff_bits - 1 downto 0);

	-- feedback coefficients (applied to the four previous output values and subtracted)
	-- coefficients have to be multiplied with 2^fract_bits before
	b1_lp 		:	in signed(coeff_bits - 1 downto 0);
	b2_lp 		:	in signed(coeff_bits - 1 downto 0);
	b3_lp 		:	in signed(coeff_bits - 1 downto 0);
	b4_lp 		:	in signed(coeff_bits - 1 downto 0);

	output_l		:	out signed(23 downto 0) := (others=>'0'); -- filtered left sample
	output_r		:	out signed(23 downto 0) := (others=>'0'); -- filtered right sample
	output_sub	:	out signed(23 downto 0) := (others=>'0'); -- filtered sub sample
	sync_out		:	out std_logic := '0' -- '1' for one clock once all three outputs are updated; also driven '1' while rst is asserted
	);
end filter_lr24_threeway;

architecture Behavioral of filter_lr24_threeway is
	-- One shared multiplier evaluates the 4th-order difference equation
	--   y[n] = a0*x[n] + a1*x[n-1] + a2*x[n-2] + a3*x[n-3] + a4*x[n-4]
	--          - b1*y[n-1] - b2*y[n-2] - b3*y[n-3] - b4*y[n-4]
	-- one product per clock, first for the left, then the right, then the sub channel.
	--
	-- State map (unchanged timing, 32 clocks per run):
	--   0       idle, waits for sync_in, latches inputs, loads a0 * input_l
	--   1..10   left channel  (9 accumulate steps + 1 commit step)
	--   11..20  right channel
	--   21..30  sub channel, sync_out is raised in state 30
	--   31      sync_out is lowered, back to idle

	constant NUM_CH	:	natural := 3;					-- 0 = left, 1 = right, 2 = sub
	constant DATA_W	:	natural := coeff_bits + 24;	-- width of the multiplier data operand and of the stored outputs
	constant ACC_W	:	natural := DATA_W + 8;			-- accumulator width (8 guard bits)

	subtype sample_t	is signed(23 downto 0);
	subtype coeff_t	is signed(coeff_bits - 1 downto 0);
	subtype data_t	is signed(DATA_W - 1 downto 0);

	type coeff_taps_t	is array (1 to 4) of coeff_t;
	type sample_taps_t	is array (1 to 4) of sample_t;
	type data_taps_t	is array (1 to 4) of data_t;
	type sample_vec_t	is array (0 to NUM_CH - 1) of sample_t;
	type sample_bank_t	is array (0 to NUM_CH - 1) of sample_taps_t;
	type data_bank_t	is array (0 to NUM_CH - 1) of data_taps_t;

	-- Clamp a wide signed value to the 24-bit audio range.
	-- A plain resize() keeps the sign bit plus the low bits and therefore
	-- returns unrelated values as soon as the filter output overshoots.
	function saturate24(v : signed) return signed is
		constant MAX24	:	signed(23 downto 0) := to_signed(2**23 - 1, 24);
		constant MIN24	:	signed(23 downto 0) := to_signed(-(2**23), 24);
	begin
		if v > MAX24 then
			return MAX24;
		elsif v < MIN24 then
			return MIN24;
		end if;
		return resize(v, 24);
	end function;

	signal state		:	natural range 0 to 31 := 0;

	-- signals for multiplier
	signal mult_in_a	:	coeff_t := (others=>'0');
	signal mult_in_b	:	data_t := (others=>'0');
	signal mult_out	:	signed(coeff_bits + DATA_W - 1 downto 0) := (others=>'0');

	-- latched input samples, one per channel
	signal temp_in	:	sample_vec_t := (others => (others=>'0'));
	-- delay lines: in_z(ch)(k) = x[n-k], out_z(ch)(k) = y[n-k] (kept with fraction bits)
	signal in_z		:	sample_bank_t := (others => (others => (others=>'0')));
	signal out_z		:	data_bank_t := (others => (others => (others=>'0')));
	-- accumulator for the running sum of products
	signal temp		:	signed(ACC_W - 1 downto 0) := (others=>'0');
begin
	-- combinational multiplier shared by all products
	mult_out <= mult_in_a * mult_in_b;

	-- sequencer and accumulator
	process(clk, rst)
		variable ch		:	natural range 0 to NUM_CH - 1;	-- channel handled in the current state
		variable step	:	natural range 0 to 31;			-- position 1..10 inside the channel sequence
		variable a_c		:	coeff_taps_t;
		variable b_c		:	coeff_taps_t;
	begin
		if (rst = '1') then
			-- reset internal signals (the multiplier operands are now cleared as well)
			mult_in_a <= (others => '0');
			mult_in_b <= (others => '0');
			temp_in <= (others => (others => '0'));
			in_z <= (others => (others => (others => '0')));
			out_z <= (others => (others => (others => '0')));
			temp <= (others => '0');

			-- set output to zero
			output_l <= (others => '0');
			output_r <= (others => '0');
			output_sub <= (others => '0');
			-- NOTE(review): sync_out is held at '1' during reset as in the original design - confirm this is what downstream blocks expect
			sync_out <= '1';

			-- continue with the last state so that sync_out is released and the machine returns to idle
			state <= 31;
		elsif rising_edge(clk) then
			case state is
				when 0 =>
					if (sync_in = '1') then
						-- latch all three inputs and load multiplier with a0 * input_l
						mult_in_a <= a0_lp;
						mult_in_b <= resize(input_l, DATA_W);
						temp_in(0) <= input_l;
						temp_in(1) <= input_r;
						temp_in(2) <= input_sub;
						state <= 1; -- start of state-machine
					end if;

				when 1 to 30 =>
					-- decode channel and step from the state number
					if state > 20 then
						ch := 2;
						step := state - 20;
					elsif state > 10 then
						ch := 1;
						step := state - 10;
					else
						ch := 0;
						step := state;
					end if;

					a_c := (1 => a1_lp, 2 => a2_lp, 3 => a3_lp, 4 => a4_lp);
					b_c := (1 => b1_lp, 2 => b2_lp, 3 => b3_lp, 4 => b4_lp);

					-- consume the product that was loaded in the previous state
					case step is
						when 1 =>
							-- a0 * x[n] starts a new sum
							temp <= resize(mult_out, ACC_W);
						when 2 to 5 =>
							-- a1..a4 * x[n-1..n-4]
							temp <= temp + resize(mult_out, ACC_W);
						when 6 to 9 =>
							-- b1..b4 * y[n-1..n-4]; the stored outputs already carry fract_bits
							-- fraction bits, so the product is shifted back before subtracting
							temp <= temp - resize(shift_right(mult_out, fract_bits), ACC_W);
						when others =>
							-- step 10: shift the delay lines and publish the result
							for tap in 4 downto 2 loop
								in_z(ch)(tap) <= in_z(ch)(tap - 1);
								out_z(ch)(tap) <= out_z(ch)(tap - 1);
							end loop;
							in_z(ch)(1) <= temp_in(ch);
							out_z(ch)(1) <= resize(temp, DATA_W); -- save value with fractions to gain higher resolution for this filter

							-- drop the fraction bits and clamp to 24-bit audio
							case ch is
								when 0 =>
									output_l <= saturate24(shift_right(temp, fract_bits));
								when 1 =>
									output_r <= saturate24(shift_right(temp, fract_bits));
								when others =>
									output_sub <= saturate24(shift_right(temp, fract_bits));
									sync_out <= '1'; -- all three outputs are valid now
							end case;
					end case;

					-- load the multiplier for the product consumed in the next state
					case step is
						when 1 to 4 =>
							mult_in_a <= a_c(step);
							mult_in_b <= resize(in_z(ch)(step), DATA_W);
						when 5 to 8 =>
							mult_in_a <= b_c(step - 4);
							mult_in_b <= out_z(ch)(step - 4);
						when 9 =>
							null; -- last product of this channel is already loaded
						when others =>
							-- step 10: start the next channel with a0 * latched input
							if ch = 0 then
								mult_in_a <= a0_lp;
								mult_in_b <= resize(temp_in(1), DATA_W);
							elsif ch = 1 then
								mult_in_a <= a0_lp;
								mult_in_b <= resize(temp_in(2), DATA_W);
							end if;
					end case;

					state <= state + 1;

				when others =>
					-- state 31: end of the sync_out pulse, return to idle
					sync_out <= '0';
					state <= 0;
			end case;
		end if;
	end process;
end Behavioral;

-- Stereo Linkwitz-Riley crossover with 24dB/oct
-- Christian Noeding, christian@noeding-online.de
-- https://chrisdevblog.com | https://github.com/xn--nding-jua
--
-- Released under GNU General Public License v3

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use ieee.numeric_std.all;

-- Time-multiplexed 4th-order IIR filter (Linkwitz-Riley, 24 dB/oct) for three
-- 24-bit audio channels (left, right, sub). All three channels are filtered
-- with the same coefficient set a0..a4 / b1..b4 supplied through the ports.
entity filter_lr24_threeway is
  generic(
    -- total width of one coefficient
    coeff_bits	:	natural range 16 to 48 := 40; -- for a Qfract_bits-coefficient: sign-bit + integer-bits + fractional bits = 1 + 4 + fract_bits = 40 bit
    -- number of fractional bits of the fixed-point coefficients
    fract_bits	:	natural range 16 to 48 := 35
    );
  port (
	clk			:	in std_logic := '0'; -- all registers update on the rising edge
	input_l		:	in signed(23 downto 0) := (others=>'0'); -- left input sample
	input_r		:	in signed(23 downto 0) := (others=>'0'); -- right input sample
	input_sub	:	in signed(23 downto 0) := (others=>'0'); -- sub input sample
	sync_in		:	in std_logic := '0'; -- '1' while the filter is idle latches the three inputs and starts one calculation run
	rst			:	in std_logic := '0'; -- asynchronous, active-high reset of all delay registers and outputs

	-- feed-forward coefficients (applied to the current and the four previous input samples)
	-- coefficients have to be multiplied with 2^fract_bits before
	a0_lp 		:	in signed(coeff_bits - 1 downto 0);
	a1_lp 		:	in signed(coeff_bits - 1 downto 0);
	a2_lp 		:	in signed(coeff_bits - 1 downto 0);
	a3_lp 		:	in signed(coeff_bits - 1 downto 0);
	a4_lp 		:	in signed(coeff_bits - 1 downto 0);

	-- feedback coefficients (applied to the four previous output values and subtracted)
	-- coefficients have to be multiplied with 2^fract_bits before
	b1_lp 		:	in signed(coeff_bits - 1 downto 0);
	b2_lp 		:	in signed(coeff_bits - 1 downto 0);
	b3_lp 		:	in signed(coeff_bits - 1 downto 0);
	b4_lp 		:	in signed(coeff_bits - 1 downto 0);

	output_l		:	out signed(23 downto 0) := (others=>'0'); -- filtered left sample
	output_r		:	out signed(23 downto 0) := (others=>'0'); -- filtered right sample
	output_sub	:	out signed(23 downto 0) := (others=>'0'); -- filtered sub sample
	sync_out		:	out std_logic := '0' -- '1' for one clock once all three outputs are updated; also driven '1' while rst is asserted
	);
end filter_lr24_threeway;

architecture Behavioral of filter_lr24_threeway is
	-- One shared multiplier evaluates the 4th-order difference equation
	--   y[n] = a0*x[n] + a1*x[n-1] + a2*x[n-2] + a3*x[n-3] + a4*x[n-4]
	--          - b1*y[n-1] - b2*y[n-2] - b3*y[n-3] - b4*y[n-4]
	-- one product per clock, first for the left, then the right, then the sub channel.
	--
	-- State map (unchanged timing, 32 clocks per run):
	--   0       idle, waits for sync_in, latches inputs, loads a0 * input_l
	--   1..10   left channel  (9 accumulate steps + 1 commit step)
	--   11..20  right channel
	--   21..30  sub channel, sync_out is raised in state 30
	--   31      sync_out is lowered, back to idle

	constant NUM_CH	:	natural := 3;					-- 0 = left, 1 = right, 2 = sub
	constant DATA_W	:	natural := coeff_bits + 24;	-- width of the multiplier data operand and of the stored outputs
	constant ACC_W	:	natural := DATA_W + 8;			-- accumulator width (8 guard bits)

	subtype sample_t	is signed(23 downto 0);
	subtype coeff_t	is signed(coeff_bits - 1 downto 0);
	subtype data_t	is signed(DATA_W - 1 downto 0);

	type coeff_taps_t	is array (1 to 4) of coeff_t;
	type sample_taps_t	is array (1 to 4) of sample_t;
	type data_taps_t	is array (1 to 4) of data_t;
	type sample_vec_t	is array (0 to NUM_CH - 1) of sample_t;
	type sample_bank_t	is array (0 to NUM_CH - 1) of sample_taps_t;
	type data_bank_t	is array (0 to NUM_CH - 1) of data_taps_t;

	-- Clamp a wide signed value to the 24-bit audio range.
	-- A plain resize() keeps the sign bit plus the low bits and therefore
	-- returns unrelated values as soon as the filter output overshoots.
	function saturate24(v : signed) return signed is
		constant MAX24	:	signed(23 downto 0) := to_signed(2**23 - 1, 24);
		constant MIN24	:	signed(23 downto 0) := to_signed(-(2**23), 24);
	begin
		if v > MAX24 then
			return MAX24;
		elsif v < MIN24 then
			return MIN24;
		end if;
		return resize(v, 24);
	end function;

	signal state		:	natural range 0 to 31 := 0;

	-- signals for multiplier
	signal mult_in_a	:	coeff_t := (others=>'0');
	signal mult_in_b	:	data_t := (others=>'0');
	signal mult_out	:	signed(coeff_bits + DATA_W - 1 downto 0) := (others=>'0');

	-- latched input samples, one per channel
	signal temp_in	:	sample_vec_t := (others => (others=>'0'));
	-- delay lines: in_z(ch)(k) = x[n-k], out_z(ch)(k) = y[n-k] (kept with fraction bits)
	signal in_z		:	sample_bank_t := (others => (others => (others=>'0')));
	signal out_z		:	data_bank_t := (others => (others => (others=>'0')));
	-- accumulator for the running sum of products
	signal temp		:	signed(ACC_W - 1 downto 0) := (others=>'0');
begin
	-- combinational multiplier shared by all products
	mult_out <= mult_in_a * mult_in_b;

	-- sequencer and accumulator
	process(clk, rst)
		variable ch		:	natural range 0 to NUM_CH - 1;	-- channel handled in the current state
		variable step	:	natural range 0 to 31;			-- position 1..10 inside the channel sequence
		variable a_c		:	coeff_taps_t;
		variable b_c		:	coeff_taps_t;
	begin
		if (rst = '1') then
			-- reset internal signals (the multiplier operands are now cleared as well)
			mult_in_a <= (others => '0');
			mult_in_b <= (others => '0');
			temp_in <= (others => (others => '0'));
			in_z <= (others => (others => (others => '0')));
			out_z <= (others => (others => (others => '0')));
			temp <= (others => '0');

			-- set output to zero
			output_l <= (others => '0');
			output_r <= (others => '0');
			output_sub <= (others => '0');
			-- NOTE(review): sync_out is held at '1' during reset as in the original design - confirm this is what downstream blocks expect
			sync_out <= '1';

			-- continue with the last state so that sync_out is released and the machine returns to idle
			state <= 31;
		elsif rising_edge(clk) then
			case state is
				when 0 =>
					if (sync_in = '1') then
						-- latch all three inputs and load multiplier with a0 * input_l
						mult_in_a <= a0_lp;
						mult_in_b <= resize(input_l, DATA_W);
						temp_in(0) <= input_l;
						temp_in(1) <= input_r;
						temp_in(2) <= input_sub;
						state <= 1; -- start of state-machine
					end if;

				when 1 to 30 =>
					-- decode channel and step from the state number
					if state > 20 then
						ch := 2;
						step := state - 20;
					elsif state > 10 then
						ch := 1;
						step := state - 10;
					else
						ch := 0;
						step := state;
					end if;

					a_c := (1 => a1_lp, 2 => a2_lp, 3 => a3_lp, 4 => a4_lp);
					b_c := (1 => b1_lp, 2 => b2_lp, 3 => b3_lp, 4 => b4_lp);

					-- consume the product that was loaded in the previous state
					case step is
						when 1 =>
							-- a0 * x[n] starts a new sum
							temp <= resize(mult_out, ACC_W);
						when 2 to 5 =>
							-- a1..a4 * x[n-1..n-4]
							temp <= temp + resize(mult_out, ACC_W);
						when 6 to 9 =>
							-- b1..b4 * y[n-1..n-4]; the stored outputs already carry fract_bits
							-- fraction bits, so the product is shifted back before subtracting
							temp <= temp - resize(shift_right(mult_out, fract_bits), ACC_W);
						when others =>
							-- step 10: shift the delay lines and publish the result
							for tap in 4 downto 2 loop
								in_z(ch)(tap) <= in_z(ch)(tap - 1);
								out_z(ch)(tap) <= out_z(ch)(tap - 1);
							end loop;
							in_z(ch)(1) <= temp_in(ch);
							out_z(ch)(1) <= resize(temp, DATA_W); -- save value with fractions to gain higher resolution for this filter

							-- drop the fraction bits and clamp to 24-bit audio
							case ch is
								when 0 =>
									output_l <= saturate24(shift_right(temp, fract_bits));
								when 1 =>
									output_r <= saturate24(shift_right(temp, fract_bits));
								when others =>
									output_sub <= saturate24(shift_right(temp, fract_bits));
									sync_out <= '1'; -- all three outputs are valid now
							end case;
					end case;

					-- load the multiplier for the product consumed in the next state
					case step is
						when 1 to 4 =>
							mult_in_a <= a_c(step);
							mult_in_b <= resize(in_z(ch)(step), DATA_W);
						when 5 to 8 =>
							mult_in_a <= b_c(step - 4);
							mult_in_b <= out_z(ch)(step - 4);
						when 9 =>
							null; -- last product of this channel is already loaded
						when others =>
							-- step 10: start the next channel with a0 * latched input
							if ch = 0 then
								mult_in_a <= a0_lp;
								mult_in_b <= resize(temp_in(1), DATA_W);
							elsif ch = 1 then
								mult_in_a <= a0_lp;
								mult_in_b <= resize(temp_in(2), DATA_W);
							end if;
					end case;

					state <= state + 1;

				when others =>
					-- state 31: end of the sync_out pulse, return to idle
					sync_out <= '0';
					state <= 0;
			end case;
		end if;
	end process;
end Behavioral;

The coefficients can be calculated like this:

// 64-bit value that can be accessed as unsigned/signed integers of
// 64, 32, 16 or 8 bit width or as a double. All members share the same 8 bytes.
// NOTE(review): which array element holds which part of the 64-bit value
// depends on the byte order of the target - confirm before relying on it.
typedef union 
{
    uint64_t u64;    // whole value, unsigned
    int64_t s64;     // whole value, signed (used for the fixed-point coefficients)
    uint32_t u32[2]; // two 32-bit words, unsigned
    int32_t s32[2];  // two 32-bit words, signed
    uint16_t u16[4]; // four 16-bit words, unsigned
    int16_t s16[4];  // four 16-bit words, signed
    uint8_t u8[8];   // eight bytes, unsigned
    int8_t s8[8];    // eight bytes, signed
    double d;        // floating-point view (used while the coefficients are calculated)
}data_64b;

// Settings and coefficients of one 4th-order Linkwitz-Riley filter (24 dB/oct).
struct sLR24 {
  // user-settings
  float fc = 100; // cutoff-frequency for high- or lowpass (presumably in Hz, same unit as the sample rate)
  bool isHighpass = false; // choose if Highpass or Lowpass

  // filter-coefficients
  // After recalcFilterCoefficients_LR24() the .s64 member of each entry holds
  // the fixed-point coefficient; the .d member is only valid during the calculation.
  data_64b a[5]; // feed-forward coefficients a0..a4
  data_64b b[5]; // feedback coefficients b1..b4 (b[0] is unused and set to 0)
};

// Recalculates the coefficients of a 4th-order Linkwitz-Riley filter (24 dB/oct)
// from LR24->fc and LR24->isHighpass and stores them as signed Q44 fixed-point
// values in LR24->a[0..4].s64 and LR24->b[0..4].s64 (b[0] is always 0).
//
// The sample rate is read from audiomixer.sampleRate.
// Precondition: 0 < fc < sampleRate / 2; otherwise tan() yields 0 or a
// meaningless value and the resulting coefficients are not usable.
// NOTE(review): the number of fractional bits used here (44) has to match the
// fract_bits generic of the VHDL filter instance that receives the values - confirm.
void recalcFilterCoefficients_LR24(struct sLR24 *LR24) {
  // Scale factor of a Q44 number is exactly 2^44.
  // (The previous value 17592186044415 was 2^44 - 1.)
  const double q44_scale = 17592186044416.0;

  // analog cutoff and bilinear-transform constant
  double wc = 2.0 * PI * LR24->fc;
  double wc2 = wc * wc;
  double wc3 = wc2 * wc;
  double wc4 = wc2 * wc2;
  double k = wc / tan(PI * (LR24->fc / audiomixer.sampleRate));
  double k2 = k * k;
  double k3 = k2 * k;
  double k4 = k2 * k2;
  double sq_tmp1 = sqrt(2.0) * wc3 * k;
  double sq_tmp2 = sqrt(2.0) * wc * k3;
  // common denominator that normalizes all coefficients
  double a_tmp = 4.0 * wc2 * k2 + 2.0 * sq_tmp1 + k4 + 2.0 * sq_tmp2 + wc4;

  if (LR24->isHighpass) {
    // coefficients for HighPass-Filter
    LR24->a[0].d = k4 / a_tmp;
    LR24->a[1].d = -4.0 * k4 / a_tmp;
    LR24->a[2].d = 6.0 * k4 / a_tmp;
  }else{
    // coefficients for LowPass-Filter
    LR24->a[0].d = wc4 / a_tmp;
    LR24->a[1].d = 4.0 * wc4 / a_tmp;
    LR24->a[2].d = 6.0 * wc4 / a_tmp;
  }
  // the numerator is symmetric in both cases
  LR24->a[3].d = LR24->a[1].d;
  LR24->a[4].d = LR24->a[0].d;

  // the feedback coefficients are identical for high- and lowpass
  LR24->b[0].d = 0; // we are not using this coefficient but keep it to not confuse with indices
  LR24->b[1].d = (4.0 * (wc4 + sq_tmp1 - k4 - sq_tmp2)) / a_tmp;
  LR24->b[2].d = (6.0 * wc4 - 8.0 * wc2 * k2 + 6.0 * k4) / a_tmp;
  LR24->b[3].d = (4.0 * (wc4 - sq_tmp1 + sq_tmp2 - k4)) / a_tmp;
  LR24->b[4].d = (k4 - 2.0 * sq_tmp1 + wc4 - 2.0 * sq_tmp2 + 4.0 * wc2 * k2) / a_tmp;

  // convert to Q44-format; the double is read first and then overwritten by
  // the integer because both live in the same union (conversion truncates toward zero)
  for (int i=0; i<5; i++) {
    LR24->a[i].s64 = (int64_t)(LR24->a[i].d * q44_scale);
    LR24->b[i].s64 = (int64_t)(LR24->b[i].d * q44_scale);
  }
}

// 64-bit value that can be accessed as unsigned/signed integers of
// 64, 32, 16 or 8 bit width or as a double. All members share the same 8 bytes.
// NOTE(review): which array element holds which part of the 64-bit value
// depends on the byte order of the target - confirm before relying on it.
typedef union 
{
    uint64_t u64;    // whole value, unsigned
    int64_t s64;     // whole value, signed (used for the fixed-point coefficients)
    uint32_t u32[2]; // two 32-bit words, unsigned
    int32_t s32[2];  // two 32-bit words, signed
    uint16_t u16[4]; // four 16-bit words, unsigned
    int16_t s16[4];  // four 16-bit words, signed
    uint8_t u8[8];   // eight bytes, unsigned
    int8_t s8[8];    // eight bytes, signed
    double d;        // floating-point view (used while the coefficients are calculated)
}data_64b;

// Settings and coefficients of one 4th-order Linkwitz-Riley filter (24 dB/oct).
struct sLR24 {
  // user-settings
  float fc = 100; // cutoff-frequency for high- or lowpass (presumably in Hz, same unit as the sample rate)
  bool isHighpass = false; // choose if Highpass or Lowpass

  // filter-coefficients
  // After recalcFilterCoefficients_LR24() the .s64 member of each entry holds
  // the fixed-point coefficient; the .d member is only valid during the calculation.
  data_64b a[5]; // feed-forward coefficients a0..a4
  data_64b b[5]; // feedback coefficients b1..b4 (b[0] is unused and set to 0)
};

// Recalculates the coefficients of a 4th-order Linkwitz-Riley filter (24 dB/oct)
// from LR24->fc and LR24->isHighpass and stores them as signed Q44 fixed-point
// values in LR24->a[0..4].s64 and LR24->b[0..4].s64 (b[0] is always 0).
//
// The sample rate is read from audiomixer.sampleRate.
// Precondition: 0 < fc < sampleRate / 2; otherwise tan() yields 0 or a
// meaningless value and the resulting coefficients are not usable.
// NOTE(review): the number of fractional bits used here (44) has to match the
// fract_bits generic of the VHDL filter instance that receives the values - confirm.
void recalcFilterCoefficients_LR24(struct sLR24 *LR24) {
  // Scale factor of a Q44 number is exactly 2^44.
  // (The previous value 17592186044415 was 2^44 - 1.)
  const double q44_scale = 17592186044416.0;

  // analog cutoff and bilinear-transform constant
  double wc = 2.0 * PI * LR24->fc;
  double wc2 = wc * wc;
  double wc3 = wc2 * wc;
  double wc4 = wc2 * wc2;
  double k = wc / tan(PI * (LR24->fc / audiomixer.sampleRate));
  double k2 = k * k;
  double k3 = k2 * k;
  double k4 = k2 * k2;
  double sq_tmp1 = sqrt(2.0) * wc3 * k;
  double sq_tmp2 = sqrt(2.0) * wc * k3;
  // common denominator that normalizes all coefficients
  double a_tmp = 4.0 * wc2 * k2 + 2.0 * sq_tmp1 + k4 + 2.0 * sq_tmp2 + wc4;

  if (LR24->isHighpass) {
    // coefficients for HighPass-Filter
    LR24->a[0].d = k4 / a_tmp;
    LR24->a[1].d = -4.0 * k4 / a_tmp;
    LR24->a[2].d = 6.0 * k4 / a_tmp;
  }else{
    // coefficients for LowPass-Filter
    LR24->a[0].d = wc4 / a_tmp;
    LR24->a[1].d = 4.0 * wc4 / a_tmp;
    LR24->a[2].d = 6.0 * wc4 / a_tmp;
  }
  // the numerator is symmetric in both cases
  LR24->a[3].d = LR24->a[1].d;
  LR24->a[4].d = LR24->a[0].d;

  // the feedback coefficients are identical for high- and lowpass
  LR24->b[0].d = 0; // we are not using this coefficient but keep it to not confuse with indices
  LR24->b[1].d = (4.0 * (wc4 + sq_tmp1 - k4 - sq_tmp2)) / a_tmp;
  LR24->b[2].d = (6.0 * wc4 - 8.0 * wc2 * k2 + 6.0 * k4) / a_tmp;
  LR24->b[3].d = (4.0 * (wc4 - sq_tmp1 + sq_tmp2 - k4)) / a_tmp;
  LR24->b[4].d = (k4 - 2.0 * sq_tmp1 + wc4 - 2.0 * sq_tmp2 + 4.0 * wc2 * k2) / a_tmp;

  // convert to Q44-format; the double is read first and then overwritten by
  // the integer because both live in the same union (conversion truncates toward zero)
  for (int i=0; i<5; i++) {
    LR24->a[i].s64 = (int64_t)(LR24->a[i].d * q44_scale);
    LR24->b[i].s64 = (int64_t)(LR24->b[i].d * q44_scale);
  }
}

Chris.Dev.Blog

Electronics, Programming and Development

Stereo Linkwitz-Riley crossover

Leave a comment Cancel reply