The following code processes an incoming stereo audio signal and a prepared mono sub signal and calculates a 24 dB/oct Linkwitz-Riley crossover for the top and sub outputs.
The calculation of the coefficients is shown further below. Please also have a look at my X/FBAPE project on GitHub: https://www.github.com/xn--nding-jua/xfbape
VHDL
-- Stereo Linkwitz-Riley crossover with 24dB/oct
-- Christian Noeding, christian@noeding-online.de
-- https://chrisdevblog.com | https://github.com/xn--nding-jua
--
-- Released under GNU General Public License v3
library IEEE;
use IEEE.STD_LOGIC_1164.all;
use ieee.numeric_std.all;
-- Fixed-point 4th-order IIR filter for three 24-bit audio channels (left, right, sub).
-- The architecture processes the channels one after another with a single shared
-- multiplier and applies the same coefficient set (a0..a4, b1..b4) to all of them.
entity filter_lr24_threeway is
generic(
-- total coefficient width: sign bit + integer bits + fractional bits = 1 + 4 + fract_bits = 40 bit (defaults)
coeff_bits : natural range 16 to 48 := 40; -- for a Qfract_bits-coefficient: signed-bit + integer-bits + Qfract_bits-bits = 1 + 4 + fract_bits = 40 bit
-- number of fractional bits of the coefficients; results are shifted right by this amount
fract_bits : natural range 16 to 48 := 35
);
port (
-- system clock; the state machine advances one step per rising edge
clk : in std_logic := '0';
-- 24-bit signed input samples; latched when sync_in = '1' while the filter is idle
input_l : in signed(23 downto 0) := (others=>'0');
input_r : in signed(23 downto 0) := (others=>'0');
input_sub : in signed(23 downto 0) := (others=>'0');
-- start strobe: begins a new calculation; ignored while a calculation is in progress
sync_in : in std_logic := '0';
-- asynchronous, active-high reset: clears all filter history and the outputs
rst : in std_logic := '0';
-- feed-forward coefficients (applied to the current and the four previous inputs)
-- coefficients have to be multiplied with 2^fract_bits before
a0_lp : in signed(coeff_bits - 1 downto 0);
a1_lp : in signed(coeff_bits - 1 downto 0);
a2_lp : in signed(coeff_bits - 1 downto 0);
a3_lp : in signed(coeff_bits - 1 downto 0);
a4_lp : in signed(coeff_bits - 1 downto 0);
-- feedback coefficients (applied to the four previous outputs and subtracted)
-- coefficients have to be multiplied with 2^fract_bits before
b1_lp : in signed(coeff_bits - 1 downto 0);
b2_lp : in signed(coeff_bits - 1 downto 0);
b3_lp : in signed(coeff_bits - 1 downto 0);
b4_lp : in signed(coeff_bits - 1 downto 0);
-- 24-bit signed filtered samples; updated one after another (left, right, sub)
output_l : out signed(23 downto 0) := (others=>'0');
output_r : out signed(23 downto 0) := (others=>'0');
output_sub : out signed(23 downto 0) := (others=>'0');
-- set to '1' together with output_sub and cleared one clock later; also '1' while rst is asserted
sync_out : out std_logic := '0'
);
end filter_lr24_threeway;
-- Time-multiplexed direct-form-I implementation of
--   y[n] = a0*x[n] + a1*x[n-1] + a2*x[n-2] + a3*x[n-3] + a4*x[n-4]
--          - b1*y[n-1] - b2*y[n-2] - b3*y[n-3] - b4*y[n-4]
-- for the left, right and sub channel. One multiplier is shared: every state
-- accumulates the product launched in the previous state and launches the next one.
--
-- Fixed-point scaling: coefficients are pre-multiplied by 2^fract_bits. Feed-forward
-- products (coefficient * integer sample) are accumulated unshifted, so the
-- accumulator and the output history carry fract_bits fractional bits. Feedback
-- products (coefficient * history) are shifted right by fract_bits before they are
-- subtracted.
architecture Behavioral of filter_lr24_threeway is
    -- Width of the multiplier data operand and of the output-history registers.
    constant DATA_BITS : natural := coeff_bits + 24;

    -- Sequencer: 0 = idle, 1..10 = left, 11..20 = right, 21..30 = sub, 31 = end of cycle.
    signal state : natural range 0 to 31 := 0;

    -- signals for the shared multiplier
    signal mult_in_a : signed(coeff_bits - 1 downto 0) := (others => '0');
    signal mult_in_b : signed(DATA_BITS - 1 downto 0) := (others => '0');
    signal mult_out  : signed(coeff_bits + DATA_BITS - 1 downto 0) := (others => '0');

    -- latched input samples and input history x[n-1]..x[n-4]
    signal temp_in_l, in_z1_l, in_z2_l, in_z3_l, in_z4_l           : signed(23 downto 0) := (others => '0');
    signal temp_in_r, in_z1_r, in_z2_r, in_z3_r, in_z4_r           : signed(23 downto 0) := (others => '0');
    signal temp_in_sub, in_z1_sub, in_z2_sub, in_z3_sub, in_z4_sub : signed(23 downto 0) := (others => '0');

    -- output history y[n-1]..y[n-4], kept with fractional bits for higher resolution
    signal out_z1_l, out_z2_l, out_z3_l, out_z4_l         : signed(DATA_BITS - 1 downto 0) := (others => '0');
    signal out_z1_r, out_z2_r, out_z3_r, out_z4_r         : signed(DATA_BITS - 1 downto 0) := (others => '0');
    signal out_z1_sub, out_z2_sub, out_z3_sub, out_z4_sub : signed(DATA_BITS - 1 downto 0) := (others => '0');

    -- accumulator, 8 bits wider than the history registers
    signal temp : signed(DATA_BITS + 8 - 1 downto 0) := (others => '0');

    -- Converts the accumulator to a 24-bit audio sample: drops the fractional bits
    -- and saturates to the 24-bit range. A plain resize() would keep only the sign
    -- bit and the low 23 bits and therefore output garbage on overflow.
    function to_audio24(acc : signed) return signed is
        constant MAX24  : signed(23 downto 0) := (23 => '0', others => '1');
        constant MIN24  : signed(23 downto 0) := (23 => '1', others => '0');
        variable scaled : signed(acc'length - 1 downto 0);
    begin
        scaled := shift_right(acc, fract_bits);
        if scaled > MAX24 then
            return MAX24;
        elsif scaled < MIN24 then
            return MIN24;
        end if;
        return resize(scaled, 24);
    end function;
begin
    -- combinational multiplier
    mult_out <= mult_in_a * mult_in_b;

    process(clk, rst)
        -- Presents a coefficient and a (sign-extended) data operand to the multiplier.
        procedure load_mult(coeff : in signed; operand : in signed) is
        begin
            mult_in_a <= coeff;
            mult_in_b <= resize(operand, DATA_BITS);
        end procedure;

        -- One multiply-accumulate step of a channel. 'phase' is the sequencer state
        -- (1..9, 11..19 or 21..29); x1..x4 / y1..y4 are that channel's input and
        -- output history.
        procedure mac_step(phase          : in natural;
                           x1, x2, x3, x4 : in signed;
                           y1, y2, y3, y4 : in signed) is
        begin
            -- collect the product that was launched in the previous state
            case phase is
                when 1 | 11 | 21 =>
                    -- a0 * x[n] starts a new sum
                    temp <= resize(mult_out, temp'length);
                when 2 to 5 | 12 to 15 | 22 to 25 =>
                    -- feed-forward terms a1..a4
                    temp <= temp + resize(mult_out, temp'length);
                when others =>
                    -- feedback terms b1..b4: rescale, then subtract
                    temp <= temp - resize(shift_right(mult_out, fract_bits), temp'length);
            end case;

            -- launch the next product
            case phase is
                when 1 | 11 | 21 => load_mult(a1_lp, x1);
                when 2 | 12 | 22 => load_mult(a2_lp, x2);
                when 3 | 13 | 23 => load_mult(a3_lp, x3);
                when 4 | 14 | 24 => load_mult(a4_lp, x4);
                when 5 | 15 | 25 => load_mult(b1_lp, y1);
                when 6 | 16 | 26 => load_mult(b2_lp, y2);
                when 7 | 17 | 27 => load_mult(b3_lp, y3);
                when 8 | 18 | 28 => load_mult(b4_lp, y4);
                when others      => null; -- 9/19/29: last term, nothing left to launch
            end case;
        end procedure;
    begin
        if rst = '1' then
            -- reset multiplier inputs as well, so that every register of this
            -- process has the same asynchronous reset
            mult_in_a <= (others => '0');
            mult_in_b <= (others => '0');

            -- reset internal signals
            temp_in_l   <= (others => '0');
            temp_in_r   <= (others => '0');
            temp_in_sub <= (others => '0');

            in_z1_l <= (others => '0');
            in_z2_l <= (others => '0');
            in_z3_l <= (others => '0');
            in_z4_l <= (others => '0');
            in_z1_r <= (others => '0');
            in_z2_r <= (others => '0');
            in_z3_r <= (others => '0');
            in_z4_r <= (others => '0');
            in_z1_sub <= (others => '0');
            in_z2_sub <= (others => '0');
            in_z3_sub <= (others => '0');
            in_z4_sub <= (others => '0');

            out_z1_l <= (others => '0');
            out_z2_l <= (others => '0');
            out_z3_l <= (others => '0');
            out_z4_l <= (others => '0');
            out_z1_r <= (others => '0');
            out_z2_r <= (others => '0');
            out_z3_r <= (others => '0');
            out_z4_r <= (others => '0');
            out_z1_sub <= (others => '0');
            out_z2_sub <= (others => '0');
            out_z3_sub <= (others => '0');
            out_z4_sub <= (others => '0');

            temp <= (others => '0');

            -- set output to zero
            output_l   <= (others => '0');
            output_r   <= (others => '0');
            output_sub <= (others => '0');
            -- NOTE(review): sync_out is held high while rst is asserted, i.e. the
            -- zeroed outputs are flagged as valid -- confirm this is intended.
            sync_out <= '1';

            -- continue with the last state, which clears sync_out and returns to idle
            state <= 31;
        elsif rising_edge(clk) then
            case state is
                when 0 =>
                    -- idle: wait for the start strobe, latch all inputs and
                    -- launch a0 * x[n] of the left channel
                    if sync_in = '1' then
                        temp_in_l   <= input_l;
                        temp_in_r   <= input_r;
                        temp_in_sub <= input_sub;
                        load_mult(a0_lp, input_l);
                        state <= 1;
                    end if;

                when 1 to 9 =>
                    mac_step(state, in_z1_l, in_z2_l, in_z3_l, in_z4_l,
                             out_z1_l, out_z2_l, out_z3_l, out_z4_l);
                    state <= state + 1;

                when 10 =>
                    -- left channel done: shift history, write output
                    in_z4_l <= in_z3_l;
                    in_z3_l <= in_z2_l;
                    in_z2_l <= in_z1_l;
                    in_z1_l <= temp_in_l;
                    out_z4_l <= out_z3_l;
                    out_z3_l <= out_z2_l;
                    out_z2_l <= out_z1_l;
                    out_z1_l <= resize(temp, DATA_BITS); -- keep fractional bits for higher resolution
                    output_l <= to_audio24(temp);
                    -- launch a0 * x[n] of the right channel
                    load_mult(a0_lp, temp_in_r);
                    state <= state + 1;

                when 11 to 19 =>
                    mac_step(state, in_z1_r, in_z2_r, in_z3_r, in_z4_r,
                             out_z1_r, out_z2_r, out_z3_r, out_z4_r);
                    state <= state + 1;

                when 20 =>
                    -- right channel done: shift history, write output
                    in_z4_r <= in_z3_r;
                    in_z3_r <= in_z2_r;
                    in_z2_r <= in_z1_r;
                    in_z1_r <= temp_in_r;
                    out_z4_r <= out_z3_r;
                    out_z3_r <= out_z2_r;
                    out_z2_r <= out_z1_r;
                    out_z1_r <= resize(temp, DATA_BITS); -- keep fractional bits for higher resolution
                    output_r <= to_audio24(temp);
                    -- launch a0 * x[n] of the sub channel
                    load_mult(a0_lp, temp_in_sub);
                    state <= state + 1;

                when 21 to 29 =>
                    mac_step(state, in_z1_sub, in_z2_sub, in_z3_sub, in_z4_sub,
                             out_z1_sub, out_z2_sub, out_z3_sub, out_z4_sub);
                    state <= state + 1;

                when 30 =>
                    -- sub channel done: shift history, write output, flag outputs as valid
                    in_z4_sub <= in_z3_sub;
                    in_z3_sub <= in_z2_sub;
                    in_z2_sub <= in_z1_sub;
                    in_z1_sub <= temp_in_sub;
                    out_z4_sub <= out_z3_sub;
                    out_z3_sub <= out_z2_sub;
                    out_z2_sub <= out_z1_sub;
                    out_z1_sub <= resize(temp, DATA_BITS); -- keep fractional bits for higher resolution
                    output_sub <= to_audio24(temp);
                    sync_out <= '1';
                    state <= state + 1;

                when others =>
                    -- state 31: end the one-clock sync_out pulse and return to idle
                    sync_out <= '0';
                    state <= 0;
            end case;
        end if;
    end process;
end Behavioral;
The coefficients can be calculated like this:
C
// 64 bits of storage that can be accessed as one 64-bit integer, as arrays of
// smaller integers, or as a double. All members overlay the same memory.
typedef union
{
uint64_t u64; // whole value, unsigned
int64_t s64; // whole value, signed
uint32_t u32[2]; // two 32-bit words, unsigned
int32_t s32[2]; // two 32-bit words, signed
uint16_t u16[4]; // four 16-bit words, unsigned
int16_t s16[4]; // four 16-bit words, signed
uint8_t u8[8]; // eight bytes, unsigned
int8_t s8[8]; // eight bytes, signed
double d; // floating-point view
}data_64b;
// Settings and coefficients of one Linkwitz-Riley 24 dB/oct filter.
struct sLR24 {
// user-settings
float fc = 100; // cutoff-frequency for high- or lowpass (presumably in Hz, i.e. the same unit as audiomixer.sampleRate)
bool isHighpass = false; // choose if Highpass or Lowpass
// filter-coefficients
// Filled by recalcFilterCoefficients_LR24(): computed as double (.d) and then
// overwritten in place with the fixed-point integer, so read them via .s64 afterwards.
data_64b a[5]; // feed-forward coefficients a[0]..a[4]
data_64b b[5]; // feedback coefficients b[1]..b[4]; b[0] is unused and set to 0
};
// Recalculates the coefficients of a 4th-order Linkwitz-Riley (24 dB/oct) filter
// from LR24->fc and LR24->isHighpass, using audiomixer.sampleRate as sample rate.
//
// On return, a[0..4] and b[0..4] hold the coefficients as Q44 fixed-point
// integers in their .s64 member (b[0] is unused and always 0). The double view
// (.d) is only used as scratch storage during the calculation.
//
// If fc is not strictly between 0 and half the sample rate, no valid filter can
// be computed (fc = 0 would yield 0/0 = NaN, whose integer conversion is
// undefined), so the function returns without touching the coefficients.
//
// NOTE(review): the fixed-point scale must match the `fract_bits` generic of the
// VHDL filter, which expects coefficients multiplied by 2^fract_bits. The VHDL
// default is 35, so confirm that the instance is configured for 44 fractional bits.
void recalcFilterCoefficients_LR24(struct sLR24 *LR24) {
// Q44 scale factor, exactly 2^44 (the previous constant was 2^44 - 1)
const double q44_scale = 17592186044416.0;

// reject cutoff frequencies outside (0, sampleRate / 2); the negated form also catches NaN
if (!(LR24->fc > 0.0f) || !(LR24->fc < 0.5 * audiomixer.sampleRate)) {
return;
}

double wc = 2.0 * PI * LR24->fc;
double wc2 = wc * wc;
double wc3 = wc2 * wc;
double wc4 = wc2 * wc2;
// pre-warped bilinear-transform constant
double k = wc / tan(PI * (LR24->fc / audiomixer.sampleRate));
double k2 = k * k;
double k3 = k2 * k;
double k4 = k2 * k2;
double sq_tmp1 = sqrt(2.0) * wc3 * k;
double sq_tmp2 = sqrt(2.0) * wc * k3;
// common denominator that normalizes all coefficients
double a_tmp = 4.0 * wc2 * k2 + 2.0 * sq_tmp1 + k4 + 2.0 * sq_tmp2 + wc4;

if (LR24->isHighpass) {
// coefficients for HighPass-Filter
LR24->a[0].d = k4 / a_tmp;
LR24->a[1].d = -4.0 * k4 / a_tmp;
LR24->a[2].d = 6.0 * k4 / a_tmp;
LR24->a[3].d = LR24->a[1].d;
LR24->a[4].d = LR24->a[0].d;
}else{
// coefficients for LowPass-Filter
LR24->a[0].d = wc4 / a_tmp;
LR24->a[1].d = 4.0 * wc4 / a_tmp;
LR24->a[2].d = 6.0 * wc4 / a_tmp;
LR24->a[3].d = LR24->a[1].d;
LR24->a[4].d = LR24->a[0].d;
}

// feedback coefficients are identical for high- and lowpass
LR24->b[0].d = 0; // we are not using this coefficient but keep it to not confuse with indices
LR24->b[1].d = (4.0 * (wc4 + sq_tmp1 - k4 - sq_tmp2)) / a_tmp;
LR24->b[2].d = (6.0 * wc4 - 8.0 * wc2 * k2 + 6.0 * k4) / a_tmp;
LR24->b[3].d = (4.0 * (wc4 - sq_tmp1 + sq_tmp2 - k4)) / a_tmp;
LR24->b[4].d = (k4 - 2.0 * sq_tmp1 + wc4 - 2.0 * sq_tmp2 + 4.0 * wc2 * k2) / a_tmp;

// convert to Q44-format: scale by 2^44 and round to the nearest integer
// (a plain cast would truncate toward zero). The double is read before the
// integer member of the same union is written.
for (int i=0; i<5; i++) {
LR24->a[i].s64 = llround(LR24->a[i].d * q44_scale);
LR24->b[i].s64 = llround(LR24->b[i].d * q44_scale);
}
}