FIR filter implementation doesn't use Cyclone DSP blocks

I've coded up an FIR filter in VHDL that uses up to 10 clocks to multiply sample pairs by filter coefficients and accumulate the results. This seems to work but Quartus implements this all as logic, which isn't going to fit when I need 50+ filters. Ive had a look at https://community.intel.com/t5/FPGA-Wiki/FIR-Filter-Design-in-Arria-V-Cyclone-V-DSP-Block-Using-VHDL/ta-p/735970 but I'm confused by the lack of background context that I'm supposed to know.

Does anyone have any guidance on how to use the DSP blocks as an FIR filter using sequential accumulation?

My code looks like this:

entity FIRLPF0_20 is
   generic (IO_WIDTH : integer := 15; COEFF_WIDTH : integer := 16);
   port (
      clk:        in  Std_logic;			-- Expecting this to be 100 MHz
      ph:         in  Std_logic_vector(9 downto 0);       -- Expecting this to be one hot of 10
		din:        in  signed(IO_WIDTH - 1 downto 0);
      dout:       out signed(IO_WIDTH - 1 downto 0)
   );
end;

-- Pipeline samples and run MAC over them
architecture DFLT of FIRLPF0_20 is
   -- when you add two samples you need an extra bit for the result
   constant PAIR_WIDTH : integer := IO_WIDTH + 1;
   type t_data_pipe    is array (0 to 12) of signed(IO_WIDTH - 1 downto 0);
   signal p_data     : t_data_pipe;
  
   type FAState_et is (Idle, S0, S1, S2, S3, S4, S5, S6);
   signal fas: FAState_et;

   -- define the filter coefficients a Q15 numbers
	constant  K0_12: signed(COEFF_WIDTH - 1 downto 0) := to_signed(1040, COEFF_WIDTH);
	constant  K1_11: signed(COEFF_WIDTH - 1 downto 0) := to_signed(-1962, COEFF_WIDTH);
	constant  K2_10: signed(COEFF_WIDTH - 1 downto 0) := to_signed(-1723, COEFF_WIDTH);
	constant  K3_9:  signed(COEFF_WIDTH - 1 downto 0) := to_signed(325, COEFF_WIDTH);
	constant  K4_8:  signed(COEFF_WIDTH - 1 downto 0) := to_signed(4438, COEFF_WIDTH);
   constant  K5_7:  signed(COEFF_WIDTH - 1 downto 0) := to_signed(8724, COEFF_WIDTH);
   constant  K6:    signed(COEFF_WIDTH - 1 downto 0) := to_signed(10556, COEFF_WIDTH);

	signal samppair:  Signed(PAIR_WIDTH - 1 downto 0);
	signal accum:     Signed(COEFF_WIDTH + PAIR_WIDTH - 1 downto 0);
	signal Kprod:     Signed(COEFF_WIDTH + PAIR_WIDTH - 1 downto 0);

	
begin
   -- Chunk the samples through the pipeline
   process(clk)
	Begin
      if rising_edge( clk ) then
		   if ('1' = ph(9)) then
            p_data <= signed(din)&p_data(0 to p_data'length-2);
			end if;
		end if;	
	end process; 

   -- Do the MACs when there is fresh data
   process(clk)
	Begin
      if rising_edge( clk ) then
         case fas is
         when Idle =>
			   -- on exit samppair is [5] + [7], Kprod is [6]
				-- Don't touch the accum as we want it available for output
			   Kprod <= K6 * resize(p_data(6), 16);
				samppair <= resize(p_data(5), PAIR_WIDTH) + resize(p_data(7), PAIR_WIDTH);
				if '1' = ph(0) then
				   fas <= S0;
				end if;
         when S0 =>
			   -- on exit samppair is [4] + [8], Kprod is [5] + [7]
			   Kprod <= K5_7 * samppair;
				accum <= Kprod;
				samppair <= resize(p_data(4), PAIR_WIDTH) + resize(p_data(8),PAIR_WIDTH);
				fas <= S1;
			when S1 =>
			   Kprod <= K4_8 * samppair;
				accum <= Kprod + accum;
				samppair <= resize(p_data(3), PAIR_WIDTH) + resize(p_data(9),PAIR_WIDTH);
				fas <= S2;
			when S2 =>
			   Kprod <= K3_9 * samppair;
				accum <= Kprod + accum;
				samppair <= resize(p_data(2), PAIR_WIDTH) + resize(p_data(10),PAIR_WIDTH);
				fas <= S3;
			when S3 =>
			   Kprod <= K2_10 * samppair;
				accum <= Kprod + accum;
				samppair <= resize(p_data(1), PAIR_WIDTH) + resize(p_data(11),PAIR_WIDTH);
				fas <= S4;
			when S4 =>
			   Kprod <= K1_11 * samppair;
				accum <= Kprod + accum;
				samppair <= resize(p_data(0), PAIR_WIDTH) + resize(p_data(12),PAIR_WIDTH);
				fas <= S5;
			when S5 =>
			   Kprod <= K0_12 * samppair;
				accum <= Kprod + accum;
				fas <= S6;
			when S6 =>
				accum <= Kprod + accum;
				fas <= Idle;
			end case;
		end if;	
	end process; 
	
	dout <= signed(accum(29 downto 15));
end;

Kshitij_Intel
2 years ago
Hi,

Please refer this coding style to infer DSP block.

dsp-block-vhdl-inferring-a5c5-figure-7.jpg (748×397) (intel.com)

dsp-block-vhdl-inferring-a5c5-figure-8.jpg (809×390) (intel.com)

Thank you,
Kshitij Goel

Forum Discussion

FIR filter implementation doesn't use Cyclone DSP blocks

3 Replies

Recent Discussions

timing violation fix

Issues with downloading

Quartus Prime Lite 25.1 License Error - "Unable to checkout a license" (SALT_LICENSE_SERVER)

Quartus Prime Pro 26.1 - Where to find Documentation of new Signaltap features

Error (292014): Can't find valid feature line for core SLL_CA_HBC_T001_Hyperbus_Memory_Controller_10