Hi, I rewrote my state-machine, but I did not achieve much increase in performance.
Adding a multicycle-path constraint on the "semi-static" configuration registers caused the Fmax to go up, but I still have timing violations related to the filter. The calculations have been extracted from the state machine, as shown in the code below.
The timing analyzer still complains about long combinational paths. The process below performs its calculations on every clock cycle. Would it be a solution to add extra pipeline registers between the calculation stages, and to add multicycle constraints to relax the timing requirement?
-- Clocked datapath of the filter: multiply -> re-size -> select truncation
-- window -> accumulate. Every signal assigned here is registered on the
-- rising edge of clk, so each stage reads the value the previous stage
-- stored on the preceding clock edge.
p_filter_calculations : process (clk) is
  -- Scratch values. They are written before they are read on every clock
  -- edge, so they only name sub-expressions of the logic below.
  variable v_coef_re    : signed(GC_COEF_DATA_WIDTH-1 downto 0);
  variable v_coef_im    : signed(GC_COEF_DATA_WIDTH-1 downto 0);
  variable v_accumulate : boolean;
begin
  if rising_edge(clk) then

    -- Stage 1: multiply the input sample by both halves of the coefficient
    -- word. The upper half of i_coef_data feeds the Re product, the lower
    -- half feeds the Im product.
    v_coef_re := signed(i_coef_data(2*GC_COEF_DATA_WIDTH-1 downto GC_COEF_DATA_WIDTH));
    v_coef_im := signed(i_coef_data(GC_COEF_DATA_WIDTH-1 downto 0));

    mult_Re <= std_logic_vector(resize(signed(sample_in) * v_coef_re, mult_Re'length));
    mult_Im <= std_logic_vector(resize(signed(sample_in) * v_coef_im, mult_Im'length));

    -- Stage 2: signed re-size of the registered products to the width of
    -- the *_ext vectors.
    mult_Re_ext <= std_logic_vector(resize(signed(mult_Re), mult_Re_ext'length));
    mult_Im_ext <= std_logic_vector(resize(signed(mult_Im), mult_Im_ext'length));

    -- Stage 3: pick the GC_ADDER_WIDTH-bit window of the extended product,
    -- dropping the number of LSBs given by i_config_lsb_prod_reg(2 downto 0).
    -- Default window drops nothing; it applies to "000" and to every
    -- selector value that is not listed explicitly in the case below.
    add_Re_in <= mult_Re_ext(GC_ADDER_WIDTH-1 downto 0);
    add_Im_in <= mult_Im_ext(GC_ADDER_WIDTH-1 downto 0);

    case i_config_lsb_prod_reg(2 downto 0) is
      when "001" =>  -- drop 1 LSB
        add_Re_in <= mult_Re_ext(GC_ADDER_WIDTH downto 1);
        add_Im_in <= mult_Im_ext(GC_ADDER_WIDTH downto 1);
      when "010" =>  -- drop 2 LSBs
        add_Re_in <= mult_Re_ext(GC_ADDER_WIDTH+1 downto 2);
        add_Im_in <= mult_Im_ext(GC_ADDER_WIDTH+1 downto 2);
      when "011" =>  -- drop 3 LSBs
        add_Re_in <= mult_Re_ext(GC_ADDER_WIDTH+2 downto 3);
        add_Im_in <= mult_Im_ext(GC_ADDER_WIDTH+2 downto 3);
      when "100" =>  -- drop 4 LSBs
        add_Re_in <= mult_Re_ext(GC_ADDER_WIDTH+3 downto 4);
        add_Im_in <= mult_Im_ext(GC_ADDER_WIDTH+3 downto 4);
      when others =>
        null;        -- keep the default window assigned above
    end case;

    -- Stage 4: accumulate into the value written to accRAM.
    -- The sum is updated only while run_filter_cnt lies strictly between
    -- C_START_WRITING_NEW_SUM and C_START_WRITING_NEW_SUM+i_config_q_factor_reg+1.
    v_accumulate := (run_filter_cnt > C_START_WRITING_NEW_SUM) and
                    (run_filter_cnt < C_START_WRITING_NEW_SUM+i_config_q_factor_reg+1);

    if v_accumulate then
      accRAM_Re_in <= std_logic_vector(signed(add_Re_in) + signed(accRAM_Re_out_add));
      accRAM_Im_in <= std_logic_vector(signed(add_Im_in) + signed(accRAM_Im_out_add));
    elsif next_state = FILTER_OUTPUT then
      -- Outside the accumulate window: clear when the state machine is
      -- about to enter FILTER_OUTPUT.
      accRAM_Re_in <= (others => '0');
      accRAM_Im_in <= (others => '0');
    end if;
    -- In every other case accRAM_Re_in / accRAM_Im_in keep their value.

  end if;
end process p_filter_calculations;