Forum Discussion
Altera_Forum
Honored Contributor
15 years ago
Dear Josyb,
Thank you for your kind answer. Would you, or somebody else, please explain to me why unregistered memory causes such a delay, how to solve it, and why I get unregistered memory here? My questions arise from the following: after you mentioned the multipliers, I decided to add intermediate pipeline registers (D1 and Data1 in my attached code), and I got some improvement in FMax:
N SHR FMAX 0C/85C Logic DSP Total Synt. Time
N=24, SHR=14, 363/336MHz, Logic 33%, DSP 100%, Time=56minutes
N=12, SHR=14, 376/346MHz, Logic 16%, DSP 50%, Time=19minutes
N= 6, SHR=14, 405/383MHz, Logic 8%, DSP 25%, Time= 9minutes
N=24, SHR= 2, 383/357MHz, Logic 29%, DSP 100%, Time=43minutes
N=12, SHR= 2, 407/380MHz, Logic 15%, DSP 50%, Time=23minutes
N= 6, SHR= 2, 443/413MHz, Logic 7%, DSP 25%, Time=12minutes
However, I cannot figure out by myself when I should apply these tricks, or what other tricks are available for improving FMax! PS: In my design I am free to add more pipeline stages, but where? Please help me with a procedure for finding the right places. I can see something in the "Property Editor", but I cannot interpret it well enough to make the correct decision. Please help me! Thank you in advance! Sincerely, Ilghiz
// Module `test`: a two-clock pipeline that, as visible here, registers the input, multiplies
// registered values, accumulates the products with a right-shift "leak" term, and then passes
// the scaled result through a chain of registers to the output `Scal`.
//
// NOTE(review): this listing appears to have lost text in transit. No vector widths or array
// dimensions are visible on any declaration, the loop indices i/j/k are never used as
// subscripts, and the output stage contains unmatched `]` characters, so the code as shown
// does not compile. Presumably every `[...]` range/index was stripped - recover the original
// attachment before relying on anything below.
//
// Ports:
//   Clk   - clock of the first always block (input capture, multiply, accumulate).
//   In    - signed data input, sampled into `Data` on Clk.
//   ClkSW - clock of the second always block (output stage).
//   SW    - declared as an input but not referenced anywhere in the visible body.
//   Scal  - signed registered output, written in the ClkSW block.
// Uses `for(int ...)` and `i++`, i.e. SystemVerilog syntax.
module test(Clk, In, ClkSW, SW, Scal);
// N is the upper bound of the outer `i` loops below.
parameter N=6; // can be 2, 4, 6, ..., but I need 18 and dreaming about 24
// SHR is the arithmetic right-shift amount used in the ScalX update and (as 16+SHR) for ScalY.
parameter SHR=14; // can be 2, 3, 4, 5, ..., but I need 12-20
input Clk, ClkSW;
input signed In;
input SW;
reg signed Scal;
output Scal;
// Memory ////////////////////////////
// NOTE(review): all of these are declared without a width or dimensions in this listing;
// the nested loops below suggest they were originally arrays - TODO confirm.
reg signed D, Data;
reg signed D1, Data1; // new pipeline registers
reg signed Mul;
reg signed Sum, Sum2;
reg signed ScalX;
reg signed ScalY;
// Toggled every Clk edge below; never read in the visible code.
reg InDataCounter;
reg signed ScalY1, ScalY2; // new intermediate
reg signed ScalY3, ScalY4; // registers for simple output
// Reading Data from Channels - the key place where I cannot achieve to clock it with 400MHz for N=16 or 24
always @(posedge Clk)
begin
// Copy Data into D (loop bounds 2 x 4).
for(int i=0; i<2; i++)
for(int j=0; j<4; j++)
D<=Data;
// Capture the input into Data (loop bound 4).
for(int j=0; j<4; j++)
Data<=In;
// NOTE(review): as written this is a self-assignment; the N-1 bound hints at a shift
// between neighbouring elements whose indices are missing - TODO confirm.
for(int i=0; i<N-1; i++)
for(int j=0; j<4; j++)
Data<=Data;
InDataCounter<=~InDataCounter;
//
// Extra register stage on the multiplier operands (Data -> Data1, D -> D1).
for(int i=0; i<N; i++)
for(int j=0; j<4; j++)
Data1<=Data; // new pipeline registers that helps for small N
for(int i=0; i<2; i++)
for(int j=0; j<4; j++)
D1<=D; // new pipeline registers that helps for small N
//
// Multiply stage: products of the registered operands. The outer loop steps i by 2.
// NOTE(review): the two assignments are identical as shown (apart from spacing);
// presumably they targeted different elements before the indices were lost.
for(int i=0; i<N; i+=2)
for(int j=0; j<4; j++)
for(int k=0; k<4; k++)
begin
Mul <=D1*Data1;
Mul<=D1*Data1;
end
// Accumulate stage: Mul+Mul -> Sum -> Sum2, then Sum2 is added into ScalX, and ScalX is
// arithmetically shifted right by 16+SHR into ScalY.
// NOTE(review): ScalX and ScalY each receive two non-blocking assignments in the same
// block; as shown the later one would take effect. Presumably the targets were distinct
// indexed elements - TODO confirm against the original source.
for(int i=0; i<N; i+=2)
for(int j=0; j<16; j++)
begin
Sum<=Mul+Mul;
Sum2<=Sum;
ScalX<=ScalX-(ScalX>>>SHR);
ScalX<=Sum2+ScalX;
ScalY<=ScalX>>>(16+SHR);
ScalY<=ScalX>>>(16+SHR);
end
end
// Output - different from previous one, just to save space...
// Register chain ScalY -> ScalY1 -> ScalY2 -> ScalY3 -> ScalY4 -> Scal, clocked by ClkSW.
// The inner loop bounds halve (16, 8, 4) with a step of 2.
// NOTE(review): the halving bounds look like a pairwise adder-tree reduction, but the
// right-hand sides are truncated (note the dangling `]`), so this cannot be confirmed here.
always @(posedge ClkSW)
begin
for(int i=0; i<N; i++)
for(int j=0; j<16; j+=2)
ScalY1<=ScalY+j];
for(int i=0; i<N; i++)
for(int j=0; j<8; j+=2)
ScalY2<=ScalY1+j];
for(int i=0; i<N; i++)
for(int j=0; j<4; j+=2)
ScalY3<=ScalY2+j];
for(int i=0; i<N; i++)
ScalY4<=ScalY3];
Scal<=ScalY4];
end
endmodule