Forum Discussion
Altera_Forum
Honored Contributor
15 years ago
Josyb,
Thank you for your kind suggestion. I actually made a small improvement using the MegaWizard and achieved 400 MHz at 0C for N=24 and SHR=14; however, the results are still very unstable:
Columns: N, SHR, Fmax at 0C/85C, Logic utilization, DSP utilization, Total synthesis time
N=24, SHR=14, 401/369MHz, Logic 53%, DSP 100%, Time=115m
N=12, SHR=14, 362/346MHz, Logic 26%, DSP 50%, Time=55m
N= 6, SHR=14, 451/418MHz, Logic 13%, DSP 25%, Time=20m
Hence, the behavior is very strange: sometimes the result is fast, sometimes it is not, and the synthesis time is impressive - almost 2 hours on a modern quad-core i7. Because of this instability I will probably switch to the SL340 and demultiplex my global clock; otherwise I will keep fighting the unstable results of this fitter. On the positive side, with the MegaWizard I was able to rewrite the code nicely in fewer than 100 lines :), and I am publishing it below. P.S. (off-topic) To the Altera Quartus developers: if you are interested in improving the Quartus fitter using GPUs or massively parallel platforms, or in applying better mathematics in the fitter, do not hesitate to ask for our help. Sincerely, Ilghiz -- Elegant Mathematics Ltd.
// TestOne: one processing cell.
//
// On every rising edge of Clk the cell
//   * captures A1/A2/B1/B2 into P1/P2/Q1/Q2,
//   * hands those registers to the my_mmadd instance, which drives Sum,
//   * advances a ring of four registers (ScalX1..ScalX4) that mixes in the
//     delayed Sum and a copy of ScalX1 arithmetically shifted right by SHR,
//   * selects ScalX1 (SW true) or ScalX3 (SW false) into Res.
//
// Parameter:
//   SHR - right-shift amount used in the ScalX2 update
//         (can be 2, 3, 4, 5, ...; the author needs 12-20).
//
// NOTE(review): no declaration below carries a vector range, so as written
// every signal is a 1-bit signed scalar. The widths were presumably lost when
// the post was extracted -- confirm against the original design.
module TestOne #(
    parameter SHR = 6
) (
    input             Clk,
    input  signed     A1, A2, B1, B2,
    input             SW,
    output reg signed Res
);

    // Input capture registers.
    reg signed P1, P2, Q1, Q2;

    // Only referenced by the commented-out inline multiply-add further down.
    reg signed Mul1, Mul2;

    // Sum is driven by the my_mmadd instance; Sum2 is Sum delayed by one clock.
    reg signed Sum, Sum2;

    // Four-register feedback ring.
    reg signed ScalX1, ScalX2, ScalX3, ScalX4;

    // An altmult_add megafunction must be generated under the name "my_mmadd".
    // Judging by the commented-out code below it presumably computes
    // P1*Q1 + P2*Q2 -- verify against the generated core.
    my_mmadd my_mmadd_module(Clk, P1, Q1, P2, Q2, Sum);

    // Stage 1: register the operands and the multiply-add result.
    always @(posedge Clk)
    begin
        P1 <= A1;
        Q1 <= B1;
        P2 <= A2;
        Q2 <= B2;
        // Inline alternative to the my_mmadd instance:
        // Mul1<=P1*Q1; Mul2<=P2*Q2;
        // Sum<=Mul1+Mul2;
        Sum2 <= Sum;
    end

    // Stage 2: advance the ring ScalX1 -> ScalX2 -> ScalX3 -> ScalX4 -> ScalX1.
    always @(posedge Clk)
    begin
        ScalX1 <= ScalX4;
        ScalX2 <= ScalX1 + (ScalX1 >>> SHR);
        ScalX3 <= ScalX2;
        ScalX4 <= ScalX3 + Sum2;
    end

    // Stage 3: registered output select.
    always @(posedge Clk)
    begin
        Res <= (SW) ? ScalX1 : ScalX3;
    end

endmodule
// test: top-level wrapper. It instantiates a grid of TestOne cells, feeds them
// from registers loaded on Clk, and registers their output through several
// stages on ClkSW before driving Scal.
//
// Parameter:
//   N     - bound of the outer generate loop (stepped by 2) and, as N-1 and
//           N/2, of the procedural loops below.
// Ports:
//   Clk   - clock for the data-loading block and for every TestOne instance.
//   In    - signed data input.
//   ClkSW - clock for the output block.
//   SW    - XORed with the toggling InDataCounter to form SW0.
//   Scal  - registered signed output.
//
// NOTE(review): as posted, no declaration carries a vector range or array
// dimension, the loop bodies never use their loop indices, and several
// statements in the output block end in an unmatched `]`. Bracketed text
// (widths and array subscripts) was presumably stripped during extraction;
// the module will not compile until the original subscripts are restored.
module test(Clk, In, ClkSW, SW, Scal);
parameter N=24; // can be 2, 4, 6, ..., but I need 18
input Clk, ClkSW;
input signed In;
input SW;
output reg signed Scal;
// Memory
// D and Data are loaded in the Clk block and passed to the TestOne instances.
reg signed D, Data;
// InDataCounter toggles on every Clk edge; SW0 is the select passed to TestOne.
reg InDataCounter, SW0;
// ScalY receives the Res output of the TestOne instances.
wire signed ScalY;
// ScalY1..ScalY4 are the successive registers of the output block.
reg signed ScalY1, ScalY2;
reg signed ScalY3, ScalY4;
// Generating modules
// Three nested generate loops create (N/2) x 4 x 4 TestOne instances for even N.
// As written every instance has identical connections and all of them drive
// the same ScalY net -- presumably D, Data and ScalY were indexed by i, j, k
// in the original; TODO confirm.
generate
genvar i, j, k;
for(i=0; i<N; i+=2)
begin : aaa
for(j=0; j<4; j++)
begin : bbb
for(k=0; k<4; k++)
begin : ccc
TestOne TestOne_Module(Clk, D, D, Data, Data, SW0, ScalY);
end
end
end
endgenerate
// Reading Data
// Clk domain: load D from Data, load Data from In, then (for i < N-1) assign
// Data from Data. As written each loop repeats one scalar assignment; the
// structure looks like a shift register whose subscripts were lost -- TODO
// confirm against the original.
always @(posedge Clk)
begin
for(int i=0; i<2; i++)
for(int j=0; j<4; j++)
D<=Data;
for(int j=0; j<4; j++)
Data<=In;
for(int i=0; i<N-1; i++)
for(int j=0; j<4; j++)
Data<=Data;
// Toggle the counter bit and derive the TestOne select from it and SW.
InDataCounter<=~InDataCounter;
SW0<=SW^InDataCounter;
end
// Output
// ClkSW domain: ScalY -> ScalY1 -> ScalY2 -> ScalY3 -> ScalY4 -> Scal.
// The inner loop bounds halve at each stage (16, 8, 4 with step 2), which
// looks like a pairwise adder tree -- presumably each line summed elements j
// and j+1 of the previous stage; the right-hand sides are truncated here, so
// verify against the original before relying on this.
always @(posedge ClkSW)
begin
for(int i=0; i<N/2; i++)
for(int j=0; j<16; j+=2)
ScalY1<=ScalY+j];
for(int i=0; i<N/2; i++)
for(int j=0; j<8; j+=2)
ScalY2<=ScalY1+j];
for(int i=0; i<N/2; i++)
for(int j=0; j<4; j+=2)
ScalY3<=ScalY2+j];
for(int i=0; i<N/2; i++)
ScalY4<=ScalY3];
Scal<=ScalY4];
end
endmodule