amrmesh
New Contributor
6 years ago
Report shows no DSP usage for OpenCL kernel
Hello, I'm new to OpenCL for FPGAs. I wrote this code for matrix-vector multiplication. After compiling with aoc 19.3, the report shows that no DSPs are used for the kernel's computation. Does anyone know what I'm doing wrong?
/*
 * matvec: blocked matrix * vector product.
 *
 * matrix_a   n x n matrix, indexed row-major as matrix_a[row * n + col].
 * vectors_b  vec_count input vectors; vector k starts at vectors_b[k * n].
 * result     output vector of n floats; element (bi + e) is written once per
 *            row block.
 * n          matrix dimension.
 *            NOTE(review): the block loops read and write full
 *            BLOCK_SIZE-wide tiles, so n is assumed to be a multiple of
 *            BLOCK_SIZE -- confirm against the host code.
 * vec_count  number of input vectors.
 *
 * BLOCK_SIZE and II_CYCLES are compile-time constants defined outside this
 * block (e.g. via -D on the compiler command line).
 *
 * NOTE(review): the products for every vector k are accumulated into the same
 * localresult[], so `result` holds the sum over all vec_count vectors rather
 * than one output vector per input vector. If one result per vector is
 * intended, the output must be indexed by k as well -- confirm with the host.
 */
__kernel
void matvec(global float* restrict matrix_a,
            global float* restrict vectors_b,
            global float* restrict result,
            uint n,
            uint vec_count)
{
    float localmatrix[BLOCK_SIZE][BLOCK_SIZE];
    float localvector[BLOCK_SIZE];
    float localresult[BLOCK_SIZE];
    /* Partial-sum shift register used to break the loop-carried dependency
       of the floating-point accumulation in the dot-product loop. */
    float presult[II_CYCLES+1];

    /* No separate zeroing pass over result[] is needed: every element is
       overwritten by the write-back loop at the end of each row block. */

    // iteration over the row blocks of the matrix
    for(uint bi=0 ; bi<n ; bi+=BLOCK_SIZE)
    {
        // initializing localresult; the bound must be BLOCK_SIZE (the array
        // size), not n, both to stay in bounds and to allow full unrolling
        #pragma unroll
        for(uint e=0 ; e<BLOCK_SIZE ; e++)
            localresult[e] = 0;

        // iteration over the column blocks of the matrix
        for(uint bj=0 ; bj<n ; bj+=BLOCK_SIZE)
        {
            // loading one block of the matrix into local storage
            #pragma unroll 16
            for(uint ei=0 ; ei<BLOCK_SIZE ; ei++)
                for(uint ej=0 ; ej<BLOCK_SIZE ; ej++)
                    localmatrix[ei][ej] = matrix_a[(bi+ei)*n+(bj+ej)];

            // iteration over the input vectors
            for(uint k=0 ; k<vec_count ; k++)
            {
                // loading one block of vector k into local storage
                #pragma unroll 16
                for(uint e=0 ; e<BLOCK_SIZE ; e++)
                    localvector[e] = vectors_b[k*n+(bj+e)];

                // localresult[i] += sum over j of localmatrix[i][j] * localvector[j]
                // iteration over the rows of the block
                for(uint i=0 ; i<BLOCK_SIZE ; i++)
                {
                    // clearing the partial-sum shift register
                    #pragma unroll
                    for(uint e=0 ; e<II_CYCLES+1 ; e++)
                        presult[e] = 0;

                    // iteration over the columns of the block
                    for(uint j=0 ; j<BLOCK_SIZE ; j++)
                    {
                        // new partial sum enters at the tail, built on the head
                        presult[II_CYCLES] = localmatrix[i][j] * localvector[j] + presult[0];

                        // shift by one; stop at II_CYCLES-1 so that
                        // presult[e+1] never reads past the end of the array
                        #pragma unroll
                        for(uint e=0 ; e<II_CYCLES ; e++)
                            presult[e] = presult[e+1];
                    }

                    // After the shift, presult[II_CYCLES] is a stale copy of
                    // presult[II_CYCLES-1], so only the first II_CYCLES taps
                    // are summed to avoid counting that value twice.
                    #pragma unroll
                    for(uint e=0 ; e<II_CYCLES ; e++)
                        localresult[i] += presult[e];
                }
            }
        }

        // writing this row block of the result back to global memory
        #pragma unroll 16
        for(uint e=0 ; e<BLOCK_SIZE ; e++)
            result[bi+e] = localresult[e];
    }
}