aejjeh
New Contributor
7 years ago
OpenCL SDK unable to calculate II
I am looking at the generated report for my matrix multiply OpenCL kernel, and the report says that it is unable to estimate the initiation interval (II); the bottleneck is shown as 'n/a'. Any ideas what the reason for this might be? I am using version 18.0 of the tools, and I am attaching a screenshot of the loop analysis report (which shows my code).
Update: Kernel Code below (wasn't able to attach):
// Edge length of the square matrices processed by the kernel. A preprocessor
// constant keeps the __local array bounds genuine compile-time constants.
// It may be overridden at kernel build time (e.g. -DMYSGEMM_NT_DIM=512).
#ifndef MYSGEMM_NT_DIM
#define MYSGEMM_NT_DIM 1024
#endif

// Computes C = A * B^T for square single-precision matrices of edge length
// MYSGEMM_NT_DIM:
//
//     C[x*n + z] = sum over y of A[x*k + y] * B[z*k + y]
//
// i.e. row x of A is dotted with row z of B.
//
// Parameters:
//   A, B     - input matrices, read as flat arrays with row stride
//              MYSGEMM_NT_DIM.
//   C        - output matrix, written as a flat array with row stride
//              MYSGEMM_NT_DIM.
//   bytes_A, bytes_B, bytes_C
//            - not read by the kernel (presumably the buffer sizes in bytes;
//              TODO confirm against the host code).
//   mt, nt, kt
//            - not read by the kernel; the problem size is fixed by
//              MYSGEMM_NT_DIM. NOTE(review): presumably intended as run-time
//              dimensions - confirm against the host code.
//
// The kernel never queries a work-item id, so every work-item that runs it
// performs the complete multiplication.
__kernel
void mysgemmNT_c_fpga_c_c_c(__global float* restrict A, ulong bytes_A,
                            __global float* restrict B, ulong bytes_B,
                            __global float* restrict C, ulong bytes_C,
                            uint mt, uint nt, uint kt) {
    __local float rowA[MYSGEMM_NT_DIM];                     // current row of A
    __local float rowC[MYSGEMM_NT_DIM];                     // current row of C
    __local float localB[MYSGEMM_NT_DIM * MYSGEMM_NT_DIM];  // full copy of B

    const int n = MYSGEMM_NT_DIM;  // rows of B / columns of C
    const int m = MYSGEMM_NT_DIM;  // rows of A / rows of C
    const int k = MYSGEMM_NT_DIM;  // shared (dot-product) dimension

    // Stage all of B in local memory once; it is reused for every row of A.
    for (int z = 0; z < n; ++z) {
        for (int y = 0; y < k; ++y) {
            localB[y + z * k] = B[y + z * k];
        }
    }

    for (int x = 0; x < m; ++x) {
        // Stage row x of A so the dot-product loop only reads local memory.
        for (int y = 0; y < k; ++y) {
            rowA[y] = A[y + x * k];
        }

        // Row x of C: one dot product per row z of B.
        for (int z = 0; z < n; ++z) {
            float c = 0.0f;
            for (int y = 0; y < k; ++y) {
                c += rowA[y] * localB[z * k + y];
            }
            rowC[z] = c;
        }

        // Write the finished row back to global memory.
        for (int z = 0; z < n; ++z) {
            C[z + x * n] = rowC[z];
        }
    }
}