Forum Discussion
Altera_Forum
Honored Contributor
8 years ago
Here is the kernel I compile:
/*
 * Micro-benchmark kernel: each work-item runs a chain of 16 dependent
 * "temp1 = temp1 + temp1 * M" updates starting from 1.0f, adds three
 * constant 1.0f terms, and stores the result to GOut at its global id.
 *
 * GIn  - unused; kept so the kernel signature stays unchanged.
 * GOut - output buffer; the element at get_global_id(0) is written, so it
 *        must hold at least get_global_size(0) floats.
 * M    - multiplier used by every step of the dependent chain.
 * N, P - unused; kept so the kernel signature stays unchanged.
 *
 * The computed value depends only on M, so every work-item stores the same
 * number. The global store is the only observable effect of the kernel.
 * NOTE(review): the original statement was "GOut = tempOut;", which assigns
 * a float to the pointer parameter and never writes memory. The global id is
 * used as the index here; confirm that this matches the host-side buffer
 * layout.
 */
__attribute__((num_compute_units(5)))
__attribute__((num_simd_work_items(16)))
__attribute__((reqd_work_group_size(256,1,1)))
__kernel void WGSXMAPIXLLXOPS16(const __global float * restrict GIn, __global float * restrict GOut, const float M, const float N, const float P) {
    const size_t gid = get_global_id(0);

    float temp1 = 1.0f;
    const float temp2 = 1.0f;
    const float temp3 = 1.0f;
    const float temp4 = 1.0f;

    /* 16 dependent updates, deliberately written out one per statement so
     * the measured operation count is explicit in the source. */
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;
    temp1 += temp1 * M;

    /* Same left-to-right summation order as the original expression. */
    const float tempOut = temp1 + temp2 + temp3 + temp4;

    /* Write through the pointer so the computed value is actually stored. */
    GOut[gid] = tempOut;
}
Basically I use it for some performance measurement and I made sure no optimization is going to happen.