Altera_Forum
Honored Contributor
8 years ago
Obtaining different results while executing a kernel on GPU or FPGA
Hello,
I have another question concerning OpenCL. My problem: when I execute the same kernel on a GPU and on an FPGA (pre-compiled binary), I get different results when I read the buffer back afterwards. Are there any device-specific operations that can result in different output? The kernel code:
#pragma OPENCL EXTENSION cl_amd_printf : enable
/*
 * One component of the mixture written by the `update` kernel.
 * The struct is packed, i.e. members are laid out without padding;
 * NOTE(review): a host-side mirror of this struct presumably has to use
 * the same packed layout -- confirm against the host code.
 */
struct gm_component {
    float   w;  /* stored by the kernel's "calculate weight" step */
    float4  m;  /* stored by the kernel's "calculate mean" step */
    float16 P;  /* stored by the "calculate covariance" step; handled as a 4x4 matrix by matrixMult4x4f */
} __attribute__ ((packed));
/*
 * One component of the mixture read by the `update` kernel.
 * The struct is packed, i.e. members are laid out without padding;
 * NOTE(review): a host-side mirror of this struct presumably has to use
 * the same packed layout -- confirm against the host code.
 */
struct gm_component_survive {
    float   w;    /* scales the weight the kernel computes */
    float4  m;    /* multiplied by Hk and corrected via K to form the new mean */
    float16 P;    /* right-hand operand of the 4x4 product that forms the new P */
    float2  eta;  /* subtracted from the measurement taken from Zk */
    float4  S;    /* used as a 2x2 matrix (s0 s1 / s2 s3); the kernel inverts it */
    float8  K;    /* used as four float2 rows: lo.lo, lo.hi, hi.lo, hi.hi */
} __attribute__ ((packed));
// 4x4 matrix product.
//
// Returns M*N, reading both float16 operands as row-major 4x4 matrices
// (components s0..s3 form the first row, s4..s7 the second, and so on).
// Each output element is dot(row * column, unit4); the caller in this file
// passes unit4 = (1,1,1,1), which makes that the sum of the element-wise
// products of one row of M with one column of N.
float16 matrixMult4x4f(float16 M, float16 N,float4 unit4)
{
    // Rows of the left operand.
    const float4 m_row0 = M.s0123;
    const float4 m_row1 = M.s4567;
    const float4 m_row2 = M.s89ab;
    const float4 m_row3 = M.scdef;

    // Columns of the right operand (every fourth component).
    const float4 n_col0 = N.s048c;
    const float4 n_col1 = N.s159d;
    const float4 n_col2 = N.s26ae;
    const float4 n_col3 = N.s37bf;

    // Build the result one row at a time.
    const float4 out_row0 = (float4)(dot(m_row0 * n_col0, unit4),
                                     dot(m_row0 * n_col1, unit4),
                                     dot(m_row0 * n_col2, unit4),
                                     dot(m_row0 * n_col3, unit4));
    const float4 out_row1 = (float4)(dot(m_row1 * n_col0, unit4),
                                     dot(m_row1 * n_col1, unit4),
                                     dot(m_row1 * n_col2, unit4),
                                     dot(m_row1 * n_col3, unit4));
    const float4 out_row2 = (float4)(dot(m_row2 * n_col0, unit4),
                                     dot(m_row2 * n_col1, unit4),
                                     dot(m_row2 * n_col2, unit4),
                                     dot(m_row2 * n_col3, unit4));
    const float4 out_row3 = (float4)(dot(m_row3 * n_col0, unit4),
                                     dot(m_row3 * n_col1, unit4),
                                     dot(m_row3 * n_col2, unit4),
                                     dot(m_row3 * n_col3, unit4));

    return (float16)(out_row0, out_row1, out_row2, out_row3);
}
// OpenCL kernel: for one (measurement, predicted component) pair, compute the
// updated mixture component (weight w, mean m, covariance P).
//
// Work-item mapping: global id 0 selects the measurement (zk_index < size),
// global id 1 selects the predicted component
// (survive_index < updateMixtureSize). Work-items outside that range do nothing.
//
// Parameters:
//   predict_mixture   - input components (w, m, P, eta, S, K)
//   Hk                - 2x4 matrix, row-major (lo = first row, hi = second row)
//   Zk                - measurements, one float2 each
//   update_mixture    - output components
//   size              - upper bound for zk_index
//   pr_dk             - scalar factor applied to the computed weight
//   updateMixtureSize - upper bound for survive_index
//
// NOTE(review): the code as posted took the address of the pointer arguments
// themselves (&update_mixture, &predict_mixture) and used Zk without a
// subscript, which is ill-typed; the element subscripts were most likely lost
// when the code was posted. The indexing below assumes
//   predict_mixture[survive_index], Zk[zk_index] and
//   update_mixture[zk_index * updateMixtureSize + survive_index]
// -- confirm against the host-side buffer layout before relying on it.
__kernel void
update(__global struct gm_component_survive * restrict predict_mixture,
float8 Hk, __global float2 *Zk,
__global struct gm_component * restrict update_mixture,int size, float pr_dk, int updateMixtureSize)
{
    const int zk_index = get_global_id(0);
    const int survive_index = get_global_id(1);
    if (zk_index >= size || survive_index >= updateMixtureSize) {
        return;
    }

    // See the NOTE(review) above: element selection is an assumption.
    __global struct gm_component_survive *pm = &predict_mixture[survive_index];
    __global struct gm_component *um =
        &update_mixture[zk_index * updateMixtureSize + survive_index];
    const float2 zk = Zk[zk_index];

    // All-ones vectors: dot(x * y, unitN) sums the element-wise products.
    const float2 unit2 = (float2)(1.0f, 1.0f);
    const float4 unit4 = (float4)(1.0f, 1.0f, 1.0f, 1.0f);

    // Read the component fields that are used more than once.
    const float4 m = pm->m;
    const float8 K = pm->K;

    // ---- Multivariate Gaussian evaluation ----
    // Residual between the measurement and the component's eta.
    const float2 miu = zk - pm->eta;

    // Closed-form inverse of the 2x2 matrix S = (s0 s1 / s2 s3).
    // A zero determinant is not guarded against (same as the original).
    const float4 S = pm->S;
    const float determinant = S.s3 * S.s0 - S.s2 * S.s1;
    const float4 inv_S = (float4)(S.s3, -S.s1, -S.s2, S.s0) / determinant;

    // miu^T * inv(S) * miu, kept as two partial terms summed below.
    float2 quad = (float2)(dot(miu * inv_S.even, unit2),
                           dot(miu * inv_S.odd, unit2));
    quad = quad * miu;

    // Normalisation: sqrt((2*pi)^2 * det). The literal carries an 'f' suffix so
    // the product stays in single precision; an unsuffixed literal is a double
    // and promotes the arithmetic, which not every device supports.
    const float denom = sqrt(39.4784f * determinant);

    // Weight. exp() is used instead of native_exp(): the native_ variants have
    // implementation-defined accuracy, so their results may differ between
    // devices.
    um->w = pr_dk * pm->w * exp(-0.5f * dot(quad, unit2)) / denom;

    // ---- Mean: m + K * (zk - Hk * m) ----
    const float2 predicted = (float2)(dot(Hk.lo * m, unit4),
                                      dot(Hk.hi * m, unit4));
    const float2 innovation = zk - predicted;
    const float4 correction = (float4)(dot(K.lo.lo * innovation, unit2),
                                       dot(K.lo.hi * innovation, unit2),
                                       dot(K.hi.lo * innovation, unit2),
                                       dot(K.hi.hi * innovation, unit2));
    um->m = m + correction;

    // ---- Covariance: (I - K * Hk) * P ----
    // Hk.s04, Hk.s15, Hk.s26, Hk.s37 are the four columns of the 2x4 Hk.
    const float16 i_minus_kh = (float16)(
        1 - dot(K.lo.lo * Hk.s04, unit2),    -dot(K.lo.lo * Hk.s15, unit2),
           -dot(K.lo.lo * Hk.s26, unit2),    -dot(K.lo.lo * Hk.s37, unit2),

           -dot(K.lo.hi * Hk.s04, unit2), 1 - dot(K.lo.hi * Hk.s15, unit2),
           -dot(K.lo.hi * Hk.s26, unit2),    -dot(K.lo.hi * Hk.s37, unit2),

           -dot(K.hi.lo * Hk.s04, unit2),    -dot(K.hi.lo * Hk.s15, unit2),
        1 - dot(K.hi.lo * Hk.s26, unit2),    -dot(K.hi.lo * Hk.s37, unit2),

           -dot(K.hi.hi * Hk.s04, unit2),    -dot(K.hi.hi * Hk.s15, unit2),
           -dot(K.hi.hi * Hk.s26, unit2), 1 - dot(K.hi.hi * Hk.s37, unit2));
    um->P = matrixMult4x4f(i_minus_kh, pm->P, unit4);
}
Any hints will be appreciated. Tobias