--- Quote Start ---
Please post the section of your host code that measures the kernel execution times.
--- Quote End ---
Sorry HRZ, here it is:
#include "timing.h"
#include <Windows.h>
/*
 * Read the Windows high-resolution performance counter and convert it to
 * seconds (counter ticks divided by counter ticks-per-second).
 *
 * Returns 0 when either the frequency query or the counter query reports
 * failure. Callers in this file take two readings and subtract them to
 * obtain an elapsed wall-clock interval.
 */
double get_wall_time()
{
    LARGE_INTEGER ticks_per_second;
    LARGE_INTEGER ticks_now;

    /* Short-circuit evaluation keeps the order: frequency first, then counter. */
    if (!QueryPerformanceFrequency(&ticks_per_second) ||
        !QueryPerformanceCounter(&ticks_now)) {
        return 0;
    }

    return (double)ticks_now.QuadPart / (double)ticks_per_second.QuadPart;
}
-----------------------------------------------------------------------------
/*
 * Bind the kernel arguments, enqueue the kernel as a task and block until
 * the command queue has finished.
 *
 * The parameter list is elided ("...") in this excerpt; the body relies on
 * kernel, cmdqueue, num_arguments, buffer, i and status being in scope.
 * Every OpenCL status code is handed to checkError right after the call
 * that produced it.
 *
 * NOTE(review): the call site shown in this excerpt spells the name
 * runKernel -- confirm which spelling is intended.
 */
runKernerl(...){
    /* Set kernel arguments */
    for (i = 0; i < num_arguments; i++) {
        /* NOTE(review): every index is bound to the same &buffer here;
           confirm whether a per-argument buffer was intended. */
        status = clSetKernelArg(kernel, i, sizeof(cl_mem), &buffer);
        /* Check inside the loop: otherwise the status of each call is
           overwritten by the next iteration and then by the launch. */
        checkError(status, "Failed to set kernel argument");
    }

    /* Launch the kernel */
    status = clEnqueueTask(cmdqueue, kernel, 0, NULL, NULL);
    checkError(status, "Failed to launch kernel");

    /* Wait for the command queue to complete all pending commands */
    status = clFinish(cmdqueue);
    checkError(status, "Failed to finish");

    /* NOTE(review): the original comment mentioned reading the device output
       buffer back to the host, but no read is present in this excerpt. */
}
-----------------------------------------------------------------------------
/* Wall-clock timestamp taken immediately before the runKernel call. */
ini_kernel_bi = get_wall_time();
/* The measured interval covers the whole runKernel call, i.e. all host-side
   work it performs, not only device execution.
   NOTE(review): the definition shown in this excerpt is spelled runKernerl --
   confirm the two names refer to the same function. */
runKernel(context, cluster_kernel, cmd_queue, 6, 0, NULL, buffers, NULL , NULL);
/* Wall-clock timestamp taken right after runKernel returns. */
end_kernel_bi = get_wall_time();
/* Print the difference of the two get_wall_time readings (no trailing newline). */
printf("Time:%f", end_kernel_bi - ini_kernel_bi);