ND-Range kernels vs SingleThread?

Occasional Contributor

7 years ago

Thanks for the answer,

Here is the kind of code I'm compiling and running on the FPGA:

/*
 * Element type of the arrays processed by the kernel.
 *
 * Select it by defining exactly one of INT_PRECISION, SINGLE_PRECISION or
 * DOUBLE_PRECISION when the program is built (e.g. -DSINGLE_PRECISION).
 * Every branch tests only whether the macro is defined, so an empty
 * definition is accepted for all three. If none is defined, DTYPE is left
 * undefined by this section.
 */
#if defined(INT_PRECISION)
#define DTYPE int
#elif defined(SINGLE_PRECISION)
#define DTYPE float
#elif defined(DOUBLE_PRECISION)
/* Enable the fp64 extension before the kernel uses double. */
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#define DTYPE double
#endif
 
 
#ifdef FPGA_NDRANGE
/* Work-group shape and replication settings used only by the NDRange FPGA build. */
__attribute__((reqd_work_group_size(256, 1, 1)))
__attribute__((num_simd_work_items(16)))
__attribute__((num_compute_units(NUM_COMPUTE_UNITS)))
#endif
/*
 * S1119: running sum down the rows of a row-major 2-D array.
 *
 * For every row r in [1, lllX) and every column c:
 *     AA[r][c] = AA[r-1][c] + BB[r][c]
 * Row 0 of AA is only read, never written. Nothing is done when lllX <= 1.
 *
 * AA    in/out array, updated in place.
 * BB    read-only addend array, indexed with the same layout as AA.
 * lllX  number of rows.
 * lllY  row length; this parameter exists only when FPGA_SINGLE is defined.
 *       In the GPU and FPGA_NDRANGE variants the row length is
 *       get_global_size(0) and each work-item handles the column given by
 *       get_global_id(0).
 *
 * The body is selected by the GPU, FPGA_NDRANGE and FPGA_SINGLE macros.
 * UNROLL_FACTOR must be defined for the two FPGA variants.
 */
__kernel void S1119(__global DTYPE* restrict AA,
                    __global const DTYPE* restrict BB,
#ifdef FPGA_SINGLE
                    const int lllX,
                    const int lllY)
#else
                    const int lllX)
#endif
{
#ifdef GPU
	/* One work-item per column; the global size is the row pitch. */
	const int lane = get_global_id(0);
	const int pitch = get_global_size(0);

	for (int row = 1; row < lllX; ++row) {
		const int above = (row - 1) * pitch + lane;
		const int here = row * pitch + lane;
		AA[here] = AA[above] + BB[here];
	}
#endif

#ifdef FPGA_NDRANGE
	/* Same per-column walk as the GPU variant, with the row loop unrolled. */
	const int lane = get_global_id(0);
	const int pitch = get_global_size(0);

	#pragma unroll UNROLL_FACTOR
	for (int row = 1; row < lllX; ++row) {
		const int above = (row - 1) * pitch + lane;
		const int here = row * pitch + lane;
		AA[here] = AA[above] + BB[here];
	}
#endif

#ifdef FPGA_SINGLE
	/* Single work-item variant: rows in the outer loop, columns in the inner loop. */
	for (int row = 1; row < lllX; ++row) {
		/*
		 * Each column iteration reads and writes only its own column of
		 * rows row-1 and row, so the pragmas are applied to the column loop.
		 */
		#pragma ivdep
		#pragma unroll UNROLL_FACTOR
		for (int col = 0; col < lllY; ++col) {
			AA[row * lllY + col] = AA[(row - 1) * lllY + col] + BB[row * lllY + col];
		}
	}
#endif
}

As you can see, there is a potential for memory coalescing for both FPGA_NDRANGE and FPGA_SINGLE.

Forum Discussion

Recent Discussions

Quartus did not start

The quartus license works with version 25.0 but not with version 17.0

Docker image for Quartus Pro 26.1 missing ?

Timing analysis - long combinational path

timing violation fix