Altera_Forum
Honored Contributor
8 years ago — Getting a segmentation fault if the kernel input gets bigger
Hello,
I have following OpenCL-host code, this code convolves the inputsignal with the mask int main() { //inits context and program init_opencl(); soc_convolution(500,500); clreleaseprogram(program); clreleasecontext(context); free(platforms); free(devices); } void soc_convolution(int inputsignalwidth , int inputsignalheight ) { cl_uint inputsignal[inputsignalwidth][inputsignalheight]; // set the inputvalues to 1 for(int i = 0; i < inputsignalwidth; i++) { for(int j = 0; j < inputsignalheight; j++) { inputsignal[j] = 1;[/I] } } const unsigned int outputsignalwidth = inputsignalwidth -2; const unsigned int outputsignalheight = inputsignalheight -2; cout << outputsignalwidth <<endl; cout << outputsignalheight <<endl; cl_uint outputsignal[outputsignalwidth][outputsignalheight]; const unsigned int maskwidth = 3; const unsigned int maskheight = 3;// kernel for convolve cl_uint mask[maskwidth][maskheight] = { { 1, 1, 1 }, { 1, 0, 1 }, { 1, 1, 1 }, }; // create kernel cl_kernel kernel = null; kernel = clcreatekernel(program, "convolve", &status); // create command queue and associate it with the device you want to execute on cl_command_queue cmdqueue; cmdqueue = clcreatecommandqueue(context,devices[0], 0, &status); checkerror(status, "failed to create commadnqueue"); // buffer cl_mem buffer_inputsignal = clcreatebuffer(context,cl_mem_read_only,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,null,&status); cl_mem buffer_mask = clcreatebuffer(context,cl_mem_read_only, sizeof(cl_uint) * maskwidth * maskheight, null,&status); cl_mem buffer_outputsignal = clcreatebuffer(context,cl_mem_write_only, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,null,&status); status = clenqueuewritebuffer(cmdqueue, buffer_inputsignal,cl_false,0,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,inputsignal,0,null,null); status = clenqueuewritebuffer(cmdqueue, buffer_mask,cl_false,0,sizeof(cl_uint) * maskwidth * maskheight,mask,0,null,null); status = 
clsetkernelarg(kernel,0,sizeof(cl_mem),&buffer_inputsignal); status = clsetkernelarg(kernel,1,sizeof(cl_mem),&buffer_mask); status = clsetkernelarg(kernel,2,sizeof(cl_mem),&buffer_outputsignal); status = clsetkernelarg(kernel,3,sizeof(int),&inputsignalwidth); status = clsetkernelarg(kernel,4,sizeof(int),&maskwidth); size_t globalworksize[2]; globalworksize[0] = outputsignalwidth; globalworksize[1] = outputsignalheight; status = clenqueuendrangekernel(cmdqueue,kernel,2,null, globalworksize, null,0, null,null); clenqueuereadbuffer(cmdqueue,buffer_outputsignal,cl_true,0, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,outputsignal, 0, null, null); //stop cpu till queue is finish clfinish(cmdqueue); // free opencl resources clreleasekernel(kernel); clreleasecommandqueue(cmdqueue); clreleasememobject(buffer_inputsignal); clreleasememobject(buffer_mask); clreleasememobject(buffer_outputsignal); }
// and the following kernel code for the convolution:

/*
 * Convolves `input` with a square mask of side `maskwidth`.
 *
 * Each work-item (x, y) computes one output element: the sum over the mask
 * window whose top-left corner is at column x, row y of the input. `input`
 * is indexed row by row with a row length of `inputwidth`; `output` is
 * indexed row by row with a row length of get_global_size(0).
 */
__kernel void convolve(__global uint *input,
                       __global uint *mask,
                       __global uint *output,
                       const int inputwidth,
                       const int maskwidth)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    uint sum = 0;
    for (int r = 0; r < maskwidth; r++) {
        // Start index of input row (y + r) at column x; advances by one
        // input row per loop iteration.
        const int idxrow = (y + r) * inputwidth + x;
        for (int c = 0; c < maskwidth; c++) {
            // Convolve row by row.
            sum += mask[(r * maskwidth) + c] * input[idxrow + c];
        }
    }

    output[y * get_global_size(0) + x] = sum;
}
When I call the function SoC_Convolution with values up to 600 it works, but setting the values above this causes a segmentation fault when executing. Can someone tell me what the problem is? Thanks :)