Forum Discussion

Altera_Forum's avatar
Altera_Forum
Icon for Honored Contributor rankHonored Contributor
8 years ago

Getting a segmentation fault when the kernel input gets bigger

Hello ,

I have the following OpenCL host code, which convolves the input signal with the mask:

int main() {

//inits context and program

init_opencl();

soc_convolution(500,500);

clreleaseprogram(program);

clreleasecontext(context);

free(platforms);

free(devices);

}

void soc_convolution(int inputsignalwidth , int inputsignalheight )

{

cl_uint inputsignal[inputsignalwidth][inputsignalheight];

// set the inputvalues to 1

for(int i = 0; i < inputsignalwidth; i++)

{

for(int j = 0; j < inputsignalheight; j++)

{

inputsignal[j] = 1;[/I]

}

}

const unsigned int outputsignalwidth = inputsignalwidth -2;

const unsigned int outputsignalheight = inputsignalheight -2;

cout << outputsignalwidth <<endl;

cout << outputsignalheight <<endl;

cl_uint outputsignal[outputsignalwidth][outputsignalheight];

const unsigned int maskwidth = 3;

const unsigned int maskheight = 3;

// kernel for convolve

cl_uint mask[maskwidth][maskheight] =

{

{ 1, 1, 1 },

{ 1, 0, 1 },

{ 1, 1, 1 },

};

// create kernel

cl_kernel kernel = null;

kernel = clcreatekernel(program, "convolve", &status);

// create command queue and associate it with the device you want to execute on

cl_command_queue cmdqueue;

cmdqueue = clcreatecommandqueue(context,devices[0], 0, &status);

checkerror(status, "failed to create commadnqueue");

// buffer

cl_mem buffer_inputsignal = clcreatebuffer(context,cl_mem_read_only,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,null,&status);

cl_mem buffer_mask = clcreatebuffer(context,cl_mem_read_only, sizeof(cl_uint) * maskwidth * maskheight, null,&status);

cl_mem buffer_outputsignal = clcreatebuffer(context,cl_mem_write_only, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,null,&status);

status = clenqueuewritebuffer(cmdqueue, buffer_inputsignal,cl_false,0,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,inputsignal,0,null,null);

status = clenqueuewritebuffer(cmdqueue, buffer_mask,cl_false,0,sizeof(cl_uint) * maskwidth * maskheight,mask,0,null,null);

status = clsetkernelarg(kernel,0,sizeof(cl_mem),&buffer_inputsignal);

status = clsetkernelarg(kernel,1,sizeof(cl_mem),&buffer_mask);

status = clsetkernelarg(kernel,2,sizeof(cl_mem),&buffer_outputsignal);

status = clsetkernelarg(kernel,3,sizeof(int),&inputsignalwidth);

status = clsetkernelarg(kernel,4,sizeof(int),&maskwidth);

size_t globalworksize[2];

globalworksize[0] = outputsignalwidth;

globalworksize[1] = outputsignalheight;

status = clenqueuendrangekernel(cmdqueue,kernel,2,null, globalworksize, null,0, null,null);

clenqueuereadbuffer(cmdqueue,buffer_outputsignal,cl_true,0, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,outputsignal, 0, null, null);

//stop cpu till queue is finish

clfinish(cmdqueue);

// free opencl resources

clreleasekernel(kernel);

clreleasecommandqueue(cmdqueue);

clreleasememobject(buffer_inputsignal);

clreleasememobject(buffer_mask);

clreleasememobject(buffer_outputsignal);

}

and the following kernel-code for the convolution:

/*
 * 2D convolution kernel: each work-item computes one output element.
 *
 *   input      - input signal, read as a flat array with row stride inputwidth
 *   mask       - convolution mask, read as a flat maskwidth x maskwidth array
 *                (the same bound is used for rows and columns)
 *   output     - result, written with row stride get_global_size(0)
 *   inputwidth - number of elements per input row
 *   maskwidth  - edge length of the mask
 *
 * Work-item (x, y) sums mask[r][c] * input[y + r][x + c] over the mask
 * window, so the caller must launch at most
 * (inputwidth - maskwidth + 1) work-items in dimension 0 and size the input
 * so that rows y .. y + maskwidth - 1 exist.
 */
__kernel void convolve(__global uint * input, __global uint * mask, __global uint * output, const int inputwidth, const int maskwidth)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    uint sum = 0;

    for (int r = 0; r < maskwidth; r++)
    {
        // flat index of the first input element covered by mask row r
        const int idxrow = (y + r) * inputwidth + x;

        for (int c = 0; c < maskwidth; c++)
        {
            // convolve row by row
            sum += mask[(r * maskwidth) + c] * input[idxrow + c];
        }
    }

    output[y * get_global_size(0) + x] = sum;
}

When I call the function SoC_Convolution with values up to 600 it works, but setting the values above this causes a segmentation fault when executing.

Can someone tell me what the problem is?

Thanks :)

1 Reply

  • Altera_Forum's avatar
    Altera_Forum
    Icon for Honored Contributor rankHonored Contributor

    You should insert printfs into your host code to find the exact function that is "segfault"ing. You are most likely overflowing the stack by declaring your data structure as "cl_uint inputSignal[inputSignalWidth][inputSignalHeight]"; allocating it using malloc will likely fix your problem.