Forum Discussion

offreitas's avatar
offreitas
Icon for New Contributor rankNew Contributor
4 years ago

Receiving an error when trying to use channels in OpenCL

Hello,

I'm trying to use a multikernel design and sending data through channels. However, I keep getting this error when trying to emulate my design:

host: acl_emulator.cpp:309: void* __acl_emulator_channel_dequeue(size_t): Assertion `!__acl_emulator_channel_empty(ch)' failed.

I can't find this "acl_emulator.cpp" archive anywhere. What am I probably doing wrong?

Thanks

3 Replies

  • HRZ's avatar
    HRZ
    Icon for Frequent Contributor rankFrequent Contributor

    That is an internal compiler error. If you can post your kernel code, it would be easier to find the issue.

    • offreitas's avatar
      offreitas
      Icon for New Contributor rankNew Contributor
      #include "../header/stencil.h"
      
      #define CHANNEL_SIZE 16
      
      // Enable channels
      #pragma OPENCL EXTENSION cl_intel_channels : enable
      
      // Floating point optimizations
      #pragma clang fp contract(fast)
      #pragma clang fp reassoc(on)
      
      /***********************************************************
      *********************** DATA TYPES *************************
      ***********************************************************/
      typedef struct {
      	float data[ACCESS_SIZE];
      } channelData;
      
      /***********************************************************
      ************************ CHANNELS **************************
      ***********************************************************/
      /*
       * Arguments
       */
      channel float3 fargs[TIME + 1] __attribute__((depth(0)));
      
      /*
       * Matrices
       */
      channel channelData prev[TIME + 1] __attribute__((depth(CHANNEL_SIZE)));
      channel channelData next[TIME + 1] __attribute__((depth(CHANNEL_SIZE)));
      channel channelData vel[TIME + 1]  __attribute__((depth(CHANNEL_SIZE)));
      
      /***********************************************************
      ********************* CONSTANTS KERNEL *********************
      ***********************************************************/
      __attribute__((max_global_work_dim(0)))
      __kernel void constants(const float div_dxSquared, const float div_dzSquared, const float dtSquared) {
      	/*
      	 * Transform constants into OpenCL's data struct
      	 */
      	// Output constants
      	float3 fconstants_out = (float3)(div_dxSquared, div_dzSquared, dtSquared);
      
      	// Inputs contants
      	float3 fconstants_in;
      
      	/*
      	 * Communication with channels
      	 */
      	// Writes on channels
      	write_channel_intel(fargs[0], fconstants_out);
      	mem_fence(CLK_CHANNEL_MEM_FENCE);
      
      	// Reads from channels
      	/*
      	 * In order to prevent compiler from inferring depth channels with constant's channels,
      	 * the code needs to create a false cycle of channels
      	 */
      	fconstants_in = read_channel_intel(fargs[TIME]);
      	mem_fence(CLK_CHANNEL_MEM_FENCE);
      }
      
      /***********************************************************
      ********************** READER KERNEL ***********************
      ***********************************************************/
      __kernel void reader(__global volatile float* restrict prev_base,
      		     __global volatile float* restrict next_base,
      		     __constant float* restrict vel_squared) {
      	/*
      	 * Variables to calculate index in grid
      	 */
      	uint group_offset = get_local_id(0) * ACCESS_SIZE;
      	uint x_offset = group_offset - 1;
      	uint y = get_global_id(1);
      	uint x_limit = ACCESS_SIZE + ORDER;
      
      	/*
      	 * Variable to send to PE
      	 */
      	channelData input;
      
      	for (uint i = 0; i < ACCESS_SIZE; i++) input.data[i] = 0.0f;
      
      	/*
      	 * Sends data to PE
      	 */
      	for (uint i = 0; i < x_limit; i++) {
      		uint real_x = x_offset + i;
      		uint index = y * NX + real_x;
      
      		if (real_x >= 0 && real_x < NX) {
      			input.data[i] = prev_base[index];
      		}
      	}
      
      	write_channel_intel(prev[0], input);
      }
      
      /***********************************************************
      ********************** WRITER KERNEL ***********************
      ***********************************************************/
      __kernel void writer(__global volatile float* restrict next_base) {
      	/*
      	 * Variables to calculate index in grid
      	 */
      	uint group_offset = get_local_id(0) * ACCESS_SIZE;
      	uint x_offset = group_offset - 1;
      	uint y = get_global_id(1);
      	uint x_limit = ACCESS_SIZE + ORDER;
      
      	/*
      	 * Reads from PE
      	 */
      	channelData output = read_channel_intel(prev[TIME]);
      
      	/*
      	 * Writes matrix
      	 */
      	for (uint i = 0; i < x_limit; i++) {
      		uint real_x = x_offset + i;
      		uint index = y * NX + real_x;
      
      		if (real_x >= 0 && real_x < NX) {
      			next_base[index] = output.data[i];
      		}
      	}
      }
      
      /***********************************************************
      ******************* PROCESSING ELEMENTS ********************
      ***********************************************************/
      __attribute__((max_global_work_dim(0)))
      __attribute__((autorun))
      __attribute__((num_compute_units(TIME, 1, 1)))
      __kernel void PE() {
      	/*
      	 * Gets current and next PE
      	 */
      	const uint id = get_compute_id(0);
      	const uint next_id = id + 1;
      
      	/*
      	 * Reads arguments
      	 */
      	const float3 fconstants = read_channel_intel(fargs[id]);
      	mem_fence(CLK_CHANNEL_MEM_FENCE);
      
      	/*
      	 * Writes arguments on next channel
      	 */
      	write_channel_intel(fargs[next_id], fconstants);
      
      	/*
      	 * Memory Allocation
      	 */
      	// Shift registers
      	__local float __attribute__((doublepump,
      				     memory("MLAB"),
      				     bankwidth(4))) sr_prev[SR_SIZE],
      		      				    sr_next[SR_SIZE],
      		      				    sr_vel[SR_SIZE];
      
      	const float div_dxSquared = fconstants.s0;
      	const float div_dzSquared = fconstants.s1;
      	const float dtSquared = fconstants.s2;
      
      	// Channel communication
      	channelData input;
      	channelData output;
      
      	// Times channel will be read
      	const uint count = 2 * GROUPS + 1;
      
      	/*
      	 * Computation
      	 */
      	// Index of shift registers
      	uint index = 0;
      
      	for (uint i = 0; i < count; i++) {
      		input = read_channel_intel(prev[id]);
      
      		for (uint pos = 0; pos < ACCESS_SIZE; pos++)
      			sr_prev[index++] = input.data[pos];
      	}
      
      	for (uint i = 0; i < GROUPS * NZ - count; i++) {
      		for (uint group = 0; group < GROUPS; group++) {}
      		// Writes future discarded values in channels
      		for (uint pos = 0; pos < ACCESS_SIZE; pos++)
      			output.data[pos] = sr_prev[pos];
      		
      		write_channel_intel(prev[next_id], output);
      
      		// Shifts registers
      		#pragma unroll
      		for (uint pos = 0; pos < SR_LIMIT; pos++) {
      			uint shift_pos = ACCESS_SIZE + pos;
      
      			sr_prev[pos] = sr_prev[shift_pos];
      		}
      
      		// Read from channels
      		input = read_channel_intel(prev[id]);
      
      		for (uint pos = SR_LIMIT; pos < SR_SIZE; pos++)
      			sr_prev[pos] = input.data[pos - SR_LIMIT];
      	}
      }