Forum Discussion
HRZ
Frequent Contributor
4 years agoThat is an internal compiler error. If you can post your kernel code, it would be easier to find the issue.
offreitas
New Contributor
4 years ago#include "../header/stencil.h" #define CHANNEL_SIZE 16 // Enable channels #pragma OPENCL EXTENSION cl_intel_channels : enable // Floating point optimizations #pragma clang fp contract(fast) #pragma clang fp reassoc(on) /*********************************************************** *********************** DATA TYPES ************************* ***********************************************************/ typedef struct { float data[ACCESS_SIZE]; } channelData; /*********************************************************** ************************ CHANNELS ************************** ***********************************************************/ /* * Arguments */ channel float3 fargs[TIME + 1] __attribute__((depth(0))); /* * Matrices */ channel channelData prev[TIME + 1] __attribute__((depth(CHANNEL_SIZE))); channel channelData next[TIME + 1] __attribute__((depth(CHANNEL_SIZE))); channel channelData vel[TIME + 1] __attribute__((depth(CHANNEL_SIZE))); /*********************************************************** ********************* CONSTANTS KERNEL ********************* ***********************************************************/ __attribute__((max_global_work_dim(0))) __kernel void constants(const float div_dxSquared, const float div_dzSquared, const float dtSquared) { /* * Transform constants into OpenCL's data struct */ // Output constants float3 fconstants_out = (float3)(div_dxSquared, div_dzSquared, dtSquared); // Inputs contants float3 fconstants_in; /* * Communication with channels */ // Writes on channels write_channel_intel(fargs[0], fconstants_out); mem_fence(CLK_CHANNEL_MEM_FENCE); // Reads from channels /* * In order to prevent compiler from inferring depth channels with constant's channels, * the code needs to create a false cycle of channels */ fconstants_in = read_channel_intel(fargs[TIME]); mem_fence(CLK_CHANNEL_MEM_FENCE); } /*********************************************************** ********************** READER KERNEL *********************** ***********************************************************/ __kernel void reader(__global volatile float* restrict prev_base, __global volatile float* restrict next_base, __constant float* restrict vel_squared) { /* * Variables to calculate index in grid */ uint group_offset = get_local_id(0) * ACCESS_SIZE; uint x_offset = group_offset - 1; uint y = get_global_id(1); uint x_limit = ACCESS_SIZE + ORDER; /* * Variable to send to PE */ channelData input; for (uint i = 0; i < ACCESS_SIZE; i++) input.data[i] = 0.0f; /* * Sends data to PE */ for (uint i = 0; i < x_limit; i++) { uint real_x = x_offset + i; uint index = y * NX + real_x; if (real_x >= 0 && real_x < NX) { input.data[i] = prev_base[index]; } } write_channel_intel(prev[0], input); } /*********************************************************** ********************** WRITER KERNEL *********************** ***********************************************************/ __kernel void writer(__global volatile float* restrict next_base) { /* * Variables to calculate index in grid */ uint group_offset = get_local_id(0) * ACCESS_SIZE; uint x_offset = group_offset - 1; uint y = get_global_id(1); uint x_limit = ACCESS_SIZE + ORDER; /* * Reads from PE */ channelData output = read_channel_intel(prev[TIME]); /* * Writes matrix */ for (uint i = 0; i < x_limit; i++) { uint real_x = x_offset + i; uint index = y * NX + real_x; if (real_x >= 0 && real_x < NX) { next_base[index] = output.data[i]; } } } /*********************************************************** ******************* PROCESSING ELEMENTS ******************** ***********************************************************/ __attribute__((max_global_work_dim(0))) __attribute__((autorun)) __attribute__((num_compute_units(TIME, 1, 1))) __kernel void PE() { /* * Gets current and next PE */ const uint id = get_compute_id(0); const uint next_id = id + 1; /* * Reads arguments */ const float3 fconstants = read_channel_intel(fargs[id]); mem_fence(CLK_CHANNEL_MEM_FENCE); /* * Writes arguments on next channel */ write_channel_intel(fargs[next_id], fconstants); /* * Memory Allocation */ // Shift registers __local float __attribute__((doublepump, memory("MLAB"), bankwidth(4))) sr_prev[SR_SIZE], sr_next[SR_SIZE], sr_vel[SR_SIZE]; const float div_dxSquared = fconstants.s0; const float div_dzSquared = fconstants.s1; const float dtSquared = fconstants.s2; // Channel communication channelData input; channelData output; // Times channel will be read const uint count = 2 * GROUPS + 1; /* * Computation */ // Index of shift registers uint index = 0; for (uint i = 0; i < count; i++) { input = read_channel_intel(prev[id]); for (uint pos = 0; pos < ACCESS_SIZE; pos++) sr_prev[index++] = input.data[pos]; } for (uint i = 0; i < GROUPS * NZ - count; i++) { for (uint group = 0; group < GROUPS; group++) {} // Writes future discarded values in channels for (uint pos = 0; pos < ACCESS_SIZE; pos++) output.data[pos] = sr_prev[pos]; write_channel_intel(prev[next_id], output); // Shifts registers #pragma unroll for (uint pos = 0; pos < SR_LIMIT; pos++) { uint shift_pos = ACCESS_SIZE + pos; sr_prev[pos] = sr_prev[shift_pos]; } // Read from channels input = read_channel_intel(prev[id]); for (uint pos = SR_LIMIT; pos < SR_SIZE; pos++) sr_prev[pos] = input.data[pos - SR_LIMIT]; } }