Forum Discussion
Altera_Forum
Honored Contributor
8 years ago --- Quote Start --- 1) Is there any error output? 2) What hardware are you using? 3) Did you use a reference design? --- Quote End --- --- Quote Start --- This is a very normal and recurring situation; Altera's emulator has a lot of limitations, first and foremost the fact that it doesn't emulate concurrency/parallelism. When channels are used, it is very likely that if you are not careful enough, your code will block on the FPGA even though it works fine on the emulator. Debugging the code in this situation is not easy. You can try these steps to see if you can find the problem: 1- Add some counters to your code in the emulator and make sure you are writing the same number of values to every channel, that you are reading from it. 2- Pay attention to the "order" of your channels, try to think of situations that if the channels are reordered, your code may block. It is very likely that this is your problem since the compiler does NOT guarantee channel ordering. Carefully read the "Programming Guide > 1.6.4.5.7 Enforcing the Order of Channel Calls" and try using "mem_fence(CLK_CHANNEL_MEM_FENCE)" to force the order of your channels and see if it fixes the issue. 3- Increasing the depth of channels might help. 4- You can add printf to your OpenCL kernel and run it on the FPGA; even though it will heavily slow down the kernel, it might help you find the channel that is blocking the execution. Try to use a light printf (avoid printing values from the kernel, just print a fixed text to see where it is blocking) to avoid new dependencies. --- Quote End --- Thanks for your reply! I want to printf some information, but the information is not displayed. Do you know why? Does it mean my code did not reach that point before it blocked?
My code is as below:

// BUG FIX: the original read "# pragma OPENCL_EXTENSION", which is not a
// recognized pragma; the correct spelling is "#pragma OPENCL EXTENSION"
// (a space, not an underscore), otherwise the channels extension is never
// actually enabled.
#pragma OPENCL EXTENSION cl_altera_channels : enable

// Channel declarations: blocking, depth-8 FIFOs connecting the four kernels.
channel float DATA_IN       __attribute__((depth(8)));
channel float DATA_OUT      __attribute__((depth(8)));
channel float CONV1_WEIGHTS __attribute__((depth(8)));
channel float CONV1_BIAS    __attribute__((depth(8)));

//num_channel = num_pre_feature_maps!!!!!
// Producer stage: each work-item loads one input sample and pushes it to
// DATA_IN once per (feature map, pre-feature map) pair.
// NOTE(review): each work-item writes num_feature_maps * num_pre_feature_maps
// values to DATA_IN, while conv() reads exactly 1024 values per feature map.
// Unless the NDRange makes those totals equal, the blocking channels will
// deadlock on the FPGA (the emulator hides this because it serializes the
// kernels) -- count the writes and reads per channel and make them match.
__kernel void data_in(int num_pre_feature_maps, int num_feature_maps,
                      __global const float *restrict input)
{
    printf("data in\n");
    int global_idx = get_global_id(0);
    int global_idy = get_global_id(1);
    int global_offset = global_idy * get_local_size(0) + global_idx;
    printf("data in global_idx=%d global_idy=%d\n", global_idx, global_idy);

    //Read data (one sample per work-item; the same value is re-sent for
    //every map pair)
    float data = input[global_offset];
    for (int i = 0; i < num_feature_maps; ++i) {
        for (int j = 0; j < num_pre_feature_maps; ++j) {
            printf("channel read datanum_feature_map=%d global_idx=%d global_idy=%d\n", i, global_idx, global_idy);
            write_channel_altera(DATA_IN, data);
        }
    }
}

// Producer stage: streams one weight per work-item into CONV1_WEIGHTS, plus
// one bias value per row (only work-items with global_idx == 0).
__kernel void weights_bias_in(__global const float *restrict weights,
                              __global const float *restrict bias)
{
    printf("here\n");
    int global_idx = get_global_id(0);
    int global_idy = get_global_id(1);

    //Read Weights
    write_channel_altera(CONV1_WEIGHTS,
                         weights[global_idy * KERNEL_SIZE * KERNEL_SIZE + global_idx]);

    //Read Bias
    if (global_idx == 0) {
        printf("gidx=%d gidy=%d\n", global_idx, global_idy);
        write_channel_altera(CONV1_BIAS, bias[global_idy]);
    }
}

// Compute stage: drains the bias, weight, and data channels and pushes
// map_size * map_size results per feature map into DATA_OUT.
__kernel void conv(int map_size, int num_pre_feature_maps,
                   int num_feature_maps, int relu_on)
{
    printf("conv start\n");
    float res_buf[28 * INPUT_SIZE];
    float weights_buf[KERNEL_SIZE * KERNEL_SIZE];
    float bias_buf[MAX_NUM_FEATURE_MAPS];
    // (removed unused local "float rows[4 * INPUT_SIZE + 5]")

    //load bias
    // BUG FIX: the original assigned to the bare array name
    // ("bias_buf = read_channel_altera(conv1_bias)"), which is invalid C --
    // an array is not assignable -- and "conv1_bias" does not match the
    // declared channel CONV1_BIAS (OpenCL C is case sensitive).
    for (int i = 0; i < num_feature_maps; ++i) {
        printf("i=%d\n", i);
        bias_buf[i] = read_channel_altera(CONV1_BIAS);
        //printf("bias data=%f\n", bias_buf[i]);
    }
    //printf("End of load bias\n");

    //int input_size = map_size + KERNEL_SIZE - 1;
    for (int num_feature_map = 0; num_feature_map < num_feature_maps; ++num_feature_map) {
        // Same fix as the bias loop: index the array, use the declared
        // channel name CONV1_WEIGHTS.
        for (int i = 0; i < KERNEL_SIZE * KERNEL_SIZE; ++i) {
            weights_buf[i] = read_channel_altera(CONV1_WEIGHTS);
            printf("i=%d w=%f\n", i, weights_buf[i]);
        }

        // Shift register fed from DATA_IN.
        // NOTE(review): exactly 1024 reads per feature map -- this must equal
        // what data_in() writes per map, or the pipeline deadlocks. Also,
        // res_buf is later indexed up to (map_size-1)*INPUT_SIZE+map_size-1;
        // confirm map_size never exceeds the res_buf bounds (28 rows).
        for (int i = 0; i < 1024; ++i) {
            for (int j = (4 * INPUT_SIZE + 4); j > 0; --j) {
                res_buf[j] = res_buf[j - 1];
            }
            res_buf[0] = read_channel_altera(DATA_IN);
        }

        //test
        for (int i = 0; i < map_size; ++i) {
            for (int j = 0; j < map_size; ++j) {
                // BUG FIX: keep the arithmetic in float -- DATA_OUT is a
                // float channel, so computing in double and writing a double
                // into it was a type mismatch in the original.
                // NOTE(review): this adds one weight and the bias to a single
                // sample; a real convolution would multiply-accumulate over
                // the KERNEL_SIZE*KERNEL_SIZE window, and weights_buf is
                // indexed by feature map rather than kernel position --
                // confirm the intended math.
                float res = res_buf[i * INPUT_SIZE + j]
                          + weights_buf[num_feature_map]
                          + bias_buf[num_feature_map];
                if (relu_on) {
                    // BUG FIX: the original cast res to int before comparing
                    // and selecting, silently truncating the value; ReLU is
                    // max(res, 0) with no integer truncation.
                    res = res > 0.0f ? res : 0.0f;
                }
                printf("write channel i=%d j=%d res=%f\n", i, j, res);
                write_channel_altera(DATA_OUT, res);
            }
        }
    }
}

// Consumer stage: each work-item pops one value per feature map from
// DATA_OUT and stores it to global memory.
__kernel void data_out(int num_feature_maps, __global float *restrict output)
{
    printf("data out start\n");
    int local_idx = get_local_id(0);
    int local_idy = get_local_id(1);
    int global_idx = get_global_id(0);
    int global_idy = get_global_id(1);
    printf("data out global_idx=%d global_idy=%d\n", global_idx, global_idy);

    for (int i = 0; i < num_feature_maps; ++i) {
        printf("data out num_feature_map=%d global_idx=%d global_idy=%d\n", i, global_idx, global_idy);
        float data = read_channel_altera(DATA_OUT);
        //float data = 1;
        int global_offset = global_idy * get_local_size(0) + global_idx;
        output[global_offset] = data;
        // NOTE(review): advancing by global_size(1)^2 only covers one full
        // map when the NDRange is square; get_global_size(0) *
        // get_global_size(1) is the usual per-map stride -- confirm against
        // the host-side NDRange before changing it.
        output += get_global_size(1) * get_global_size(1);
    }
}

Can you find any problem?? Thank you!