@BoonBengT_Altera Thank you for your reply,
This is my code snippet:
The OpenCl init and release function:
bool init()
{
cl_int status;
if(!setCwdToExeDir()) {
return false;
}
// Get the OpenCL platform.
platform = findPlatform("Intel(R) FPGA SDK for OpenCL(TM)");
if(platform == NULL) {
printf("ERROR: Unable to find Intel(R) FPGA OpenCL platform.\n");
return false;
}
// User-visible output - Platform information
{
char char_buffer[STRING_BUFFER_LEN];
printf("Querying platform for info:\n");
printf("★★★★★★★★★★★★★★★★★★★★★★★\n");
clGetPlatformInfo(platform, CL_PLATFORM_NAME, STRING_BUFFER_LEN, char_buffer, NULL);
printf("%-40s ★%s★\n", "CL_PLATFORM_NAME", char_buffer);
clGetPlatformInfo(platform, CL_PLATFORM_VERSION, STRING_BUFFER_LEN, char_buffer, NULL);
printf("%-40s ★%s★\n\n", "CL_PLATFORM_VERSION ", char_buffer);
}
// Query the available OpenCL devices.
scoped_array<cl_device_id> devices;
cl_uint num_devices;
devices.reset(getDevices(platform, CL_DEVICE_TYPE_ALL, &num_devices));
// We'll just use the first device.
device = devices[0];
// Create the context.&oclContextCallback
context = clCreateContext(NULL, 1, &device, &oclContextCallback, NULL, &status);
checkError(status, "Failed to create context");
// Create the command queue.
queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status);
checkError(status, "Failed to create command queue");
// Create the program.
std::string binary_file = getBoardBinaryFile("No1k", device);
printf("Using AOCX: %s\n", binary_file.c_str());
program = createProgramFromBinary(context, binary_file.c_str(), &device, 1);
// Build the program that was just created.
status = clBuildProgram(program, 0, NULL, "", NULL, NULL);
checkError(status, "Failed to build program");
// Create the kernel - name passed in here must match kernel name in the
// original CL file, that was compiled into an AOCX file using the AOC tool
const char *kernel_kr = "kr"; // Kernel name, as defined in the CL file
kr = clCreateKernel(program, kernel_kr, &status);
checkError(status, "Failed to create kernel kr");
const char *kernel_kk = "kk"; // Kernel name, as defined in the CL file
kk = clCreateKernel(program, kernel_kk, &status);
checkError(status, "Failed to create kernel kk");
const char *kernel_name = "thirdKernel"; // Kernel name, as defined in the CL file
kernel = clCreateKernel(program, kernel_name, &status);
checkError(status, "Failed to create kernel");
printf("★ OpenCL init successfully!\n");
return true;
}
void cleanup()
{
if(kr) {
clReleaseKernel(kr);
}
if(kk) {
clReleaseKernel(kk);
}
if(kernel) {
clReleaseKernel(kernel);
}
if(program) {
clReleaseProgram(program);
}
if(queue) {
clReleaseCommandQueue(queue);
}
if(context) {
clReleaseContext(context);
}
printf("★ OpenCL clean successfully!\n");
}
The main function:
float *temp = (float *) malloc(sizeof(float) * dim * dim);
for (unsigned int m = 0; m < dim; m++) {
for (unsigned int n = 0; n < dim; n++) {
temp[m * dim + n] = mat[m * dim + n];
if (m == n) {
result[m * dim + n] = 1.0f;
} else {
result[m * dim + n] = 0.0f;
}
}
}
cl_int err = 0;
cl_mem mem_temp = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, sizeof(float) * dim *dim,
temp,
&err);
checkError(err, "Failed to CreateBuffer mem_temp");
cl_mem mem_ratio = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * dim,
NULL,
&err);
checkError(err, "Failed to CreateBuffer mem_ratio");
cl_mem mem_res = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, sizeof(float) * dim *dim,
result,
&err);
checkError(err, "Failed to CreateBuffer mem_res");
size_t work_item;
size_t work_group;
if (dim <= 300) {
work_group = dim * dim;
work_item = dim;
} else {
work_item = 300;
work_group = dim * dim;
}
size_t global_work_size[] = {work_group};
size_t local_work_size[] = {work_item};
for (unsigned int i = 0; i < dim; i++) {
err = clSetKernelArg(kr, 0, sizeof(cl_mem), &mem_temp);
checkError(err, "Failed to SetKernelArg kr mem_temp");
err = clSetKernelArg(kr, 1, sizeof(cl_mem), &mem_ratio);
checkError(err, "Failed to SetKernelArg kr mem_ratio");
err = clSetKernelArg(kr, 2, sizeof(cl_uint), (void *) &i);
checkError(err, "Failed to SetKernelArg kr i");
err = clSetKernelArg(kr, 3, sizeof(cl_uint), (void *) &dim);
checkError(err, "Failed to SetKernelArg kr dim");
err = clEnqueueNDRangeKernel(queue, kr, 1, NULL, &global_work_size[0], &local_work_size[0], 0, NULL, NULL);
checkError(err, "Failed to EnqueueNDRangeKernel kr");
err = clSetKernelArg(kk, 0, sizeof(cl_mem), &mem_res);
checkError(err, "Failed to SetKernelArg kk mem_res");
err = clSetKernelArg(kk, 1, sizeof(cl_mem), &mem_temp);
checkError(err, "Failed to SetKernelArg kk mem_temp");
err = clSetKernelArg(kk, 2, sizeof(cl_mem), &mem_ratio);
checkError(err, "Failed to SetKernelArg kk mem_ratio");
err = clSetKernelArg(kk, 3, sizeof(cl_uint), (void *) &i);
checkError(err, "Failed to SetKernelArg kk i");
err = clSetKernelArg(kk, 4, sizeof(cl_uint), (void *) &dim);
checkError(err, "Failed to SetKernelArg kk dim");
err = clEnqueueNDRangeKernel(queue, kk, 1, NULL, &global_work_size[0], &local_work_size[0], 0, NULL, NULL);
checkError(err, "Failed to EnqueueNDRangeKernel kk");
}
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_res);
checkError(err, "Failed to SetKernelArg mem_res");
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_temp);
checkError(err, "Failed to SetKernelArg mem_temp");
err = clSetKernelArg(kernel, 2, sizeof(cl_uint), (void *) &dim);
checkError(err, "Failed to SetKernelArg dim");
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size[0], &local_work_size[0], 0, NULL, NULL);
checkError(err, "Failed to EnqueueNDRangeKernel");
err = clFinish(queue);
checkError(err, "Failed to Finish");
err = clEnqueueReadBuffer(queue, mem_res, CL_TRUE, 0, sizeof(float) * dim * dim, result, 0, NULL, NULL);
checkError(err, "Failed to ReadBuffer mem_res");
free(temp);
err = clReleaseMemObject(mem_res);
checkError(err, "Failed to ReleaseMemObject mem_res");
err = clReleaseMemObject(mem_temp);
checkError(err, "Failed to ReleaseMemObject mem_temp");
err = clReleaseMemObject(mem_ratio);
checkError(err, "Failed to ReleaseMemObject mem_ratio");
As you can see, I didn't create any cl_event in the code, but in the callback function, it prompts too many events in the host.
This is my compilation command that I used in Makefile:
ifeq ($(VERBOSE),1)
ECHO :=
else
ECHO := @
endif
# Where is the Intel(R) FPGA SDK for OpenCL(TM) software?
ifeq ($(wildcard $(INTELFPGAOCLSDKROOT)),)
$(error Set INTELFPGAOCLSDKROOT to the root directory of the Intel(R) FPGA SDK for OpenCL(TM) software installation)
endif
ifeq ($(wildcard $(INTELFPGAOCLSDKROOT)/host/include/CL/opencl.h),)
$(error Set INTELFPGAOCLSDKROOT to the root directory of the Intel(R) FPGA SDK for OpenCL(TM) software installation.)
endif
# OpenCL compile and link flags.
AOCL_COMPILE_CONFIG := $(shell aocl compile-config --arm)
#AOCL_LINK_CONFIG := $(shell aocl link-config --arm)
AOCL_LINK_CONFIG := $(wildcard $(INTELFPGAOCLSDKROOT)/host/arm32/lib/*.so) $(wildcard $(AOCL_BOARD_PACKAGE_ROOT)/arm32/lib/*.so)
# Compilation flags
ifeq ($(DEBUG),1)
CXXFLAGS += -g
else
CXXFLAGS += -O3
endif
# Compiler. ARM cross-compiler.
CXX := arm-linux-gnueabihf-g++
# Target
TARGET := host
TARGET_DIR := bin
TARGET_NAME := time_FPGA_trasform_and_kernel_exe
# Directories
INC_DIRS := ../common/inc
LIB_DIRS :=
# Files
INCS := $(wildcard )
SRCS := $(wildcard host/src/*.cpp ../common/src/AOCLUtils/*.cpp)
LIBS := rt pthread
# Make it all!
all : $(TARGET_DIR)/$(TARGET)
# Host executable target.
$(TARGET_DIR)/$(TARGET) : Makefile $(SRCS) $(INCS) $(TARGET_DIR)
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -Wall -fPIC $(foreach D,$(INC_DIRS),-I$D) \
$(AOCL_COMPILE_CONFIG) $(SRCS) $(AOCL_LINK_CONFIG) \
$(foreach D,$(LIB_DIRS),-L$D) \
$(foreach L,$(LIBS),-l$L) \
-o $(TARGET_DIR)/$(TARGET_NAME)
$(TARGET_DIR) :
$(ECHO)mkdir $(TARGET_DIR)
# Standard make targets
clean :
$(ECHO)rm -f $(TARGET_DIR)/$(TARGET_NAME)
.PHONY : all clean
Thanks for your help.