Altera_Forum
Honored Contributor
7 years ago
Multiple kernels in a .cl file
I have a question about the case where several kernels are appended into a single CL file.
1. In the CL file below, when it contains only clip_8b_kernel, ITransformOne_kernel and GetSSE_kernel, placing ITransformOne_kernel before GetSSE_kernel produces the error shown below. Placing them in the order clip_8b_kernel, GetSSE_kernel, ITransformOne_kernel compiles successfully.
2. This CL file includes multiple kernels. Compiling any one of these kernels separately gives no error, but ITransform_C_kernel and ITransformOne_kernel cannot exist in the same file: if both are present, compilation fails with the error shown below.
Error content:
aoc: Linking with IP library ... /build/swbuild/SJ/nightly/17.0/290/l64/p4/acl/llvm/include/acl/Analysis/LocalMemSizing.h(83) ******* Requested memsize for unavailable aspace m_local_mem_size_per_workgroup.count(aspace) FAILED 0 libLLVM-3.0.so 0x00007f348ee2846f 1 libLLVM-3.0.so 0x00007f348ee2a3e2 2 libpthread.so.0 0x00007f348da905e0 3 libLLVM-3.0.so 0x00007f348f617ef9 acl::get_bits(llvm::Value const*, llvm::TargetData*, acl::LocalMemSizing*, bool, bool, bool) + 7289 4 libLLVM-3.0.so 0x00007f348e37a880 grif::Pass_AreaReport::add_private_var_resources() + 8800 5 libLLVM-3.0.so 0x00007f348e37c7e1 grif::Pass_AreaReport::runPass(grif::XNode*) + 913 6 libLLVM-3.0.so 0x00007f348e34c58a acl::DSDKGenerate::runOnModule(llvm::Module&) + 8010 7 libLLVM-3.0.so 0x00007f348f049fc1 llvm::MPPassManager::runOnModule(llvm::Module&) + 577 8 libLLVM-3.0.so 0x00007f348f04a16b llvm::PassManagerImpl::run(llvm::Module&) + 187 9 aocl-llc 0x000000000040be40 main + 5360 10 libc.so.6 0x00007f348ca9ec05 __libc_start_main + 245 11 aocl-llc 0x00000000004098e9 Stack dump: 0. Program arguments: /home/****/inteldevstack/intelFPGA_pro/hld/linux64/bin/aocl-llc -march=griffin -board /home/****/inteldevstack/a10_gx_pac_ias_1_0_prq/opencl/opencl_bsp/hardware/pac_a10/board_spec.xml -dbg-info-enabled test.bc -o test.v 1. Running pass 'Generate DSDK netlist and HDL' on module 'test.bc'. Error: Verilog generator FAILED. Refer to test/test.log for details.
cl file:

/*
 * Scalar aliases used by the helper functions below.
 * NOTE(review): despite their names, uint16_t and uint32_t are declared here
 * as the *signed* types short and int. They are kept exactly as posted because
 * the kernel signatures depend on them -- confirm this is intended.
 */
typedef uchar uint8_t;
typedef short uint16_t;
typedef int uint32_t;

/* Element counts of the private staging buffers used by each kernel wrapper. */
#define clip_8b_const_v_LEN 1
#define clip_8b_out_clip_8b_return_LEN 1
#define ITransformOne_in_ref_LEN 200
#define ITransformOne_in_in_LEN 200
#define ITransformOne_inout_dst_LEN 200
#define GetSSE_in_a_LEN 200
#define GetSSE_in_b_LEN 200
#define GetSSE_const_w_LEN 1
#define GetSSE_const_h_LEN 1
#define GetSSE_out_GetSSE_return_LEN 1
#define ITransform_C_in_ref_LEN 200
#define ITransform_C_in_in_LEN 200
#define ITransform_C_inout_dst_LEN 200
#define ITransform_C_const_do_two_LEN 1

/****** VP8Calc -- VP8Transform *************************************************/
#define WEBP_INLINE inline
#define BPS 32 /* row stride, in elements, of the ref/dst/a/b buffers */

/* Clamp v to the range [0, 255]. */
static WEBP_INLINE uchar clip_8b(int v) {
  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}

/* dst(x, y) = clip(ref(x, y) + (v >> 3)); expects `ref` and `dst` in scope. */
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
#define kC1 (20091 + (1 << 16)) // DF: NEW
#define kC2 35468 // DF: NEW
#define MUL(a, b) (((a) * (b)) >> 16)

/*
 * Two-pass 4x4 transform of the 16 coefficients in `in`; the result is added
 * to the 4x4 block at `ref` and stored, clamped to 8 bits, into `dst`.
 * `ref` and `dst` are addressed with a row stride of BPS; `in` is read at
 * indices 0..15.
 */
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const uint16_t* in,
                                      uint8_t* dst) {
  int C[4 * 4], *tmp;
  int i;
  tmp = C;
  for (i = 0; i < 4; ++i) { // vertical pass
    const int a = in[0] + in[8];
    const int b = in[0] - in[8];
    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);
    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
    tmp[0] = a + d;
    tmp[1] = b + c;
    tmp[2] = b - c;
    tmp[3] = a - d;
    tmp += 4;
    in++;
  }
  tmp = C;
  for (i = 0; i < 4; ++i) { // horizontal pass
    const int dc = tmp[0] + 4;
    const int a = dc + tmp[8];
    const int b = dc - tmp[8];
    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
    STORE(0, i, a + d);
    STORE(1, i, b + c);
    STORE(2, i, b - c);
    STORE(3, i, a - d);
    tmp++;
  }
}

/*
 * Transform one 4x4 block and, when do_two is non-zero, also the block that
 * starts 4 elements further along in ref/dst and 16 coefficients further
 * along in `in`.
 */
void ITransform_C(const uint8_t* ref, const uint16_t* in, uint8_t* dst,
                  int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
  }
}

/*
 * Sum of squared differences between the w x h regions at `a` and `b`, both
 * addressed with a row stride of BPS. The highest index read is
 * (h - 1) * BPS + (w - 1).
 */
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, int w,
                              int h) {
  int count = 0;
  int y, x;
  for (y = 0; y < h; ++y) { // DF: TBC, may need to unroll this
    for (x = 0; x < w; ++x) {
      const int diff = (int)a[x] - b[x];
      count += diff * diff;
    }
    a += BPS;
    b += BPS;
  }
  return count;
}

/* Kernel wrapper: writes clip_8b(v) to clip_8b_return[0]. */
__kernel void clip_8b_kernel(int v, __global uchar* restrict clip_8b_return) {
  printf("start func:clip_8b\n");
  uchar clip_8b_return_[clip_8b_out_clip_8b_return_LEN];
  /* Stage the result in the private buffer first; writing it straight to the
   * global output and then copying the uninitialised staging buffer over it
   * would discard the result. */
  clip_8b_return_[0] = clip_8b(v);
  for (int i = 0; i < clip_8b_out_clip_8b_return_LEN; i++) {
    clip_8b_return[i] = clip_8b_return_[i];
  }
}

/*
 * Kernel wrapper: copies ref/in/dst from global memory into private buffers,
 * runs ITransformOne on them and copies dst back to global memory.
 */
__kernel void ITransformOne_kernel(__global uint8_t* restrict ref,
                                   __global uint16_t* restrict in,
                                   __global uint8_t* restrict dst) {
  printf("start func:ITransformOne\n");
  uint8_t ref_[ITransformOne_in_ref_LEN];
  uint16_t in_[ITransformOne_in_in_LEN];
  uint8_t dst_[ITransformOne_inout_dst_LEN];
  /* Element-wise copies: the [i] subscripts are required, an array cannot be
   * assigned from a pointer. */
  for (int i = 0; i < ITransformOne_in_ref_LEN; i++) {
    ref_[i] = ref[i];
  }
  for (int i = 0; i < ITransformOne_in_in_LEN; i++) {
    in_[i] = in[i];
  }
  for (int i = 0; i < ITransformOne_inout_dst_LEN; i++) {
    dst_[i] = dst[i];
  }
  ITransformOne(ref_, in_, dst_);
  for (int i = 0; i < ITransformOne_inout_dst_LEN; i++) {
    dst[i] = dst_[i];
  }
}

/*
 * Kernel wrapper: copies a/b from global memory into private buffers and
 * writes GetSSE(a, b, w, h) to GetSSE_return[0].
 * The private buffers hold 200 elements, so the caller must keep
 * (h - 1) * BPS + (w - 1) below 200; w and h are not checked here.
 */
__kernel void GetSSE_kernel(__global uint8_t* restrict a,
                            __global uint8_t* restrict b, int w, int h,
                            __global int* restrict GetSSE_return) {
  printf("start func:GetSSE\n");
  uint8_t a_[GetSSE_in_a_LEN];
  uint8_t b_[GetSSE_in_b_LEN];
  int GetSSE_return_[GetSSE_out_GetSSE_return_LEN];
  for (int i = 0; i < GetSSE_in_a_LEN; i++) {
    a_[i] = a[i];
  }
  for (int i = 0; i < GetSSE_in_b_LEN; i++) {
    b_[i] = b[i];
  }
  /* Stage the result privately, then copy it out (see clip_8b_kernel). */
  GetSSE_return_[0] = GetSSE(a_, b_, w, h);
  for (int i = 0; i < GetSSE_out_GetSSE_return_LEN; i++) {
    GetSSE_return[i] = GetSSE_return_[i];
  }
}

/*
 * Kernel wrapper: copies ref/in/dst from global memory into private buffers,
 * runs ITransform_C on them and copies dst back to global memory.
 */
__kernel void ITransform_C_kernel(__global uint8_t* restrict ref,
                                  __global uint16_t* restrict in,
                                  __global uint8_t* restrict dst, int do_two) {
  printf("start func:ITransform_C\n");
  uint8_t ref_[ITransform_C_in_ref_LEN];
  uint16_t in_[ITransform_C_in_in_LEN];
  uint8_t dst_[ITransform_C_inout_dst_LEN];
  for (int i = 0; i < ITransform_C_in_ref_LEN; i++) {
    ref_[i] = ref[i];
  }
  for (int i = 0; i < ITransform_C_in_in_LEN; i++) {
    in_[i] = in[i];
  }
  for (int i = 0; i < ITransform_C_inout_dst_LEN; i++) {
    dst_[i] = dst[i];
  }
  ITransform_C(ref_, in_, dst_, do_two);
  for (int i = 0; i < ITransform_C_inout_dst_LEN; i++) {
    dst[i] = dst_[i];
  }
}

who can help me? thanks in advance.