//ACL Kernel #define IDX(i, j, n) ((i) * (n) + (j)) //#include __kernel void PushKernel( uint column,__global int * restrict height, __global int * restrict excessFlow,__global int * restrict netFlowOutS, __global int * restrict netFlowInT,uint s,uint t,uint row, __global int * restrict residualFlow_up,__global int * restrict residualFlow_down, __global int * restrict residualFlow_right,__global int * restrict residualFlow_left) { const uint num_column=6; const uint num_row=4; int FlowOutS=*netFlowOutS; int FlowInT=*netFlowInT; uint source=s; uint destination=t; uint index; __local int heights_horizontal_cache[6]; __local int excessFlow_horizontal_cache[6]; __local int excessFlow_horizontal_cache_temp[6]; __local int residualFlow_right_cache[6]; __local int residualFlow_left_cache[6]; __local int outS_cache; //#pragma unroll //#pragma loop_coalesce #pragma ivdep //#pragma ii 1 for(int i=0; i0 && residualFlow_right_cache[j]>0 && heights_horizontal_cache[j]==heights_horizontal_cache[j+1]+1){ int delta = min(excessFlow_horizontal_cache[j], residualFlow_right_cache[j]); residualFlow_right_cache[j]-=delta; residualFlow_left_cache[j+1]+=delta; excessFlow_horizontal_cache[j]-=delta; //excessFlow_horizontal_cache[j+1]+=delta; excessFlow_horizontal_cache_temp[j+1]=delta; if (IDX(i, j+1, num_column) == s) { //FlowOutS-=delta; outS_cache=delta; } else if (IDX(i, j+1, num_column) == t) { FlowInT+=delta;} } ///////////////////////////////////////////////////////////////////////results back to global //mem_fence(CLK_GLOBAL_MEM_FENCE); } #pragma unroll #pragma ivdep for(int j=0; j