--- Quote Start ---
Are you unrolling your loops? Post your complete kernel since the snippet you have posted should compile correctly.
--- Quote End ---
Below is my kernel code:
/* Channels are an Altera vendor extension; the extension has to be enabled
 * before the `channel` type and the read/write_channel_altera calls are used. */
#pragma OPENCL EXTENSION cl_altera_channels : enable

/* Length of the new_rank_copies accumulator arrays used by the kernels below. */
#define M 100

/* Channel carrying float rank values, declared with a buffer depth of 2. */
channel float c0 __attribute__((depth(2)));
/*
 * Computes one new rank value per page and streams it into channel c0.
 *
 * For every i in [0, n) the kernel sums
 *     pages[i*n + j] * (page_ranks[j] / noutlinks[j])      for j in [0, n)
 * then applies  (1 - d)/n + d * sum  with d = 0.85 and writes the result to c0.
 *
 * page_ranks  rank values; only read by this kernel, entries [0, n)
 * n           number of pages; `pages` is indexed as an n x n row-major matrix
 * pages       matrix entries used as multipliers for the outbound rank
 * noutlinks   per-page divisor for page_ranks[j]
 *
 * NOTE(review): a zero in noutlinks[j] makes the division produce inf/NaN;
 * nothing here guards against it -- confirm the host never passes zeros.
 */
__kernel void mapreduce_page_rank(__global float* restrict page_ranks,
                                  int n,
                                  const __global int* restrict pages,
                                  const __global unsigned int* restrict noutlinks)
{
    const float d_factor = 0.85f;

    /* The running sum is spread over M slots instead of a single scalar:
     * each inner iteration adds to the oldest slot and rotates it to the
     * front, so a given partial sum is only touched again M iterations later
     * (presumably to relax the loop-carried dependency on the accumulator). */
    float new_rank_copies[M];

    #pragma unroll
    for (int k = 0; k < M; k++) {
        new_rank_copies[k] = 0.0f;
    }

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            /* The cast makes this divide run in double precision before the
             * result is narrowed back to float. */
            float outbound_rank = page_ranks[j] / (double)noutlinks[j];
            float maps = pages[i * n + j] * outbound_rank;

            /* Add to the oldest slot, shift everything up by one, and put
             * the updated partial sum in slot 0. */
            float cur = new_rank_copies[M - 1] + maps;

            #pragma unroll
            for (int c = M - 1; c > 0; c--) {
                new_rank_copies[c] = new_rank_copies[c - 1];
            }
            new_rank_copies[0] = cur;
        }

        /* Collapse the M partial sums into the final sum for page i and
         * clear the slots for the next page. */
        float new_rank = 0.0f;

        #pragma unroll
        for (int k = 0; k < M; k++) {
            new_rank += new_rank_copies[k];
            new_rank_copies[k] = 0.0f;
        }

        new_rank = ((1.0f - d_factor) / n) + (d_factor * new_rank);
        write_channel_altera(c0, new_rank);
    }
}
/*
 * Reads n rank values from channel c0 into page_ranks, then computes one new
 * rank value per page from them and writes each result back to c0.
 *
 * After the initial read, for every i in [0, n) the kernel sums
 *     pages[i*n + j] * (page_ranks[j] / noutlinks[j])      for j in [0, n)
 * then applies  (1 - d)/n + d * sum  with d = 0.85 and writes the result to c0.
 *
 * page_ranks  overwritten with the n values read from c0, then read back
 * n           number of pages; `pages` is indexed as an n x n row-major matrix
 * pages       matrix entries used as multipliers for the outbound rank
 * noutlinks   per-page divisor for page_ranks[j]
 *
 * NOTE(review): this kernel both drains and feeds c0, and
 * mapreduce_page_rank writes c0 as well. No reader for the values written
 * here is visible in this file -- check the topology against the channel
 * restrictions in the Altera SDK guide (a second channel may be needed).
 *
 * NOTE(review): a zero in noutlinks[j] makes the division produce inf/NaN;
 * nothing here guards against it -- confirm the host never passes zeros.
 */
__kernel void produce_page_rank(__global float* restrict page_ranks,
                                int n,
                                const __global int* restrict pages,
                                const __global unsigned int* restrict noutlinks)
{
    const float d_factor = 0.85f;

    /* The running sum is spread over M slots instead of a single scalar:
     * each inner iteration adds to the oldest slot and rotates it to the
     * front, so a given partial sum is only touched again M iterations later
     * (presumably to relax the loop-carried dependency on the accumulator). */
    float new_rank_copies[M];

    #pragma unroll
    for (int k = 0; k < M; k++) {
        new_rank_copies[k] = 0.0f;
    }

    /* Pull the n incoming rank values off the channel before computing. */
    for (int k = 0; k < n; k++) {
        page_ranks[k] = read_channel_altera(c0);
    }

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            /* The cast makes this divide run in double precision before the
             * result is narrowed back to float. */
            float outbound_rank = page_ranks[j] / (double)noutlinks[j];
            float maps = pages[i * n + j] * outbound_rank;

            /* Add to the oldest slot, shift everything up by one, and put
             * the updated partial sum in slot 0. */
            float cur = new_rank_copies[M - 1] + maps;

            #pragma unroll
            for (int c = M - 1; c > 0; c--) {
                new_rank_copies[c] = new_rank_copies[c - 1];
            }
            new_rank_copies[0] = cur;
        }

        /* Collapse the M partial sums into the final sum for page i and
         * clear the slots for the next page. */
        float new_rank = 0.0f;

        #pragma unroll
        for (int k = 0; k < M; k++) {
            new_rank += new_rank_copies[k];
            new_rank_copies[k] = 0.0f;
        }

        new_rank = ((1.0f - d_factor) / n) + (d_factor * new_rank);
        write_channel_altera(c0, new_rank);
    }
}