Altera_Forum
Honored Contributor
8 years ago
Profiling autorun kernel without --profile
Hi, I have a multi-kernel design that goes
Reader -> (autorun) CU_0 -> (autorun) CU_1 -> (autorun) CU_2 -> Writer (where CU_0, CU_1 and CU_2 are the same). I'm attempting to get the execution time of every kernel in the design (I have already used --profile, but I need to run at full frequency). A snippet is below; I wanted to check that using time_start2 - time_end1 is correct, as I haven't found any examples of using events across multiple command queues, and the fluctuation in the results appears rather large. Cheers, Sam
#################### Averages# ####################
Reader Execution Time
min 6812, avg 7308, max, 11493
Processing Execution Time
min 9135, avg 36094, max, 94796
Writer Execution Time
min 6614, avg 7063, max, 9013
Total Execution Time
min 22657, avg 50466, max, 110813
# ##################################################
...
/*
 * Host-side timing of the Reader -> autorun CUs -> Writer pipeline using
 * OpenCL event profiling on the two enqueued kernels (kernel1 on queue1,
 * kernel2 on queue2).
 *
 * Four intervals are tracked, each with its own min / sum / max slot:
 *   STAT_READER      end1   - start1
 *   STAT_PROCESSING  start2 - end1
 *   STAT_WRITER      end2   - start2
 *   STAT_TOTAL       end2   - start1
 *
 * Values are raw profiling counter differences; OpenCL reports these
 * counters in nanoseconds.
 */
enum { STAT_READER, STAT_PROCESSING, STAT_WRITER, STAT_TOTAL, STAT_COUNT };

static const char *const stat_label[STAT_COUNT] = {
    "Reader Execution Time",
    "Processing Execution Time",
    "Writer Execution Time",
    "Total Execution Time",
};

/* One slot per interval; the statistics must not share a single scalar. */
uint64_t min[STAT_COUNT] = {UINT64_MAX, UINT64_MAX, UINT64_MAX, UINT64_MAX};
uint64_t avg[STAT_COUNT] = {0}; /* running sum; divided by sample count when printed */
uint64_t max[STAT_COUNT] = {0};

#define N 200
/* Leading iterations excluded from the statistics (i = 0..5, as before). */
#define WARMUP_RUNS 6

for (unsigned int i = 0; i < N; i++) {
    /* Check each launch separately so a failure of the first is not masked. */
    status = clEnqueueTask(queue1, kernel1, 0, NULL, &kernel_event1);
    checkError(status, "Failed to launch kernel");
    status = clEnqueueTask(queue2, kernel2, 0, NULL, &kernel_event2);
    checkError(status, "Failed to launch kernel");

    status = clFinish(queue1);
    checkError(status, "Failed to finish queue");
    status = clFinish(queue2);
    checkError(status, "Failed to finish queue");

    uint64_t time_start1 = 0, time_end1 = 0, time_start2 = 0, time_end2 = 0;

    /* A failed query would leave the timestamps meaningless, so check each. */
    status = clGetEventProfilingInfo(kernel_event1, CL_PROFILING_COMMAND_START,
                                     sizeof(uint64_t), &time_start1, NULL);
    checkError(status, "Failed to query profiling info");
    status = clGetEventProfilingInfo(kernel_event1, CL_PROFILING_COMMAND_END,
                                     sizeof(uint64_t), &time_end1, NULL);
    checkError(status, "Failed to query profiling info");
    status = clGetEventProfilingInfo(kernel_event2, CL_PROFILING_COMMAND_START,
                                     sizeof(uint64_t), &time_start2, NULL);
    checkError(status, "Failed to query profiling info");
    status = clGetEventProfilingInfo(kernel_event2, CL_PROFILING_COMMAND_END,
                                     sizeof(uint64_t), &time_end2, NULL);
    checkError(status, "Failed to query profiling info");

    /*
     * NOTE(review): kernel_event1/kernel_event2 are overwritten on the next
     * iteration without being released here. Confirm whether cleanup code
     * outside this fragment releases them; if not, call clReleaseEvent()
     * on both at this point.
     */

    if (i < WARMUP_RUNS) {
        continue;
    }

    uint64_t sample[STAT_COUNT];
    sample[STAT_READER] = time_end1 - time_start1;
    /*
     * NOTE(review): unsigned arithmetic. If the writer kernel starts before
     * the reader kernel ends, this difference wraps to a huge value and
     * distorts avg/max. Verify that start2 >= end1 holds on the target.
     */
    sample[STAT_PROCESSING] = time_start2 - time_end1;
    sample[STAT_WRITER] = time_end2 - time_start2;
    sample[STAT_TOTAL] = time_end2 - time_start1;

    for (int s = 0; s < STAT_COUNT; s++) {
        avg[s] += sample[s];
        if (sample[s] > max[s]) {
            max[s] = sample[s];
        }
        if (sample[s] < min[s]) {
            min[s] = sample[s];
        }
    }
}

printf("#################### Averages# ####################\n");
for (int s = 0; s < STAT_COUNT; s++) {
    printf("%s\n", stat_label[s]);
    /* Divide by the number of samples actually accumulated. */
    printf("min %" PRIu64 ", avg %" PRIu64 ", max, %" PRIu64 "\n",
           min[s], avg[s] / (uint64_t)(N - WARMUP_RUNS), max[s]);
}
printf("###################################################\n\n");
...