Forum Discussion
Christoph9
New Contributor
3 years agoHey BB,
thanks for your help and sorry for the late response, I did not came to work on this for the last week.
I made you the smallest little example of my setup that creates this error, it does not really something useful, just some snippets of the DPCT-translated and slightly modified raytracing benchmark of the Altis GPU Becnhmark.
You need the following files in some directory:
#include <CL/sycl.hpp> #include <dpct/dpct.hpp> #include <dpct/rng_utils.hpp> #include <oneapi/dpl/random> #include <oneapi/mkl.hpp> #include <oneapi/mkl/rng/device.hpp> #include <sycl/ext/intel/fpga_extensions.hpp> #include "vec3.h" #include "sphere.h" #include "camera.h" #include "random_gen.h" namespace fpga_tools { namespace detail { // Autorun implementation template <bool run_forever, typename KernelID> struct Autorun_impl { // Constructor with a kernel name template <typename DeviceSelector, typename KernelFunctor> Autorun_impl(DeviceSelector device_selector, KernelFunctor kernel) { // static asserts to ensure KernelFunctor is callable static_assert(std::is_invocable_r_v<void, KernelFunctor>, "KernelFunctor must be callable with no arguments"); // create the device queue sycl::queue q{device_selector}; // submit the user's kernel if constexpr (run_forever) { if constexpr (std::is_same_v<KernelID, void>) { // AutorunForever, kernel name not given q.single_task([=] { while (1) { kernel(); } }); } else { // AutorunForever, kernel name given q.single_task<KernelID>([=] { while (1) { kernel(); } }); } } else { // run the kernel as-is, if the user wanted it to run forever they // will write their own explicit while-loop if constexpr (std::is_same_v<KernelID, void>) { // Autorun, kernel name not given q.single_task(kernel); } else { // Autorun, kernel name given q.single_task<KernelID>(kernel); } } } }; } // namespace detail // Autorun template <typename KernelID = void> using Autorun = detail::Autorun_impl<false, KernelID>; // AutorunForever template <typename KernelID = void> using AutorunForever = detail::Autorun_impl<true, KernelID>; } // namespace fpga_tools #if defined(FPGA_EMULATOR) sycl::ext::intel::fpga_emulator_selector ds; #else sycl::ext::intel::fpga_selector ds; #endif class random_generator_kernel_id; struct rnd_generator { void operator()() const { auto rand_state = dpct::rng::device::rng_generator< oneapi::mkl::rng::device::philox4x32x10<4>>(1984, { 0, 0 * 8 }); while (1) { sycl::ext::intel::pipe<rnd_out_pipe_id, float, 8>::write( rand_state .generate<oneapi::mkl::rng::device::uniform<float>, 1>()); } } }; fpga_tools::Autorun<random_generator_kernel_id> ar_rnd_gen{ds, rnd_generator{}}; int main(int argc, char *argv[]) { sycl::queue q{ds}; const sycl::range<3> blocks(1, 4, 4); const sycl::range<3> threads(1, 16, 16); const size_t fb_size = 3840 * 2160 * sizeof(vec3); sycl::buffer<vec3> h_fb { sycl::range(fb_size) }; sycl::event render_event = q.submit([&](sycl::handler &cgh) { sycl::accessor a_fb { h_fb, cgh, sycl::write_only, sycl::no_init }; cgh.parallel_for<class sample_kernel_id>( sycl::nd_range<3>(blocks * threads, threads), [=](sycl::nd_item<3> item_ct1) { int i = item_ct1.get_local_id(2) + item_ct1.get_group(2) * item_ct1.get_local_range(2); int j = item_ct1.get_local_id(1) + item_ct1.get_group(1) * item_ct1.get_local_range(1); if ((i >= 3840) || (j >= 2160)) return; int pixel_index = j * 3840 + i; vec3 col(0, 0, 0); vec3 lookfrom(13.0f, 2.0f, 3.0f); vec3 lookat(0.0f, 0.0f, 0.0f); float dist_to_focus = 10.0f; (lookfrom - lookat).length(); float aperture = 0.1f; camera cam(lookfrom, lookat, vec3(0.0f, 1.0f, 0.0f), 30.0f, float(3840) / float(2160), aperture, dist_to_focus); for (int s = 0; s < 10; s++) { float u = float(i + sycl::ext::intel::pipe<rnd_out_pipe_id, float, 8>::read()) / float(3840); float v = float(j + sycl::ext::intel::pipe<rnd_out_pipe_id, float, 8>::read()) / float(2160); ray r = cam.get_ray(u, v); } col /= float(10); col[0] = sycl::sqrt(col[0]); col[1] = sycl::sqrt(col[1]); col[2] = sycl::sqrt(col[2]); a_fb[pixel_index] = col; }); }); render_event.wait(); const float elapsed = render_event.get_profiling_info< sycl::info::event_profiling::command_end>() - render_event.get_profiling_info< sycl::info::event_profiling::command_start>(); std::cout << "Test elapsed: " << elapsed << std::endl; return 0; }
(test.cpp)
//////////////////////////////////////////////////////////////////////////////////////////////////// // file: altis\src\cuda\level2\raytracing\camera.h // // summary: Declares the camera class // // origin: Ray tracing(https://github.com/ssangx/raytracing.cuda) //////////////////////////////////////////////////////////////////////////////////////////////////// #ifndef CAMERAH #define CAMERAH #include <CL/sycl.hpp> #include <dpct/dpct.hpp> #include <oneapi/mkl.hpp> #include <oneapi/mkl/rng/device.hpp> #include <dpct/rng_utils.hpp> #include "ray.h" #include "random_gen.h" //////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> Random in unit disk. </summary> /// /// <remarks> Ed, 5/20/2020. </remarks> /// /// <param name="local_rand_state"> [in,out] If non-null, state of the local random. </param> /// /// <returns> A vec3. </returns> //////////////////////////////////////////////////////////////////////////////////////////////////// /* DPCT1032:1298: A different random number generator is used. You may need to adjust the code. */ vec3 random_in_unit_disk() { vec3 p; do { /* DPCT1084:1299: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ p = 2.0f * vec3(sycl::ext::intel::pipe<rnd_out_pipe_id, float, 8>::read(), sycl::ext::intel::pipe<rnd_out_pipe_id, float, 8>::read(), 0) - vec3(1, 1, 0); } while (dot(p,p) >= 1.0f); return p; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> A camera. </summary> /// /// <remarks> Ed, 5/20/2020. </remarks> //////////////////////////////////////////////////////////////////////////////////////////////////// class camera { public: camera(vec3 lookfrom, vec3 lookat, vec3 vup, float vfov, float aspect, float aperture, float focus_dist) { // vfov is top to bottom in degrees lens_radius = aperture / 2.0f; float theta = vfov*((float)M_PI)/180.0f; float half_height = sycl::tan(theta / 2.0f); float half_width = aspect * half_height; origin = lookfrom; /* DPCT1084:1300: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ w = unit_vector(lookfrom - lookat); /* DPCT1084:1301: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ u = unit_vector(cross(vup, w)); /* DPCT1084:1302: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ v = cross(w, u); /* DPCT1084:1303: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ lower_left_corner = origin - half_width * focus_dist * u - half_height * focus_dist * v - focus_dist * w; /* DPCT1084:1304: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ horizontal = 2.0f * half_width * focus_dist * u; /* DPCT1084:1305: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ vertical = 2.0f * half_height * focus_dist * v; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> Gets a ray. </summary> /// /// <remarks> Ed, 5/20/2020. </remarks> /// /// <param name="s"> A float to process. </param> /// <param name="t"> A float to process. </param> /// <param name="local_rand_state"> [in,out] If non-null, state of the local random. </param> /// /// <returns> The ray. </returns> //////////////////////////////////////////////////////////////////////////////////////////////////// /* DPCT1032:1306: A different random number generator is used. You may need to adjust the code. */ ray get_ray(float s, float t) { /* DPCT1084:1307: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ vec3 rd = lens_radius * random_in_unit_disk(); /* DPCT1084:1308: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ vec3 offset = u * rd.x() + v * rd.y(); /* DPCT1084:1309: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ return ray(origin + offset, lower_left_corner + s * horizontal + t * vertical - origin - offset); } /// <summary> The origin. </summary> vec3 origin; /// <summary> The lower left corner. </summary> vec3 lower_left_corner; /// <summary> The horizontal. </summary> vec3 horizontal; /// <summary> The vertical. </summary> vec3 vertical; //////////////////////////////////////////////////////////////////////////////////////////////////// /// <summary> Gets the w. </summary> /// /// <value> The w. </value> //////////////////////////////////////////////////////////////////////////////////////////////////// vec3 u, v, w; /// <summary> The lens radius. </summary> float lens_radius; }; #endif
(camera.h)
#pragma once class rnd_out_pipe_id;
(random_gen.h)
#ifndef RAYH #define RAYH #include <CL/sycl.hpp> #include <dpct/dpct.hpp> #include "vec3.h" class ray { public: ray() {} ray(const vec3& a, const vec3& b) { A = a; B = b; } vec3 origin() const { return A; } vec3 direction() const { return B; } /* DPCT1084:1293: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ vec3 point_at_parameter(float t) const { return A + t * B; } /// <summary> A vec3 to process. </summary> vec3 A; /// <summary> A vec3 to process. </summary> vec3 B; }; #endif
(ray.h)
#ifndef SPHEREH #define SPHEREH #include <CL/sycl.hpp> #include <dpct/dpct.hpp> #include "ray.h" class sphere { public: sphere() {} sphere(vec3 cen, float r) : center(cen), radius(r) {}; SYCL_EXTERNAL bool hit(const ray& r, float tmin, float tmax) const; vec3 center; float radius; }; bool sphere::hit(const ray& r, float t_min, float t_max) const { vec3 oc = r.origin() - center; float a = dot(r.direction(), r.direction()); float b = dot(oc, r.direction()); float c = dot(oc, oc) - radius*radius; float discriminant = b*b - a*c; if (discriminant > 0) { float temp = (-b - sycl::sqrt(discriminant)) / a; if (temp < t_max && temp > t_min) { return true; } temp = (-b + sycl::sqrt(discriminant)) / a; if (temp < t_max && temp > t_min) { return true; } } return false; } #endif
(sphere.h)
#ifndef VEC3H #define VEC3H #include <CL/sycl.hpp> #include <dpct/dpct.hpp> #include <math.h> #include <stdlib.h> #include <iostream> class vec3 { public: vec3() {} vec3(float e0, float e1, float e2) { e[0] = e0; e[1] = e1; e[2] = e2; } inline float x() const { return e[0]; } inline float y() const { return e[1]; } inline float z() const { return e[2]; } inline float r() const { return e[0]; } inline float g() const { return e[1]; } inline float b() const { return e[2]; } inline const vec3& operator+() const { return *this; } inline vec3 operator-() const { return vec3(-e[0], -e[1], -e[2]); } inline float operator[](int i) const { return e[i]; } inline float& operator[](int i) { return e[i]; }; inline vec3& operator+=(const vec3 &v2); inline vec3& operator-=(const vec3 &v2); inline vec3& operator*=(const vec3 &v2); inline vec3& operator/=(const vec3 &v2); inline vec3& operator*=(const float t); inline vec3& operator/=(const float t); inline float length() const { return sycl::sqrt(e[0] * e[0] + e[1] * e[1] + e[2] * e[2]); } inline float squared_length() const { return e[0]*e[0] + e[1]*e[1] + e[2]*e[2]; } inline void make_unit_vector(); float e[3]; }; inline std::istream& operator>>(std::istream &is, vec3 &t) { is >> t.e[0] >> t.e[1] >> t.e[2]; return is; } inline std::ostream& operator<<(std::ostream &os, const vec3 &t) { os << t.e[0] << " " << t.e[1] << " " << t.e[2]; return os; } inline void vec3::make_unit_vector() { float k = 1.0 / sycl::sqrt(e[0] * e[0] + e[1] * e[1] + e[2] * e[2]); e[0] *= k; e[1] *= k; e[2] *= k; } inline vec3 operator+(const vec3 &v1, const vec3 &v2) { return vec3(v1.e[0] + v2.e[0], v1.e[1] + v2.e[1], v1.e[2] + v2.e[2]); } inline vec3 operator-(const vec3 &v1, const vec3 &v2) { return vec3(v1.e[0] - v2.e[0], v1.e[1] - v2.e[1], v1.e[2] - v2.e[2]); } inline vec3 operator*(const vec3 &v1, const vec3 &v2) { return vec3(v1.e[0] * v2.e[0], v1.e[1] * v2.e[1], v1.e[2] * v2.e[2]); } inline vec3 operator/(const vec3 &v1, const vec3 &v2) { return vec3(v1.e[0] / v2.e[0], v1.e[1] / v2.e[1], v1.e[2] / v2.e[2]); } inline vec3 operator*(float t, const vec3 &v) { return vec3(t*v.e[0], t*v.e[1], t*v.e[2]); } inline vec3 operator/(vec3 v, float t) { return vec3(v.e[0]/t, v.e[1]/t, v.e[2]/t); } inline vec3 operator*(const vec3 &v, float t) { return vec3(t*v.e[0], t*v.e[1], t*v.e[2]); } inline float dot(const vec3 &v1, const vec3 &v2) { return v1.e[0] *v2.e[0] + v1.e[1] *v2.e[1] + v1.e[2] *v2.e[2]; } inline vec3 cross(const vec3 &v1, const vec3 &v2) { return vec3( (v1.e[1]*v2.e[2] - v1.e[2]*v2.e[1]), (-(v1.e[0]*v2.e[2] - v1.e[2]*v2.e[0])), (v1.e[0]*v2.e[1] - v1.e[1]*v2.e[0])); } inline vec3& vec3::operator+=(const vec3 &v){ e[0] += v.e[0]; e[1] += v.e[1]; e[2] += v.e[2]; return *this; } inline vec3& vec3::operator*=(const vec3 &v){ e[0] *= v.e[0]; e[1] *= v.e[1]; e[2] *= v.e[2]; return *this; } inline vec3& vec3::operator/=(const vec3 &v){ e[0] /= v.e[0]; e[1] /= v.e[1]; e[2] /= v.e[2]; return *this; } inline vec3& vec3::operator-=(const vec3& v) { e[0] -= v.e[0]; e[1] -= v.e[1]; e[2] -= v.e[2]; return *this; } inline vec3& vec3::operator*=(const float t) { e[0] *= t; e[1] *= t; e[2] *= t; return *this; } inline vec3& vec3::operator/=(const float t) { float k = 1.0/t; e[0] *= k; e[1] *= k; e[2] *= k; return *this; } inline vec3 unit_vector(vec3 v) { /* DPCT1084:1292: The function call has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. */ return v / v.length(); } #endif
(vec3.h)
Compiling the project with
dpcpp -fsycl -fintelfpga -o test.cpp.o -c test.cpp
And linking it with
dpcpp -fsycl -fintelfpga -Xshardware -Xstarget="/opt/intel/oneapi/intel_a10gx_pac:pac_a10" test.cpp.o -o test.fpga
Results in the errors I stated above:
test.cpp:150: Compiler Warning: Pipe ordering required barrier insertion in for.body.i of pipe_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E16sample_kernel_id, but kernel may hang as a result camera.h:47: Compiler Warning: Pipe ordering required barrier insertion in do.body.i.i.i of pipe_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E16sample_kernel_id, but kernel may hang as a result test.cpp:150: Compiler Warning: Pipe ordering required barrier insertion in _ZN6camera7get_rayEff.exit.i of pipe_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E16sample_kernel_id, but kernel may hang as a result
A smaller example did not lead to the errors, but I hope it is still not to time-consuming to look into it.
Thanks for your help in advance,
Christoph