Support for GPU and FPGA programming

New Contributor

5 years ago

Hi Chao,

I am assuming that you want to run your code in parallel on the iGPU and the FPGA emulator.

Please find below a code sample that runs a simple vector addition on both the iGPU and the FPGA emulator.

#include <exception>
#include <iostream>
#include <vector>
#include <CL/sycl.hpp>
#include <CL/sycl/intel/fpga_extensions.hpp>
#define N 10

int main(int, char**) {

        float *d1_a=(float *)malloc(N*sizeof(float));
        float *d1_b=(float *)malloc(N*sizeof(float));
        float *d1_c=(float *)malloc(N*sizeof(float));

        float *d2_a=(float *)malloc(N*sizeof(float));
        float *d2_b=(float *)malloc(N*sizeof(float));
        float *d2_c=(float *)malloc(N*sizeof(float));

        for(long int i=0;i<N;i++){
                d1_a[i]=i;
                d1_b[i]=N-i;
                d2_a[i]=i;
                d2_b[i]=N-i;
        }


        auto exception_handler = [] (cl::sycl::exception_list exceptions) {
            for (std::exception_ptr const& e : exceptions) {
                try {
                        std::rethrow_exception(e);
                } catch(cl::sycl::exception const& e) {
                std::cout << "Caught asynchronous SYCL exception:\n"<< e.what() << std::endl;
                }
            }
        };

        cl::sycl::queue queue_d1(cl::sycl::gpu_selector{}, exception_handler);
        cl::sycl::queue queue_d2(cl::sycl::intel::fpga_emulator_selector{}, exception_handler);
        /*std::cout << "Running on "
                << queue_d2.get_device().get_info<cl::sycl::info::device::name>()
                << "\n";
                */

        {
                cl::sycl::buffer<float, 1> d1_a_sycl{d1_a, cl::sycl::range<1>{N} };
                cl::sycl::buffer<float, 1> d1_b_sycl{d1_b, cl::sycl::range<1>{N} };
                cl::sycl::buffer<float, 1> d1_c_sycl{d1_c, cl::sycl::range<1>{N} };

                cl::sycl::buffer<float, 1> d2_a_sycl{d2_a, cl::sycl::range<1>{N} };
                cl::sycl::buffer<float, 1> d2_b_sycl{d2_b, cl::sycl::range<1>{N} };
                cl::sycl::buffer<float, 1> d2_c_sycl{d2_c, cl::sycl::range<1>{N} };

                queue_d1.submit([&] (cl::sycl::handler& cgh) {
                                auto a_acc = d1_a_sycl.get_access<cl::sycl::access::mode::read>(cgh);
                                auto b_acc = d1_b_sycl.get_access<cl::sycl::access::mode::read>(cgh);
                                auto c_acc = d1_c_sycl.get_access<cl::sycl::access::mode::discard_write>(cgh);

                                cgh.parallel_for<class vector_addition_d1>(cl::sycl::range<1>{ N }, [=](cl::sycl::id<1> idx) {
                                                c_acc[idx] = a_acc[idx] + b_acc[idx];

                                });
                });

                queue_d2.submit([&] (cl::sycl::handler& cgh) {
                                auto a_acc = d2_a_sycl.get_access<cl::sycl::access::mode::read>(cgh);
                                auto b_acc = d2_b_sycl.get_access<cl::sycl::access::mode::read>(cgh);
                                auto c_acc = d2_c_sycl.get_access<cl::sycl::access::mode::discard_write>(cgh);

                                cgh.parallel_for<class vector_addition_d2>(cl::sycl::range<1>{ N }, [=](cl::sycl::id<1> idx) {
                                                c_acc[idx] = a_acc[idx] + b_acc[idx];

                                });
                });

        }

        try {
                queue_d1.wait_and_throw();
                queue_d2.wait_and_throw();
        }catch (cl::sycl::exception const& e) {
                std::cout << "Caught synchronous SYCL exception:\n"<< e.what() << std::endl;
        }

        for(int i=0;i<N;i++)
                std::cout<<d1_c[i]<<" ";

        std::cout<<std::endl;

        for(long int i=0;i<N;i++)
                std::cout<<d2_c[i]<<" ";

        std::cout<<std::endl;

        return 0;
}

You can change the device selector according to your use-case.

Warm Regards,

Abhishek

Forum Discussion

Recent Discussions

Agilex 7 FPGA Starter Kit with oneAPI Toolkit flow not detected over PCIe

MCTP over PCIe VDM routing to PMCI in OFS N6000 FIM configuration and datapath clarification

HLS Compiler 24.1 error - aocl-clang.exe - dll entry point not found

Error faced while executing on Agilex FPGA board....

AI Suite System Throughput Issue