diff options
author | Tuowen Zhao <ztuowen@gmail.com> | 2020-10-22 20:19:37 -0600 |
---|---|---|
committer | Tuowen Zhao <ztuowen@gmail.com> | 2020-10-22 20:19:37 -0600 |
commit | c699af920419025f86f284917385f8de5efd8fd3 (patch) | |
tree | 4c920ca9c8ee063913921623a7f8a4647d30bdbc /subgroup.cpp | |
parent | 5ae0da8484744859e09fad869b44dccdb5f66f2f (diff) | |
download | sycltest-c699af920419025f86f284917385f8de5efd8fd3.tar.gz sycltest-c699af920419025f86f284917385f8de5efd8fd3.tar.bz2 sycltest-c699af920419025f86f284917385f8de5efd8fd3.zip |
update
Diffstat (limited to 'subgroup.cpp')
-rw-r--r-- | subgroup.cpp | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/subgroup.cpp b/subgroup.cpp new file mode 100644 index 0000000..1e4bc11 --- /dev/null +++ b/subgroup.cpp @@ -0,0 +1,94 @@ +// +// Created by Tuowen Zhao on 11/11/19. +// + +#include <CL/sycl.hpp> +#include <vector> + +#define N 1024 + +using namespace cl::sycl; + +template<typename T> +inline void dev_shl(intel::sub_group &SG, T &res, T l, T r, unsigned kn, unsigned cw, unsigned cid) { + T l_tmp = SG.shuffle_down(l, cw - (kn)); + T r_tmp = SG.shuffle_up(r, kn); + res = (cid) < kn? l_tmp : r_tmp; +} + +void printInfo(device &Device) { + std::cout << "Using OpenCL " << (Device.is_cpu() ? "CPU" : "GPU") + << " device {" << Device.get_info<info::device::name>() + << "} from {" << Device.get_info<info::device::vendor>() << "}" << std::endl; + + auto dot_num_groups = Device.get_info<info::device::max_compute_units>(); + auto dot_wgsize = Device.get_info<info::device::max_work_group_size>(); + auto max_num_sg = Device.get_info<info::device::max_num_sub_groups>(); + + std::cout << "Compute units: " << dot_num_groups << std::endl; + std::cout << "Workgroup size: " << dot_wgsize << std::endl; +} + +int main() { + class NEOGPUDeviceSelector : public device_selector { + public: + int operator()(const device &Device) const override { + const std::string DeviceName = Device.get_info<info::device::name>(); + const std::string DeviceVendor = Device.get_info<info::device::vendor>(); + + return Device.is_gpu() && DeviceName.find("HD Graphics NEO") ? 1 : -1; + } + }; + + device dev = device(NEOGPUDeviceSelector()); + printInfo(dev); + std::vector<int> a(N); + std::vector<int> b(N); + std::vector<int> c(N); + + buffer<int, 1> A((int *)a.data(), range<1>(N), {property::buffer::use_host_ptr()}); + buffer<int, 1> B((int *)b.data(), range<1>(N), {property::buffer::use_host_ptr()}); + buffer<int, 1> C((int *)c.data(), range<1>(N), {property::buffer::use_host_ptr()}); + + for (int i = 0; i < N; ++i) { + a[i] = i; + b[i] = N - i; + } + + queue Queue(dev, {property::queue::enable_profiling()}); + + nd_range<1> nworkitem(range<1>(N), range<1>(64)); + range<1> nwork{N}; + int len = 1024; + + auto kernel = [&](handler &cgh) { + // Getting write only access to the buffer on a device + auto a = A.get_access<access::mode::read>(cgh); + auto b = B.get_access<access::mode::read>(cgh); + auto c = C.get_access<access::mode::write>(cgh); + + cgh.parallel_for<class FillBuffer>( + nworkitem, [=](nd_item<1> WIid) { + intel::sub_group SG = WIid.get_sub_group(); + int sglid = SG.get_local_id().get(0); + uint32_t i = WIid.get_global_id(0); + int* cp = &c[0]; + const int* ap = &a[0]; + const int* bp = &b[0]; +// c[i] = a[i] + b[i]; + dev_shl(SG, cp[i], ap[i], bp[i], 2, 8, sglid & 7); + c[i] = WIid.get_group(0); + }); + }; + std::cout << "submitting" << std::endl; + auto e = Queue.submit(kernel); + std::cout << "waiting" << std::endl; + e.wait(); + std::cout << "waited" << std::endl; + auto c_r = C.get_access<access::mode::read>(); + +// for (int i = 0; i < N; ++i) +// std::cout << i << " + " << N - i << " = " << c[i] << std::endl; + + return 0; +} |