diff options
Diffstat (limited to 'subgroup.cpp')
-rw-r--r-- | subgroup.cpp | 40 |
1 files changed, 20 insertions, 20 deletions
diff --git a/subgroup.cpp b/subgroup.cpp index 59e873d..f55a1f8 100644 --- a/subgroup.cpp +++ b/subgroup.cpp @@ -9,17 +9,18 @@ using namespace cl::sycl; -template<typename T> -inline void dev_shl(ONEAPI::sub_group &SG, T &res, T l, T r, unsigned kn, unsigned cw, unsigned cid) { +template <typename T> +inline void dev_shl(ONEAPI::sub_group &SG, T &res, T l, T r, unsigned kn, unsigned cw, + unsigned cid) { T l_tmp = SG.shuffle_down(l, cw - (kn)); T r_tmp = SG.shuffle_up(r, kn); - res = (cid) < kn? l_tmp : r_tmp; + res = (cid) < kn ? l_tmp : r_tmp; } void printInfo(device &Device) { - std::cout << "Using OpenCL " << (Device.is_cpu() ? "CPU" : "GPU") - << " device {" << Device.get_info<info::device::name>() - << "} from {" << Device.get_info<info::device::vendor>() << "}" << std::endl; + std::cout << "Using OpenCL " << (Device.is_cpu() ? "CPU" : "GPU") << " device {" + << Device.get_info<info::device::name>() << "} from {" + << Device.get_info<info::device::vendor>() << "}" << std::endl; auto dot_num_groups = Device.get_info<info::device::max_compute_units>(); auto dot_wgsize = Device.get_info<info::device::max_work_group_size>(); @@ -67,18 +68,17 @@ int main() { auto b = B.get_access<access::mode::read>(cgh); auto c = C.get_access<access::mode::write>(cgh); - cgh.parallel_for<class FillBuffer>( - nworkitem, [=](nd_item<1> WIid) { - ONEAPI::sub_group SG = WIid.get_sub_group(); - int sglid = SG.get_local_id().get(0); - uint32_t i = WIid.get_global_id(0); - int* cp = &c[0]; - const int* ap = &a[0]; - const int* bp = &b[0]; -// c[i] = a[i] + b[i]; - dev_shl(SG, cp[i], ap[i], bp[i], 2, 8, sglid & 7); - c[i] = WIid.get_group(0); - }); + cgh.parallel_for<class FillBuffer>(nworkitem, [=](nd_item<1> WIid) { + ONEAPI::sub_group SG = WIid.get_sub_group(); + int sglid = SG.get_local_id().get(0); + uint32_t i = WIid.get_global_id(0); + int *cp = &c[0]; + const int *ap = &a[0]; + const int *bp = &b[0]; + // c[i] = a[i] + b[i]; + dev_shl(SG, cp[i], ap[i], bp[i], 2, 8, sglid & 7); + c[i] = WIid.get_group(0); + }); }; std::cout << "submitting" << std::endl; auto e = Queue.submit(kernel); @@ -87,8 +87,8 @@ int main() { std::cout << "waited" << std::endl; auto c_r = C.get_access<access::mode::read>(); -// for (int i = 0; i < N; ++i) -// std::cout << i << " + " << N - i << " = " << c[i] << std::endl; + // for (int i = 0; i < N; ++i) + // std::cout << i << " + " << N - i << " = " << c[i] << std::endl; return 0; } |