From e38cf4dc8604634daddcc6a6b05ab1187ec83efc Mon Sep 17 00:00:00 2001 From: Tuowen Zhao Date: Mon, 4 Oct 2021 16:23:27 +0000 Subject: Update formatting --- main.cpp | 97 ++++++++++++++++++++++++++-------------------------------------- 1 file changed, 40 insertions(+), 57 deletions(-) (limited to 'main.cpp') diff --git a/main.cpp b/main.cpp index 095bbed..5af0173 100644 --- a/main.cpp +++ b/main.cpp @@ -1,12 +1,12 @@ #include // #include -#include -#include #include +#include +#include #define N 256 #define GZ 16 -#define STRIDE (2*GZ + N) +#define STRIDE (2 * GZ + N) #define TILEK 4 #define TILEJ 4 #define TILEI 16 @@ -17,28 +17,24 @@ std::string demangle(const char *name) { int status = -4; // some arbitrary value to eliminate the compiler warning // enable c++11 by passing the flag -std=c++11 to g++ - std::unique_ptr res{ - abi::__cxa_demangle(name, NULL, NULL, &status), - std::free - }; + std::unique_ptr res{abi::__cxa_demangle(name, NULL, NULL, &status), + std::free}; return (status == 0) ? res.get() : name; } using namespace cl::sycl; -template -class subgr; +template class subgr; -template -class SGfunctor { +template class SGfunctor { public: SGfunctor(accessor sg_sz, - accessor sg_i) : sg_sz(sg_sz), sg_i(sg_i) {} + accessor sg_i) + : sg_sz(sg_sz), sg_i(sg_i) {} - [[intel::reqd_sub_group_size(16)]] - void operator()(nd_item<1> NdItem) const { - ONEAPI::sub_group SG = NdItem.get_sub_group(); + [[intel::reqd_sub_group_size(16)]] void operator()(nd_item<1> NdItem) const { + ONEAPI::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_local_id().get(0); if (wggid == 0) @@ -52,8 +48,7 @@ private: accessor sg_sz; }; -template -void subkrnl(device &Device, size_t G = 256, size_t L = 64) { +template void subkrnl(device &Device, size_t G = 256, size_t L = 64) { buffer sg_buf{1}; buffer sg_info{G}; queue Queue(Device); @@ -101,40 +96,26 @@ void run27pt(device &Device) { auto out = out_buf.get_access(cgh); auto c = c_buf.get_access(cgh); // Executing kernel - cgh.parallel_for( - NumOfWorkItems, [=](nd_item<3> WIid) { - uint32_t i = WIid.get_global_id(0) + GZ; - uint32_t j = WIid.get_global_id(1) + GZ; - uint32_t k = WIid.get_global_id(2) + GZ; - // Fill buffer with indexes - out[id<3>(i, j, k)] = c[0] * in[id<3>(i - 1, j - 1, k - 1)] - + c[1] * in[id<3>(i, j - 1, k - 1)] - + c[2] * in[id<3>(i + 1, j - 1, k - 1)] - + c[3] * in[id<3>(i - 1, j, k - 1)] - + c[4] * in[id<3>(i, j, k - 1)] - + c[5] * in[id<3>(i + 1, j, k - 1)] - + c[6] * in[id<3>(i - 1, j + 1, k - 1)] - + c[7] * in[id<3>(i, j + 1, k - 1)] - + c[8] * in[id<3>(i + 1, j + 1, k - 1)] - + c[9] * in[id<3>(i - 1, j - 1, k)] - + c[10] * in[id<3>(i, j - 1, k)] - + c[11] * in[id<3>(i + 1, j - 1, k)] - + c[12] * in[id<3>(i - 1, j, k)] - + c[13] * in[id<3>(i, j, k)] - + c[14] * in[id<3>(i + 1, j, k)] - + c[15] * in[id<3>(i - 1, j + 1, k)] - + c[16] * in[id<3>(i, j + 1, k)] - + c[17] * in[id<3>(i + 1, j + 1, k)] - + c[18] * in[id<3>(i - 1, j - 1, k + 1)] - + c[19] * in[id<3>(i, j - 1, k + 1)] - + c[20] * in[id<3>(i + 1, j - 1, k + 1)] - + c[21] * in[id<3>(i - 1, j, k + 1)] - + c[22] * in[id<3>(i, j, k + 1)] - + c[23] * in[id<3>(i + 1, j, k + 1)] - + c[24] * in[id<3>(i - 1, j + 1, k + 1)] - + c[25] * in[id<3>(i, j + 1, k + 1)] - + c[26] * in[id<3>(i + 1, j + 1, k + 1)]; - }); + cgh.parallel_for(NumOfWorkItems, [=](nd_item<3> WIid) { + uint32_t i = WIid.get_global_id(0) + GZ; + uint32_t j = WIid.get_global_id(1) + GZ; + uint32_t k = WIid.get_global_id(2) + GZ; + // Fill buffer with indexes + out[i][j][k] = c[0] * in[id<3>(i - 1, j - 1, k - 1)] + c[1] * in[id<3>(i, j - 1, k - 1)] + + c[2] * in[id<3>(i + 1, j - 1, k - 1)] + c[3] * in[id<3>(i - 1, j, k - 1)] + + c[4] * in[id<3>(i, j, k - 1)] + c[5] * in[id<3>(i + 1, j, k - 1)] + + c[6] * in[id<3>(i - 1, j + 1, k - 1)] + c[7] * in[id<3>(i, j + 1, k - 1)] + + c[8] * in[id<3>(i + 1, j + 1, k - 1)] + c[9] * in[id<3>(i - 1, j - 1, k)] + + c[10] * in[id<3>(i, j - 1, k)] + c[11] * in[id<3>(i + 1, j - 1, k)] + + c[12] * in[id<3>(i - 1, j, k)] + c[13] * in[id<3>(i, j, k)] + + c[14] * in[id<3>(i + 1, j, k)] + c[15] * in[id<3>(i - 1, j + 1, k)] + + c[16] * in[id<3>(i, j + 1, k)] + c[17] * in[id<3>(i + 1, j + 1, k)] + + c[18] * in[id<3>(i - 1, j - 1, k + 1)] + c[19] * in[id<3>(i, j - 1, k + 1)] + + c[20] * in[id<3>(i + 1, j - 1, k + 1)] + c[21] * in[id<3>(i - 1, j, k + 1)] + + c[22] * in[id<3>(i, j, k + 1)] + c[23] * in[id<3>(i + 1, j, k + 1)] + + c[24] * in[id<3>(i - 1, j + 1, k + 1)] + c[25] * in[id<3>(i, j + 1, k + 1)] + + c[26] * in[id<3>(i + 1, j + 1, k + 1)]; + }); }; auto st_event = Queue.submit(kernel); for (int i = 0; i < ITER - 2; ++i) { @@ -151,22 +132,24 @@ void run27pt(device &Device) { { // double ed = omp_get_wtime(); double elapsed = (ed_event.get_profiling_info() - - st_event.get_profiling_info()) * 1e-9; + st_event.get_profiling_info()) * + 1e-9; std::cout << "elapsed: " << elapsed / ITER << std::endl; std::cout << "flops: " << N * N * N * 53.0 * ITER / elapsed * 1e-9 << std::endl; } { // double ed = omp_get_wtime(); double elapsed = (ed_event.get_profiling_info() - - ed_event.get_profiling_info()) * 1e-9; + ed_event.get_profiling_info()) * + 1e-9; std::cout << "elapsed: " << elapsed << std::endl; std::cout << "flops: " << N * N * N * 53.0 / elapsed * 1e-9 << std::endl; } } void printInfo(device &Device) { - std::cout << "Using OpenCL device {" << Device.get_info() << - "} from {" << Device.get_info() << "}" << std::endl; + std::cout << "Using OpenCL device {" << Device.get_info() << "} from {" + << Device.get_info() << "}" << std::endl; std::cout << "I'm a " << (Device.is_cpu() ? "CPU" : "GPU") << std::endl; auto dot_num_groups = Device.get_info(); @@ -196,12 +179,12 @@ int main() { // Host device is not for compute if (Device.is_host()) continue; - + const std::string DeviceName = Device.get_info(); std::cout << "Device [" << DeviceNumber << "]:" << DeviceName << std::endl; try { - if (Device.is_gpu() && (DeviceName.find("Gen11") != std::string::npos)) { + if (Device.is_gpu() && (DeviceName.find("Graphics") != std::string::npos)) { printInfo(Device); run27pt(Device); runSubgroups(Device); -- cgit v1.2.3-70-g09d2