From c699af920419025f86f284917385f8de5efd8fd3 Mon Sep 17 00:00:00 2001 From: Tuowen Zhao Date: Thu, 22 Oct 2020 20:19:37 -0600 Subject: update --- main.cpp | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'main.cpp') diff --git a/main.cpp b/main.cpp index 059d8d0..6b1fc89 100644 --- a/main.cpp +++ b/main.cpp @@ -1,16 +1,16 @@ #include -#include +// #include #include #include #include -#define N 64 +#define N 256 #define GZ 16 #define STRIDE (2*GZ + N) #define TILEK 4 #define TILEJ 4 #define TILEI 16 -#define ITER 10 +#define ITER 1000 std::string demangle(const char *name) { @@ -36,8 +36,8 @@ public: SGfunctor(accessor sg_sz, accessor sg_i) : sg_sz(sg_sz), sg_i(sg_i) {} - [[cl::intel_reqd_sub_group_size(16)]] - void operator()(nd_item<1> NdItem) { + [[intel::reqd_sub_group_size(16)]] + void operator()(nd_item<1> NdItem) const { intel::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_local_id().get(0); @@ -94,7 +94,7 @@ void run27pt(device &Device) { queue Queue(Device, {property::queue::enable_profiling()}); nd_range<3> NumOfWorkItems(range<3>(N, N, N), range<3>(TILEI, TILEJ, TILEK)); - float st = omp_get_wtime(); + // float st = omp_get_wtime(); auto kernel = [&](handler &cgh) { // Getting write only access to the buffer on a device auto in = in_buf.get_access(cgh); @@ -148,12 +148,20 @@ void run27pt(device &Device) { // Implicit barrier waiting for queue to complete the work. const auto out_h = out_buf.get_access(); const auto in_h = in_buf.get_access(); - double ed = omp_get_wtime(); - double elapsed = (ed_event.get_profiling_info() - - st_event.get_profiling_info()) * 1e-9; - std::cout << "elapsed: " << (ed - st) / ITER << std::endl; - std::cout << "elapsed: " << elapsed << std::endl; - std::cout << "flops: " << N * N * N * 53.0 * ITER / elapsed * 1e-9 << std::endl; + { + // double ed = omp_get_wtime(); + double elapsed = (ed_event.get_profiling_info() - + st_event.get_profiling_info()) * 1e-9; + std::cout << "elapsed: " << elapsed / ITER << std::endl; + std::cout << "flops: " << N * N * N * 53.0 * ITER / elapsed * 1e-9 << std::endl; + } + { + // double ed = omp_get_wtime(); + double elapsed = (ed_event.get_profiling_info() - + ed_event.get_profiling_info()) * 1e-9; + std::cout << "elapsed: " << elapsed << std::endl; + std::cout << "flops: " << N * N * N * 53.0 / elapsed * 1e-9 << std::endl; + } } void printInfo(device &Device) { @@ -167,7 +175,6 @@ void printInfo(device &Device) { std::cout << "Compute units: " << dot_num_groups << std::endl; std::cout << "Workgroup size: " << dot_wgsize << std::endl; - std::cout << "Maximum subgroup size: " << max_num_sg << std::endl; } int main() { @@ -189,11 +196,13 @@ int main() { // Host device is not for compute if (Device.is_host()) continue; + + const std::string DeviceName = Device.get_info(); std::cout << "Device [" << DeviceNumber << "]:" << std::endl; try { printInfo(Device); - if (Device.is_gpu()) { + if (Device.is_gpu() && (DeviceName.find("HD Graphics NEO") != std::string::npos)) { run27pt(Device); runSubgroups(Device); } else { -- cgit v1.2.3-70-g09d2