diff options
Diffstat (limited to 'main.cpp')
-rw-r--r-- | main.cpp | 37 |
1 files changed, 23 insertions, 14 deletions
@@ -1,16 +1,16 @@ #include <CL/sycl.hpp> -#include <omp.h> +// #include <omp.h> #include <typeinfo> #include <memory> #include <cxxabi.h> -#define N 64 +#define N 256 #define GZ 16 #define STRIDE (2*GZ + N) #define TILEK 4 #define TILEJ 4 #define TILEI 16 -#define ITER 10 +#define ITER 1000 std::string demangle(const char *name) { @@ -36,8 +36,8 @@ public: SGfunctor(accessor<uint32_t, 1, access::mode::write, access::target::global_buffer> sg_sz, accessor<T, 1, access::mode::write, access::target::global_buffer> sg_i) : sg_sz(sg_sz), sg_i(sg_i) {} - [[cl::intel_reqd_sub_group_size(16)]] - void operator()(nd_item<1> NdItem) { + [[intel::reqd_sub_group_size(16)]] + void operator()(nd_item<1> NdItem) const { intel::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_local_id().get(0); @@ -94,7 +94,7 @@ void run27pt(device &Device) { queue Queue(Device, {property::queue::enable_profiling()}); nd_range<3> NumOfWorkItems(range<3>(N, N, N), range<3>(TILEI, TILEJ, TILEK)); - float st = omp_get_wtime(); + // float st = omp_get_wtime(); auto kernel = [&](handler &cgh) { // Getting write only access to the buffer on a device auto in = in_buf.get_access<access::mode::read>(cgh); @@ -148,12 +148,20 @@ void run27pt(device &Device) { // Implicit barrier waiting for queue to complete the work. const auto out_h = out_buf.get_access<access::mode::read>(); const auto in_h = in_buf.get_access<access::mode::read>(); - double ed = omp_get_wtime(); - double elapsed = (ed_event.get_profiling_info<info::event_profiling::command_end>() - - st_event.get_profiling_info<info::event_profiling::command_start>()) * 1e-9; - std::cout << "elapsed: " << (ed - st) / ITER << std::endl; - std::cout << "elapsed: " << elapsed << std::endl; - std::cout << "flops: " << N * N * N * 53.0 * ITER / elapsed * 1e-9 << std::endl; + { + // double ed = omp_get_wtime(); + double elapsed = (ed_event.get_profiling_info<info::event_profiling::command_end>() - + st_event.get_profiling_info<info::event_profiling::command_start>()) * 1e-9; + std::cout << "elapsed: " << elapsed / ITER << std::endl; + std::cout << "flops: " << N * N * N * 53.0 * ITER / elapsed * 1e-9 << std::endl; + } + { + // double ed = omp_get_wtime(); + double elapsed = (ed_event.get_profiling_info<info::event_profiling::command_end>() - + ed_event.get_profiling_info<info::event_profiling::command_start>()) * 1e-9; + std::cout << "elapsed: " << elapsed << std::endl; + std::cout << "flops: " << N * N * N * 53.0 / elapsed * 1e-9 << std::endl; + } } void printInfo(device &Device) { @@ -167,7 +175,6 @@ void printInfo(device &Device) { std::cout << "Compute units: " << dot_num_groups << std::endl; std::cout << "Workgroup size: " << dot_wgsize << std::endl; - std::cout << "Maximum subgroup size: " << max_num_sg << std::endl; } int main() { @@ -189,11 +196,13 @@ int main() { // Host device is not for compute if (Device.is_host()) continue; + + const std::string DeviceName = Device.get_info<info::device::name>(); std::cout << "Device [" << DeviceNumber << "]:" << std::endl; try { printInfo(Device); - if (Device.is_gpu()) { + if (Device.is_gpu() && (DeviceName.find("HD Graphics NEO") != std::string::npos)) { run27pt(Device); runSubgroups(Device); } else { |