summary | refs | log | tree | commit | diff
path: root/main.cpp
diff options
context:
space:
mode:
author: Tuowen Zhao <ztuowen@gmail.com> 2020-10-22 20:19:37 -0600
committer: Tuowen Zhao <ztuowen@gmail.com> 2020-10-22 20:19:37 -0600
commit: c699af920419025f86f284917385f8de5efd8fd3 (patch)
tree: 4c920ca9c8ee063913921623a7f8a4647d30bdbc /main.cpp
parent: 5ae0da8484744859e09fad869b44dccdb5f66f2f (diff)
download: sycltest-c699af920419025f86f284917385f8de5efd8fd3.tar.gz
sycltest-c699af920419025f86f284917385f8de5efd8fd3.tar.bz2
sycltest-c699af920419025f86f284917385f8de5efd8fd3.zip
update
Diffstat (limited to 'main.cpp')
-rw-r--r-- main.cpp 37
1 files changed, 23 insertions, 14 deletions
diff --git a/main.cpp b/main.cpp
index 059d8d0..6b1fc89 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,16 +1,16 @@
#include <CL/sycl.hpp>
-#include <omp.h>
+// #include <omp.h>
#include <typeinfo>
#include <memory>
#include <cxxabi.h>
-#define N 64
+#define N 256
#define GZ 16
#define STRIDE (2*GZ + N)
#define TILEK 4
#define TILEJ 4
#define TILEI 16
-#define ITER 10
+#define ITER 1000
std::string demangle(const char *name) {
@@ -36,8 +36,8 @@ public:
SGfunctor(accessor<uint32_t, 1, access::mode::write, access::target::global_buffer> sg_sz,
accessor<T, 1, access::mode::write, access::target::global_buffer> sg_i) : sg_sz(sg_sz), sg_i(sg_i) {}
- [[cl::intel_reqd_sub_group_size(16)]]
- void operator()(nd_item<1> NdItem) {
+ [[intel::reqd_sub_group_size(16)]]
+ void operator()(nd_item<1> NdItem) const {
intel::sub_group SG = NdItem.get_sub_group();
uint32_t wggid = NdItem.get_global_id(0);
uint32_t sgid = SG.get_local_id().get(0);
@@ -94,7 +94,7 @@ void run27pt(device &Device) {
queue Queue(Device, {property::queue::enable_profiling()});
nd_range<3> NumOfWorkItems(range<3>(N, N, N), range<3>(TILEI, TILEJ, TILEK));
- float st = omp_get_wtime();
+ // float st = omp_get_wtime();
auto kernel = [&](handler &cgh) {
// Getting write only access to the buffer on a device
auto in = in_buf.get_access<access::mode::read>(cgh);
@@ -148,12 +148,20 @@ void run27pt(device &Device) {
// Implicit barrier waiting for queue to complete the work.
const auto out_h = out_buf.get_access<access::mode::read>();
const auto in_h = in_buf.get_access<access::mode::read>();
- double ed = omp_get_wtime();
- double elapsed = (ed_event.get_profiling_info<info::event_profiling::command_end>() -
- st_event.get_profiling_info<info::event_profiling::command_start>()) * 1e-9;
- std::cout << "elapsed: " << (ed - st) / ITER << std::endl;
- std::cout << "elapsed: " << elapsed << std::endl;
- std::cout << "flops: " << N * N * N * 53.0 * ITER / elapsed * 1e-9 << std::endl;
+ {
+ // double ed = omp_get_wtime();
+ double elapsed = (ed_event.get_profiling_info<info::event_profiling::command_end>() -
+ st_event.get_profiling_info<info::event_profiling::command_start>()) * 1e-9;
+ std::cout << "elapsed: " << elapsed / ITER << std::endl;
+ std::cout << "flops: " << N * N * N * 53.0 * ITER / elapsed * 1e-9 << std::endl;
+ }
+ {
+ // double ed = omp_get_wtime();
+ double elapsed = (ed_event.get_profiling_info<info::event_profiling::command_end>() -
+ ed_event.get_profiling_info<info::event_profiling::command_start>()) * 1e-9;
+ std::cout << "elapsed: " << elapsed << std::endl;
+ std::cout << "flops: " << N * N * N * 53.0 / elapsed * 1e-9 << std::endl;
+ }
}
void printInfo(device &Device) {
@@ -167,7 +175,6 @@ void printInfo(device &Device) {
std::cout << "Compute units: " << dot_num_groups << std::endl;
std::cout << "Workgroup size: " << dot_wgsize << std::endl;
- std::cout << "Maximum subgroup size: " << max_num_sg << std::endl;
}
int main() {
@@ -189,11 +196,13 @@ int main() {
// Host device is not for compute
if (Device.is_host())
continue;
+
+ const std::string DeviceName = Device.get_info<info::device::name>();
std::cout << "Device [" << DeviceNumber << "]:" << std::endl;
try {
printInfo(Device);
- if (Device.is_gpu()) {
+ if (Device.is_gpu() && (DeviceName.find("HD Graphics NEO") != std::string::npos)) {
run27pt(Device);
runSubgroups(Device);
} else {