diff options
author | Tuowen Zhao <ztuowen@gmail.com> | 2019-12-04 14:50:16 -0700 |
---|---|---|
committer | Tuowen Zhao <ztuowen@gmail.com> | 2019-12-04 14:50:16 -0700 |
commit | c0b34b3f764e12d296f6126eba3b930559bbcd42 (patch) | |
tree | d2d5a4a39b58bb6f654394f9db465584cd288de8 | |
parent | 79ba7c05e35b18aa2dc24da57399cfbb94a07d0e (diff) | |
download | atsmmap-c0b34b3f764e12d296f6126eba3b930559bbcd42.tar.gz atsmmap-c0b34b3f764e12d296f6126eba3b930559bbcd42.tar.bz2 atsmmap-c0b34b3f764e12d296f6126eba3b930559bbcd42.zip |
CPU bench
-rw-r--r-- | CMakeLists.txt | 4 | ||||
-rw-r--r-- | README.md | 10 | ||||
-rw-r--r-- | main.cpp | 56 |
3 files changed, 60 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 58e7603..b24a5e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,4 +3,8 @@ project(atsmmap) set(CMAKE_CXX_STANDARD 14) +find_package(OpenMP REQUIRED) +set(CMAKE_C_FLAGS ${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} -march=native") + add_executable(atsmmap main.cpp)
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0a59bc0 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +# Profiling performance relations between mmap and unified memory with host allocator + +## Unified memory with host allocator + +Can be realized using the Address Translation Service (ATS) on Power9 or Heterogeneous Memory Management (HMM). + +## Problem description + +Every mmap region could add a "normally" imperceptible latency to address resolution. This problem is less visible on + CPU due to xxx. However, on GPU, this could contribute to visible latency.
\ No newline at end of file @@ -2,9 +2,10 @@ #include <unistd.h> #include <sys/mman.h> #include <string> +#include <omp.h> -#define GIG (1024*1024*1024ul) -#define VOLUME (2ul*GIG) +#define GIG (1024ul*1024*1024) +#define VOLUME (1024ul*1024*1024) // Real values is in the ~65536, only use half of it to be safe // sysctl vm.max_map_count @@ -13,16 +14,51 @@ int main(int argc, char **argv) { auto page_size = sysconf(_SC_PAGESIZE); std::cout << "The system have a page size of " << page_size << std::endl; - auto dst = (long*)malloc(VOLUME); // Src is using anonymous mapping - long nmaps = std::stoi(argv[1]); - if (GIG % (page_size * nmaps) != 0) { - std::cout << "nmaps is not perfect multiple" << std::endl; - nmaps = GIG / (page_size * nmaps); + long nmaps = 1024; + if (argc > 1) { + nmaps = std::stoi(argv[1]); + if (VOLUME / page_size % nmaps != 0) { + std::cout << "nmaps is not perfect multiple, quit" << std::endl; + return 0; + } } - std::cout << argv[1] << std::endl; + + long mmap_sz = VOLUME / nmaps; + std::cout << "Each mapped region is of size(pages) " << mmap_sz/page_size << std::endl; + + auto dst = (long*)malloc(VOLUME); + uint8_t *hint = (uint8_t*)0x600000000000UL; - mmap(hint); - auto src = + hint -= VOLUME; + auto src = (long*)hint; + for (long i = 0; i < nmaps; ++i) { + auto r = mmap(hint, mmap_sz, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + if (r == MAP_FAILED || r != hint) + printf("MMAP failed somehow\n"); + hint += mmap_sz; + } + +#pragma omp parallel for + for (long i = 0; i < VOLUME/sizeof(long); ++i) { + src[i] = i; + dst[i] = 0; + } + + int ITER = 100; + double st = omp_get_wtime(); + + for (int t = 0; t < ITER; ++t) { +#pragma omp parallel for + for (long i = 0; i < VOLUME/sizeof(long); ++i) + dst[i] = src[i]; + } + + st = (omp_get_wtime() - st) / ITER; + + printf("Average time(s) %f\n", st); +// std::cout << "Average time(s) " << st << std::endl; + printf("Average throughput(GB/s) %f\n", VOLUME * 2 / st * 1e-9); +// std::cout << 
"Average throughput(GB/s) " << VOLUME * 2 / st * 1e-9 << std::endl; return 0; } |