summaryrefslogtreecommitdiff
path: root/test-chill/test-cases/examples/cuda-chill/cp.lua
diff options
context:
space:
mode:
authordhuth <derickhuth@gmail.com>2014-11-21 13:35:20 -0700
committerdhuth <derickhuth@gmail.com>2014-11-21 13:35:20 -0700
commita1834b22c43c282442b0cb164767e6c877cf0e5b (patch)
treebedc5be7d1bdb8d32c1868caa496a8a1530d8d8a /test-chill/test-cases/examples/cuda-chill/cp.lua
parentded84bb4aec7461738e7b7033d782a518e2c606b (diff)
parenteb9236c5353785472ae132f27e1cfb9f1e4264a5 (diff)
downloadchill-a1834b22c43c282442b0cb164767e6c877cf0e5b.tar.gz
chill-a1834b22c43c282442b0cb164767e6c877cf0e5b.tar.bz2
chill-a1834b22c43c282442b0cb164767e6c877cf0e5b.zip
Merge branch 'master' into doe
Diffstat (limited to 'test-chill/test-cases/examples/cuda-chill/cp.lua')
-rw-r--r--test-chill/test-cases/examples/cuda-chill/cp.lua46
1 files changed, 46 insertions, 0 deletions
diff --git a/test-chill/test-cases/examples/cuda-chill/cp.lua b/test-chill/test-cases/examples/cuda-chill/cp.lua
new file mode 100644
index 0000000..1ef2264
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/cp.lua
@@ -0,0 +1,46 @@
+--CUBLAS 2 MM Multiply
+--NOTE(review): the title above looks carried over from another script; this one drives cp.c / cenergy_cpu -- confirm
+--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you
+--call init() and use global variables to specify procedure and loop
+
+--Second parameter selects the procedure (given here by name) and third is loop #
+init("cp.c", "cenergy_cpu", 0)
+
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+ --copy_to_shared methods
+V=512 --feeds energy=V*V*1 in the cudaize() call below
+N=4000 --immediately overwritten by the next line
+N=1 --effective value of N; feeds atoms=N*4 in the cudaize() call below
+
+Tj=32 --paired with index "j" in the active tile_by_index call
+Ti=16 --paired with index "i" in the active tile_by_index call
+Tii=16 --only referenced by commented-out tile_by_index variants
+Tjj=16 --only referenced by commented-out tile_by_index variants
+
+--normalize_index("j")
+--normalize_index("i")
+print_code() --print_code() is called four times: here, after tiling, after cudaize, and at the end
+normalize_index("n")
+-- TILE COMMANDS ZEROOOOOOOOOOO:3
+--permute(0,{"i","j","n"})
+--tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","n"})--CU=-1
+tile_by_index({"j","i"},{Tj,Ti},{l1_control="jj",l2_control="ii"},{"jj","ii","j","i","n"})--CU=-1
+--tile_by_index({"n"},{Tn},{l1_control="nn"},{"jj","ii","nn","j","i","n"})--CU=-1
+--NOTE(review): the commented-out variants above and below reference Tn, Tjjj and Tiii, which this script never assigns
+--tile_by_index({"j","i"},{Tjjj,Tiii},{l1_control="jjj",l2_control="iii"},{"jj","ii","nn","jjj","j","iii","i","n"})--CU=3
+--tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","i","iii","j","jjj","n"})--CU=3
+--tile_by_index({"j"}, {Tn}, {l1_control="j",l1_tile="jjj"}, {"ii", "jj", "nn","jjj","j","i","n"})
+--tile_by_index({"i"}, {Tii}, {l1_control="iii",l1_tile="i"}, {"ii", "jj", "iii","i","j","n"})
+print_code()
+cudaize("kernel_GPU",{atoms=N*4,energy=V*V*1},{block={"jj","ii"}, thread={"j","i"}})--CU=3
+--cudaize("kernel_GPU",{atoms=N*4,energy=V*V*1},{block={"ii","jj"}, thread={"i","j"}})--CU=3
+print_code()
+copy_to_shared("tx","atoms",-16)
+copy_to_registers("tx","energy")
+--copy_to_texture("atoms")
+--unroll_to_depth(1)
+--unroll(0,9,0)
+--unroll(0,5,0)
+
+--unroll(0,8,256)
+print_code()