summaryrefslogtreecommitdiff
path: root/test-chill/test-cases/examples/cuda-chill/mriq.lua
diff options
context:
space:
mode:
authorTuowen Zhao <ztuowen@gmail.com>2016-09-18 21:39:45 +0000
committerTuowen Zhao <ztuowen@gmail.com>2016-09-18 21:39:45 +0000
commitf255f2498da1fd985ad1ed79362580bbf4675723 (patch)
tree3cd02e9d3147054820c58aacd1e5d3d336eea0d7 /test-chill/test-cases/examples/cuda-chill/mriq.lua
parent7233faacb7990a6c3a40d2435ede88d7725dfc6e (diff)
downloadchill-f255f2498da1fd985ad1ed79362580bbf4675723.tar.gz
chill-f255f2498da1fd985ad1ed79362580bbf4675723.tar.bz2
chill-f255f2498da1fd985ad1ed79362580bbf4675723.zip
rm test-chill, create doc subdir
Diffstat (limited to 'test-chill/test-cases/examples/cuda-chill/mriq.lua')
-rw-r--r--test-chill/test-cases/examples/cuda-chill/mriq.lua55
1 files changed, 0 insertions, 55 deletions
diff --git a/test-chill/test-cases/examples/cuda-chill/mriq.lua b/test-chill/test-cases/examples/cuda-chill/mriq.lua
deleted file mode 100644
index 1170111..0000000
--- a/test-chill/test-cases/examples/cuda-chill/mriq.lua
+++ /dev/null
@@ -1,55 +0,0 @@
---CUBLAS 2 MM Multiply
-
---This function form intializes "CUDAIZE v2" versus "CUDAIZE v1" if you
---call init() and use global variables to specify procedure and loop
-
---Second parameter is procedure # and third is loop #
-init("mriq.c", "ComputeQCPU", 0)
-
-dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
- --copy_to_shared methods
-N=32768
-M=3072
-TI=128
-TJ=128
-
-permute(0,{"j","i"})
---tile_by_index({"j","i"}, {TI,TJ}, {l1_control="jj", l2_control="ii"}, {"jj","ii", "j", "i"})
-tile_by_index({"i"}, {TJ}, {l1_control="ii",l1_tile="i"}, {"ii", "j","i"})
-tile_by_index({"j"}, {TI}, {l1_control="jj"}, {"ii","jj", "j", "i"})
---tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
---print_code()
-
-normalize_index("j")
-normalize_index("i")
---print_code()
---tile_by_index({"i"}, {TI}, {l1_control="iii",l1_tile="i"}, {"ii","jj", "iii","j","i"})
---print_code()
-cudaize("Kernel_GPU", {x=N,y=N,z=N,Qr=N,Qi=N,kVals=M},{block={"jj"}, thread={"j"}})
-
-copy_to_shared("tx","kVals",1)
---copy_to_shared("tx","x",1)
---copy_to_shared("tx","y",1)
---copy_to_shared("tx","z",1)
-
---copy_to_texture("kVals")
---datacopy(0, 3, "kVals", {"tt","t"},false,0,1,-16,true)
---print_code()
---datacopy_privatized(0,"tx","kVals",{"tx"})
---copy_to_registers("tx","kVals")
-copy_to_registers("ii","x")
-copy_to_registers("ii","y")
-copy_to_registers("ii","z")
-copy_to_registers("ii","Qi")
-copy_to_registers("ii","Qr")
---[[datacopy_privatized(0,"tx","x",{"tx"})
-datacopy_privatized(0,"tx","y",{"tx"})
-datacopy_privatized(0,"tx","z",{"tx"})
-datacopy_privatized(0,"tx","Qi",{"tx"})
-datacopy_privatized(0,"tx","Qr",{"tx"})
-
-
-]]--
---unroll(0,5,64)
-print_code()
---unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels