diff options
author | Derick Huth <derickhuth@gmail.com> | 2015-09-24 12:22:41 -0600 |
---|---|---|
committer | Derick Huth <derickhuth@gmail.com> | 2015-09-24 12:22:41 -0600 |
commit | 4631ad76927d433da5d55c3c373a1dfd0f74c9d4 (patch) | |
tree | f8dcba88576ec95e403f0c14efd80e970f30a260 /test-chill/test-cases/examples/cuda-chill/mriq.lua | |
parent | 6eb2b89896da66a77d0dcdf2d72b98c122826949 (diff) | |
parent | 0cff3f9a3c4ccd434900162ebef4bd814850f481 (diff) | |
download | chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.gz chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.bz2 chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.zip |
Merge pull request #7 from dhuth/master
V0.2.1
Diffstat (limited to 'test-chill/test-cases/examples/cuda-chill/mriq.lua')
-rw-r--r-- | test-chill/test-cases/examples/cuda-chill/mriq.lua | 55 |
1 files changed, 0 insertions, 55 deletions
diff --git a/test-chill/test-cases/examples/cuda-chill/mriq.lua b/test-chill/test-cases/examples/cuda-chill/mriq.lua deleted file mode 100644 index 1170111..0000000 --- a/test-chill/test-cases/examples/cuda-chill/mriq.lua +++ /dev/null @@ -1,55 +0,0 @@ ---CUBLAS 2 MM Multiply - ---This function form intializes "CUDAIZE v2" versus "CUDAIZE v1" if you ---call init() and use global variables to specify procedure and loop - ---Second parameter is procedure # and third is loop # -init("mriq.c", "ComputeQCPU", 0) - -dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, - --copy_to_shared methods -N=32768 -M=3072 -TI=128 -TJ=128 - -permute(0,{"j","i"}) ---tile_by_index({"j","i"}, {TI,TJ}, {l1_control="jj", l2_control="ii"}, {"jj","ii", "j", "i"}) -tile_by_index({"i"}, {TJ}, {l1_control="ii",l1_tile="i"}, {"ii", "j","i"}) -tile_by_index({"j"}, {TI}, {l1_control="jj"}, {"ii","jj", "j", "i"}) ---tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) ---print_code() - -normalize_index("j") -normalize_index("i") ---print_code() ---tile_by_index({"i"}, {TI}, {l1_control="iii",l1_tile="i"}, {"ii","jj", "iii","j","i"}) ---print_code() -cudaize("Kernel_GPU", {x=N,y=N,z=N,Qr=N,Qi=N,kVals=M},{block={"jj"}, thread={"j"}}) - -copy_to_shared("tx","kVals",1) ---copy_to_shared("tx","x",1) ---copy_to_shared("tx","y",1) ---copy_to_shared("tx","z",1) - ---copy_to_texture("kVals") ---datacopy(0, 3, "kVals", {"tt","t"},false,0,1,-16,true) ---print_code() ---datacopy_privatized(0,"tx","kVals",{"tx"}) ---copy_to_registers("tx","kVals") -copy_to_registers("ii","x") -copy_to_registers("ii","y") -copy_to_registers("ii","z") -copy_to_registers("ii","Qi") -copy_to_registers("ii","Qr") ---[[datacopy_privatized(0,"tx","x",{"tx"}) -datacopy_privatized(0,"tx","y",{"tx"}) -datacopy_privatized(0,"tx","z",{"tx"}) -datacopy_privatized(0,"tx","Qi",{"tx"}) -datacopy_privatized(0,"tx","Qr",{"tx"}) - - -]]-- ---unroll(0,5,64) -print_code() ---unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels |