summaryrefslogtreecommitdiff
path: root/examples/cuda-chill/tmv-shadow.lua
diff options
context:
space:
mode:
authorDerick Huth <derickhuth@gmail.com>2015-09-24 12:22:41 -0600
committerDerick Huth <derickhuth@gmail.com>2015-09-24 12:22:41 -0600
commit4631ad76927d433da5d55c3c373a1dfd0f74c9d4 (patch)
treef8dcba88576ec95e403f0c14efd80e970f30a260 /examples/cuda-chill/tmv-shadow.lua
parent6eb2b89896da66a77d0dcdf2d72b98c122826949 (diff)
parent0cff3f9a3c4ccd434900162ebef4bd814850f481 (diff)
downloadchill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.gz
chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.bz2
chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.zip
Merge pull request #7 from dhuth/master
V0.2.1
Diffstat (limited to 'examples/cuda-chill/tmv-shadow.lua')
-rw-r--r--examples/cuda-chill/tmv-shadow.lua50
1 files changed, 0 insertions, 50 deletions
diff --git a/examples/cuda-chill/tmv-shadow.lua b/examples/cuda-chill/tmv-shadow.lua
deleted file mode 100644
index 196b939..0000000
--- a/examples/cuda-chill/tmv-shadow.lua
+++ /dev/null
@@ -1,50 +0,0 @@
-init("tmv-shadow.c","normalMV",0)
-dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
- --copy_to_shared methods
-
-N=1024
---N= 8209
---N=129
-TI=64
-N=1024
-TI=32
---tile, "k" for the control loop for the "j" tile, with the final order
---of {"ii", "k", "i", "j"}
-tile_by_index({"i","j"}, {TI,TI}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"})
---tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
---print_code()
---tile_by_index({"i"}, {TI/32}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"})
-
---print_code()
---normalize_index performs a tile of size one over the loop level specified
---by the input index. This is useful to get a zero lower bound and hard
---upper bound on a loop instead of it being relative to previous loop
---levels.
---normalize_index("i")
---print_code()
-
---Cudaize now determines the grid dimensions from the loops themselves
---(the upper bounds of the block and thread loops). It also renames the
---given block and thread loops' indexes to the appropriate values from
---the set {"bx","by","tx","ty","tz"}. The second parameter specifies the
---size of the arrays to be copied in the CUDA scaffolding.
-cudaize("tmv_GPU", {a=N, b=N, c=N*N},{block={"ii"}, thread={"i"}})
-
---print_code()
-
---Does a datacopy, tile, and add_sync to get a shared memory copy
-copy_to_shared("tx", "b", 1)
---copy_to_texture("b")
---print_code()
-
-copy_to_shared("tx", "c", -16)
---copy_to_texture("c")
---print_code()
-
-copy_to_registers("k", "a")
-print_code()
---unroll(0,5,0)
---unroll(0,4,0)
---unroll(2,4,16)
-unroll_to_depth(1)
---print_code()