pre-v0.2.1

author: Derick Huth <derickhuth@gmail.com> 2015-09-24 11:26:53 -0600
committer: Derick Huth <derickhuth@gmail.com> 2015-09-24 11:26:53 -0600
commit: c285135eb903c31cd221f90f03e288a6b67770cd (patch)
tree: 1f6ea3120a09feef7236dac579d5a2d5b774aaa7 /examples/cuda-chill/mv.lua
parent: f5c39e4c6ff55520948c2ef331c968cd84b817d9 (diff)
download: chill-c285135eb903c31cd221f90f03e288a6b67770cd.tar.gz
chill-c285135eb903c31cd221f90f03e288a6b67770cd.tar.bz2
chill-c285135eb903c31cd221f90f03e288a6b67770cd.zip
1 files changed, 0 insertions, 65 deletions
diff --git a/examples/cuda-chill/mv.lua b/examples/cuda-chill/mv.lua
deleted file mode 100644
index ca54501..0000000
--- a/examples/cuda-chill/mv.lua
+++ /dev/null
@@ -1,65 +0,0 @@
-init("mv.c","normalMV",0)
-dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
-                      --copy_to_shared methods
-
-N=129
-TI=32
-TJ=64
-
-N=1024
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
---Tile the i and j loop, introducing "ii" as the control loop for the "i"
---tile, "k" for the control loop fo the "j" tile, with the final order
---of {"ii", "k", "i", "j"}
-tile_by_index({"i","j"}, {TI,TJ}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"})
---tile_by_index({"i"}, {TI}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"})
---tile_by_index({"j"}, {TI}, {l2_control="k"}, { "k", "i", "j"})
---tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
---print_code()
---Normalize indx will do a tile size of one over the loop level specified
---by the input index. This is useful to get a zero lower bound and hard
---upper bound on a loop instead of it being relative to previous loop
---levels.
---normalize_index("ii")
-normalize_index("i")
-print_code()
-
---Cudaize now determines the grid dimentions from the loops themselves
---(the upper bounds of the block and thread loops). It also renames the
---given block and thread loops's indexes to the approviate values from
---the set {"bx","by","tx","ty","tz"}. The second parameter specifies the
---size of the arrays to be copied in the CUDA scaffolding.
-cudaize("mv_GPU", {a=N, b=N, c=N*N}, {block={"ii"}, thread={"i"}})
-
---print_code()
-
---Does a datacopy, tile, and add_sync to get a shared memory copy
-
---copy_to_shared("tx", "b", 1)
---copy_to_shared("tx", "c", -16)
---print_code()
---copy_to_texture("b")
---copy_to_texture("c")
-copy_to_registers("k", "a")
---print_code()
-
-unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels
---copy_to_texture("b")
---print_code()
---unroll(0,5,0)
---print_code()
author	Derick Huth <derickhuth@gmail.com>	2015-09-24 11:26:53 -0600
committer	Derick Huth <derickhuth@gmail.com>	2015-09-24 11:26:53 -0600
commit	c285135eb903c31cd221f90f03e288a6b67770cd (patch)
tree	1f6ea3120a09feef7236dac579d5a2d5b774aaa7 /examples/cuda-chill/mv.lua
parent	f5c39e4c6ff55520948c2ef331c968cd84b817d9 (diff)
download	chill-c285135eb903c31cd221f90f03e288a6b67770cd.tar.gz chill-c285135eb903c31cd221f90f03e288a6b67770cd.tar.bz2 chill-c285135eb903c31cd221f90f03e288a6b67770cd.zip