summaryrefslogtreecommitdiff
path: root/test-chill/test-cases/examples/cuda-chill/nbody.lua
diff options
context:
space:
mode:
authorDerick Huth <derickhuth@gmail.com>2014-10-06 12:42:34 -0600
committerDerick Huth <derickhuth@gmail.com>2014-10-06 12:42:34 -0600
commit8d73c8fcc75556c1df71dd39dd99783f8f86fc3e (patch)
tree157d627863d76a4c256a27cae27ce2e8566c7ea0 /test-chill/test-cases/examples/cuda-chill/nbody.lua
parente87b55ad69f0ac6211daae741b32c8ee9dcbe470 (diff)
parent8c646f24570079eac53e58fcf42d0d4fbc437ee3 (diff)
downloadchill-8d73c8fcc75556c1df71dd39dd99783f8f86fc3e.tar.gz
chill-8d73c8fcc75556c1df71dd39dd99783f8f86fc3e.tar.bz2
chill-8d73c8fcc75556c1df71dd39dd99783f8f86fc3e.zip
Merge pull request #2 from dhuth/master
Moved omega into chill.
Diffstat (limited to 'test-chill/test-cases/examples/cuda-chill/nbody.lua')
-rw-r--r--test-chill/test-cases/examples/cuda-chill/nbody.lua53
1 files changed, 53 insertions, 0 deletions
diff --git a/test-chill/test-cases/examples/cuda-chill/nbody.lua b/test-chill/test-cases/examples/cuda-chill/nbody.lua
new file mode 100644
index 0000000..08f88a9
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/nbody.lua
@@ -0,0 +1,53 @@
+--CUBLAS 2 MM Multiply
+
+--This function form intializes "CUDAIZE v2" versus "CUDAIZE v1" if you
+--call init() and use global variables to specify procedure and loop
+
+--Second parameter is procedure # and third is loop #
+init("nbody.c", "nbody_cpu" , 0)
+
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+ --copy_to_shared methods
+NBODIES=16384
+
+
+--Tj=128 CHANGE FOR BEST..... BEST IS 64BLOCKS 128THREADS
+--Ti=256
+Tj=64
+Ti=32
+Tjjj=1
+Tiii=1
+Tn=0.1
+--normalize_index("j")
+--
+--print_code()
+--normalize_index("n")
+-- TILE COMMANDS ZEROOOOOOOOOOO:3
+--tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j"})--CU=-1
+tile_by_index({"i"},{Ti},{l1_control="ii"},{"ii","i","j"})--CU=-1
+--normalize_index("i")
+--tile_by_index({"n"},{Tn},{l1_control="nn"},{"jj","ii","nn","j","i","n"})--CU=-1
+
+--tile_by_index({"j","i"},{Tjjj,Tiii},{l1_control="jjj",l2_control="iii"},{"jj","ii","nn","jjj","j","iii","i","n"})--CU=3
+--tile_by_index({"j"}, {Tn}, {l1_control="j",l1_tile="jjj"}, {"ii", "jj", "nn","jjj","j","i","n"})
+--tile_by_index({"i"}, {Ti/2}, {l1_control="iii"}, {"ii","iii", "jj","i","j"})
+--print_code()
+cudaize("kernel_GPU",{oldpos=4*NBODIES,oldpos1=4*NBODIES,oldvel=4*NBODIES,force=4*NBODIES,newpos=4*NBODIES,newvel=4*NBODIES},{block={"ii"}, thread={"i"}})--CU=3
+print_code()
+--tile(0,6,6)
+--copy_to_shared("tx","oldpos",-16)
+--copy_to_registers("j","oldpos")
+--copy_to_registers("j","oldpos1")
+--copy_to_registers("j","force")
+
+--copy_to_texture("oldpos")
+--tile(1,3,3)
+--tile(2,3,3)
+
+print_code()
+--unroll_to_depth(1)
+--
+--tile(2,3,3)
+--unroll(2,3,0)
+--unroll(0,5,0)
+--print_code()