diff options
author | dhuth <derickhuth@gmail.com> | 2014-11-21 13:35:20 -0700 |
---|---|---|
committer | dhuth <derickhuth@gmail.com> | 2014-11-21 13:35:20 -0700 |
commit | a1834b22c43c282442b0cb164767e6c877cf0e5b (patch) | |
tree | bedc5be7d1bdb8d32c1868caa496a8a1530d8d8a /test-chill/test-cases/examples/cuda-chill/mriq-fh.lua | |
parent | ded84bb4aec7461738e7b7033d782a518e2c606b (diff) | |
parent | eb9236c5353785472ae132f27e1cfb9f1e4264a5 (diff) | |
download | chill-a1834b22c43c282442b0cb164767e6c877cf0e5b.tar.gz chill-a1834b22c43c282442b0cb164767e6c877cf0e5b.tar.bz2 chill-a1834b22c43c282442b0cb164767e6c877cf0e5b.zip |
Merge branch 'master' into doe
Diffstat (limited to 'test-chill/test-cases/examples/cuda-chill/mriq-fh.lua')
-rwxr-xr-x | test-chill/test-cases/examples/cuda-chill/mriq-fh.lua | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/test-chill/test-cases/examples/cuda-chill/mriq-fh.lua b/test-chill/test-cases/examples/cuda-chill/mriq-fh.lua new file mode 100755 index 0000000..3277bac --- /dev/null +++ b/test-chill/test-cases/examples/cuda-chill/mriq-fh.lua @@ -0,0 +1,73 @@ +--CUBLAS 2 MM Multiply + +--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you +--call init() and use global variables to specify procedure and loop + +--Second parameter is procedure # and third is loop # +init("mriq-fh.c", "mriFH_cpu", 0) + +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods +N=32768 +M=256 +Tx=256 + + +print_code() +--permute(0,{"j","i"}) +--tile_by_index({"j","i"}, {TI,TJ}, {l1_control="jj", l2_control="ii"}, {"jj","ii", "j", "i"}) +tile_by_index({"x"},{Tx},{l1_control="xx"},{"xx","x","k"}) +--tile_by_index({"x"},{16},{l1_control="xx1"},{"xx","x","xx1","k"}) +--tile_by_index({"j"}, {TI}, {l1_control="jj"}, {"ii","jj", "j", "i"}) +--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) +print_code() + +normalize_index("x") +--normalize_index("i") +print_code() +--tile_by_index({"i"}, {TI}, {l1_control="iii",l1_tile="i"}, {"ii","jj", "iii","j","i"}) +--print_code() +--cudaize("Kernel_GPU", {x=N,y=N,z=N,Qr=N,Qi=N,kVals=M},{block={"jj"}, thread={"j"}}) +cudaize("kernel_GPU",{dx=N,dy=N,dz=N,iRho=M,kx=M,ky=M,kz=M,rFHref=N,iFHref=N,rRho=M},{block={"xx"}, thread={"x"}}) +--copy_to_shared("tx","iRho",-16) +--copy_to_shared("tx","dz",1) +--copy_to_shared("tx","rRho",-16) +--copy_to_registers("tx","rFHref") +--copy_to_registers("tx","rRho") +--copy_to_registers("tx","iRho") +--copy_to_registers("tx","kx") +--copy_to_registers("tx","dx") +--copy_to_registers("tx","ky") +--copy_to_registers("tx","dy") +--copy_to_registers("tx","kz") +--copy_to_registers("tx","dz") +--copy_to_registers("tx","iFHref") +--copy_to_texture("rRho") +--copy_to_texture("kx") +--copy_to_texture("dx") +--copy_to_texture("ky") +--copy_to_texture("dy") 
+--copy_to_texture("kz") +--copy_to_texture("dz") +--copy_to_texture("iRho") +--print_code()--]] +--unroll(0,4,0) +--copy_to_constant_no_tile("kx") +--copy_to_constant_no_tile("ky") +--copy_to_constant_no_tile("kz") +--copy_to_constant_no_tile("rRho") +--copy_to_constant_no_tile("iRho") + +--unroll_to_depth(1) +print_code() +--[[ +copy_to_Texture("rRho") +copy_to_Texture("kx") +copy_to_Texture("dx") +copy_to_Texture("ky") +copy_to_Texture("dy") +copy_to_Texture("kz") +copy_to_Texture("dz") +copy_to_Texture("iRho") +--unroll_to_depth(2) +--]] |