diff options
author | dhuth <derickhuth@gmail.com> | 2014-09-17 18:09:29 -0600 |
---|---|---|
committer | dhuth <derickhuth@gmail.com> | 2014-09-17 18:09:29 -0600 |
commit | 600fa18324c21a162c50c40ae5f00c899a41dd24 (patch) | |
tree | d399a8ea49c71a85abf5c07cb96b24676df32a0a /test-chill/test-cases/examples/cuda-chill/mpeg4.lua | |
parent | a2bd0557344bbd8d06e94814abd409f552b0efec (diff) | |
download | chill-600fa18324c21a162c50c40ae5f00c899a41dd24.tar.gz chill-600fa18324c21a162c50c40ae5f00c899a41dd24.tar.bz2 chill-600fa18324c21a162c50c40ae5f00c899a41dd24.zip |
removed submodule, added test-chill
Diffstat (limited to 'test-chill/test-cases/examples/cuda-chill/mpeg4.lua')
-rw-r--r-- | test-chill/test-cases/examples/cuda-chill/mpeg4.lua | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/test-chill/test-cases/examples/cuda-chill/mpeg4.lua b/test-chill/test-cases/examples/cuda-chill/mpeg4.lua new file mode 100644 index 0000000..f025dc0 --- /dev/null +++ b/test-chill/test-cases/examples/cuda-chill/mpeg4.lua @@ -0,0 +1,45 @@ +--CUBLAS 2 MM Multiply + +--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you +--call init() and use global variables to specify procedure and loop + +--Second parameter is procedure # and third is loop # +init("mpeg4.c", "mpeg4_cpu", 0) + +--dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,copy_to_shared methods +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,copy_to_shared methods + +N=4096 +M=4096 +W=16 + +--TI must be <= M +--TJ must be <=TI +Ti=32 +Tj=32 +Tii=16 +Tjj=16 +Tk=4 +--permute(0,{"j","i","k","l"}) +tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","k","l"}) +--tile_by_index({"k","l"},{Tk*2,Tk*2},{l1_control="kk",l2_control="ll"},{"ii","jj","kk","ll","i","j","k","l"}) +--print_code() +--tile_by_index({"k","l"},{Tk,Tk},{l1_control="kk",l2_control="ll"},{"ii","jj","i","j","kk","k","ll","l"}) +tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","iii","i","jjj","j","k","l"}) +--print_code() +--normalize_index("j") +--normalize_index("i") +--print_code() +cudaize("kernel_GPU",{curr=W*W,prev=(N+W)*(M+W),result=N*M},{block={"ii","jj"}, thread={"i","j"}}) +--print_code() +copy_to_shared("iii","prev",16) + +copy_to_registers("jjj","result") + +--print_code() +--copy_to_constant_no_tile("curr") +unroll_to_depth(2) +print_code() +print_space() + + |