-- path: root/test-chill/test-cases/examples/cuda-chill/mm.lua
-- blob: 5bde1b00b6dc08ce4da56c8a82b86b38363dcb53
-- CUDA-CHiLL recipe for the matrix-multiply example: tiles the i/j/k loops,
-- hands the nest to cudaize() as kernel "mm_GPU", stages a and b through
-- copy_to_shared, c through copy_to_registers, then unrolls.
--
-- NOTE(review): init, tile_by_index, cudaize, copy_to_shared,
-- copy_to_registers and unroll_to_depth are not defined in this file; they
-- are expected to come from the CHiLL host and/or cudaize.lua. Comments on
-- their arguments below are assumptions to confirm against those sources.

-- Presumably selects procedure "normalMM" (loop nest 0) from mm.c -- confirm.
init("mm.c", "normalMM", 0)
dofile("cudaize.lua")

-- Problem dimension; the array extents given to cudaize() are derived from it.
N = 1024

-- Outer tile sizes for the i, j and k loops.
Ti = 128
Tj = 64
Tk = 16

-- Inner tile sizes used for the second i/j tiling.
Tii = 16
Tjj = 16

-- Element count passed for each of a, b and c (N * N == 1048576 for N = 1024).
local elems = N * N

-- First-level tiling of i and j; "ii"/"jj" are the new controlling loops.
tile_by_index({"i", "j"}, {Ti, Tj},
              {l1_control = "ii", l2_control = "jj"},
              {"ii", "jj", "i", "j", "k"})
-- CU is a global set after each step; it is not read in this file.
-- NOTE(review): kept in the original order in case cudaize.lua consults it.
CU = 1

-- Tile k with controlling loop "kk".
tile_by_index({"k"}, {Tk},
              {l1_control = "kk"},
              {"ii", "jj", "kk", "i", "j", "k"})
CU = 3

-- Second-level tiling of i and j into "iii"/"jjj".
-- NOTE(review): the trailing 1 is an extra argument the other calls omit;
-- its meaning is defined by tile_by_index -- confirm.
tile_by_index({"i", "j"}, {Tii, Tjj},
              {l1_control = "iii", l2_control = "jjj"},
              {"ii", "jj", "kk", "i", "iii", "j", "jjj", "k"},
              1)
CU = 2

-- Generate kernel "mm_GPU" with ii/jj as the block loops and i/j as the
-- thread loops, as named in the block/thread tables.
cudaize("mm_GPU",
        {a = elems, b = elems, c = elems},
        {block = {"ii", "jj"}, thread = {"i", "j"}})
CU = 2

-- NOTE(review): the -16 argument is passed through unchanged; its meaning is
-- defined by copy_to_shared -- confirm.
copy_to_shared("tx", "a", -16)
copy_to_shared("tx", "b", -16)
copy_to_registers("kk", "c")
--print_code()
unroll_to_depth(2)