blob: db4d9ad12fbc7f1767a6497223a83ce478e15935 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
-- Transformation recipe for the "normalMV" code in mv_try.c: tile the "i"
-- loop, hand the result to cudaize() under the name "mv_GPU", then print the
-- resulting code.
-- NOTE(review): this reads like a CUDA-CHiLL script; the meaning of each
-- positional argument below is inferred from its name -- confirm against the
-- tool's documentation.
init("mv_try.c", "normalMV", 0)

-- cudaize.lua defines the custom tile_by_index, copy_to_registers and
-- copy_to_shared methods, so it has to be loaded before tile_by_index is used.
dofile("cudaize.lua")

-- Kept as globals (not locals) in case code loaded from cudaize.lua looks
-- them up by name.
TI = 96
N = 4096
-- NOTE(review): 4096 is not a multiple of 96 (42 * 96 + 64 = 4096); confirm
-- that the tiling and cudaize steps cope with the final partial tile.

-- Tile loop "i" by TI, using "ii" as the level-1 control loop.
local tiled_loops  = {"i"}
local tile_sizes   = {TI}
local tile_control = {l1_control = "ii"}
local loop_order   = {"ii", "i", "j"}
tile_by_index(tiled_loops, tile_sizes, tile_control, loop_order)

-- Sizes passed for a, b and c, and the loops assigned to block / thread.
local array_sizes = {a = N, b = N, c = N * N}
local gpu_mapping = {block = {"ii"}, thread = {"i"}}
cudaize("mv_GPU", array_sizes, gpu_mapping)

print_code()
|