# Perform imperfect loop interchange of LU decomposition
# to get jki form and then block the k and i loops
#
# for k = 1 to n do
#   for i = k+1 to n do
#     a(i,k) = a(i,k) / a(k,k)
#     for j = k+1 to n do
#       a(i,j) = a(i,j) - a(k,j)*a(i,k)
#     endfor
#   endfor
# endfor
#

# Problem size; left symbolic so the generated code is parametric in n.
symbolic n;

# Iteration spaces of the two statements in the loop nest above:
#   IS10 - the division    a(i,k) = a(i,k) / a(k,k)           (indexed by k,i)
#   IS20 - the update      a(i,j) = a(i,j) - a(k,j)*a(i,k)    (indexed by k,i,j)
IS10 := {[k,i] : 1 <= k <= n && k+1 <= i <= n};
IS20 := {[k,i,j] : 1 <= k <= n && k+1 <= i <= n && k+1 <= j <= n};

# Schedules mapping each statement into a common 5-deep time space
# [t1, t2, j-position, k, i]:
#   t1 - first k of the 64-wide block containing k
#        (k-1 = alpha + 64*beta with 0 <= alpha <= 63, t1 = 64*beta + 1)
#   t2 - start of the 64-wide block containing i
#        (i = gamma + 64*delta with 0 <= gamma <= 63, t2 = 64*delta)
# The third coordinate carries j, placing it outside k and i (jki order).
# The division statement has no j, so it is placed at j-position = k.
T10 := {[k,i] -> [t1,t2,k,k,i]:
        exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta
                && alpha >= 0 && alpha <= 63)
        && exists (gamma,delta: t2 = 64delta && i = gamma +64delta
                && gamma >= 0 && gamma <= 63)};

T20 := {[k,i,j] -> [t1,t2,j,k,i]:
        exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta
                && alpha >= 0 && alpha <= 63)
        && exists (gamma,delta: t2 = 64delta && i = gamma +64delta
                && gamma >= 0 && gamma <= 63)};

# Echo both schedules so they can be inspected.
T10;
T20;

# Generate code with different amounts of overhead removed.
# The more overhead we remove, the more code duplication may occur.
codegen 0 T10:IS10,T20:IS20;
codegen 1 T10:IS10,T20:IS20;
codegen 2 T10:IS10,T20:IS20;
codegen 3 T10:IS10,T20:IS20;