# Perform imperfect loop interchange of LU decomposition
# to get jki form and then block the k and i loops.
# Original loop nest:
#for k = 1 to n do
# for i = k+1 to n do
#  a(i,k) = a(i,k) / a(k,k)
#  for j = k+1 to n do
#   a(i,j) = a(i,j) - a(k,j)*a(i,k)
#  endfor
# endfor
#endfor

# n is a symbolic constant: the upper bound of every loop below.
symbolic n;

# Iteration spaces of the two statements.
# IS10: the 2-deep (k,i) nest with 1 <= k <= n and k+1 <= i <= n.
# IS20: the 3-deep (k,i,j) nest, same k and i bounds plus k+1 <= j <= n.
IS10 := {[k,i] : 1 <= k <= n && k+1 <= i <= n};
IS20 := {[k,i,j] : 1 <= k <= n && k+1 <= i <= n && k+1 <= j <= n};

# T10 maps each [k,i] iteration into a 5-dimensional space [t1,t2,k,k,i].
#   t1 = 64*beta + 1 with k-1 = alpha + 64*beta and 0 <= alpha <= 63,
#        i.e. t1 is the first k of the 64-wide block containing k
#        (k blocks start at 1, 65, 129, ...).
#   t2 = 64*delta with i = gamma + 64*delta and 0 <= gamma <= 63,
#        i.e. t2 is the start of the 64-wide block containing i
#        (i blocks start at multiples of 64).
# The third output position, which holds j in T20, is set to k here.
T10 := {[k,i] -> [t1,t2,k,k,i]:
exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta &&
                    alpha >= 0 && alpha <= 63)
&& exists (gamma,delta: t2 = 64delta && i = gamma +64delta &&
           gamma >= 0 && gamma <= 63)};

# T20 maps each [k,i,j] iteration to [t1,t2,j,k,i]: the same t1/t2 block
# coordinates as T10, followed by the j, k, i order inside a block.
T20 := {[k,i,j] -> [t1,t2,j,k,i]:
exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta &&
                    alpha >= 0 && alpha <= 63)
&& exists (gamma,delta: t2 = 64delta && i = gamma +64delta &&
           gamma >= 0 && gamma <= 63)};


# Generate code with different amounts of overhead removed.
# The more overhead we remove, the more code duplication may occur.
# The integer after `codegen` (0..3) varies between runs; each run uses the
# same two mapping:iteration-space pairs.
codegen 0 T10:IS10,T20:IS20;
codegen 1 T10:IS10,T20:IS20;
codegen 2 T10:IS10,T20:IS20;
codegen 3 T10:IS10,T20:IS20;