summaryrefslogtreecommitdiff
path: root/omegalib/examples/lu.out
diff options
context:
space:
mode:
Diffstat (limited to 'omegalib/examples/lu.out')
-rw-r--r--omegalib/examples/lu.out117
1 files changed, 0 insertions, 117 deletions
diff --git a/omegalib/examples/lu.out b/omegalib/examples/lu.out
deleted file mode 100644
index eecaa51..0000000
--- a/omegalib/examples/lu.out
+++ /dev/null
@@ -1,117 +0,0 @@
->>> # Perform imperfect loop interchange of LU decomposition
->>> # to get jki form and then block the k and i loops
->>> #
->>> #for k = 1 to n do
->>> # for i = k+1 to n do
->>> # a(i,k) = a(i,k) / a(k,k)
->>> # for j = k+1 to n do
->>> # a(i,j) = a(i,j) - a(k,j)*a(i,k)
->>> # endfor
->>> # endfor
->>> #endfor
->>> #
->>> #
->>>
->>> symbolic n;
->>>
->>> IS10 := {[k,i] : 1 <= k <= n && k+1 <= i <= n};
->>> IS20 := {[k,i,j] : 1 <= k <= n && k+1 <= i <= n && k+1 <= j <= n};
->>>
->>> T10 := {[k,i] -> [t1,t2,k,k,i]:
->>> exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta &&
->>> alpha >= 0 && alpha <= 63)
->>> && exists (gamma,delta: t2 = 64delta && i = gamma +64delta &&
->>> gamma >= 0 && gamma <= 63)};
->>>
->>> T20 := {[k,i,j] -> [t1,t2,j,k,i]:
->>> exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta &&
->>> alpha >= 0 && alpha <= 63)
->>> && exists (gamma,delta: t2 = 64delta && i = gamma +64delta &&
->>> gamma >= 0 && gamma <= 63)};
->>>
->>> T10;
-{[k,i] -> [t1,t2,k,k,i] : exists ( alpha,beta : t1 = 1+64alpha && t2 = 64beta && k-63 <= t1 <= k && i-63 <= t2 <= i)}
->>> T20;
-{[k,i,j] -> [t1,t2,j,k,i] : exists ( alpha,beta : t1 = 1+64alpha && t2 = 64beta && k-63 <= t1 <= k && i-63 <= t2 <= i)}
->>>
->>> # Generate code with different ammounts of overhead remove
->>> # The more overhead we remove, the more code duplication may occur
->>> codegen 0 T10:IS10,T20:IS20;
-for(t1 = 1; t1 <= n-1; t1 += 64) {
- for(t2 = t1-1; t2 <= n; t2 += 64) {
- for(t3 = t1; t3 <= n; t3++) {
- for(t4 = t1; t4 <= min(t3-1,t2+62,t1+63); t4++) {
- for(t5 = max(t2,t4+1); t5 <= min(n,t2+63); t5++) {
- s1(t4,t5,t3);
- }
- }
- if (t1 >= t3-63) {
- for(t5 = max(t3+1,t2); t5 <= min(n,t2+63); t5++) {
- s0(t3,t5);
- }
- }
- }
- }
-}
-
->>> codegen 1 T10:IS10,T20:IS20;
-for(t1 = 1; t1 <= n-1; t1 += 64) {
- for(t2 = t1-1; t2 <= n; t2 += 64) {
- for(t3 = t1; t3 <= n; t3++) {
- for(t4 = t1; t4 <= min(t3-1,t2+62,t1+63); t4++) {
- for(t5 = max(t2,t4+1); t5 <= min(n,t2+63); t5++) {
- s1(t4,t5,t3);
- }
- }
- if (t1 >= t3-63) {
- for(t5 = max(t3+1,t2); t5 <= min(n,t2+63); t5++) {
- s0(t3,t5);
- }
- }
- }
- }
-}
-
->>> codegen 2 T10:IS10,T20:IS20;
-for(t1 = 1; t1 <= n-1; t1 += 64) {
- for(t2 = t1-1; t2 <= n; t2 += 64) {
- for(t3 = t1; t3 <= n; t3++) {
- for(t4 = t1; t4 <= min(t3-1,t2+62,t1+63); t4++) {
- for(t5 = max(t2,t4+1); t5 <= min(n,t2+63); t5++) {
- s1(t4,t5,t3);
- }
- }
- if (t1 >= t3-63) {
- for(t5 = max(t3+1,t2); t5 <= min(n,t2+63); t5++) {
- s0(t3,t5);
- }
- }
- }
- }
-}
-
->>> codegen 3 T10:IS10,T20:IS20;
-for(t1 = 1; t1 <= n-1; t1 += 64) {
- for(t2 = t1-1; t2 <= n; t2 += 64) {
- for(t3 = t1; t3 <= min(t1+63,n); t3++) {
- for(t4 = t1; t4 <= min(t2+62,t3-1); t4++) {
- for(t5 = max(t2,t4+1); t5 <= min(t2+63,n); t5++) {
- s1(t4,t5,t3);
- }
- }
- for(t5 = max(t2,t3+1); t5 <= min(n,t2+63); t5++) {
- s0(t3,t5);
- }
- }
- for(t3 = t1+64; t3 <= n; t3++) {
- for(t4 = t1; t4 <= min(t2+62,t1+63); t4++) {
- for(t5 = max(t2,t4+1); t5 <= min(t2+63,n); t5++) {
- s1(t4,t5,t3);
- }
- }
- }
- }
-}
-
->>>
->>>