summaryrefslogtreecommitdiff
path: root/omega/examples/lu
diff options
context:
space:
mode:
authorDerick Huth <derickhuth@gmail.com>2014-10-06 12:42:34 -0600
committerDerick Huth <derickhuth@gmail.com>2014-10-06 12:42:34 -0600
commit8d73c8fcc75556c1df71dd39dd99783f8f86fc3e (patch)
tree157d627863d76a4c256a27cae27ce2e8566c7ea0 /omega/examples/lu
parente87b55ad69f0ac6211daae741b32c8ee9dcbe470 (diff)
parent8c646f24570079eac53e58fcf42d0d4fbc437ee3 (diff)
downloadchill-8d73c8fcc75556c1df71dd39dd99783f8f86fc3e.tar.gz
chill-8d73c8fcc75556c1df71dd39dd99783f8f86fc3e.tar.bz2
chill-8d73c8fcc75556c1df71dd39dd99783f8f86fc3e.zip
Merge pull request #2 from dhuth/master
Moved omega into chill.
Diffstat (limited to 'omega/examples/lu')
-rw-r--r--omega/examples/lu41
1 files changed, 41 insertions, 0 deletions
diff --git a/omega/examples/lu b/omega/examples/lu
new file mode 100644
index 0000000..800d8a0
--- /dev/null
+++ b/omega/examples/lu
@@ -0,0 +1,41 @@
+# Perform imperfect loop interchange of LU decomposition
+# to get jki form and then block the k and i loops
+#
+#for k = 1 to n do
+# for i = k+1 to n do
+# a(i,k) = a(i,k) / a(k,k)
+# for j = k+1 to n do
+# a(i,j) = a(i,j) - a(k,j)*a(i,k)
+# endfor
+# endfor
+#endfor
+#
+#
+
+symbolic n;
+# IS10: iterations (k,i) of the division statement; IS20: iterations (k,i,j) of the update statement.
+IS10 := {[k,i] : 1 <= k <= n && k+1 <= i <= n};
+IS20 := {[k,i,j] : 1 <= k <= n && k+1 <= i <= n && k+1 <= j <= n};
+# T10: division goes to [t1,t2,k,k,i]; t1 = first k of k's 64-wide block, t2 = i rounded down to a multiple of 64.
+T10 := {[k,i] -> [t1,t2,k,k,i]:
+exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta &&
+ alpha >= 0 && alpha <= 63)
+&& exists (gamma,delta: t2 = 64delta && i = gamma +64delta &&
+ gamma >= 0 && gamma <= 63)};
+# T20: update goes to [t1,t2,j,k,i] (j outermost inside the blocks), with the same t1/t2 blocking as T10.
+T20 := {[k,i,j] -> [t1,t2,j,k,i]:
+exists (alpha,beta: t1 = 64beta+1 && k-1 = alpha + 64 beta &&
+ alpha >= 0 && alpha <= 63)
+&& exists (gamma,delta: t2 = 64delta && i = gamma +64delta &&
+ gamma >= 0 && gamma <= 63)};
+
+T10;
+T20;
+
+# Generate code with different amounts of overhead removed (the integer after codegen, 0 through 3 here).
+# The more overhead we remove, the more code duplication may occur.
+codegen 0 T10:IS10,T20:IS20;
+codegen 1 T10:IS10,T20:IS20;
+codegen 2 T10:IS10,T20:IS20;
+codegen 3 T10:IS10,T20:IS20;
+