1 files changed, 289 insertions, 0 deletions
diff --git a/omegalib/examples/old_test/ts1d-mp-i_ts-m_b b/omegalib/examples/old_test/ts1d-mp-i_ts-m_b
new file mode 100644
index 0000000..f288263
--- /dev/null
+++ b/omegalib/examples/old_test/ts1d-mp-i_ts-m_b
@@ -0,0 +1,289 @@
+# This is the file facts.prew, which is prepended to the .prew files
+# for the particular code generation we want, defines things like the
+# iteration space and dependences.  Known facts are inserted by the
+# Makefile.
+#
+# If you're looking at a .w file instead of facts.prew, then you should
+# remember to edit the original .prew files, not the .w files.
+#
+# This facts.prew file describes the program
+#
+# for(i = 0; i <= N-1; i++) {
+#  cur[i]=...
+# }
+# for(t = 0; t < T; t++) {
+#   for(i = 0; i <= N-1; i++) {
+#     old[i]=cur[i];
+#   }
+#   for(i = 1; i <= N-2; i++) {
+#     cur[i] = (old[i-1]+old[i]+old[i]+old[i+1])*0.25;
+#   }
+# }
+
+
+
+# first, the spaces and memory maps
+
+symbolic T, N;
+
+IS_INIT := { [1,i,1,0,0]          :           0<=i<=N-1 };
+MM_INIT := { [1,i,1,0,0] -> [0,i] :           0<=i<=N-1 };
+
+IS_COPY := { [2,t,0,i,1]            : 0<=t<T && 0<=i<=N-1 };
+MM_COPY := { [2,t,0,i,1] -> [t+1,i] : 0<=t<T && 0<=i<=N-1 };
+
+IS_CALC := { [2,t,1,i,1]            : 0<=t<T && 0< i< N-1 };
+MM_CALC := { [2,t,1,i,1] -> [t+1,i] : 0<=t<T && 0< i< N-1 };
+
+RESULTS := { [3,0,0,0,0] };
+
+
+# memory-based Output and Flow/anti-dependences (among Assign (copy), and Calc)
+
+FWD5 := {[x,t,y,i,z] -> [x',t',y',i',z'] :
+	(x'>x) or
+	(x'=x and t'>t) or
+	(x'=x and t'=t and y'>y) or
+	(x'=x and t'=t and y'=y and i'>i) or
+	(x'=x and t'=t and y'=y and i'=i and z'>z) };
+FWD7 := {[x,t,y,i,z,a,b] -> [x',t',y',i',z',a',b'] :
+	(x'>x) or
+	(x'=x and t'>t) or
+	(x'=x and t'=t and y'>y) or
+	(x'=x and t'=t and y'=y and i'>i) or
+	(x'=x and t'=t and y'=y and i'=i and z'>z) or
+	(x'=x and t'=t and y'=y and i'=i and z'=z and a'>a) or
+	(x'=x and t'=t and y'=y and i'=i and z'=z and a'=a and b'>b) };
+BWD5 := inverse FWD5;
+BWD7 := inverse FWD7;
+EQi := {[x,t,y,i,z] -> [x',t',y',i',z'] : i'=i };
+
+# output deps
+
+OAA := (IS_COPY * IS_COPY) intersection FWD5 intersection EQi;
+OCC := (IS_CALC * IS_CALC) intersection FWD5 intersection EQi;
+
+# combined flow/anti deps
+
+FAC := (IS_COPY * IS_CALC) intersection FWD5 intersection {[2,t,0,i,1] -> [2,t',1,i',1]  : (i'-1<=i<=i'+1)};
+FCA := (IS_CALC * IS_COPY) intersection FWD5 intersection {[2,t,1,i,1] -> [2,t',0,i',1]  : (i-1<=i'<=i+1)};
+
+# total memory deps in the "core"
+
+COREMEMDEPS := OAA union OCC union FAC union FCA;
+
+
+
+# data flow for original code:
+
+DF_12p1 := ( IS_INIT * IS_COPY ) intersection {[1,i,1,0,0] -> [2,0,0,i,1] : 0<i<N-1 };
+DF_12p2 := ( IS_INIT * IS_COPY ) intersection {[1,0,1,0,0] -> [2,t,0,0,1] };
+DF_12p3 := ( IS_INIT * IS_COPY ) intersection {[1,i,1,0,0] -> [2,t,0,i,1] : i=N-1 && N>1 };
+DF_32   := ( IS_CALC * IS_COPY ) intersection {[2,t,1,i,1] -> [2,t+1,0,i,1]};
+
+DF_23a := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i+1,1] };
+DF_23b := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i,1] };
+DF_23c := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i-1,1] };
+
+
+# data flow for array expanded code,
+# after forward substitution of "old[i] = cur[i]"
+
+DF1Ia := { [1,i,1,0,0] -> [2,t,1,i+1,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC;
+DF1Ib := { [1,i,1,0,0] -> [2,t,1,i+1,1] : t>0 && i=0 } restrictDomain IS_INIT restrictRange IS_CALC;
+DF1C  := { [2,t,1,i,1] -> [2,t+1,1,i+1,1] } restrictDomain IS_CALC restrictRange IS_CALC;
+DF2I  := { [1,i,1,0,0] -> [2,t,1,i,1] :   t=0 } restrictDomain IS_INIT restrictRange IS_CALC;
+DF2C  := { [2,t,1,i,1] -> [2,t+1,1,i+0,1] } restrictDomain IS_CALC restrictRange IS_CALC;
+DF3Ia := { [1,i,1,0,0] -> [2,t,1,i-1,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC;
+DF3Ib := { [1,i,1,0,0] -> [2,t,1,i-1,1] : t>0 && i=N-1 } restrictDomain IS_INIT restrictRange IS_CALC;
+DF3C  := { [2,t,1,i,1] -> [2,t+1,1,i-1,1] } restrictDomain IS_CALC restrictRange IS_CALC;
+
+# total data flow
+
+COREDATAFLOW := DF1C union DF2C union DF3C;
+
+
+# arity expansion relations
+ex_0_5v := {             [] -> [a,b,c,d,e]     };
+ex_0_7v := {             [] -> [a,b,c,d,e,f,g] };
+ex_3_5 := {         [a,b,c] -> [a,b,c,0,0]     };
+ex_3_7 := {         [a,b,c] -> [a,b,c,0,0,0,0] };
+ex_5_7 := {     [a,b,c,d,e] -> [a,b,c,d,e,0,0] };
+
+ex_5_3 := {     [a,b,c,0,0] -> [a,b,c]         };
+ex_7_3 := { [a,b,c,0,0,0,0] -> [a,b,c]         };
+ex_7_5 := { [a,b,c,d,e,0,0] -> [a,b,c,d,e]     };
+
+
+# stuff used in skew and tskew
+
+# Here is the description of time skewing from the current draft of the paper.
+IS_Trans := { [2,t,1,i,1] -> [2,tb,1,s,1,tt,1] :
+	0<=tt<500 && s=i+1*t && t=500*tb+tt };
+
+IS_Tinv := inverse IS_Trans;
+
+# We use it to transform the iteration spaces
+TS_IS_CALC := IS_CALC join IS_Trans;
+# for some reason OC refuses do to this "join" but will do the reverse:
+# TS_IS_INIT := ex_7_5 join IS_INIT;
+TS_IS_INIT := IS_INIT  join (inverse ex_7_5);
+
+# Now we can update the data flow relations to correspond to the new I.S.'s
+TS_DF1Ia := ex_7_5  join DF1Ia join IS_Trans;
+TS_DF1Ib := ex_7_5  join DF1Ib join IS_Trans;
+TS_DF1C  := IS_Tinv join DF1C  join IS_Trans;
+TS_DF2I  := ex_7_5  join DF2I  join IS_Trans;
+TS_DF2C  := IS_Tinv join DF2C  join IS_Trans;
+TS_DF3Ia := ex_7_5  join DF3Ia join IS_Trans;
+TS_DF3Ib := ex_7_5  join DF3Ib join IS_Trans;
+TS_DF3C  := IS_Tinv join DF3C  join IS_Trans;
+
+ 
+KNOWN := { [] : T >= 0 and N >= 4 };
+ 
+#
+#  multiprocessor version
+#  time skewed iteration space
+#  blocked memory mapping 
+#
+
+#
+# First of all, if 500 is much less than 4000,
+#  there's a problem with the constraints below.
+# To keep send and recv. slices from "crashing", 4000>=2BS+2 (safe approx?)
+#
+
+assertUnsatisfiable( { [] : 4000 < 2 * 500 + 2 } );
+
+# this transformation has no existentially quantified variables;
+#  basically, it factors out the common stuff below,
+#  but the quantified variables are left in the output, so we can get them
+#  everything after the 000 is not needed in final xform
+
+#
+# DANGER WILL ROBINSON!
+#  the .c file depends on the fact that t4 is always the processor number
+#
+
+MP_TSKEW_ALL  := { [2, t, 1, i, 1] ->
+		   [2, tb, slice, proc, t+i, tt, 000, t, i, lproc, t0, i0, ie]:
+##
+##	define time block and tt
+##
+		500*tb+tt = t and 0 <= tt < 500 
+##
+##		define "logical proc", then "wrap" onto physical later:
+##		"logical proc" (lproc) = (t-i) div sigma
+##
+		and	4000*lproc <= t-i < 4000*(lproc+1)
+##
+##  for uniproc. test, just do proc = -lproc (for multi, proc = lproc % 8)
+##
+		and	proc = -lproc
+##
+##  t0,i0 = first iteration in a block;
+##  t0,ie = maximum "i" in t0 of this block)
+##
+		and	t0=500*tb
+		and	t0-ie=4000*lproc
+		and	i0+4000-1=ie
+};
+
+#
+# We need to send things "down" (to same time block of next proc.)
+# and "right" (to next time block of next proc.)
+# The "+2" is for the things to send right (not mentioned in IPDPS paper).
+#
+
+MP_TSKEW_SEND_SL := MP_TSKEW_ALL join
+	{ [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] ->
+	  [2, tb, 1, proc, t_p_i, tt, 0] :
+##  define send slice...
+			(t+i) <= (t0+(500-2) + i0+(500-1) + 2)
+};
+
+MP_TSKEW_SEND_ME := MP_TSKEW_ALL join
+	{ [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] ->
+	  [2, tb, 2, proc, t_p_i, tt, 0] :
+##  in the send slice
+			(t+i) <= (t0+(500-2) + i0+(500-1) + 2)
+##  and near the (t-i) border:
+		and	(t-i) >= ((t0-i0)-1)
+};
+
+MP_TSKEW_COMP_SL := MP_TSKEW_ALL join
+	{ [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] ->
+	  [2, tb, 3, proc, t_p_i, tt, 0] :
+##  define computation slice...
+##  not send
+			(t+i) >  (t0+(500-2) + i0+(500-1) + 2)
+##  and not recv
+		and	(t+i) <= (t0+ie)
+};
+
+
+
+# Receive the iterations that we sent,
+# but after the calculation,
+# and on the neighbor (lower) processor
+
+MP_TSKEW_R_FROM_ME := MP_TSKEW_SEND_ME join 
+	{ [2, tb, 2, proc, t_p_i, tt, 0] ->
+	  [2, tb, 4, proc-1, t_p_i, tt, 0] };
+
+
+MP_TSKEW_RECV_SL := MP_TSKEW_ALL join
+	{ [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] ->
+	  [2, tb, 5, proc, t_p_i, tt, 0] :
+##  define recv slice...
+			(t+i) > (t0+ie)
+};
+
+
+
+
+## stuff to gather each processor's final results...
+
+IS_GATHER := IS_CALC intersection { [2,t,1,i,1] : t=T-1 };
+
+GATHER_EXPANDER := MP_TSKEW_ALL join
+	{ [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] ->
+	  [3, tb, 7, proc, t_p_i, tt, 0] };
+
+## stuff to initialize things right in the first place
+
+### NOTE THAT t4 (processor #) is used in a loop in initialization
+
+IS_INIT_EXP := { [1,t,i,0,0] : (-1=t && 0<=i<=N-1) ||
+			       (0<=t<T && 0=i) ||
+			       (0<=t<T && N-1=i) };
+
+
+# send_slice + calc_slice + recv slice == total
+
+TheSendIS := domain(MP_TSKEW_SEND_SL restrictDomain IS_CALC);
+TheCompIS := domain(MP_TSKEW_COMP_SL restrictDomain IS_CALC);
+TheRecvIS := domain(MP_TSKEW_RECV_SL restrictDomain IS_CALC);
+
+assertUnsatisfiable(TheSendIS intersection TheCompIS);
+assertUnsatisfiable(TheCompIS intersection TheRecvIS);
+assertUnsatisfiable(TheSendIS intersection TheRecvIS);
+#
+# These cause inexact negation and thus blow up...
+#
+# assertUnsatisfiable(IS_CALC - (TheSendIS union TheCompIS union TheRecvIS));
+# assertUnsatisfiable((TheSendIS union TheCompIS union TheRecvIS) - IS_CALC);
+
+
+
+codegen
+	ex_5_7 : IS_INIT_EXP,
+	MP_TSKEW_SEND_SL : IS_CALC,
+	 MP_TSKEW_SEND_ME : IS_CALC,
+	MP_TSKEW_COMP_SL : IS_CALC,
+	 MP_TSKEW_R_FROM_ME : IS_CALC,
+	MP_TSKEW_RECV_SL : IS_CALC,
+	GATHER_EXPANDER : IS_GATHER
+given	(KNOWN join ex_0_7v);
+