diff options
Diffstat (limited to 'omegalib/examples/old_test/ts1d-mp-i_ts-m_b')
-rw-r--r-- | omegalib/examples/old_test/ts1d-mp-i_ts-m_b | 289 |
1 files changed, 289 insertions, 0 deletions
diff --git a/omegalib/examples/old_test/ts1d-mp-i_ts-m_b b/omegalib/examples/old_test/ts1d-mp-i_ts-m_b new file mode 100644 index 0000000..f288263 --- /dev/null +++ b/omegalib/examples/old_test/ts1d-mp-i_ts-m_b @@ -0,0 +1,289 @@ +# This is the file facts.prew, which is prepended to the .prew files +# for the particular code generation we want, defines things like the +# iteration space and dependences. Known facts are inserted by the +# Makefile. +# +# If you're looking at a .w file instead of facts.prew, then you should +# remember to edit the original .prew files, not the .w files. +# +# This facts.prew file describes the program +# +# for(i = 0; i <= N-1; i++) { +# cur[i]=... +# } +# for(t = 0; t < T; t++) { +# for(i = 0; i <= N-1; i++) { +# old[i]=cur[i]; +# } +# for(i = 1; i <= N-2; i++) { +# cur[i] = (old[i-1]+old[i]+old[i]+old[i+1])*0.25; +# } +# } + + + +# first, the spaces and memory maps + +symbolic T, N; + +IS_INIT := { [1,i,1,0,0] : 0<=i<=N-1 }; +MM_INIT := { [1,i,1,0,0] -> [0,i] : 0<=i<=N-1 }; + +IS_COPY := { [2,t,0,i,1] : 0<=t<T && 0<=i<=N-1 }; +MM_COPY := { [2,t,0,i,1] -> [t+1,i] : 0<=t<T && 0<=i<=N-1 }; + +IS_CALC := { [2,t,1,i,1] : 0<=t<T && 0< i< N-1 }; +MM_CALC := { [2,t,1,i,1] -> [t+1,i] : 0<=t<T && 0< i< N-1 }; + +RESULTS := { [3,0,0,0,0] }; + + +# memory-based Output and Flow/anti-dependences (among Assign (copy), and Calc) + +FWD5 := {[x,t,y,i,z] -> [x',t',y',i',z'] : + (x'>x) or + (x'=x and t'>t) or + (x'=x and t'=t and y'>y) or + (x'=x and t'=t and y'=y and i'>i) or + (x'=x and t'=t and y'=y and i'=i and z'>z) }; +FWD7 := {[x,t,y,i,z,a,b] -> [x',t',y',i',z',a',b'] : + (x'>x) or + (x'=x and t'>t) or + (x'=x and t'=t and y'>y) or + (x'=x and t'=t and y'=y and i'>i) or + (x'=x and t'=t and y'=y and i'=i and z'>z) or + (x'=x and t'=t and y'=y and i'=i and z'=z and a'>a) or + (x'=x and t'=t and y'=y and i'=i and z'=z and a'=a and b'>b) }; +BWD5 := inverse FWD5; +BWD7 := inverse FWD7; +EQi := {[x,t,y,i,z] -> [x',t',y',i',z'] : i'=i }; + +# output deps + +OAA := (IS_COPY * IS_COPY) intersection FWD5 intersection EQi; +OCC := (IS_CALC * IS_CALC) intersection FWD5 intersection EQi; + +# combined flow/anti deps + +FAC := (IS_COPY * IS_CALC) intersection FWD5 intersection {[2,t,0,i,1] -> [2,t',1,i',1] : (i'-1<=i<=i'+1)}; +FCA := (IS_CALC * IS_COPY) intersection FWD5 intersection {[2,t,1,i,1] -> [2,t',0,i',1] : (i-1<=i'<=i+1)}; + +# total memory deps in the "core" + +COREMEMDEPS := OAA union OCC union FAC union FCA; + + + +# data flow for original code: + +DF_12p1 := ( IS_INIT * IS_COPY ) intersection {[1,i,1,0,0] -> [2,0,0,i,1] : 0<i<N-1 }; +DF_12p2 := ( IS_INIT * IS_COPY ) intersection {[1,0,1,0,0] -> [2,t,0,0,1] }; +DF_12p3 := ( IS_INIT * IS_COPY ) intersection {[1,i,1,0,0] -> [2,t,0,i,1] : i=N-1 && N>1 }; +DF_32 := ( IS_CALC * IS_COPY ) intersection {[2,t,1,i,1] -> [2,t+1,0,i,1]}; + +DF_23a := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i+1,1] }; +DF_23b := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i,1] }; +DF_23c := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i-1,1] }; + + +# data flow for array expanded code, +# after forward substitution of "old[i] = cur[i]" + +DF1Ia := { [1,i,1,0,0] -> [2,t,1,i+1,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC; +DF1Ib := { [1,i,1,0,0] -> [2,t,1,i+1,1] : t>0 && i=0 } restrictDomain IS_INIT restrictRange IS_CALC; +DF1C := { [2,t,1,i,1] -> [2,t+1,1,i+1,1] } restrictDomain IS_CALC restrictRange IS_CALC; +DF2I := { [1,i,1,0,0] -> [2,t,1,i,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC; +DF2C := { [2,t,1,i,1] -> [2,t+1,1,i+0,1] } restrictDomain IS_CALC restrictRange IS_CALC; +DF3Ia := { [1,i,1,0,0] -> [2,t,1,i-1,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC; +DF3Ib := { [1,i,1,0,0] -> [2,t,1,i-1,1] : t>0 && i=N-1 } restrictDomain IS_INIT restrictRange IS_CALC; +DF3C := { [2,t,1,i,1] -> [2,t+1,1,i-1,1] } restrictDomain IS_CALC restrictRange IS_CALC; + +# total data flow + +COREDATAFLOW := DF1C union DF2C union DF3C; + + +# arity expansion relations +ex_0_5v := { [] -> [a,b,c,d,e] }; +ex_0_7v := { [] -> [a,b,c,d,e,f,g] }; +ex_3_5 := { [a,b,c] -> [a,b,c,0,0] }; +ex_3_7 := { [a,b,c] -> [a,b,c,0,0,0,0] }; +ex_5_7 := { [a,b,c,d,e] -> [a,b,c,d,e,0,0] }; + +ex_5_3 := { [a,b,c,0,0] -> [a,b,c] }; +ex_7_3 := { [a,b,c,0,0,0,0] -> [a,b,c] }; +ex_7_5 := { [a,b,c,d,e,0,0] -> [a,b,c,d,e] }; + + +# stuff used in skew and tskew + +# Here is the description of time skewing from the current draft of the paper. +IS_Trans := { [2,t,1,i,1] -> [2,tb,1,s,1,tt,1] : + 0<=tt<500 && s=i+1*t && t=500*tb+tt }; + +IS_Tinv := inverse IS_Trans; + +# We use it to transform the iteration spaces +TS_IS_CALC := IS_CALC join IS_Trans; +# for some reason OC refuses do to this "join" but will do the reverse: +# TS_IS_INIT := ex_7_5 join IS_INIT; +TS_IS_INIT := IS_INIT join (inverse ex_7_5); + +# Now we can update the data flow relations to correspond to the new I.S.'s +TS_DF1Ia := ex_7_5 join DF1Ia join IS_Trans; +TS_DF1Ib := ex_7_5 join DF1Ib join IS_Trans; +TS_DF1C := IS_Tinv join DF1C join IS_Trans; +TS_DF2I := ex_7_5 join DF2I join IS_Trans; +TS_DF2C := IS_Tinv join DF2C join IS_Trans; +TS_DF3Ia := ex_7_5 join DF3Ia join IS_Trans; +TS_DF3Ib := ex_7_5 join DF3Ib join IS_Trans; +TS_DF3C := IS_Tinv join DF3C join IS_Trans; + + +KNOWN := { [] : T >= 0 and N >= 4 }; + +# +# multiprocessor version +# time skewed iteration space +# blocked memory mapping +# + +# +# First of all, if 500 is much less than 4000, +# there's a problem with the constraints below. +# To keep send and recv. slices from "crashing", 4000>=2BS+2 (safe approx?) +# + +assertUnsatisfiable( { [] : 4000 < 2 * 500 + 2 } ); + +# this transformation has no existentially quantified variables; +# basically, it factors out the common stuff below, +# but the quantified variables are left in the output, so we can get them +# everything after the 000 is not needed in final xform + +# +# DANGER WILL ROBINSON! +# the .c file depends on the fact that t4 is always the processor number +# + +MP_TSKEW_ALL := { [2, t, 1, i, 1] -> + [2, tb, slice, proc, t+i, tt, 000, t, i, lproc, t0, i0, ie]: +## +## define time block and tt +## + 500*tb+tt = t and 0 <= tt < 500 +## +## define "logical proc", then "wrap" onto physical later: +## "logical proc" (lproc) = (t-i) div sigma +## + and 4000*lproc <= t-i < 4000*(lproc+1) +## +## for uniproc. test, just do proc = -lproc (for multi, proc = lproc % 8) +## + and proc = -lproc +## +## t0,i0 = first iteration in a block; +## t0,ie = maximum "i" in t0 of this block) +## + and t0=500*tb + and t0-ie=4000*lproc + and i0+4000-1=ie +}; + +# +# We need to send things "down" (to same time block of next proc.) +# and "right" (to next time block of next proc.) +# The "+2" is for the things to send right (not mentioned in IPDPS paper). +# + +MP_TSKEW_SEND_SL := MP_TSKEW_ALL join + { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> + [2, tb, 1, proc, t_p_i, tt, 0] : +## define send slice... + (t+i) <= (t0+(500-2) + i0+(500-1) + 2) +}; + +MP_TSKEW_SEND_ME := MP_TSKEW_ALL join + { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> + [2, tb, 2, proc, t_p_i, tt, 0] : +## in the send slice + (t+i) <= (t0+(500-2) + i0+(500-1) + 2) +## and near the (t-i) border: + and (t-i) >= ((t0-i0)-1) +}; + +MP_TSKEW_COMP_SL := MP_TSKEW_ALL join + { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> + [2, tb, 3, proc, t_p_i, tt, 0] : +## define computation slice... +## not send + (t+i) > (t0+(500-2) + i0+(500-1) + 2) +## and not recv + and (t+i) <= (t0+ie) +}; + + + +# Receive the iterations that we sent, +# but after the calculation, +# and on the neighbor (lower) processor + +MP_TSKEW_R_FROM_ME := MP_TSKEW_SEND_ME join + { [2, tb, 2, proc, t_p_i, tt, 0] -> + [2, tb, 4, proc-1, t_p_i, tt, 0] }; + + +MP_TSKEW_RECV_SL := MP_TSKEW_ALL join + { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> + [2, tb, 5, proc, t_p_i, tt, 0] : +## define recv slice... + (t+i) > (t0+ie) +}; + + + + +## stuff to gather each processor's final results... + +IS_GATHER := IS_CALC intersection { [2,t,1,i,1] : t=T-1 }; + +GATHER_EXPANDER := MP_TSKEW_ALL join + { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> + [3, tb, 7, proc, t_p_i, tt, 0] }; + +## stuff to initialize things right in the first place + +### NOTE THAT t4 (processor #) is used in a loop in initialization + +IS_INIT_EXP := { [1,t,i,0,0] : (-1=t && 0<=i<=N-1) || + (0<=t<T && 0=i) || + (0<=t<T && N-1=i) }; + + +# send_slice + calc_slice + recv slice == total + +TheSendIS := domain(MP_TSKEW_SEND_SL restrictDomain IS_CALC); +TheCompIS := domain(MP_TSKEW_COMP_SL restrictDomain IS_CALC); +TheRecvIS := domain(MP_TSKEW_RECV_SL restrictDomain IS_CALC); + +assertUnsatisfiable(TheSendIS intersection TheCompIS); +assertUnsatisfiable(TheCompIS intersection TheRecvIS); +assertUnsatisfiable(TheSendIS intersection TheRecvIS); +# +# These cause inexact negation and thus blow up... +# +# assertUnsatisfiable(IS_CALC - (TheSendIS union TheCompIS union TheRecvIS)); +# assertUnsatisfiable((TheSendIS union TheCompIS union TheRecvIS) - IS_CALC); + + + +codegen + ex_5_7 : IS_INIT_EXP, + MP_TSKEW_SEND_SL : IS_CALC, + MP_TSKEW_SEND_ME : IS_CALC, + MP_TSKEW_COMP_SL : IS_CALC, + MP_TSKEW_R_FROM_ME : IS_CALC, + MP_TSKEW_RECV_SL : IS_CALC, + GATHER_EXPANDER : IS_GATHER +given (KNOWN join ex_0_7v); + |