diff options
Diffstat (limited to 'omega/examples/old_test/ts1d-mp-i_ts-m_b.oc-rt')
-rw-r--r-- | omega/examples/old_test/ts1d-mp-i_ts-m_b.oc-rt | 430 |
1 files changed, 430 insertions, 0 deletions
diff --git a/omega/examples/old_test/ts1d-mp-i_ts-m_b.oc-rt b/omega/examples/old_test/ts1d-mp-i_ts-m_b.oc-rt new file mode 100644 index 0000000..6d3ef2a --- /dev/null +++ b/omega/examples/old_test/ts1d-mp-i_ts-m_b.oc-rt @@ -0,0 +1,430 @@ +# Omega Calculator v1.2 (based on Omega Library 1.2, August, 2000): +# # This is the file facts.prew, which is prepended to the .prew files +# # for the particular code generation we want, defines things like the +# # iteration space and dependences. Known facts are inserted by the +# # Makefile. +# # +# # If you're looking at a .w file instead of facts.prew, then you should +# # remember to edit the original .prew files, not the .w files. +# # +# # This facts.prew file describes the program +# # +# # for(i = 0; i <= N-1; i++) { +# # cur[i]=... +# # } +# # for(t = 0; t < T; t++) { +# # for(i = 0; i <= N-1; i++) { +# # old[i]=cur[i]; +# # } +# # for(i = 1; i <= N-2; i++) { +# # cur[i] = (old[i-1]+old[i]+old[i]+old[i+1])*0.25; +# # } +# # } +# +# +# +# # first, the spaces and memory maps +# +# symbolic T, N; +# +# +# IS_INIT := { [1,i,1,0,0] : 0<=i<=N-1 }; +# +# MM_INIT := { [1,i,1,0,0] -> [0,i] : 0<=i<=N-1 }; +# +# +# IS_COPY := { [2,t,0,i,1] : 0<=t<T && 0<=i<=N-1 }; +# +# MM_COPY := { [2,t,0,i,1] -> [t+1,i] : 0<=t<T && 0<=i<=N-1 }; +# +# +# IS_CALC := { [2,t,1,i,1] : 0<=t<T && 0< i< N-1 }; +# +# MM_CALC := { [2,t,1,i,1] -> [t+1,i] : 0<=t<T && 0< i< N-1 }; +# +# +# RESULTS := { [3,0,0,0,0] }; +# +# +# +# # memory-based Output and Flow/anti-dependences (among Assign (copy), and Calc) +# +# FWD5 := {[x,t,y,i,z] -> [x',t',y',i',z'] : +# (x'>x) or +# (x'=x and t'>t) or +# (x'=x and t'=t and y'>y) or +# (x'=x and t'=t and y'=y and i'>i) or +# (x'=x and t'=t and y'=y and i'=i and z'>z) }; +# +# FWD7 := {[x,t,y,i,z,a,b] -> [x',t',y',i',z',a',b'] : +# (x'>x) or +# (x'=x and t'>t) or +# (x'=x and t'=t and y'>y) or +# (x'=x and t'=t and y'=y and i'>i) or +# (x'=x and t'=t and y'=y and i'=i and z'>z) or +# (x'=x and t'=t and y'=y and i'=i and z'=z and a'>a) or +# (x'=x and t'=t and y'=y and i'=i and z'=z and a'=a and b'>b) }; +# +# BWD5 := inverse FWD5; +# +# BWD7 := inverse FWD7; +# +# EQi := {[x,t,y,i,z] -> [x',t',y',i',z'] : i'=i }; +# +# +# # output deps +# +# OAA := (IS_COPY * IS_COPY) intersection FWD5 intersection EQi; +# +# OCC := (IS_CALC * IS_CALC) intersection FWD5 intersection EQi; +# +# +# # combined flow/anti deps +# +# FAC := (IS_COPY * IS_CALC) intersection FWD5 intersection {[2,t,0,i,1] -> [2,t',1,i',1] : (i'-1<=i<=i'+1)}; +# +# FCA := (IS_CALC * IS_COPY) intersection FWD5 intersection {[2,t,1,i,1] -> [2,t',0,i',1] : (i-1<=i'<=i+1)}; +# +# +# # total memory deps in the "core" +# +# COREMEMDEPS := OAA union OCC union FAC union FCA; +# +# +# +# +# # data flow for original code: +# +# DF_12p1 := ( IS_INIT * IS_COPY ) intersection {[1,i,1,0,0] -> [2,0,0,i,1] : 0<i<N-1 }; +# +# DF_12p2 := ( IS_INIT * IS_COPY ) intersection {[1,0,1,0,0] -> [2,t,0,0,1] }; +# +# DF_12p3 := ( IS_INIT * IS_COPY ) intersection {[1,i,1,0,0] -> [2,t,0,i,1] : i=N-1 && N>1 }; +# +# DF_32 := ( IS_CALC * IS_COPY ) intersection {[2,t,1,i,1] -> [2,t+1,0,i,1]}; +# +# +# DF_23a := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i+1,1] }; +# +# DF_23b := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i,1] }; +# +# DF_23c := ( IS_COPY * IS_CALC ) intersection {[2,t,0,i,1] -> [2,t,1,i-1,1] }; +# +# +# +# # data flow for array expanded code, +# # after forward substitution of "old[i] = cur[i]" +# +# DF1Ia := { [1,i,1,0,0] -> [2,t,1,i+1,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC; +# +# DF1Ib := { [1,i,1,0,0] -> [2,t,1,i+1,1] : t>0 && i=0 } restrictDomain IS_INIT restrictRange IS_CALC; +# +# DF1C := { [2,t,1,i,1] -> [2,t+1,1,i+1,1] } restrictDomain IS_CALC restrictRange IS_CALC; +# +# DF2I := { [1,i,1,0,0] -> [2,t,1,i,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC; +# +# DF2C := { [2,t,1,i,1] -> [2,t+1,1,i+0,1] } restrictDomain IS_CALC restrictRange IS_CALC; +# +# DF3Ia := { [1,i,1,0,0] -> [2,t,1,i-1,1] : t=0 } restrictDomain IS_INIT restrictRange IS_CALC; +# +# DF3Ib := { [1,i,1,0,0] -> [2,t,1,i-1,1] : t>0 && i=N-1 } restrictDomain IS_INIT restrictRange IS_CALC; +# +# DF3C := { [2,t,1,i,1] -> [2,t+1,1,i-1,1] } restrictDomain IS_CALC restrictRange IS_CALC; +# +# +# # total data flow +# +# COREDATAFLOW := DF1C union DF2C union DF3C; +# +# +# +# # arity expansion relations +# ex_0_5v := { [] -> [a,b,c,d,e] }; +# +# ex_0_7v := { [] -> [a,b,c,d,e,f,g] }; +# +# ex_3_5 := { [a,b,c] -> [a,b,c,0,0] }; +# +# ex_3_7 := { [a,b,c] -> [a,b,c,0,0,0,0] }; +# +# ex_5_7 := { [a,b,c,d,e] -> [a,b,c,d,e,0,0] }; +# +# +# ex_5_3 := { [a,b,c,0,0] -> [a,b,c] }; +# +# ex_7_3 := { [a,b,c,0,0,0,0] -> [a,b,c] }; +# +# ex_7_5 := { [a,b,c,d,e,0,0] -> [a,b,c,d,e] }; +# +# +# +# # stuff used in skew and tskew +# +# # Here is the description of time skewing from the current draft of the paper. +# IS_Trans := { [2,t,1,i,1] -> [2,tb,1,s,1,tt,1] : +# 0<=tt<500 && s=i+1*t && t=500*tb+tt }; +# +# +# IS_Tinv := inverse IS_Trans; +# +# +# # We use it to transform the iteration spaces +# TS_IS_CALC := IS_CALC join IS_Trans; +# +# # for some reason OC refuses do to this "join" but will do the reverse: +# # TS_IS_INIT := ex_7_5 join IS_INIT; +# TS_IS_INIT := IS_INIT join (inverse ex_7_5); +# +# +# # Now we can update the data flow relations to correspond to the new I.S.'s +# TS_DF1Ia := ex_7_5 join DF1Ia join IS_Trans; +# +# TS_DF1Ib := ex_7_5 join DF1Ib join IS_Trans; +# +# TS_DF1C := IS_Tinv join DF1C join IS_Trans; +# +# TS_DF2I := ex_7_5 join DF2I join IS_Trans; +# +# TS_DF2C := IS_Tinv join DF2C join IS_Trans; +# +# TS_DF3Ia := ex_7_5 join DF3Ia join IS_Trans; +# +# TS_DF3Ib := ex_7_5 join DF3Ib join IS_Trans; +# +# TS_DF3C := IS_Tinv join DF3C join IS_Trans; +# +# +# +# KNOWN := { [] : T >= 0 and N >= 4 }; +# +# +# # +# # multiprocessor version +# # time skewed iteration space +# # blocked memory mapping +# # +# +# # +# # First of all, if 500 is much less than 4000, +# # there's a problem with the constraints below. +# # To keep send and recv. slices from "crashing", 4000>=2BS+2 (safe approx?) +# # +# +# assertUnsatisfiable( { [] : 4000 < 2 * 500 + 2 } ); + +{ FALSE } + +# +# +# # this transformation has no existentially quantified variables; +# # basically, it factors out the common stuff below, +# # but the quantified variables are left in the output, so we can get them +# # everything after the 000 is not needed in final xform +# +# # +# # DANGER WILL ROBINSON! +# # the .c file depends on the fact that t4 is always the processor number +# # +# +# MP_TSKEW_ALL := { [2, t, 1, i, 1] -> +# [2, tb, slice, proc, t+i, tt, 000, t, i, lproc, t0, i0, ie]: +# ## +# ## define time block and tt +# ## +# 500*tb+tt = t and 0 <= tt < 500 +# ## +# ## define "logical proc", then "wrap" onto physical later: +# ## "logical proc" (lproc) = (t-i) div sigma +# ## +# and 4000*lproc <= t-i < 4000*(lproc+1) +# ## +# ## for uniproc. test, just do proc = -lproc (for multi, proc = lproc % 8) +# ## +# and proc = -lproc +# ## +# ## t0,i0 = first iteration in a block; +# ## t0,ie = maximum "i" in t0 of this block) +# ## +# and t0=500*tb +# and t0-ie=4000*lproc +# and i0+4000-1=ie +# }; +# +# +# # +# # We need to send things "down" (to same time block of next proc.) +# # and "right" (to next time block of next proc.) +# # The "+2" is for the things to send right (not mentioned in IPDPS paper). +# # +# +# MP_TSKEW_SEND_SL := MP_TSKEW_ALL join +# { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> +# [2, tb, 1, proc, t_p_i, tt, 0] : +# ## define send slice... +# (t+i) <= (t0+(500-2) + i0+(500-1) + 2) +# }; +# +# +# MP_TSKEW_SEND_ME := MP_TSKEW_ALL join +# { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> +# [2, tb, 2, proc, t_p_i, tt, 0] : +# ## in the send slice +# (t+i) <= (t0+(500-2) + i0+(500-1) + 2) +# ## and near the (t-i) border: +# and (t-i) >= ((t0-i0)-1) +# }; +# +# +# MP_TSKEW_COMP_SL := MP_TSKEW_ALL join +# { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> +# [2, tb, 3, proc, t_p_i, tt, 0] : +# ## define computation slice... +# ## not send +# (t+i) > (t0+(500-2) + i0+(500-1) + 2) +# ## and not recv +# and (t+i) <= (t0+ie) +# }; +# +# +# +# +# # Receive the iterations that we sent, +# # but after the calculation, +# # and on the neighbor (lower) processor +# +# MP_TSKEW_R_FROM_ME := MP_TSKEW_SEND_ME join +# { [2, tb, 2, proc, t_p_i, tt, 0] -> +# [2, tb, 4, proc-1, t_p_i, tt, 0] }; +# +# +# +# MP_TSKEW_RECV_SL := MP_TSKEW_ALL join +# { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> +# [2, tb, 5, proc, t_p_i, tt, 0] : +# ## define recv slice... +# (t+i) > (t0+ie) +# }; +# +# +# +# +# +# ## stuff to gather each processor's final results... +# +# IS_GATHER := IS_CALC intersection { [2,t,1,i,1] : t=T-1 }; +# +# +# GATHER_EXPANDER := MP_TSKEW_ALL join +# { [2, tb, slice, proc, t_p_i, tt, 000, t, i, lproc, t0, i0, ie] -> +# [3, tb, 7, proc, t_p_i, tt, 0] }; +# +# +# ## stuff to initialize things right in the first place +# +# ### NOTE THAT t4 (processor #) is used in a loop in initialization +# +# IS_INIT_EXP := { [1,t,i,0,0] : (-1=t && 0<=i<=N-1) || +# (0<=t<T && 0=i) || +# (0<=t<T && N-1=i) }; +# +# +# +# # send_slice + calc_slice + recv slice == total +# +# TheSendIS := domain(MP_TSKEW_SEND_SL restrictDomain IS_CALC); +# +# TheCompIS := domain(MP_TSKEW_COMP_SL restrictDomain IS_CALC); +# +# TheRecvIS := domain(MP_TSKEW_RECV_SL restrictDomain IS_CALC); +# +# +# assertUnsatisfiable(TheSendIS intersection TheCompIS); + +{[In_1,t,In_3,i,In_5] : FALSE } + +# +# assertUnsatisfiable(TheCompIS intersection TheRecvIS); + +{[In_1,t,In_3,i,In_5] : FALSE } + +# +# assertUnsatisfiable(TheSendIS intersection TheRecvIS); + +{[In_1,t,In_3,i,In_5] : FALSE } + +# +# # +# # These cause inexact negation and thus blow up... +# # +# # assertUnsatisfiable(IS_CALC - (TheSendIS union TheCompIS union TheRecvIS)); +# # assertUnsatisfiable((TheSendIS union TheCompIS union TheRecvIS) - IS_CALC); +# +# +# +# codegen +# ex_5_7 : IS_INIT_EXP, +# MP_TSKEW_SEND_SL : IS_CALC, +# MP_TSKEW_SEND_ME : IS_CALC, +# MP_TSKEW_COMP_SL : IS_CALC, +# MP_TSKEW_R_FROM_ME : IS_CALC, +# MP_TSKEW_RECV_SL : IS_CALC, +# GATHER_EXPANDER : IS_GATHER +# given (KNOWN join ex_0_7v); +for(t3 = 0; t3 <= N-1; t3++) { + s1(1,-1,t3,0,0); +} +for(t2 = 0; t2 <= T-1; t2++) { + s1(1,t2,0,0,0); + s1(1,t2,N-1,0,0); +} +for(t2 = 0; t2 <= intDiv(T-1,500); t2++) { + for(t4 = intDiv(-t2+7+7,8); t4 <= intDiv(-500*t2+N+3997,4000); t4++) { + for(t5 = max(1000*t2+4000*t4-3999,500*t2+1); t5 <= min(1000*t2+4000*t4-3000,N+T-3,2*N-4000*t4+3995); t5++) { + for(t6 = max(-N+t5-500*t2+2,0); t6 <= min(t5-500*t2-1,T-500*t2-1,intDiv(t5-4000*t4-1000*t2+3999,2)); t6++) { + s2(2,500*t2+t6,1,t5+-500*t2-t6,1); + } + } + } + for(t4 = max(intDiv(-T+4000+3999,4000),intDiv(-t2+7+7,8)); t4 <= intDiv(-500*t2+N+3997,4000); t4++) { + for(t5 = max(1000*t2+4000*t4-3999,-4000*t4+4000); t5 <= min(1000*t2+4000*t4-3000,2*N-4000*t4+3995,2*T+4000*t4-4000); t5++) { + for(t6 = intDiv(t5-4000*t4-1000*t2+3998+1,2); t6 <= intDiv(t5-4000*t4-1000*t2+3999,2); t6++) { + s3(2,500*t2+t6,1,t5+-500*t2-t6,1); + } + } + } + for(t4 = intDiv(-t2+1+7,8); t4 <= min(intDiv(-500*t2+N+3496,4000),intDiv(-1000*t2+N+T+2996,4000)); t4++) { + for(t5 = max(500*t2+1,4000*t4+1000*t2-2999); t5 <= min(N+T-3,4000*t4+1000*t2,N+500*t2+497); t5++) { + for(t6 = max(-N+t5-500*t2+2,0); t6 <= min(T-500*t2-1,t5-500*t2-1,499); t6++) { + s4(2,500*t2+t6,1,t5+-500*t2-t6,1); + } + } + } + for(t4 = max(intDiv(-T+3999,4000),intDiv(-t2-1+7,8)); t4 <= intDiv(-500*t2+N-3,4000); t4++) { + for(t5 = max(1000*t2+4000*t4+1,-4000*t4); t5 <= min(1000*t2+4000*t4+1000,2*N-4000*t4-5,2*T+4000*t4); t5++) { + for(t6 = intDiv(-1000*t2-4000*t4+t5-2+1,2); t6 <= intDiv(-1000*t2-4000*t4+t5-1,2); t6++) { + s5(2,500*t2+t6,1,t5+-500*t2-t6,1); + } + } + } + if (500*t2 <= T-2) { + for(t4 = intDiv(-t2+7,8); t4 <= min(intDiv(-500*t2+N+496,4000),intDiv(-1000*t2+N+T-4,4000)); t4++) { + for(t5 = max(1000*t2+4000*t4+1,-4000*t4+2); t5 <= min(2*T+4000*t4-2,N+T-3,N+500*t2+497,1000*t2+4000*t4+998); t5++) { + for(t6 = max(-N+t5-500*t2+2,intDiv(t5-4000*t4-1000*t2+1,2)); t6 <= min(t5-500*t2-1,T-500*t2-1,499); t6++) { + s6(2,500*t2+t6,1,t5+-500*t2-t6,1); + } + } + } + } +} +if (T >= 1) { + for(t2 = intDiv(T-500+499,500); t2 <= intDiv(T-1,500); t2++) { + for(t4 = intDiv(-T+2+3999,4000); t4 <= intDiv(N-T+3998,4000); t4++) { + for(t5 = max(4000*t4+2*T-4001,T); t5 <= min(4000*t4+2*T-2,N+T-3); t5++) { + s7(2,T-1,1,t5-T+1,1); + } + } + } +} + +# +# |