diff options
Diffstat (limited to 'src/transformations/loop_unroll.cc')
-rw-r--r-- | src/transformations/loop_unroll.cc | 642 |
1 files changed, 320 insertions, 322 deletions
diff --git a/src/transformations/loop_unroll.cc b/src/transformations/loop_unroll.cc index 86ffd84..3238d50 100644 --- a/src/transformations/loop_unroll.cc +++ b/src/transformations/loop_unroll.cc @@ -23,54 +23,54 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, // check for sanity of parameters if (unroll_amount < 0) throw std::invalid_argument( - "invalid unroll amount " + to_string(unroll_amount)); + "invalid unroll amount " + to_string(unroll_amount)); if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); - + if (cleanup_split_level == 0) cleanup_split_level = level; if (cleanup_split_level > level) throw std::invalid_argument( - "cleanup code must be split at or outside the unrolled loop level " - + to_string(level)); + "cleanup code must be split at or outside the unrolled loop level " + + to_string(level)); if (cleanup_split_level <= 0) throw std::invalid_argument( - "invalid split loop level " + to_string(cleanup_split_level)); - + "invalid split loop level " + to_string(cleanup_split_level)); + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + int dim = 2 * level - 1; std::vector<int> lex = getLexicalOrder(stmt_num); std::set<int> same_loop = getStatements(lex, dim - 1); - + // nothing to do if (unroll_amount == 1) return std::set<int>(); - + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { std::vector<std::pair<int, DependenceVector> > D; int n = stmt[*i].xform.n_out(); for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) { if (same_loop.find(j->first) != same_loop.end()) for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; int dim2 = level - 1; if (dv.type != DEP_CONTROL) { - + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { dim2 = stmt[*i].loop_level[dim2].payload - 1; } dim2 = stmt[*i].loop_level[dim2].payload; - + /*if (dv.isCarried(dim2) && (dv.hasNegative(dim2) && !dv.quasi)) throw loop_error( @@ -82,11 +82,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, "loop error: Unrolling is illegal, dependence violation!"); */ bool safe = false; - + if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { if (dv.quasi) throw loop_error( - "loop error: a quasi dependence with a positive carried distance"); + "loop error: a quasi dependence with a positive carried distance"); if (!dv.quasi) { if (dv.lbounds[dim2] != posInfinity) { //if (dv.lbounds[dim2] != negInfinity) @@ -102,33 +102,33 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, } else safe = true; }*/ - + if (!safe) { for (int l = level + 1; l <= (n - 1) / 2; l++) { int dim3 = l - 1; - + if (stmt[*i].loop_level[dim3].type != LoopLevelTile) dim3 = - stmt[*i].loop_level[dim3].payload; + stmt[*i].loop_level[dim3].payload; else { while (stmt[*i].loop_level[dim3].type == LoopLevelTile) { dim3 = - stmt[*i].loop_level[dim3].payload - - 1; + stmt[*i].loop_level[dim3].payload + - 1; } dim3 = - stmt[*i].loop_level[dim3].payload; + stmt[*i].loop_level[dim3].payload; } - + if (dim3 > dim2) { - + if (dv.hasPositive(dim3)) break; else if (dv.hasNegative(dim3)) throw loop_error( - "loop error: Unrolling is illegal, dependence violation!"); + "loop error: Unrolling is illegal, dependence violation!"); } } } @@ -153,7 +153,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, hull = Intersection(hull, omega::Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); hull.simplify(2, 4); - + } } for (int i = 1; i <= level; i++) { @@ -161,7 +161,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, hull.name_set_var(i, name); } hull.setup_names(); - + // extract the exact loop bound of the dimension to be unrolled if (is_single_loop_iteration(hull, level, this->known)) return std::set<int>(); @@ -169,7 +169,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, if (!bound.has_single_conjunct() || !bound.is_satisfiable() || bound.is_tautology()) throw loop_error("unable to extract loop bound for unrolling"); - + // extract the loop stride coef_t stride; std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, @@ -178,9 +178,9 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, stride = 1; else stride = abs(result.first.get_coef(result.second)) - / gcd(abs(result.first.get_coef(result.second)), - abs(result.first.get_coef(bound.set_var(level)))); - + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(level)))); + // separate lower and upper bounds std::vector<GEQ_Handle> lb_list, ub_list; { @@ -193,17 +193,17 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, lb_list.push_back(*gi); } } - + // simplify overflow expression for each pair of upper and lower bounds std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table( - lb_list.size(), - std::vector<std::map<Variable_ID, int> >(ub_list.size(), - std::map<Variable_ID, int>())); + lb_list.size(), + std::vector<std::map<Variable_ID, int> >(ub_list.size(), + std::map<Variable_ID, int>())); bool is_overflow_simplifiable = true; for (int i = 0; i < lb_list.size(); i++) { if (!is_overflow_simplifiable) break; - + for (int j = 0; j < ub_list.size(); j++) { // lower bound or upper bound has non-unit coefficient, can't simplify if (ub_list[j].get_coef(bound.set_var(level)) != -1 @@ -211,81 +211,81 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, is_overflow_simplifiable = false; break; } - + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - if ((*ci).var != bound.set_var(level)) + case Input_Var: { + if ((*ci).var != bound.set_var(level)) + overflow_table[i][j][(*ci).var] += (*ci).coef; + + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); overflow_table[i][j][(*ci).var] += (*ci).coef; - - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = bound.get_local(g); - else - v = bound.get_local(g, (*ci).var->function_of()); - overflow_table[i][j][(*ci).var] += (*ci).coef; - break; - } - default: - throw loop_error("failed to calculate overflow amount"); + break; + } + default: + throw loop_error("failed to calculate overflow amount"); } } overflow_table[i][j][NULL] += ub_list[j].get_const(); - + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - if ((*ci).var != bound.set_var(level)) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) { + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); overflow_table[i][j][(*ci).var] += (*ci).coef; if (overflow_table[i][j][(*ci).var] == 0) overflow_table[i][j].erase( - overflow_table[i][j].find((*ci).var)); + overflow_table[i][j].find((*ci).var)); + break; } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = bound.get_local(g); - else - v = bound.get_local(g, (*ci).var->function_of()); - overflow_table[i][j][(*ci).var] += (*ci).coef; - if (overflow_table[i][j][(*ci).var] == 0) - overflow_table[i][j].erase( - overflow_table[i][j].find((*ci).var)); - break; - } - default: - throw loop_error("failed to calculate overflow amount"); + default: + throw loop_error("failed to calculate overflow amount"); } } overflow_table[i][j][NULL] += lb_list[i].get_const(); - + overflow_table[i][j][NULL] += stride; if (unroll_amount == 0 || (overflow_table[i][j].size() == 1 && overflow_table[i][j][NULL] / stride - < unroll_amount)) + < unroll_amount)) unroll_amount = overflow_table[i][j][NULL] / stride; } } - + // loop iteration count can't be determined, bail out gracefully if (unroll_amount == 0) return std::set<int>(); - + // further simply overflow calculation using coefficients' modular if (is_overflow_simplifiable) { for (int i = 0; i < lb_list.size(); i++) for (int j = 0; j < ub_list.size(); j++) if (stride == 1) { for (std::map<Variable_ID, int>::iterator k = - overflow_table[i][j].begin(); + overflow_table[i][j].begin(); k != overflow_table[i][j].end();) if ((*k).first != NULL) { int t = int_mod_hat((*k).second, unroll_amount); @@ -304,20 +304,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, } } else k++; - + overflow_table[i][j][NULL] = int_mod_hat( - overflow_table[i][j][NULL], unroll_amount); - + overflow_table[i][j][NULL], unroll_amount); + // Since we don't have MODULO instruction in SUIF yet (only MOD), // make all coef positive in the final formula for (std::map<Variable_ID, int>::iterator k = - overflow_table[i][j].begin(); + overflow_table[i][j].begin(); k != overflow_table[i][j].end(); k++) if ((*k).second < 0) (*k).second += unroll_amount; } } - + // build overflow statement CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *overflow_code = NULL; @@ -331,17 +331,17 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, // upper splitting condition GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_const( - ((overflow_table[0][i][NULL] / stride) % unroll_amount) - * -stride); + ((overflow_table[0][i][NULL] / stride) % unroll_amount) + * -stride); } else { // upper splitting condition std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); + + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_coef(cond_upper.get_local(over_free_var), -stride); - + // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); @@ -349,20 +349,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); - + // create overflow assignment bound.setup_names(); // hack to fix omega relation variable names issue CG_outputRepr *rhs = NULL; bool is_split_illegal = false; for (std::map<Variable_ID, int>::iterator j = - overflow_table[0][i].begin(); + overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) if ((*j).first != NULL) { if ((*j).first->kind() == Input_Var && (*j).first->get_position() - >= cleanup_split_level) + >= cleanup_split_level) is_split_illegal = true; - + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); if ((*j).second != 1) t = ocg->CreateTimes(ocg->CreateInt((*j).second), @@ -370,20 +370,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, rhs = ocg->CreatePlus(rhs, t); } else if ((*j).second != 0) rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); - + if (is_split_illegal) { rhs->clear(); delete rhs; throw loop_error( - "cannot split cleanup code at loop level " - + to_string(cleanup_split_level) - + " due to overflow variable data dependence"); + "cannot split cleanup code at loop level " + + to_string(cleanup_split_level) + + " due to overflow variable data dependence"); } - + if (stride != 1) rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); - + CG_outputRepr *lhs = ocg->CreateIdent(over_name); init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); @@ -392,12 +392,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, ocg->CreateAssignment(0, lhs, rhs)); } } - + // lower splitting condition GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); } else if (is_overflow_simplifiable && ub_list.size() == 1) { for (int i = 0; i < lb_list.size(); i++) { - + if (overflow_table[i][0].size() == 1) { // lower splitting condition GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); @@ -405,12 +405,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, } else { // lower splitting condition std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); + + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); h.update_coef(cond_lower.get_local(over_free_var), -stride); - + // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); @@ -418,12 +418,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); - + // create overflow assignment bound.setup_names(); // hack to fix omega relation variable names issue CG_outputRepr *rhs = NULL; for (std::map<Variable_ID, int>::iterator j = - overflow_table[0][i].begin(); + overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) if ((*j).first != NULL) { CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); @@ -433,11 +433,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, rhs = ocg->CreatePlus(rhs, t); } else if ((*j).second != 0) rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); - + if (stride != 1) rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); - + CG_outputRepr *lhs = ocg->CreateIdent(over_name); init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); @@ -446,61 +446,59 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, ocg->CreateAssignment(0, lhs, rhs)); } } - + // upper splitting condition GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); } else { std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); + + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); - + std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list; for (int i = 0; i < lb_list.size(); i++) { lb_repr_list.push_back( - output_lower_bound_repr(ocg, - lb_list[i], - bound.set_var(dim + 1), result.first, result.second, - bound, Relation::True(bound.n_set()), - std::vector<std::pair<CG_outputRepr *, int> >( - bound.n_set(), - std::make_pair( - static_cast<CG_outputRepr *>(NULL), - 0)), - uninterpreted_symbols[stmt_num])); + output_lower_bound_repr(ocg, + lb_list[i], + bound.set_var(dim + 1), result.first, result.second, + bound, Relation::True(bound.n_set()), + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)), + uninterpreted_symbols[stmt_num])); GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); } for (int i = 0; i < ub_list.size(); i++) { ub_repr_list.push_back( - output_upper_bound_repr(ocg, ub_list[i], - bound.set_var(dim + 1), bound, - std::vector<std::pair<CG_outputRepr *, int> >( - bound.n_set(), - std::make_pair( - static_cast<CG_outputRepr *>(NULL), - 0)), - uninterpreted_symbols[stmt_num])); + output_upper_bound_repr(ocg, ub_list[i], + bound.set_var(dim + 1), bound, + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)), + uninterpreted_symbols[stmt_num])); GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_coef(cond_upper.get_local(over_free_var), -stride); } - - CG_outputRepr *lbRepr, *ubRepr; + + CG_outputRepr *lbRepr, *ubRepr; if (lb_repr_list.size() > 1) { //fprintf(stderr, "loop_unroll.cc createInvoke( max )\n"); lbRepr = ocg->CreateInvoke("max", lb_repr_list); - } - else if (lb_repr_list.size() == 1) { + } else if (lb_repr_list.size() == 1) { lbRepr = lb_repr_list[0]; } - + if (ub_repr_list.size() > 1) { //fprintf(stderr, "loop_unroll.cc createInvoke( min )\n"); ubRepr = ocg->CreateInvoke("min", ub_repr_list); - } - else if (ub_repr_list.size() == 1) { + } else if (ub_repr_list.size() == 1) { ubRepr = ub_repr_list[0]; } - + // create overflow assignment CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), ocg->CreateInt(1)); @@ -512,7 +510,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); lhs = ocg->CreateIdent(over_name); overflow_code = ocg->CreateAssignment(0, lhs, rhs); - + // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); @@ -521,7 +519,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); } - + // insert overflow statement int overflow_stmt_num = -1; if (overflow_code != NULL) { @@ -537,7 +535,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, for (int i = 1; i < cleanup_split_level; i++) overflow_IS.name_set_var(i, hull.set_var(i)->name()); overflow_IS.setup_names(); - + // build dumb transformation relation for overflow statement Relation overflow_xform(cleanup_split_level - 1, 2 * (cleanup_split_level - 1) + 1); @@ -546,20 +544,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, EQ_Handle h = f_root->add_EQ(); h.update_coef(overflow_xform.output_var(2 * i), 1); h.update_coef(overflow_xform.input_var(i), -1); - + h = f_root->add_EQ(); h.update_coef(overflow_xform.output_var(2 * i - 1), 1); h.update_const(-lex[2 * i - 2]); } EQ_Handle h = f_root->add_EQ(); h.update_coef( - overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), - 1); + overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), + 1); h.update_const(-lex[2 * (cleanup_split_level - 1)]); - + shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1); Statement overflow_stmt; - + overflow_stmt.code = overflow_code; overflow_stmt.IS = overflow_IS; overflow_stmt.xform = overflow_xform; @@ -567,18 +565,18 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, overflow_stmt.ir_stmt_node = NULL; for (int i = 0; i < level - 1; i++) { overflow_stmt.loop_level[i].type = - stmt[stmt_num].loop_level[i].type; + stmt[stmt_num].loop_level[i].type; if (stmt[stmt_num].loop_level[i].type == LoopLevelTile && stmt[stmt_num].loop_level[i].payload >= level) overflow_stmt.loop_level[i].payload = -1; else overflow_stmt.loop_level[i].payload = - stmt[stmt_num].loop_level[i].payload; + stmt[stmt_num].loop_level[i].payload; overflow_stmt.loop_level[i].parallel_level = - stmt[stmt_num].loop_level[i].parallel_level; + stmt[stmt_num].loop_level[i].parallel_level; } - - fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size()); + + fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size()); stmt.push_back(overflow_stmt); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); @@ -586,12 +584,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, dep.insert(); overflow_stmt_num = stmt.size() - 1; overflow[overflow_stmt_num] = over_var_list; - + // update the global known information on overflow variable this->known = Intersection(this->known, Extend_Set(copy(overflow_constraint), this->known.n_set() - overflow_constraint.n_set())); - + // update dependence graph DependenceVector dv; dv.type = DEP_CONTROL; @@ -628,12 +626,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, dep.connect(overflow_stmt_num, overflow_stmt_num, dv); } } - + // split the loop so it can be fully unrolled std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper); std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower); new_stmts.insert(new_stmts2.begin(), new_stmts2.end()); - + // check if unrolled statements can be trivially lumped together as one statement bool can_be_lumped = true; if (can_be_lumped) { @@ -649,7 +647,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, if (!(stmt[*i].loop_level[j].type == stmt[stmt_num].loop_level[j].type && stmt[*i].loop_level[j].payload - == stmt[stmt_num].loop_level[j].payload)) { + == stmt[stmt_num].loop_level[j].payload)) { can_be_lumped = false; break; } @@ -690,7 +688,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) if (same_loop.find(j->first) != same_loop.end()) { for (int k = 0; k < j->second.size(); k++) @@ -706,38 +704,38 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, break; } } - + // insert unrolled statements int old_num_stmt = stmt.size(); if (!can_be_lumped) { std::map<int, std::vector<int> > what_stmt_num; - + for (int j = 1; j < unroll_amount; j++) { for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { Statement new_stmt; - + std::vector<std::string> loop_vars; std::vector<CG_outputRepr *> subs; loop_vars.push_back(stmt[*i].IS.set_var(level)->name()); subs.push_back( - ocg->CreatePlus( - ocg->CreateIdent( - stmt[*i].IS.set_var(level)->name()), - ocg->CreateInt(j * stride))); + ocg->CreatePlus( + ocg->CreateIdent( + stmt[*i].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); new_stmt.code = ocg->CreateSubstitutedStmt(0, stmt[*i].code->clone(), loop_vars, subs); - + new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); add_loop_stride(new_stmt.IS, bound, level - 1, unroll_amount * stride); - + new_stmt.xform = copy(stmt[*i].xform); - + new_stmt.loop_level = stmt[*i].loop_level; new_stmt.ir_stmt_node = NULL; - fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size()); stmt.push_back(new_stmt); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); @@ -750,16 +748,16 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, i != same_loop.end(); i++) add_loop_stride(stmt[*i].IS, bound, level - 1, unroll_amount * stride); - + // update dependence graph if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; int new_stride = unroll_amount * stride; for (int i = 0; i < old_num_stmt; i++) { std::vector<std::pair<int, DependenceVector> > D; - + for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { @@ -772,7 +770,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, kk++) if (what_stmt_num[i][kk] != -1 && what_stmt_num[j->first][kk] - != -1) + != -1) dep.connect(what_stmt_num[i][kk], what_stmt_num[j->first][kk], dv); @@ -782,35 +780,35 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, if (ub == lb && int_mod(lb, static_cast<coef_t>(new_stride)) - == 0) { + == 0) { D.push_back( - std::make_pair(j->first, dv)); + std::make_pair(j->first, dv)); for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[i][kk] != -1 && what_stmt_num[j->first][kk] - != -1) + != -1) dep.connect( - what_stmt_num[i][kk], - what_stmt_num[j->first][kk], - dv); + what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); } else if (lb == -posInfinity && ub == posInfinity) { D.push_back( - std::make_pair(j->first, dv)); + std::make_pair(j->first, dv)); for (int kk = 0; kk < unroll_amount; kk++) if (kk == 0) D.push_back( - std::make_pair(j->first, - dv)); + std::make_pair(j->first, + dv)); else if (what_stmt_num[j->first][kk - 1] != -1) D.push_back( - std::make_pair( - what_stmt_num[j->first][kk - - 1], - dv)); + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); for (int t = 0; t < unroll_amount - 1; t++) if (what_stmt_num[i][t] != -1) @@ -819,15 +817,15 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, kk++) if (kk == 0) dep.connect( - what_stmt_num[i][t], - j->first, dv); + what_stmt_num[i][t], + j->first, dv); else if (what_stmt_num[j->first][kk - 1] != -1) dep.connect( - what_stmt_num[i][t], - what_stmt_num[j->first][kk - - 1], - dv); + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); } else { for (int kk = 0; kk < unroll_amount; kk++) { @@ -836,49 +834,49 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, < int_mod(lb, static_cast<coef_t>(new_stride))) dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb) - / new_stride) - * new_stride - + new_stride; + floor( + static_cast<double>(lb) + / new_stride) + * new_stride + + new_stride; else dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb) - / new_stride) - * new_stride; + floor( + static_cast<double>(lb) + / new_stride) + * new_stride; } if (ub != posInfinity) { if (kk * stride > int_mod(ub, static_cast<coef_t>(new_stride))) dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub) - / new_stride) - * new_stride - - new_stride; + floor( + static_cast<double>(ub) + / new_stride) + * new_stride + - new_stride; else dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub) - / new_stride) - * new_stride; + floor( + static_cast<double>(ub) + / new_stride) + * new_stride; } if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { if (kk == 0) D.push_back( - std::make_pair( - j->first, - dv)); + std::make_pair( + j->first, + dv)); else if (what_stmt_num[j->first][kk - 1] != -1) D.push_back( - std::make_pair( - what_stmt_num[j->first][kk - - 1], - dv)); + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); } } for (int t = 0; t < unroll_amount - 1; @@ -890,80 +888,80 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, if (lb != -posInfinity) { if (kk * stride < int_mod( - lb + t - + 1, - static_cast<coef_t>(new_stride))) + lb + t + + 1, + static_cast<coef_t>(new_stride))) dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb - + (t - + 1) - * stride) - / new_stride) - * new_stride - + new_stride; + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride + + new_stride; else dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb - + (t - + 1) - * stride) - / new_stride) - * new_stride; + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride; } if (ub != posInfinity) { if (kk * stride > int_mod( - ub + t - + 1, - static_cast<coef_t>(new_stride))) + ub + t + + 1, + static_cast<coef_t>(new_stride))) dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub - + (t - + 1) - * stride) - / new_stride) - * new_stride - - new_stride; + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride + - new_stride; else dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub - + (t - + 1) - * stride) - / new_stride) - * new_stride; + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride; } if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { if (kk == 0) dep.connect( - what_stmt_num[i][t], - j->first, - dv); + what_stmt_num[i][t], + j->first, + dv); else if (what_stmt_num[j->first][kk - 1] != -1) dep.connect( - what_stmt_num[i][t], - what_stmt_num[j->first][kk - - 1], - dv); + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); } } } } } - + dep.vertex[i].second.erase(j++); } else { for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[i][kk] != -1) dep.connect(what_stmt_num[i][kk], j->first, j->second); - + j++; } } else { @@ -972,26 +970,26 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[j->first][kk] != -1) D.push_back( - std::make_pair( - what_stmt_num[j->first][kk], - j->second[k])); + std::make_pair( + what_stmt_num[j->first][kk], + j->second[k])); j++; } } - + for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } } - + // reset lexical order for the unrolled loop body std::set<int> new_same_loop; - + int count = 0; - + for (std::map<int, std::vector<int> >::iterator i = - what_stmt_num.begin(); i != what_stmt_num.end(); i++) { - + what_stmt_num.begin(); i != what_stmt_num.end(); i++) { + new_same_loop.insert(i->first); for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2) assign_const(stmt[i->first].xform, k, @@ -1001,11 +999,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, for (int j = 0; j < i->second.size(); j++) { new_same_loop.insert(i->second[j]); for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k += - 2) + 2) assign_const(stmt[i->second[j]].xform, k, get_const( - stmt[(what_stmt_num.begin())->first].xform, - k, Output_Var) + count); + stmt[(what_stmt_num.begin())->first].xform, + k, Output_Var) + count); count++; } } @@ -1015,20 +1013,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, i != same_loop.end(); i++) add_loop_stride(stmt[*i].IS, bound, level - 1, unroll_amount * stride); - + int max_level = stmt[stmt_num].loop_level.size(); std::vector<std::pair<int, int> > stmt_order; for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) stmt_order.push_back( - std::make_pair( - get_const(stmt[*i].xform, 2 * max_level, - Output_Var), *i)); + std::make_pair( + get_const(stmt[*i].xform, 2 * max_level, + Output_Var), *i)); sort(stmt_order.begin(), stmt_order.end()); - + Statement new_stmt; new_stmt.code = NULL; - for (int j = 1; j < unroll_amount; j++) { + for (int j = 1; j < unroll_amount; j++) { for (int i = 0; i < stmt_order.size(); i++) { std::vector<std::string> loop_vars; std::vector<CG_outputRepr *> subs; @@ -1036,12 +1034,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, //fprintf(stderr, "loop_unroll.cc, will replace '%s with '%s+%d' ??\n", // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), j * stride); - + loop_vars.push_back( - stmt[stmt_order[i].second].IS.set_var(level)->name()); + stmt[stmt_order[i].second].IS.set_var(level)->name()); subs.push_back( - ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()), - ocg->CreateInt(j * stride))); // BUG HERE + ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); // BUG HERE //fprintf(stderr, "loop_unroll.cc subs now has %d parts\n", subs.size()); //for (int k=0; k< subs.size(); k++) //fprintf(stderr, "subs[%d] = 0x%x\n", k, subs[k]); @@ -1052,7 +1050,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, CG_outputRepr *code = ocg->CreateSubstitutedStmt(0, - stmt[stmt_order[i].second].code->clone(), + stmt[stmt_order[i].second].code->clone(), loop_vars, subs); @@ -1069,7 +1067,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, } } - + //fprintf(stderr, "new_stmt.IS = \n"); @@ -1084,13 +1082,13 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, if (stmt[stmt_num].has_inspector) fprintf(stderr, "OLD STMT HAS INSPECTOR\n"); else fprintf(stderr, "OLD STMT DOES NOT HAVE INSPECTOR\n"); - fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size()); stmt.push_back(new_stmt); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); dep.insert(); - + //fprintf(stderr, "update dependence graph\n"); // update dependence graph if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { @@ -1098,14 +1096,14 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, int new_stride = unroll_amount * stride; for (int i = 0; i < old_num_stmt; i++) { std::vector<std::pair<int, std::vector<DependenceVector> > > D; - + for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { std::vector<DependenceVector> dvs11, dvs12, dvs22, - dvs21; + dvs21; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.type == DEP_CONTROL @@ -1115,71 +1113,71 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, dvs22.push_back(dv); } else throw loop_error( - "unrolled statements lumped together illegally"); + "unrolled statements lumped together illegally"); } else { coef_t lb = dv.lbounds[dep_dim]; coef_t ub = dv.ubounds[dep_dim]; if (ub == lb && int_mod(lb, static_cast<coef_t>(new_stride)) - == 0) { + == 0) { dvs11.push_back(dv); dvs22.push_back(dv); } else { if (lb != -posInfinity) dv.lbounds[dep_dim] = ceil( - static_cast<double>(lb) - / new_stride) - * new_stride; + static_cast<double>(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = floor( - static_cast<double>(ub) - / new_stride) - * new_stride; + static_cast<double>(ub) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs11.push_back(dv); - + if (lb != -posInfinity) dv.lbounds[dep_dim] = ceil( - static_cast<double>(lb) - / new_stride) - * new_stride; + static_cast<double>(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = ceil( - static_cast<double>(ub) - / new_stride) - * new_stride; + static_cast<double>(ub) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs21.push_back(dv); - + if (lb != -posInfinity) dv.lbounds[dep_dim] = floor( - static_cast<double>(lb) - / new_stride) - * new_stride; + static_cast<double>(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = floor( - static_cast<double>(ub - - stride) - / new_stride) - * new_stride; + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs12.push_back(dv); - + if (lb != -posInfinity) dv.lbounds[dep_dim] = floor( - static_cast<double>(lb) - / new_stride) - * new_stride; + static_cast<double>(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = ceil( - static_cast<double>(ub - - stride) - / new_stride) - * new_stride; + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs22.push_back(dv); @@ -1192,10 +1190,10 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, dep.connect(old_num_stmt, old_num_stmt, dvs22); if (dvs12.size() > 0) D.push_back( - std::make_pair(old_num_stmt, dvs12)); + std::make_pair(old_num_stmt, dvs12)); if (dvs21.size() > 0) dep.connect(old_num_stmt, i, dvs21); - + dep.vertex[i].second.erase(j++); } else { dep.connect(old_num_stmt, j->first, j->second); @@ -1204,17 +1202,17 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, } else { if (same_loop.find(j->first) != same_loop.end()) D.push_back( - std::make_pair(old_num_stmt, j->second)); + std::make_pair(old_num_stmt, j->second)); j++; } } - + for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } } } - + //fprintf(stderr, " loop_unroll.cc returning new_stmts\n"); return new_stmts; } |