/***************************************************************************** Copyright (C) 2008 University of Southern California Copyright (C) 2009-2010 University of Utah All Rights Reserved. Purpose: Core loop transformation functionality. Notes: "level" (starting from 1) means loop level and it corresponds to "dim" (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 in transformed iteration space, and variable 4 in Omega relation. All c's are constant numbers only and they will not show up as actual loops. Formula: dim = 2*level - 1 var = dim + 1 History: 10/2005 Created by Chun Chen. 09/2009 Expand tile functionality, -chun 10/2009 Initialize unfusible loop nest without bailing out, -chun *****************************************************************************/ #include #include #include #include #include #include #include #include "loop.hh" #include "omegatools.hh" #include "irtools.hh" #include "chill_error.hh" #include using namespace omega; const std::string Loop::tmp_loop_var_name_prefix = std::string("_t"); const std::string Loop::overflow_var_name_prefix = std::string("over"); //----------------------------------------------------------------------------- // Class Loop //----------------------------------------------------------------------------- bool Loop::init_loop(std::vector &ir_tree, std::vector &ir_stmt) { ir_stmt = extract_ir_stmts(ir_tree); stmt_nesting_level_.resize(ir_stmt.size()); std::vector stmt_nesting_level(ir_stmt.size()); for (int i = 0; i < ir_stmt.size(); i++) { ir_stmt[i]->payload = i; int t = 0; ir_tree_node *itn = ir_stmt[i]; while (itn->parent != NULL) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) t++; } stmt_nesting_level_[i] = t; stmt_nesting_level[i] = t; } stmt = std::vector(ir_stmt.size()); int n_dim = -1; int max_loc; //std::vector index; for (int i = 0; i < ir_stmt.size(); i++) { int max_nesting_level = -1; int loc; for (int j = 0; j < ir_stmt.size(); j++) if (stmt_nesting_level[j] > max_nesting_level) { max_nesting_level = stmt_nesting_level[j]; loc = j; } // most deeply nested statement acting as a reference point if (n_dim == -1) { n_dim = max_nesting_level; max_loc = loc; index = std::vector(n_dim); ir_tree_node *itn = ir_stmt[loc]; int cur_dim = n_dim - 1; while (itn->parent != NULL) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) { index[cur_dim] = static_cast(itn->content)->index()->name(); itn->payload = cur_dim--; } } } // align loops by names, temporary solution ir_tree_node *itn = ir_stmt[loc]; int depth = stmt_nesting_level_[loc] - 1; /* while (itn->parent != NULL) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { std::string name = static_cast(itn->content)->index()->name(); for (int j = 0; j < n_dim; j++) if (index[j] == name) { itn->payload = j; break; } if (itn->payload == -1) throw loop_error("no complex alignment yet"); } } */ for (int t = depth; t >= 0; t--) { int y = t; ir_tree_node *itn = ir_stmt[loc]; while ((itn->parent != NULL) && (y >= 0)) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) y--; } if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { CG_outputBuilder *ocg = ir_->builder(); itn->payload = depth - t; CG_outputRepr *code = static_cast(ir_stmt[loc]->content)->extract(); Tuple index_expr; Tuple old_index; CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); index_expr.append(repl); old_index.append( static_cast(itn->content)->index()->name()); code = ocg->CreatePlaceHolder(0, code, index_expr, old_index); replace.insert(std::pair(loc, code)); //stmt[loc].code = code; } } // set relation variable names Relation r(n_dim); F_And *f_root = r.add_and(); itn = ir_stmt[loc]; int temp_depth = depth; while (itn->parent != NULL) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) { r.name_set_var(itn->payload + 1, index[temp_depth]); temp_depth--; } //static_cast(itn->content)->index()->name()); } /*while (itn->parent != NULL) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) r.name_set_var(itn->payload+1, static_cast(itn->content)->index()->name()); }*/ // extract information from loop/if structures std::vector processed(n_dim, false); Tuple vars_to_be_reversed; itn = ir_stmt[loc]; while (itn->parent != NULL) { itn = itn->parent; switch (itn->content->type()) { case IR_CONTROL_LOOP: { IR_Loop *lp = static_cast(itn->content); Variable_ID v = r.set_var(itn->payload + 1); int c; try { c = lp->step_size(); if (c > 0) { CG_outputRepr *lb = lp->lower_bound(); exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_GE, true); CG_outputRepr *ub = lp->upper_bound(); IR_CONDITION_TYPE cond = lp->stop_cond(); if (cond == IR_COND_LT || cond == IR_COND_LE) exp2formula(ir, r, f_root, freevar, ub, v, 's', cond, true); else throw ir_error("loop condition not supported"); } else if (c < 0) { CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *lb = lp->lower_bound(); lb = ocg->CreateMinus(NULL, lb); exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_GE, true); CG_outputRepr *ub = lp->upper_bound(); ub = ocg->CreateMinus(NULL, ub); IR_CONDITION_TYPE cond = lp->stop_cond(); if (cond == IR_COND_GE) exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_LE, true); else if (cond == IR_COND_GT) exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_LT, true); else throw ir_error("loop condition not supported"); vars_to_be_reversed.append(lp->index()->name()); } else throw ir_error("loop step size zero"); } catch (const ir_error &e) { for (int i = 0; i < itn->children.size(); i++) delete itn->children[i]; itn->children = std::vector(); itn->content = itn->content->convert(); return false; } if (abs(c) != 1) { F_Exists *f_exists = f_root->add_exists(); Variable_ID e = f_exists->declare(); F_And *f_and = f_exists->add_and(); Stride_Handle h = f_and->add_stride(abs(c)); if (c > 0) h.update_coef(e, 1); else h.update_coef(e, -1); h.update_coef(v, -1); CG_outputRepr *lb = lp->lower_bound(); exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, true); } processed[itn->payload] = true; break; } case IR_CONTROL_IF: { CG_outputRepr *cond = static_cast(itn->content)->condition(); try { if (itn->payload % 2 == 1) exp2constraint(ir, r, f_root, freevar, cond, true); else { F_Not *f_not = f_root->add_not(); F_And *f_and = f_not->add_and(); exp2constraint(ir, r, f_and, freevar, cond, true); } } catch (const ir_error &e) { std::vector *t; if (itn->parent == NULL) t = &ir_tree; else t = &(itn->parent->children); int id = itn->payload; int i = t->size() - 1; while (i >= 0) { if ((*t)[i] == itn) { for (int j = 0; j < itn->children.size(); j++) delete itn->children[j]; itn->children = std::vector(); itn->content = itn->content->convert(); } else if ((*t)[i]->payload >> 1 == id >> 1) { delete (*t)[i]; t->erase(t->begin() + i); } i--; } return false; } break; } default: for (int i = 0; i < itn->children.size(); i++) delete itn->children[i]; itn->children = std::vector(); itn->content = itn->content->convert(); return false; } } // add information for missing loops for (int j = 0; j < n_dim; j++) if (!processed[j]) { ir_tree_node *itn = ir_stmt[max_loc]; while (itn->parent != NULL) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == j) break; } Variable_ID v = r.set_var(j + 1); if (loc < max_loc) { CG_outputRepr *lb = static_cast(itn->content)->lower_bound(); exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, true); } else { // loc > max_loc CG_outputRepr *ub = static_cast(itn->content)->upper_bound(); exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, true); } } r.setup_names(); r.simplify(); // insert the statement CG_outputBuilder *ocg = ir->builder(); Tuple reverse_expr; for (int j = 1; j <= vars_to_be_reversed.size(); j++) { CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); repl = ocg->CreateMinus(NULL, repl); reverse_expr.append(repl); } CG_outputRepr *code = static_cast(ir_stmt[loc]->content)->original(); code = ocg->CreatePlaceHolder(0, code, reverse_expr, vars_to_be_reversed); stmt[loc].code = code; stmt[loc].IS = r; stmt[loc].loop_level = std::vector(n_dim); for (int i = 0; i < n_dim; i++) { stmt[loc].loop_level[i].type = LoopLevelOriginal; stmt[loc].loop_level[i].payload = i; stmt[loc].loop_level[i].parallel_level = 0; } stmt_nesting_level[loc] = -1; } return true; } Loop::Loop(const IR_Control *control) { ir = const_cast(control->ir_); init_code = NULL; cleanup_code = NULL; tmp_loop_var_name_counter = 1; overflow_var_name_counter = 1; known = Relation::True(0); std::vector ir_tree = build_ir_tree(control->clone(), NULL); std::vector ir_stmt; while (!init_loop(ir_tree, ir_stmt)) { } // init the dependence graph for (int i = 0; i < stmt.size(); i++) dep.insert(); for (int i = 0; i < stmt.size(); i++) for (int j = i; j < stmt.size(); j++) { std::pair, std::vector > dv = test_data_dependences( ir_, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, freevar, index, stmt_nesting_level_[i], stmt_nesting_level[j]); for (int k = 0; k < dv.first.size(); k++) { if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], true)) dep.connect(i, j, dv.first[k]); else { dep.connect(j, i, dv.first[k].reverse()); } } for (int k = 0; k < dv.second.size(); k++) if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], false)) dep.connect(j, i, dv.second[k]); else { dep.connect(i, j, dv.second[k].reverse()); } // std::pair, // std::vector > dv_ = test_data_dependences( } for (int i = 0; i < stmt.size(); i++) { std::map::iterator it = replace.find(i); if (it != replace.end()) stmt[i].code = (it->second)->clone(); else stmt[i].code = stmt[i].code->clone(); } // cleanup the IR tree for (int i = 0; i < ir_tree.size(); i++) delete ir_tree[i]; // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] for (int i = 0; i < stmt.size(); i++) { int n = stmt[i].IS.n_set(); stmt[i].xform = Relation(n, 2 * n + 1); F_And *f_root = stmt[i].xform.add_and(); for (int j = 1; j <= n; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(stmt[i].xform.output_var(2 * j), 1); h.update_coef(stmt[i].xform.input_var(j), -1); } for (int j = 1; j <= 2 * n + 1; j += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(stmt[i].xform.output_var(j), 1); } stmt[i].xform.simplify(); } if (stmt.size() != 0) num_dep_dim = stmt[0].IS.n_set(); else num_dep_dim = 0; } Loop::~Loop() { for (int i = 0; i < stmt.size(); i++) if (stmt[i].code != NULL) { stmt[i].code->clear(); delete stmt[i].code; } if (init_code != NULL) { init_code->clear(); delete init_code; } if (cleanup_code != NULL) { cleanup_code->clear(); delete cleanup_code; } } int Loop::get_dep_dim_of(int stmt_num, int level) const { if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invaid statement " + to_string(stmt_num)); if (level < 1 || level > stmt[stmt_num].loop_level.size()) return -1; int trip_count = 0; while (true) { switch (stmt[stmt_num].loop_level[level - 1].type) { case LoopLevelOriginal: return stmt[stmt_num].loop_level[level - 1].payload; case LoopLevelTile: level = stmt[stmt_num].loop_level[level - 1].payload; if (level < 1) return -1; if (level > stmt[stmt_num].loop_level.size()) throw loop_error( "incorrect loop level information for statement " + to_string(stmt_num)); break; default: throw loop_error( "unknown loop level information for statement " + to_string(stmt_num)); } trip_count++; if (trip_count >= stmt[stmt_num].loop_level.size()) throw loop_error( "incorrect loop level information for statement " + to_string(stmt_num)); } } int Loop::get_last_dep_dim_before(int stmt_num, int level) const { if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invaid statement " + to_string(stmt_num)); if (level < 1) return -1; if (level > stmt[stmt_num].loop_level.size()) level = stmt[stmt_num].loop_level.size() + 1; for (int i = level - 1; i >= 1; i--) if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) return stmt[stmt_num].loop_level[i - 1].payload; return -1; } void Loop::print_internal_loop_structure() const { for (int i = 0; i < stmt.size(); i++) { std::vector lex = getLexicalOrder(i); std::cout << "s" << i + 1 << ": "; for (int j = 0; j < stmt[i].loop_level.size(); j++) { if (2 * j < lex.size()) std::cout << lex[2 * j]; switch (stmt[i].loop_level[j].type) { case LoopLevelOriginal: std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; break; case LoopLevelTile: std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; break; default: std::cout << "(unknown)"; } std::cout << ' '; } for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { std::cout << lex[j]; if (j != lex.size() - 1) std::cout << ' '; } std::cout << std::endl; } } CG_outputRepr *Loop::getCode(int effort) const { const int m = stmt.size(); if (m == 0) return NULL; const int n = stmt[0].xform.n_out(); Tuple ni(m); Tuple < Relation > IS(m); Tuple < Relation > xform(m); for (int i = 0; i < m; i++) { ni[i + 1] = stmt[i].code; IS[i + 1] = stmt[i].IS; xform[i + 1] = stmt[i].xform; } Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *repr = MMGenerateCode(ocg, xform, IS, ni, known, effort); if (init_code != NULL) repr = ocg->StmtListAppend(init_code->clone(), repr); if (cleanup_code != NULL) repr = ocg->StmtListAppend(repr, cleanup_code->clone()); return repr; } void Loop::printCode(int effort) const { const int m = stmt.size(); if (m == 0) return; const int n = stmt[0].xform.n_out(); Tuple < Relation > IS(m); Tuple < Relation > xform(m); for (int i = 0; i < m; i++) { IS[i + 1] = stmt[i].IS; xform[i + 1] = stmt[i].xform; } Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); std::cout << MMGenerateCode(xform, IS, known, effort); } Relation Loop::getNewIS(int stmt_num) const { Relation result; if (stmt[stmt_num].xform.is_null()) { Relation known = Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set()); result = Intersection(copy(stmt[stmt_num].IS), known); } else { Relation known = Extend_Set(copy(this->known), stmt[stmt_num].xform.n_out() - this->known.n_set()); result = Intersection( Range( Restrict_Domain(copy(stmt[stmt_num].xform), copy(stmt[stmt_num].IS))), known); } result.simplify(2, 4); return result; } std::vector Loop::getNewIS() const { const int m = stmt.size(); std::vector new_IS(m); for (int i = 0; i < m; i++) new_IS[i] = getNewIS(i); return new_IS; } void Loop::permute(const std::vector &pi) { std::set active; for (int i = 0; i < stmt.size(); i++) active.insert(i); permute(active, pi); } void Loop::original() { std::set active; for (int i = 0; i < stmt.size(); i++) active.insert(i); setLexicalOrder(0, active); } void Loop::permute(const std::set &active, const std::vector &pi) { if (active.size() == 0 || pi.size() == 0) return; // check for sanity of parameters int level = pi[0]; for (int i = 1; i < pi.size(); i++) if (pi[i] < level) level = pi[i]; if (level < 1) throw std::invalid_argument("invalid permuation"); std::vector reverse_pi(pi.size(), 0); for (int i = 0; i < pi.size(); i++) if (pi[i] >= level + pi.size()) throw std::invalid_argument("invalid permutation"); else reverse_pi[pi[i] - level] = i + level; for (int i = 0; i < reverse_pi.size(); i++) if (reverse_pi[i] == 0) throw std::invalid_argument("invalid permuation"); int ref_stmt_num; std::vector lex; for (std::set::iterator i = active.begin(); i != active.end(); i++) { if (*i < 0 || *i >= stmt.size()) throw std::invalid_argument("invalid statement " + to_string(*i)); if (i == active.begin()) { ref_stmt_num = *i; lex = getLexicalOrder(*i); } else { if (level + pi.size() - 1 > stmt[*i].loop_level.size()) throw std::invalid_argument("invalid permuation"); std::vector lex2 = getLexicalOrder(*i); for (int j = 0; j < 2 * level - 3; j += 2) if (lex[j] != lex2[j]) throw std::invalid_argument( "statements to permute must be in the same subloop"); for (int j = 0; j < pi.size(); j++) if (!(stmt[*i].loop_level[level + j - 1].type == stmt[ref_stmt_num].loop_level[level + j - 1].type && stmt[*i].loop_level[level + j - 1].payload == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) throw std::invalid_argument( "permuted loops must have the same loop level types"); } } // Update transformation relations for (std::set::iterator i = active.begin(); i != active.end(); i++) { int n = stmt[*i].xform.n_out(); Relation mapping(n, n); F_And *f_root = mapping.add_and(); for (int j = 1; j <= n; j += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(j), 1); h.update_coef(mapping.input_var(j), -1); } for (int j = 0; j < pi.size(); j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(2 * (level + j)), 1); h.update_coef(mapping.input_var(2 * pi[j]), -1); } for (int j = 1; j < level; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(2 * j), 1); h.update_coef(mapping.input_var(2 * j), -1); } for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(2 * j), 1); h.update_coef(mapping.input_var(2 * j), -1); } stmt[*i].xform = Composition(mapping, stmt[*i].xform); stmt[*i].xform.simplify(); } // get the permuation for dependence vectors std::vector t; for (int i = 0; i < pi.size(); i++) if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload); int max_dep_dim = -1; int min_dep_dim = num_dep_dim; for (int i = 0; i < t.size(); i++) { if (t[i] > max_dep_dim) max_dep_dim = t[i]; if (t[i] < min_dep_dim) min_dep_dim = t[i]; } if (min_dep_dim > max_dep_dim) return; if (max_dep_dim - min_dep_dim + 1 != t.size()) throw loop_error("cannot update the dependence graph after permuation"); std::vector dep_pi(num_dep_dim); for (int i = 0; i < min_dep_dim; i++) dep_pi[i] = i; for (int i = min_dep_dim; i <= max_dep_dim; i++) dep_pi[i] = t[i - min_dep_dim]; for (int i = max_dep_dim + 1; i < num_dep_dim; i++) dep_pi[i] = i; // update the dependence graph DependenceGraph g; for (int i = 0; i < dep.vertex.size(); i++) g.insert(); for (int i = 0; i < dep.vertex.size(); i++) for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { if ((active.find(i) != active.end() && active.find(j->first) != active.end())) { std::vector dv = j->second; for (int k = 0; k < dv.size(); k++) { switch (dv[k].type) { case DEP_W2R: case DEP_R2W: case DEP_W2W: case DEP_R2R: { std::vector lbounds(num_dep_dim); std::vector ubounds(num_dep_dim); for (int d = 0; d < num_dep_dim; d++) { lbounds[d] = dv[k].lbounds[dep_pi[d]]; ubounds[d] = dv[k].ubounds[dep_pi[d]]; } dv[k].lbounds = lbounds; dv[k].ubounds = ubounds; break; } case DEP_CONTROL: { break; } default: throw loop_error("unknown dependence type"); } } g.connect(i, j->first, dv); } else if (active.find(i) == active.end() && active.find(j->first) == active.end()) { std::vector dv = j->second; g.connect(i, j->first, dv); } else { std::vector dv = j->second; for (int k = 0; k < dv.size(); k++) switch (dv[k].type) { case DEP_W2R: case DEP_R2W: case DEP_W2W: case DEP_R2R: { for (int d = 0; d < num_dep_dim; d++) if (dep_pi[d] != d) { dv[k].lbounds[d] = -posInfinity; dv[k].ubounds[d] = posInfinity; } break; } case DEP_CONTROL: break; default: throw loop_error("unknown dependence type"); } g.connect(i, j->first, dv); } } dep = g; // update loop level information for (std::set::iterator i = active.begin(); i != active.end(); i++) { int cur_dep_dim = min_dep_dim; std::vector new_loop_level(stmt[*i].loop_level.size()); for (int j = 1; j <= stmt[*i].loop_level.size(); j++) if (j >= level && j < level + pi.size()) { switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { case LoopLevelOriginal: new_loop_level[j - 1].type = LoopLevelOriginal; new_loop_level[j - 1].payload = cur_dep_dim++; new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; break; case LoopLevelTile: { new_loop_level[j - 1].type = LoopLevelTile; int ref_level = stmt[*i].loop_level[reverse_pi[j - level] - 1].payload; if (ref_level >= level && ref_level < level + pi.size()) new_loop_level[j - 1].payload = reverse_pi[ref_level - level]; else new_loop_level[j - 1].payload = ref_level; new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; break; } default: throw loop_error( "unknown loop level information for statement " + to_string(*i)); } } else { switch (stmt[*i].loop_level[j - 1].type) { case LoopLevelOriginal: new_loop_level[j - 1].type = LoopLevelOriginal; new_loop_level[j - 1].payload = stmt[*i].loop_level[j - 1].payload; new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - 1].parallel_level; break; case LoopLevelTile: { new_loop_level[j - 1].type = LoopLevelTile; int ref_level = stmt[*i].loop_level[j - 1].payload; if (ref_level >= level && ref_level < level + pi.size()) new_loop_level[j - 1].payload = reverse_pi[ref_level - level]; else new_loop_level[j - 1].payload = ref_level; new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - 1].parallel_level; break; } default: throw loop_error( "unknown loop level information for statement " + to_string(*i)); } } stmt[*i].loop_level = new_loop_level; } setLexicalOrder(2 * level - 2, active); } std::set Loop::split(int stmt_num, int level, const Relation &cond) { // check for sanity of parameters if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); std::set result; int dim = 2 * level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_loop = getStatements(lex, dim - 1); Relation cond2 = copy(cond); cond2.simplify(); cond2 = EQs_to_GEQs(cond2); Conjunct *c = cond2.single_conjunct(); int cur_lex = lex[dim - 1]; for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { int max_level = (*gi).max_tuple_pos(); Relation single_cond(max_level); single_cond.and_with_GEQ(*gi); // TODO: should decide where to place newly created statements with // complementary split condition from dependence graph. bool place_after; if (max_level == 0) place_after = true; else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) place_after = true; else place_after = false; // original statements with split condition, // new statements with complement of split condition int old_num_stmt = stmt.size(); std::map what_stmt_num; apply_xform(same_loop); for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { int n = stmt[*i].IS.n_set(); Relation part1, part2; if (max_level > n) { part1 = copy(stmt[*i].IS); part2 = Relation::False(0); } else { part1 = Intersection(copy(stmt[*i].IS), Extend_Set(copy(single_cond), n - max_level)); part2 = Intersection(copy(stmt[*i].IS), Extend_Set(Complement(copy(single_cond)), n - max_level)); } //split dependence check if (max_level > level) { DNF_Iterator di1(stmt[*i].IS.query_DNF()); DNF_Iterator di2(part1.query_DNF()); for (; di1 && di2; di1++, di2++) { //printf("In next conjunct,\n"); EQ_Iterator ei1 = (*di1)->EQs(); EQ_Iterator ei2 = (*di2)->EQs(); for (; ei1 && ei2; ei1++, ei2++) { //printf(" In next equality constraint,\n"); Constr_Vars_Iter cvi1(*ei1); Constr_Vars_Iter cvi2(*ei2); int dimension = (*cvi1).var->get_position(); int same = 0; bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { for (; cvi1 && cvi2; cvi1++, cvi2++) { if (((*cvi1).coef != (*cvi2).coef || (*ei1).get_const() != (*ei2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { same++; } } } if ((same != 0) || !identical) { dimension = dimension - 1; while (stmt[*i].loop_level[dimension].type == LoopLevelTile) dimension = xform_index[dimension].first; dimension = stmt[*i].loop_level[dimension].payload; for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if ((dv.hasNegative(dimension) && !dv.quasi) || (dv.hasPositive(dimension) && dv.quasi)) throw loop_error( "loop error: Split is illegal, dependence violation!"); } } } } GEQ_Iterator gi1 = (*di1)->GEQs(); GEQ_Iterator gi2 = (*di2)->GEQs(); for (; gi1 && gi2; gi++, gi2++) { Constr_Vars_Iter cvi1(*gi1); Constr_Vars_Iter cvi2(*gi2); int dimension = (*cvi1).var->get_position(); int same = 0; bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { for (; cvi1 && cvi2; cvi1++, cvi2++) { if (((*cvi1).coef != (*cvi2).coef || (*gi1).get_const() != (*gi2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { same++; } } } if ((same != 0) || !identical) { dimension = dimension - 1; while (stmt[*i].loop_level[dimension].type == LoopLevelTile) dimension = xform_index[dimension].first; dimension = stmt[*i].loop_level[dimension].payload; for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if ((dv.hasNegative(dimension) && !dv.quasi) || (dv.hasPositive( dimension) && dv.quasi)) throw loop_error( "loop error: Split is illegal, dependence violation!"); } } } } } } } DNF_Iterator di3(stmt[*i].IS.query_DNF()); DNF_Iterator di4(part2.query_DNF()); for (; di3 && di4; di3++, di4++) { EQ_Iterator ei1 = (*di3)->EQs(); EQ_Iterator ei2 = (*di4)->EQs(); for (; ei1 && ei2; ei1++, ei2++) { Constr_Vars_Iter cvi1(*ei1); Constr_Vars_Iter cvi2(*ei2); int dimension = (*cvi1).var->get_position(); int same = 0; bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { for (; cvi1 && cvi2; cvi1++, cvi2++) { if (((*cvi1).coef != (*cvi2).coef || (*ei1).get_const() != (*ei2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { same++; } } } if ((same != 0) || !identical) { dimension = dimension - 1; while (stmt[*i].loop_level[dimension].type == LoopLevelTile) dimension = xform_index[dimension].first; dimension = stmt[*i].loop_level[dimension].payload; for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if ((dv.hasNegative(dimension) && !dv.quasi) || (dv.hasPositive(dimension) && dv.quasi)) throw loop_error( "loop error: Split is illegal, dependence violation!"); } } } } } GEQ_Iterator gi1 = (*di3)->GEQs(); GEQ_Iterator gi2 = (*di4)->GEQs(); for (; gi1 && gi2; gi++, gi2++) { Constr_Vars_Iter cvi1(*gi1); Constr_Vars_Iter cvi2(*gi2); int dimension = (*cvi1).var->get_position(); int same = 0; bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { for (; cvi1 && cvi2; cvi1++, cvi2++) { if (((*cvi1).coef != (*cvi2).coef || (*gi1).get_const() != (*gi2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { same++; } } } if ((same != 0) || !identical) { dimension = dimension - 1; while (stmt[*i].loop_level[dimension].type == LoopLevelTile) dimension = xform_index[dimension].first; dimension = stmt[*i].loop_level[dimension].payload; for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if ((dv.hasNegative(dimension) && !dv.quasi) || (dv.hasPositive(dimension) && dv.quasi)) throw loop_error( "loop error: Split is illegal, dependence violation!"); } } } } } } } stmt[*i].IS = part1; if (Intersection(copy(part2), Extend_Set(copy(this->known), n - this->known.n_set())).is_upper_bound_satisfiable()) { Statement new_stmt; new_stmt.code = stmt[*i].code->clone(); new_stmt.IS = part2; new_stmt.xform = copy(stmt[*i].xform); new_stmt.loop_level = stmt[*i].loop_level; stmt.push_back(new_stmt); dep.insert(); what_stmt_num[*i] = stmt.size() - 1; if (*i == stmt_num) result.insert(stmt.size() - 1); stmt_nesting_level_.push_back(stmt_nesting_level[*i]); std::pair, std::vector > dv = test_data_dependences(ir_, stmt[*i].code, part1, stmt[*i].code, part2, freevar, index, stmt_nesting_level[*i], stmt_nesting_level[stmt.size() - 1]); int part1_to_part2 = 0; int part2_to_part1 = 0; for (int k = 0; k < dv.first.size(); k++) if (is_dependence_valid_based_on_lex_order(*i, what_stmt_num[*i], dv.first[k], true)) part1_to_part2++; else part2_to_part1++; if (part1_to_part2 > 0 && part2_to_part1 > 0) throw loop_error( "loop error: Aborting, split resulted in impossible dependence cycle!"); for (int k = 0; k < dv.second.size(); k++) if (is_dependence_valid_based_on_lex_order( what_stmt_num[*i], *i, dv.second[k], false)) part2_to_part1++; else part1_to_part2++; if (part1_to_part2 > 0 && part2_to_part1 > 0) throw loop_error( "loop error: Aborting, split resulted in impossible dependence cycle!"); bool temp_place_after; if (part2_to_part1 > 0) temp_place_after = false; else temp_place_after = true; if (i == same_loop.begin()) place_after = temp_place_after; else { if (temp_place_after != place_after) throw loop_error( "loop error: Aborting, split resulted in impossible dependence cycle!"); } if (place_after) assign_const(new_stmt.xform, dim - 1, cur_lex + 1); else assign_const(new_stmt.xform, dim - 1, cur_lex - 1); } } // make adjacent lexical number available for new statements if (place_after) { lex[dim - 1] = cur_lex + 1; shiftLexicalOrder(lex, dim - 1, 1); } else { lex[dim - 1] = cur_lex - 1; shiftLexicalOrder(lex, dim - 1, -1); } // update dependence graph int dep_dim = get_dep_dim_of(stmt_num, level); for (int i = 0; i < old_num_stmt; i++) { std::vector > > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { if (what_stmt_num.find(i) != what_stmt_num.end() && what_stmt_num.find(j->first) != what_stmt_num.end()) dep.connect(what_stmt_num[i], what_stmt_num[j->first], j->second); if (place_after && what_stmt_num.find(j->first) != what_stmt_num.end()) { std::vector dvs; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.is_data_dependence() && dep_dim != -1) { dv.lbounds[dep_dim] = -posInfinity; dv.ubounds[dep_dim] = posInfinity; } dvs.push_back(dv); } if (dvs.size() > 0) D.push_back( std::make_pair(what_stmt_num[j->first], dvs)); } else if (!place_after && what_stmt_num.find(i) != what_stmt_num.end()) { std::vector dvs; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.is_data_dependence() && dep_dim != -1) { dv.lbounds[dep_dim] = -posInfinity; dv.ubounds[dep_dim] = posInfinity; } dvs.push_back(dv); } if (dvs.size() > 0) dep.connect(what_stmt_num[i], j->first, dvs); } } else { if (what_stmt_num.find(i) != what_stmt_num.end()) dep.connect(what_stmt_num[i], j->first, j->second); } } else if (same_loop.find(j->first) != same_loop.end()) { if (what_stmt_num.find(j->first) != what_stmt_num.end()) D.push_back( std::make_pair(what_stmt_num[j->first], j->second)); } } for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } } return result; } void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, TilingMethodType method, int alignment_offset, int alignment_multiple) { // check for sanity of parameters if (tile_size < 0) throw std::invalid_argument("invalid tile size"); if (alignment_multiple < 1 || alignment_offset < 0) throw std::invalid_argument("invalid alignment for tile"); if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); if (level <= 0) throw std::invalid_argument("invalid loop level " + to_string(level)); if (level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument( "there is no loop level " + to_string(level) + " for statement " + to_string(stmt_num)); if (outer_level <= 0 || outer_level > level) throw std::invalid_argument( "invalid tile controlling loop level " + to_string(outer_level)); int dim = 2 * level - 1; int outer_dim = 2 * outer_level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_tiled_loop = getStatements(lex, dim - 1); std::set same_tile_controlling_loop = getStatements(lex, outer_dim - 1); for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; int dim2 = level - 1; if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { while (stmt[i].loop_level[dim2].type == LoopLevelTile) { dim2 = stmt[i].loop_level[dim2].payload; } dim2 = stmt[i].loop_level[dim2].payload; if ((dv.hasNegative(dim2) && (!dv.quasi)) || (dv.quasi && dv.hasPositive(dim2))) { for (int l = outer_level; l < level; l++) if (stmt[i].loop_level[l - 1].type != LoopLevelTile) { if (dv.isCarried( stmt[i].loop_level[l - 1].payload)) throw loop_error( "loop error: Tiling is illegal, dependence violation!"); } else { int dim3 = l - 1; while (stmt[i].loop_level[l - 1].type != LoopLevelTile) { dim3 = stmt[i].loop_level[l - 1].payload; } dim3 = stmt[i].loop_level[l - 1].payload; if (dim3 < level - 1) if (dv.isCarried(dim3)) throw loop_error( "loop error: Tiling is illegal, dependence violation!"); } } } } } } // special case for no tiling if (tile_size == 0) { for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); F_And *f_root = r.add_and(); for (int j = 1; j <= 2 * outer_level - 1; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.input_var(j), 1); h.update_coef(r.output_var(j), -1); } EQ_Handle h1 = f_root->add_EQ(); h1.update_coef(r.output_var(2 * outer_level), 1); EQ_Handle h2 = f_root->add_EQ(); h2.update_coef(r.output_var(2 * outer_level + 1), 1); for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.input_var(j), 1); h.update_coef(r.output_var(j + 2), -1); } stmt[*i].xform = Composition(copy(r), stmt[*i].xform); } } // normal tiling else { std::set private_stmt; for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { // if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) // same_tiled_loop.insert(*i); // should test dim's value directly but it is ok for now // if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) if (same_tiled_loop.find(*i) == same_tiled_loop.end() && overflow.find(*i) != overflow.end()) private_stmt.insert(*i); } // extract the union of the iteration space to be considered Relation hull; { Tuple < Relation > r_list; Tuple r_mask; for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) if (private_stmt.find(*i) == private_stmt.end()) { Relation r = project_onto_levels(getNewIS(*i), dim + 1, true); for (int j = outer_dim; j < dim; j++) r = Project(r, j + 1, Set_Var); for (int j = 0; j < outer_dim; j += 2) r = Project(r, j + 1, Set_Var); r_list.append(r); r_mask.append(1); } hull = Hull(r_list, r_mask, 1, true); } // extract the bound of the dimension to be tiled Relation bound = get_loop_bound(hull, dim); if (!bound.has_single_conjunct()) { // further simplify the bound hull = Approximate(hull); bound = get_loop_bound(hull, dim); int i = outer_dim - 2; while (!bound.has_single_conjunct() && i >= 0) { hull = Project(hull, i + 1, Set_Var); bound = get_loop_bound(hull, dim); i -= 2; } if (!bound.has_single_conjunct()) throw loop_error("cannot handle tile bounds"); } // separate lower and upper bounds std::vector lb_list, ub_list; { Conjunct *c = bound.query_DNF()->single_conjunct(); for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { int coef = (*gi).get_coef(bound.set_var(dim + 1)); if (coef < 0) ub_list.push_back(*gi); else if (coef > 0) lb_list.push_back(*gi); } } if (lb_list.size() == 0) throw loop_error( "unable to calculate tile controlling loop lower bound"); if (ub_list.size() == 0) throw loop_error( "unable to calculate tile controlling loop upper bound"); // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile int simplest_lb = 0, simplest_ub = 0; if (method == StridedTile) { int best_cost = INT_MAX; for (int i = 0; i < lb_list.size(); i++) { int cost = 0; for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { cost += 5; break; } case Global_Var: { cost += 2; break; } default: cost += 15; break; } } if (cost < best_cost) { best_cost = cost; simplest_lb = i; } } } else if (method == CountedTile) { std::map s1, s2, s3; int best_cost = INT_MAX; for (int i = 0; i < lb_list.size(); i++) for (int j = 0; j < ub_list.size(); j++) { int cost = 0; for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { s1[(*ci).var] += (*ci).coef; break; } case Global_Var: { s2[(*ci).var] += (*ci).coef; break; } case Exists_Var: case Wildcard_Var: { s3[(*ci).var] += (*ci).coef; break; } default: cost = INT_MAX - 2; break; } } for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { s1[(*ci).var] += (*ci).coef; break; } case Global_Var: { s2[(*ci).var] += (*ci).coef; break; } case Exists_Var: case Wildcard_Var: { s3[(*ci).var] += (*ci).coef; break; } default: if (cost == INT_MAX - 2) cost = INT_MAX - 1; else cost = INT_MAX - 3; break; } } if (cost == 0) { for (std::map::iterator k = s1.begin(); k != s1.end(); k++) if ((*k).second != 0) cost += 5; for (std::map::iterator k = s2.begin(); k != s2.end(); k++) if ((*k).second != 0) cost += 2; for (std::map::iterator k = s3.begin(); k != s3.end(); k++) if ((*k).second != 0) cost += 15; } if (cost < best_cost) { best_cost = cost; simplest_lb = i; simplest_ub = j; } } } // prepare the new transformation relations for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); F_And *f_root = r.add_and(); for (int j = 0; j < outer_dim - 1; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(j + 1), 1); h.update_coef(r.input_var(j + 1), -1); } for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(j + 3), 1); h.update_coef(r.input_var(j + 1), -1); } EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(outer_dim), 1); h.update_const(-lex[outer_dim - 1]); stmt[*i].xform = Composition(r, stmt[*i].xform); } // add tiling constraints. for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { F_And *f_super_root = stmt[*i].xform.and_with_and(); F_Exists *f_exists = f_super_root->add_exists(); F_And *f_root = f_exists->add_and(); // create a lower bound variable for easy formula creation later Variable_ID aligned_lb; { Variable_ID lb = f_exists->declare(); coef_t coef = lb_list[simplest_lb].get_coef( bound.set_var(dim + 1)); if (coef == 1) { // e.g. if i >= m+5, then LB = m+5 EQ_Handle h = f_root->add_EQ(); h.update_coef(lb, 1); for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { int pos = (*ci).var->get_position(); if (pos != dim + 1) h.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); break; } case Global_Var: { Global_Var_ID g = (*ci).var->get_global_var(); Variable_ID v; if (g->arity() == 0) v = stmt[*i].xform.get_local(g); else v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); h.update_coef(v, (*ci).coef); break; } default: throw loop_error("cannot handle tile bounds"); } } h.update_const(lb_list[simplest_lb].get_const()); } else { // e.g. if 2i >= m+5, then m+5 <= 2*LB < m+5+2 GEQ_Handle h1 = f_root->add_GEQ(); GEQ_Handle h2 = f_root->add_GEQ(); for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { int pos = (*ci).var->get_position(); if (pos == dim + 1) { h1.update_coef(lb, (*ci).coef); h2.update_coef(lb, -(*ci).coef); } else { h1.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); h2.update_coef(stmt[*i].xform.output_var(pos), -(*ci).coef); } break; } case Global_Var: { Global_Var_ID g = (*ci).var->get_global_var(); Variable_ID v; if (g->arity() == 0) v = stmt[*i].xform.get_local(g); else v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); h1.update_coef(v, (*ci).coef); h2.update_coef(v, -(*ci).coef); break; } default: throw loop_error("cannot handle tile bounds"); } } h1.update_const(lb_list[simplest_lb].get_const()); h2.update_const(-lb_list[simplest_lb].get_const()); h2.update_const(coef - 1); } Variable_ID offset_lb; if (alignment_offset == 0) offset_lb = lb; else { EQ_Handle h = f_root->add_EQ(); offset_lb = f_exists->declare(); h.update_coef(offset_lb, 1); h.update_coef(lb, -1); h.update_const(alignment_offset); } if (alignment_multiple == 1) { // trivial aligned_lb = offset_lb; } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB aligned_lb = f_exists->declare(); Variable_ID e = f_exists->declare(); EQ_Handle h = f_root->add_EQ(); h.update_coef(aligned_lb, 1); h.update_coef(e, -alignment_multiple); GEQ_Handle h1 = f_root->add_GEQ(); GEQ_Handle h2 = f_root->add_GEQ(); h1.update_coef(e, alignment_multiple); h2.update_coef(e, -alignment_multiple); h1.update_coef(offset_lb, -1); h2.update_coef(offset_lb, 1); h1.update_const(alignment_multiple - 1); } } // create an upper bound variable for easy formula creation later Variable_ID ub = f_exists->declare(); { coef_t coef = -ub_list[simplest_ub].get_coef( bound.set_var(dim + 1)); if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 EQ_Handle h = f_root->add_EQ(); h.update_coef(ub, -1); for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { int pos = (*ci).var->get_position(); if (pos != dim + 1) h.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); break; } case Global_Var: { Global_Var_ID g = (*ci).var->get_global_var(); Variable_ID v; if (g->arity() == 0) v = stmt[*i].xform.get_local(g); else v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); h.update_coef(v, (*ci).coef); break; } default: throw loop_error("cannot handle tile bounds"); } } h.update_const(ub_list[simplest_ub].get_const()); } else { // e.g. if 2i <= m+5, then m+5-2 < 2*UB <= m+5 GEQ_Handle h1 = f_root->add_GEQ(); GEQ_Handle h2 = f_root->add_GEQ(); for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { int pos = (*ci).var->get_position(); if (pos == dim + 1) { h1.update_coef(ub, -(*ci).coef); h2.update_coef(ub, (*ci).coef); } else { h1.update_coef(stmt[*i].xform.output_var(pos), -(*ci).coef); h2.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); } break; } case Global_Var: { Global_Var_ID g = (*ci).var->get_global_var(); Variable_ID v; if (g->arity() == 0) v = stmt[*i].xform.get_local(g); else v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); h1.update_coef(v, -(*ci).coef); h2.update_coef(v, (*ci).coef); break; } default: throw loop_error("cannot handle tile bounds"); } } h1.update_const(-ub_list[simplest_ub].get_const()); h2.update_const(ub_list[simplest_ub].get_const()); h1.update_const(coef - 1); } } // insert tile controlling loop constraints if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 Variable_ID e = f_exists->declare(); GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(e, 1); EQ_Handle h2 = f_root->add_EQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); h2.update_coef(e, -tile_size); h2.update_coef(aligned_lb, -1); } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -tile_size); h2.update_coef(aligned_lb, -1); h2.update_coef(ub, 1); } // special care for private statements like overflow assignment if (private_stmt.find(*i) != private_stmt.end()) { // e.g. ii <= UB GEQ_Handle h = f_root->add_GEQ(); h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); h.update_coef(ub, 1); } // if (private_stmt.find(*i) != private_stmt.end()) { // if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii // GEQ_Handle h = f_root->add_GEQ(); // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); // h.update_coef(ub, 1); // stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); // f_root = stmt[*i].xform.and_with_and(); // EQ_Handle h1 = f_root->add_EQ(); // h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); // h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); // } // else if (method == StridedTile) { // e.g. ii <= UB since i does not exist // GEQ_Handle h = f_root->add_GEQ(); // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); // h.update_coef(ub, 1); // } // } // restrict original loop index inside the tile else { if (method == StridedTile) { // e.g. ii <= i < ii + tile_size GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); h2.update_const(tile_size - 1); } else if (method == CountedTile) { // e.g. LB+32*ii <= i < LB+32*ii+tile_size GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -tile_size); h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); h1.update_coef(aligned_lb, -1); GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), tile_size); h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); h2.update_const(tile_size - 1); h2.update_coef(aligned_lb, 1); } } } } // update loop level information for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { for (int j = 1; j <= stmt[*i].loop_level.size(); j++) switch (stmt[*i].loop_level[j - 1].type) { case LoopLevelOriginal: break; case LoopLevelTile: if (stmt[*i].loop_level[j - 1].payload >= outer_level) stmt[*i].loop_level[j - 1].payload++; break; default: throw loop_error( "unknown loop level type for statement " + to_string(*i)); } LoopLevel ll; ll.type = LoopLevelTile; ll.payload = level + 1; ll.parallel_level = 0; stmt[*i].loop_level.insert( stmt[*i].loop_level.begin() + (outer_level - 1), ll); } } std::set Loop::unroll(int stmt_num, int level, int unroll_amount) { // check for sanity of parameters if (unroll_amount < 0) throw std::invalid_argument( "invalid unroll amount " + to_string(unroll_amount)); if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); int dim = 2 * level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_loop = getStatements(lex, dim - 1); // nothing to do if (unroll_amount == 1) return std::set(); for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; int dim2 = level - 1; if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { while (stmt[i].loop_level[dim2].type == LoopLevelTile) { dim2 = xform_index[dim2].first; } dim2 = stmt[i].loop_level[dim2].payload; if (dv.isCarried(dim2) && (dv.hasNegative(dim2) && !dv.quasi)) throw loop_error( "loop error: Unrolling is illegal, dependence violation!"); if (dv.isCarried(dim2) && (dv.hasPositive(dim2) && dv.quasi)) throw loop_error( "loop error: Unrolling is illegal, dependence violation!"); bool safe = false; if (dv.isCarried(dim2)) { if (!dv.quasi) { if (dv.lbounds[dim2] != posInfinity) { if (dv.lbounds[dim2] != negInfinity) if (dv.lbounds[dim2] > unroll_amount) safe = true; } else safe = true; } else { if (dv.ubounds[dim2] != negInfinity) { if (dv.ubounds[dim2] != posInfinity) if ((-(dv.ubounds[dim2])) > unroll_amount) safe = true; } else safe = true; } if (!safe) { for (int l = level; l <= (n - 1) / 2; l++) { int dim3 = l - 1; if (stmt[i].loop_level[dim3].type != LoopLevelTile) dim3 = stmt[i].loop_level[dim3].payload; else { while (stmt[i].loop_level[dim2].type == LoopLevelTile) { dim3 = stmt[i].loop_level[dim3].payload; } dim3 = stmt[i].loop_level[dim3].payload; } if (dim3 > dim2) { if ((dv.hasPositive(dim3) && !dv.quasi) || (dv.hasNegative(dim3) && dv.quasi)) break; else if ((dv.hasNegative(dim3) && !dv.quasi) || (dv.hasPositive(dim3) && dv.quasi)) throw loop_error( "loop error: Unrolling is illegal, dependence violation!"); } } } } } } } } // extract the intersection of the iteration space to be considered Relation hull = Relation::True(level); apply_xform(same_loop); for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { if (stmt[*i].IS.is_upper_bound_satisfiable()) { Relation mapping(stmt[*i].IS.n_set(), level); F_And *f_root = mapping.add_and(); for (int j = 1; j <= level; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.input_var(j), 1); h.update_coef(mapping.output_var(j), -1); } hull = Intersection(hull, Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); hull.simplify(2, 4); } } for (int i = 1; i <= level; i++) { std::string name = tmp_loop_var_name_prefix + to_string(i); hull.name_set_var(i, name); } hull.setup_names(); // extract the exact loop bound of the dimension to be unrolled if (is_single_loop_iteration(hull, level, this->known)) return std::set(); Relation bound = get_loop_bound(hull, level, this->known); if (!bound.has_single_conjunct() || !bound.is_satisfiable() || bound.is_tautology()) throw loop_error("unable to extract loop bound for unrolling"); // extract the loop stride EQ_Handle stride_eq; int stride = 1; { bool simple_stride = true; int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level), stride_eq, simple_stride); if (strides > 1) throw loop_error("too many strides"); else if (strides == 1) { int sign = stride_eq.get_coef(bound.set_var(level)); Constr_Vars_Iter it(stride_eq, true); stride = abs((*it).coef / sign); } } // separate lower and upper bounds std::vector lb_list, ub_list; { Conjunct *c = bound.query_DNF()->single_conjunct(); for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { int coef = (*gi).get_coef(bound.set_var(level)); if (coef < 0) ub_list.push_back(*gi); else if (coef > 0) lb_list.push_back(*gi); } } // simplify overflow expression for each pair of upper and lower bounds std::vector > > overflow_table( lb_list.size(), std::vector >(ub_list.size(), std::map())); bool is_overflow_simplifiable = true; for (int i = 0; i < lb_list.size(); i++) { if (!is_overflow_simplifiable) break; for (int j = 0; j < ub_list.size(); j++) { // lower bound or upper bound has non-unit coefficient, can't simplify if (ub_list[j].get_coef(bound.set_var(level)) != -1 || lb_list[i].get_coef(bound.set_var(level)) != 1) { is_overflow_simplifiable = false; break; } for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { if ((*ci).var != bound.set_var(level)) overflow_table[i][j][(*ci).var] += (*ci).coef; break; } case Global_Var: { Global_Var_ID g = (*ci).var->get_global_var(); Variable_ID v; if (g->arity() == 0) v = bound.get_local(g); else v = bound.get_local(g, (*ci).var->function_of()); overflow_table[i][j][(*ci).var] += (*ci).coef; break; } default: throw loop_error("failed to calculate overflow amount"); } } overflow_table[i][j][NULL] += ub_list[j].get_const(); for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { case Input_Var: { if ((*ci).var != bound.set_var(level)) { overflow_table[i][j][(*ci).var] += (*ci).coef; if (overflow_table[i][j][(*ci).var] == 0) overflow_table[i][j].erase( overflow_table[i][j].find((*ci).var)); } break; } case Global_Var: { Global_Var_ID g = (*ci).var->get_global_var(); Variable_ID v; if (g->arity() == 0) v = bound.get_local(g); else v = bound.get_local(g, (*ci).var->function_of()); overflow_table[i][j][(*ci).var] += (*ci).coef; if (overflow_table[i][j][(*ci).var] == 0) overflow_table[i][j].erase( overflow_table[i][j].find((*ci).var)); break; } default: throw loop_error("failed to calculate overflow amount"); } } overflow_table[i][j][NULL] += lb_list[i].get_const(); overflow_table[i][j][NULL] += stride; if (unroll_amount == 0 || (overflow_table[i][j].size() == 1 && overflow_table[i][j][NULL] / stride < unroll_amount)) unroll_amount = overflow_table[i][j][NULL] / stride; } } // loop iteration count can't be determined, bail out gracefully if (unroll_amount == 0) return std::set(); // further simply overflow calculation using coefficients' modular if (is_overflow_simplifiable) { for (int i = 0; i < lb_list.size(); i++) for (int j = 0; j < ub_list.size(); j++) if (stride == 1) { for (std::map::iterator k = overflow_table[i][j].begin(); k != overflow_table[i][j].end();) if ((*k).first != NULL) { int t = int_mod_hat((*k).second, unroll_amount); if (t == 0) { overflow_table[i][j].erase(k++); } else { int t2 = hull.query_variable_mod((*k).first, unroll_amount); if (t2 != INT_MAX) { overflow_table[i][j][NULL] += t * t2; overflow_table[i][j].erase(k++); } else { (*k).second = t; k++; } } } else k++; overflow_table[i][j][NULL] = int_mod_hat( overflow_table[i][j][NULL], unroll_amount); // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula for (std::map::iterator k = overflow_table[i][j].begin(); k != overflow_table[i][j].end(); k++) if ((*k).second < 0) (*k).second += unroll_amount; } } // build overflow statement CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *overflow_code = NULL; Relation cond_upper(level), cond_lower(level); Relation overflow_constraint(0); F_And *overflow_constraint_root = overflow_constraint.add_and(); std::vector over_var_list; if (is_overflow_simplifiable && lb_list.size() == 1) { for (int i = 0; i < ub_list.size(); i++) { if (overflow_table[0][i].size() == 1) { // upper splitting condition GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_const( ((overflow_table[0][i][NULL] / stride) % unroll_amount) * -stride); } else { // upper splitting condition std::string over_name = overflow_var_name_prefix + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_coef(cond_upper.get_local(over_free_var), -stride); // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); h1.update_coef(v, 1); GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); // create overflow assignment bound.setup_names(); CG_outputRepr *rhs = NULL; for (std::map::iterator j = overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) if ((*j).first != NULL) { CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); if ((*j).second != 1) t = ocg->CreateTimes(ocg->CreateInt((*j).second), t); rhs = ocg->CreatePlus(rhs, t); } else if ((*j).second != 0) rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); if (stride != 1) rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); CG_outputRepr *lhs = ocg->CreateIdent(over_name); init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); lhs = ocg->CreateIdent(over_name); overflow_code = ocg->StmtListAppend(overflow_code, ocg->CreateAssignment(0, lhs, rhs)); } } // lower splitting condition GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); } else if (is_overflow_simplifiable && ub_list.size() == 1) { for (int i = 0; i < lb_list.size(); i++) { if (overflow_table[i][0].size() == 1) { // lower splitting condition GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); h.update_const(overflow_table[i][0][NULL] * -stride); } else { // lower splitting condition std::string over_name = overflow_var_name_prefix + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); h.update_coef(cond_lower.get_local(over_free_var), -stride); // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); h1.update_coef(v, 1); GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); // create overflow assignment bound.setup_names(); CG_outputRepr *rhs = NULL; for (std::map::iterator j = overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) if ((*j).first != NULL) { CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); if ((*j).second != 1) t = ocg->CreateTimes(ocg->CreateInt((*j).second), t); rhs = ocg->CreatePlus(rhs, t); } else if ((*j).second != 0) rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); if (stride != 1) rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); CG_outputRepr *lhs = ocg->CreateIdent(over_name); init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); lhs = ocg->CreateIdent(over_name); overflow_code = ocg->StmtListAppend(overflow_code, ocg->CreateAssignment(0, lhs, rhs)); } } // upper splitting condition GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); } else { std::string over_name = overflow_var_name_prefix + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); Tuple lb_repr_list, ub_repr_list; for (int i = 0; i < lb_list.size(); i++) { //lb_repr_list.append(outputLBasRepr(ocg, lb_list[i], bound, bound.set_var(dim+1), stride, stride_eq, Relation::True(bound.n_set()), std::vector(bound.n_set(), NULL))); lb_repr_list.append( outputLBasRepr(ocg, lb_list[i], bound, bound.set_var(dim + 1), stride, stride_eq, Relation::True(bound.n_set()), std::vector(bound.n_set()))); GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); } for (int i = 0; i < ub_list.size(); i++) { //ub_repr_list.append(outputUBasRepr(ocg, ub_list[i], bound, bound.set_var(dim+1), stride, stride_eq, std::vector(bound.n_set(), NULL))); ub_repr_list.append( outputUBasRepr(ocg, ub_list[i], bound, bound.set_var(dim + 1), stride, stride_eq, std::vector(bound.n_set()))); GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_coef(cond_upper.get_local(over_free_var), -stride); } CG_outputRepr *lbRepr, *ubRepr; if (lb_repr_list.size() > 1) lbRepr = ocg->CreateInvoke("max", lb_repr_list); else if (lb_repr_list.size() == 1) lbRepr = lb_repr_list[1]; if (ub_repr_list.size() > 1) ubRepr = ocg->CreateInvoke("min", ub_repr_list); else if (ub_repr_list.size() == 1) ubRepr = ub_repr_list[1]; // create overflow assignment bound.setup_names(); CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), ocg->CreateInt(1)); if (stride != 1) rhs = ocg->CreateIntegerDivide(rhs, ocg->CreateInt(stride)); rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); CG_outputRepr *lhs = ocg->CreateIdent(over_name); init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); lhs = ocg->CreateIdent(over_name); overflow_code = ocg->CreateAssignment(0, lhs, rhs); // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); h1.update_coef(v, 1); GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); } // insert overflow statement int overflow_stmt_num = -1; if (overflow_code != NULL) { // build iteration space for overflow statement Relation mapping(level, level - 1); F_And *f_root = mapping.add_and(); for (int i = 1; i < level; i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(i), 1); h.update_coef(mapping.input_var(i), -1); } Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull))); for (int i = 1; i < level; i++) overflow_IS.name_set_var(i, hull.set_var(i)->name()); overflow_IS.setup_names(); // build dumb transformation relation for overflow statement Relation overflow_xform(level - 1, 2 * (level - 1) + 1); f_root = overflow_xform.add_and(); for (int i = 1; i <= level - 1; i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(overflow_xform.output_var(2 * i), 1); h.update_coef(overflow_xform.input_var(i), -1); h = f_root->add_EQ(); h.update_coef(overflow_xform.output_var(2 * i - 1), 1); h.update_const(-lex[2 * i - 2]); } EQ_Handle h = f_root->add_EQ(); h.update_coef(overflow_xform.output_var(2 * (level - 1) + 1), 1); h.update_const(-lex[2 * (level - 1)]); shiftLexicalOrder(lex, dim - 1, 1); Statement overflow_stmt; overflow_stmt.code = overflow_code; overflow_stmt.IS = overflow_IS; overflow_stmt.xform = overflow_xform; overflow_stmt.loop_level = std::vector(level - 1); for (int i = 0; i < level - 1; i++) { overflow_stmt.loop_level[i].type = stmt[stmt_num].loop_level[i].type; if (stmt[stmt_num].loop_level[i].type == LoopLevelTile && stmt[stmt_num].loop_level[i].payload >= level) overflow_stmt.loop_level[i].payload = -1; else overflow_stmt.loop_level[i].payload = stmt[stmt_num].loop_level[i].payload; overflow_stmt.loop_level[i].parallel_level = stmt[stmt_num].loop_level[i].parallel_level; } stmt.push_back(overflow_stmt); dep.insert(); overflow_stmt_num = stmt.size() - 1; overflow[overflow_stmt_num] = over_var_list; // update the global known information on overflow variable this->known = Intersection(this->known, Extend_Set(copy(overflow_constraint), this->known.n_set() - overflow_constraint.n_set())); // update dependence graph DependenceVector dv; dv.type = DEP_CONTROL; for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) dep.connect(overflow_stmt_num, *i, dv); dv.type = DEP_W2W; { IR_ScalarSymbol *overflow_sym = NULL; std::vector scalars = ir->FindScalarRef( overflow_code); for (int i = scalars.size() - 1; i >= 0; i--) if (scalars[i]->is_write()) { overflow_sym = scalars[i]->symbol(); break; } for (int i = scalars.size() - 1; i >= 0; i--) delete scalars[i]; dv.sym = overflow_sym; } dv.lbounds = std::vector(num_dep_dim, 0); dv.ubounds = std::vector(num_dep_dim, 0); int dep_dim = get_last_dep_dim_before(stmt_num, level); for (int i = dep_dim + 1; i < num_dep_dim; i++) { dv.lbounds[i] = -posInfinity; dv.ubounds[i] = posInfinity; } for (int i = 0; i <= dep_dim; i++) { if (i != 0) { dv.lbounds[i - 1] = 0; dv.ubounds[i - 1] = 0; } dv.lbounds[i] = 1; dv.ubounds[i] = posInfinity; dep.connect(overflow_stmt_num, overflow_stmt_num, dv); } } // split the loop so it can be fully unrolled std::set result = split(stmt_num, level, cond_upper); std::set result2 = split(stmt_num, level, cond_lower); for (std::set::iterator i = result2.begin(); i != result2.end(); i++) result.insert(*i); // check if unrolled statements can be trivially lumped together as one statement bool can_be_lumped = true; if (can_be_lumped) { for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) if (*i != stmt_num) { if (stmt[*i].loop_level.size() != stmt[stmt_num].loop_level.size()) { can_be_lumped = false; break; } for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) if (!(stmt[*i].loop_level[j].type == stmt[stmt_num].loop_level[j].type && stmt[*i].loop_level[j].payload == stmt[stmt_num].loop_level[j].payload)) { can_be_lumped = false; break; } if (!can_be_lumped) break; std::vector lex2 = getLexicalOrder(*i); for (int j = 2 * level; j < lex.size() - 1; j += 2) if (lex[j] != lex2[j]) { can_be_lumped = false; break; } if (!can_be_lumped) break; } } if (can_be_lumped) { for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) if (is_inner_loop_depend_on_level(stmt[*i].IS, level, known)) { can_be_lumped = false; break; } } if (can_be_lumped) { for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) if (*i != stmt_num) { if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) && Must_Be_Subset(copy(stmt[stmt_num].IS), copy(stmt[*i].IS)))) { can_be_lumped = false; break; } } } if (can_be_lumped) { for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { for (DependenceGraph::EdgeList::iterator j = dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) if (same_loop.find(j->first) != same_loop.end()) { for (int k = 0; k < j->second.size(); k++) if (j->second[k].type == DEP_CONTROL || j->second[k].type == DEP_UNKNOWN) { can_be_lumped = false; break; } if (!can_be_lumped) break; } if (!can_be_lumped) break; } } // add strides to original statements // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); // std::vector depending_overflow_var; // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); // if (overflow.find(*i) != overflow.end()) { // // TO DO: It should check whether overflow vaiable depends on // // this loop index and by how much. This step is important if // // you want to unroll loops in arbitrary order. // depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); // continue; // } // } // std::map > pending; // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); // if (overflow.find(*i) != overflow.end()) { // // TO DO: It should check whether overflow vaiable depends on // // this loop index and by how much. This step is important if // // you want to unroll loops in arbitrary order. // depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); // continue; // } // // create copy for each unroll amount // for (int j = 1; j < unroll_amount; j++) { // Tuple funcList; // Tuple loop_vars; // loop_vars.append(stmt[*i].IS.set_var((dim+1)/2)->name()); // funcList.append(ocg->CreatePlus(ocg->CreateIdent(stmt[*i].IS.set_var(level)->name()), ocg->CreateInt(j*stride))); // CG_outputRepr *code = ocg->CreatePlaceHolder(0, stmt[*i].code->clone(), funcList, loop_vars); // // prepare the new statment to insert // Statement unrolled_stmt; // unrolled_stmt.IS = copy(stmt[*i].IS); // // adjust_loop_bound(unrolled_stmt.IS, (dim-1)/2, j); // unrolled_stmt.xform = copy(stmt[*i].xform); // unrolled_stmt.code = code; // unrolled_stmt.loop_level = stmt[*i].loop_level; // pending[*i].push_back(unrolled_stmt); // } // } // // adjust iteration space due to loop bounds depending on this loop // // index and affected overflow variables // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { // for (int j = 0; j < pending[*i].size(); j++) { // adjust_loop_bound(pending[*i][j].IS, (dim-1)/2, j+1, depending_overflow_var); // //pending[*i][j].IS = Intersection(pending[*i][j].IS, Extend_Set(copy(this->known), pending[*i][j].IS.n_set() - this->known.n_set())); // } // } // insert unrolled statements int old_num_stmt = stmt.size(); if (!can_be_lumped) { std::map > what_stmt_num; for (int j = 1; j < unroll_amount; j++) { for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { Statement new_stmt; Tuple funcList; Tuple loop_vars; loop_vars.append(stmt[*i].IS.set_var(level)->name()); funcList.append( ocg->CreatePlus( ocg->CreateIdent( stmt[*i].IS.set_var(level)->name()), ocg->CreateInt(j * stride))); new_stmt.code = ocg->CreatePlaceHolder(0, stmt[*i].code->clone(), funcList, loop_vars); new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); add_loop_stride(new_stmt.IS, bound, level - 1, unroll_amount * stride); new_stmt.xform = copy(stmt[*i].xform); new_stmt.loop_level = stmt[*i].loop_level; stmt.push_back(new_stmt); dep.insert(); what_stmt_num[*i].push_back(stmt.size() - 1); } } for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) add_loop_stride(stmt[*i].IS, bound, level - 1, unroll_amount * stride); // update dependence graph if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; int new_stride = unroll_amount * stride; for (int i = 0; i < old_num_stmt; i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.type == DEP_CONTROL || dv.type == DEP_UNKNOWN) { D.push_back(std::make_pair(j->first, dv)); for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[i][kk] != -1 && what_stmt_num[j->first][kk] != -1) dep.connect(what_stmt_num[i][kk], what_stmt_num[j->first][kk], dv); } else { coef_t lb = dv.lbounds[dep_dim]; coef_t ub = dv.ubounds[dep_dim]; if (ub == lb && int_mod(lb, static_cast(new_stride)) == 0) { D.push_back( std::make_pair(j->first, dv)); for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[i][kk] != -1 && what_stmt_num[j->first][kk] != -1) dep.connect( what_stmt_num[i][kk], what_stmt_num[j->first][kk], dv); } else if (lb == -posInfinity && ub == posInfinity) { D.push_back( std::make_pair(j->first, dv)); for (int kk = 0; kk < unroll_amount; kk++) if (kk == 0) D.push_back( std::make_pair(j->first, dv)); else if (what_stmt_num[j->first][kk - 1] != -1) D.push_back( std::make_pair( what_stmt_num[j->first][kk - 1], dv)); for (int t = 0; t < unroll_amount - 1; t++) if (what_stmt_num[i][t] != -1) for (int kk = 0; kk < unroll_amount; kk++) if (kk == 0) dep.connect( what_stmt_num[i][t], j->first, dv); else if (what_stmt_num[j->first][kk - 1] != -1) dep.connect( what_stmt_num[i][t], what_stmt_num[j->first][kk - 1], dv); } else { for (int kk = 0; kk < unroll_amount; kk++) { if (lb != -posInfinity) { if (kk * stride < int_mod(lb, static_cast(new_stride))) dv.lbounds[dep_dim] = floor( static_cast(lb) / new_stride) * new_stride + new_stride; else dv.lbounds[dep_dim] = floor( static_cast(lb) / new_stride) * new_stride; } if (ub != posInfinity) { if (kk * stride > int_mod(ub, static_cast(new_stride))) dv.ubounds[dep_dim] = floor( static_cast(ub) / new_stride) * new_stride - new_stride; else dv.ubounds[dep_dim] = floor( static_cast(ub) / new_stride) * new_stride; } if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { if (kk == 0) D.push_back( std::make_pair( j->first, dv)); else if (what_stmt_num[j->first][kk - 1] != -1) D.push_back( std::make_pair( what_stmt_num[j->first][kk - 1], dv)); } } for (int t = 0; t < unroll_amount - 1; t++) if (what_stmt_num[i][t] != -1) for (int kk = 0; kk < unroll_amount; kk++) { if (lb != -posInfinity) { if (kk * stride < int_mod( lb + t + 1, static_cast(new_stride))) dv.lbounds[dep_dim] = floor( static_cast(lb + (t + 1) * stride) / new_stride) * new_stride + new_stride; else dv.lbounds[dep_dim] = floor( static_cast(lb + (t + 1) * stride) / new_stride) * new_stride; } if (ub != posInfinity) { if (kk * stride > int_mod( ub + t + 1, static_cast(new_stride))) dv.ubounds[dep_dim] = floor( static_cast(ub + (t + 1) * stride) / new_stride) * new_stride - new_stride; else dv.ubounds[dep_dim] = floor( static_cast(ub + (t + 1) * stride) / new_stride) * new_stride; } if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { if (kk == 0) dep.connect( what_stmt_num[i][t], j->first, dv); else if (what_stmt_num[j->first][kk - 1] != -1) dep.connect( what_stmt_num[i][t], what_stmt_num[j->first][kk - 1], dv); } } } } } dep.vertex[i].second.erase(j++); } else { for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[i][kk] != -1) dep.connect(what_stmt_num[i][kk], j->first, j->second); j++; } } else { if (same_loop.find(j->first) != same_loop.end()) for (int k = 0; k < j->second.size(); k++) for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[j->first][kk] != -1) D.push_back( std::make_pair( what_stmt_num[j->first][kk], j->second[k])); j++; } } for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } } // reset lexical order for the unrolled loop body std::set new_same_loop; for (std::map >::iterator i = what_stmt_num.begin(); i != what_stmt_num.end(); i++) { new_same_loop.insert(i->first); for (int j = 0; j < i->second.size(); j++) new_same_loop.insert(i->second[j]); } setLexicalOrder(dim + 1, new_same_loop); } else { for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) add_loop_stride(stmt[*i].IS, bound, level - 1, unroll_amount * stride); int max_level = stmt[stmt_num].loop_level.size(); std::vector > stmt_order; for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) stmt_order.push_back( std::make_pair( get_const(stmt[*i].xform, 2 * max_level, Output_Var), *i)); sort(stmt_order.begin(), stmt_order.end()); Statement new_stmt; new_stmt.code = NULL; for (int j = 1; j < unroll_amount; j++) for (int i = 0; i < stmt_order.size(); i++) { Tuple funcList; Tuple loop_vars; loop_vars.append( stmt[stmt_order[i].second].IS.set_var(level)->name()); funcList.append( ocg->CreatePlus( ocg->CreateIdent( stmt[stmt_order[i].second].IS.set_var( level)->name()), ocg->CreateInt(j * stride))); CG_outputRepr *code = ocg->CreatePlaceHolder(0, stmt[stmt_order[i].second].code->clone(), funcList, loop_vars); new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); } new_stmt.IS = copy(stmt[stmt_num].IS); new_stmt.xform = copy(stmt[stmt_num].xform); assign_const(new_stmt.xform, 2 * max_level, stmt_order[stmt_order.size() - 1].first + 1); new_stmt.loop_level = stmt[stmt_num].loop_level; stmt.push_back(new_stmt); dep.insert(); // update dependence graph if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; int new_stride = unroll_amount * stride; for (int i = 0; i < old_num_stmt; i++) { std::vector > > D; for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { std::vector dvs11, dvs12, dvs22, dvs21; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.type == DEP_CONTROL || dv.type == DEP_UNKNOWN) { if (i == j->first) { dvs11.push_back(dv); dvs22.push_back(dv); } else throw loop_error( "unrolled statements lumped together illegally"); } else { coef_t lb = dv.lbounds[dep_dim]; coef_t ub = dv.ubounds[dep_dim]; if (ub == lb && int_mod(lb, static_cast(new_stride)) == 0) { dvs11.push_back(dv); dvs22.push_back(dv); } else { if (lb != -posInfinity) dv.lbounds[dep_dim] = ceil( static_cast(lb) / new_stride) * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = floor( static_cast(ub) / new_stride) * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs11.push_back(dv); if (lb != -posInfinity) dv.lbounds[dep_dim] = ceil( static_cast(lb) / new_stride) * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = ceil( static_cast(ub) / new_stride) * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs21.push_back(dv); if (lb != -posInfinity) dv.lbounds[dep_dim] = floor( static_cast(lb) / new_stride) * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = floor( static_cast(ub - stride) / new_stride) * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs12.push_back(dv); if (lb != -posInfinity) dv.lbounds[dep_dim] = floor( static_cast(lb) / new_stride) * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = ceil( static_cast(ub - stride) / new_stride) * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs22.push_back(dv); } } } if (dvs11.size() > 0) D.push_back(std::make_pair(i, dvs11)); if (dvs22.size() > 0) dep.connect(old_num_stmt, old_num_stmt, dvs22); if (dvs12.size() > 0) D.push_back( std::make_pair(old_num_stmt, dvs12)); if (dvs21.size() > 0) dep.connect(old_num_stmt, i, dvs21); dep.vertex[i].second.erase(j++); } else { dep.connect(old_num_stmt, j->first, j->second); j++; } } else { if (same_loop.find(j->first) != same_loop.end()) D.push_back( std::make_pair(old_num_stmt, j->second)); j++; } } for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } } } return result; } std::vector Loop::getLexicalOrder(int stmt_num) const { assert(stmt_num < stmt.size()); const int n = stmt[stmt_num].xform.n_out(); std::vector lex(n, 0); for (int i = 0; i < n; i += 2) lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); return lex; } std::set Loop::getStatements(const std::vector &lex, int dim) const { const int m = stmt.size(); std::set same_loops; for (int i = 0; i < m; i++) { if (dim < 0) same_loops.insert(i); else { std::vector a_lex = getLexicalOrder(i); int j; for (j = 0; j <= dim; j += 2) if (lex[j] != a_lex[j]) break; if (j > dim) same_loops.insert(i); } } return same_loops; } void Loop::shiftLexicalOrder(const std::vector &lex, int dim, int amount) { const int m = stmt.size(); if (amount == 0) return; for (int i = 0; i < m; i++) { std::vector lex2 = getLexicalOrder(i); bool need_shift = true; for (int j = 0; j < dim; j++) if (lex2[j] != lex[j]) { need_shift = false; break; } if (!need_shift) continue; if (amount > 0) { if (lex2[dim] < lex[dim]) continue; } else if (amount < 0) { if (lex2[dim] > lex[dim]) continue; } assign_const(stmt[i].xform, dim, lex2[dim] + amount); } } void Loop::setLexicalOrder(int dim, const std::set &active, int starting_order) { if (active.size() == 0) return; // check for sanity of parameters if (dim < 0 || dim % 2 != 0) throw std::invalid_argument( "invalid constant loop level to set lexicographical order"); std::vector lex; int ref_stmt_num; for (std::set::iterator i = active.begin(); i != active.end(); i++) { if ((*i) < 0 || (*i) >= stmt.size()) throw std::invalid_argument( "invalid statement number " + to_string(*i)); if (dim >= stmt[*i].xform.n_out()) throw std::invalid_argument( "invalid constant loop level to set lexicographical order"); if (i == active.begin()) { lex = getLexicalOrder(*i); ref_stmt_num = *i; } else { std::vector lex2 = getLexicalOrder(*i); for (int j = 0; j < dim; j += 2) if (lex[j] != lex2[j]) throw std::invalid_argument( "statements are not in the same sub loop nest"); } } // sepearate statements by current loop level types int level = (dim + 2) / 2; std::map, std::set > active_by_level_type; std::set active_by_no_level; for (std::set::iterator i = active.begin(); i != active.end(); i++) { if (level > stmt[*i].loop_level.size()) active_by_no_level.insert(*i); else active_by_level_type[std::make_pair( stmt[*i].loop_level[level - 1].type, stmt[*i].loop_level[level - 1].payload)].insert(*i); } // further separate statements due to control dependences std::vector > active_by_level_type_splitted; for (std::map, std::set >::iterator i = active_by_level_type.begin(); i != active_by_level_type.end(); i++) active_by_level_type_splitted.push_back(i->second); for (std::set::iterator i = active_by_no_level.begin(); i != active_by_no_level.end(); i++) for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { std::set controlled, not_controlled; for (std::set::iterator k = active_by_level_type_splitted[j].begin(); k != active_by_level_type_splitted[j].end(); k++) { std::vector dvs = dep.getEdge(*i, *k); bool is_controlled = false; for (int kk = 0; kk < dvs.size(); kk++) if (dvs[kk].type = DEP_CONTROL) { is_controlled = true; break; } if (is_controlled) controlled.insert(*k); else not_controlled.insert(*k); } if (controlled.size() != 0 && not_controlled.size() != 0) { active_by_level_type_splitted.erase( active_by_level_type_splitted.begin() + j); active_by_level_type_splitted.push_back(controlled); active_by_level_type_splitted.push_back(not_controlled); } } // set lexical order separating loops with different loop types first if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; Graph, Empty> g; for (std::vector >::iterator i = active_by_level_type_splitted.begin(); i != active_by_level_type_splitted.end(); i++) g.insert(*i); for (std::set::iterator i = active_by_no_level.begin(); i != active_by_no_level.end(); i++) { std::set t; t.insert(*i); g.insert(t); } for (int i = 0; i < g.vertex.size(); i++) for (int j = i + 1; j < g.vertex.size(); j++) { bool connected = false; for (std::set::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { for (std::set::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { std::vector dvs = dep.getEdge(*ii, *jj); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before( dep_dim))) { g.connect(i, j); connected = true; break; } if (connected) break; } if (connected) break; } connected = false; for (std::set::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { for (std::set::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { std::vector dvs = dep.getEdge(*jj, *ii); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before( dep_dim))) { g.connect(j, i); connected = true; break; } if (connected) break; } if (connected) break; } } std::vector > s = g.topoSort(); if (s.size() != g.vertex.size()) throw loop_error( "cannot separate statements with different loop types at loop level " + to_string(level)); // assign lexical order int order = starting_order; for (int i = 0; i < s.size(); i++) { std::set &cur_scc = g.vertex[*(s[i].begin())].first; int sz = cur_scc.size(); if (sz == 1) { int cur_stmt = *(cur_scc.begin()); assign_const(stmt[cur_stmt].xform, dim, order); for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) assign_const(stmt[cur_stmt].xform, j, 0); order++; } else { setLexicalOrder(dim, cur_scc, order); order += sz; } } } // set lexical order seperating single iteration statements and loops else { std::set true_singles; std::set nonsingles; std::map > fake_singles; // sort out statements that do not require loops for (std::set::iterator i = active.begin(); i != active.end(); i++) { Relation cur_IS = getNewIS(*i); if (is_single_iteration(cur_IS, dim + 1)) { bool is_all_single = true; for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) if (!is_single_iteration(cur_IS, j)) { is_all_single = false; break; } if (is_all_single) true_singles.insert(*i); else { try { fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( *i); } catch (const std::exception &e) { fake_singles[posInfinity].insert(*i); } } } else nonsingles.insert(*i); } // split nonsingles forcibly according to negative dependences present (loop unfusible) int dep_dim = get_dep_dim_of(ref_stmt_num, level); Graph g2; for (std::set::iterator i = nonsingles.begin(); i != nonsingles.end(); i++) g2.insert(*i); for (int i = 0; i < g2.vertex.size(); i++) for (int j = i + 1; j < g2.vertex.size(); j++) { std::vector dvs = dep.getEdge( g2.vertex[i].first, g2.vertex[j].first); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at( dep_dim))) { g2.connect(i, j); break; } dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at( dep_dim))) { g2.connect(j, i); break; } } std::vector > s2 = g2.packed_topoSort(); std::vector > splitted_nonsingles; for (int i = 0; i < s2.size(); i++) { std::set cur_scc; for (std::set::iterator j = s2[i].begin(); j != s2[i].end(); j++) cur_scc.insert(g2.vertex[*j].first); splitted_nonsingles.push_back(cur_scc); } // convert to dependence graph for grouped statements dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; Graph, Empty> g; for (std::set::iterator i = true_singles.begin(); i != true_singles.end(); i++) { std::set t; t.insert(*i); g.insert(t); } for (int i = 0; i < splitted_nonsingles.size(); i++) { g.insert(splitted_nonsingles[i]); } for (std::map >::iterator i = fake_singles.begin(); i != fake_singles.end(); i++) g.insert((*i).second); for (int i = 0; i < g.vertex.size(); i++) for (int j = i + 1; j < g.vertex.size(); j++) { bool connected = false; for (std::set::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { for (std::set::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { std::vector dvs = dep.getEdge(*ii, *jj); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before( dep_dim))) { g.connect(i, j); connected = true; break; } if (connected) break; } if (connected) break; } connected = false; for (std::set::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { for (std::set::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { std::vector dvs = dep.getEdge(*jj, *ii); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before( dep_dim))) { g.connect(j, i); connected = true; break; } if (connected) break; } if (connected) break; } } // topological sort according to chun's permute algorithm std::vector > s = g.topoSort(); // assign lexical order int order = starting_order; for (int i = 0; i < s.size(); i++) { // translate each SCC into original statements std::set cur_scc; for (std::set::iterator j = s[i].begin(); j != s[i].end(); j++) copy(g.vertex[*j].first.begin(), g.vertex[*j].first.end(), inserter(cur_scc, cur_scc.begin())); // now assign the constant for (std::set::iterator j = cur_scc.begin(); j != cur_scc.end(); j++) assign_const(stmt[*j].xform, dim, order); if (cur_scc.size() > 1) setLexicalOrder(dim + 2, cur_scc); else if (cur_scc.size() == 1) { int cur_stmt = *(cur_scc.begin()); for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) assign_const(stmt[cur_stmt].xform, j, 0); } if (cur_scc.size() > 0) order++; } } } void Loop::apply_xform() { std::set active; for (int i = 0; i < stmt.size(); i++) active.insert(i); apply_xform(active); } void Loop::apply_xform(int stmt_num) { std::set active; active.insert(stmt_num); apply_xform(active); } void Loop::apply_xform(std::set &active) { int max_n = 0; CG_outputBuilder *ocg = ir->builder(); for (std::set::iterator i = active.begin(); i != active.end(); i++) { int n = stmt[*i].loop_level.size(); if (n > max_n) max_n = n; std::vector lex = getLexicalOrder(*i); Relation mapping(2 * n + 1, n); F_And *f_root = mapping.add_and(); for (int j = 1; j <= n; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(j), 1); h.update_coef(mapping.input_var(2 * j), -1); } mapping = Composition(mapping, stmt[*i].xform); mapping.simplify(); // match omega input/output variables to variable names in the code for (int j = 1; j <= stmt[*i].IS.n_set(); j++) mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); for (int j = 1; j <= n; j++) mapping.name_output_var(j, tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter + j - 1)); mapping.setup_names(); Relation known = Extend_Set(copy(this->known), mapping.n_out() - this->known.n_set()); //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector(mapping.n_out(), NULL)); stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector(mapping.n_out())); stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); stmt[*i].IS.simplify(); // replace original transformation relation with straight 1-1 mapping mapping = Relation(n, 2 * n + 1); f_root = mapping.add_and(); for (int j = 1; j <= n; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(2 * j), 1); h.update_coef(mapping.input_var(j), -1); } for (int j = 1; j <= 2 * n + 1; j += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(j), 1); h.update_const(-lex[j - 1]); } stmt[*i].xform = mapping; } tmp_loop_var_name_counter += max_n; } void Loop::addKnown(const Relation &cond) { int n1 = this->known.n_set(); Relation r = copy(cond); int n2 = r.n_set(); if (n1 < n2) this->known = Extend_Set(this->known, n2 - n1); else if (n1 > n2) r = Extend_Set(r, n1 - n2); this->known = Intersection(this->known, r); } bool Loop::nonsingular(const std::vector > &T) { if (stmt.size() == 0) return true; // check for sanity of parameters for (int i = 0; i < stmt.size(); i++) { if (stmt[i].loop_level.size() != num_dep_dim) throw std::invalid_argument( "nonsingular loop transformations must be applied to original perfect loop nest"); for (int j = 0; j < stmt[i].loop_level.size(); j++) if (stmt[i].loop_level[j].type != LoopLevelOriginal) throw std::invalid_argument( "nonsingular loop transformations must be applied to original perfect loop nest"); } if (T.size() != num_dep_dim) throw std::invalid_argument("invalid transformation matrix"); for (int i = 0; i < stmt.size(); i++) if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) throw std::invalid_argument("invalid transformation matrix"); // build relation from matrix Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); F_And *f_root = mapping.add_and(); for (int i = 0; i < num_dep_dim; i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(2 * (i + 1)), -1); for (int j = 0; j < num_dep_dim; j++) if (T[i][j] != 0) h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); if (T[i].size() == num_dep_dim + 1) h.update_const(T[i][num_dep_dim]); } for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(i), -1); h.update_coef(mapping.input_var(i), 1); } // update transformation relations for (int i = 0; i < stmt.size(); i++) stmt[i].xform = Composition(copy(mapping), stmt[i].xform); // update dependence graph for (int i = 0; i < dep.vertex.size(); i++) for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { std::vector dvs = j->second; for (int k = 0; k < dvs.size(); k++) { DependenceVector &dv = dvs[k]; switch (dv.type) { case DEP_W2R: case DEP_R2W: case DEP_W2W: case DEP_R2R: { std::vector lbounds(num_dep_dim), ubounds( num_dep_dim); for (int p = 0; p < num_dep_dim; p++) { coef_t lb = 0; coef_t ub = 0; for (int q = 0; q < num_dep_dim; q++) { if (T[p][q] > 0) { if (lb == -posInfinity || dv.lbounds[q] == -posInfinity) lb = -posInfinity; else lb += T[p][q] * dv.lbounds[q]; if (ub == posInfinity || dv.ubounds[q] == posInfinity) ub = posInfinity; else ub += T[p][q] * dv.ubounds[q]; } else if (T[p][q] < 0) { if (lb == -posInfinity || dv.ubounds[q] == posInfinity) lb = -posInfinity; else lb += T[p][q] * dv.ubounds[q]; if (ub == posInfinity || dv.lbounds[q] == -posInfinity) ub = posInfinity; else ub += T[p][q] * dv.lbounds[q]; } } if (T[p].size() == num_dep_dim + 1) { if (lb != -posInfinity) lb += T[p][num_dep_dim]; if (ub != posInfinity) ub += T[p][num_dep_dim]; } lbounds[p] = lb; ubounds[p] = ub; } dv.lbounds = lbounds; dv.ubounds = ubounds; break; } default: ; } } j->second = dvs; } // set constant loop values std::set active; for (int i = 0; i < stmt.size(); i++) active.insert(i); setLexicalOrder(0, active); return true; } void Loop::skew(const std::set &stmt_nums, int level, const std::vector &skew_amount) { if (stmt_nums.size() == 0) return; // check for sanity of parameters int ref_stmt_num = *(stmt_nums.begin()); std::vector > array_of_deps; for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { if (*i < 0 || *i >= stmt.size()) throw std::invalid_argument( "invalid statement number " + to_string(*i)); if (level < 1 || level > stmt[*i].loop_level.size()) throw std::invalid_argument( "invalid loop level " + to_string(level)); for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) if (skew_amount[j] != 0) throw std::invalid_argument("invalid skewing formula"); } // set trasformation relations for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { int n = stmt[*i].xform.n_out(); Relation r(n, n); F_And *f_root = r.add_and(); for (int j = 1; j <= n; j++) if (j != 2 * level) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.input_var(j), 1); h.update_coef(r.output_var(j), -1); } EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(2 * level), -1); for (int j = 0; j < skew_amount.size(); j++) if (skew_amount[j] != 0) h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); stmt[*i].xform = Composition(r, stmt[*i].xform); stmt[*i].xform.simplify(); applyXform(*i); std::set dont_consider; //} // update dependence graph if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) for (DependenceGraph::EdgeList::iterator j = dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) if (stmt_nums.find(j->first) != stmt_nums.end()) { // dependence between skewed statements std::vector dvs = j->second; for (int k = 0; k < dvs.size(); k++) { DependenceVector &dv = dvs[k]; if (dv.is_data_dependence()) { coef_t lb = 0; coef_t ub = 0; for (int kk = 0; kk < skew_amount.size(); kk++) { int cur_dep_dim = get_dep_dim_of(*i, kk + 1); if (skew_amount[kk] > 0) { if (lb != -posInfinity && stmt[*i].loop_level[kk].type == LoopLevelOriginal && dv.lbounds[cur_dep_dim] != -posInfinity) lb += skew_amount[kk] * dv.lbounds[cur_dep_dim]; else { if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) lb = -posInfinity; } if (ub != posInfinity && stmt[*i].loop_level[kk].type == LoopLevelOriginal && dv.ubounds[cur_dep_dim] != posInfinity) ub += skew_amount[kk] * dv.ubounds[cur_dep_dim]; else { if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) ub = posInfinity; } } else if (skew_amount[kk] < 0) { if (lb != -posInfinity && stmt[*i].loop_level[kk].type == LoopLevelOriginal && dv.ubounds[cur_dep_dim] != posInfinity) lb += skew_amount[kk] * dv.ubounds[cur_dep_dim]; else { if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) lb = -posInfinity; } if (ub != posInfinity && stmt[*i].loop_level[kk].type == LoopLevelOriginal && dv.lbounds[cur_dep_dim] != -posInfinity) ub += skew_amount[kk] * dv.lbounds[cur_dep_dim]; else { if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) ub = posInfinity; } } } if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim)) && dv.quasi) dv.quasi = false; if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim)) && !dv.quasi) throw loop_error( "loop error: Skewing is illegal, dependence violation!"); dv.lbounds[dep_dim] = lb; dv.ubounds[dep_dim] = ub; if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim)) && dv.quasi) dv.quasi = false; if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim)) && !dv.quasi) throw loop_error( "loop error: Skewing is illegal, dependence violation!"); } } j->second = dvs; } } else { // dependence from skewed statement to unskewed statement becomes jumbled, // put distance value at skewed dimension to unknown /*std::vector dvs = j->second; for (int k = 0; k < dvs.size(); k++) { DependenceVector &dv = dvs[k]; if (dv.is_data_dependence()) { dv.lbounds[dep_dim] = -posInfinity; dv.ubounds[dep_dim] = posInfinity; } } j->second = dvs; */ dont_consider.insert(j->first); } for (int l = 0; l < dep.vertex.size(); l++) if (stmt_nums.find(l) == stmt_nums.end()) if (dont_consider.find(l) == stmt_nums.end() && (dep.vertex[l].second.find(*i) != dep.vertex[l].second.end())) dont_consider.insert(l); array_of_deps.push_back(dont_consider); } /*for (int i = 0; i < dep.vertex.size(); i++) if (stmt_nums.find(i) == stmt_nums.end()) for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) if (stmt_nums.find(j->first) != stmt_nums.end()) { // dependence from unskewed statement to skewed statement becomes jumbled, // put distance value at skewed dimension to unknown std::vector dvs = j->second; for (int k = 0; k < dvs.size(); k++) { DependenceVector &dv = dvs[k]; if (dv.is_data_dependence()) { dv.lbounds[dep_dim] = -posInfinity; dv.ubounds[dep_dim] = posInfinity; } } j->second = dvs; } }*/ std::set::const_iterator w = stmt_nums.begin(); for (int i = 0; i < array_of_deps.size() && w != stmt_nums.end(); i++) for (std::set::const_iterator j = array_of_deps[i].begin(); j != array_of_deps[i].end(); j++) { if (dep.vertex[*w].second.find(*j) != dep.vertex[*w].second.end()) dep.disconnect(*w, *j); if (dep.vertex[*j].second.find(*w) != dep.vertex[*j].second.end()) dep.disconnect(*j, *w); int x, y; std::pair, std::vector > dv_s; if ((*w) <= (*j)) { x = *w; y = *j; dv_s = test_data_dependences(ir_, stmt[x].code, stmt[x].IS, stmt[y].code, stmt[y].IS, freevar, index, x, y); } else { x = *j; y = *w; dv_s = test_data_dependences(ir_, stmt[y].code, stmt[y].IS, stmt[x].code, stmt[x].IS, freevar, index, x, y); } for (int k = 0; k < dv_s.first.size(); k++) { if (is_dependence_valid_based_on_lex_order(x, y, dv_s.first[k], true)) dep.connect(x, y, dv_s.first[k]); else dep.connect(y, x, dv_s.first[k].reverse()); } for (int k = 0; k < dv_s.second.size(); k++) { if (is_dependence_valid_based_on_lex_order(x, y, dv_s.second[k], false)) dep.connect(y, x, dv_s.second[k]); else dep.connect(x, y, dv_s.second[k].reverse()); } w++; } } void Loop::shift(const std::set &stmt_nums, int level, int shift_amount) { if (stmt_nums.size() == 0) return; // check for sanity of parameters int ref_stmt_num = *(stmt_nums.begin()); for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { if (*i < 0 || *i >= stmt.size()) throw std::invalid_argument( "invalid statement number " + to_string(*i)); if (level < 1 || level > stmt[*i].loop_level.size()) throw std::invalid_argument( "invalid loop level " + to_string(level)); } // do nothing if (shift_amount == 0) return; // set trasformation relations for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { int n = stmt[*i].xform.n_out(); Relation r(n, n); F_And *f_root = r.add_and(); for (int j = 1; j <= n; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.input_var(j), 1); h.update_coef(r.output_var(j), -1); if (j == 2 * level) h.update_const(shift_amount); } stmt[*i].xform = Composition(r, stmt[*i].xform); stmt[*i].xform.simplify(); } // update dependence graph if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) for (DependenceGraph::EdgeList::iterator j = dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) if (stmt_nums.find(j->first) == stmt_nums.end()) { // dependence from shifted statement to unshifted statement std::vector dvs = j->second; for (int k = 0; k < dvs.size(); k++) { DependenceVector &dv = dvs[k]; if (dv.is_data_dependence()) { if (dv.lbounds[dep_dim] != -posInfinity) dv.lbounds[dep_dim] -= shift_amount; if (dv.ubounds[dep_dim] != posInfinity) dv.ubounds[dep_dim] -= shift_amount; } } j->second = dvs; } for (int i = 0; i < dep.vertex.size(); i++) if (stmt_nums.find(i) == stmt_nums.end()) for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) if (stmt_nums.find(j->first) != stmt_nums.end()) { // dependence from unshifted statement to shifted statement std::vector dvs = j->second; for (int k = 0; k < dvs.size(); k++) { DependenceVector &dv = dvs[k]; if (dv.is_data_dependence()) { if (dv.lbounds[dep_dim] != -posInfinity) dv.lbounds[dep_dim] += shift_amount; if (dv.ubounds[dep_dim] != posInfinity) dv.ubounds[dep_dim] += shift_amount; } } j->second = dvs; } } } // bool Loop::fuse(const std::set &stmt_nums, int level) { // if (stmt_nums.size() == 0 || stmt_nums.size() == 1) // return true; // int dim = 2*level-1; // // check for sanity of parameters // std::vector ref_lex; // for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { // if (*i < 0 || *i >= stmt.size()) // throw std::invalid_argument("invalid statement number " + to_string(*i)); // if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) // throw std::invalid_argument("invalid loop level " + to_string(level)); // if (ref_lex.size() == 0) // ref_lex = getLexicalOrder(*i); // else { // std::vector lex = getLexicalOrder(*i); // for (int j = 0; j < dim-1; j+=2) // if (lex[j] != ref_lex[j]) // throw std::invalid_argument("statements for fusion must be in the same level-" + to_string(level-1) + " subloop"); // } // } // // collect lexicographical order values from to-be-fused statements // std::set lex_values; // for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { // std::vector lex = getLexicalOrder(*i); // lex_values.insert(lex[dim-1]); // } // if (lex_values.size() == 1) // return true; // // negative dependence would prevent fusion // int dep_dim = xform_index[dim].first; // for (std::set::iterator i = lex_values.begin(); i != lex_values.end(); i++) { // ref_lex[dim-1] = *i; // std::set a = getStatements(ref_lex, dim-1); // std::set::iterator j = i; // j++; // for (; j != lex_values.end(); j++) { // ref_lex[dim-1] = *j; // std::set b = getStatements(ref_lex, dim-1); // for (std::set::iterator ii = a.begin(); ii != a.end(); ii++) // for (std::set::iterator jj = b.begin(); jj != b.end(); jj++) { // std::vector dvs; // dvs = dep.getEdge(*ii, *jj); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) // throw loop_error("loop error: statements " + to_string(*ii) + " and " + to_string(*jj) + " cannot be fused together due to negative dependence"); // dvs = dep.getEdge(*jj, *ii); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) // throw loop_error("loop error: statements " + to_string(*jj) + " and " + to_string(*ii) + " cannot be fused together due to negative dependence"); // } // } // } // // collect all other lexicographical order values from the subloop // // enclosing these to-be-fused loops // std::set same_loop = getStatements(ref_lex, dim-3); // std::set other_lex_values; // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { // std::vector lex = getLexicalOrder(*i); // if (lex_values.find(lex[dim-1]) == lex_values.end()) // other_lex_values.insert(lex[dim-1]); // } // // update to-be-fused loops due to dependence cycle // Graph, Empty> g; // { // std::set t; // for (std::set::iterator i = lex_values.begin(); i != lex_values.end(); i++) { // ref_lex[dim-1] = *i; // std::set t2 = getStatements(ref_lex, dim-1); // std::set_union(t.begin(), t.end(), t2.begin(), t2.end(), inserter(t, t.begin())); // } // g.insert(t); // } // for (std::set::iterator i = other_lex_values.begin(); i != other_lex_values.end(); i++) { // ref_lex[dim-1] = *i; // std::set t = getStatements(ref_lex, dim-1); // g.insert(t); // } // for (int i = 0; i < g.vertex.size(); i++) // for (int j = i+1; j < g.vertex.size(); j++) // for (std::set::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) // for (std::set::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { // std::vector dvs; // dvs = dep.getEdge(*ii, *jj); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim)) { // g.connect(i, j); // break; // } // dvs = dep.getEdge(*jj, *ii); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim)) { // g.connect(j, i); // break; // } // } // std::vector > s = g.topoSort(); // int fused_lex_value = 0; // for (int i = 0; i < s.size(); i++) // if (s[i].find(0) != s[i].end()) { // // now add additional lexicographical order values // for (std::set::iterator j = s[i].begin(); j != s[i].end(); j++) // if (*j != 0) { // int stmt = *(g.vertex[*j].first.begin()); // std::vector lex = getLexicalOrder(stmt); // lex_values.insert(lex[dim-1]); // } // if (s.size() > 1) { // if (i == 0) { // int min_lex_value; // for (std::set::iterator j = s[i+1].begin(); j != s[i+1].end(); j++) { // int stmt = *(g.vertex[*j].first.begin()); // std::vector lex = getLexicalOrder(stmt); // if (j == s[i+1].begin()) // min_lex_value = lex[dim-1]; // else if (lex[dim-1] < min_lex_value) // min_lex_value = lex[dim-1]; // } // fused_lex_value = min_lex_value - 1; // } // else { // int max_lex_value; // for (std::set::iterator j = s[i-1].begin(); j != s[i-1].end(); j++) { // int stmt = *(g.vertex[*j].first.begin()); // std::vector lex = getLexicalOrder(stmt); // if (j == s[i-1].begin()) // max_lex_value = lex[dim-1]; // else if (lex[dim-1] > max_lex_value) // max_lex_value = lex[dim-1]; // } // fused_lex_value = max_lex_value + 1; // } // } // break; // } // // sort the newly updated to-be-fused lexicographical order values // std::vector ordered_lex_values; // for (std::set::iterator i = lex_values.begin(); i != lex_values.end(); i++) // ordered_lex_values.push_back(*i); // std::sort(ordered_lex_values.begin(), ordered_lex_values.end()); // // make sure internal loops inside to-be-fused loops have the same // // lexicographical order before and after fusion // std::vector > inside_lex_range(ordered_lex_values.size()); // for (int i = 0; i < ordered_lex_values.size(); i++) { // ref_lex[dim-1] = ordered_lex_values[i]; // std::set the_stmts = getStatements(ref_lex, dim-1); // std::set::iterator j = the_stmts.begin(); // std::vector lex = getLexicalOrder(*j); // int min_inside_lex_value = lex[dim+1]; // int max_inside_lex_value = lex[dim+1]; // j++; // for (; j != the_stmts.end(); j++) { // std::vector lex = getLexicalOrder(*j); // if (lex[dim+1] < min_inside_lex_value) // min_inside_lex_value = lex[dim+1]; // if (lex[dim+1] > max_inside_lex_value) // max_inside_lex_value = lex[dim+1]; // } // inside_lex_range[i].first = min_inside_lex_value; // inside_lex_range[i].second = max_inside_lex_value; // } // for (int i = 1; i < ordered_lex_values.size(); i++) // if (inside_lex_range[i].first <= inside_lex_range[i-1].second) { // int shift_lex_value = inside_lex_range[i-1].second - inside_lex_range[i].first + 1; // ref_lex[dim-1] = ordered_lex_values[i]; // ref_lex[dim+1] = inside_lex_range[i].first; // shiftLexicalOrder(ref_lex, dim+1, shift_lex_value); // inside_lex_range[i].first += shift_lex_value; // inside_lex_range[i].second += shift_lex_value; // } // // set lexicographical order for fused loops // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { // std::vector lex = getLexicalOrder(*i); // if (lex_values.find(lex[dim-1]) != lex_values.end()) // assign_const(stmt[*i].xform, dim-1, fused_lex_value); // } // // no need to update dependence graph // ; // return true; // } // bool Loop::distribute(const std::set &stmt_nums, int level) { // if (stmt_nums.size() == 0 || stmt_nums.size() == 1) // return true; // int dim = 2*level-1; // // check for sanity of parameters // std::vector ref_lex; // for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { // if (*i < 0 || *i >= stmt.size()) // throw std::invalid_argument("invalid statement number " + to_string(*i)); // if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) // throw std::invalid_argument("invalid loop level " + to_string(level)); // if (ref_lex.size() == 0) // ref_lex = getLexicalOrder(*i); // else { // std::vector lex = getLexicalOrder(*i); // for (int j = 0; j <= dim-1; j+=2) // if (lex[j] != ref_lex[j]) // throw std::invalid_argument("statements for distribution must be in the same level-" + to_string(level) + " subloop"); // } // } // // find SCC in the to-be-distributed loop // int dep_dim = xform_index[dim].first; // std::set same_loop = getStatements(ref_lex, dim-1); // Graph g; // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) // g.insert(*i); // for (int i = 0; i < g.vertex.size(); i++) // for (int j = i+1; j < g.vertex.size(); j++) { // std::vector dvs; // dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim)) { // g.connect(i, j); // break; // } // dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim)) { // g.connect(j, i); // break; // } // } // std::vector > s = g.topoSort(); // // find statements that cannot be distributed due to dependence cycle // Graph, Empty> g2; // for (int i = 0; i < s.size(); i++) { // std::set t; // for (std::set::iterator j = s[i].begin(); j != s[i].end(); j++) // if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) // t.insert(g.vertex[*j].first); // if (!t.empty()) // g2.insert(t); // } // for (int i = 0; i < g2.vertex.size(); i++) // for (int j = i+1; j < g2.vertex.size(); j++) // for (std::set::iterator ii = g2.vertex[i].first.begin(); ii != g2.vertex[i].first.end(); ii++) // for (std::set::iterator jj = g2.vertex[j].first.begin(); jj != g2.vertex[j].first.end(); jj++) { // std::vector dvs; // dvs = dep.getEdge(*ii, *jj); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim)) { // g2.connect(i, j); // break; // } // dvs = dep.getEdge(*jj, *ii); // for (int k = 0; k < dvs.size(); k++) // if (dvs[k].isCarried(dep_dim)) { // g2.connect(j, i); // break; // } // } // std::vector > s2 = g2.topoSort(); // // nothing to distribute // if (s2.size() == 1) // throw loop_error("loop error: no statement can be distributed due to dependence cycle"); // std::vector > s3; // for (int i = 0; i < s2.size(); i++) { // std::set t; // for (std::set::iterator j = s2[i].begin(); j != s2[i].end(); j++) // std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), g2.vertex[*j].first.end(), inserter(t, t.begin())); // s3.push_back(t); // } // // associate other affected statements with the right distributed statements // for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) // if (stmt_nums.find(*i) == stmt_nums.end()) { // bool is_inserted = false; // int potential_insertion_point = 0; // for (int j = 0; j < s3.size(); j++) { // for (std::set::iterator k = s3[j].begin(); k != s3[j].end(); k++) { // std::vector dvs; // dvs = dep.getEdge(*i, *k); // for (int kk = 0; kk < dvs.size(); kk++) // if (dvs[kk].isCarried(dep_dim)) { // s3[j].insert(*i); // is_inserted = true; // break; // } // dvs = dep.getEdge(*k, *i); // for (int kk = 0; kk < dvs.size(); kk++) // if (dvs[kk].isCarried(dep_dim)) // potential_insertion_point = j; // } // if (is_inserted) // break; // } // if (!is_inserted) // s3[potential_insertion_point].insert(*i); // } // // set lexicographical order after distribution // int order = ref_lex[dim-1]; // shiftLexicalOrder(ref_lex, dim-1, s3.size()-1); // for (std::vector >::iterator i = s3.begin(); i != s3.end(); i++) { // for (std::set::iterator j = (*i).begin(); j != (*i).end(); j++) // assign_const(stmt[*j].xform, dim-1, order); // order++; // } // // no need to update dependence graph // ; // return true; // }