diff options
Diffstat (limited to 'chill/src/loop.cc')
-rw-r--r-- | chill/src/loop.cc | 1870 |
1 files changed, 1870 insertions, 0 deletions
diff --git a/chill/src/loop.cc b/chill/src/loop.cc new file mode 100644 index 0000000..0a82f7a --- /dev/null +++ b/chill/src/loop.cc @@ -0,0 +1,1870 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Core loop transformation functionality. + + Notes: + "level" (starting from 1) means loop level and it corresponds to "dim" + (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., + c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 + in transformed iteration space, and variable 4 in Omega relation. + All c's are constant numbers only and they will not show up as actual loops. + Formula: + dim = 2*level - 1 + var = dim + 1 + + History: + 10/2005 Created by Chun Chen. + 09/2009 Expand tile functionality, -chun + 10/2009 Initialize unfusible loop nest without bailing out, -chun +*****************************************************************************/ + +#include <limits.h> +#include <math.h> +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include <iostream> +#include <algorithm> +#include <map> +#include "loop.hh" +#include "omegatools.hh" +#include "irtools.hh" +#include "chill_error.hh" +#include <string.h> +#include <list> +using namespace omega; + +const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change +const std::string Loop::overflow_var_name_prefix = std::string("over"); + +//----------------------------------------------------------------------------- +// Class Loop +//----------------------------------------------------------------------------- +// --begin Anand: Added from CHiLL 0.2 + +bool Loop::isInitialized() const { + return stmt.size() != 0 && !stmt[0].xform.is_null(); +} + +//--end Anand: added from CHiLL 0.2 + +bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, + std::vector<ir_tree_node *> &ir_stmt) { + + ir_stmt = extract_ir_stmts(ir_tree); + stmt_nesting_level_.resize(ir_stmt.size()); + std::vector<int> stmt_nesting_level(ir_stmt.size()); + for (int i = 0; i < ir_stmt.size(); i++) { + ir_stmt[i]->payload = i; + int t = 0; + ir_tree_node *itn = ir_stmt[i]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + t++; + } + stmt_nesting_level_[i] = t; + stmt_nesting_level[i] = t; + } + + stmt = std::vector<Statement>(ir_stmt.size()); + int n_dim = -1; + int max_loc; + //std::vector<std::string> index; + for (int i = 0; i < ir_stmt.size(); i++) { + int max_nesting_level = -1; + int loc; + for (int j = 0; j < ir_stmt.size(); j++) + if (stmt_nesting_level[j] > max_nesting_level) { + max_nesting_level = stmt_nesting_level[j]; + loc = j; + } + + // most deeply nested statement acting as a reference point + if (n_dim == -1) { + n_dim = max_nesting_level; + max_loc = loc; + + index = std::vector<std::string>(n_dim); + + ir_tree_node *itn = ir_stmt[loc]; + int cur_dim = n_dim - 1; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + index[cur_dim] = + static_cast<IR_Loop *>(itn->content)->index()->name(); + itn->payload = cur_dim--; + } + } + } + + // align loops by names, temporary solution + ir_tree_node *itn = ir_stmt[loc]; + int depth = stmt_nesting_level_[loc] - 1; + /* while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); + for (int j = 0; j < n_dim; j++) + if (index[j] == name) { + itn->payload = j; + break; + } + if (itn->payload == -1) + throw loop_error("no complex alignment yet"); + } + } + */ + for (int t = depth; t >= 0; t--) { + int y = t; + ir_tree_node *itn = ir_stmt[loc]; + + while ((itn->parent != NULL) && (y >= 0)) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + y--; + } + + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + CG_outputBuilder *ocg = ir->builder(); + + itn->payload = depth - t; + + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + + std::vector<CG_outputRepr *> index_expr; + std::vector<std::string> old_index; + CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); + index_expr.push_back(repl); + old_index.push_back( + static_cast<IR_Loop *>(itn->content)->index()->name()); + code = ocg->CreateSubstitutedStmt(0, code, old_index, + index_expr); + + replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); + //stmt[loc].code = code; + + } + } + + // set relation variable names + Relation r(n_dim); + F_And *f_root = r.add_and(); + itn = ir_stmt[loc]; + int temp_depth = depth; + while (itn->parent != NULL) { + + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + r.name_set_var(itn->payload + 1, index[temp_depth]); + + temp_depth--; + } + //static_cast<IR_Loop *>(itn->content)->index()->name()); + } + + /*while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); + }*/ + + // extract information from loop/if structures + std::vector<bool> processed(n_dim, false); + std::vector<std::string> vars_to_be_reversed; + itn = ir_stmt[loc]; + while (itn->parent != NULL) { + itn = itn->parent; + + switch (itn->content->type()) { + case IR_CONTROL_LOOP: { + IR_Loop *lp = static_cast<IR_Loop *>(itn->content); + Variable_ID v = r.set_var(itn->payload + 1); + int c; + + try { + c = lp->step_size(); + if (c > 0) { + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + cond, true); + else + throw ir_error("loop condition not supported"); + + } else if (c < 0) { + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *lb = lp->lower_bound(); + lb = ocg->CreateMinus(NULL, lb); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + ub = ocg->CreateMinus(NULL, ub); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_GE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LE, true); + else if (cond == IR_COND_GT) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LT, true); + else + throw ir_error("loop condition not supported"); + + vars_to_be_reversed.push_back(lp->index()->name()); + } else + throw ir_error("loop step size zero"); + } catch (const ir_error &e) { + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + + if (abs(c) != 1) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(); + F_And *f_and = f_exists->add_and(); + Stride_Handle h = f_and->add_stride(abs(c)); + if (c > 0) + h.update_coef(e, 1); + else + h.update_coef(e, -1); + h.update_coef(v, -1); + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, + true); + } + + processed[itn->payload] = true; + break; + } + case IR_CONTROL_IF: { + CG_outputRepr *cond = + static_cast<IR_If *>(itn->content)->condition(); + try { + if (itn->payload % 2 == 1) + exp2constraint(ir, r, f_root, freevar, cond, true); + else { + F_Not *f_not = f_root->add_not(); + F_And *f_and = f_not->add_and(); + exp2constraint(ir, r, f_and, freevar, cond, true); + } + } catch (const ir_error &e) { + std::vector<ir_tree_node *> *t; + if (itn->parent == NULL) + t = &ir_tree; + else + t = &(itn->parent->children); + int id = itn->payload; + int i = t->size() - 1; + while (i >= 0) { + if ((*t)[i] == itn) { + for (int j = 0; j < itn->children.size(); j++) + delete itn->children[j]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + } else if ((*t)[i]->payload >> 1 == id >> 1) { + delete (*t)[i]; + t->erase(t->begin() + i); + } + i--; + } + return false; + } + + break; + } + default: + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + } + + // add information for missing loops + for (int j = 0; j < n_dim; j++) + if (!processed[j]) { + ir_tree_node *itn = ir_stmt[max_loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP + && itn->payload == j) + break; + } + + Variable_ID v = r.set_var(j + 1); + if (loc < max_loc) { + + CG_outputBuilder *ocg = ir->builder(); + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, + false); + + /* if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->lower_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->lower_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, false); + + + + } + + } + */ + + } else { // loc > max_loc + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, + false); + /*if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->upper_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->upper_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, false); + + + } + } + */ + } + } + + r.setup_names(); + r.simplify(); + + // insert the statement + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> reverse_expr; + for (int j = 1; j <= vars_to_be_reversed.size(); j++) { + CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); + repl = ocg->CreateMinus(NULL, repl); + reverse_expr.push_back(repl); + } + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed, + reverse_expr); + stmt[loc].code = code; + stmt[loc].IS = r; + stmt[loc].loop_level = std::vector<LoopLevel>(n_dim); + stmt[loc].ir_stmt_node = ir_stmt[loc]; + for (int i = 0; i < n_dim; i++) { + stmt[loc].loop_level[i].type = LoopLevelOriginal; + stmt[loc].loop_level[i].payload = i; + stmt[loc].loop_level[i].parallel_level = 0; + } + + stmt_nesting_level[loc] = -1; + } + + return true; +} + +Loop::Loop(const IR_Control *control) { + + last_compute_cgr_ = NULL; + last_compute_cg_ = NULL; + + ir = const_cast<IR_Code *>(control->ir_); + init_code = NULL; + cleanup_code = NULL; + tmp_loop_var_name_counter = 1; + overflow_var_name_counter = 1; + known = Relation::True(0); + + ir_tree = build_ir_tree(control->clone(), NULL); + // std::vector<ir_tree_node *> ir_stmt; + + while (!init_loop(ir_tree, ir_stmt)) { + } + + + + for (int i = 0; i < stmt.size(); i++) { + std::map<int, CG_outputRepr*>::iterator it = replace.find(i); + + if (it != replace.end()) + stmt[i].code = it->second; + else + stmt[i].code = stmt[i].code; + } + + if (stmt.size() != 0) + dep = DependenceGraph(stmt[0].IS.n_set()); + else + dep = DependenceGraph(0); + // init the dependence graph + for (int i = 0; i < stmt.size(); i++) + dep.insert(); + + for (int i = 0; i < stmt.size(); i++) + for (int j = i; j < stmt.size(); j++) { + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = test_data_dependences( + ir, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, + freevar, index, stmt_nesting_level_[i], + stmt_nesting_level_[j]); + + for (int k = 0; k < dv.first.size(); k++) { + if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], + true)) + dep.connect(i, j, dv.first[k]); + else { + dep.connect(j, i, dv.first[k].reverse()); + } + + } + for (int k = 0; k < dv.second.size(); k++) + if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], + false)) + dep.connect(j, i, dv.second[k]); + else { + dep.connect(i, j, dv.second[k].reverse()); + } + // std::pair<std::vector<DependenceVector>, + // std::vector<DependenceVector> > dv_ = test_data_dependences( + + } + + + + // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] + for (int i = 0; i < stmt.size(); i++) { + int n = stmt[i].IS.n_set(); + stmt[i].xform = Relation(n, 2 * n + 1); + F_And *f_root = stmt[i].xform.add_and(); + + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(2 * j), 1); + h.update_coef(stmt[i].xform.input_var(j), -1); + } + + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(j), 1); + } + stmt[i].xform.simplify(); + } + + if (stmt.size() != 0) + num_dep_dim = stmt[0].IS.n_set(); + else + num_dep_dim = 0; + // debug + /*for (int i = 0; i < stmt.size(); i++) { + std::cout << i << ": "; + //stmt[i].xform.print(); + stmt[i].IS.print(); + std::cout << std::endl; + + }*/ + //end debug +} + +Loop::~Loop() { + + delete last_compute_cgr_; + delete last_compute_cg_; + + for (int i = 0; i < stmt.size(); i++) + if (stmt[i].code != NULL) { + stmt[i].code->clear(); + delete stmt[i].code; + } + + for (int i = 0; i < ir_tree.size(); i++) + delete ir_tree[i]; + + if (init_code != NULL) { + init_code->clear(); + delete init_code; + } + if (cleanup_code != NULL) { + cleanup_code->clear(); + delete cleanup_code; + } +} + +int Loop::get_dep_dim_of(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1 || level > stmt[stmt_num].loop_level.size()) + return -1; + + int trip_count = 0; + while (true) { + switch (stmt[stmt_num].loop_level[level - 1].type) { + case LoopLevelOriginal: + return stmt[stmt_num].loop_level[level - 1].payload; + case LoopLevelTile: + level = stmt[stmt_num].loop_level[level - 1].payload; + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + break; + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(stmt_num)); + } + trip_count++; + if (trip_count >= stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + } +} + +int Loop::get_last_dep_dim_before(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + level = stmt[stmt_num].loop_level.size() + 1; + + for (int i = level - 1; i >= 1; i--) + if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) + return stmt[stmt_num].loop_level[i - 1].payload; + + return -1; +} + +void Loop::print_internal_loop_structure() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +CG_outputRepr *Loop::getCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return NULL; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::vector<CG_outputRepr *> stmts(m); + for (int i = 0; i < m; i++) + stmts[i] = stmt[i].code; + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts); + + if (init_code != NULL) + repr = ocg->StmtListAppend(init_code->clone(), repr); + if (cleanup_code != NULL) + repr = ocg->StmtListAppend(repr, cleanup_code->clone()); + + return repr; +} + +void Loop::printCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::string repr = last_compute_cgr_->printString(); + std::cout << repr << std::endl; +} + +void Loop::printIterationSpace() const { + for (int i = 0; i < stmt.size(); i++) { + std::cout << "s" << i << ": "; + Relation r = getNewIS(i); + for (int j = 1; j <= r.n_inp(); j++) + r.name_input_var(j, CodeGen::loop_var_name_prefix + to_string(j)); + r.setup_names(); + r.print(); + } +} + +void Loop::printDependenceGraph() const { + if (dep.edgeCount() == 0) + std::cout << "no dependence exists" << std::endl; + else { + std::cout << "dependence graph:" << std::endl; + std::cout << dep; + } +} + +Relation Loop::getNewIS(int stmt_num) const { + Relation result; + + if (stmt[stmt_num].xform.is_null()) { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].IS.n_set() - this->known.n_set()); + result = Intersection(copy(stmt[stmt_num].IS), known); + } else { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].xform.n_out() - this->known.n_set()); + result = Intersection( + Range( + Restrict_Domain(copy(stmt[stmt_num].xform), + copy(stmt[stmt_num].IS))), known); + } + + result.simplify(2, 4); + + return result; +} + +std::vector<Relation> Loop::getNewIS() const { + const int m = stmt.size(); + + std::vector<Relation> new_IS(m); + for (int i = 0; i < m; i++) + new_IS[i] = getNewIS(i); + + return new_IS; +} + +void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePragmaAttribute(code, level, pragmaText); +} +/* +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, const std::string &indexName, int offset, int hint) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, indexName, int offset, hint); +} +*/ + +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, hint); +} + +std::vector<int> Loop::getLexicalOrder(int stmt_num) const { + assert(stmt_num < stmt.size()); + + const int n = stmt[stmt_num].xform.n_out(); + std::vector<int> lex(n, 0); + + for (int i = 0; i < n; i += 2) + lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); + + return lex; +} + +// find the sub loop nest specified by stmt_num and level, +// only iteration space satisfiable statements returned. +std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); + + std::set<int> working; + for (int i = 0; i < stmt.size(); i++) + if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable() + && stmt[i].loop_level.size() >= level) + working.insert(i); + + for (int i = 1; i <= level; i++) { + int a = getLexicalOrder(stmt_num, i); + for (std::set<int>::iterator j = working.begin(); j != working.end();) { + int b = getLexicalOrder(*j, i); + if (b != a) + working.erase(j++); + else + ++j; + } + } + + return working; +} + +int Loop::getLexicalOrder(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); + + Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform; + for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++) + if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { + bool is_const = true; + for (Constr_Vars_Iter cvi(*e); cvi; cvi++) + if (cvi.curr_var() != r.output_var(2 * level - 1)) { + is_const = false; + break; + } + if (is_const) { + int t = static_cast<int>((*e).get_const()); + return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t; + } + } + + throw loop_error( + "can't find lexical order for statement " + to_string(stmt_num) + + "'s loop level " + to_string(level)); +} + +std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { + const int m = stmt.size(); + + std::set<int> same_loops; + for (int i = 0; i < m; i++) { + if (dim < 0) + same_loops.insert(i); + else { + std::vector<int> a_lex = getLexicalOrder(i); + int j; + for (j = 0; j <= dim; j += 2) + if (lex[j] != a_lex[j]) + break; + if (j > dim) + same_loops.insert(i); + } + + } + + return same_loops; +} + +void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { + const int m = stmt.size(); + + if (amount == 0) + return; + + for (int i = 0; i < m; i++) { + std::vector<int> lex2 = getLexicalOrder(i); + + bool need_shift = true; + + for (int j = 0; j < dim; j++) + if (lex2[j] != lex[j]) { + need_shift = false; + break; + } + + if (!need_shift) + continue; + + if (amount > 0) { + if (lex2[dim] < lex[dim]) + continue; + } else if (amount < 0) { + if (lex2[dim] > lex[dim]) + continue; + } + + assign_const(stmt[i].xform, dim, lex2[dim] + amount); + } +} + +std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, + int level) { + + std::set<int> not_nested_at_this_level; + std::map<ir_tree_node*, std::set<int> > sorted_by_loop; + std::map<int, std::set<int> > sorted_by_lex_order; + std::vector<std::set<int> > to_return; + bool lex_order_already_set = false; + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + if (stmt[*it].ir_stmt_node == NULL) + lex_order_already_set = true; + } + + if (lex_order_already_set) { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + std::map<int, std::set<int> >::iterator it2 = + sorted_by_lex_order.find( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var)); + + if (it2 != sorted_by_lex_order.end()) + it2->second.insert(*it); + else { + + std::set<int> to_insert; + + to_insert.insert(*it); + + sorted_by_lex_order.insert( + std::pair<int, std::set<int> >( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var), to_insert)); + + } + + } + + for (std::map<int, std::set<int> >::iterator it2 = + sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); + it2++) + to_return.push_back(it2->second); + + } else { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + ir_tree_node* itn = stmt[*it].ir_stmt_node; + itn = itn->parent; + while ((itn != NULL) && (itn->payload != level - 1)) + itn = itn->parent; + + if (itn == NULL) + not_nested_at_this_level.insert(*it); + else { + std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.find(itn); + + if (it2 != sorted_by_loop.end()) + it2->second.insert(*it); + else { + std::set<int> to_insert; + + to_insert.insert(*it); + + sorted_by_loop.insert( + std::pair<ir_tree_node*, std::set<int> >(itn, + to_insert)); + + } + + } + + } + if (not_nested_at_this_level.size() > 0) { + for (std::set<int>::iterator it = not_nested_at_this_level.begin(); + it != not_nested_at_this_level.end(); it++) { + std::set<int> temp; + temp.insert(*it); + to_return.push_back(temp); + + } + } + for (std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++) + to_return.push_back(it2->second); + } + return to_return; +} + +void update_successors(int n, int node_num[], int cant_fuse_with[], + Graph<std::set<int>, bool> &g, std::list<int> &work_list) { + + std::set<int> disconnect; + for (Graph<std::set<int>, bool>::EdgeList::iterator i = + g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { + int m = i->first; + + if (node_num[m] != -1) + throw loop_error("Graph input for fusion has cycles not a DAG!!"); + + std::vector<bool> check_ = g.getEdge(n, m); + + bool has_bad_edge_path = false; + for (int i = 0; i < check_.size(); i++) + if (!check_[i]) { + has_bad_edge_path = true; + break; + } + if (has_bad_edge_path) + cant_fuse_with[m] = std::max(cant_fuse_with[m], node_num[n]); + else + cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]); + disconnect.insert(m); + } + + + for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end(); + i++) { + g.disconnect(n, *i); + + bool no_incoming_edges = true; + for (int j = 0; j < g.vertex.size(); j++) + if (j != *i) + if (g.hasEdge(j, *i)) { + no_incoming_edges = false; + break; + } + + + if (no_incoming_edges) + work_list.push_back(*i); + } + +} + +Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( + std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) { + Graph<std::set<int>, bool> g; + + for (int i = 0; i < s.size(); i++) + g.insert(s[i]); + + for (int i = 0; i < s.size(); i++) { + + for (int j = i + 1; j < s.size(); j++) { + bool has_true_edge_i_to_j = false; + bool has_true_edge_j_to_i = false; + bool is_connected_i_to_j = false; + bool is_connected_j_to_i = false; + for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end(); + ii++) { + + for (std::set<int>::iterator jj = s[j].begin(); + jj != s[j].end(); jj++) { + + std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_i_to_j = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_i_to_j = true; + //break + } + } + + //if (is_connected) + + // break; + // if (has_true_edge_i_to_j && !is_connected_i_to_j) + // g.connect(i, j, true); + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (is_connected_i_to_j || has_true_edge_i_to_j) + throw loop_error( + "Graph input for fusion has cycles not a DAG!!"); + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_j_to_i = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_j_to_i = true; + //break; + } + } + + // if (is_connected) + //break; + // if (is_connected) + //break; + } + + + //if (is_connected) + // break; + } + + + if (is_connected_i_to_j) + g.connect(i, j, false); + else if (has_true_edge_i_to_j) + g.connect(i, j, true); + + if (is_connected_j_to_i) + g.connect(j, i, false); + else if (has_true_edge_j_to_i) + g.connect(j, i, true); + + + } + } + return g; +} + +std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) { + + bool roots[g.vertex.size()]; + + for (int i = 0; i < g.vertex.size(); i++) + roots[i] = true; + + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + + if (g.hasEdge(i, j)) + roots[j] = false; + + if (g.hasEdge(j, i)) + roots[i] = false; + + } + + std::list<int> work_list; + int cant_fuse_with[g.vertex.size()]; + std::vector<std::set<int> > s; + //Each Fused set's representative node + + int node_to_fused_nodes[g.vertex.size()]; + int node_num[g.vertex.size()]; + for (int i = 0; i < g.vertex.size(); i++) { + if (roots[i] == true) + work_list.push_back(i); + cant_fuse_with[i] = 0; + node_to_fused_nodes[i] = 0; + node_num[i] = -1; + } + // topological sort according to chun's permute algorithm + // std::vector<std::set<int> > s = g.topoSort(); + std::vector<std::set<int> > s2 = g.topoSort(); + if (work_list.empty() || (s2.size() != g.vertex.size())) { + + std::cout << s2.size() << "\t" << g.vertex.size() << std::endl; + throw loop_error("Input for fusion not a DAG!!"); + + + } + int fused_nodes_counter = 0; + while (!work_list.empty()) { + int n = work_list.front(); + //int n_ = g.vertex[n].first; + work_list.pop_front(); + int node; + if (cant_fuse_with[n] == 0) + node = 0; + else + node = cant_fuse_with[n]; + + if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) { + int rep_node = node_to_fused_nodes[node]; + node_num[n] = node_num[rep_node]; + + try { + update_successors(n, node_num, cant_fuse_with, g, work_list); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + + } + for (std::set<int>::iterator it = g.vertex[n].first.begin(); + it != g.vertex[n].first.end(); it++) + s[node].insert(*it); + } else { + //std::set<int> new_node; + //new_node.insert(n_); + s.push_back(g.vertex[n].first); + node_to_fused_nodes[node] = n; + node_num[n] = ++node; + try { + update_successors(n, node_num, cant_fuse_with, g, work_list); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + + } + fused_nodes_counter++; + } + } + + return s; +} + +void Loop::setLexicalOrder(int dim, const std::set<int> &active, + int starting_order, std::vector<std::vector<std::string> > idxNames) { + if (active.size() == 0) + return; + + // check for sanity of parameters + if (dim < 0 || dim % 2 != 0) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + std::vector<int> lex; + int ref_stmt_num; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if ((*i) < 0 || (*i) >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (dim >= stmt[*i].xform.n_out()) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + if (i == active.begin()) { + lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < dim; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements are not in the same sub loop nest"); + } + } + + // sepearate statements by current loop level types + int level = (dim + 2) / 2; + std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; + std::set<int> active_by_no_level; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (level > stmt[*i].loop_level.size()) + active_by_no_level.insert(*i); + else + active_by_level_type[std::make_pair( + stmt[*i].loop_level[level - 1].type, + stmt[*i].loop_level[level - 1].payload)].insert(*i); + } + + // further separate statements due to control dependences + std::vector<std::set<int> > active_by_level_type_splitted; + for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = + active_by_level_type.begin(); i != active_by_level_type.end(); i++) + active_by_level_type_splitted.push_back(i->second); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) + for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { + std::set<int> controlled, not_controlled; + for (std::set<int>::iterator k = + active_by_level_type_splitted[j].begin(); + k != active_by_level_type_splitted[j].end(); k++) { + std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); + bool is_controlled = false; + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].type = DEP_CONTROL) { + is_controlled = true; + break; + } + if (is_controlled) + controlled.insert(*k); + else + not_controlled.insert(*k); + } + if (controlled.size() != 0 && not_controlled.size() != 0) { + active_by_level_type_splitted.erase( + active_by_level_type_splitted.begin() + j); + active_by_level_type_splitted.push_back(controlled); + active_by_level_type_splitted.push_back(not_controlled); + } + } + + // set lexical order separating loops with different loop types first + if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { + int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + + Graph<std::set<int>, Empty> g; + for (std::vector<std::set<int> >::iterator i = + active_by_level_type_splitted.begin(); + i != active_by_level_type_splitted.end(); i++) + g.insert(*i); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, + *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, + *ii); + // find the sub loop nest specified by stmt_num and level, + // only iteration space satisfiable statements returned. + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + std::vector<std::set<int> > s = g.topoSort(); + if (s.size() != g.vertex.size()) + throw loop_error( + "cannot separate statements with different loop types at loop level " + + to_string(level)); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; + int sz = cur_scc.size(); + if (sz == 1) { + int cur_stmt = *(cur_scc.begin()); + assign_const(stmt[cur_stmt].xform, dim, order); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + order++; + } else { + setLexicalOrder(dim, cur_scc, order, idxNames); + order += sz; + } + } + } + // set lexical order seperating single iteration statements and loops + else { + std::set<int> true_singles; + std::set<int> nonsingles; + std::map<coef_t, std::set<int> > fake_singles; + std::set<int> fake_singles_; + + // sort out statements that do not require loops + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) { + Relation cur_IS = getNewIS(*i); + if (is_single_iteration(cur_IS, dim + 1)) { + bool is_all_single = true; + for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) + if (!is_single_iteration(cur_IS, j)) { + is_all_single = false; + break; + } + if (is_all_single) + true_singles.insert(*i); + else { + fake_singles_.insert(*i); + try { + fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( + *i); + } catch (const std::exception &e) { + fake_singles[posInfinity].insert(*i); + } + } + } else + nonsingles.insert(*i); + } + + + // split nonsingles forcibly according to negative dependences present (loop unfusible) + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + + if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { + + bool dummy_level_found = false; + + std::vector<std::set<int> > s; + + s = sort_by_same_loops(active, level); + bool further_levels_exist = false; + + if (!idxNames.empty()) + if (level <= idxNames[ref_stmt_num].size()) + if (idxNames[ref_stmt_num][level - 1].length() == 0) { + // && s.size() == 1) { + int order1 = 0; + dummy_level_found = true; + + for (int i = level; i < idxNames[ref_stmt_num].size(); + i++) + if (idxNames[ref_stmt_num][i].length() > 0) + further_levels_exist = true; + + } + + //if (!dummy_level_found) { + + if (s.size() > 1) { + + Graph<std::set<int>, bool> g = construct_induced_graph_at_level( + s, dep, dep_dim); + s = typed_fusion(g); + } + int order = 0; + for (int i = 0; i < s.size(); i++) { + + for (std::set<int>::iterator it = s[i].begin(); + it != s[i].end(); it++) + assign_const(stmt[*it].xform, dim, order); + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) + setLexicalOrder(dim + 2, s[i], order, idxNames); + + order++; + } + //} + /* else { + + int order1 = 0; + int order = 0; + for (std::set<int>::iterator i = active.begin(); + i != active.end(); i++) { + if (!further_levels_exist) + assign_const(stmt[*i].xform, dim, order1++); + else + assign_const(stmt[*i].xform, dim, order1); + + } + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist) + setLexicalOrder(dim + 2, active, order, idxNames); + } + */ + } else { + int dummy_order = 0; + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) + assign_const(stmt[*i].xform, dim, dummy_order++); + } + /*for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i+1; j < g2.vertex.size(); j++) { + std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(i, j); + break; + } + dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(j, i); + break; + } + } + + std::vector<std::set<int> > s2 = g2.packed_topoSort(); + + std::vector<std::set<int> > splitted_nonsingles; + for (int i = 0; i < s2.size(); i++) { + std::set<int> cur_scc; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + cur_scc.insert(g2.vertex[*j].first); + splitted_nonsingles.push_back(cur_scc); + } + */ + //convert to dependence graph for grouped statements + //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + /*int order = 0; + for (std::set<int>::iterator j = active.begin(); j != active.end(); + j++) { + std::set<int> continuous; + std::cout<< active.size()<<std::endl; + while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) { + continuous.insert(*j); + j++; + } + + printf("continuous size is %d\n", continuous.size()); + + + + if (continuous.size() > 0) { + std::vector<std::set<int> > s = typed_fusion(continuous, dep, + dep_dim); + + for (int i = 0; i < s.size(); i++) { + for (std::set<int>::iterator l = s[i].begin(); + l != s[i].end(); l++) { + assign_const(stmt[*l].xform, dim + 2, order); + setLexicalOrder(dim + 2, s[i]); + } + order++; + } + } + + if (j != active.end()) { + assign_const(stmt[*j].xform, dim + 2, order); + + for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2) + assign_const(stmt[*j].xform, k, 0); + order++; + } + + if( j == active.end()) + break; + } + */ + + + // assign lexical order + /*int order = starting_order; + for (int i = 0; i < s.size(); i++) { + // translate each SCC into original statements + std::set<int> cur_scc; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + copy(s[i].begin(), s[i].end(), + inserter(cur_scc, cur_scc.begin())); + + // now assign the constant + for (std::set<int>::iterator j = cur_scc.begin(); + j != cur_scc.end(); j++) + assign_const(stmt[*j].xform, dim, order); + + if (cur_scc.size() > 1) + setLexicalOrder(dim + 2, cur_scc); + else if (cur_scc.size() == 1) { + int cur_stmt = *(cur_scc.begin()); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + } + + if (cur_scc.size() > 0) + order++; + } + */ + } +} + +void Loop::apply_xform() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + apply_xform(active); +} + +void Loop::apply_xform(int stmt_num) { + std::set<int> active; + active.insert(stmt_num); + apply_xform(active); +} + +void Loop::apply_xform(std::set<int> &active) { + int max_n = 0; + + CG_outputBuilder *ocg = ir->builder(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].loop_level.size(); + if (n > max_n) + max_n = n; + + std::vector<int> lex = getLexicalOrder(*i); + + Relation mapping(2 * n + 1, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + mapping = Composition(mapping, stmt[*i].xform); + mapping.simplify(); + + // match omega input/output variables to variable names in the code + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); + for (int j = 1; j <= n; j++) + mapping.name_output_var(j, + tmp_loop_var_name_prefix + + to_string(tmp_loop_var_name_counter + j - 1)); + mapping.setup_names(); + + Relation known = Extend_Set(copy(this->known), + mapping.n_out() - this->known.n_set()); + //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); + std::vector<std::string> loop_vars; + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); + std::vector<CG_outputRepr *> subs = output_substitutions(ocg, + Inverse(copy(mapping)), + std::vector<std::pair<CG_outputRepr *, int> >(mapping.n_out(), + std::make_pair(static_cast<CG_outputRepr *>(NULL), 0))); + stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars, + subs); + stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); + stmt[*i].IS.simplify(); + + // replace original transformation relation with straight 1-1 mapping + mapping = Relation(n, 2 * n + 1); + f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_const(-lex[j - 1]); + } + stmt[*i].xform = mapping; + } + + tmp_loop_var_name_counter += max_n; +} + +void Loop::addKnown(const Relation &cond) { + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int n1 = this->known.n_set(); + + Relation r = copy(cond); + int n2 = r.n_set(); + + if (n1 < n2) + this->known = Extend_Set(this->known, n2 - n1); + else if (n1 > n2) + r = Extend_Set(r, n1 - n2); + + this->known = Intersection(this->known, r); +} + +void Loop::removeDependence(int stmt_num_from, int stmt_num_to) { + // check for sanity of parameters + if (stmt_num_from >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_from)); + if (stmt_num_to >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_to)); + + dep.disconnect(stmt_num_from, stmt_num_to); +} + +void Loop::dump() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { + if (stmt.size() == 0) + return true; + + // check for sanity of parameters + for (int i = 0; i < stmt.size(); i++) { + if (stmt[i].loop_level.size() != num_dep_dim) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + for (int j = 0; j < stmt[i].loop_level.size(); j++) + if (stmt[i].loop_level[j].type != LoopLevelOriginal) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + } + if (T.size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + for (int i = 0; i < stmt.size(); i++) + if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + // build relation from matrix + Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); + F_And *f_root = mapping.add_and(); + for (int i = 0; i < num_dep_dim; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (i + 1)), -1); + for (int j = 0; j < num_dep_dim; j++) + if (T[i][j] != 0) + h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); + if (T[i].size() == num_dep_dim + 1) + h.update_const(T[i][num_dep_dim]); + } + for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), -1); + h.update_coef(mapping.input_var(i), 1); + } + + // update transformation relations + for (int i = 0; i < stmt.size(); i++) + stmt[i].xform = Composition(copy(mapping), stmt[i].xform); + + // update dependence graph + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + switch (dv.type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim), ubounds( + num_dep_dim); + for (int p = 0; p < num_dep_dim; p++) { + coef_t lb = 0; + coef_t ub = 0; + for (int q = 0; q < num_dep_dim; q++) { + if (T[p][q] > 0) { + if (lb == -posInfinity + || dv.lbounds[q] == -posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.lbounds[q]; + if (ub == posInfinity + || dv.ubounds[q] == posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.ubounds[q]; + } else if (T[p][q] < 0) { + if (lb == -posInfinity + || dv.ubounds[q] == posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.ubounds[q]; + if (ub == posInfinity + || dv.lbounds[q] == -posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.lbounds[q]; + } + } + if (T[p].size() == num_dep_dim + 1) { + if (lb != -posInfinity) + lb += T[p][num_dep_dim]; + if (ub != posInfinity) + ub += T[p][num_dep_dim]; + } + lbounds[p] = lb; + ubounds[p] = ub; + } + dv.lbounds = lbounds; + dv.ubounds = ubounds; + + break; + } + default: + ; + } + } + j->second = dvs; + } + + // set constant loop values + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); + + return true; +} + + +bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, + const DependenceVector &dv, bool before) { + std::vector<int> lex_i = getLexicalOrder(i); + std::vector<int> lex_j = getLexicalOrder(j); + int last_dim; + if (!dv.is_scalar_dependence) { + for (last_dim = 0; + last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]); + last_dim++) + ; + last_dim = last_dim / 2; + if (last_dim == 0) + return true; + + for (int i = 0; i < last_dim; i++) { + if (dv.lbounds[i] > 0) + return true; + else if (dv.lbounds[i] < 0) + return false; + } + } + if (before) + return true; + + return false; + +} + |