summaryrefslogtreecommitdiff
path: root/src/loop.cc
diff options
context:
space:
mode:
authorTuowen Zhao <ztuowen@gmail.com>2016-09-19 21:14:58 +0000
committerTuowen Zhao <ztuowen@gmail.com>2016-09-19 21:14:58 +0000
commit210f77d2c32f14d2e99577fd3c9842bb19d47e50 (patch)
tree5edb327c919b8309e301c3440fb6668a0075c8ef /src/loop.cc
parenta66ce5cd670c4d3c0dc449720f5bc45dd4c281b8 (diff)
downloadchill-210f77d2c32f14d2e99577fd3c9842bb19d47e50.tar.gz
chill-210f77d2c32f14d2e99577fd3c9842bb19d47e50.tar.bz2
chill-210f77d2c32f14d2e99577fd3c9842bb19d47e50.zip
Moved most modules into lib
Diffstat (limited to 'src/loop.cc')
-rw-r--r--src/loop.cc1859
1 files changed, 1859 insertions, 0 deletions
diff --git a/src/loop.cc b/src/loop.cc
new file mode 100644
index 0000000..f187a50
--- /dev/null
+++ b/src/loop.cc
@@ -0,0 +1,1859 @@
+/*****************************************************************************
+ Copyright (C) 2008 University of Southern California
+ Copyright (C) 2009-2010 University of Utah
+ All Rights Reserved.
+
+ Purpose:
+ Core loop transformation functionality.
+
+ Notes:
+ "level" (starting from 1) means loop level and it corresponds to "dim"
+ (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,....,
+ c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3
+ in transformed iteration space, and variable 4 in Omega relation.
+ All c's are constant numbers only and they will not show up as actual loops.
+ Formula:
+ dim = 2*level - 1
+ var = dim + 1
+
+ History:
+ 10/2005 Created by Chun Chen.
+ 09/2009 Expand tile functionality, -chun
+ 10/2009 Initialize unfusible loop nest without bailing out, -chun
+*****************************************************************************/
+
+#include <limits.h>
+#include <math.h>
+#include <code_gen/codegen.h>
+#include <code_gen/CG_utils.h>
+#include <iostream>
+#include <algorithm>
+#include <map>
+#include "loop.hh"
+#include "omegatools.hh"
+#include "irtools.hh"
+#include "chill_error.hh"
+#include <string.h>
+#include <list>
+using namespace omega;
+
+const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change
+const std::string Loop::overflow_var_name_prefix = std::string("over");
+
+//-----------------------------------------------------------------------------
+// Class Loop
+//-----------------------------------------------------------------------------
+// --begin Anand: Added from CHiLL 0.2
+
+bool Loop::isInitialized() const {
+ return stmt.size() != 0 && !stmt[0].xform.is_null();
+}
+
+//--end Anand: added from CHiLL 0.2
+
+bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
+ std::vector<ir_tree_node *> &ir_stmt) {
+
+ ir_stmt = extract_ir_stmts(ir_tree);
+ stmt_nesting_level_.resize(ir_stmt.size());
+ std::vector<int> stmt_nesting_level(ir_stmt.size());
+ for (int i = 0; i < ir_stmt.size(); i++) {
+ ir_stmt[i]->payload = i;
+ int t = 0;
+ ir_tree_node *itn = ir_stmt[i];
+ while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP)
+ t++;
+ }
+ stmt_nesting_level_[i] = t;
+ stmt_nesting_level[i] = t;
+ }
+
+ stmt = std::vector<Statement>(ir_stmt.size());
+ int n_dim = -1;
+ int max_loc;
+ //std::vector<std::string> index;
+ for (int i = 0; i < ir_stmt.size(); i++) {
+ int max_nesting_level = -1;
+ int loc;
+ for (int j = 0; j < ir_stmt.size(); j++)
+ if (stmt_nesting_level[j] > max_nesting_level) {
+ max_nesting_level = stmt_nesting_level[j];
+ loc = j;
+ }
+
+ // most deeply nested statement acting as a reference point
+ if (n_dim == -1) {
+ n_dim = max_nesting_level;
+ max_loc = loc;
+
+ index = std::vector<std::string>(n_dim);
+
+ ir_tree_node *itn = ir_stmt[loc];
+ int cur_dim = n_dim - 1;
+ while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP) {
+ index[cur_dim] =
+ static_cast<IR_Loop *>(itn->content)->index()->name();
+ itn->payload = cur_dim--;
+ }
+ }
+ }
+
+ // align loops by names, temporary solution
+ ir_tree_node *itn = ir_stmt[loc];
+ int depth = stmt_nesting_level_[loc] - 1;
+ /* while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) {
+ std::string name = static_cast<IR_Loop *>(itn->content)->index()->name();
+ for (int j = 0; j < n_dim; j++)
+ if (index[j] == name) {
+ itn->payload = j;
+ break;
+ }
+ if (itn->payload == -1)
+ throw loop_error("no complex alignment yet");
+ }
+ }
+ */
+ for (int t = depth; t >= 0; t--) {
+ int y = t;
+ ir_tree_node *itn = ir_stmt[loc];
+
+ while ((itn->parent != NULL) && (y >= 0)) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP)
+ y--;
+ }
+
+ if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) {
+ CG_outputBuilder *ocg = ir->builder();
+
+ itn->payload = depth - t;
+
+ CG_outputRepr *code =
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+
+ std::vector<CG_outputRepr *> index_expr;
+ std::vector<std::string> old_index;
+ CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]);
+ index_expr.push_back(repl);
+ old_index.push_back(
+ static_cast<IR_Loop *>(itn->content)->index()->name());
+ code = ocg->CreateSubstitutedStmt(0, code, old_index,
+ index_expr);
+
+ replace.insert(std::pair<int, CG_outputRepr*>(loc, code));
+ //stmt[loc].code = code;
+
+ }
+ }
+
+ // set relation variable names
+ Relation r(n_dim);
+ F_And *f_root = r.add_and();
+ itn = ir_stmt[loc];
+ int temp_depth = depth;
+ while (itn->parent != NULL) {
+
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP) {
+ r.name_set_var(itn->payload + 1, index[temp_depth]);
+
+ temp_depth--;
+ }
+ //static_cast<IR_Loop *>(itn->content)->index()->name());
+ }
+
+ /*while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP)
+ r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name());
+ }*/
+
+ // extract information from loop/if structures
+ std::vector<bool> processed(n_dim, false);
+ std::vector<std::string> vars_to_be_reversed;
+ itn = ir_stmt[loc];
+ while (itn->parent != NULL) {
+ itn = itn->parent;
+
+ switch (itn->content->type()) {
+ case IR_CONTROL_LOOP: {
+ IR_Loop *lp = static_cast<IR_Loop *>(itn->content);
+ Variable_ID v = r.set_var(itn->payload + 1);
+ int c;
+
+ try {
+ c = lp->step_size();
+ if (c > 0) {
+ CG_outputRepr *lb = lp->lower_bound();
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, true);
+ CG_outputRepr *ub = lp->upper_bound();
+ IR_CONDITION_TYPE cond = lp->stop_cond();
+ if (cond == IR_COND_LT || cond == IR_COND_LE)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ cond, true);
+ else
+ throw ir_error("loop condition not supported");
+
+ } else if (c < 0) {
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *lb = lp->lower_bound();
+ lb = ocg->CreateMinus(NULL, lb);
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, true);
+ CG_outputRepr *ub = lp->upper_bound();
+ ub = ocg->CreateMinus(NULL, ub);
+ IR_CONDITION_TYPE cond = lp->stop_cond();
+ if (cond == IR_COND_GE)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ IR_COND_LE, true);
+ else if (cond == IR_COND_GT)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ IR_COND_LT, true);
+ else
+ throw ir_error("loop condition not supported");
+
+ vars_to_be_reversed.push_back(lp->index()->name());
+ } else
+ throw ir_error("loop step size zero");
+ } catch (const ir_error &e) {
+ for (int i = 0; i < itn->children.size(); i++)
+ delete itn->children[i];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ return false;
+ }
+
+ if (abs(c) != 1) {
+ F_Exists *f_exists = f_root->add_exists();
+ Variable_ID e = f_exists->declare();
+ F_And *f_and = f_exists->add_and();
+ Stride_Handle h = f_and->add_stride(abs(c));
+ if (c > 0)
+ h.update_coef(e, 1);
+ else
+ h.update_coef(e, -1);
+ h.update_coef(v, -1);
+ CG_outputRepr *lb = lp->lower_bound();
+ exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ,
+ true);
+ }
+
+ processed[itn->payload] = true;
+ break;
+ }
+ case IR_CONTROL_IF: {
+ CG_outputRepr *cond =
+ static_cast<IR_If *>(itn->content)->condition();
+ try {
+ if (itn->payload % 2 == 1)
+ exp2constraint(ir, r, f_root, freevar, cond, true);
+ else {
+ F_Not *f_not = f_root->add_not();
+ F_And *f_and = f_not->add_and();
+ exp2constraint(ir, r, f_and, freevar, cond, true);
+ }
+ } catch (const ir_error &e) {
+ std::vector<ir_tree_node *> *t;
+ if (itn->parent == NULL)
+ t = &ir_tree;
+ else
+ t = &(itn->parent->children);
+ int id = itn->payload;
+ int i = t->size() - 1;
+ while (i >= 0) {
+ if ((*t)[i] == itn) {
+ for (int j = 0; j < itn->children.size(); j++)
+ delete itn->children[j];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ } else if ((*t)[i]->payload >> 1 == id >> 1) {
+ delete (*t)[i];
+ t->erase(t->begin() + i);
+ }
+ i--;
+ }
+ return false;
+ }
+
+ break;
+ }
+ default:
+ for (int i = 0; i < itn->children.size(); i++)
+ delete itn->children[i];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ return false;
+ }
+ }
+
+ // add information for missing loops
+ for (int j = 0; j < n_dim; j++)
+ if (!processed[j]) {
+ ir_tree_node *itn = ir_stmt[max_loc];
+ while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP
+ && itn->payload == j)
+ break;
+ }
+
+ Variable_ID v = r.set_var(j + 1);
+ if (loc < max_loc) {
+
+ CG_outputBuilder *ocg = ir->builder();
+
+ CG_outputRepr *lb =
+ static_cast<IR_Loop *>(itn->content)->lower_bound();
+
+ exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ,
+ false);
+
+ /* if (ir->QueryExpOperation(
+ static_cast<IR_Loop *>(itn->content)->lower_bound())
+ == IR_OP_VARIABLE) {
+ IR_ScalarRef *ref =
+ static_cast<IR_ScalarRef *>(ir->Repr2Ref(
+ static_cast<IR_Loop *>(itn->content)->lower_bound()));
+ std::string name_ = ref->name();
+
+ for (int i = 0; i < index.size(); i++)
+ if (index[i] == name_) {
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, false);
+
+ CG_outputRepr *ub =
+ static_cast<IR_Loop *>(itn->content)->upper_bound();
+ IR_CONDITION_TYPE cond =
+ static_cast<IR_Loop *>(itn->content)->stop_cond();
+ if (cond == IR_COND_LT || cond == IR_COND_LE)
+ exp2formula(ir, r, f_root, freevar, ub, v,
+ 's', cond, false);
+
+
+
+ }
+
+ }
+ */
+
+ } else { // loc > max_loc
+
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *ub =
+ static_cast<IR_Loop *>(itn->content)->upper_bound();
+
+ exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ,
+ false);
+ /*if (ir->QueryExpOperation(
+ static_cast<IR_Loop *>(itn->content)->upper_bound())
+ == IR_OP_VARIABLE) {
+ IR_ScalarRef *ref =
+ static_cast<IR_ScalarRef *>(ir->Repr2Ref(
+ static_cast<IR_Loop *>(itn->content)->upper_bound()));
+ std::string name_ = ref->name();
+
+ for (int i = 0; i < index.size(); i++)
+ if (index[i] == name_) {
+
+ CG_outputRepr *lb =
+ static_cast<IR_Loop *>(itn->content)->lower_bound();
+
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, false);
+
+ CG_outputRepr *ub =
+ static_cast<IR_Loop *>(itn->content)->upper_bound();
+ IR_CONDITION_TYPE cond =
+ static_cast<IR_Loop *>(itn->content)->stop_cond();
+ if (cond == IR_COND_LT || cond == IR_COND_LE)
+ exp2formula(ir, r, f_root, freevar, ub, v,
+ 's', cond, false);
+
+
+ }
+ }
+ */
+ }
+ }
+
+ r.setup_names();
+ r.simplify();
+
+ // insert the statement
+ CG_outputBuilder *ocg = ir->builder();
+ std::vector<CG_outputRepr *> reverse_expr;
+ for (int j = 1; j <= vars_to_be_reversed.size(); j++) {
+ CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]);
+ repl = ocg->CreateMinus(NULL, repl);
+ reverse_expr.push_back(repl);
+ }
+ CG_outputRepr *code =
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+ code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed,
+ reverse_expr);
+ stmt[loc].code = code;
+ stmt[loc].IS = r;
+ stmt[loc].loop_level = std::vector<LoopLevel>(n_dim);
+ stmt[loc].ir_stmt_node = ir_stmt[loc];
+ for (int i = 0; i < n_dim; i++) {
+ stmt[loc].loop_level[i].type = LoopLevelOriginal;
+ stmt[loc].loop_level[i].payload = i;
+ stmt[loc].loop_level[i].parallel_level = 0;
+ }
+
+ stmt_nesting_level[loc] = -1;
+ }
+
+ return true;
+}
+
+Loop::Loop(const IR_Control *control) {
+
+ last_compute_cgr_ = NULL;
+ last_compute_cg_ = NULL;
+
+ ir = const_cast<IR_Code *>(control->ir_);
+ init_code = NULL;
+ cleanup_code = NULL;
+ tmp_loop_var_name_counter = 1;
+ overflow_var_name_counter = 1;
+ known = Relation::True(0);
+
+ ir_tree = build_ir_tree(control->clone(), NULL);
+ // std::vector<ir_tree_node *> ir_stmt;
+
+ while (!init_loop(ir_tree, ir_stmt)) {
+ }
+
+
+
+ for (int i = 0; i < stmt.size(); i++) {
+ std::map<int, CG_outputRepr*>::iterator it = replace.find(i);
+
+ if (it != replace.end())
+ stmt[i].code = it->second;
+ else
+ stmt[i].code = stmt[i].code;
+ }
+
+ if (stmt.size() != 0)
+ dep = DependenceGraph(stmt[0].IS.n_set());
+ else
+ dep = DependenceGraph(0);
+ // init the dependence graph
+ for (int i = 0; i < stmt.size(); i++)
+ dep.insert();
+
+ for (int i = 0; i < stmt.size(); i++)
+ for (int j = i; j < stmt.size(); j++) {
+ std::pair<std::vector<DependenceVector>,
+ std::vector<DependenceVector> > dv = test_data_dependences(
+ ir, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS,
+ freevar, index, stmt_nesting_level_[i],
+ stmt_nesting_level_[j]);
+
+ for (int k = 0; k < dv.first.size(); k++) {
+ if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k],
+ true))
+ dep.connect(i, j, dv.first[k]);
+ else {
+ dep.connect(j, i, dv.first[k].reverse());
+ }
+
+ }
+ for (int k = 0; k < dv.second.size(); k++)
+ if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k],
+ false))
+ dep.connect(j, i, dv.second[k]);
+ else {
+ dep.connect(i, j, dv.second[k].reverse());
+ }
+ // std::pair<std::vector<DependenceVector>,
+ // std::vector<DependenceVector> > dv_ = test_data_dependences(
+
+ }
+
+
+
+ // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0]
+ for (int i = 0; i < stmt.size(); i++) {
+ int n = stmt[i].IS.n_set();
+ stmt[i].xform = Relation(n, 2 * n + 1);
+ F_And *f_root = stmt[i].xform.add_and();
+
+ for (int j = 1; j <= n; j++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(stmt[i].xform.output_var(2 * j), 1);
+ h.update_coef(stmt[i].xform.input_var(j), -1);
+ }
+
+ for (int j = 1; j <= 2 * n + 1; j += 2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(stmt[i].xform.output_var(j), 1);
+ }
+ stmt[i].xform.simplify();
+ }
+
+ if (stmt.size() != 0)
+ num_dep_dim = stmt[0].IS.n_set();
+ else
+ num_dep_dim = 0;
+ // debug
+ /*for (int i = 0; i < stmt.size(); i++) {
+ std::cout << i << ": ";
+ //stmt[i].xform.print();
+ stmt[i].IS.print();
+ std::cout << std::endl;
+
+ }*/
+ //end debug
+}
+
+Loop::~Loop() {
+
+ delete last_compute_cgr_;
+ delete last_compute_cg_;
+
+ for (int i = 0; i < stmt.size(); i++)
+ if (stmt[i].code != NULL) {
+ stmt[i].code->clear();
+ delete stmt[i].code;
+ }
+
+ for (int i = 0; i < ir_tree.size(); i++)
+ delete ir_tree[i];
+
+ if (init_code != NULL) {
+ init_code->clear();
+ delete init_code;
+ }
+ if (cleanup_code != NULL) {
+ cleanup_code->clear();
+ delete cleanup_code;
+ }
+}
+
+int Loop::get_dep_dim_of(int stmt_num, int level) const {
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invaid statement " + to_string(stmt_num));
+
+ if (level < 1 || level > stmt[stmt_num].loop_level.size())
+ return -1;
+
+ int trip_count = 0;
+ while (true) {
+ switch (stmt[stmt_num].loop_level[level - 1].type) {
+ case LoopLevelOriginal:
+ return stmt[stmt_num].loop_level[level - 1].payload;
+ case LoopLevelTile:
+ level = stmt[stmt_num].loop_level[level - 1].payload;
+ if (level < 1)
+ return -1;
+ if (level > stmt[stmt_num].loop_level.size())
+ throw loop_error(
+ "incorrect loop level information for statement "
+ + to_string(stmt_num));
+ break;
+ default:
+ throw loop_error(
+ "unknown loop level information for statement "
+ + to_string(stmt_num));
+ }
+ trip_count++;
+ if (trip_count >= stmt[stmt_num].loop_level.size())
+ throw loop_error(
+ "incorrect loop level information for statement "
+ + to_string(stmt_num));
+ }
+}
+
+int Loop::get_last_dep_dim_before(int stmt_num, int level) const {
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invaid statement " + to_string(stmt_num));
+
+ if (level < 1)
+ return -1;
+ if (level > stmt[stmt_num].loop_level.size())
+ level = stmt[stmt_num].loop_level.size() + 1;
+
+ for (int i = level - 1; i >= 1; i--)
+ if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal)
+ return stmt[stmt_num].loop_level[i - 1].payload;
+
+ return -1;
+}
+
+void Loop::print_internal_loop_structure() const {
+ for (int i = 0; i < stmt.size(); i++) {
+ std::vector<int> lex = getLexicalOrder(i);
+ std::cout << "s" << i + 1 << ": ";
+ for (int j = 0; j < stmt[i].loop_level.size(); j++) {
+ if (2 * j < lex.size())
+ std::cout << lex[2 * j];
+ switch (stmt[i].loop_level[j].type) {
+ case LoopLevelOriginal:
+ std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ case LoopLevelTile:
+ std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ default:
+ std::cout << "(unknown)";
+ }
+ std::cout << ' ';
+ }
+ for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) {
+ std::cout << lex[j];
+ if (j != lex.size() - 1)
+ std::cout << ' ';
+ }
+ std::cout << std::endl;
+ }
+}
+
+CG_outputRepr *Loop::getCode(int effort) const {
+ const int m = stmt.size();
+ if (m == 0)
+ return NULL;
+ const int n = stmt[0].xform.n_out();
+
+ if (last_compute_cg_ == NULL) {
+ std::vector<Relation> IS(m);
+ std::vector<Relation> xforms(m);
+ for (int i = 0; i < m; i++) {
+ IS[i] = stmt[i].IS;
+ xforms[i] = stmt[i].xform;
+ }
+ Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
+
+ last_compute_cg_ = new CodeGen(xforms, IS, known);
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ }
+
+ if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
+ delete last_compute_cgr_;
+ last_compute_cgr_ = last_compute_cg_->buildAST(effort);
+ last_compute_effort_ = effort;
+ }
+
+ std::vector<CG_outputRepr *> stmts(m);
+ for (int i = 0; i < m; i++)
+ stmts[i] = stmt[i].code;
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts);
+
+ if (init_code != NULL)
+ repr = ocg->StmtListAppend(init_code->clone(), repr);
+ if (cleanup_code != NULL)
+ repr = ocg->StmtListAppend(repr, cleanup_code->clone());
+
+ return repr;
+}
+
+void Loop::printCode(int effort) const {
+ const int m = stmt.size();
+ if (m == 0)
+ return;
+ const int n = stmt[0].xform.n_out();
+
+ if (last_compute_cg_ == NULL) {
+ std::vector<Relation> IS(m);
+ std::vector<Relation> xforms(m);
+ for (int i = 0; i < m; i++) {
+ IS[i] = stmt[i].IS;
+ xforms[i] = stmt[i].xform;
+ }
+ Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
+
+ last_compute_cg_ = new CodeGen(xforms, IS, known);
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ }
+
+ if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
+ delete last_compute_cgr_;
+ last_compute_cgr_ = last_compute_cg_->buildAST(effort);
+ last_compute_effort_ = effort;
+ }
+
+ std::string repr = last_compute_cgr_->printString();
+ std::cout << repr << std::endl;
+}
+
+void Loop::printIterationSpace() const {
+ for (int i = 0; i < stmt.size(); i++) {
+ std::cout << "s" << i << ": ";
+ Relation r = getNewIS(i);
+ for (int j = 1; j <= r.n_inp(); j++)
+ r.name_input_var(j, CodeGen::loop_var_name_prefix + to_string(j));
+ r.setup_names();
+ r.print();
+ }
+}
+
+void Loop::printDependenceGraph() const {
+ if (dep.edgeCount() == 0)
+ std::cout << "no dependence exists" << std::endl;
+ else {
+ std::cout << "dependence graph:" << std::endl;
+ std::cout << dep;
+ }
+}
+
+Relation Loop::getNewIS(int stmt_num) const {
+ Relation result;
+
+ if (stmt[stmt_num].xform.is_null()) {
+ Relation known = Extend_Set(copy(this->known),
+ stmt[stmt_num].IS.n_set() - this->known.n_set());
+ result = Intersection(copy(stmt[stmt_num].IS), known);
+ } else {
+ Relation known = Extend_Set(copy(this->known),
+ stmt[stmt_num].xform.n_out() - this->known.n_set());
+ result = Intersection(
+ Range(
+ Restrict_Domain(copy(stmt[stmt_num].xform),
+ copy(stmt[stmt_num].IS))), known);
+ }
+
+ result.simplify(2, 4);
+
+ return result;
+}
+
+std::vector<Relation> Loop::getNewIS() const {
+ const int m = stmt.size();
+
+ std::vector<Relation> new_IS(m);
+ for (int i = 0; i < m; i++)
+ new_IS[i] = getNewIS(i);
+
+ return new_IS;
+}
+
+void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) {
+ // check sanity of parameters
+ if(stmt_num < 0)
+ throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *code = stmt[stmt_num].code;
+ ocg->CreatePragmaAttribute(code, level, pragmaText);
+}
+
+void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) {
+ // check sanity of parameters
+ if(stmt_num < 0)
+ throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *code = stmt[stmt_num].code;
+ ocg->CreatePrefetchAttribute(code, level, arrName, hint);
+}
+
+std::vector<int> Loop::getLexicalOrder(int stmt_num) const {
+ assert(stmt_num < stmt.size());
+
+ const int n = stmt[stmt_num].xform.n_out();
+ std::vector<int> lex(n, 0);
+
+ for (int i = 0; i < n; i += 2)
+ lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var);
+
+ return lex;
+}
+
+// find the sub loop nest specified by stmt_num and level,
+// only iteration space satisfiable statements returned.
+std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const {
+ assert(stmt_num >= 0 && stmt_num < stmt.size());
+ assert(level > 0 && level <= stmt[stmt_num].loop_level.size());
+
+ std::set<int> working;
+ for (int i = 0; i < stmt.size(); i++)
+ if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable()
+ && stmt[i].loop_level.size() >= level)
+ working.insert(i);
+
+ for (int i = 1; i <= level; i++) {
+ int a = getLexicalOrder(stmt_num, i);
+ for (std::set<int>::iterator j = working.begin(); j != working.end();) {
+ int b = getLexicalOrder(*j, i);
+ if (b != a)
+ working.erase(j++);
+ else
+ ++j;
+ }
+ }
+
+ return working;
+}
+
+int Loop::getLexicalOrder(int stmt_num, int level) const {
+ assert(stmt_num >= 0 && stmt_num < stmt.size());
+ assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1);
+
+ Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform;
+ for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++)
+ if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) {
+ bool is_const = true;
+ for (Constr_Vars_Iter cvi(*e); cvi; cvi++)
+ if (cvi.curr_var() != r.output_var(2 * level - 1)) {
+ is_const = false;
+ break;
+ }
+ if (is_const) {
+ int t = static_cast<int>((*e).get_const());
+ return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t;
+ }
+ }
+
+ throw loop_error(
+ "can't find lexical order for statement " + to_string(stmt_num)
+ + "'s loop level " + to_string(level));
+}
+
+std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const {
+ const int m = stmt.size();
+
+ std::set<int> same_loops;
+ for (int i = 0; i < m; i++) {
+ if (dim < 0)
+ same_loops.insert(i);
+ else {
+ std::vector<int> a_lex = getLexicalOrder(i);
+ int j;
+ for (j = 0; j <= dim; j += 2)
+ if (lex[j] != a_lex[j])
+ break;
+ if (j > dim)
+ same_loops.insert(i);
+ }
+
+ }
+
+ return same_loops;
+}
+
+void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) {
+ const int m = stmt.size();
+
+ if (amount == 0)
+ return;
+
+ for (int i = 0; i < m; i++) {
+ std::vector<int> lex2 = getLexicalOrder(i);
+
+ bool need_shift = true;
+
+ for (int j = 0; j < dim; j++)
+ if (lex2[j] != lex[j]) {
+ need_shift = false;
+ break;
+ }
+
+ if (!need_shift)
+ continue;
+
+ if (amount > 0) {
+ if (lex2[dim] < lex[dim])
+ continue;
+ } else if (amount < 0) {
+ if (lex2[dim] > lex[dim])
+ continue;
+ }
+
+ assign_const(stmt[i].xform, dim, lex2[dim] + amount);
+ }
+}
+
+std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,
+ int level) {
+
+ std::set<int> not_nested_at_this_level;
+ std::map<ir_tree_node*, std::set<int> > sorted_by_loop;
+ std::map<int, std::set<int> > sorted_by_lex_order;
+ std::vector<std::set<int> > to_return;
+ bool lex_order_already_set = false;
+ for (std::set<int>::iterator it = active.begin(); it != active.end();
+ it++) {
+
+ if (stmt[*it].ir_stmt_node == NULL)
+ lex_order_already_set = true;
+ }
+
+ if (lex_order_already_set) {
+
+ for (std::set<int>::iterator it = active.begin(); it != active.end();
+ it++) {
+ std::map<int, std::set<int> >::iterator it2 =
+ sorted_by_lex_order.find(
+ get_const(stmt[*it].xform, 2 * (level - 1),
+ Output_Var));
+
+ if (it2 != sorted_by_lex_order.end())
+ it2->second.insert(*it);
+ else {
+
+ std::set<int> to_insert;
+
+ to_insert.insert(*it);
+
+ sorted_by_lex_order.insert(
+ std::pair<int, std::set<int> >(
+ get_const(stmt[*it].xform, 2 * (level - 1),
+ Output_Var), to_insert));
+
+ }
+
+ }
+
+ for (std::map<int, std::set<int> >::iterator it2 =
+ sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end();
+ it2++)
+ to_return.push_back(it2->second);
+
+ } else {
+
+ for (std::set<int>::iterator it = active.begin(); it != active.end();
+ it++) {
+
+ ir_tree_node* itn = stmt[*it].ir_stmt_node;
+ itn = itn->parent;
+ while ((itn != NULL) && (itn->payload != level - 1))
+ itn = itn->parent;
+
+ if (itn == NULL)
+ not_nested_at_this_level.insert(*it);
+ else {
+ std::map<ir_tree_node*, std::set<int> >::iterator it2 =
+ sorted_by_loop.find(itn);
+
+ if (it2 != sorted_by_loop.end())
+ it2->second.insert(*it);
+ else {
+ std::set<int> to_insert;
+
+ to_insert.insert(*it);
+
+ sorted_by_loop.insert(
+ std::pair<ir_tree_node*, std::set<int> >(itn,
+ to_insert));
+
+ }
+
+ }
+
+ }
+ if (not_nested_at_this_level.size() > 0) {
+ for (std::set<int>::iterator it = not_nested_at_this_level.begin();
+ it != not_nested_at_this_level.end(); it++) {
+ std::set<int> temp;
+ temp.insert(*it);
+ to_return.push_back(temp);
+
+ }
+ }
+ for (std::map<ir_tree_node*, std::set<int> >::iterator it2 =
+ sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++)
+ to_return.push_back(it2->second);
+ }
+ return to_return;
+}
+
+void update_successors(int n, int node_num[], int cant_fuse_with[],
+ Graph<std::set<int>, bool> &g, std::list<int> &work_list) {
+
+ std::set<int> disconnect;
+ for (Graph<std::set<int>, bool>::EdgeList::iterator i =
+ g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) {
+ int m = i->first;
+
+ if (node_num[m] != -1)
+ throw loop_error("Graph input for fusion has cycles not a DAG!!");
+
+ std::vector<bool> check_ = g.getEdge(n, m);
+
+ bool has_bad_edge_path = false;
+ for (int i = 0; i < check_.size(); i++)
+ if (!check_[i]) {
+ has_bad_edge_path = true;
+ break;
+ }
+ if (has_bad_edge_path)
+ cant_fuse_with[m] = std::max(cant_fuse_with[m], node_num[n]);
+ else
+ cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]);
+ disconnect.insert(m);
+ }
+
+
+ for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end();
+ i++) {
+ g.disconnect(n, *i);
+
+ bool no_incoming_edges = true;
+ for (int j = 0; j < g.vertex.size(); j++)
+ if (j != *i)
+ if (g.hasEdge(j, *i)) {
+ no_incoming_edges = false;
+ break;
+ }
+
+
+ if (no_incoming_edges)
+ work_list.push_back(*i);
+ }
+
+}
+
+Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(
+ std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) {
+ Graph<std::set<int>, bool> g;
+
+ for (int i = 0; i < s.size(); i++)
+ g.insert(s[i]);
+
+ for (int i = 0; i < s.size(); i++) {
+
+ for (int j = i + 1; j < s.size(); j++) {
+ bool has_true_edge_i_to_j = false;
+ bool has_true_edge_j_to_i = false;
+ bool is_connected_i_to_j = false;
+ bool is_connected_j_to_i = false;
+ for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end();
+ ii++) {
+
+ for (std::set<int>::iterator jj = s[j].begin();
+ jj != s[j].end(); jj++) {
+
+ std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence()
+ || (dvs[k].is_data_dependence()
+ && dvs[k].has_been_carried_at(dep_dim))) {
+
+ if (dvs[k].is_data_dependence()
+ && dvs[k].has_negative_been_carried_at(
+ dep_dim)) {
+ //g.connect(i, j, false);
+ is_connected_i_to_j = true;
+ break;
+ } else {
+ //g.connect(i, j, true);
+
+ has_true_edge_i_to_j = true;
+ //break
+ }
+ }
+
+ //if (is_connected)
+
+ // break;
+ // if (has_true_edge_i_to_j && !is_connected_i_to_j)
+ // g.connect(i, j, true);
+ dvs = dep.getEdge(*jj, *ii);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence()
+ || (dvs[k].is_data_dependence()
+ && dvs[k].has_been_carried_at(dep_dim))) {
+
+ if (is_connected_i_to_j || has_true_edge_i_to_j)
+ throw loop_error(
+ "Graph input for fusion has cycles not a DAG!!");
+
+ if (dvs[k].is_data_dependence()
+ && dvs[k].has_negative_been_carried_at(
+ dep_dim)) {
+ //g.connect(i, j, false);
+ is_connected_j_to_i = true;
+ break;
+ } else {
+ //g.connect(i, j, true);
+
+ has_true_edge_j_to_i = true;
+ //break;
+ }
+ }
+
+ // if (is_connected)
+ //break;
+ // if (is_connected)
+ //break;
+ }
+
+
+ //if (is_connected)
+ // break;
+ }
+
+
+ if (is_connected_i_to_j)
+ g.connect(i, j, false);
+ else if (has_true_edge_i_to_j)
+ g.connect(i, j, true);
+
+ if (is_connected_j_to_i)
+ g.connect(j, i, false);
+ else if (has_true_edge_j_to_i)
+ g.connect(j, i, true);
+
+
+ }
+ }
+ return g;
+}
+
+std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) {
+
+ bool roots[g.vertex.size()];
+
+ for (int i = 0; i < g.vertex.size(); i++)
+ roots[i] = true;
+
+ for (int i = 0; i < g.vertex.size(); i++)
+ for (int j = i + 1; j < g.vertex.size(); j++) {
+
+ if (g.hasEdge(i, j))
+ roots[j] = false;
+
+ if (g.hasEdge(j, i))
+ roots[i] = false;
+
+ }
+
+ std::list<int> work_list;
+ int cant_fuse_with[g.vertex.size()];
+ std::vector<std::set<int> > s;
+ //Each Fused set's representative node
+
+ int node_to_fused_nodes[g.vertex.size()];
+ int node_num[g.vertex.size()];
+ for (int i = 0; i < g.vertex.size(); i++) {
+ if (roots[i] == true)
+ work_list.push_back(i);
+ cant_fuse_with[i] = 0;
+ node_to_fused_nodes[i] = 0;
+ node_num[i] = -1;
+ }
+ // topological sort according to chun's permute algorithm
+ // std::vector<std::set<int> > s = g.topoSort();
+ std::vector<std::set<int> > s2 = g.topoSort();
+ if (work_list.empty() || (s2.size() != g.vertex.size())) {
+
+ std::cout << s2.size() << "\t" << g.vertex.size() << std::endl;
+ throw loop_error("Input for fusion not a DAG!!");
+
+
+ }
+ int fused_nodes_counter = 0;
+ while (!work_list.empty()) {
+ int n = work_list.front();
+ //int n_ = g.vertex[n].first;
+ work_list.pop_front();
+ int node;
+ if (cant_fuse_with[n] == 0)
+ node = 0;
+ else
+ node = cant_fuse_with[n];
+
+ if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) {
+ int rep_node = node_to_fused_nodes[node];
+ node_num[n] = node_num[rep_node];
+
+ try {
+ update_successors(n, node_num, cant_fuse_with, g, work_list);
+ } catch (const loop_error &e) {
+
+ throw loop_error(
+ "statements cannot be fused together due to negative dependence");
+
+
+ }
+ for (std::set<int>::iterator it = g.vertex[n].first.begin();
+ it != g.vertex[n].first.end(); it++)
+ s[node].insert(*it);
+ } else {
+ //std::set<int> new_node;
+ //new_node.insert(n_);
+ s.push_back(g.vertex[n].first);
+ node_to_fused_nodes[node] = n;
+ node_num[n] = ++node;
+ try {
+ update_successors(n, node_num, cant_fuse_with, g, work_list);
+ } catch (const loop_error &e) {
+
+ throw loop_error(
+ "statements cannot be fused together due to negative dependence");
+
+
+ }
+ fused_nodes_counter++;
+ }
+ }
+
+ return s;
+}
+
+void Loop::setLexicalOrder(int dim, const std::set<int> &active,
+ int starting_order, std::vector<std::vector<std::string> > idxNames) {
+ if (active.size() == 0)
+ return;
+
+ // check for sanity of parameters
+ if (dim < 0 || dim % 2 != 0)
+ throw std::invalid_argument(
+ "invalid constant loop level to set lexicographical order");
+ std::vector<int> lex;
+ int ref_stmt_num;
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+ if ((*i) < 0 || (*i) >= stmt.size())
+ throw std::invalid_argument(
+ "invalid statement number " + to_string(*i));
+ if (dim >= stmt[*i].xform.n_out())
+ throw std::invalid_argument(
+ "invalid constant loop level to set lexicographical order");
+ if (i == active.begin()) {
+ lex = getLexicalOrder(*i);
+ ref_stmt_num = *i;
+ } else {
+ std::vector<int> lex2 = getLexicalOrder(*i);
+ for (int j = 0; j < dim; j += 2)
+ if (lex[j] != lex2[j])
+ throw std::invalid_argument(
+ "statements are not in the same sub loop nest");
+ }
+ }
+
+ // sepearate statements by current loop level types
+ int level = (dim + 2) / 2;
+ std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type;
+ std::set<int> active_by_no_level;
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+ if (level > stmt[*i].loop_level.size())
+ active_by_no_level.insert(*i);
+ else
+ active_by_level_type[std::make_pair(
+ stmt[*i].loop_level[level - 1].type,
+ stmt[*i].loop_level[level - 1].payload)].insert(*i);
+ }
+
+ // further separate statements due to control dependences
+ std::vector<std::set<int> > active_by_level_type_splitted;
+ for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i =
+ active_by_level_type.begin(); i != active_by_level_type.end(); i++)
+ active_by_level_type_splitted.push_back(i->second);
+ for (std::set<int>::iterator i = active_by_no_level.begin();
+ i != active_by_no_level.end(); i++)
+ for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) {
+ std::set<int> controlled, not_controlled;
+ for (std::set<int>::iterator k =
+ active_by_level_type_splitted[j].begin();
+ k != active_by_level_type_splitted[j].end(); k++) {
+ std::vector<DependenceVector> dvs = dep.getEdge(*i, *k);
+ bool is_controlled = false;
+ for (int kk = 0; kk < dvs.size(); kk++)
+ if (dvs[kk].type = DEP_CONTROL) {
+ is_controlled = true;
+ break;
+ }
+ if (is_controlled)
+ controlled.insert(*k);
+ else
+ not_controlled.insert(*k);
+ }
+ if (controlled.size() != 0 && not_controlled.size() != 0) {
+ active_by_level_type_splitted.erase(
+ active_by_level_type_splitted.begin() + j);
+ active_by_level_type_splitted.push_back(controlled);
+ active_by_level_type_splitted.push_back(not_controlled);
+ }
+ }
+
+ // set lexical order separating loops with different loop types first
+ if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) {
+ int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1;
+
+ Graph<std::set<int>, Empty> g;
+ for (std::vector<std::set<int> >::iterator i =
+ active_by_level_type_splitted.begin();
+ i != active_by_level_type_splitted.end(); i++)
+ g.insert(*i);
+ for (std::set<int>::iterator i = active_by_no_level.begin();
+ i != active_by_no_level.end(); i++) {
+ std::set<int> t;
+ t.insert(*i);
+ g.insert(t);
+ }
+ for (int i = 0; i < g.vertex.size(); i++)
+ for (int j = i + 1; j < g.vertex.size(); j++) {
+ bool connected = false;
+ for (std::set<int>::iterator ii = g.vertex[i].first.begin();
+ ii != g.vertex[i].first.end(); ii++) {
+ for (std::set<int>::iterator jj = g.vertex[j].first.begin();
+ jj != g.vertex[j].first.end(); jj++) {
+ std::vector<DependenceVector> dvs = dep.getEdge(*ii,
+ *jj);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence()
+ || (dvs[k].is_data_dependence()
+ && !dvs[k].has_been_carried_before(
+ dep_dim))) {
+ g.connect(i, j);
+ connected = true;
+ break;
+ }
+ if (connected)
+ break;
+ }
+ if (connected)
+ break;
+ }
+ connected = false;
+ for (std::set<int>::iterator ii = g.vertex[i].first.begin();
+ ii != g.vertex[i].first.end(); ii++) {
+ for (std::set<int>::iterator jj = g.vertex[j].first.begin();
+ jj != g.vertex[j].first.end(); jj++) {
+ std::vector<DependenceVector> dvs = dep.getEdge(*jj,
+ *ii);
+ // find the sub loop nest specified by stmt_num and level,
+ // only iteration space satisfiable statements returned.
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence()
+ || (dvs[k].is_data_dependence()
+ && !dvs[k].has_been_carried_before(
+ dep_dim))) {
+ g.connect(j, i);
+ connected = true;
+ break;
+ }
+ if (connected)
+ break;
+ }
+ if (connected)
+ break;
+ }
+ }
+
+ std::vector<std::set<int> > s = g.topoSort();
+ if (s.size() != g.vertex.size())
+ throw loop_error(
+ "cannot separate statements with different loop types at loop level "
+ + to_string(level));
+
+ // assign lexical order
+ int order = starting_order;
+ for (int i = 0; i < s.size(); i++) {
+ std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first;
+ int sz = cur_scc.size();
+ if (sz == 1) {
+ int cur_stmt = *(cur_scc.begin());
+ assign_const(stmt[cur_stmt].xform, dim, order);
+ for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2)
+ assign_const(stmt[cur_stmt].xform, j, 0);
+ order++;
+ } else {
+ setLexicalOrder(dim, cur_scc, order, idxNames);
+ order += sz;
+ }
+ }
+ }
+ // set lexical order seperating single iteration statements and loops
+ else {
+ std::set<int> true_singles;
+ std::set<int> nonsingles;
+ std::map<coef_t, std::set<int> > fake_singles;
+ std::set<int> fake_singles_;
+
+ // sort out statements that do not require loops
+ for (std::set<int>::iterator i = active.begin(); i != active.end();
+ i++) {
+ Relation cur_IS = getNewIS(*i);
+ if (is_single_iteration(cur_IS, dim + 1)) {
+ bool is_all_single = true;
+ for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2)
+ if (!is_single_iteration(cur_IS, j)) {
+ is_all_single = false;
+ break;
+ }
+ if (is_all_single)
+ true_singles.insert(*i);
+ else {
+ fake_singles_.insert(*i);
+ try {
+ fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert(
+ *i);
+ } catch (const std::exception &e) {
+ fake_singles[posInfinity].insert(*i);
+ }
+ }
+ } else
+ nonsingles.insert(*i);
+ }
+
+
+ // split nonsingles forcibly according to negative dependences present (loop unfusible)
+ int dep_dim = get_dep_dim_of(ref_stmt_num, level);
+
+ if (dim < stmt[ref_stmt_num].xform.n_out() - 1) {
+
+ bool dummy_level_found = false;
+
+ std::vector<std::set<int> > s;
+
+ s = sort_by_same_loops(active, level);
+ bool further_levels_exist = false;
+
+ if (!idxNames.empty())
+ if (level <= idxNames[ref_stmt_num].size())
+ if (idxNames[ref_stmt_num][level - 1].length() == 0) {
+ // && s.size() == 1) {
+ int order1 = 0;
+ dummy_level_found = true;
+
+ for (int i = level; i < idxNames[ref_stmt_num].size();
+ i++)
+ if (idxNames[ref_stmt_num][i].length() > 0)
+ further_levels_exist = true;
+
+ }
+
+ //if (!dummy_level_found) {
+
+ if (s.size() > 1) {
+
+ Graph<std::set<int>, bool> g = construct_induced_graph_at_level(
+ s, dep, dep_dim);
+ s = typed_fusion(g);
+ }
+ int order = 0;
+ for (int i = 0; i < s.size(); i++) {
+
+ for (std::set<int>::iterator it = s[i].begin();
+ it != s[i].end(); it++)
+ assign_const(stmt[*it].xform, dim, order);
+
+ if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1))
+ setLexicalOrder(dim + 2, s[i], order, idxNames);
+
+ order++;
+ }
+ //}
+ /* else {
+
+ int order1 = 0;
+ int order = 0;
+ for (std::set<int>::iterator i = active.begin();
+ i != active.end(); i++) {
+ if (!further_levels_exist)
+ assign_const(stmt[*i].xform, dim, order1++);
+ else
+ assign_const(stmt[*i].xform, dim, order1);
+
+ }
+
+ if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist)
+ setLexicalOrder(dim + 2, active, order, idxNames);
+ }
+ */
+ } else {
+ int dummy_order = 0;
+ for (std::set<int>::iterator i = active.begin(); i != active.end();
+ i++)
+ assign_const(stmt[*i].xform, dim, dummy_order++);
+ }
+ /*for (int i = 0; i < g2.vertex.size(); i++)
+ for (int j = i+1; j < g2.vertex.size(); j++) {
+ std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence() ||
+ (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) {
+ g2.connect(i, j);
+ break;
+ }
+ dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence() ||
+ (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) {
+ g2.connect(j, i);
+ break;
+ }
+ }
+
+ std::vector<std::set<int> > s2 = g2.packed_topoSort();
+
+ std::vector<std::set<int> > splitted_nonsingles;
+ for (int i = 0; i < s2.size(); i++) {
+ std::set<int> cur_scc;
+ for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++)
+ cur_scc.insert(g2.vertex[*j].first);
+ splitted_nonsingles.push_back(cur_scc);
+ }
+ */
+ //convert to dependence graph for grouped statements
+ //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1;
+ /*int order = 0;
+ for (std::set<int>::iterator j = active.begin(); j != active.end();
+ j++) {
+ std::set<int> continuous;
+ std::cout<< active.size()<<std::endl;
+ while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) {
+ continuous.insert(*j);
+ j++;
+ }
+
+ printf("continuous size is %d\n", continuous.size());
+
+
+
+ if (continuous.size() > 0) {
+ std::vector<std::set<int> > s = typed_fusion(continuous, dep,
+ dep_dim);
+
+ for (int i = 0; i < s.size(); i++) {
+ for (std::set<int>::iterator l = s[i].begin();
+ l != s[i].end(); l++) {
+ assign_const(stmt[*l].xform, dim + 2, order);
+ setLexicalOrder(dim + 2, s[i]);
+ }
+ order++;
+ }
+ }
+
+ if (j != active.end()) {
+ assign_const(stmt[*j].xform, dim + 2, order);
+
+ for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2)
+ assign_const(stmt[*j].xform, k, 0);
+ order++;
+ }
+
+ if( j == active.end())
+ break;
+ }
+ */
+
+
+ // assign lexical order
+ /*int order = starting_order;
+ for (int i = 0; i < s.size(); i++) {
+ // translate each SCC into original statements
+ std::set<int> cur_scc;
+ for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++)
+ copy(s[i].begin(), s[i].end(),
+ inserter(cur_scc, cur_scc.begin()));
+
+ // now assign the constant
+ for (std::set<int>::iterator j = cur_scc.begin();
+ j != cur_scc.end(); j++)
+ assign_const(stmt[*j].xform, dim, order);
+
+ if (cur_scc.size() > 1)
+ setLexicalOrder(dim + 2, cur_scc);
+ else if (cur_scc.size() == 1) {
+ int cur_stmt = *(cur_scc.begin());
+ for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2)
+ assign_const(stmt[cur_stmt].xform, j, 0);
+ }
+
+ if (cur_scc.size() > 0)
+ order++;
+ }
+ */
+ }
+}
+
+void Loop::apply_xform() {
+ std::set<int> active;
+ for (int i = 0; i < stmt.size(); i++)
+ active.insert(i);
+ apply_xform(active);
+}
+
+void Loop::apply_xform(int stmt_num) {
+ std::set<int> active;
+ active.insert(stmt_num);
+ apply_xform(active);
+}
+
+void Loop::apply_xform(std::set<int> &active) {
+ int max_n = 0;
+
+ CG_outputBuilder *ocg = ir->builder();
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+ int n = stmt[*i].loop_level.size();
+ if (n > max_n)
+ max_n = n;
+
+ std::vector<int> lex = getLexicalOrder(*i);
+
+ Relation mapping(2 * n + 1, n);
+ F_And *f_root = mapping.add_and();
+ for (int j = 1; j <= n; j++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.output_var(j), 1);
+ h.update_coef(mapping.input_var(2 * j), -1);
+ }
+ mapping = Composition(mapping, stmt[*i].xform);
+ mapping.simplify();
+
+ // match omega input/output variables to variable names in the code
+ for (int j = 1; j <= stmt[*i].IS.n_set(); j++)
+ mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name());
+ for (int j = 1; j <= n; j++)
+ mapping.name_output_var(j,
+ tmp_loop_var_name_prefix
+ + to_string(tmp_loop_var_name_counter + j - 1));
+ mapping.setup_names();
+
+ Relation known = Extend_Set(copy(this->known),
+ mapping.n_out() - this->known.n_set());
+ //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL));
+ std::vector<std::string> loop_vars;
+ for (int j = 1; j <= stmt[*i].IS.n_set(); j++)
+ loop_vars.push_back(stmt[*i].IS.set_var(j)->name());
+ std::vector<CG_outputRepr *> subs = output_substitutions(ocg,
+ Inverse(copy(mapping)),
+ std::vector<std::pair<CG_outputRepr *, int> >(mapping.n_out(),
+ std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)));
+ stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars,
+ subs);
+ stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS));
+ stmt[*i].IS.simplify();
+
+ // replace original transformation relation with straight 1-1 mapping
+ mapping = Relation(n, 2 * n + 1);
+ f_root = mapping.add_and();
+ for (int j = 1; j <= n; j++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.output_var(2 * j), 1);
+ h.update_coef(mapping.input_var(j), -1);
+ }
+ for (int j = 1; j <= 2 * n + 1; j += 2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.output_var(j), 1);
+ h.update_const(-lex[j - 1]);
+ }
+ stmt[*i].xform = mapping;
+ }
+
+ tmp_loop_var_name_counter += max_n;
+}
+
+void Loop::addKnown(const Relation &cond) {
+
+ // invalidate saved codegen computation
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+
+ int n1 = this->known.n_set();
+
+ Relation r = copy(cond);
+ int n2 = r.n_set();
+
+ if (n1 < n2)
+ this->known = Extend_Set(this->known, n2 - n1);
+ else if (n1 > n2)
+ r = Extend_Set(r, n1 - n2);
+
+ this->known = Intersection(this->known, r);
+}
+
+void Loop::removeDependence(int stmt_num_from, int stmt_num_to) {
+ // check for sanity of parameters
+ if (stmt_num_from >= stmt.size())
+ throw std::invalid_argument(
+ "invalid statement number " + to_string(stmt_num_from));
+ if (stmt_num_to >= stmt.size())
+ throw std::invalid_argument(
+ "invalid statement number " + to_string(stmt_num_to));
+
+ dep.disconnect(stmt_num_from, stmt_num_to);
+}
+
+void Loop::dump() const {
+ for (int i = 0; i < stmt.size(); i++) {
+ std::vector<int> lex = getLexicalOrder(i);
+ std::cout << "s" << i + 1 << ": ";
+ for (int j = 0; j < stmt[i].loop_level.size(); j++) {
+ if (2 * j < lex.size())
+ std::cout << lex[2 * j];
+ switch (stmt[i].loop_level[j].type) {
+ case LoopLevelOriginal:
+ std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ case LoopLevelTile:
+ std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ default:
+ std::cout << "(unknown)";
+ }
+ std::cout << ' ';
+ }
+ for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) {
+ std::cout << lex[j];
+ if (j != lex.size() - 1)
+ std::cout << ' ';
+ }
+ std::cout << std::endl;
+ }
+}
+
+bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {
+ if (stmt.size() == 0)
+ return true;
+
+ // check for sanity of parameters
+ for (int i = 0; i < stmt.size(); i++) {
+ if (stmt[i].loop_level.size() != num_dep_dim)
+ throw std::invalid_argument(
+ "nonsingular loop transformations must be applied to original perfect loop nest");
+ for (int j = 0; j < stmt[i].loop_level.size(); j++)
+ if (stmt[i].loop_level[j].type != LoopLevelOriginal)
+ throw std::invalid_argument(
+ "nonsingular loop transformations must be applied to original perfect loop nest");
+ }
+ if (T.size() != num_dep_dim)
+ throw std::invalid_argument("invalid transformation matrix");
+ for (int i = 0; i < stmt.size(); i++)
+ if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim)
+ throw std::invalid_argument("invalid transformation matrix");
+ // invalidate saved codegen computation
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+ // build relation from matrix
+ Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1);
+ F_And *f_root = mapping.add_and();
+ for (int i = 0; i < num_dep_dim; i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.output_var(2 * (i + 1)), -1);
+ for (int j = 0; j < num_dep_dim; j++)
+ if (T[i][j] != 0)
+ h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]);
+ if (T[i].size() == num_dep_dim + 1)
+ h.update_const(T[i][num_dep_dim]);
+ }
+ for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.output_var(i), -1);
+ h.update_coef(mapping.input_var(i), 1);
+ }
+
+ // update transformation relations
+ for (int i = 0; i < stmt.size(); i++)
+ stmt[i].xform = Composition(copy(mapping), stmt[i].xform);
+
+ // update dependence graph
+ for (int i = 0; i < dep.vertex.size(); i++)
+ for (DependenceGraph::EdgeList::iterator j =
+ dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+ j++) {
+ std::vector<DependenceVector> dvs = j->second;
+ for (int k = 0; k < dvs.size(); k++) {
+ DependenceVector &dv = dvs[k];
+ switch (dv.type) {
+ case DEP_W2R:
+ case DEP_R2W:
+ case DEP_W2W:
+ case DEP_R2R: {
+ std::vector<coef_t> lbounds(num_dep_dim), ubounds(
+ num_dep_dim);
+ for (int p = 0; p < num_dep_dim; p++) {
+ coef_t lb = 0;
+ coef_t ub = 0;
+ for (int q = 0; q < num_dep_dim; q++) {
+ if (T[p][q] > 0) {
+ if (lb == -posInfinity
+ || dv.lbounds[q] == -posInfinity)
+ lb = -posInfinity;
+ else
+ lb += T[p][q] * dv.lbounds[q];
+ if (ub == posInfinity
+ || dv.ubounds[q] == posInfinity)
+ ub = posInfinity;
+ else
+ ub += T[p][q] * dv.ubounds[q];
+ } else if (T[p][q] < 0) {
+ if (lb == -posInfinity
+ || dv.ubounds[q] == posInfinity)
+ lb = -posInfinity;
+ else
+ lb += T[p][q] * dv.ubounds[q];
+ if (ub == posInfinity
+ || dv.lbounds[q] == -posInfinity)
+ ub = posInfinity;
+ else
+ ub += T[p][q] * dv.lbounds[q];
+ }
+ }
+ if (T[p].size() == num_dep_dim + 1) {
+ if (lb != -posInfinity)
+ lb += T[p][num_dep_dim];
+ if (ub != posInfinity)
+ ub += T[p][num_dep_dim];
+ }
+ lbounds[p] = lb;
+ ubounds[p] = ub;
+ }
+ dv.lbounds = lbounds;
+ dv.ubounds = ubounds;
+
+ break;
+ }
+ default:
+ ;
+ }
+ }
+ j->second = dvs;
+ }
+
+ // set constant loop values
+ std::set<int> active;
+ for (int i = 0; i < stmt.size(); i++)
+ active.insert(i);
+ setLexicalOrder(0, active);
+
+ return true;
+}
+
+
+bool Loop::is_dependence_valid_based_on_lex_order(int i, int j,
+ const DependenceVector &dv, bool before) {
+ std::vector<int> lex_i = getLexicalOrder(i);
+ std::vector<int> lex_j = getLexicalOrder(j);
+ int last_dim;
+ if (!dv.is_scalar_dependence) {
+ for (last_dim = 0;
+ last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]);
+ last_dim++)
+ ;
+ last_dim = last_dim / 2;
+ if (last_dim == 0)
+ return true;
+
+ for (int i = 0; i < last_dim; i++) {
+ if (dv.lbounds[i] > 0)
+ return true;
+ else if (dv.lbounds[i] < 0)
+ return false;
+ }
+ }
+ if (before)
+ return true;
+
+ return false;
+
+}
+