diff options
Diffstat (limited to 'loop_modified.cc')
-rw-r--r-- | loop_modified.cc | 4234 |
1 files changed, 0 insertions, 4234 deletions
diff --git a/loop_modified.cc b/loop_modified.cc deleted file mode 100644 index 9686f6d..0000000 --- a/loop_modified.cc +++ /dev/null @@ -1,4234 +0,0 @@ -/***************************************************************************** - Copyright (C) 2008 University of Southern California - Copyright (C) 2009-2010 University of Utah - All Rights Reserved. - - Purpose: - Core loop transformation functionality. - - Notes: - "level" (starting from 1) means loop level and it corresponds to "dim" - (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., - c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 - in transformed iteration space, and variable 4 in Omega relation. - All c's are constant numbers only and they will not show up as actual loops. - Formula: - dim = 2*level - 1 - var = dim + 1 - - History: - 10/2005 Created by Chun Chen. - 09/2009 Expand tile functionality, -chun - 10/2009 Initialize unfusible loop nest without bailing out, -chun -*****************************************************************************/ - -#include <limits.h> -#include <math.h> -#include <code_gen/code_gen.h> -#include <code_gen/CG_outputBuilder.h> -#include <code_gen/output_repr.h> -#include <iostream> -#include <map> -#include "loop.hh" -#include "omegatools.hh" -#include "irtools.hh" -#include "chill_error.hh" -#include <string.h> -using namespace omega; - -const std::string Loop::tmp_loop_var_name_prefix = std::string("_t"); -const std::string Loop::overflow_var_name_prefix = std::string("over"); - -//----------------------------------------------------------------------------- -// Class Loop -//----------------------------------------------------------------------------- - -bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, - std::vector<ir_tree_node *> &ir_stmt) { - ir_stmt = extract_ir_stmts(ir_tree); - stmt_nesting_level_.resize(ir_stmt.size()); - std::vector<int> stmt_nesting_level(ir_stmt.size()); - for (int i = 0; i < ir_stmt.size(); i++) { - ir_stmt[i]->payload = i; - int t = 0; - ir_tree_node *itn = ir_stmt[i]; - while (itn->parent != NULL) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP) - t++; - } - stmt_nesting_level_[i] = t; - stmt_nesting_level[i] = t; - } - - stmt = std::vector<Statement>(ir_stmt.size()); - int n_dim = -1; - int max_loc; - //std::vector<std::string> index; - for (int i = 0; i < ir_stmt.size(); i++) { - int max_nesting_level = -1; - int loc; - for (int j = 0; j < ir_stmt.size(); j++) - if (stmt_nesting_level[j] > max_nesting_level) { - max_nesting_level = stmt_nesting_level[j]; - loc = j; - } - - // most deeply nested statement acting as a reference point - if (n_dim == -1) { - n_dim = max_nesting_level; - max_loc = loc; - - index = std::vector<std::string>(n_dim); - - ir_tree_node *itn = ir_stmt[loc]; - int cur_dim = n_dim - 1; - while (itn->parent != NULL) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP) { - index[cur_dim] = - static_cast<IR_Loop *>(itn->content)->index()->name(); - itn->payload = cur_dim--; - } - } - } - - // align loops by names, temporary solution - ir_tree_node *itn = ir_stmt[loc]; - int depth = stmt_nesting_level_[loc] - 1; - /* while (itn->parent != NULL) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { - std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); - for (int j = 0; j < n_dim; j++) - if (index[j] == name) { - itn->payload = j; - break; - } - if (itn->payload == -1) - throw loop_error("no complex alignment yet"); - } - } - */ - for (int t = depth; t >= 0; t--) { - int y = t; - ir_tree_node *itn = ir_stmt[loc]; - - while ((itn->parent != NULL) && (y >= 0)) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP) - y--; - } - - if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { - CG_outputBuilder *ocg = ir_->builder(); - - itn->payload = depth - t; - - CG_outputRepr *code = - static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); - - Tuple<CG_outputRepr *> index_expr; - Tuple<std::string> old_index; - CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); - index_expr.append(repl); - old_index.append( - static_cast<IR_Loop *>(itn->content)->index()->name()); - - code = ocg->CreatePlaceHolder(0, code, index_expr, old_index); - replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); - //stmt[loc].code = code; - - } - } - - // set relation variable names - Relation r(n_dim); - F_And *f_root = r.add_and(); - itn = ir_stmt[loc]; - int temp_depth = depth; - while (itn->parent != NULL) { - - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP) { - r.name_set_var(itn->payload + 1, index[temp_depth]); - - temp_depth--; - } - //static_cast<IR_Loop *>(itn->content)->index()->name()); - } - - /*while (itn->parent != NULL) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP) - r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); - }*/ - - // extract information from loop/if structures - std::vector<bool> processed(n_dim, false); - Tuple<std::string> vars_to_be_reversed; - itn = ir_stmt[loc]; - while (itn->parent != NULL) { - itn = itn->parent; - - switch (itn->content->type()) { - case IR_CONTROL_LOOP: { - IR_Loop *lp = static_cast<IR_Loop *>(itn->content); - Variable_ID v = r.set_var(itn->payload + 1); - int c; - - try { - c = lp->step_size(); - if (c > 0) { - CG_outputRepr *lb = lp->lower_bound(); - exp2formula(ir, r, f_root, freevar, lb, v, 's', - IR_COND_GE, true); - CG_outputRepr *ub = lp->upper_bound(); - IR_CONDITION_TYPE cond = lp->stop_cond(); - if (cond == IR_COND_LT || cond == IR_COND_LE) - exp2formula(ir, r, f_root, freevar, ub, v, 's', - cond, true); - else - throw ir_error("loop condition not supported"); - - } else if (c < 0) { - CG_outputBuilder *ocg = ir->builder(); - CG_outputRepr *lb = lp->lower_bound(); - lb = ocg->CreateMinus(NULL, lb); - exp2formula(ir, r, f_root, freevar, lb, v, 's', - IR_COND_GE, true); - CG_outputRepr *ub = lp->upper_bound(); - ub = ocg->CreateMinus(NULL, ub); - IR_CONDITION_TYPE cond = lp->stop_cond(); - if (cond == IR_COND_GE) - exp2formula(ir, r, f_root, freevar, ub, v, 's', - IR_COND_LE, true); - else if (cond == IR_COND_GT) - exp2formula(ir, r, f_root, freevar, ub, v, 's', - IR_COND_LT, true); - else - throw ir_error("loop condition not supported"); - - vars_to_be_reversed.append(lp->index()->name()); - } else - throw ir_error("loop step size zero"); - } catch (const ir_error &e) { - for (int i = 0; i < itn->children.size(); i++) - delete itn->children[i]; - itn->children = std::vector<ir_tree_node *>(); - itn->content = itn->content->convert(); - return false; - } - - if (abs(c) != 1) { - F_Exists *f_exists = f_root->add_exists(); - Variable_ID e = f_exists->declare(); - F_And *f_and = f_exists->add_and(); - Stride_Handle h = f_and->add_stride(abs(c)); - if (c > 0) - h.update_coef(e, 1); - else - h.update_coef(e, -1); - h.update_coef(v, -1); - CG_outputRepr *lb = lp->lower_bound(); - exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, - true); - } - - processed[itn->payload] = true; - break; - } - case IR_CONTROL_IF: { - CG_outputRepr *cond = - static_cast<IR_If *>(itn->content)->condition(); - try { - if (itn->payload % 2 == 1) - exp2constraint(ir, r, f_root, freevar, cond, true); - else { - F_Not *f_not = f_root->add_not(); - F_And *f_and = f_not->add_and(); - exp2constraint(ir, r, f_and, freevar, cond, true); - } - } catch (const ir_error &e) { - std::vector<ir_tree_node *> *t; - if (itn->parent == NULL) - t = &ir_tree; - else - t = &(itn->parent->children); - int id = itn->payload; - int i = t->size() - 1; - while (i >= 0) { - if ((*t)[i] == itn) { - for (int j = 0; j < itn->children.size(); j++) - delete itn->children[j]; - itn->children = std::vector<ir_tree_node *>(); - itn->content = itn->content->convert(); - } else if ((*t)[i]->payload >> 1 == id >> 1) { - delete (*t)[i]; - t->erase(t->begin() + i); - } - i--; - } - return false; - } - - break; - } - default: - for (int i = 0; i < itn->children.size(); i++) - delete itn->children[i]; - itn->children = std::vector<ir_tree_node *>(); - itn->content = itn->content->convert(); - return false; - } - } - - // add information for missing loops - for (int j = 0; j < n_dim; j++) - if (!processed[j]) { - ir_tree_node *itn = ir_stmt[max_loc]; - while (itn->parent != NULL) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP - && itn->payload == j) - break; - } - - Variable_ID v = r.set_var(j + 1); - if (loc < max_loc) { - CG_outputRepr *lb = - static_cast<IR_Loop *>(itn->content)->lower_bound(); - exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, - true); - } else { // loc > max_loc - CG_outputRepr *ub = - static_cast<IR_Loop *>(itn->content)->upper_bound(); - exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, - true); - } - } - - r.setup_names(); - r.simplify(); - - // insert the statement - CG_outputBuilder *ocg = ir->builder(); - Tuple<CG_outputRepr *> reverse_expr; - for (int j = 1; j <= vars_to_be_reversed.size(); j++) { - CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); - repl = ocg->CreateMinus(NULL, repl); - reverse_expr.append(repl); - } - CG_outputRepr *code = - static_cast<IR_Block *>(ir_stmt[loc]->content)->original(); - code = ocg->CreatePlaceHolder(0, code, reverse_expr, - vars_to_be_reversed); - stmt[loc].code = code; - stmt[loc].IS = r; - stmt[loc].loop_level = std::vector<LoopLevel>(n_dim); - for (int i = 0; i < n_dim; i++) { - stmt[loc].loop_level[i].type = LoopLevelOriginal; - stmt[loc].loop_level[i].payload = i; - stmt[loc].loop_level[i].parallel_level = 0; - } - - stmt_nesting_level[loc] = -1; - } - - return true; -} - -Loop::Loop(const IR_Control *control) { - ir = const_cast<IR_Code *>(control->ir_); - init_code = NULL; - cleanup_code = NULL; - tmp_loop_var_name_counter = 1; - overflow_var_name_counter = 1; - known = Relation::True(0); - - std::vector<ir_tree_node *> ir_tree = build_ir_tree(control->clone(), NULL); - std::vector<ir_tree_node *> ir_stmt; - - while (!init_loop(ir_tree, ir_stmt)) { - } - - // init the dependence graph - for (int i = 0; i < stmt.size(); i++) - dep.insert(); - - for (int i = 0; i < stmt.size(); i++) - for (int j = i; j < stmt.size(); j++) { - std::pair<std::vector<DependenceVector>, - std::vector<DependenceVector> > dv = test_data_dependences( - ir_, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, - freevar, index, stmt_nesting_level_[i], - stmt_nesting_level[j]); - - for (int k = 0; k < dv.first.size(); k++) { - if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], - true)) - dep.connect(i, j, dv.first[k]); - else { - dep.connect(j, i, dv.first[k].reverse()); - } - - } - for (int k = 0; k < dv.second.size(); k++) - if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], - false)) - dep.connect(j, i, dv.second[k]); - else { - dep.connect(i, j, dv.second[k].reverse()); - } - // std::pair<std::vector<DependenceVector>, - // std::vector<DependenceVector> > dv_ = test_data_dependences( - - } - - for (int i = 0; i < stmt.size(); i++) { - std::map<int, CG_outputRepr*>::iterator it = replace.find(i); - - if (it != replace.end()) - stmt[i].code = (it->second)->clone(); - else - stmt[i].code = stmt[i].code->clone(); - } - - // cleanup the IR tree - for (int i = 0; i < ir_tree.size(); i++) - delete ir_tree[i]; - - // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] - for (int i = 0; i < stmt.size(); i++) { - int n = stmt[i].IS.n_set(); - stmt[i].xform = Relation(n, 2 * n + 1); - F_And *f_root = stmt[i].xform.add_and(); - - for (int j = 1; j <= n; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(stmt[i].xform.output_var(2 * j), 1); - h.update_coef(stmt[i].xform.input_var(j), -1); - } - - for (int j = 1; j <= 2 * n + 1; j += 2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(stmt[i].xform.output_var(j), 1); - } - stmt[i].xform.simplify(); - } - - if (stmt.size() != 0) - num_dep_dim = stmt[0].IS.n_set(); - else - num_dep_dim = 0; -} - -Loop::~Loop() { - for (int i = 0; i < stmt.size(); i++) - if (stmt[i].code != NULL) { - stmt[i].code->clear(); - delete stmt[i].code; - } - if (init_code != NULL) { - init_code->clear(); - delete init_code; - } - if (cleanup_code != NULL) { - cleanup_code->clear(); - delete cleanup_code; - } -} - -int Loop::get_dep_dim_of(int stmt_num, int level) const { - if (stmt_num < 0 || stmt_num >= stmt.size()) - throw std::invalid_argument("invaid statement " + to_string(stmt_num)); - - if (level < 1 || level > stmt[stmt_num].loop_level.size()) - return -1; - - int trip_count = 0; - while (true) { - switch (stmt[stmt_num].loop_level[level - 1].type) { - case LoopLevelOriginal: - return stmt[stmt_num].loop_level[level - 1].payload; - case LoopLevelTile: - level = stmt[stmt_num].loop_level[level - 1].payload; - if (level < 1) - return -1; - if (level > stmt[stmt_num].loop_level.size()) - throw loop_error( - "incorrect loop level information for statement " - + to_string(stmt_num)); - break; - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(stmt_num)); - } - trip_count++; - if (trip_count >= stmt[stmt_num].loop_level.size()) - throw loop_error( - "incorrect loop level information for statement " - + to_string(stmt_num)); - } -} - -int Loop::get_last_dep_dim_before(int stmt_num, int level) const { - if (stmt_num < 0 || stmt_num >= stmt.size()) - throw std::invalid_argument("invaid statement " + to_string(stmt_num)); - - if (level < 1) - return -1; - if (level > stmt[stmt_num].loop_level.size()) - level = stmt[stmt_num].loop_level.size() + 1; - - for (int i = level - 1; i >= 1; i--) - if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) - return stmt[stmt_num].loop_level[i - 1].payload; - - return -1; -} - -void Loop::print_internal_loop_structure() const { - for (int i = 0; i < stmt.size(); i++) { - std::vector<int> lex = getLexicalOrder(i); - std::cout << "s" << i + 1 << ": "; - for (int j = 0; j < stmt[i].loop_level.size(); j++) { - if (2 * j < lex.size()) - std::cout << lex[2 * j]; - switch (stmt[i].loop_level[j].type) { - case LoopLevelOriginal: - std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; - break; - case LoopLevelTile: - std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; - break; - default: - std::cout << "(unknown)"; - } - std::cout << ' '; - } - for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { - std::cout << lex[j]; - if (j != lex.size() - 1) - std::cout << ' '; - } - std::cout << std::endl; - } -} - -CG_outputRepr *Loop::getCode(int effort) const { - const int m = stmt.size(); - if (m == 0) - return NULL; - const int n = stmt[0].xform.n_out(); - - Tuple<CG_outputRepr *> ni(m); - Tuple < Relation > IS(m); - Tuple < Relation > xform(m); - for (int i = 0; i < m; i++) { - ni[i + 1] = stmt[i].code; - IS[i + 1] = stmt[i].IS; - xform[i + 1] = stmt[i].xform; - } - - Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - CG_outputBuilder *ocg = ir->builder(); - CG_outputRepr *repr = MMGenerateCode(ocg, xform, IS, ni, known, effort); - - if (init_code != NULL) - repr = ocg->StmtListAppend(init_code->clone(), repr); - if (cleanup_code != NULL) - repr = ocg->StmtListAppend(repr, cleanup_code->clone()); - - return repr; -} - -void Loop::printCode(int effort) const { - const int m = stmt.size(); - if (m == 0) - return; - const int n = stmt[0].xform.n_out(); - - Tuple < Relation > IS(m); - Tuple < Relation > xform(m); - for (int i = 0; i < m; i++) { - IS[i + 1] = stmt[i].IS; - xform[i + 1] = stmt[i].xform; - } - - Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - std::cout << MMGenerateCode(xform, IS, known, effort); -} - -Relation Loop::getNewIS(int stmt_num) const { - Relation result; - - if (stmt[stmt_num].xform.is_null()) { - Relation known = Extend_Set(copy(this->known), - stmt[stmt_num].IS.n_set() - this->known.n_set()); - result = Intersection(copy(stmt[stmt_num].IS), known); - } else { - Relation known = Extend_Set(copy(this->known), - stmt[stmt_num].xform.n_out() - this->known.n_set()); - result = Intersection( - Range( - Restrict_Domain(copy(stmt[stmt_num].xform), - copy(stmt[stmt_num].IS))), known); - } - - result.simplify(2, 4); - - return result; -} - -std::vector<Relation> Loop::getNewIS() const { - const int m = stmt.size(); - - std::vector<Relation> new_IS(m); - for (int i = 0; i < m; i++) - new_IS[i] = getNewIS(i); - - return new_IS; -} - -void Loop::permute(const std::vector<int> &pi) { - std::set<int> active; - for (int i = 0; i < stmt.size(); i++) - active.insert(i); - - permute(active, pi); -} - -void Loop::original() { - std::set<int> active; - for (int i = 0; i < stmt.size(); i++) - active.insert(i); - setLexicalOrder(0, active); -} - -void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) { - if (active.size() == 0 || pi.size() == 0) - return; - - // check for sanity of parameters - int level = pi[0]; - for (int i = 1; i < pi.size(); i++) - if (pi[i] < level) - level = pi[i]; - if (level < 1) - throw std::invalid_argument("invalid permuation"); - std::vector<int> reverse_pi(pi.size(), 0); - for (int i = 0; i < pi.size(); i++) - if (pi[i] >= level + pi.size()) - throw std::invalid_argument("invalid permutation"); - else - reverse_pi[pi[i] - level] = i + level; - for (int i = 0; i < reverse_pi.size(); i++) - if (reverse_pi[i] == 0) - throw std::invalid_argument("invalid permuation"); - int ref_stmt_num; - std::vector<int> lex; - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { - if (*i < 0 || *i >= stmt.size()) - throw std::invalid_argument("invalid statement " + to_string(*i)); - if (i == active.begin()) { - ref_stmt_num = *i; - lex = getLexicalOrder(*i); - } else { - if (level + pi.size() - 1 > stmt[*i].loop_level.size()) - throw std::invalid_argument("invalid permuation"); - std::vector<int> lex2 = getLexicalOrder(*i); - for (int j = 0; j < 2 * level - 3; j += 2) - if (lex[j] != lex2[j]) - throw std::invalid_argument( - "statements to permute must be in the same subloop"); - for (int j = 0; j < pi.size(); j++) - if (!(stmt[*i].loop_level[level + j - 1].type - == stmt[ref_stmt_num].loop_level[level + j - 1].type - && stmt[*i].loop_level[level + j - 1].payload - == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) - throw std::invalid_argument( - "permuted loops must have the same loop level types"); - } - } - - // Update transformation relations - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { - int n = stmt[*i].xform.n_out(); - Relation mapping(n, n); - F_And *f_root = mapping.add_and(); - for (int j = 1; j <= n; j += 2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(j), 1); - h.update_coef(mapping.input_var(j), -1); - } - for (int j = 0; j < pi.size(); j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(2 * (level + j)), 1); - h.update_coef(mapping.input_var(2 * pi[j]), -1); - } - for (int j = 1; j < level; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(2 * j), 1); - h.update_coef(mapping.input_var(2 * j), -1); - } - for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(2 * j), 1); - h.update_coef(mapping.input_var(2 * j), -1); - } - - stmt[*i].xform = Composition(mapping, stmt[*i].xform); - stmt[*i].xform.simplify(); - } - - // get the permuation for dependence vectors - std::vector<int> t; - for (int i = 0; i < pi.size(); i++) - if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) - t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload); - int max_dep_dim = -1; - int min_dep_dim = num_dep_dim; - for (int i = 0; i < t.size(); i++) { - if (t[i] > max_dep_dim) - max_dep_dim = t[i]; - if (t[i] < min_dep_dim) - min_dep_dim = t[i]; - } - if (min_dep_dim > max_dep_dim) - return; - if (max_dep_dim - min_dep_dim + 1 != t.size()) - throw loop_error("cannot update the dependence graph after permuation"); - std::vector<int> dep_pi(num_dep_dim); - for (int i = 0; i < min_dep_dim; i++) - dep_pi[i] = i; - for (int i = min_dep_dim; i <= max_dep_dim; i++) - dep_pi[i] = t[i - min_dep_dim]; - for (int i = max_dep_dim + 1; i < num_dep_dim; i++) - dep_pi[i] = i; - - // update the dependence graph - DependenceGraph g; - for (int i = 0; i < dep.vertex.size(); i++) - g.insert(); - for (int i = 0; i < dep.vertex.size(); i++) - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); - j++) { - if ((active.find(i) != active.end() - && active.find(j->first) != active.end())) { - std::vector<DependenceVector> dv = j->second; - for (int k = 0; k < dv.size(); k++) { - switch (dv[k].type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - std::vector<coef_t> lbounds(num_dep_dim); - std::vector<coef_t> ubounds(num_dep_dim); - for (int d = 0; d < num_dep_dim; d++) { - lbounds[d] = dv[k].lbounds[dep_pi[d]]; - ubounds[d] = dv[k].ubounds[dep_pi[d]]; - } - dv[k].lbounds = lbounds; - dv[k].ubounds = ubounds; - break; - } - case DEP_CONTROL: { - break; - } - default: - throw loop_error("unknown dependence type"); - } - } - g.connect(i, j->first, dv); - } else if (active.find(i) == active.end() - && active.find(j->first) == active.end()) { - std::vector<DependenceVector> dv = j->second; - g.connect(i, j->first, dv); - } else { - std::vector<DependenceVector> dv = j->second; - for (int k = 0; k < dv.size(); k++) - switch (dv[k].type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - for (int d = 0; d < num_dep_dim; d++) - if (dep_pi[d] != d) { - dv[k].lbounds[d] = -posInfinity; - dv[k].ubounds[d] = posInfinity; - } - break; - } - case DEP_CONTROL: - break; - default: - throw loop_error("unknown dependence type"); - } - g.connect(i, j->first, dv); - } - } - dep = g; - - // update loop level information - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { - int cur_dep_dim = min_dep_dim; - std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); - for (int j = 1; j <= stmt[*i].loop_level.size(); j++) - if (j >= level && j < level + pi.size()) { - switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { - case LoopLevelOriginal: - new_loop_level[j - 1].type = LoopLevelOriginal; - new_loop_level[j - 1].payload = cur_dep_dim++; - new_loop_level[j - 1].parallel_level = - stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; - break; - case LoopLevelTile: { - new_loop_level[j - 1].type = LoopLevelTile; - int ref_level = stmt[*i].loop_level[reverse_pi[j - level] - - 1].payload; - if (ref_level >= level && ref_level < level + pi.size()) - new_loop_level[j - 1].payload = reverse_pi[ref_level - - level]; - else - new_loop_level[j - 1].payload = ref_level; - new_loop_level[j - 1].parallel_level = - stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; - break; - } - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(*i)); - } - } else { - switch (stmt[*i].loop_level[j - 1].type) { - case LoopLevelOriginal: - new_loop_level[j - 1].type = LoopLevelOriginal; - new_loop_level[j - 1].payload = - stmt[*i].loop_level[j - 1].payload; - new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - - 1].parallel_level; - break; - case LoopLevelTile: { - new_loop_level[j - 1].type = LoopLevelTile; - int ref_level = stmt[*i].loop_level[j - 1].payload; - if (ref_level >= level && ref_level < level + pi.size()) - new_loop_level[j - 1].payload = reverse_pi[ref_level - - level]; - else - new_loop_level[j - 1].payload = ref_level; - new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - - 1].parallel_level; - break; - } - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(*i)); - } - } - stmt[*i].loop_level = new_loop_level; - } - - setLexicalOrder(2 * level - 2, active); -} - -std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { - // check for sanity of parameters - if (stmt_num < 0 || stmt_num >= stmt.size()) - throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - if (level <= 0 || level > stmt[stmt_num].loop_level.size()) - throw std::invalid_argument("invalid loop level " + to_string(level)); - - std::set<int> result; - int dim = 2 * level - 1; - std::vector<int> lex = getLexicalOrder(stmt_num); - std::set<int> same_loop = getStatements(lex, dim - 1); - - Relation cond2 = copy(cond); - cond2.simplify(); - cond2 = EQs_to_GEQs(cond2); - Conjunct *c = cond2.single_conjunct(); - int cur_lex = lex[dim - 1]; - for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { - int max_level = (*gi).max_tuple_pos(); - Relation single_cond(max_level); - single_cond.and_with_GEQ(*gi); - - // TODO: should decide where to place newly created statements with - // complementary split condition from dependence graph. - bool place_after; - if (max_level == 0) - place_after = true; - else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) - place_after = true; - else - place_after = false; - - // original statements with split condition, - // new statements with complement of split condition - int old_num_stmt = stmt.size(); - std::map<int, int> what_stmt_num; - apply_xform(same_loop); - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) { - int n = stmt[*i].IS.n_set(); - Relation part1, part2; - if (max_level > n) { - part1 = copy(stmt[*i].IS); - part2 = Relation::False(0); - } else { - part1 = Intersection(copy(stmt[*i].IS), - Extend_Set(copy(single_cond), n - max_level)); - part2 = Intersection(copy(stmt[*i].IS), - Extend_Set(Complement(copy(single_cond)), - n - max_level)); - } - - //split dependence check - - if (max_level > level) { - - DNF_Iterator di1(stmt[*i].IS.query_DNF()); - DNF_Iterator di2(part1.query_DNF()); - for (; di1 && di2; di1++, di2++) { - //printf("In next conjunct,\n"); - EQ_Iterator ei1 = (*di1)->EQs(); - EQ_Iterator ei2 = (*di2)->EQs(); - for (; ei1 && ei2; ei1++, ei2++) { - //printf(" In next equality constraint,\n"); - Constr_Vars_Iter cvi1(*ei1); - Constr_Vars_Iter cvi2(*ei2); - int dimension = (*cvi1).var->get_position(); - int same = 0; - bool identical = false; - if (identical = !strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name())) { - - for (; cvi1 && cvi2; cvi1++, cvi2++) { - - if (((*cvi1).coef != (*cvi2).coef - || (*ei1).get_const() - != (*ei2).get_const()) - || (strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name()))) { - - same++; - } - } - } - if ((same != 0) || !identical) { - - dimension = dimension - 1; - - while (stmt[*i].loop_level[dimension].type - == LoopLevelTile) - dimension = xform_index[dimension].first; - - dimension = stmt[*i].loop_level[dimension].payload; - - for (int i = 0; i < stmt.size(); i++) { - std::vector<std::pair<int, DependenceVector> > D; - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end(); j++) { - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if ((dv.hasNegative(dimension) - && !dv.quasi) - || (dv.hasPositive(dimension) - && dv.quasi)) - - throw loop_error( - "loop error: Split is illegal, dependence violation!"); - - } - } - } - - } - - GEQ_Iterator gi1 = (*di1)->GEQs(); - GEQ_Iterator gi2 = (*di2)->GEQs(); - - for (; gi1 && gi2; gi++, gi2++) { - - Constr_Vars_Iter cvi1(*gi1); - Constr_Vars_Iter cvi2(*gi2); - int dimension = (*cvi1).var->get_position(); - int same = 0; - bool identical = false; - if (identical = !strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name())) { - - for (; cvi1 && cvi2; cvi1++, cvi2++) { - - if (((*cvi1).coef != (*cvi2).coef - || (*gi1).get_const() - != (*gi2).get_const()) - || (strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name()))) { - - same++; - } - } - } - if ((same != 0) || !identical) { - dimension = dimension - 1; - - while (stmt[*i].loop_level[dimension].type - == LoopLevelTile) - dimension = xform_index[dimension].first; - - dimension = - stmt[*i].loop_level[dimension].payload; - - for (int i = 0; i < stmt.size(); i++) { - std::vector<std::pair<int, DependenceVector> > D; - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end(); - j++) { - for (int k = 0; k < j->second.size(); - k++) { - DependenceVector dv = j->second[k]; - if ((dv.hasNegative(dimension) - && !dv.quasi) - || (dv.hasPositive( - dimension) - && dv.quasi)) - - throw loop_error( - "loop error: Split is illegal, dependence violation!"); - - } - } - } - - } - - } - - } - - } - - DNF_Iterator di3(stmt[*i].IS.query_DNF()); - DNF_Iterator di4(part2.query_DNF()); - for (; di3 && di4; di3++, di4++) { - EQ_Iterator ei1 = (*di3)->EQs(); - EQ_Iterator ei2 = (*di4)->EQs(); - for (; ei1 && ei2; ei1++, ei2++) { - Constr_Vars_Iter cvi1(*ei1); - Constr_Vars_Iter cvi2(*ei2); - int dimension = (*cvi1).var->get_position(); - int same = 0; - bool identical = false; - if (identical = !strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name())) { - - for (; cvi1 && cvi2; cvi1++, cvi2++) { - - if (((*cvi1).coef != (*cvi2).coef - || (*ei1).get_const() - != (*ei2).get_const()) - || (strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name()))) { - - same++; - } - } - } - if ((same != 0) || !identical) { - dimension = dimension - 1; - - while (stmt[*i].loop_level[dimension].type - == LoopLevelTile) - dimension = xform_index[dimension].first; - - dimension = stmt[*i].loop_level[dimension].payload; - - for (int i = 0; i < stmt.size(); i++) { - std::vector<std::pair<int, DependenceVector> > D; - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end(); j++) { - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if ((dv.hasNegative(dimension) - && !dv.quasi) - || (dv.hasPositive(dimension) - && dv.quasi)) - - throw loop_error( - "loop error: Split is illegal, dependence violation!"); - - } - } - } - - } - - } - GEQ_Iterator gi1 = (*di3)->GEQs(); - GEQ_Iterator gi2 = (*di4)->GEQs(); - - for (; gi1 && gi2; gi++, gi2++) { - Constr_Vars_Iter cvi1(*gi1); - Constr_Vars_Iter cvi2(*gi2); - int dimension = (*cvi1).var->get_position(); - int same = 0; - bool identical = false; - if (identical = !strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name())) { - - for (; cvi1 && cvi2; cvi1++, cvi2++) { - - if (((*cvi1).coef != (*cvi2).coef - || (*gi1).get_const() - != (*gi2).get_const()) - || (strcmp((*cvi1).var->char_name(), - (*cvi2).var->char_name()))) { - - same++; - } - } - } - if ((same != 0) || !identical) { - dimension = dimension - 1; - - while (stmt[*i].loop_level[dimension].type - == LoopLevelTile) - dimension = xform_index[dimension].first; - - dimension = stmt[*i].loop_level[dimension].payload; - - for (int i = 0; i < stmt.size(); i++) { - std::vector<std::pair<int, DependenceVector> > D; - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end(); j++) { - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if ((dv.hasNegative(dimension) - && !dv.quasi) - || (dv.hasPositive(dimension) - && dv.quasi)) - - throw loop_error( - "loop error: Split is illegal, dependence violation!"); - - } - } - } - - } - - } - - } - - } - - stmt[*i].IS = part1; - - if (Intersection(copy(part2), - Extend_Set(copy(this->known), n - this->known.n_set())).is_upper_bound_satisfiable()) { - Statement new_stmt; - new_stmt.code = stmt[*i].code->clone(); - new_stmt.IS = part2; - new_stmt.xform = copy(stmt[*i].xform); - - new_stmt.loop_level = stmt[*i].loop_level; - stmt.push_back(new_stmt); - dep.insert(); - what_stmt_num[*i] = stmt.size() - 1; - if (*i == stmt_num) - result.insert(stmt.size() - 1); - - stmt_nesting_level_.push_back(stmt_nesting_level[*i]); - std::pair<std::vector<DependenceVector>, - std::vector<DependenceVector> > dv = - test_data_dependences(ir_, stmt[*i].code, part1, - stmt[*i].code, part2, freevar, index, - stmt_nesting_level[*i], - stmt_nesting_level[stmt.size() - 1]); - - int part1_to_part2 = 0; - int part2_to_part1 = 0; - - for (int k = 0; k < dv.first.size(); k++) - if (is_dependence_valid_based_on_lex_order(*i, - what_stmt_num[*i], dv.first[k], true)) - part1_to_part2++; - else - part2_to_part1++; - - if (part1_to_part2 > 0 && part2_to_part1 > 0) - throw loop_error( - "loop error: Aborting, split resulted in impossible dependence cycle!"); - - for (int k = 0; k < dv.second.size(); k++) - if (is_dependence_valid_based_on_lex_order( - what_stmt_num[*i], *i, dv.second[k], false)) - part2_to_part1++; - - else - part1_to_part2++; - - if (part1_to_part2 > 0 && part2_to_part1 > 0) - throw loop_error( - "loop error: Aborting, split resulted in impossible dependence cycle!"); - bool temp_place_after; - if (part2_to_part1 > 0) - temp_place_after = false; - else - temp_place_after = true; - - if (i == same_loop.begin()) - place_after = temp_place_after; - else { - if (temp_place_after != place_after) - throw loop_error( - "loop error: Aborting, split resulted in impossible dependence cycle!"); - - } - - if (place_after) - assign_const(new_stmt.xform, dim - 1, cur_lex + 1); - else - assign_const(new_stmt.xform, dim - 1, cur_lex - 1); - - } - - } - // make adjacent lexical number available for new statements - if (place_after) { - lex[dim - 1] = cur_lex + 1; - shiftLexicalOrder(lex, dim - 1, 1); - } else { - lex[dim - 1] = cur_lex - 1; - shiftLexicalOrder(lex, dim - 1, -1); - } - // update dependence graph - int dep_dim = get_dep_dim_of(stmt_num, level); - for (int i = 0; i < old_num_stmt; i++) { - std::vector<std::pair<int, std::vector<DependenceVector> > > D; - - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end(); j++) { - if (same_loop.find(i) != same_loop.end()) { - if (same_loop.find(j->first) != same_loop.end()) { - if (what_stmt_num.find(i) != what_stmt_num.end() - && what_stmt_num.find(j->first) - != what_stmt_num.end()) - dep.connect(what_stmt_num[i], - what_stmt_num[j->first], j->second); - if (place_after - && what_stmt_num.find(j->first) - != what_stmt_num.end()) { - std::vector<DependenceVector> dvs; - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.is_data_dependence() && dep_dim != -1) { - dv.lbounds[dep_dim] = -posInfinity; - dv.ubounds[dep_dim] = posInfinity; - } - dvs.push_back(dv); - } - if (dvs.size() > 0) - D.push_back( - std::make_pair(what_stmt_num[j->first], - dvs)); - } else if (!place_after - && what_stmt_num.find(i) - != what_stmt_num.end()) { - std::vector<DependenceVector> dvs; - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.is_data_dependence() && dep_dim != -1) { - dv.lbounds[dep_dim] = -posInfinity; - dv.ubounds[dep_dim] = posInfinity; - } - dvs.push_back(dv); - } - if (dvs.size() > 0) - dep.connect(what_stmt_num[i], j->first, dvs); - - } - } else { - if (what_stmt_num.find(i) != what_stmt_num.end()) - dep.connect(what_stmt_num[i], j->first, j->second); - } - } else if (same_loop.find(j->first) != same_loop.end()) { - if (what_stmt_num.find(j->first) != what_stmt_num.end()) - D.push_back( - std::make_pair(what_stmt_num[j->first], - j->second)); - } - } - - for (int j = 0; j < D.size(); j++) - dep.connect(i, D[j].first, D[j].second); - } - - } - - return result; -} - -void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, - TilingMethodType method, int alignment_offset, int alignment_multiple) { - // check for sanity of parameters - if (tile_size < 0) - throw std::invalid_argument("invalid tile size"); - if (alignment_multiple < 1 || alignment_offset < 0) - throw std::invalid_argument("invalid alignment for tile"); - if (stmt_num < 0 || stmt_num >= stmt.size()) - throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - if (level <= 0) - throw std::invalid_argument("invalid loop level " + to_string(level)); - if (level > stmt[stmt_num].loop_level.size()) - throw std::invalid_argument( - "there is no loop level " + to_string(level) + " for statement " - + to_string(stmt_num)); - if (outer_level <= 0 || outer_level > level) - throw std::invalid_argument( - "invalid tile controlling loop level " - + to_string(outer_level)); - - int dim = 2 * level - 1; - int outer_dim = 2 * outer_level - 1; - std::vector<int> lex = getLexicalOrder(stmt_num); - std::set<int> same_tiled_loop = getStatements(lex, dim - 1); - std::set<int> same_tile_controlling_loop = getStatements(lex, - outer_dim - 1); - - for (int i = 0; i < stmt.size(); i++) { - std::vector<std::pair<int, DependenceVector> > D; - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); - j++) { - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - int dim2 = level - 1; - if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { - while (stmt[i].loop_level[dim2].type == LoopLevelTile) { - dim2 = stmt[i].loop_level[dim2].payload; - } - dim2 = stmt[i].loop_level[dim2].payload; - - if ((dv.hasNegative(dim2) && (!dv.quasi)) - || (dv.quasi && dv.hasPositive(dim2))) { - for (int l = outer_level; l < level; l++) - if (stmt[i].loop_level[l - 1].type - != LoopLevelTile) { - if (dv.isCarried( - stmt[i].loop_level[l - 1].payload)) - throw loop_error( - "loop error: Tiling is illegal, dependence violation!"); - } else { - - int dim3 = l - 1; - while (stmt[i].loop_level[l - 1].type - != LoopLevelTile) { - dim3 = stmt[i].loop_level[l - 1].payload; - - } - - dim3 = stmt[i].loop_level[l - 1].payload; - if (dim3 < level - 1) - if (dv.isCarried(dim3)) - throw loop_error( - "loop error: Tiling is illegal, dependence violation!"); - } - } - } - } - } - } - // special case for no tiling - if (tile_size == 0) { - for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); - i != same_tile_controlling_loop.end(); i++) { - Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); - F_And *f_root = r.add_and(); - for (int j = 1; j <= 2 * outer_level - 1; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.input_var(j), 1); - h.update_coef(r.output_var(j), -1); - } - EQ_Handle h1 = f_root->add_EQ(); - h1.update_coef(r.output_var(2 * outer_level), 1); - EQ_Handle h2 = f_root->add_EQ(); - h2.update_coef(r.output_var(2 * outer_level + 1), 1); - for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.input_var(j), 1); - h.update_coef(r.output_var(j + 2), -1); - } - - stmt[*i].xform = Composition(copy(r), stmt[*i].xform); - } - } - // normal tiling - else { - std::set<int> private_stmt; - for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); - i != same_tile_controlling_loop.end(); i++) { -// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) -// same_tiled_loop.insert(*i); - - // should test dim's value directly but it is ok for now -// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) - if (same_tiled_loop.find(*i) == same_tiled_loop.end() - && overflow.find(*i) != overflow.end()) - private_stmt.insert(*i); - } - - // extract the union of the iteration space to be considered - Relation hull; - { - Tuple < Relation > r_list; - Tuple<int> r_mask; - - for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); - i != same_tile_controlling_loop.end(); i++) - if (private_stmt.find(*i) == private_stmt.end()) { - Relation r = project_onto_levels(getNewIS(*i), dim + 1, - true); - for (int j = outer_dim; j < dim; j++) - r = Project(r, j + 1, Set_Var); - for (int j = 0; j < outer_dim; j += 2) - r = Project(r, j + 1, Set_Var); - r_list.append(r); - r_mask.append(1); - } - - hull = Hull(r_list, r_mask, 1, true); - } - - // extract the bound of the dimension to be tiled - Relation bound = get_loop_bound(hull, dim); - if (!bound.has_single_conjunct()) { - // further simplify the bound - hull = Approximate(hull); - bound = get_loop_bound(hull, dim); - - int i = outer_dim - 2; - while (!bound.has_single_conjunct() && i >= 0) { - hull = Project(hull, i + 1, Set_Var); - bound = get_loop_bound(hull, dim); - i -= 2; - } - - if (!bound.has_single_conjunct()) - throw loop_error("cannot handle tile bounds"); - } - - // separate lower and upper bounds - std::vector<GEQ_Handle> lb_list, ub_list; - { - Conjunct *c = bound.query_DNF()->single_conjunct(); - for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { - int coef = (*gi).get_coef(bound.set_var(dim + 1)); - if (coef < 0) - ub_list.push_back(*gi); - else if (coef > 0) - lb_list.push_back(*gi); - } - } - if (lb_list.size() == 0) - throw loop_error( - "unable to calculate tile controlling loop lower bound"); - if (ub_list.size() == 0) - throw loop_error( - "unable to calculate tile controlling loop upper bound"); - - // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile - int simplest_lb = 0, simplest_ub = 0; - if (method == StridedTile) { - int best_cost = INT_MAX; - for (int i = 0; i < lb_list.size(); i++) { - int cost = 0; - for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - cost += 5; - break; - } - case Global_Var: { - cost += 2; - break; - } - default: - cost += 15; - break; - } - } - - if (cost < best_cost) { - best_cost = cost; - simplest_lb = i; - } - } - } else if (method == CountedTile) { - std::map<Variable_ID, coef_t> s1, s2, s3; - int best_cost = INT_MAX; - for (int i = 0; i < lb_list.size(); i++) - for (int j = 0; j < ub_list.size(); j++) { - int cost = 0; - - for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - s1[(*ci).var] += (*ci).coef; - break; - } - case Global_Var: { - s2[(*ci).var] += (*ci).coef; - break; - } - case Exists_Var: - case Wildcard_Var: { - s3[(*ci).var] += (*ci).coef; - break; - } - default: - cost = INT_MAX - 2; - break; - } - } - - for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - s1[(*ci).var] += (*ci).coef; - break; - } - case Global_Var: { - s2[(*ci).var] += (*ci).coef; - break; - } - case Exists_Var: - case Wildcard_Var: { - s3[(*ci).var] += (*ci).coef; - break; - } - default: - if (cost == INT_MAX - 2) - cost = INT_MAX - 1; - else - cost = INT_MAX - 3; - break; - } - } - - if (cost == 0) { - for (std::map<Variable_ID, coef_t>::iterator k = - s1.begin(); k != s1.end(); k++) - if ((*k).second != 0) - cost += 5; - for (std::map<Variable_ID, coef_t>::iterator k = - s2.begin(); k != s2.end(); k++) - if ((*k).second != 0) - cost += 2; - for (std::map<Variable_ID, coef_t>::iterator k = - s3.begin(); k != s3.end(); k++) - if ((*k).second != 0) - cost += 15; - } - - if (cost < best_cost) { - best_cost = cost; - simplest_lb = i; - simplest_ub = j; - } - } - } - - // prepare the new transformation relations - for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); - i != same_tile_controlling_loop.end(); i++) { - Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); - F_And *f_root = r.add_and(); - for (int j = 0; j < outer_dim - 1; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.output_var(j + 1), 1); - h.update_coef(r.input_var(j + 1), -1); - } - - for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.output_var(j + 3), 1); - h.update_coef(r.input_var(j + 1), -1); - } - - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.output_var(outer_dim), 1); - h.update_const(-lex[outer_dim - 1]); - - stmt[*i].xform = Composition(r, stmt[*i].xform); - } - - // add tiling constraints. - for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); - i != same_tile_controlling_loop.end(); i++) { - F_And *f_super_root = stmt[*i].xform.and_with_and(); - F_Exists *f_exists = f_super_root->add_exists(); - F_And *f_root = f_exists->add_and(); - - // create a lower bound variable for easy formula creation later - Variable_ID aligned_lb; - { - Variable_ID lb = f_exists->declare(); - coef_t coef = lb_list[simplest_lb].get_coef( - bound.set_var(dim + 1)); - if (coef == 1) { // e.g. if i >= m+5, then LB = m+5 - EQ_Handle h = f_root->add_EQ(); - h.update_coef(lb, 1); - for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos != dim + 1) - h.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); - } - } - h.update_const(lb_list[simplest_lb].get_const()); - } else { // e.g. if 2i >= m+5, then m+5 <= 2*LB < m+5+2 - GEQ_Handle h1 = f_root->add_GEQ(); - GEQ_Handle h2 = f_root->add_GEQ(); - for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos == dim + 1) { - h1.update_coef(lb, (*ci).coef); - h2.update_coef(lb, -(*ci).coef); - } else { - h1.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - h2.update_coef(stmt[*i].xform.output_var(pos), - -(*ci).coef); - } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h1.update_coef(v, (*ci).coef); - h2.update_coef(v, -(*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); - } - } - h1.update_const(lb_list[simplest_lb].get_const()); - h2.update_const(-lb_list[simplest_lb].get_const()); - h2.update_const(coef - 1); - } - - Variable_ID offset_lb; - if (alignment_offset == 0) - offset_lb = lb; - else { - EQ_Handle h = f_root->add_EQ(); - offset_lb = f_exists->declare(); - h.update_coef(offset_lb, 1); - h.update_coef(lb, -1); - h.update_const(alignment_offset); - } - - if (alignment_multiple == 1) { // trivial - aligned_lb = offset_lb; - } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB - aligned_lb = f_exists->declare(); - Variable_ID e = f_exists->declare(); - - EQ_Handle h = f_root->add_EQ(); - h.update_coef(aligned_lb, 1); - h.update_coef(e, -alignment_multiple); - - GEQ_Handle h1 = f_root->add_GEQ(); - GEQ_Handle h2 = f_root->add_GEQ(); - h1.update_coef(e, alignment_multiple); - h2.update_coef(e, -alignment_multiple); - h1.update_coef(offset_lb, -1); - h2.update_coef(offset_lb, 1); - h1.update_const(alignment_multiple - 1); - } - } - - // create an upper bound variable for easy formula creation later - Variable_ID ub = f_exists->declare(); - { - coef_t coef = -ub_list[simplest_ub].get_coef( - bound.set_var(dim + 1)); - if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 - EQ_Handle h = f_root->add_EQ(); - h.update_coef(ub, -1); - for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos != dim + 1) - h.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); - } - } - h.update_const(ub_list[simplest_ub].get_const()); - } else { // e.g. if 2i <= m+5, then m+5-2 < 2*UB <= m+5 - GEQ_Handle h1 = f_root->add_GEQ(); - GEQ_Handle h2 = f_root->add_GEQ(); - for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos == dim + 1) { - h1.update_coef(ub, -(*ci).coef); - h2.update_coef(ub, (*ci).coef); - } else { - h1.update_coef(stmt[*i].xform.output_var(pos), - -(*ci).coef); - h2.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h1.update_coef(v, -(*ci).coef); - h2.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); - } - } - h1.update_const(-ub_list[simplest_ub].get_const()); - h2.update_const(ub_list[simplest_ub].get_const()); - h1.update_const(coef - 1); - } - } - - // insert tile controlling loop constraints - if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 - Variable_ID e = f_exists->declare(); - GEQ_Handle h1 = f_root->add_GEQ(); - h1.update_coef(e, 1); - - EQ_Handle h2 = f_root->add_EQ(); - h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); - h2.update_coef(e, -tile_size); - h2.update_coef(aligned_lb, -1); - } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) - GEQ_Handle h1 = f_root->add_GEQ(); - h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); - - GEQ_Handle h2 = f_root->add_GEQ(); - h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), - -tile_size); - h2.update_coef(aligned_lb, -1); - h2.update_coef(ub, 1); - } - - // special care for private statements like overflow assignment - if (private_stmt.find(*i) != private_stmt.end()) { // e.g. ii <= UB - GEQ_Handle h = f_root->add_GEQ(); - h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); - h.update_coef(ub, 1); - } - // if (private_stmt.find(*i) != private_stmt.end()) { - // if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii - // GEQ_Handle h = f_root->add_GEQ(); - // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); - // h.update_coef(ub, 1); - - // stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); - // f_root = stmt[*i].xform.and_with_and(); - // EQ_Handle h1 = f_root->add_EQ(); - // h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); - // h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); - // } - // else if (method == StridedTile) { // e.g. ii <= UB since i does not exist - // GEQ_Handle h = f_root->add_GEQ(); - // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); - // h.update_coef(ub, 1); - // } - // } - - // restrict original loop index inside the tile - else { - if (method == StridedTile) { // e.g. ii <= i < ii + tile_size - GEQ_Handle h1 = f_root->add_GEQ(); - h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); - h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), - -1); - - GEQ_Handle h2 = f_root->add_GEQ(); - h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); - h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); - h2.update_const(tile_size - 1); - } else if (method == CountedTile) { // e.g. LB+32*ii <= i < LB+32*ii+tile_size - GEQ_Handle h1 = f_root->add_GEQ(); - h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), - -tile_size); - h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); - h1.update_coef(aligned_lb, -1); - - GEQ_Handle h2 = f_root->add_GEQ(); - h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), - tile_size); - h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); - h2.update_const(tile_size - 1); - h2.update_coef(aligned_lb, 1); - } - } - } - } - - // update loop level information - for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); - i != same_tile_controlling_loop.end(); i++) { - for (int j = 1; j <= stmt[*i].loop_level.size(); j++) - switch (stmt[*i].loop_level[j - 1].type) { - case LoopLevelOriginal: - break; - case LoopLevelTile: - if (stmt[*i].loop_level[j - 1].payload >= outer_level) - stmt[*i].loop_level[j - 1].payload++; - break; - default: - throw loop_error( - "unknown loop level type for statement " - + to_string(*i)); - } - - LoopLevel ll; - ll.type = LoopLevelTile; - ll.payload = level + 1; - ll.parallel_level = 0; - stmt[*i].loop_level.insert( - stmt[*i].loop_level.begin() + (outer_level - 1), ll); - } -} - -std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount) { - // check for sanity of parameters - if (unroll_amount < 0) - throw std::invalid_argument( - "invalid unroll amount " + to_string(unroll_amount)); - if (stmt_num < 0 || stmt_num >= stmt.size()) - throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - if (level <= 0 || level > stmt[stmt_num].loop_level.size()) - throw std::invalid_argument("invalid loop level " + to_string(level)); - - int dim = 2 * level - 1; - std::vector<int> lex = getLexicalOrder(stmt_num); - std::set<int> same_loop = getStatements(lex, dim - 1); - - // nothing to do - if (unroll_amount == 1) - return std::set<int>(); - - for (int i = 0; i < stmt.size(); i++) { - std::vector<std::pair<int, DependenceVector> > D; - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); - j++) { - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - int dim2 = level - 1; - if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { - - while (stmt[i].loop_level[dim2].type == LoopLevelTile) { - dim2 = xform_index[dim2].first; - } - dim2 = stmt[i].loop_level[dim2].payload; - - if (dv.isCarried(dim2) - && (dv.hasNegative(dim2) && !dv.quasi)) - throw loop_error( - "loop error: Unrolling is illegal, dependence violation!"); - - if (dv.isCarried(dim2) - && (dv.hasPositive(dim2) && dv.quasi)) - throw loop_error( - "loop error: Unrolling is illegal, dependence violation!"); - bool safe = false; - - if (dv.isCarried(dim2)) { - - if (!dv.quasi) { - if (dv.lbounds[dim2] != posInfinity) { - if (dv.lbounds[dim2] != negInfinity) - if (dv.lbounds[dim2] > unroll_amount) - safe = true; - } else - safe = true; - } else { - if (dv.ubounds[dim2] != negInfinity) { - if (dv.ubounds[dim2] != posInfinity) - if ((-(dv.ubounds[dim2])) > unroll_amount) - safe = true; - } else - safe = true; - } - - if (!safe) { - for (int l = level; l <= (n - 1) / 2; l++) { - int dim3 = l - 1; - - if (stmt[i].loop_level[dim3].type - != LoopLevelTile) - dim3 = stmt[i].loop_level[dim3].payload; - else { - while (stmt[i].loop_level[dim2].type - == LoopLevelTile) { - dim3 = stmt[i].loop_level[dim3].payload; - } - dim3 = stmt[i].loop_level[dim3].payload; - } - - if (dim3 > dim2) { - if ((dv.hasPositive(dim3) && !dv.quasi) - || (dv.hasNegative(dim3) && dv.quasi)) - break; - else if ((dv.hasNegative(dim3) && !dv.quasi) - || (dv.hasPositive(dim3) && dv.quasi)) - throw loop_error( - "loop error: Unrolling is illegal, dependence violation!"); - } - } - } - } - } - } - } - } - - // extract the intersection of the iteration space to be considered - Relation hull = Relation::True(level); - apply_xform(same_loop); - for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); - i++) { - if (stmt[*i].IS.is_upper_bound_satisfiable()) { - Relation mapping(stmt[*i].IS.n_set(), level); - F_And *f_root = mapping.add_and(); - for (int j = 1; j <= level; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.input_var(j), 1); - h.update_coef(mapping.output_var(j), -1); - } - hull = Intersection(hull, - Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); - hull.simplify(2, 4); - } - } - for (int i = 1; i <= level; i++) { - std::string name = tmp_loop_var_name_prefix + to_string(i); - hull.name_set_var(i, name); - } - hull.setup_names(); - - // extract the exact loop bound of the dimension to be unrolled - if (is_single_loop_iteration(hull, level, this->known)) - return std::set<int>(); - Relation bound = get_loop_bound(hull, level, this->known); - if (!bound.has_single_conjunct() || !bound.is_satisfiable() - || bound.is_tautology()) - throw loop_error("unable to extract loop bound for unrolling"); - - // extract the loop stride - EQ_Handle stride_eq; - int stride = 1; - { - bool simple_stride = true; - int strides = countStrides(bound.query_DNF()->single_conjunct(), - bound.set_var(level), stride_eq, simple_stride); - if (strides > 1) - throw loop_error("too many strides"); - else if (strides == 1) { - int sign = stride_eq.get_coef(bound.set_var(level)); - Constr_Vars_Iter it(stride_eq, true); - stride = abs((*it).coef / sign); - } - } - - // separate lower and upper bounds - std::vector<GEQ_Handle> lb_list, ub_list; - { - Conjunct *c = bound.query_DNF()->single_conjunct(); - for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { - int coef = (*gi).get_coef(bound.set_var(level)); - if (coef < 0) - ub_list.push_back(*gi); - else if (coef > 0) - lb_list.push_back(*gi); - } - } - - // simplify overflow expression for each pair of upper and lower bounds - std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table( - lb_list.size(), - std::vector<std::map<Variable_ID, int> >(ub_list.size(), - std::map<Variable_ID, int>())); - bool is_overflow_simplifiable = true; - for (int i = 0; i < lb_list.size(); i++) { - if (!is_overflow_simplifiable) - break; - - for (int j = 0; j < ub_list.size(); j++) { - // lower bound or upper bound has non-unit coefficient, can't simplify - if (ub_list[j].get_coef(bound.set_var(level)) != -1 - || lb_list[i].get_coef(bound.set_var(level)) != 1) { - is_overflow_simplifiable = false; - break; - } - - for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - if ((*ci).var != bound.set_var(level)) - overflow_table[i][j][(*ci).var] += (*ci).coef; - - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = bound.get_local(g); - else - v = bound.get_local(g, (*ci).var->function_of()); - overflow_table[i][j][(*ci).var] += (*ci).coef; - break; - } - default: - throw loop_error("failed to calculate overflow amount"); - } - } - overflow_table[i][j][NULL] += ub_list[j].get_const(); - - for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: { - if ((*ci).var != bound.set_var(level)) { - overflow_table[i][j][(*ci).var] += (*ci).coef; - if (overflow_table[i][j][(*ci).var] == 0) - overflow_table[i][j].erase( - overflow_table[i][j].find((*ci).var)); - } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = bound.get_local(g); - else - v = bound.get_local(g, (*ci).var->function_of()); - overflow_table[i][j][(*ci).var] += (*ci).coef; - if (overflow_table[i][j][(*ci).var] == 0) - overflow_table[i][j].erase( - overflow_table[i][j].find((*ci).var)); - break; - } - default: - throw loop_error("failed to calculate overflow amount"); - } - } - overflow_table[i][j][NULL] += lb_list[i].get_const(); - - overflow_table[i][j][NULL] += stride; - if (unroll_amount == 0 - || (overflow_table[i][j].size() == 1 - && overflow_table[i][j][NULL] / stride - < unroll_amount)) - unroll_amount = overflow_table[i][j][NULL] / stride; - } - } - - // loop iteration count can't be determined, bail out gracefully - if (unroll_amount == 0) - return std::set<int>(); - - // further simply overflow calculation using coefficients' modular - if (is_overflow_simplifiable) { - for (int i = 0; i < lb_list.size(); i++) - for (int j = 0; j < ub_list.size(); j++) - if (stride == 1) { - for (std::map<Variable_ID, int>::iterator k = - overflow_table[i][j].begin(); - k != overflow_table[i][j].end();) - if ((*k).first != NULL) { - int t = int_mod_hat((*k).second, unroll_amount); - if (t == 0) { - overflow_table[i][j].erase(k++); - } else { - int t2 = hull.query_variable_mod((*k).first, - unroll_amount); - if (t2 != INT_MAX) { - overflow_table[i][j][NULL] += t * t2; - overflow_table[i][j].erase(k++); - } else { - (*k).second = t; - k++; - } - } - } else - k++; - - overflow_table[i][j][NULL] = int_mod_hat( - overflow_table[i][j][NULL], unroll_amount); - - // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula - for (std::map<Variable_ID, int>::iterator k = - overflow_table[i][j].begin(); - k != overflow_table[i][j].end(); k++) - if ((*k).second < 0) - (*k).second += unroll_amount; - } - } - - // build overflow statement - CG_outputBuilder *ocg = ir->builder(); - CG_outputRepr *overflow_code = NULL; - Relation cond_upper(level), cond_lower(level); - Relation overflow_constraint(0); - F_And *overflow_constraint_root = overflow_constraint.add_and(); - std::vector<Free_Var_Decl *> over_var_list; - if (is_overflow_simplifiable && lb_list.size() == 1) { - for (int i = 0; i < ub_list.size(); i++) { - if (overflow_table[0][i].size() == 1) { - // upper splitting condition - GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); - h.update_const( - ((overflow_table[0][i][NULL] / stride) % unroll_amount) - * -stride); - } else { - // upper splitting condition - std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); - Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); - over_var_list.push_back(over_free_var); - GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); - h.update_coef(cond_upper.get_local(over_free_var), -stride); - - // insert constraint 0 <= overflow < unroll_amount - Variable_ID v = overflow_constraint.get_local(over_free_var); - GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); - h1.update_coef(v, 1); - GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); - h2.update_coef(v, -1); - h2.update_const(unroll_amount - 1); - - // create overflow assignment - bound.setup_names(); - CG_outputRepr *rhs = NULL; - for (std::map<Variable_ID, int>::iterator j = - overflow_table[0][i].begin(); - j != overflow_table[0][i].end(); j++) - if ((*j).first != NULL) { - CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); - if ((*j).second != 1) - t = ocg->CreateTimes(ocg->CreateInt((*j).second), - t); - rhs = ocg->CreatePlus(rhs, t); - } else if ((*j).second != 0) - rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); - - if (stride != 1) - rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); - rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); - - CG_outputRepr *lhs = ocg->CreateIdent(over_name); - init_code = ocg->StmtListAppend(init_code, - ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); - lhs = ocg->CreateIdent(over_name); - overflow_code = ocg->StmtListAppend(overflow_code, - ocg->CreateAssignment(0, lhs, rhs)); - } - } - - // lower splitting condition - GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); - } else if (is_overflow_simplifiable && ub_list.size() == 1) { - for (int i = 0; i < lb_list.size(); i++) { - - if (overflow_table[i][0].size() == 1) { - // lower splitting condition - GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); - h.update_const(overflow_table[i][0][NULL] * -stride); - } else { - // lower splitting condition - std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); - Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); - over_var_list.push_back(over_free_var); - GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); - h.update_coef(cond_lower.get_local(over_free_var), -stride); - - // insert constraint 0 <= overflow < unroll_amount - Variable_ID v = overflow_constraint.get_local(over_free_var); - GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); - h1.update_coef(v, 1); - GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); - h2.update_coef(v, -1); - h2.update_const(unroll_amount - 1); - - // create overflow assignment - bound.setup_names(); - CG_outputRepr *rhs = NULL; - for (std::map<Variable_ID, int>::iterator j = - overflow_table[0][i].begin(); - j != overflow_table[0][i].end(); j++) - if ((*j).first != NULL) { - CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); - if ((*j).second != 1) - t = ocg->CreateTimes(ocg->CreateInt((*j).second), - t); - rhs = ocg->CreatePlus(rhs, t); - } else if ((*j).second != 0) - rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); - - if (stride != 1) - rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); - rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); - - CG_outputRepr *lhs = ocg->CreateIdent(over_name); - init_code = ocg->StmtListAppend(init_code, - ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); - lhs = ocg->CreateIdent(over_name); - overflow_code = ocg->StmtListAppend(overflow_code, - ocg->CreateAssignment(0, lhs, rhs)); - } - } - - // upper splitting condition - GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); - } else { - std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); - Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); - over_var_list.push_back(over_free_var); - - Tuple<CG_outputRepr *> lb_repr_list, ub_repr_list; - for (int i = 0; i < lb_list.size(); i++) { - //lb_repr_list.append(outputLBasRepr(ocg, lb_list[i], bound, bound.set_var(dim+1), stride, stride_eq, Relation::True(bound.n_set()), std::vector<CG_outputRepr *>(bound.n_set(), NULL))); - lb_repr_list.append( - outputLBasRepr(ocg, lb_list[i], bound, - bound.set_var(dim + 1), stride, stride_eq, - Relation::True(bound.n_set()), - std::vector<CG_outputRepr *>(bound.n_set()))); - GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); - } - for (int i = 0; i < ub_list.size(); i++) { - //ub_repr_list.append(outputUBasRepr(ocg, ub_list[i], bound, bound.set_var(dim+1), stride, stride_eq, std::vector<CG_outputRepr *>(bound.n_set(), NULL))); - ub_repr_list.append( - outputUBasRepr(ocg, ub_list[i], bound, - bound.set_var(dim + 1), stride, stride_eq, - std::vector<CG_outputRepr *>(bound.n_set()))); - GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); - h.update_coef(cond_upper.get_local(over_free_var), -stride); - } - - CG_outputRepr *lbRepr, *ubRepr; - if (lb_repr_list.size() > 1) - lbRepr = ocg->CreateInvoke("max", lb_repr_list); - else if (lb_repr_list.size() == 1) - lbRepr = lb_repr_list[1]; - - if (ub_repr_list.size() > 1) - ubRepr = ocg->CreateInvoke("min", ub_repr_list); - else if (ub_repr_list.size() == 1) - ubRepr = ub_repr_list[1]; - - // create overflow assignment - bound.setup_names(); - CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), - ocg->CreateInt(1)); - if (stride != 1) - rhs = ocg->CreateIntegerDivide(rhs, ocg->CreateInt(stride)); - rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); - CG_outputRepr *lhs = ocg->CreateIdent(over_name); - init_code = ocg->StmtListAppend(init_code, - ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); - lhs = ocg->CreateIdent(over_name); - overflow_code = ocg->CreateAssignment(0, lhs, rhs); - - // insert constraint 0 <= overflow < unroll_amount - Variable_ID v = overflow_constraint.get_local(over_free_var); - GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); - h1.update_coef(v, 1); - GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); - h2.update_coef(v, -1); - h2.update_const(unroll_amount - 1); - } - - // insert overflow statement - int overflow_stmt_num = -1; - if (overflow_code != NULL) { - // build iteration space for overflow statement - Relation mapping(level, level - 1); - F_And *f_root = mapping.add_and(); - for (int i = 1; i < level; i++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(i), 1); - h.update_coef(mapping.input_var(i), -1); - } - Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull))); - for (int i = 1; i < level; i++) - overflow_IS.name_set_var(i, hull.set_var(i)->name()); - overflow_IS.setup_names(); - - // build dumb transformation relation for overflow statement - Relation overflow_xform(level - 1, 2 * (level - 1) + 1); - f_root = overflow_xform.add_and(); - for (int i = 1; i <= level - 1; i++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(overflow_xform.output_var(2 * i), 1); - h.update_coef(overflow_xform.input_var(i), -1); - - h = f_root->add_EQ(); - h.update_coef(overflow_xform.output_var(2 * i - 1), 1); - h.update_const(-lex[2 * i - 2]); - } - EQ_Handle h = f_root->add_EQ(); - h.update_coef(overflow_xform.output_var(2 * (level - 1) + 1), 1); - h.update_const(-lex[2 * (level - 1)]); - - shiftLexicalOrder(lex, dim - 1, 1); - Statement overflow_stmt; - overflow_stmt.code = overflow_code; - overflow_stmt.IS = overflow_IS; - overflow_stmt.xform = overflow_xform; - overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1); - for (int i = 0; i < level - 1; i++) { - overflow_stmt.loop_level[i].type = - stmt[stmt_num].loop_level[i].type; - if (stmt[stmt_num].loop_level[i].type == LoopLevelTile - && stmt[stmt_num].loop_level[i].payload >= level) - overflow_stmt.loop_level[i].payload = -1; - else - overflow_stmt.loop_level[i].payload = - stmt[stmt_num].loop_level[i].payload; - overflow_stmt.loop_level[i].parallel_level = - stmt[stmt_num].loop_level[i].parallel_level; - } - stmt.push_back(overflow_stmt); - dep.insert(); - overflow_stmt_num = stmt.size() - 1; - overflow[overflow_stmt_num] = over_var_list; - - // update the global known information on overflow variable - this->known = Intersection(this->known, - Extend_Set(copy(overflow_constraint), - this->known.n_set() - overflow_constraint.n_set())); - - // update dependence graph - DependenceVector dv; - dv.type = DEP_CONTROL; - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) - dep.connect(overflow_stmt_num, *i, dv); - dv.type = DEP_W2W; - { - IR_ScalarSymbol *overflow_sym = NULL; - std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef( - overflow_code); - for (int i = scalars.size() - 1; i >= 0; i--) - if (scalars[i]->is_write()) { - overflow_sym = scalars[i]->symbol(); - break; - } - for (int i = scalars.size() - 1; i >= 0; i--) - delete scalars[i]; - dv.sym = overflow_sym; - } - dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); - dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); - int dep_dim = get_last_dep_dim_before(stmt_num, level); - for (int i = dep_dim + 1; i < num_dep_dim; i++) { - dv.lbounds[i] = -posInfinity; - dv.ubounds[i] = posInfinity; - } - for (int i = 0; i <= dep_dim; i++) { - if (i != 0) { - dv.lbounds[i - 1] = 0; - dv.ubounds[i - 1] = 0; - } - dv.lbounds[i] = 1; - dv.ubounds[i] = posInfinity; - dep.connect(overflow_stmt_num, overflow_stmt_num, dv); - } - } - - // split the loop so it can be fully unrolled - std::set<int> result = split(stmt_num, level, cond_upper); - std::set<int> result2 = split(stmt_num, level, cond_lower); - for (std::set<int>::iterator i = result2.begin(); i != result2.end(); i++) - result.insert(*i); - - // check if unrolled statements can be trivially lumped together as one statement - bool can_be_lumped = true; - if (can_be_lumped) { - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) - if (*i != stmt_num) { - if (stmt[*i].loop_level.size() - != stmt[stmt_num].loop_level.size()) { - can_be_lumped = false; - break; - } - for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) - if (!(stmt[*i].loop_level[j].type - == stmt[stmt_num].loop_level[j].type - && stmt[*i].loop_level[j].payload - == stmt[stmt_num].loop_level[j].payload)) { - can_be_lumped = false; - break; - } - if (!can_be_lumped) - break; - std::vector<int> lex2 = getLexicalOrder(*i); - for (int j = 2 * level; j < lex.size() - 1; j += 2) - if (lex[j] != lex2[j]) { - can_be_lumped = false; - break; - } - if (!can_be_lumped) - break; - } - } - if (can_be_lumped) { - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) - if (is_inner_loop_depend_on_level(stmt[*i].IS, level, known)) { - can_be_lumped = false; - break; - } - } - if (can_be_lumped) { - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) - if (*i != stmt_num) { - if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) - && Must_Be_Subset(copy(stmt[stmt_num].IS), - copy(stmt[*i].IS)))) { - can_be_lumped = false; - break; - } - } - } - if (can_be_lumped) { - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) { - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); - j != dep.vertex[*i].second.end(); j++) - if (same_loop.find(j->first) != same_loop.end()) { - for (int k = 0; k < j->second.size(); k++) - if (j->second[k].type == DEP_CONTROL - || j->second[k].type == DEP_UNKNOWN) { - can_be_lumped = false; - break; - } - if (!can_be_lumped) - break; - } - if (!can_be_lumped) - break; - } - } - - // add strides to original statements - // for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) - // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); - - // std::vector<Free_Var_Decl *> depending_overflow_var; - // for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { - // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); - // if (overflow.find(*i) != overflow.end()) { - // // TO DO: It should check whether overflow vaiable depends on - // // this loop index and by how much. This step is important if - // // you want to unroll loops in arbitrary order. - // depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); - - // continue; - // } - // } - -// std::map<int, std::vector<Statement> > pending; -// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { -// add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); - -// if (overflow.find(*i) != overflow.end()) { -// // TO DO: It should check whether overflow vaiable depends on -// // this loop index and by how much. This step is important if -// // you want to unroll loops in arbitrary order. -// depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); - -// continue; -// } - -// // create copy for each unroll amount -// for (int j = 1; j < unroll_amount; j++) { -// Tuple<CG_outputRepr *> funcList; -// Tuple<std::string> loop_vars; -// loop_vars.append(stmt[*i].IS.set_var((dim+1)/2)->name()); -// funcList.append(ocg->CreatePlus(ocg->CreateIdent(stmt[*i].IS.set_var(level)->name()), ocg->CreateInt(j*stride))); -// CG_outputRepr *code = ocg->CreatePlaceHolder(0, stmt[*i].code->clone(), funcList, loop_vars); - -// // prepare the new statment to insert -// Statement unrolled_stmt; -// unrolled_stmt.IS = copy(stmt[*i].IS); -// // adjust_loop_bound(unrolled_stmt.IS, (dim-1)/2, j); -// unrolled_stmt.xform = copy(stmt[*i].xform); -// unrolled_stmt.code = code; -// unrolled_stmt.loop_level = stmt[*i].loop_level; -// pending[*i].push_back(unrolled_stmt); -// } -// } - -// // adjust iteration space due to loop bounds depending on this loop -// // index and affected overflow variables -// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { -// for (int j = 0; j < pending[*i].size(); j++) { -// adjust_loop_bound(pending[*i][j].IS, (dim-1)/2, j+1, depending_overflow_var); -// //pending[*i][j].IS = Intersection(pending[*i][j].IS, Extend_Set(copy(this->known), pending[*i][j].IS.n_set() - this->known.n_set())); -// } -// } - - // insert unrolled statements - int old_num_stmt = stmt.size(); - if (!can_be_lumped) { - std::map<int, std::vector<int> > what_stmt_num; - - for (int j = 1; j < unroll_amount; j++) { - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) { - Statement new_stmt; - - Tuple<CG_outputRepr *> funcList; - Tuple<std::string> loop_vars; - loop_vars.append(stmt[*i].IS.set_var(level)->name()); - funcList.append( - ocg->CreatePlus( - ocg->CreateIdent( - stmt[*i].IS.set_var(level)->name()), - ocg->CreateInt(j * stride))); - new_stmt.code = ocg->CreatePlaceHolder(0, - stmt[*i].code->clone(), funcList, loop_vars); - - new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); - add_loop_stride(new_stmt.IS, bound, level - 1, - unroll_amount * stride); - - new_stmt.xform = copy(stmt[*i].xform); - new_stmt.loop_level = stmt[*i].loop_level; - stmt.push_back(new_stmt); - dep.insert(); - what_stmt_num[*i].push_back(stmt.size() - 1); - } - } - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) - add_loop_stride(stmt[*i].IS, bound, level - 1, - unroll_amount * stride); - - // update dependence graph - if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { - int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; - int new_stride = unroll_amount * stride; - for (int i = 0; i < old_num_stmt; i++) { - std::vector<std::pair<int, DependenceVector> > D; - - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end();) { - if (same_loop.find(i) != same_loop.end()) { - if (same_loop.find(j->first) != same_loop.end()) { - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.type == DEP_CONTROL - || dv.type == DEP_UNKNOWN) { - D.push_back(std::make_pair(j->first, dv)); - for (int kk = 0; kk < unroll_amount - 1; - kk++) - if (what_stmt_num[i][kk] != -1 - && what_stmt_num[j->first][kk] - != -1) - dep.connect(what_stmt_num[i][kk], - what_stmt_num[j->first][kk], - dv); - } else { - coef_t lb = dv.lbounds[dep_dim]; - coef_t ub = dv.ubounds[dep_dim]; - if (ub == lb - && int_mod(lb, - static_cast<coef_t>(new_stride)) - == 0) { - D.push_back( - std::make_pair(j->first, dv)); - for (int kk = 0; kk < unroll_amount - 1; - kk++) - if (what_stmt_num[i][kk] != -1 - && what_stmt_num[j->first][kk] - != -1) - dep.connect( - what_stmt_num[i][kk], - what_stmt_num[j->first][kk], - dv); - } else if (lb == -posInfinity - && ub == posInfinity) { - D.push_back( - std::make_pair(j->first, dv)); - for (int kk = 0; kk < unroll_amount; - kk++) - if (kk == 0) - D.push_back( - std::make_pair(j->first, - dv)); - else if (what_stmt_num[j->first][kk - - 1] != -1) - D.push_back( - std::make_pair( - what_stmt_num[j->first][kk - - 1], - dv)); - for (int t = 0; t < unroll_amount - 1; - t++) - if (what_stmt_num[i][t] != -1) - for (int kk = 0; - kk < unroll_amount; - kk++) - if (kk == 0) - dep.connect( - what_stmt_num[i][t], - j->first, dv); - else if (what_stmt_num[j->first][kk - - 1] != -1) - dep.connect( - what_stmt_num[i][t], - what_stmt_num[j->first][kk - - 1], - dv); - } else { - for (int kk = 0; kk < unroll_amount; - kk++) { - if (lb != -posInfinity) { - if (kk * stride - < int_mod(lb, - static_cast<coef_t>(new_stride))) - dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb) - / new_stride) - * new_stride - + new_stride; - else - dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb) - / new_stride) - * new_stride; - } - if (ub != posInfinity) { - if (kk * stride - > int_mod(ub, - static_cast<coef_t>(new_stride))) - dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub) - / new_stride) - * new_stride - - new_stride; - else - dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub) - / new_stride) - * new_stride; - } - if (dv.ubounds[dep_dim] - >= dv.lbounds[dep_dim]) { - if (kk == 0) - D.push_back( - std::make_pair( - j->first, - dv)); - else if (what_stmt_num[j->first][kk - - 1] != -1) - D.push_back( - std::make_pair( - what_stmt_num[j->first][kk - - 1], - dv)); - } - } - for (int t = 0; t < unroll_amount - 1; - t++) - if (what_stmt_num[i][t] != -1) - for (int kk = 0; - kk < unroll_amount; - kk++) { - if (lb != -posInfinity) { - if (kk * stride - < int_mod( - lb + t - + 1, - static_cast<coef_t>(new_stride))) - dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb - + (t - + 1) - * stride) - / new_stride) - * new_stride - + new_stride; - else - dv.lbounds[dep_dim] = - floor( - static_cast<double>(lb - + (t - + 1) - * stride) - / new_stride) - * new_stride; - } - if (ub != posInfinity) { - if (kk * stride - > int_mod( - ub + t - + 1, - static_cast<coef_t>(new_stride))) - dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub - + (t - + 1) - * stride) - / new_stride) - * new_stride - - new_stride; - else - dv.ubounds[dep_dim] = - floor( - static_cast<double>(ub - + (t - + 1) - * stride) - / new_stride) - * new_stride; - } - if (dv.ubounds[dep_dim] - >= dv.lbounds[dep_dim]) { - if (kk == 0) - dep.connect( - what_stmt_num[i][t], - j->first, - dv); - else if (what_stmt_num[j->first][kk - - 1] != -1) - dep.connect( - what_stmt_num[i][t], - what_stmt_num[j->first][kk - - 1], - dv); - } - } - } - } - } - - dep.vertex[i].second.erase(j++); - } else { - for (int kk = 0; kk < unroll_amount - 1; kk++) - if (what_stmt_num[i][kk] != -1) - dep.connect(what_stmt_num[i][kk], j->first, - j->second); - - j++; - } - } else { - if (same_loop.find(j->first) != same_loop.end()) - for (int k = 0; k < j->second.size(); k++) - for (int kk = 0; kk < unroll_amount - 1; kk++) - if (what_stmt_num[j->first][kk] != -1) - D.push_back( - std::make_pair( - what_stmt_num[j->first][kk], - j->second[k])); - j++; - } - } - - for (int j = 0; j < D.size(); j++) - dep.connect(i, D[j].first, D[j].second); - } - } - - // reset lexical order for the unrolled loop body - std::set<int> new_same_loop; - for (std::map<int, std::vector<int> >::iterator i = - what_stmt_num.begin(); i != what_stmt_num.end(); i++) { - new_same_loop.insert(i->first); - for (int j = 0; j < i->second.size(); j++) - new_same_loop.insert(i->second[j]); - } - setLexicalOrder(dim + 1, new_same_loop); - } else { - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) - add_loop_stride(stmt[*i].IS, bound, level - 1, - unroll_amount * stride); - - int max_level = stmt[stmt_num].loop_level.size(); - std::vector<std::pair<int, int> > stmt_order; - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) - stmt_order.push_back( - std::make_pair( - get_const(stmt[*i].xform, 2 * max_level, - Output_Var), *i)); - sort(stmt_order.begin(), stmt_order.end()); - - Statement new_stmt; - new_stmt.code = NULL; - for (int j = 1; j < unroll_amount; j++) - for (int i = 0; i < stmt_order.size(); i++) { - Tuple<CG_outputRepr *> funcList; - Tuple<std::string> loop_vars; - loop_vars.append( - stmt[stmt_order[i].second].IS.set_var(level)->name()); - funcList.append( - ocg->CreatePlus( - ocg->CreateIdent( - stmt[stmt_order[i].second].IS.set_var( - level)->name()), - ocg->CreateInt(j * stride))); - CG_outputRepr *code = ocg->CreatePlaceHolder(0, - stmt[stmt_order[i].second].code->clone(), funcList, - loop_vars); - new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); - } - - new_stmt.IS = copy(stmt[stmt_num].IS); - new_stmt.xform = copy(stmt[stmt_num].xform); - assign_const(new_stmt.xform, 2 * max_level, - stmt_order[stmt_order.size() - 1].first + 1); - new_stmt.loop_level = stmt[stmt_num].loop_level; - stmt.push_back(new_stmt); - dep.insert(); - - // update dependence graph - if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { - int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; - int new_stride = unroll_amount * stride; - for (int i = 0; i < old_num_stmt; i++) { - std::vector<std::pair<int, std::vector<DependenceVector> > > D; - - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end();) { - if (same_loop.find(i) != same_loop.end()) { - if (same_loop.find(j->first) != same_loop.end()) { - std::vector<DependenceVector> dvs11, dvs12, dvs22, - dvs21; - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.type == DEP_CONTROL - || dv.type == DEP_UNKNOWN) { - if (i == j->first) { - dvs11.push_back(dv); - dvs22.push_back(dv); - } else - throw loop_error( - "unrolled statements lumped together illegally"); - } else { - coef_t lb = dv.lbounds[dep_dim]; - coef_t ub = dv.ubounds[dep_dim]; - if (ub == lb - && int_mod(lb, - static_cast<coef_t>(new_stride)) - == 0) { - dvs11.push_back(dv); - dvs22.push_back(dv); - } else { - if (lb != -posInfinity) - dv.lbounds[dep_dim] = ceil( - static_cast<double>(lb) - / new_stride) - * new_stride; - if (ub != posInfinity) - dv.ubounds[dep_dim] = floor( - static_cast<double>(ub) - / new_stride) - * new_stride; - if (dv.ubounds[dep_dim] - >= dv.lbounds[dep_dim]) - dvs11.push_back(dv); - - if (lb != -posInfinity) - dv.lbounds[dep_dim] = ceil( - static_cast<double>(lb) - / new_stride) - * new_stride; - if (ub != posInfinity) - dv.ubounds[dep_dim] = ceil( - static_cast<double>(ub) - / new_stride) - * new_stride; - if (dv.ubounds[dep_dim] - >= dv.lbounds[dep_dim]) - dvs21.push_back(dv); - - if (lb != -posInfinity) - dv.lbounds[dep_dim] = floor( - static_cast<double>(lb) - / new_stride) - * new_stride; - if (ub != posInfinity) - dv.ubounds[dep_dim] = floor( - static_cast<double>(ub - - stride) - / new_stride) - * new_stride; - if (dv.ubounds[dep_dim] - >= dv.lbounds[dep_dim]) - dvs12.push_back(dv); - - if (lb != -posInfinity) - dv.lbounds[dep_dim] = floor( - static_cast<double>(lb) - / new_stride) - * new_stride; - if (ub != posInfinity) - dv.ubounds[dep_dim] = ceil( - static_cast<double>(ub - - stride) - / new_stride) - * new_stride; - if (dv.ubounds[dep_dim] - >= dv.lbounds[dep_dim]) - dvs22.push_back(dv); - } - } - } - if (dvs11.size() > 0) - D.push_back(std::make_pair(i, dvs11)); - if (dvs22.size() > 0) - dep.connect(old_num_stmt, old_num_stmt, dvs22); - if (dvs12.size() > 0) - D.push_back( - std::make_pair(old_num_stmt, dvs12)); - if (dvs21.size() > 0) - dep.connect(old_num_stmt, i, dvs21); - - dep.vertex[i].second.erase(j++); - } else { - dep.connect(old_num_stmt, j->first, j->second); - j++; - } - } else { - if (same_loop.find(j->first) != same_loop.end()) - D.push_back( - std::make_pair(old_num_stmt, j->second)); - j++; - } - } - - for (int j = 0; j < D.size(); j++) - dep.connect(i, D[j].first, D[j].second); - } - } - } - - return result; -} - -std::vector<int> Loop::getLexicalOrder(int stmt_num) const { - assert(stmt_num < stmt.size()); - - const int n = stmt[stmt_num].xform.n_out(); - std::vector<int> lex(n, 0); - - for (int i = 0; i < n; i += 2) - lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); - - return lex; -} - -std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { - const int m = stmt.size(); - - std::set<int> same_loops; - for (int i = 0; i < m; i++) { - if (dim < 0) - same_loops.insert(i); - else { - std::vector<int> a_lex = getLexicalOrder(i); - int j; - for (j = 0; j <= dim; j += 2) - if (lex[j] != a_lex[j]) - break; - if (j > dim) - same_loops.insert(i); - } - } - - return same_loops; -} - -void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { - const int m = stmt.size(); - - if (amount == 0) - return; - - for (int i = 0; i < m; i++) { - std::vector<int> lex2 = getLexicalOrder(i); - - bool need_shift = true; - - for (int j = 0; j < dim; j++) - if (lex2[j] != lex[j]) { - need_shift = false; - break; - } - - if (!need_shift) - continue; - - if (amount > 0) { - if (lex2[dim] < lex[dim]) - continue; - } else if (amount < 0) { - if (lex2[dim] > lex[dim]) - continue; - } - - assign_const(stmt[i].xform, dim, lex2[dim] + amount); - } -} - -void Loop::setLexicalOrder(int dim, const std::set<int> &active, - int starting_order) { - if (active.size() == 0) - return; - - // check for sanity of parameters - if (dim < 0 || dim % 2 != 0) - throw std::invalid_argument( - "invalid constant loop level to set lexicographical order"); - std::vector<int> lex; - int ref_stmt_num; - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { - if ((*i) < 0 || (*i) >= stmt.size()) - throw std::invalid_argument( - "invalid statement number " + to_string(*i)); - if (dim >= stmt[*i].xform.n_out()) - throw std::invalid_argument( - "invalid constant loop level to set lexicographical order"); - if (i == active.begin()) { - lex = getLexicalOrder(*i); - ref_stmt_num = *i; - } else { - std::vector<int> lex2 = getLexicalOrder(*i); - for (int j = 0; j < dim; j += 2) - if (lex[j] != lex2[j]) - throw std::invalid_argument( - "statements are not in the same sub loop nest"); - } - } - - // sepearate statements by current loop level types - int level = (dim + 2) / 2; - std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; - std::set<int> active_by_no_level; - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { - if (level > stmt[*i].loop_level.size()) - active_by_no_level.insert(*i); - else - active_by_level_type[std::make_pair( - stmt[*i].loop_level[level - 1].type, - stmt[*i].loop_level[level - 1].payload)].insert(*i); - } - - // further separate statements due to control dependences - std::vector<std::set<int> > active_by_level_type_splitted; - for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = - active_by_level_type.begin(); i != active_by_level_type.end(); i++) - active_by_level_type_splitted.push_back(i->second); - for (std::set<int>::iterator i = active_by_no_level.begin(); - i != active_by_no_level.end(); i++) - for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { - std::set<int> controlled, not_controlled; - for (std::set<int>::iterator k = - active_by_level_type_splitted[j].begin(); - k != active_by_level_type_splitted[j].end(); k++) { - std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); - bool is_controlled = false; - for (int kk = 0; kk < dvs.size(); kk++) - if (dvs[kk].type = DEP_CONTROL) { - is_controlled = true; - break; - } - if (is_controlled) - controlled.insert(*k); - else - not_controlled.insert(*k); - } - if (controlled.size() != 0 && not_controlled.size() != 0) { - active_by_level_type_splitted.erase( - active_by_level_type_splitted.begin() + j); - active_by_level_type_splitted.push_back(controlled); - active_by_level_type_splitted.push_back(not_controlled); - } - } - - // set lexical order separating loops with different loop types first - if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { - int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; - - Graph<std::set<int>, Empty> g; - for (std::vector<std::set<int> >::iterator i = - active_by_level_type_splitted.begin(); - i != active_by_level_type_splitted.end(); i++) - g.insert(*i); - for (std::set<int>::iterator i = active_by_no_level.begin(); - i != active_by_no_level.end(); i++) { - std::set<int> t; - t.insert(*i); - g.insert(t); - } - for (int i = 0; i < g.vertex.size(); i++) - for (int j = i + 1; j < g.vertex.size(); j++) { - bool connected = false; - for (std::set<int>::iterator ii = g.vertex[i].first.begin(); - ii != g.vertex[i].first.end(); ii++) { - for (std::set<int>::iterator jj = g.vertex[j].first.begin(); - jj != g.vertex[j].first.end(); jj++) { - std::vector<DependenceVector> dvs = dep.getEdge(*ii, - *jj); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() - || (dvs[k].is_data_dependence() - && !dvs[k].has_been_carried_before( - dep_dim))) { - g.connect(i, j); - connected = true; - break; - } - if (connected) - break; - } - if (connected) - break; - } - connected = false; - for (std::set<int>::iterator ii = g.vertex[i].first.begin(); - ii != g.vertex[i].first.end(); ii++) { - for (std::set<int>::iterator jj = g.vertex[j].first.begin(); - jj != g.vertex[j].first.end(); jj++) { - std::vector<DependenceVector> dvs = dep.getEdge(*jj, - *ii); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() - || (dvs[k].is_data_dependence() - && !dvs[k].has_been_carried_before( - dep_dim))) { - g.connect(j, i); - connected = true; - break; - } - if (connected) - break; - } - if (connected) - break; - } - } - - std::vector<std::set<int> > s = g.topoSort(); - if (s.size() != g.vertex.size()) - throw loop_error( - "cannot separate statements with different loop types at loop level " - + to_string(level)); - - // assign lexical order - int order = starting_order; - for (int i = 0; i < s.size(); i++) { - std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; - int sz = cur_scc.size(); - if (sz == 1) { - int cur_stmt = *(cur_scc.begin()); - assign_const(stmt[cur_stmt].xform, dim, order); - for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) - assign_const(stmt[cur_stmt].xform, j, 0); - order++; - } else { - setLexicalOrder(dim, cur_scc, order); - order += sz; - } - } - } - // set lexical order seperating single iteration statements and loops - else { - std::set<int> true_singles; - std::set<int> nonsingles; - std::map<coef_t, std::set<int> > fake_singles; - - // sort out statements that do not require loops - for (std::set<int>::iterator i = active.begin(); i != active.end(); - i++) { - Relation cur_IS = getNewIS(*i); - if (is_single_iteration(cur_IS, dim + 1)) { - bool is_all_single = true; - for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) - if (!is_single_iteration(cur_IS, j)) { - is_all_single = false; - break; - } - if (is_all_single) - true_singles.insert(*i); - else { - try { - fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( - *i); - } catch (const std::exception &e) { - fake_singles[posInfinity].insert(*i); - } - } - } else - nonsingles.insert(*i); - } - - // split nonsingles forcibly according to negative dependences present (loop unfusible) - int dep_dim = get_dep_dim_of(ref_stmt_num, level); - Graph<int, Empty> g2; - for (std::set<int>::iterator i = nonsingles.begin(); - i != nonsingles.end(); i++) - g2.insert(*i); - for (int i = 0; i < g2.vertex.size(); i++) - for (int j = i + 1; j < g2.vertex.size(); j++) { - std::vector<DependenceVector> dvs = dep.getEdge( - g2.vertex[i].first, g2.vertex[j].first); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() - || (dvs[k].is_data_dependence() - && dvs[k].has_negative_been_carried_at( - dep_dim))) { - g2.connect(i, j); - break; - } - dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() - || (dvs[k].is_data_dependence() - && dvs[k].has_negative_been_carried_at( - dep_dim))) { - g2.connect(j, i); - break; - } - } - - std::vector<std::set<int> > s2 = g2.packed_topoSort(); - - std::vector<std::set<int> > splitted_nonsingles; - for (int i = 0; i < s2.size(); i++) { - std::set<int> cur_scc; - for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); - j++) - cur_scc.insert(g2.vertex[*j].first); - splitted_nonsingles.push_back(cur_scc); - } - - // convert to dependence graph for grouped statements - dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; - Graph<std::set<int>, Empty> g; - for (std::set<int>::iterator i = true_singles.begin(); - i != true_singles.end(); i++) { - std::set<int> t; - t.insert(*i); - g.insert(t); - } - for (int i = 0; i < splitted_nonsingles.size(); i++) { - g.insert(splitted_nonsingles[i]); - } - for (std::map<coef_t, std::set<int> >::iterator i = - fake_singles.begin(); i != fake_singles.end(); i++) - g.insert((*i).second); - - for (int i = 0; i < g.vertex.size(); i++) - for (int j = i + 1; j < g.vertex.size(); j++) { - bool connected = false; - for (std::set<int>::iterator ii = g.vertex[i].first.begin(); - ii != g.vertex[i].first.end(); ii++) { - for (std::set<int>::iterator jj = g.vertex[j].first.begin(); - jj != g.vertex[j].first.end(); jj++) { - std::vector<DependenceVector> dvs = dep.getEdge(*ii, - *jj); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() - || (dvs[k].is_data_dependence() - && !dvs[k].has_been_carried_before( - dep_dim))) { - g.connect(i, j); - connected = true; - break; - } - if (connected) - break; - } - if (connected) - break; - } - connected = false; - for (std::set<int>::iterator ii = g.vertex[i].first.begin(); - ii != g.vertex[i].first.end(); ii++) { - for (std::set<int>::iterator jj = g.vertex[j].first.begin(); - jj != g.vertex[j].first.end(); jj++) { - std::vector<DependenceVector> dvs = dep.getEdge(*jj, - *ii); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() - || (dvs[k].is_data_dependence() - && !dvs[k].has_been_carried_before( - dep_dim))) { - g.connect(j, i); - connected = true; - break; - } - if (connected) - break; - } - if (connected) - break; - } - } - - // topological sort according to chun's permute algorithm - std::vector<std::set<int> > s = g.topoSort(); - - // assign lexical order - int order = starting_order; - for (int i = 0; i < s.size(); i++) { - // translate each SCC into original statements - std::set<int> cur_scc; - for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) - copy(g.vertex[*j].first.begin(), g.vertex[*j].first.end(), - inserter(cur_scc, cur_scc.begin())); - - // now assign the constant - for (std::set<int>::iterator j = cur_scc.begin(); - j != cur_scc.end(); j++) - assign_const(stmt[*j].xform, dim, order); - - if (cur_scc.size() > 1) - setLexicalOrder(dim + 2, cur_scc); - else if (cur_scc.size() == 1) { - int cur_stmt = *(cur_scc.begin()); - for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) - assign_const(stmt[cur_stmt].xform, j, 0); - } - - if (cur_scc.size() > 0) - order++; - } - } -} - -void Loop::apply_xform() { - std::set<int> active; - for (int i = 0; i < stmt.size(); i++) - active.insert(i); - apply_xform(active); -} - -void Loop::apply_xform(int stmt_num) { - std::set<int> active; - active.insert(stmt_num); - apply_xform(active); -} - -void Loop::apply_xform(std::set<int> &active) { - int max_n = 0; - - CG_outputBuilder *ocg = ir->builder(); - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { - int n = stmt[*i].loop_level.size(); - if (n > max_n) - max_n = n; - - std::vector<int> lex = getLexicalOrder(*i); - - Relation mapping(2 * n + 1, n); - F_And *f_root = mapping.add_and(); - for (int j = 1; j <= n; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(j), 1); - h.update_coef(mapping.input_var(2 * j), -1); - } - mapping = Composition(mapping, stmt[*i].xform); - mapping.simplify(); - - // match omega input/output variables to variable names in the code - for (int j = 1; j <= stmt[*i].IS.n_set(); j++) - mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); - for (int j = 1; j <= n; j++) - mapping.name_output_var(j, - tmp_loop_var_name_prefix - + to_string(tmp_loop_var_name_counter + j - 1)); - mapping.setup_names(); - - Relation known = Extend_Set(copy(this->known), - mapping.n_out() - this->known.n_set()); - //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); - stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, - std::vector<CG_outputRepr *>(mapping.n_out())); - stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); - stmt[*i].IS.simplify(); - - // replace original transformation relation with straight 1-1 mapping - mapping = Relation(n, 2 * n + 1); - f_root = mapping.add_and(); - for (int j = 1; j <= n; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(2 * j), 1); - h.update_coef(mapping.input_var(j), -1); - } - for (int j = 1; j <= 2 * n + 1; j += 2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(j), 1); - h.update_const(-lex[j - 1]); - } - stmt[*i].xform = mapping; - } - - tmp_loop_var_name_counter += max_n; -} - -void Loop::addKnown(const Relation &cond) { - int n1 = this->known.n_set(); - - Relation r = copy(cond); - int n2 = r.n_set(); - - if (n1 < n2) - this->known = Extend_Set(this->known, n2 - n1); - else if (n1 > n2) - r = Extend_Set(r, n1 - n2); - - this->known = Intersection(this->known, r); -} - -bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { - if (stmt.size() == 0) - return true; - - // check for sanity of parameters - for (int i = 0; i < stmt.size(); i++) { - if (stmt[i].loop_level.size() != num_dep_dim) - throw std::invalid_argument( - "nonsingular loop transformations must be applied to original perfect loop nest"); - for (int j = 0; j < stmt[i].loop_level.size(); j++) - if (stmt[i].loop_level[j].type != LoopLevelOriginal) - throw std::invalid_argument( - "nonsingular loop transformations must be applied to original perfect loop nest"); - } - if (T.size() != num_dep_dim) - throw std::invalid_argument("invalid transformation matrix"); - for (int i = 0; i < stmt.size(); i++) - if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) - throw std::invalid_argument("invalid transformation matrix"); - - // build relation from matrix - Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); - F_And *f_root = mapping.add_and(); - for (int i = 0; i < num_dep_dim; i++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(2 * (i + 1)), -1); - for (int j = 0; j < num_dep_dim; j++) - if (T[i][j] != 0) - h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); - if (T[i].size() == num_dep_dim + 1) - h.update_const(T[i][num_dep_dim]); - } - for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(i), -1); - h.update_coef(mapping.input_var(i), 1); - } - - // update transformation relations - for (int i = 0; i < stmt.size(); i++) - stmt[i].xform = Composition(copy(mapping), stmt[i].xform); - - // update dependence graph - for (int i = 0; i < dep.vertex.size(); i++) - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); - j++) { - std::vector<DependenceVector> dvs = j->second; - for (int k = 0; k < dvs.size(); k++) { - DependenceVector &dv = dvs[k]; - switch (dv.type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - std::vector<coef_t> lbounds(num_dep_dim), ubounds( - num_dep_dim); - for (int p = 0; p < num_dep_dim; p++) { - coef_t lb = 0; - coef_t ub = 0; - for (int q = 0; q < num_dep_dim; q++) { - if (T[p][q] > 0) { - if (lb == -posInfinity - || dv.lbounds[q] == -posInfinity) - lb = -posInfinity; - else - lb += T[p][q] * dv.lbounds[q]; - if (ub == posInfinity - || dv.ubounds[q] == posInfinity) - ub = posInfinity; - else - ub += T[p][q] * dv.ubounds[q]; - } else if (T[p][q] < 0) { - if (lb == -posInfinity - || dv.ubounds[q] == posInfinity) - lb = -posInfinity; - else - lb += T[p][q] * dv.ubounds[q]; - if (ub == posInfinity - || dv.lbounds[q] == -posInfinity) - ub = posInfinity; - else - ub += T[p][q] * dv.lbounds[q]; - } - } - if (T[p].size() == num_dep_dim + 1) { - if (lb != -posInfinity) - lb += T[p][num_dep_dim]; - if (ub != posInfinity) - ub += T[p][num_dep_dim]; - } - lbounds[p] = lb; - ubounds[p] = ub; - } - dv.lbounds = lbounds; - dv.ubounds = ubounds; - - break; - } - default: - ; - } - } - j->second = dvs; - } - - // set constant loop values - std::set<int> active; - for (int i = 0; i < stmt.size(); i++) - active.insert(i); - setLexicalOrder(0, active); - - return true; -} - -void Loop::skew(const std::set<int> &stmt_nums, int level, - const std::vector<int> &skew_amount) { - if (stmt_nums.size() == 0) - return; - - // check for sanity of parameters - int ref_stmt_num = *(stmt_nums.begin()); - std::vector<std::set<int> > array_of_deps; - for (std::set<int>::const_iterator i = stmt_nums.begin(); - i != stmt_nums.end(); i++) { - if (*i < 0 || *i >= stmt.size()) - throw std::invalid_argument( - "invalid statement number " + to_string(*i)); - if (level < 1 || level > stmt[*i].loop_level.size()) - throw std::invalid_argument( - "invalid loop level " + to_string(level)); - for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) - if (skew_amount[j] != 0) - throw std::invalid_argument("invalid skewing formula"); - } - - // set trasformation relations - for (std::set<int>::const_iterator i = stmt_nums.begin(); - i != stmt_nums.end(); i++) { - int n = stmt[*i].xform.n_out(); - Relation r(n, n); - F_And *f_root = r.add_and(); - for (int j = 1; j <= n; j++) - if (j != 2 * level) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.input_var(j), 1); - h.update_coef(r.output_var(j), -1); - } - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.output_var(2 * level), -1); - for (int j = 0; j < skew_amount.size(); j++) - if (skew_amount[j] != 0) - h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); - - stmt[*i].xform = Composition(r, stmt[*i].xform); - stmt[*i].xform.simplify(); - applyXform(*i); - std::set<int> dont_consider; - //} - - // update dependence graph - if (stmt[ref_stmt_num].loop_level[level - 1].type - == LoopLevelOriginal) { - int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; - for (std::set<int>::const_iterator i = stmt_nums.begin(); - i != stmt_nums.end(); i++) - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); - j != dep.vertex[*i].second.end(); j++) - if (stmt_nums.find(j->first) != stmt_nums.end()) { - // dependence between skewed statements - std::vector<DependenceVector> dvs = j->second; - for (int k = 0; k < dvs.size(); k++) { - DependenceVector &dv = dvs[k]; - if (dv.is_data_dependence()) { - coef_t lb = 0; - coef_t ub = 0; - for (int kk = 0; kk < skew_amount.size(); - kk++) { - int cur_dep_dim = get_dep_dim_of(*i, - kk + 1); - if (skew_amount[kk] > 0) { - if (lb != -posInfinity - && stmt[*i].loop_level[kk].type - == LoopLevelOriginal - && dv.lbounds[cur_dep_dim] - != -posInfinity) - lb += skew_amount[kk] - * dv.lbounds[cur_dep_dim]; - else { - if (cur_dep_dim != -1 - && !(dv.lbounds[cur_dep_dim] - == 0 - && dv.ubounds[cur_dep_dim] - == 0)) - lb = -posInfinity; - } - if (ub != posInfinity - && stmt[*i].loop_level[kk].type - == LoopLevelOriginal - && dv.ubounds[cur_dep_dim] - != posInfinity) - ub += skew_amount[kk] - * dv.ubounds[cur_dep_dim]; - else { - if (cur_dep_dim != -1 - && !(dv.lbounds[cur_dep_dim] - == 0 - && dv.ubounds[cur_dep_dim] - == 0)) - ub = posInfinity; - } - } else if (skew_amount[kk] < 0) { - if (lb != -posInfinity - && stmt[*i].loop_level[kk].type - == LoopLevelOriginal - && dv.ubounds[cur_dep_dim] - != posInfinity) - lb += skew_amount[kk] - * dv.ubounds[cur_dep_dim]; - else { - if (cur_dep_dim != -1 - && !(dv.lbounds[cur_dep_dim] - == 0 - && dv.ubounds[cur_dep_dim] - == 0)) - lb = -posInfinity; - } - if (ub != posInfinity - && stmt[*i].loop_level[kk].type - == LoopLevelOriginal - && dv.lbounds[cur_dep_dim] - != -posInfinity) - ub += skew_amount[kk] - * dv.lbounds[cur_dep_dim]; - else { - if (cur_dep_dim != -1 - && !(dv.lbounds[cur_dep_dim] - == 0 - && dv.ubounds[cur_dep_dim] - == 0)) - ub = posInfinity; - } - } - } - if ((dv.isCarried(dep_dim) - && dv.hasPositive(dep_dim)) && dv.quasi) - dv.quasi = false; - - if ((dv.isCarried(dep_dim) - && dv.hasNegative(dep_dim)) - && !dv.quasi) - throw loop_error( - "loop error: Skewing is illegal, dependence violation!"); - dv.lbounds[dep_dim] = lb; - dv.ubounds[dep_dim] = ub; - if ((dv.isCarried(dep_dim) - && dv.hasPositive(dep_dim)) && dv.quasi) - dv.quasi = false; - - if ((dv.isCarried(dep_dim) - && dv.hasNegative(dep_dim)) - && !dv.quasi) - throw loop_error( - "loop error: Skewing is illegal, dependence violation!"); - } - } - - j->second = dvs; - } - } else { - // dependence from skewed statement to unskewed statement becomes jumbled, - // put distance value at skewed dimension to unknown - /*std::vector<DependenceVector> dvs = j->second; - for (int k = 0; k < dvs.size(); k++) { - DependenceVector &dv = dvs[k]; - if (dv.is_data_dependence()) { - dv.lbounds[dep_dim] = -posInfinity; - dv.ubounds[dep_dim] = posInfinity; - } - } - j->second = dvs; - */ - dont_consider.insert(j->first); - } - for (int l = 0; l < dep.vertex.size(); l++) - if (stmt_nums.find(l) == stmt_nums.end()) - if (dont_consider.find(l) == stmt_nums.end() - && (dep.vertex[l].second.find(*i) - != dep.vertex[l].second.end())) - dont_consider.insert(l); - array_of_deps.push_back(dont_consider); - } - /*for (int i = 0; i < dep.vertex.size(); i++) - if (stmt_nums.find(i) == stmt_nums.end()) - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end(); j++) - if (stmt_nums.find(j->first) != stmt_nums.end()) { - // dependence from unskewed statement to skewed statement becomes jumbled, - // put distance value at skewed dimension to unknown - std::vector<DependenceVector> dvs = j->second; - for (int k = 0; k < dvs.size(); k++) { - DependenceVector &dv = dvs[k]; - if (dv.is_data_dependence()) { - dv.lbounds[dep_dim] = -posInfinity; - dv.ubounds[dep_dim] = posInfinity; - } - } - j->second = dvs; - } - }*/ - std::set<int>::const_iterator w = stmt_nums.begin(); - for (int i = 0; i < array_of_deps.size() && w != stmt_nums.end(); i++) - for (std::set<int>::const_iterator j = array_of_deps[i].begin(); - j != array_of_deps[i].end(); j++) { - if (dep.vertex[*w].second.find(*j) != dep.vertex[*w].second.end()) - dep.disconnect(*w, *j); - if (dep.vertex[*j].second.find(*w) != dep.vertex[*j].second.end()) - dep.disconnect(*j, *w); - int x, y; - std::pair<std::vector<DependenceVector>, - std::vector<DependenceVector> > dv_s; - if ((*w) <= (*j)) { - x = *w; - y = *j; - - dv_s = test_data_dependences(ir_, stmt[x].code, stmt[x].IS, - stmt[y].code, stmt[y].IS, freevar, index, x, y); - } else { - x = *j; - y = *w; - dv_s = test_data_dependences(ir_, stmt[y].code, stmt[y].IS, - stmt[x].code, stmt[x].IS, freevar, index, x, y); - } - for (int k = 0; k < dv_s.first.size(); k++) { - if (is_dependence_valid_based_on_lex_order(x, y, dv_s.first[k], - true)) - dep.connect(x, y, dv_s.first[k]); - else - dep.connect(y, x, dv_s.first[k].reverse()); - } - for (int k = 0; k < dv_s.second.size(); k++) { - if (is_dependence_valid_based_on_lex_order(x, y, dv_s.second[k], - false)) - dep.connect(y, x, dv_s.second[k]); - else - dep.connect(x, y, dv_s.second[k].reverse()); - } - w++; - } -} - -void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) { - if (stmt_nums.size() == 0) - return; - - // check for sanity of parameters - int ref_stmt_num = *(stmt_nums.begin()); - for (std::set<int>::const_iterator i = stmt_nums.begin(); - i != stmt_nums.end(); i++) { - if (*i < 0 || *i >= stmt.size()) - throw std::invalid_argument( - "invalid statement number " + to_string(*i)); - if (level < 1 || level > stmt[*i].loop_level.size()) - throw std::invalid_argument( - "invalid loop level " + to_string(level)); - } - - // do nothing - if (shift_amount == 0) - return; - - // set trasformation relations - for (std::set<int>::const_iterator i = stmt_nums.begin(); - i != stmt_nums.end(); i++) { - int n = stmt[*i].xform.n_out(); - - Relation r(n, n); - F_And *f_root = r.add_and(); - for (int j = 1; j <= n; j++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(r.input_var(j), 1); - h.update_coef(r.output_var(j), -1); - if (j == 2 * level) - h.update_const(shift_amount); - } - - stmt[*i].xform = Composition(r, stmt[*i].xform); - stmt[*i].xform.simplify(); - } - - // update dependence graph - if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { - int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; - for (std::set<int>::const_iterator i = stmt_nums.begin(); - i != stmt_nums.end(); i++) - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); - j != dep.vertex[*i].second.end(); j++) - if (stmt_nums.find(j->first) == stmt_nums.end()) { - // dependence from shifted statement to unshifted statement - std::vector<DependenceVector> dvs = j->second; - for (int k = 0; k < dvs.size(); k++) { - DependenceVector &dv = dvs[k]; - if (dv.is_data_dependence()) { - if (dv.lbounds[dep_dim] != -posInfinity) - dv.lbounds[dep_dim] -= shift_amount; - if (dv.ubounds[dep_dim] != posInfinity) - dv.ubounds[dep_dim] -= shift_amount; - } - } - j->second = dvs; - } - for (int i = 0; i < dep.vertex.size(); i++) - if (stmt_nums.find(i) == stmt_nums.end()) - for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); - j != dep.vertex[i].second.end(); j++) - if (stmt_nums.find(j->first) != stmt_nums.end()) { - // dependence from unshifted statement to shifted statement - std::vector<DependenceVector> dvs = j->second; - for (int k = 0; k < dvs.size(); k++) { - DependenceVector &dv = dvs[k]; - if (dv.is_data_dependence()) { - if (dv.lbounds[dep_dim] != -posInfinity) - dv.lbounds[dep_dim] += shift_amount; - if (dv.ubounds[dep_dim] != posInfinity) - dv.ubounds[dep_dim] += shift_amount; - } - } - j->second = dvs; - } - } -} - -// bool Loop::fuse(const std::set<int> &stmt_nums, int level) { -// if (stmt_nums.size() == 0 || stmt_nums.size() == 1) -// return true; -// int dim = 2*level-1; - -// // check for sanity of parameters -// std::vector<int> ref_lex; -// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { -// if (*i < 0 || *i >= stmt.size()) -// throw std::invalid_argument("invalid statement number " + to_string(*i)); -// if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) -// throw std::invalid_argument("invalid loop level " + to_string(level)); -// if (ref_lex.size() == 0) -// ref_lex = getLexicalOrder(*i); -// else { -// std::vector<int> lex = getLexicalOrder(*i); -// for (int j = 0; j < dim-1; j+=2) -// if (lex[j] != ref_lex[j]) -// throw std::invalid_argument("statements for fusion must be in the same level-" + to_string(level-1) + " subloop"); -// } -// } - -// // collect lexicographical order values from to-be-fused statements -// std::set<int> lex_values; -// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { -// std::vector<int> lex = getLexicalOrder(*i); -// lex_values.insert(lex[dim-1]); -// } -// if (lex_values.size() == 1) -// return true; - -// // negative dependence would prevent fusion -// int dep_dim = xform_index[dim].first; -// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) { -// ref_lex[dim-1] = *i; -// std::set<int> a = getStatements(ref_lex, dim-1); -// std::set<int>::iterator j = i; -// j++; -// for (; j != lex_values.end(); j++) { -// ref_lex[dim-1] = *j; -// std::set<int> b = getStatements(ref_lex, dim-1); -// for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++) -// for (std::set<int>::iterator jj = b.begin(); jj != b.end(); jj++) { -// std::vector<DependenceVector> dvs; -// dvs = dep.getEdge(*ii, *jj); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) -// throw loop_error("loop error: statements " + to_string(*ii) + " and " + to_string(*jj) + " cannot be fused together due to negative dependence"); -// dvs = dep.getEdge(*jj, *ii); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) -// throw loop_error("loop error: statements " + to_string(*jj) + " and " + to_string(*ii) + " cannot be fused together due to negative dependence"); -// } -// } -// } - -// // collect all other lexicographical order values from the subloop -// // enclosing these to-be-fused loops -// std::set<int> same_loop = getStatements(ref_lex, dim-3); -// std::set<int> other_lex_values; -// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { -// std::vector<int> lex = getLexicalOrder(*i); -// if (lex_values.find(lex[dim-1]) == lex_values.end()) -// other_lex_values.insert(lex[dim-1]); -// } - -// // update to-be-fused loops due to dependence cycle -// Graph<std::set<int>, Empty> g; -// { -// std::set<int> t; -// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) { -// ref_lex[dim-1] = *i; -// std::set<int> t2 = getStatements(ref_lex, dim-1); -// std::set_union(t.begin(), t.end(), t2.begin(), t2.end(), inserter(t, t.begin())); -// } -// g.insert(t); -// } -// for (std::set<int>::iterator i = other_lex_values.begin(); i != other_lex_values.end(); i++) { -// ref_lex[dim-1] = *i; -// std::set<int> t = getStatements(ref_lex, dim-1); -// g.insert(t); -// } -// for (int i = 0; i < g.vertex.size(); i++) -// for (int j = i+1; j < g.vertex.size(); j++) -// for (std::set<int>::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) -// for (std::set<int>::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { -// std::vector<DependenceVector> dvs; -// dvs = dep.getEdge(*ii, *jj); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim)) { -// g.connect(i, j); -// break; -// } -// dvs = dep.getEdge(*jj, *ii); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim)) { -// g.connect(j, i); -// break; -// } -// } -// std::vector<std::set<int> > s = g.topoSort(); -// int fused_lex_value = 0; -// for (int i = 0; i < s.size(); i++) -// if (s[i].find(0) != s[i].end()) { -// // now add additional lexicographical order values -// for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) -// if (*j != 0) { -// int stmt = *(g.vertex[*j].first.begin()); -// std::vector<int> lex = getLexicalOrder(stmt); -// lex_values.insert(lex[dim-1]); -// } - -// if (s.size() > 1) { -// if (i == 0) { -// int min_lex_value; -// for (std::set<int>::iterator j = s[i+1].begin(); j != s[i+1].end(); j++) { -// int stmt = *(g.vertex[*j].first.begin()); -// std::vector<int> lex = getLexicalOrder(stmt); -// if (j == s[i+1].begin()) -// min_lex_value = lex[dim-1]; -// else if (lex[dim-1] < min_lex_value) -// min_lex_value = lex[dim-1]; -// } -// fused_lex_value = min_lex_value - 1; -// } -// else { -// int max_lex_value; -// for (std::set<int>::iterator j = s[i-1].begin(); j != s[i-1].end(); j++) { -// int stmt = *(g.vertex[*j].first.begin()); -// std::vector<int> lex = getLexicalOrder(stmt); -// if (j == s[i-1].begin()) -// max_lex_value = lex[dim-1]; -// else if (lex[dim-1] > max_lex_value) -// max_lex_value = lex[dim-1]; -// } -// fused_lex_value = max_lex_value + 1; -// } -// } - -// break; -// } - -// // sort the newly updated to-be-fused lexicographical order values -// std::vector<int> ordered_lex_values; -// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) -// ordered_lex_values.push_back(*i); -// std::sort(ordered_lex_values.begin(), ordered_lex_values.end()); - -// // make sure internal loops inside to-be-fused loops have the same -// // lexicographical order before and after fusion -// std::vector<std::pair<int, int> > inside_lex_range(ordered_lex_values.size()); -// for (int i = 0; i < ordered_lex_values.size(); i++) { -// ref_lex[dim-1] = ordered_lex_values[i]; -// std::set<int> the_stmts = getStatements(ref_lex, dim-1); -// std::set<int>::iterator j = the_stmts.begin(); -// std::vector<int> lex = getLexicalOrder(*j); -// int min_inside_lex_value = lex[dim+1]; -// int max_inside_lex_value = lex[dim+1]; -// j++; -// for (; j != the_stmts.end(); j++) { -// std::vector<int> lex = getLexicalOrder(*j); -// if (lex[dim+1] < min_inside_lex_value) -// min_inside_lex_value = lex[dim+1]; -// if (lex[dim+1] > max_inside_lex_value) -// max_inside_lex_value = lex[dim+1]; -// } -// inside_lex_range[i].first = min_inside_lex_value; -// inside_lex_range[i].second = max_inside_lex_value; -// } -// for (int i = 1; i < ordered_lex_values.size(); i++) -// if (inside_lex_range[i].first <= inside_lex_range[i-1].second) { -// int shift_lex_value = inside_lex_range[i-1].second - inside_lex_range[i].first + 1; -// ref_lex[dim-1] = ordered_lex_values[i]; -// ref_lex[dim+1] = inside_lex_range[i].first; -// shiftLexicalOrder(ref_lex, dim+1, shift_lex_value); -// inside_lex_range[i].first += shift_lex_value; -// inside_lex_range[i].second += shift_lex_value; -// } - -// // set lexicographical order for fused loops -// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { -// std::vector<int> lex = getLexicalOrder(*i); -// if (lex_values.find(lex[dim-1]) != lex_values.end()) -// assign_const(stmt[*i].xform, dim-1, fused_lex_value); -// } - -// // no need to update dependence graph -// ; - -// return true; -// } - -// bool Loop::distribute(const std::set<int> &stmt_nums, int level) { -// if (stmt_nums.size() == 0 || stmt_nums.size() == 1) -// return true; -// int dim = 2*level-1; - -// // check for sanity of parameters -// std::vector<int> ref_lex; -// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { -// if (*i < 0 || *i >= stmt.size()) -// throw std::invalid_argument("invalid statement number " + to_string(*i)); -// if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) -// throw std::invalid_argument("invalid loop level " + to_string(level)); -// if (ref_lex.size() == 0) -// ref_lex = getLexicalOrder(*i); -// else { -// std::vector<int> lex = getLexicalOrder(*i); -// for (int j = 0; j <= dim-1; j+=2) -// if (lex[j] != ref_lex[j]) -// throw std::invalid_argument("statements for distribution must be in the same level-" + to_string(level) + " subloop"); -// } -// } - -// // find SCC in the to-be-distributed loop -// int dep_dim = xform_index[dim].first; -// std::set<int> same_loop = getStatements(ref_lex, dim-1); -// Graph<int, Empty> g; -// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) -// g.insert(*i); -// for (int i = 0; i < g.vertex.size(); i++) -// for (int j = i+1; j < g.vertex.size(); j++) { -// std::vector<DependenceVector> dvs; -// dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim)) { -// g.connect(i, j); -// break; -// } -// dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim)) { -// g.connect(j, i); -// break; -// } -// } -// std::vector<std::set<int> > s = g.topoSort(); - -// // find statements that cannot be distributed due to dependence cycle -// Graph<std::set<int>, Empty> g2; -// for (int i = 0; i < s.size(); i++) { -// std::set<int> t; -// for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) -// if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) -// t.insert(g.vertex[*j].first); -// if (!t.empty()) -// g2.insert(t); -// } -// for (int i = 0; i < g2.vertex.size(); i++) -// for (int j = i+1; j < g2.vertex.size(); j++) -// for (std::set<int>::iterator ii = g2.vertex[i].first.begin(); ii != g2.vertex[i].first.end(); ii++) -// for (std::set<int>::iterator jj = g2.vertex[j].first.begin(); jj != g2.vertex[j].first.end(); jj++) { -// std::vector<DependenceVector> dvs; -// dvs = dep.getEdge(*ii, *jj); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim)) { -// g2.connect(i, j); -// break; -// } -// dvs = dep.getEdge(*jj, *ii); -// for (int k = 0; k < dvs.size(); k++) -// if (dvs[k].isCarried(dep_dim)) { -// g2.connect(j, i); -// break; -// } -// } -// std::vector<std::set<int> > s2 = g2.topoSort(); - -// // nothing to distribute -// if (s2.size() == 1) -// throw loop_error("loop error: no statement can be distributed due to dependence cycle"); - -// std::vector<std::set<int> > s3; -// for (int i = 0; i < s2.size(); i++) { -// std::set<int> t; -// for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) -// std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), g2.vertex[*j].first.end(), inserter(t, t.begin())); -// s3.push_back(t); -// } - -// // associate other affected statements with the right distributed statements -// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) -// if (stmt_nums.find(*i) == stmt_nums.end()) { -// bool is_inserted = false; -// int potential_insertion_point = 0; -// for (int j = 0; j < s3.size(); j++) { -// for (std::set<int>::iterator k = s3[j].begin(); k != s3[j].end(); k++) { -// std::vector<DependenceVector> dvs; -// dvs = dep.getEdge(*i, *k); -// for (int kk = 0; kk < dvs.size(); kk++) -// if (dvs[kk].isCarried(dep_dim)) { -// s3[j].insert(*i); -// is_inserted = true; -// break; -// } -// dvs = dep.getEdge(*k, *i); -// for (int kk = 0; kk < dvs.size(); kk++) -// if (dvs[kk].isCarried(dep_dim)) -// potential_insertion_point = j; -// } -// if (is_inserted) -// break; -// } - -// if (!is_inserted) -// s3[potential_insertion_point].insert(*i); -// } - -// // set lexicographical order after distribution -// int order = ref_lex[dim-1]; -// shiftLexicalOrder(ref_lex, dim-1, s3.size()-1); -// for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end(); i++) { -// for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++) -// assign_const(stmt[*j].xform, dim-1, order); -// order++; -// } - -// // no need to update dependence graph -// ; - -// return true; -// } - |