summaryrefslogtreecommitdiff
path: root/loop_unroll.cc
diff options
context:
space:
mode:
authorTuowen Zhao <ztuowen@gmail.com>2016-09-17 03:22:53 +0000
committerTuowen Zhao <ztuowen@gmail.com>2016-09-17 03:22:53 +0000
commit75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5 (patch)
tree498ac06b4cf78568b807fafd2619856afff69c28 /loop_unroll.cc
parent29efa7b1a0d089e02a70f73f348f11878955287c (diff)
downloadchill-75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5.tar.gz
chill-75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5.tar.bz2
chill-75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5.zip
cmake build
Diffstat (limited to 'loop_unroll.cc')
-rw-r--r--loop_unroll.cc1166
1 files changed, 0 insertions, 1166 deletions
diff --git a/loop_unroll.cc b/loop_unroll.cc
deleted file mode 100644
index b75b738..0000000
--- a/loop_unroll.cc
+++ /dev/null
@@ -1,1166 +0,0 @@
-/*
- * loop_unroll.cc
- *
- * Created on: Nov 12, 2012
- * Author: anand
- */
-
-#include <codegen.h>
-#include <code_gen/CG_utils.h>
-#include "loop.hh"
-#include "omegatools.hh"
-#include "ir_code.hh"
-#include "chill_error.hh"
-#include <math.h>
-
-using namespace omega;
-
-
-std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
- std::vector<std::vector<std::string> > idxNames,
- int cleanup_split_level) {
- // check for sanity of parameters
- // check for sanity of parameters
- if (unroll_amount < 0)
- throw std::invalid_argument(
- "invalid unroll amount " + to_string(unroll_amount));
- if (stmt_num < 0 || stmt_num >= stmt.size())
- throw std::invalid_argument("invalid statement " + to_string(stmt_num));
- if (level <= 0 || level > stmt[stmt_num].loop_level.size())
- throw std::invalid_argument("invalid loop level " + to_string(level));
-
- if (cleanup_split_level == 0)
- cleanup_split_level = level;
- if (cleanup_split_level > level)
- throw std::invalid_argument(
- "cleanup code must be split at or outside the unrolled loop level "
- + to_string(level));
- if (cleanup_split_level <= 0)
- throw std::invalid_argument(
- "invalid split loop level " + to_string(cleanup_split_level));
-
- // invalidate saved codegen computation
- delete last_compute_cgr_;
- last_compute_cgr_ = NULL;
- delete last_compute_cg_;
- last_compute_cg_ = NULL;
-
- int dim = 2 * level - 1;
- std::vector<int> lex = getLexicalOrder(stmt_num);
- std::set<int> same_loop = getStatements(lex, dim - 1);
-
- // nothing to do
- if (unroll_amount == 1)
- return std::set<int>();
-
- for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end();
- i++) {
- std::vector<std::pair<int, DependenceVector> > D;
- int n = stmt[*i].xform.n_out();
- for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end();
- j++) {
- if (same_loop.find(j->first) != same_loop.end())
- for (int k = 0; k < j->second.size(); k++) {
- DependenceVector dv = j->second[k];
- int dim2 = level - 1;
- if (dv.type != DEP_CONTROL) {
-
- while (stmt[*i].loop_level[dim2].type == LoopLevelTile) {
- dim2 = stmt[*i].loop_level[dim2].payload - 1;
- }
- dim2 = stmt[*i].loop_level[dim2].payload;
-
- /*if (dv.isCarried(dim2)
- && (dv.hasNegative(dim2) && !dv.quasi))
- throw loop_error(
- "loop error: Unrolling is illegal, dependence violation!");
-
- if (dv.isCarried(dim2)
- && (dv.hasPositive(dim2) && dv.quasi))
- throw loop_error(
- "loop error: Unrolling is illegal, dependence violation!");
- */
- bool safe = false;
-
- if (dv.isCarried(dim2) && dv.hasPositive(dim2)) {
- if (dv.quasi)
- throw loop_error(
- "loop error: a quasi dependence with a positive carried distance");
- if (!dv.quasi) {
- if (dv.lbounds[dim2] != posInfinity) {
- //if (dv.lbounds[dim2] != negInfinity)
- if (dv.lbounds[dim2] > unroll_amount)
- safe = true;
- } else
- safe = true;
- }/* else {
- if (dv.ubounds[dim2] != negInfinity) {
- if (dv.ubounds[dim2] != posInfinity)
- if ((-(dv.ubounds[dim2])) > unroll_amount)
- safe = true;
- } else
- safe = true;
- }*/
-
- if (!safe) {
- for (int l = level + 1; l <= (n - 1) / 2; l++) {
- int dim3 = l - 1;
-
- if (stmt[*i].loop_level[dim3].type
- != LoopLevelTile)
- dim3 =
- stmt[*i].loop_level[dim3].payload;
- else {
- while (stmt[*i].loop_level[dim3].type
- == LoopLevelTile) {
- dim3 =
- stmt[*i].loop_level[dim3].payload
- - 1;
- }
- dim3 =
- stmt[*i].loop_level[dim3].payload;
- }
-
- if (dim3 > dim2) {
-
- if (dv.hasPositive(dim3))
- break;
- else if (dv.hasNegative(dim3))
- throw loop_error(
- "loop error: Unrolling is illegal, dependence violation!");
- }
- }
- }
- }
- }
- }
- }
- }
- // extract the intersection of the iteration space to be considered
- Relation hull = Relation::True(level);
- apply_xform(same_loop);
- for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end();
- i++) {
- if (stmt[*i].IS.is_upper_bound_satisfiable()) {
- Relation mapping(stmt[*i].IS.n_set(), level);
- F_And *f_root = mapping.add_and();
- for (int j = 1; j <= level; j++) {
- EQ_Handle h = f_root->add_EQ();
- h.update_coef(mapping.input_var(j), 1);
- h.update_coef(mapping.output_var(j), -1);
- }
- hull = Intersection(hull,
- Range(Restrict_Domain(mapping, copy(stmt[*i].IS))));
- hull.simplify(2, 4);
-
- }
- }
- for (int i = 1; i <= level; i++) {
- std::string name = tmp_loop_var_name_prefix + to_string(i);
- hull.name_set_var(i, name);
- }
- hull.setup_names();
-
- // extract the exact loop bound of the dimension to be unrolled
- if (is_single_loop_iteration(hull, level, this->known))
- return std::set<int>();
- Relation bound = get_loop_bound(hull, level, this->known);
- if (!bound.has_single_conjunct() || !bound.is_satisfiable()
- || bound.is_tautology())
- throw loop_error("unable to extract loop bound for unrolling");
-
- // extract the loop stride
- coef_t stride;
- std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
- bound.set_var(level));
- if (result.second == NULL)
- stride = 1;
- else
- stride = abs(result.first.get_coef(result.second))
- / gcd(abs(result.first.get_coef(result.second)),
- abs(result.first.get_coef(bound.set_var(level))));
-
- // separate lower and upper bounds
- std::vector<GEQ_Handle> lb_list, ub_list;
- {
- Conjunct *c = bound.query_DNF()->single_conjunct();
- for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
- int coef = (*gi).get_coef(bound.set_var(level));
- if (coef < 0)
- ub_list.push_back(*gi);
- else if (coef > 0)
- lb_list.push_back(*gi);
- }
- }
-
- // simplify overflow expression for each pair of upper and lower bounds
- std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table(
- lb_list.size(),
- std::vector<std::map<Variable_ID, int> >(ub_list.size(),
- std::map<Variable_ID, int>()));
- bool is_overflow_simplifiable = true;
- for (int i = 0; i < lb_list.size(); i++) {
- if (!is_overflow_simplifiable)
- break;
-
- for (int j = 0; j < ub_list.size(); j++) {
- // lower bound or upper bound has non-unit coefficient, can't simplify
- if (ub_list[j].get_coef(bound.set_var(level)) != -1
- || lb_list[i].get_coef(bound.set_var(level)) != 1) {
- is_overflow_simplifiable = false;
- break;
- }
-
- for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
- switch ((*ci).var->kind()) {
- case Input_Var: {
- if ((*ci).var != bound.set_var(level))
- overflow_table[i][j][(*ci).var] += (*ci).coef;
-
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = bound.get_local(g);
- else
- v = bound.get_local(g, (*ci).var->function_of());
- overflow_table[i][j][(*ci).var] += (*ci).coef;
- break;
- }
- default:
- throw loop_error("failed to calculate overflow amount");
- }
- }
- overflow_table[i][j][NULL] += ub_list[j].get_const();
-
- for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) {
- switch ((*ci).var->kind()) {
- case Input_Var: {
- if ((*ci).var != bound.set_var(level)) {
- overflow_table[i][j][(*ci).var] += (*ci).coef;
- if (overflow_table[i][j][(*ci).var] == 0)
- overflow_table[i][j].erase(
- overflow_table[i][j].find((*ci).var));
- }
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = bound.get_local(g);
- else
- v = bound.get_local(g, (*ci).var->function_of());
- overflow_table[i][j][(*ci).var] += (*ci).coef;
- if (overflow_table[i][j][(*ci).var] == 0)
- overflow_table[i][j].erase(
- overflow_table[i][j].find((*ci).var));
- break;
- }
- default:
- throw loop_error("failed to calculate overflow amount");
- }
- }
- overflow_table[i][j][NULL] += lb_list[i].get_const();
-
- overflow_table[i][j][NULL] += stride;
- if (unroll_amount == 0
- || (overflow_table[i][j].size() == 1
- && overflow_table[i][j][NULL] / stride
- < unroll_amount))
- unroll_amount = overflow_table[i][j][NULL] / stride;
- }
- }
-
- // loop iteration count can't be determined, bail out gracefully
- if (unroll_amount == 0)
- return std::set<int>();
-
- // further simply overflow calculation using coefficients' modular
- if (is_overflow_simplifiable) {
- for (int i = 0; i < lb_list.size(); i++)
- for (int j = 0; j < ub_list.size(); j++)
- if (stride == 1) {
- for (std::map<Variable_ID, int>::iterator k =
- overflow_table[i][j].begin();
- k != overflow_table[i][j].end();)
- if ((*k).first != NULL) {
- int t = int_mod_hat((*k).second, unroll_amount);
- if (t == 0) {
- overflow_table[i][j].erase(k++);
- } else {
- int t2 = hull.query_variable_mod((*k).first,
- unroll_amount);
- if (t2 != INT_MAX) {
- overflow_table[i][j][NULL] += t * t2;
- overflow_table[i][j].erase(k++);
- } else {
- (*k).second = t;
- k++;
- }
- }
- } else
- k++;
-
- overflow_table[i][j][NULL] = int_mod_hat(
- overflow_table[i][j][NULL], unroll_amount);
-
- // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula
- for (std::map<Variable_ID, int>::iterator k =
- overflow_table[i][j].begin();
- k != overflow_table[i][j].end(); k++)
- if ((*k).second < 0)
- (*k).second += unroll_amount;
- }
- }
-
- // build overflow statement
- CG_outputBuilder *ocg = ir->builder();
- CG_outputRepr *overflow_code = NULL;
- Relation cond_upper(level), cond_lower(level);
- Relation overflow_constraint(0);
- F_And *overflow_constraint_root = overflow_constraint.add_and();
- std::vector<Free_Var_Decl *> over_var_list;
- if (is_overflow_simplifiable && lb_list.size() == 1) {
- for (int i = 0; i < ub_list.size(); i++) {
- if (overflow_table[0][i].size() == 1) {
- // upper splitting condition
- GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
- h.update_const(
- ((overflow_table[0][i][NULL] / stride) % unroll_amount)
- * -stride);
- } else {
- // upper splitting condition
- std::string over_name = overflow_var_name_prefix
- + to_string(overflow_var_name_counter++);
- Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
- over_var_list.push_back(over_free_var);
- GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
- h.update_coef(cond_upper.get_local(over_free_var), -stride);
-
- // insert constraint 0 <= overflow < unroll_amount
- Variable_ID v = overflow_constraint.get_local(over_free_var);
- GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
- h1.update_coef(v, 1);
- GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
- h2.update_coef(v, -1);
- h2.update_const(unroll_amount - 1);
-
- // create overflow assignment
- bound.setup_names(); // hack to fix omega relation variable names issue
- CG_outputRepr *rhs = NULL;
- bool is_split_illegal = false;
- for (std::map<Variable_ID, int>::iterator j =
- overflow_table[0][i].begin();
- j != overflow_table[0][i].end(); j++)
- if ((*j).first != NULL) {
- if ((*j).first->kind() == Input_Var
- && (*j).first->get_position()
- >= cleanup_split_level)
- is_split_illegal = true;
-
- CG_outputRepr *t = ocg->CreateIdent((*j).first->name());
- if ((*j).second != 1)
- t = ocg->CreateTimes(ocg->CreateInt((*j).second),
- t);
- rhs = ocg->CreatePlus(rhs, t);
- } else if ((*j).second != 0)
- rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second));
-
- if (is_split_illegal) {
- rhs->clear();
- delete rhs;
- throw loop_error(
- "cannot split cleanup code at loop level "
- + to_string(cleanup_split_level)
- + " due to overflow variable data dependence");
- }
-
- if (stride != 1)
- rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride));
- rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
-
- CG_outputRepr *lhs = ocg->CreateIdent(over_name);
- init_code = ocg->StmtListAppend(init_code,
- ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
- lhs = ocg->CreateIdent(over_name);
- overflow_code = ocg->StmtListAppend(overflow_code,
- ocg->CreateAssignment(0, lhs, rhs));
- }
- }
-
- // lower splitting condition
- GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]);
- } else if (is_overflow_simplifiable && ub_list.size() == 1) {
- for (int i = 0; i < lb_list.size(); i++) {
-
- if (overflow_table[i][0].size() == 1) {
- // lower splitting condition
- GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
- h.update_const(overflow_table[i][0][NULL] * -stride);
- } else {
- // lower splitting condition
- std::string over_name = overflow_var_name_prefix
- + to_string(overflow_var_name_counter++);
- Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
- over_var_list.push_back(over_free_var);
- GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
- h.update_coef(cond_lower.get_local(over_free_var), -stride);
-
- // insert constraint 0 <= overflow < unroll_amount
- Variable_ID v = overflow_constraint.get_local(over_free_var);
- GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
- h1.update_coef(v, 1);
- GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
- h2.update_coef(v, -1);
- h2.update_const(unroll_amount - 1);
-
- // create overflow assignment
- bound.setup_names(); // hack to fix omega relation variable names issue
- CG_outputRepr *rhs = NULL;
- for (std::map<Variable_ID, int>::iterator j =
- overflow_table[0][i].begin();
- j != overflow_table[0][i].end(); j++)
- if ((*j).first != NULL) {
- CG_outputRepr *t = ocg->CreateIdent((*j).first->name());
- if ((*j).second != 1)
- t = ocg->CreateTimes(ocg->CreateInt((*j).second),
- t);
- rhs = ocg->CreatePlus(rhs, t);
- } else if ((*j).second != 0)
- rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second));
-
- if (stride != 1)
- rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride));
- rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
-
- CG_outputRepr *lhs = ocg->CreateIdent(over_name);
- init_code = ocg->StmtListAppend(init_code,
- ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
- lhs = ocg->CreateIdent(over_name);
- overflow_code = ocg->StmtListAppend(overflow_code,
- ocg->CreateAssignment(0, lhs, rhs));
- }
- }
-
- // upper splitting condition
- GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]);
- } else {
- std::string over_name = overflow_var_name_prefix
- + to_string(overflow_var_name_counter++);
- Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
- over_var_list.push_back(over_free_var);
-
- std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list;
- for (int i = 0; i < lb_list.size(); i++) {
- lb_repr_list.push_back(
- output_lower_bound_repr(ocg, lb_list[i],
- bound.set_var(dim + 1), result.first, result.second,
- bound, Relation::True(bound.n_set()),
- std::vector<std::pair<CG_outputRepr *, int> >(
- bound.n_set(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL),
- 0))));
- GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
- }
- for (int i = 0; i < ub_list.size(); i++) {
- ub_repr_list.push_back(
- output_upper_bound_repr(ocg, ub_list[i],
- bound.set_var(dim + 1), bound,
- std::vector<std::pair<CG_outputRepr *, int> >(
- bound.n_set(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL),
- 0))));
- GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
- h.update_coef(cond_upper.get_local(over_free_var), -stride);
- }
-
- CG_outputRepr *lbRepr, *ubRepr;
- if (lb_repr_list.size() > 1)
- lbRepr = ocg->CreateInvoke("max", lb_repr_list);
- else if (lb_repr_list.size() == 1)
- lbRepr = lb_repr_list[0];
-
- if (ub_repr_list.size() > 1)
- ubRepr = ocg->CreateInvoke("min", ub_repr_list);
- else if (ub_repr_list.size() == 1)
- ubRepr = ub_repr_list[0];
-
- // create overflow assignment
- CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr),
- ocg->CreateInt(1));
- if (stride != 1)
- rhs = ocg->CreateIntegerFloor(rhs, ocg->CreateInt(stride));
- rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
- CG_outputRepr *lhs = ocg->CreateIdent(over_name);
- init_code = ocg->StmtListAppend(init_code,
- ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
- lhs = ocg->CreateIdent(over_name);
- overflow_code = ocg->CreateAssignment(0, lhs, rhs);
-
- // insert constraint 0 <= overflow < unroll_amount
- Variable_ID v = overflow_constraint.get_local(over_free_var);
- GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
- h1.update_coef(v, 1);
- GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
- h2.update_coef(v, -1);
- h2.update_const(unroll_amount - 1);
- }
-
- // insert overflow statement
- int overflow_stmt_num = -1;
- if (overflow_code != NULL) {
- // build iteration space for overflow statement
- Relation mapping(level, cleanup_split_level - 1);
- F_And *f_root = mapping.add_and();
- for (int i = 1; i < cleanup_split_level; i++) {
- EQ_Handle h = f_root->add_EQ();
- h.update_coef(mapping.output_var(i), 1);
- h.update_coef(mapping.input_var(i), -1);
- }
- Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull)));
- for (int i = 1; i < cleanup_split_level; i++)
- overflow_IS.name_set_var(i, hull.set_var(i)->name());
- overflow_IS.setup_names();
-
- // build dumb transformation relation for overflow statement
- Relation overflow_xform(cleanup_split_level - 1,
- 2 * (cleanup_split_level - 1) + 1);
- f_root = overflow_xform.add_and();
- for (int i = 1; i <= cleanup_split_level - 1; i++) {
- EQ_Handle h = f_root->add_EQ();
- h.update_coef(overflow_xform.output_var(2 * i), 1);
- h.update_coef(overflow_xform.input_var(i), -1);
-
- h = f_root->add_EQ();
- h.update_coef(overflow_xform.output_var(2 * i - 1), 1);
- h.update_const(-lex[2 * i - 2]);
- }
- EQ_Handle h = f_root->add_EQ();
- h.update_coef(
- overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1),
- 1);
- h.update_const(-lex[2 * (cleanup_split_level - 1)]);
-
- shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1);
- Statement overflow_stmt;
-
- overflow_stmt.code = overflow_code;
- overflow_stmt.IS = overflow_IS;
- overflow_stmt.xform = overflow_xform;
- overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1);
- overflow_stmt.ir_stmt_node = NULL;
- for (int i = 0; i < level - 1; i++) {
- overflow_stmt.loop_level[i].type =
- stmt[stmt_num].loop_level[i].type;
- if (stmt[stmt_num].loop_level[i].type == LoopLevelTile
- && stmt[stmt_num].loop_level[i].payload >= level)
- overflow_stmt.loop_level[i].payload = -1;
- else
- overflow_stmt.loop_level[i].payload =
- stmt[stmt_num].loop_level[i].payload;
- overflow_stmt.loop_level[i].parallel_level =
- stmt[stmt_num].loop_level[i].parallel_level;
- }
-
- stmt.push_back(overflow_stmt);
- dep.insert();
- overflow_stmt_num = stmt.size() - 1;
- overflow[overflow_stmt_num] = over_var_list;
-
- // update the global known information on overflow variable
- this->known = Intersection(this->known,
- Extend_Set(copy(overflow_constraint),
- this->known.n_set() - overflow_constraint.n_set()));
-
- // update dependence graph
- DependenceVector dv;
- dv.type = DEP_CONTROL;
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++)
- dep.connect(overflow_stmt_num, *i, dv);
- dv.type = DEP_W2W;
- {
- IR_ScalarSymbol *overflow_sym = NULL;
- std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef(
- overflow_code);
- for (int i = scalars.size() - 1; i >= 0; i--)
- if (scalars[i]->is_write()) {
- overflow_sym = scalars[i]->symbol();
- break;
- }
- for (int i = scalars.size() - 1; i >= 0; i--)
- delete scalars[i];
- dv.sym = overflow_sym;
- }
- dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
- dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
- int dep_dim = get_last_dep_dim_before(stmt_num, level);
- for (int i = dep_dim + 1; i < dep.num_dim(); i++) {
- dv.lbounds[i] = -posInfinity;
- dv.ubounds[i] = posInfinity;
- }
- for (int i = 0; i <= dep_dim; i++) {
- if (i != 0) {
- dv.lbounds[i - 1] = 0;
- dv.ubounds[i - 1] = 0;
- }
- dv.lbounds[i] = 1;
- dv.ubounds[i] = posInfinity;
- dep.connect(overflow_stmt_num, overflow_stmt_num, dv);
- }
- }
-
- // split the loop so it can be fully unrolled
- std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper);
- std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower);
- new_stmts.insert(new_stmts2.begin(), new_stmts2.end());
-
- // check if unrolled statements can be trivially lumped together as one statement
- bool can_be_lumped = true;
- if (can_be_lumped) {
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++)
- if (*i != stmt_num) {
- if (stmt[*i].loop_level.size()
- != stmt[stmt_num].loop_level.size()) {
- can_be_lumped = false;
- break;
- }
- for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++)
- if (!(stmt[*i].loop_level[j].type
- == stmt[stmt_num].loop_level[j].type
- && stmt[*i].loop_level[j].payload
- == stmt[stmt_num].loop_level[j].payload)) {
- can_be_lumped = false;
- break;
- }
- if (!can_be_lumped)
- break;
- std::vector<int> lex2 = getLexicalOrder(*i);
- for (int j = 2 * level; j < lex.size() - 1; j += 2)
- if (lex[j] != lex2[j]) {
- can_be_lumped = false;
- break;
- }
- if (!can_be_lumped)
- break;
- }
- }
- if (can_be_lumped) {
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++)
- if (is_inner_loop_depend_on_level(stmt[*i].IS, level,
- this->known)) {
- can_be_lumped = false;
- break;
- }
- }
- if (can_be_lumped) {
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++)
- if (*i != stmt_num) {
- if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS))
- && Must_Be_Subset(copy(stmt[stmt_num].IS),
- copy(stmt[*i].IS)))) {
- can_be_lumped = false;
- break;
- }
- }
- }
- if (can_be_lumped) {
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++) {
- for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[*i].second.begin();
- j != dep.vertex[*i].second.end(); j++)
- if (same_loop.find(j->first) != same_loop.end()) {
- for (int k = 0; k < j->second.size(); k++)
- if (j->second[k].type == DEP_CONTROL
- || j->second[k].type == DEP_UNKNOWN) {
- can_be_lumped = false;
- break;
- }
- if (!can_be_lumped)
- break;
- }
- if (!can_be_lumped)
- break;
- }
- }
-
- // insert unrolled statements
- int old_num_stmt = stmt.size();
- if (!can_be_lumped) {
- std::map<int, std::vector<int> > what_stmt_num;
-
- for (int j = 1; j < unroll_amount; j++) {
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++) {
- Statement new_stmt;
-
- std::vector<std::string> loop_vars;
- std::vector<CG_outputRepr *> subs;
- loop_vars.push_back(stmt[*i].IS.set_var(level)->name());
- subs.push_back(
- ocg->CreatePlus(
- ocg->CreateIdent(
- stmt[*i].IS.set_var(level)->name()),
- ocg->CreateInt(j * stride)));
- new_stmt.code = ocg->CreateSubstitutedStmt(0,
- stmt[*i].code->clone(), loop_vars, subs);
-
- new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride);
- add_loop_stride(new_stmt.IS, bound, level - 1,
- unroll_amount * stride);
-
- new_stmt.xform = copy(stmt[*i].xform);
-
- new_stmt.loop_level = stmt[*i].loop_level;
- new_stmt.ir_stmt_node = NULL;
- stmt.push_back(new_stmt);
- dep.insert();
- what_stmt_num[*i].push_back(stmt.size() - 1);
- }
- }
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++)
- add_loop_stride(stmt[*i].IS, bound, level - 1,
- unroll_amount * stride);
-
- // update dependence graph
- if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
- int dep_dim = stmt[stmt_num].loop_level[level - 1].payload;
- int new_stride = unroll_amount * stride;
- for (int i = 0; i < old_num_stmt; i++) {
- std::vector<std::pair<int, DependenceVector> > D;
-
- for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
- j != dep.vertex[i].second.end();) {
- if (same_loop.find(i) != same_loop.end()) {
- if (same_loop.find(j->first) != same_loop.end()) {
- for (int k = 0; k < j->second.size(); k++) {
- DependenceVector dv = j->second[k];
- if (dv.type == DEP_CONTROL
- || dv.type == DEP_UNKNOWN) {
- D.push_back(std::make_pair(j->first, dv));
- for (int kk = 0; kk < unroll_amount - 1;
- kk++)
- if (what_stmt_num[i][kk] != -1
- && what_stmt_num[j->first][kk]
- != -1)
- dep.connect(what_stmt_num[i][kk],
- what_stmt_num[j->first][kk],
- dv);
- } else {
- coef_t lb = dv.lbounds[dep_dim];
- coef_t ub = dv.ubounds[dep_dim];
- if (ub == lb
- && int_mod(lb,
- static_cast<coef_t>(new_stride))
- == 0) {
- D.push_back(
- std::make_pair(j->first, dv));
- for (int kk = 0; kk < unroll_amount - 1;
- kk++)
- if (what_stmt_num[i][kk] != -1
- && what_stmt_num[j->first][kk]
- != -1)
- dep.connect(
- what_stmt_num[i][kk],
- what_stmt_num[j->first][kk],
- dv);
- } else if (lb == -posInfinity
- && ub == posInfinity) {
- D.push_back(
- std::make_pair(j->first, dv));
- for (int kk = 0; kk < unroll_amount;
- kk++)
- if (kk == 0)
- D.push_back(
- std::make_pair(j->first,
- dv));
- else if (what_stmt_num[j->first][kk
- - 1] != -1)
- D.push_back(
- std::make_pair(
- what_stmt_num[j->first][kk
- - 1],
- dv));
- for (int t = 0; t < unroll_amount - 1;
- t++)
- if (what_stmt_num[i][t] != -1)
- for (int kk = 0;
- kk < unroll_amount;
- kk++)
- if (kk == 0)
- dep.connect(
- what_stmt_num[i][t],
- j->first, dv);
- else if (what_stmt_num[j->first][kk
- - 1] != -1)
- dep.connect(
- what_stmt_num[i][t],
- what_stmt_num[j->first][kk
- - 1],
- dv);
- } else {
- for (int kk = 0; kk < unroll_amount;
- kk++) {
- if (lb != -posInfinity) {
- if (kk * stride
- < int_mod(lb,
- static_cast<coef_t>(new_stride)))
- dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride
- + new_stride;
- else
- dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
- }
- if (ub != posInfinity) {
- if (kk * stride
- > int_mod(ub,
- static_cast<coef_t>(new_stride)))
- dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub)
- / new_stride)
- * new_stride
- - new_stride;
- else
- dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub)
- / new_stride)
- * new_stride;
- }
- if (dv.ubounds[dep_dim]
- >= dv.lbounds[dep_dim]) {
- if (kk == 0)
- D.push_back(
- std::make_pair(
- j->first,
- dv));
- else if (what_stmt_num[j->first][kk
- - 1] != -1)
- D.push_back(
- std::make_pair(
- what_stmt_num[j->first][kk
- - 1],
- dv));
- }
- }
- for (int t = 0; t < unroll_amount - 1;
- t++)
- if (what_stmt_num[i][t] != -1)
- for (int kk = 0;
- kk < unroll_amount;
- kk++) {
- if (lb != -posInfinity) {
- if (kk * stride
- < int_mod(
- lb + t
- + 1,
- static_cast<coef_t>(new_stride)))
- dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride
- + new_stride;
- else
- dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride;
- }
- if (ub != posInfinity) {
- if (kk * stride
- > int_mod(
- ub + t
- + 1,
- static_cast<coef_t>(new_stride)))
- dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride
- - new_stride;
- else
- dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride;
- }
- if (dv.ubounds[dep_dim]
- >= dv.lbounds[dep_dim]) {
- if (kk == 0)
- dep.connect(
- what_stmt_num[i][t],
- j->first,
- dv);
- else if (what_stmt_num[j->first][kk
- - 1] != -1)
- dep.connect(
- what_stmt_num[i][t],
- what_stmt_num[j->first][kk
- - 1],
- dv);
- }
- }
- }
- }
- }
-
- dep.vertex[i].second.erase(j++);
- } else {
- for (int kk = 0; kk < unroll_amount - 1; kk++)
- if (what_stmt_num[i][kk] != -1)
- dep.connect(what_stmt_num[i][kk], j->first,
- j->second);
-
- j++;
- }
- } else {
- if (same_loop.find(j->first) != same_loop.end())
- for (int k = 0; k < j->second.size(); k++)
- for (int kk = 0; kk < unroll_amount - 1; kk++)
- if (what_stmt_num[j->first][kk] != -1)
- D.push_back(
- std::make_pair(
- what_stmt_num[j->first][kk],
- j->second[k]));
- j++;
- }
- }
-
- for (int j = 0; j < D.size(); j++)
- dep.connect(i, D[j].first, D[j].second);
- }
- }
-
- // reset lexical order for the unrolled loop body
- std::set<int> new_same_loop;
-
- int count = 0;
-
- for (std::map<int, std::vector<int> >::iterator i =
- what_stmt_num.begin(); i != what_stmt_num.end(); i++) {
-
- new_same_loop.insert(i->first);
- for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2)
- assign_const(stmt[i->first].xform, k,
- get_const(stmt[(what_stmt_num.begin())->first].xform, k,
- Output_Var) + count);
- count++;
- for (int j = 0; j < i->second.size(); j++) {
- new_same_loop.insert(i->second[j]);
- for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k +=
- 2)
- assign_const(stmt[i->second[j]].xform, k,
- get_const(
- stmt[(what_stmt_num.begin())->first].xform,
- k, Output_Var) + count);
- count++;
- }
- }
- setLexicalOrder(dim + 1, new_same_loop, 0, idxNames);
- } else {
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++)
- add_loop_stride(stmt[*i].IS, bound, level - 1,
- unroll_amount * stride);
-
- int max_level = stmt[stmt_num].loop_level.size();
- std::vector<std::pair<int, int> > stmt_order;
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++)
- stmt_order.push_back(
- std::make_pair(
- get_const(stmt[*i].xform, 2 * max_level,
- Output_Var), *i));
- sort(stmt_order.begin(), stmt_order.end());
-
- Statement new_stmt;
- new_stmt.code = NULL;
- for (int j = 1; j < unroll_amount; j++)
- for (int i = 0; i < stmt_order.size(); i++) {
- std::vector<std::string> loop_vars;
- std::vector<CG_outputRepr *> subs;
- loop_vars.push_back(
- stmt[stmt_order[i].second].IS.set_var(level)->name());
- subs.push_back(
- ocg->CreatePlus(
- ocg->CreateIdent(
- stmt[stmt_order[i].second].IS.set_var(
- level)->name()),
- ocg->CreateInt(j * stride)));
- CG_outputRepr *code = ocg->CreateSubstitutedStmt(0,
- stmt[stmt_order[i].second].code->clone(), loop_vars,
- subs);
- new_stmt.code = ocg->StmtListAppend(new_stmt.code, code);
- }
-
- new_stmt.IS = copy(stmt[stmt_num].IS);
- new_stmt.xform = copy(stmt[stmt_num].xform);
- assign_const(new_stmt.xform, 2 * max_level,
- stmt_order[stmt_order.size() - 1].first + 1);
- new_stmt.loop_level = stmt[stmt_num].loop_level;
- new_stmt.ir_stmt_node = NULL;
- stmt.push_back(new_stmt);
- dep.insert();
-
- // update dependence graph
- if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
- int dep_dim = stmt[stmt_num].loop_level[level - 1].payload;
- int new_stride = unroll_amount * stride;
- for (int i = 0; i < old_num_stmt; i++) {
- std::vector<std::pair<int, std::vector<DependenceVector> > > D;
-
- for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
- j != dep.vertex[i].second.end();) {
- if (same_loop.find(i) != same_loop.end()) {
- if (same_loop.find(j->first) != same_loop.end()) {
- std::vector<DependenceVector> dvs11, dvs12, dvs22,
- dvs21;
- for (int k = 0; k < j->second.size(); k++) {
- DependenceVector dv = j->second[k];
- if (dv.type == DEP_CONTROL
- || dv.type == DEP_UNKNOWN) {
- if (i == j->first) {
- dvs11.push_back(dv);
- dvs22.push_back(dv);
- } else
- throw loop_error(
- "unrolled statements lumped together illegally");
- } else {
- coef_t lb = dv.lbounds[dep_dim];
- coef_t ub = dv.ubounds[dep_dim];
- if (ub == lb
- && int_mod(lb,
- static_cast<coef_t>(new_stride))
- == 0) {
- dvs11.push_back(dv);
- dvs22.push_back(dv);
- } else {
- if (lb != -posInfinity)
- dv.lbounds[dep_dim] = ceil(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
- if (ub != posInfinity)
- dv.ubounds[dep_dim] = floor(
- static_cast<double>(ub)
- / new_stride)
- * new_stride;
- if (dv.ubounds[dep_dim]
- >= dv.lbounds[dep_dim])
- dvs11.push_back(dv);
-
- if (lb != -posInfinity)
- dv.lbounds[dep_dim] = ceil(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
- if (ub != posInfinity)
- dv.ubounds[dep_dim] = ceil(
- static_cast<double>(ub)
- / new_stride)
- * new_stride;
- if (dv.ubounds[dep_dim]
- >= dv.lbounds[dep_dim])
- dvs21.push_back(dv);
-
- if (lb != -posInfinity)
- dv.lbounds[dep_dim] = floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
- if (ub != posInfinity)
- dv.ubounds[dep_dim] = floor(
- static_cast<double>(ub
- - stride)
- / new_stride)
- * new_stride;
- if (dv.ubounds[dep_dim]
- >= dv.lbounds[dep_dim])
- dvs12.push_back(dv);
-
- if (lb != -posInfinity)
- dv.lbounds[dep_dim] = floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
- if (ub != posInfinity)
- dv.ubounds[dep_dim] = ceil(
- static_cast<double>(ub
- - stride)
- / new_stride)
- * new_stride;
- if (dv.ubounds[dep_dim]
- >= dv.lbounds[dep_dim])
- dvs22.push_back(dv);
- }
- }
- }
- if (dvs11.size() > 0)
- D.push_back(std::make_pair(i, dvs11));
- if (dvs22.size() > 0)
- dep.connect(old_num_stmt, old_num_stmt, dvs22);
- if (dvs12.size() > 0)
- D.push_back(
- std::make_pair(old_num_stmt, dvs12));
- if (dvs21.size() > 0)
- dep.connect(old_num_stmt, i, dvs21);
-
- dep.vertex[i].second.erase(j++);
- } else {
- dep.connect(old_num_stmt, j->first, j->second);
- j++;
- }
- } else {
- if (same_loop.find(j->first) != same_loop.end())
- D.push_back(
- std::make_pair(old_num_stmt, j->second));
- j++;
- }
- }
-
- for (int j = 0; j < D.size(); j++)
- dep.connect(i, D[j].first, D[j].second);
- }
- }
- }
-
- return new_stmts;
-}
-
-