summary | refs | log | tree | commit | diff
path: root/src/transformations
diff options
context:
space:
mode:
Diffstat (limited to 'src/transformations')
-rw-r--r-- src/transformations/loop.cc 4433
-rw-r--r-- src/transformations/loop_basic.cc 1839
-rw-r--r-- src/transformations/loop_datacopy.cc 1369
-rw-r--r-- src/transformations/loop_extra.cc 224
-rw-r--r-- src/transformations/loop_tile.cc 587
-rw-r--r-- src/transformations/loop_unroll.cc 1222
6 files changed, 9674 insertions, 0 deletions
diff --git a/src/transformations/loop.cc b/src/transformations/loop.cc
new file mode 100644
index 0000000..570bc90
--- /dev/null
+++ b/src/transformations/loop.cc
@@ -0,0 +1,4433 @@
+/*****************************************************************************
+ Copyright (C) 2008 University of Southern California
+ Copyright (C) 2009-2010 University of Utah
+ All Rights Reserved.
+
+ Purpose:
+ Core loop transformation functionality.
+
+ Notes:
+ "level" (starting from 1) means loop level and it corresponds to "dim"
+ (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,....,
+ c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3
+ in transformed iteration space, and variable 4 in Omega relation.
+ All c's are constant numbers only and they will not show up as actual loops.
+ Formula:
+ dim = 2*level - 1
+ var = dim + 1
+
+ History:
+ 10/2005 Created by Chun Chen.
+ 09/2009 Expand tile functionality, -chun
+ 10/2009 Initialize unfusible loop nest without bailing out, -chun
+*****************************************************************************/
+
+#include <limits.h>
+#include <math.h>
+#include <code_gen/codegen.h>
+#include <code_gen/CG_utils.h>
+#include <code_gen/CG_stringRepr.h>
+#include <code_gen/CG_chillRepr.h> // Mark. Bad idea. TODO
+#include <iostream>
+#include <algorithm>
+#include <map>
+#include "loop.hh"
+#include "omegatools.hh"
+#include "irtools.hh"
+#include "chill_error.hh"
+#include <string.h>
+#include <list>
+#include <chilldebug.h>
+
+// TODO
+#define _DEBUG_ true
+
+
+
+using namespace omega;
+
+const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change
+const std::string Loop::overflow_var_name_prefix = std::string("over");
+
+// Debug helper: print the kind of IR control node that 'control' is.
+// Anything that is not a block, loop or if is reported with a generic message.
+void echocontroltype( const IR_Control *control ) {
+ const int kind = control->type();
+
+ if (kind == IR_CONTROL_BLOCK) {
+ CHILL_DEBUG_PRINT("IR_CONTROL_BLOCK\n");
+ } else if (kind == IR_CONTROL_LOOP) {
+ CHILL_DEBUG_PRINT("IR_CONTROL_LOOP\n");
+ } else if (kind == IR_CONTROL_IF) {
+ CHILL_DEBUG_PRINT("IR_CONTROL_IF\n");
+ } else {
+ CHILL_DEBUG_PRINT("just a bunch of statements?\n");
+ }
+}
+
+// Return the iteration space of statement 'stmt_num' intersected with the
+// 'known' relation (extended to the matching number of set variables).
+// If the statement has no transformation yet, its original IS is used;
+// otherwise the IS is first mapped through xform (range of the restricted
+// xform). The result is simplified with simplify(2, 4) before returning.
+omega::Relation Loop::getNewIS(int stmt_num) const {
+ const Statement &s = stmt[stmt_num];
+ omega::Relation new_is;
+
+ if (!s.xform.is_null()) {
+ // transformed case: work in the output space of xform
+ omega::Relation extended_known = omega::Extend_Set(
+ omega::copy(this->known),
+ s.xform.n_out() - this->known.n_set());
+ omega::Relation mapped = omega::Range(
+ omega::Restrict_Domain(omega::copy(s.xform), omega::copy(s.IS)));
+ new_is = omega::Intersection(mapped, extended_known);
+ } else {
+ // untransformed case: work directly in the original iteration space
+ omega::Relation extended_known = omega::Extend_Set(
+ omega::copy(this->known),
+ s.IS.n_set() - this->known.n_set());
+ new_is = omega::Intersection(omega::copy(s.IS), extended_known);
+ }
+
+ new_is.simplify(2, 4);
+ return new_is;
+}
+// Replace the code of statement stmt_num by a call to func_name and collapse the loop levels listed in 'level'.
+// Call arguments: address of each distinct array referenced by the statement, the loop variables named by seq_levels and, when bound_level != -1, the upper bound(s) found for that level.
+// Returns without changes unless stmt[stmt_num].reduction == 1. NOTE(review): 'param' is only referenced by the commented-out trace below.
+void Loop::reduce(int stmt_num,
+ std::vector<int> &level,
+ int param,
+ std::string func_name,
+ std::vector<int> &seq_levels,
+ std::vector<int> cudaized_levels,
+ int bound_level) {
+
+ // illegal instruction?? fprintf(stderr, " Loop::reduce( stmt %d, param %d, func_name (encrypted)...)\n", stmt, param); // , func_name.c_str());
+
+ //std::cout << "Reducing stmt# " << stmt_num << " at level " << level << "\n";
+ //ir->printStmt(stmt[stmt_num].code);
+
+ if (stmt[stmt_num].reduction != 1) { // only statements flagged with reduction == 1 are handled here
+ CHILL_DEBUG_PRINT("Cannot reduce this statement\n");
+ return;
+ }
+ CHILL_DEBUG_PRINT("CAN reduce this statment?\n");
+
+ /*for (int i = 0; i < level.size(); i++)
+ if (stmt[stmt_num].loop_level[level[i] - 1].segreducible != true) {
+ std::cout << "Cannot reduce this statement\n";
+ return;
+ }
+ for (int i = 0; i < seq_levels.size(); i++)
+ if (stmt[stmt_num].loop_level[seq_levels[i] - 1].segreducible != true) {
+ std::cout << "Cannot reduce this statement\n";
+ return;
+ }
+ */
+ // std::pair<int, std::string> to_insert(level, func_name);
+ // reduced_statements.insert(std::pair<int, std::pair<int, std::string> >(stmt_num, to_insert ));
+ // invalidate saved codegen computation (the statement is about to change)
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+ fprintf(stderr, "set last_compute_cg_ = NULL;\n");
+ // ---- build the argument list of the reduction call ----
+ omega::CG_outputBuilder *ocg = ir->builder();
+
+ omega::CG_outputRepr *funCallRepr;
+ std::vector<omega::CG_outputRepr *> arg_repr_list;
+ apply_xform(stmt_num);
+ std::vector<IR_ArrayRef *> access = ir->FindArrayRef(stmt[stmt_num].code);
+ std::set<std::string> names; // array names already passed, so each array is passed once
+ for (int i = 0; i < access.size(); i++) {
+ std::vector<IR_ArrayRef *> access2; // array references appearing inside the subscripts of access[i]
+ for (int j = 0; j < access[i]->n_dim(); j++) {
+ std::vector<IR_ArrayRef *> access3 = ir->FindArrayRef(
+ access[i]->index(j));
+ access2.insert(access2.end(), access3.begin(), access3.end());
+ }
+ if (access2.size() == 0) { // subscripts contain no array reference: pass the address of the reference itself
+ if (names.find(access[i]->name()) == names.end()) {
+ arg_repr_list.push_back(
+ ocg->CreateAddressOf(access[i]->convert()));
+ names.insert(access[i]->name());
+ if (access[i]->is_write())
+ reduced_write_refs.insert(access[i]->name());
+ }
+ } else { // indirect subscripts: pass the address of element 0 of the array instead
+ if (names.find(access[i]->name()) == names.end()) {
+ arg_repr_list.push_back(ocg->CreateAddressOf(ocg->CreateArrayRefExpression(ocg->CreateIdent(access[i]->name()),
+ ocg->CreateInt(0))));
+ names.insert(access[i]->name());
+ if (access[i]->is_write())
+ reduced_write_refs.insert(access[i]->name());
+ }
+ }
+ }
+ // pass the names of the seq_levels iteration-space variables as further arguments
+ for (int i = 0; i < seq_levels.size(); i++)
+ arg_repr_list.push_back(
+ ocg->CreateIdent(
+ stmt[stmt_num].IS.set_var(seq_levels[i])->name()));
+
+ if (bound_level != -1) {
+ // project away the set variables after bound_level, then collect the upper bounds of variable bound_level
+ omega::Relation new_IS = copy(stmt[stmt_num].IS);
+ new_IS.copy_names(stmt[stmt_num].IS);
+ new_IS.setup_names();
+ new_IS.simplify();
+ int dim = bound_level;
+ //omega::Relation r = getNewIS(stmt_num);
+ for (int j = dim + 1; j <= new_IS.n_set(); j++)
+ new_IS = omega::Project(new_IS, new_IS.set_var(j));
+
+ new_IS.simplify(2, 4);
+
+ omega::Relation bound_ = get_loop_bound(copy(new_IS), dim - 1);
+ omega::Variable_ID v = bound_.set_var(dim);
+ std::vector<omega::CG_outputRepr *> ubList;
+ for (omega::GEQ_Iterator e(
+ const_cast<omega::Relation &>(bound_).single_conjunct()->GEQs());
+ e; e++) {
+ if ((*e).get_coef(v) < 0) { // negative coefficient on v: treated as an upper bound on v
+ // && (*e).is_const_except_for_global(v))
+ omega::CG_outputRepr *UPPERBOUND =
+ omega::output_upper_bound_repr(ir->builder(), *e, v,
+ bound_,
+ std::vector<
+ std::pair<omega::CG_outputRepr *, int> >(
+ bound_.n_set(),
+ std::make_pair(
+ static_cast<omega::CG_outputRepr *>(NULL),
+ 0)), uninterpreted_symbols[stmt_num]);
+ if (UPPERBOUND != NULL)
+ ubList.push_back(UPPERBOUND);
+
+ }
+
+ }
+ // several upper bounds are combined with min(); a single one is passed as is; none adds no argument
+ omega::CG_outputRepr * ubRepr;
+ if (ubList.size() > 1) {
+
+ ubRepr = ir->builder()->CreateInvoke("min", ubList);
+ arg_repr_list.push_back(ubRepr);
+ } else if (ubList.size() == 1)
+ arg_repr_list.push_back(ubList[0]);
+ }
+ // the statement body becomes the call; each variable named by 'level' is then substituted by 0
+ funCallRepr = ocg->CreateInvoke(func_name, arg_repr_list);
+ stmt[stmt_num].code = funCallRepr;
+ for (int i = 0; i < level.size(); i++) {
+ //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL));
+ std::vector<std::string> loop_vars;
+ loop_vars.push_back(stmt[stmt_num].IS.set_var(level[i])->name());
+
+ std::vector<omega::CG_outputRepr *> subs;
+ subs.push_back(ocg->CreateInt(0));
+
+ stmt[stmt_num].code = ocg->CreateSubstitutedStmt(0, stmt[stmt_num].code,
+ loop_vars, subs);
+
+ }
+
+ omega::Relation new_IS = copy(stmt[stmt_num].IS); // NOTE(review): new_IS and old_size are not read after this point
+ new_IS.copy_names(stmt[stmt_num].IS);
+ new_IS.setup_names();
+ new_IS.simplify();
+ int old_size = new_IS.n_set();
+
+ omega::Relation R = omega::copy(stmt[stmt_num].IS);
+ R.copy_names(stmt[stmt_num].IS);
+ R.setup_names();
+ // pass 1: project away every level in 'level' that is not also listed in cudaized_levels
+ for (int i = level.size() - 1; i >= 0; i--) {
+ int j;
+
+ for (j = 0; j < cudaized_levels.size(); j++) {
+ if (cudaized_levels[j] == level[i])
+ break;
+
+ }
+
+ if (j == cudaized_levels.size()) { // level[i] was not found among the cudaized levels
+ R = omega::Project(R, level[i], omega::Input_Var);
+ R.simplify();
+
+ }
+ //
+
+ }
+ // pass 2: add an equality (coef 1, const -1) on each of those levels; presumably pins the variable to 1 -- confirm against Omega's EQ convention
+ omega::F_And *f_Root = R.and_with_and();
+ for (int i = level.size() - 1; i >= 0; i--) {
+ int j;
+
+ for (j = 0; j < cudaized_levels.size(); j++) {
+ if (cudaized_levels[j] == level[i])
+ break;
+
+ }
+
+ if (j == cudaized_levels.size()) {
+
+ omega::EQ_Handle h = f_Root->add_EQ();
+
+ h.update_coef(R.set_var(level[i]), 1);
+ h.update_const(-1);
+ }
+ //
+
+ }
+
+ R.simplify();
+ stmt[stmt_num].IS = R; // install the reduced iteration space
+}
+
+
+
+
+
+
+//-----------------------------------------------------------------------------
+// Class Loop
+//-----------------------------------------------------------------------------
+// --begin Anand: Added from CHiLL 0.2
+
+// A loop counts as initialized when it has at least one statement and the
+// first statement already carries a (non-null) transformation relation.
+bool Loop::isInitialized() const {
+ if (stmt.size() == 0)
+ return false;
+
+ return !stmt[0].xform.is_null();
+}
+
+//--end Anand: added from CHiLL 0.2
+
+// Build the statement table (code, iteration space, loop levels) of this loop nest.
+//
+// ir_tree : control tree of the code under analysis. When an unsupported
+// construct is met, the offending node is converted in place
+// (content->convert()) and its children are deleted.
+// ir_stmt : output; overwritten with the statement nodes returned by
+// extract_ir_stmts(ir_tree).
+//
+// Statements are handled from the most deeply nested one outwards; the first
+// (deepest) one fixes n_dim and the loop index names. For each statement the
+// enclosing loops and ifs are translated into constraints of an Omega set
+// relation that becomes the statement's IS.
+//
+// Returns false as soon as a construct could not be translated (after the
+// tree was rewritten as described above); Loop::Loop calls this function in a
+// loop until it returns true.
+//
+// NOTE(review): uninterpreted_symbols[] is indexed here by the processing
+// order 'i', while the statement itself is stored at index 'loc' -- confirm
+// that this is intended.
+bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
+ std::vector<ir_tree_node *> &ir_stmt) {
+
+ CHILL_DEBUG_PRINT("extract_ir_stmts()\n");
+ // size() is a size_t; cast so that it matches the %d conversion
+ CHILL_DEBUG_PRINT("ir_tree has %d statements\n", (int)ir_tree.size());
+
+ ir_stmt = extract_ir_stmts(ir_tree);
+
+ CHILL_DEBUG_PRINT("nesting level stmt size = %d\n", (int)ir_stmt.size());
+ stmt_nesting_level_.resize(ir_stmt.size());
+
+ // working copy: entries are set to -1 once the statement has been processed
+ std::vector<int> stmt_nesting_level(ir_stmt.size());
+
+ CHILL_DEBUG_PRINT("%d statements?\n", (int)ir_stmt.size());
+
+ // find out how deeply nested each statement is. (how can these be different?)
+ for (int i = 0; i < ir_stmt.size(); i++) {
+ fprintf(stderr, "i %d\n", i);
+ ir_stmt[i]->payload = i;
+ int t = 0;
+ ir_tree_node *itn = ir_stmt[i];
+ while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP)
+ t++;
+ }
+ stmt_nesting_level_[i] = t;
+ stmt_nesting_level[i] = t;
+ CHILL_DEBUG_PRINT("stmt_nesting_level[%d] = %d\n", i, t);
+ }
+
+ if (actual_code.size() == 0)
+ actual_code = std::vector<CG_outputRepr*>(ir_stmt.size());
+
+ stmt = std::vector<Statement>(ir_stmt.size());
+ CHILL_DEBUG_PRINT("in init_loop, made %d stmts\n", (int)ir_stmt.size());
+
+ uninterpreted_symbols = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr * > > >(ir_stmt.size());
+ uninterpreted_symbols_stringrepr = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr * > > >(ir_stmt.size());
+
+ int n_dim = -1;
+ int max_loc;
+ //std::vector<std::string> index;
+ for (int i = 0; i < ir_stmt.size(); i++) {
+ int max_nesting_level = -1;
+ int loc;
+
+ // find the max nesting level and remember the statement that was at that level
+ for (int j = 0; j < ir_stmt.size(); j++) {
+ if (stmt_nesting_level[j] > max_nesting_level) {
+ max_nesting_level = stmt_nesting_level[j];
+ loc = j;
+ }
+ }
+
+ CHILL_DEBUG_PRINT("max nesting level %d at location %d\n", max_nesting_level, loc);
+
+ // most deeply nested statement acting as a reference point
+ if (n_dim == -1) {
+ CHILL_DEBUG_PRINT("n_dim now max_nesting_level %d\n", max_nesting_level);
+ n_dim = max_nesting_level;
+ max_loc = loc;
+
+ index = std::vector<std::string>(n_dim);
+
+ ir_tree_node *itn = ir_stmt[loc];
+ CHILL_DEBUG_PRINT("itn = stmt[%d]\n", loc);
+ int cur_dim = n_dim - 1;
+ while (itn->parent != NULL) {
+ CHILL_DEBUG_PRINT("parent\n");
+
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP) {
+ CHILL_DEBUG_PRINT("IR_CONTROL_LOOP cur_dim %d\n", cur_dim);
+ IR_Loop *IRL = static_cast<IR_Loop *>(itn->content);
+ index[cur_dim] = IRL->index()->name();
+ CHILL_DEBUG_PRINT("index[%d] = '%s'\n", cur_dim, index[cur_dim].c_str());
+ itn->payload = cur_dim--;
+ }
+ }
+ }
+
+ CHILL_DEBUG_PRINT("align loops by names,\n");
+ // align loops by names, temporary solution
+ ir_tree_node *itn = ir_stmt[loc]; // defined outside loops??
+ int depth = stmt_nesting_level_[loc] - 1;
+
+ for (int t = depth; t >= 0; t--) {
+ int y = t;
+ itn = ir_stmt[loc];
+
+ while ((itn->parent != NULL) && (y >= 0)) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP)
+ y--;
+ }
+
+ if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) {
+ CG_outputBuilder *ocg = ir->builder();
+
+ itn->payload = depth - t;
+
+ CG_outputRepr *code =
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+
+ std::vector<CG_outputRepr *> index_expr;
+ std::vector<std::string> old_index;
+ CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]);
+ index_expr.push_back(repl);
+ old_index.push_back(
+ static_cast<IR_Loop *>(itn->content)->index()->name());
+ code = ocg->CreateSubstitutedStmt(0, code, old_index,
+ index_expr);
+
+ // remembered here; Loop::Loop later installs it as stmt[loc].code
+ replace.insert(std::pair<int, CG_outputRepr*>(loc, code));
+ //stmt[loc].code = code;
+
+ }
+ }
+
+ CHILL_DEBUG_PRINT("set relation variable names ****\n");
+ // set relation variable names
+
+ // this finds the loop variables for loops enclosing this statement and puts
+ // them in an Omega Relation (just their names, which could fail)
+
+ CHILL_DEBUG_PRINT("Relation r(%d)\n", n_dim);
+ Relation r(n_dim);
+ F_And *f_root = r.add_and();
+ itn = ir_stmt[loc];
+ int temp_depth = depth;
+ while (itn->parent != NULL) {
+
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP) {
+ fprintf(stderr, "it's a loop. temp_depth %d\n", temp_depth);
+ fprintf(stderr, "r.name_set_var( %d, %s )\n", itn->payload + 1, index[temp_depth].c_str());
+ r.name_set_var(itn->payload + 1, index[temp_depth]);
+
+ temp_depth--;
+ }
+ //static_cast<IR_Loop *>(itn->content)->index()->name());
+ }
+ fprintf(stderr, "Relation r "); r.print(); fflush(stdout);
+ //fprintf(stderr, "f_root "); f_root->print(stderr); fprintf(stderr, "\n");
+
+ /*while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP)
+ r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name());
+ }*/
+
+
+
+
+ fprintf(stderr, "extract information from loop/if structures\n");
+ // extract information from loop/if structures
+ std::vector<bool> processed(n_dim, false);
+ std::vector<std::string> vars_to_be_reversed;
+
+ std::vector<std::string> insp_lb;
+ std::vector<std::string> insp_ub;
+
+ itn = ir_stmt[loc];
+ while (itn->parent != NULL) { // keep heading upward
+ itn = itn->parent;
+
+ switch (itn->content->type()) {
+ case IR_CONTROL_LOOP: {
+ fprintf(stderr, "loop.cc l 462 IR_CONTROL_LOOP\n");
+ IR_Loop *lp = static_cast<IR_Loop *>(itn->content);
+ Variable_ID v = r.set_var(itn->payload + 1);
+ int c;
+
+ try {
+ c = lp->step_size();
+ //fprintf(stderr, "step size %d\n", c);
+ if (c > 0) {
+ CG_outputRepr *lb = lp->lower_bound();
+ fprintf(stderr, "loop.cc, got the lower bound. it is:\n");
+ lb->dump(); printf("\n"); fflush(stdout);
+
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+
+ CG_outputRepr *ub = lp->upper_bound();
+ //fprintf(stderr, "loop.cc, got the upper bound. it is:\n");
+ //ub->dump(); printf("\n"); fflush(stdout);
+
+
+
+ IR_CONDITION_TYPE cond = lp->stop_cond();
+ if (cond == IR_COND_LT || cond == IR_COND_LE)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ else
+ throw ir_error("loop condition not supported");
+
+
+ // both bounds are array references: remember the array name when
+ // lower and upper bound read the same array (used further below to
+ // add a "known" constraint between the two)
+ if ((ir->QueryExpOperation(lp->lower_bound())
+ == IR_OP_ARRAY_VARIABLE)
+ && (ir->QueryExpOperation(lp->lower_bound())
+ == ir->QueryExpOperation(
+ lp->upper_bound()))) {
+
+ fprintf(stderr, "loop.cc lower and upper are both IR_OP_ARRAY_VARIABLE?\n");
+
+ std::vector<CG_outputRepr *> v =
+ ir->QueryExpOperand(lp->lower_bound());
+ IR_ArrayRef *ref =
+ static_cast<IR_ArrayRef *>(ir->Repr2Ref(
+ v[0]));
+ std::string s0 = ref->name();
+ std::vector<CG_outputRepr *> v2 =
+ ir->QueryExpOperand(lp->upper_bound());
+ IR_ArrayRef *ref2 =
+ static_cast<IR_ArrayRef *>(ir->Repr2Ref(
+ v2[0]));
+ std::string s1 = ref2->name();
+
+ if (s0 == s1) {
+ insp_lb.push_back(s0);
+ insp_ub.push_back(s1);
+
+ }
+
+ }
+
+
+ } else if (c < 0) {
+ // negative step: the bounds are negated and the loop variable is
+ // recorded so that its uses in the statement body get negated too
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *lb = lp->lower_bound();
+ lb = ocg->CreateMinus(NULL, lb);
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ CG_outputRepr *ub = lp->upper_bound();
+ ub = ocg->CreateMinus(NULL, ub);
+ IR_CONDITION_TYPE cond = lp->stop_cond();
+ if (cond == IR_COND_GE)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ IR_COND_LE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ else if (cond == IR_COND_GT)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ IR_COND_LT, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ else
+ throw ir_error("loop condition not supported");
+
+ vars_to_be_reversed.push_back(lp->index()->name());
+ } else
+ throw ir_error("loop step size zero");
+ } catch (const ir_error &e) {
+ // unsupported loop: keep the raw statement code, flatten this loop
+ // node and ask the caller to start over
+ actual_code[loc] =
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+ for (int i = 0; i < itn->children.size(); i++)
+ delete itn->children[i];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ return false;
+ }
+
+ // check for loop increment or decrement that is not 1
+ //fprintf(stderr, "abs(c)\n");
+ if (abs(c) != 1) {
+ F_Exists *f_exists = f_root->add_exists();
+ Variable_ID e = f_exists->declare();
+ F_And *f_and = f_exists->add_and();
+ Stride_Handle h = f_and->add_stride(abs(c));
+ if (c > 0)
+ h.update_coef(e, 1);
+ else
+ h.update_coef(e, -1);
+ h.update_coef(v, -1);
+ CG_outputRepr *lb = lp->lower_bound();
+ exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ,
+ true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ }
+
+ processed[itn->payload] = true;
+ break;
+ }
+
+
+ case IR_CONTROL_IF: {
+ fprintf(stderr, "IR_CONTROL_IF\n");
+ IR_If *theif = static_cast<IR_If *>(itn->content);
+
+ CG_outputRepr *cond =
+ static_cast<IR_If *>(itn->content)->condition();
+
+ try {
+ // odd payload: condition is added as is; even payload: its negation
+ // (presumably then-branch vs. else-branch -- confirm in build_ir_tree)
+ if (itn->payload % 2 == 1)
+ exp2constraint(ir, r, f_root, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ else {
+ F_Not *f_not = f_root->add_not();
+ F_And *f_and = f_not->add_and();
+ exp2constraint(ir, r, f_and, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ }
+ } catch (const ir_error &e) {
+ // unsupported condition: flatten this if node, drop sibling nodes
+ // whose payload matches id >> 1, and ask the caller to start over
+ std::vector<ir_tree_node *> *t;
+ if (itn->parent == NULL)
+ t = &ir_tree;
+ else
+ t = &(itn->parent->children);
+ int id = itn->payload;
+ int i = t->size() - 1;
+ while (i >= 0) {
+ if ((*t)[i] == itn) {
+ for (int j = 0; j < itn->children.size(); j++)
+ delete itn->children[j];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ } else if ((*t)[i]->payload >> 1 == id >> 1) {
+ delete (*t)[i];
+ t->erase(t->begin() + i);
+ }
+ i--;
+ }
+ return false;
+ }
+
+ break;
+ }
+ default:
+ //fprintf(stderr, "default?\n");
+ for (int i = 0; i < itn->children.size(); i++)
+ delete itn->children[i];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ return false;
+ }
+ }
+
+
+ //fprintf(stderr, "add information for missing loops n_dim(%d)\n", n_dim);
+ // add information for missing loops
+ for (int j = 0; j < n_dim; j++)
+ if (!processed[j]) {
+ ir_tree_node *itn = ir_stmt[max_loc];
+ while (itn->parent != NULL) {
+ itn = itn->parent;
+ if (itn->content->type() == IR_CONTROL_LOOP
+ && itn->payload == j)
+ break;
+ }
+
+ Variable_ID v = r.set_var(j + 1);
+ if (loc < max_loc) {
+
+ CG_outputBuilder *ocg = ir->builder();
+
+ CG_outputRepr *lb =
+ static_cast<IR_Loop *>(itn->content)->lower_bound();
+
+ exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ,
+ false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+
+ /* if (ir->QueryExpOperation(
+ static_cast<IR_Loop *>(itn->content)->lower_bound())
+ == IR_OP_VARIABLE) {
+ IR_ScalarRef *ref =
+ static_cast<IR_ScalarRef *>(ir->Repr2Ref(
+ static_cast<IR_Loop *>(itn->content)->lower_bound()));
+ std::string name_ = ref->name();
+
+ for (int i = 0; i < index.size(); i++)
+ if (index[i] == name_) {
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, false);
+
+ CG_outputRepr *ub =
+ static_cast<IR_Loop *>(itn->content)->upper_bound();
+ IR_CONDITION_TYPE cond =
+ static_cast<IR_Loop *>(itn->content)->stop_cond();
+ if (cond == IR_COND_LT || cond == IR_COND_LE)
+ exp2formula(ir, r, f_root, freevar, ub, v,
+ 's', cond, false);
+
+
+
+ }
+
+ }
+ */
+
+ } else { // loc > max_loc
+
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *ub =
+ static_cast<IR_Loop *>(itn->content)->upper_bound();
+
+ exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ,
+ false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ /*if (ir->QueryExpOperation(
+ static_cast<IR_Loop *>(itn->content)->upper_bound())
+ == IR_OP_VARIABLE) {
+ IR_ScalarRef *ref =
+ static_cast<IR_ScalarRef *>(ir->Repr2Ref(
+ static_cast<IR_Loop *>(itn->content)->upper_bound()));
+ std::string name_ = ref->name();
+
+ for (int i = 0; i < index.size(); i++)
+ if (index[i] == name_) {
+
+ CG_outputRepr *lb =
+ static_cast<IR_Loop *>(itn->content)->lower_bound();
+
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, false);
+
+ CG_outputRepr *ub =
+ static_cast<IR_Loop *>(itn->content)->upper_bound();
+ IR_CONDITION_TYPE cond =
+ static_cast<IR_Loop *>(itn->content)->stop_cond();
+ if (cond == IR_COND_LT || cond == IR_COND_LE)
+ exp2formula(ir, r, f_root, freevar, ub, v,
+ 's', cond, false);
+
+
+ }
+ }
+ */
+ }
+ }
+
+ r.setup_names();
+ r.simplify();
+
+ // THIS IS MISSING IN PROTONU's
+ // for every array used as both lower and upper loop bound, look up the
+ // global function symbols named "<array>_" and "<array>__" in r and, when
+ // both are present, add the known constraint ub - lb - 1 >= 0
+ for (int j = 0; j < insp_lb.size(); j++) {
+
+ std::string lb = insp_lb[j] + "_";
+ std::string ub = lb + "_";
+
+ Global_Var_ID u, l;
+ bool found_ub = false;
+ bool found_lb = false;
+ for (DNF_Iterator di(copy(r).query_DNF()); di; di++)
+ for (Constraint_Iterator ci = (*di)->constraints(); ci; ci++)
+
+ for (Constr_Vars_Iter cvi(*ci); cvi; cvi++) {
+ Variable_ID v = cvi.curr_var();
+ if (v->kind() == Global_Var)
+ if (v->get_global_var()->arity() > 0) {
+
+ std::string name =
+ v->get_global_var()->base_name();
+ if (name == lb) {
+ l = v->get_global_var();
+ found_lb = true;
+ } else if (name == ub) {
+ u = v->get_global_var();
+ found_ub = true;
+ }
+ }
+
+ }
+
+ if (found_lb && found_ub) {
+ Relation known_(copy(r).n_set());
+ known_.copy_names(copy(r));
+ known_.setup_names();
+ Variable_ID index_lb = known_.get_local(l, Input_Tuple);
+ Variable_ID index_ub = known_.get_local(u, Input_Tuple);
+ F_And *fr = known_.add_and();
+ GEQ_Handle g = fr->add_GEQ();
+ g.update_coef(index_ub, 1);
+ g.update_coef(index_lb, -1);
+ g.update_const(-1);
+ addKnown(known_);
+
+ }
+
+ }
+
+
+ fprintf(stderr, "loop.cc L441 insert the statement\n");
+ // insert the statement
+ CG_outputBuilder *ocg = ir->builder();
+ // reverse_expr[j] is the replacement (-var) for vars_to_be_reversed[j].
+ // The index must run over 0 .. size()-1: starting at 1 and including
+ // size() skipped the first variable and read one element past the end.
+ std::vector<CG_outputRepr *> reverse_expr;
+ for (int j = 0; j < (int)vars_to_be_reversed.size(); j++) {
+ CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]);
+ repl = ocg->CreateMinus(NULL, repl);
+ reverse_expr.push_back(repl);
+ }
+ fprintf(stderr, "loop.cc before extract\n");
+ CG_outputRepr *code =
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+ fprintf(stderr, "code = ocg->CreateSubstitutedStmt(...)\n");
+ ((CG_chillRepr *)code)->Dump(); fflush(stdout);
+
+ code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed,
+ reverse_expr);
+ fprintf(stderr, "stmt\n");
+ ((CG_chillRepr *)code)->Dump(); fflush(stdout);
+
+ stmt[loc].code = code;
+ stmt[loc].IS = r;
+
+ //Anand: Add Information on uninterpreted function constraints to
+ //Known relation
+
+ fprintf(stderr, "loop.cc stmt[%d].loop_level has size n_dim %d\n", loc, n_dim);
+
+ stmt[loc].loop_level = std::vector<LoopLevel>(n_dim);
+ stmt[loc].ir_stmt_node = ir_stmt[loc];
+ stmt[loc].has_inspector = false;
+ fprintf(stderr, "for int i < n_dim(%d)\n", n_dim);
+ for (int ii = 0; ii < n_dim; ii++) {
+ stmt[loc].loop_level[ii].type = LoopLevelOriginal;
+ stmt[loc].loop_level[ii].payload = ii;
+ stmt[loc].loop_level[ii].parallel_level = 0;
+ }
+ fprintf(stderr, "whew\n");
+
+ // mark this statement as done so the next iteration picks another one
+ stmt_nesting_level[loc] = -1;
+ }
+ dump();
+ fprintf(stderr, " loop.cc Loop::init_loop() END\n\n");
+
+ return true;
+}
+
+
+
+// Construct a Loop from the IR control structure 'control'.
+//
+// Steps, in order:
+// 1. clone 'control' and build the IR tree from the clone;
+// 2. call init_loop() repeatedly until it succeeds (each failed attempt has
+// rewritten the tree);
+// 3. install the statement bodies recorded in 'replace' by init_loop();
+// 4. build the dependence graph by testing every statement pair (i, j >= i);
+// 5. mark reduction candidates (stmt[].reduction = 2, or 1 when stmtType()
+// reports 1 for the statement code);
+// 6. give every statement the initial transformation
+// [i, j] -> [0, i, 0, j, 0].
+Loop::Loop(const IR_Control *control) {
+
+ CHILL_DEBUG_PRINT("control type is %d ", control->type());
+ echocontroltype(control);
+
+ CHILL_DEBUG_PRINT("2set last_compute_cg_ = NULL; \n");
+ last_compute_cgr_ = NULL;
+ last_compute_cg_ = NULL;
+
+ ir = const_cast<IR_Code *>(control->ir_); // point to the CHILL IR that this loop came from
+ if (ir == 0) {
+ // print the pointer with %p; the former 0x%x conversion did not match the (long) argument
+ CHILL_DEBUG_PRINT("ir gotten from control = %p\n", (void *)ir);
+ CHILL_DEBUG_PRINT("loop.cc GONNA DIE SOON *******************************\n\n");
+ }
+
+ init_code = NULL;
+ cleanup_code = NULL;
+ tmp_loop_var_name_counter = 1;
+ overflow_var_name_counter = 1;
+ known = Relation::True(0);
+
+ CHILL_DEBUG_PRINT("calling build_ir_tree()\n");
+ CHILL_DEBUG_PRINT("about to clone control\n");
+ ir_tree = build_ir_tree(control->clone(), NULL);
+ //fprintf(stderr,"in Loop::Loop. ir_tree has %ld parts\n", ir_tree.size());
+
+ // std::vector<ir_tree_node *> ir_stmt;
+ //fprintf(stderr, "loop.cc after build_ir_tree() %ld statements\n", stmt.size());
+
+ //fprintf(stderr, "before init_loops, %d freevar\n", freevar.size());
+ //fprintf(stderr, "loop.cc before init_loop, %ld statements\n", stmt.size());
+ // init_loop() returns false after it had to rewrite part of the IR tree;
+ // keep retrying until the whole nest could be initialized
+ while (!init_loop(ir_tree, ir_stmt)) {
+ }
+ fprintf(stderr, "after init_loop, %d freevar\n", (int)freevar.size());
+
+
+ fprintf(stderr, "loop.cc after init_loop, %d statements\n", (int)stmt.size());
+ // statements whose loop indices were renamed by init_loop() get the renamed body
+ for (int i = 0; i < stmt.size(); i++) {
+ std::map<int, CG_outputRepr*>::iterator it = replace.find(i);
+
+ if (it != replace.end())
+ stmt[i].code = it->second;
+ }
+
+ if (stmt.size() != 0)
+ dep = DependenceGraph(stmt[0].IS.n_set());
+ else
+ dep = DependenceGraph(0);
+ // init the dependence graph
+ for (int i = 0; i < stmt.size(); i++)
+ dep.insert();
+
+ fprintf(stderr, "this really REALLY needs some comments\n");
+ // Test every ordered statement pair (i, j) with j >= i. dv.first holds the
+ // dependences reported for i -> j, dv.second those for j -> i; a dependence
+ // rejected by is_dependence_valid() is inserted reversed in the opposite direction.
+ for (int i = 0; i < stmt.size(); i++) {
+ fprintf(stderr, "i %d\n", i);
+ stmt[i].reduction = 0; // Manu -- initialization
+ for (int j = i; j < stmt.size(); j++) {
+ fprintf(stderr, "j %d\n", j);
+ std::pair<std::vector<DependenceVector>,
+ std::vector<DependenceVector> > dv = test_data_dependences(
+ ir,
+ stmt[i].code,
+ stmt[i].IS,
+ stmt[j].code,
+ stmt[j].IS,
+ freevar,
+ index,
+ stmt_nesting_level_[i],
+ stmt_nesting_level_[j],
+ uninterpreted_symbols[ i ],
+ uninterpreted_symbols_stringrepr[ i ]);
+
+ fprintf(stderr, "dv.first.size() %d\n", (int)dv.first.size());
+ for (int k = 0; k < dv.first.size(); k++) {
+ fprintf(stderr, "k1 %d\n", k);
+ if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k],
+ true))
+ dep.connect(i, j, dv.first[k]);
+ else {
+ dep.connect(j, i, dv.first[k].reverse());
+ }
+
+ }
+
+ for (int k = 0; k < dv.second.size(); k++) {
+ fprintf(stderr, "k2 %d\n", k);
+ if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k],
+ false))
+ dep.connect(j, i, dv.second[k]);
+ else {
+ dep.connect(i, j, dv.second[k].reverse());
+ }
+ }
+ }
+ }
+
+ fprintf(stderr, "\n\n*** LOTS OF REDUCTIONS ***\n\n");
+
+ // TODO: Reduction check
+ // Manu:: Initial implementation / algorithm
+ std::set<int> reducCand = std::set<int>();
+ std::vector<int> canReduce = std::vector<int>();
+ // container sizes are size_t; cast to int to match the %d conversions below
+ fprintf(stderr, "\ni range %d\n", (int)stmt.size());
+ for (int i = 0; i < stmt.size(); i++) {
+ fprintf(stderr, "i %d\n", i);
+ if (!dep.hasEdge(i, i)) {
+ continue;
+ }
+ fprintf(stderr, "dep.hasEdge(%d, %d)\n", i, i);
+
+ // for each statement check if it has all the three dependences (RAW, WAR, WAW)
+ // If there is such a statement, it is a reduction candidate. Mark all reduction candidates.
+ std::vector<DependenceVector> tdv = dep.getEdge(i, i);
+ fprintf(stderr, "tdv size %d\n", (int)tdv.size());
+ for (int j = 0; j < tdv.size(); j++) {
+ fprintf(stderr, "ij %d %d\n", i, j);
+ if (tdv[j].is_reduction_cand) {
+ fprintf(stderr, "reducCand.insert( %d )\n", i);
+ reducCand.insert(i);
+ }
+ }
+ }
+
+ fprintf(stderr, "loop.cc reducCand.size() %d\n", (int)reducCand.size());
+ // a candidate is rejected when it shares a dependence edge with any more
+ // deeply nested statement
+ bool reduc;
+ std::set<int>::iterator it;
+ int counter = 0; // only used in the debug output
+ for (it = reducCand.begin(); it != reducCand.end(); it++) {
+ fprintf(stderr, "counter %d\n", counter);
+ reduc = true;
+ for (int j = 0; j < stmt.size(); j++) {
+ fprintf(stderr, "j %d\n", j);
+ if ((*it != j)
+ && (stmt_nesting_level_[*it] < stmt_nesting_level_[j])) {
+ if (dep.hasEdge(*it, j) || dep.hasEdge(j, *it)) {
+ fprintf(stderr, "counter %d j %d reduc = false\n", counter, j);
+ reduc = false;
+ break;
+ }
+ }
+ counter += 1;
+ }
+
+ if (reduc) {
+ fprintf(stderr, "canReduce.push_back()\n");
+ canReduce.push_back(*it);
+ stmt[*it].reduction = 2; // First, assume that reduction is possible with some processing
+ }
+ }
+
+
+ // If reduction is possible without processing, update the value of the reduction variable to 1
+ fprintf(stderr, "loop.cc canReduce.size() %d\n", (int)canReduce.size());
+ for (int i = 0; i < canReduce.size(); i++) {
+ // Here, assuming that stmtType returns 1 when there is a single statement within stmt[i]
+ if (stmtType(ir, stmt[canReduce[i]].code) == 1) {
+ stmt[canReduce[i]].reduction = 1;
+ IR_OPERATION_TYPE opType;
+ opType = getReductionOperator(ir, stmt[canReduce[i]].code);
+ stmt[canReduce[i]].reductionOp = opType;
+ }
+ }
+
+ // printing out stuff for debugging
+
+ if (DEP_DEBUG) {
+ std::cout << "STATEMENTS THAT CAN BE REDUCED: \n";
+ for (int i = 0; i < canReduce.size(); i++) {
+ std::cout << "------- " << canReduce[i] << " ------- "
+ << stmt[canReduce[i]].reduction << "\n";
+ ir->printStmt(stmt[canReduce[i]].code); // Manu
+ if (stmt[canReduce[i]].reductionOp == IR_OP_PLUS)
+ std::cout << "Reduction type:: + \n";
+ else if (stmt[canReduce[i]].reductionOp == IR_OP_MINUS)
+ std::cout << "Reduction type:: - \n";
+ else if (stmt[canReduce[i]].reductionOp == IR_OP_MULTIPLY)
+ std::cout << "Reduction type:: * \n";
+ else if (stmt[canReduce[i]].reductionOp == IR_OP_DIVIDE)
+ std::cout << "Reduction type:: / \n";
+ else
+ std::cout << "Unknown reduction type\n";
+ }
+ }
+ // cleanup the IR tree
+
+ fprintf(stderr, "init dumb transformation relations\n");
+
+ // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0]
+ for (int i = 0; i < stmt.size(); i++) {
+ int n = stmt[i].IS.n_set();
+ stmt[i].xform = Relation(n, 2 * n + 1);
+ F_And *f_root = stmt[i].xform.add_and();
+
+ // even output positions carry the loop variables: out[2j] = in[j]
+ for (int j = 1; j <= n; j++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(stmt[i].xform.output_var(2 * j), 1);
+ h.update_coef(stmt[i].xform.input_var(j), -1);
+ }
+
+ // odd output positions are the constant (ordering) dimensions: out[2j-1] = 0
+ for (int j = 1; j <= 2 * n + 1; j += 2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(stmt[i].xform.output_var(j), 1);
+ }
+ stmt[i].xform.simplify();
+ }
+ //fprintf(stderr, "done with dumb\n");
+
+ if (stmt.size() != 0)
+ num_dep_dim = stmt[0].IS.n_set();
+ else
+ num_dep_dim = 0;
+ // debug
+ /*for (int i = 0; i < stmt.size(); i++) {
+ std::cout << i << ": ";
+ //stmt[i].xform.print();
+ stmt[i].IS.print();
+ std::cout << std::endl;
+
+ }*/
+ //end debug
+ fprintf(stderr, " at bottom of Loop::Loop, printCode\n");
+ printCode(); // this dies TODO figure out why
+}
+
+// Release everything the Loop owns: the cached code generation results, the
+// code of every statement, the IR tree nodes and the init/cleanup code.
+Loop::~Loop() {
+ // cached codegen state
+ delete last_compute_cgr_;
+ delete last_compute_cg_;
+
+ // per-statement code representations
+ for (int s = 0; s < stmt.size(); s++) {
+ if (stmt[s].code == NULL)
+ continue;
+ stmt[s].code->clear();
+ delete stmt[s].code;
+ }
+
+ // top-level IR tree nodes
+ for (int n = 0; n < ir_tree.size(); n++)
+ delete ir_tree[n];
+
+ // optional code placed before / after the loop nest
+ if (init_code != NULL) {
+ init_code->clear();
+ delete init_code;
+ }
+
+ if (cleanup_code != NULL) {
+ cleanup_code->clear();
+ delete cleanup_code;
+ }
+}
+
+
+
+
+// Returns the payload of the LoopLevelOriginal entry that loop `level`
+// (1-based) of statement `stmt_num` resolves to. A LoopLevelTile entry is
+// followed through its payload, which names another loop level, until an
+// original level is reached.
+//
+// Returns -1 when `level` is outside [1, number of loop levels] or when a
+// tile entry points at a level < 1.
+// Throws std::invalid_argument for an out-of-range `stmt_num`, and
+// loop_error when the loop-level records are inconsistent (a tile points
+// past the last level, the type is unknown, or the chain does not end
+// within as many steps as there are loop levels).
+int Loop::get_dep_dim_of(int stmt_num, int level) const {
+  if (stmt_num < 0 || stmt_num >= static_cast<int>(stmt.size()))
+    throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+
+  const int num_levels = static_cast<int>(stmt[stmt_num].loop_level.size());
+  if (level < 1 || level > num_levels)
+    return -1;
+
+  // trip_count bounds the walk so a cyclic tile chain cannot loop forever.
+  int trip_count = 0;
+  while (true) {
+    switch (stmt[stmt_num].loop_level[level - 1].type) {
+    case LoopLevelOriginal:
+      return stmt[stmt_num].loop_level[level - 1].payload;
+    case LoopLevelTile:
+      // For a tile level the payload is the loop level it refers to.
+      level = stmt[stmt_num].loop_level[level - 1].payload;
+      if (level < 1)
+        return -1;
+      if (level > num_levels)
+        throw loop_error("incorrect loop level information for statement "
+                         + to_string(stmt_num));
+      break;
+    default:
+      throw loop_error(
+          "unknown loop level information for statement "
+          + to_string(stmt_num));
+    }
+    trip_count++;
+    if (trip_count >= num_levels)
+      throw loop_error(
+          "incorrect loop level information for statement "
+          + to_string(stmt_num));
+  }
+}
+
+// Returns the payload of the closest LoopLevelOriginal entry strictly
+// outside loop `level` (1-based) of statement `stmt_num`, searching from
+// level - 1 down to 1. A `level` beyond the statement's loop depth is
+// clamped to depth + 1, so the innermost loop is then included.
+//
+// Returns -1 when `level` < 1 or no original level precedes it.
+// Throws std::invalid_argument for an out-of-range `stmt_num`.
+int Loop::get_last_dep_dim_before(int stmt_num, int level) const {
+  if (stmt_num < 0 || stmt_num >= static_cast<int>(stmt.size()))
+    throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+
+  if (level < 1)
+    return -1;
+
+  const int num_levels = static_cast<int>(stmt[stmt_num].loop_level.size());
+  if (level > num_levels)
+    level = num_levels + 1;
+
+  for (int i = level - 1; i >= 1; i--)
+    if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal)
+      return stmt[stmt_num].loop_level[i - 1].payload;
+
+  return -1;
+}
+
+// Prints one line per statement to std::cout: "s<k>: " followed, for each
+// loop level, by the lexical-order constant in front of it (when present)
+// and a "(dim:N)", "(tile:N)" or "(unknown)" tag, then any remaining
+// lexical-order constants beyond the statement's loop depth.
+void Loop::print_internal_loop_structure() const {
+  for (int s = 0; s < stmt.size(); ++s) {
+    const std::vector<int> lex = getLexicalOrder(s);
+    const int lex_len = lex.size();
+    const int depth = stmt[s].loop_level.size();
+
+    std::cout << "s" << s + 1 << ": ";
+
+    for (int lvl = 0; lvl < depth; ++lvl) {
+      // Constant dimension preceding this loop level.
+      if (2 * lvl < lex_len)
+        std::cout << lex[2 * lvl];
+
+      if (stmt[s].loop_level[lvl].type == LoopLevelOriginal)
+        std::cout << "(dim:" << stmt[s].loop_level[lvl].payload << ")";
+      else if (stmt[s].loop_level[lvl].type == LoopLevelTile)
+        std::cout << "(tile:" << stmt[s].loop_level[lvl].payload << ")";
+      else
+        std::cout << "(unknown)";
+
+      std::cout << ' ';
+    }
+
+    // Constants left over after the last loop level, space separated.
+    for (int pos = 2 * depth; pos < lex_len; pos += 2) {
+      std::cout << lex[pos];
+      if (pos != lex_len - 1)
+        std::cout << ' ';
+    }
+    std::cout << std::endl;
+  }
+}
+
+// Debug helper: prints every statement's transformation relation and then
+// every statement's iteration space to stdout. The relations are copied
+// into local vectors first so that print() is invoked on non-const objects
+// from this const method.
+void Loop::debugRelations() const {
+  const int stmt_total = stmt.size();
+
+  std::vector<Relation> iter_spaces(stmt_total);
+  std::vector<Relation> transforms(stmt_total);
+  for (int s = 0; s < stmt_total; ++s) {
+    iter_spaces[s] = stmt[s].IS;
+    transforms[s] = stmt[s].xform;
+  }
+
+  printf("\nxforms:\n");
+  for (int s = 0; s < stmt_total; ++s) {
+    transforms[s].print();
+    printf("\n");
+  }
+
+  printf("\nIS:\n");
+  for (int s = 0; s < stmt_total; ++s) {
+    iter_spaces[s].print();
+    printf("\n");
+  }
+
+  fflush(stdout);
+}
+
+
+// Generates the output code for the whole loop nest at the given `effort`.
+//
+// The CodeGen object and its AST are cached in last_compute_cg_ and
+// last_compute_cgr_: the CodeGen is built only when none is cached, and the
+// AST is rebuilt when it is missing or was built with a different effort.
+// init_code / cleanup_code (when set) are cloned and placed before / after
+// the generated code.
+//
+// Returns NULL when there are no statements.
+CG_outputRepr *Loop::getCode(int effort) const {
+  fprintf(stderr,"\nloop.cc Loop::getCode( effort %d )\n", effort );
+
+  const int stmt_total = stmt.size();
+  if (stmt_total == 0)
+    return NULL;
+  const int out_dims = stmt[0].xform.n_out();
+
+  if (last_compute_cg_ != NULL) {
+    fprintf(stderr, "Loop::getCode() last_compute_cg_ NOT NULL\n");
+  } else {
+    fprintf(stderr, "Loop::getCode() last_compute_cg_ == NULL\n");
+
+    // Snapshot iteration spaces and transformations for the code generator.
+    std::vector<Relation> iter_spaces(stmt_total);
+    std::vector<Relation> transforms(stmt_total);
+    for (int s = 0; s < stmt_total; ++s) {
+      iter_spaces[s] = stmt[s].IS;
+      transforms[s] = stmt[s].xform;
+    }
+
+    debugRelations();
+
+    // Pad the known-conditions set up to the transformed dimensionality.
+    Relation padded_known = Extend_Set(copy(this->known),
+                                       out_dims - this->known.n_set());
+    printf("\nknown:\n");
+    padded_known.print();
+    printf("\n\n");
+    fflush(stdout);
+
+    last_compute_cg_ = new CodeGen(transforms, iter_spaces, padded_known);
+    // Any AST from a previous generator is stale now.
+    delete last_compute_cgr_;
+    last_compute_cgr_ = NULL;
+  }
+
+  const bool ast_is_current =
+      (last_compute_cgr_ != NULL) && (last_compute_effort_ == effort);
+  if (!ast_is_current) {
+    delete last_compute_cgr_;
+    last_compute_cgr_ = last_compute_cg_->buildAST(effort);
+    last_compute_effort_ = effort;
+  }
+
+  std::vector<CG_outputRepr *> stmt_code(stmt_total);
+  fprintf(stderr, "%d stmts\n", stmt_total);
+  for (int s = 0; s < stmt_total; ++s)
+    stmt_code[s] = stmt[s].code;
+  CG_outputBuilder *builder = ir->builder();
+
+  fprintf(stderr, "calling last_compute_cgr_->printRepr()\n");
+  CG_outputRepr *result = last_compute_cgr_->printRepr(builder, stmt_code,
+                                                       uninterpreted_symbols);
+
+  if (init_code != NULL)
+    result = builder->StmtListAppend(init_code->clone(), result);
+  if (cleanup_code != NULL)
+    result = builder->StmtListAppend(result, cleanup_code->clone());
+
+  fprintf(stderr,"\nloop.cc Loop::getCode( effort %d ) DONE\n", effort );
+  return result;
+}
+
+
+
+
+// Prints the generated loop nest as a string to std::cout at the given
+// `effort`. Uses the same last_compute_cg_ / last_compute_cgr_ caching as
+// getCode(): the CodeGen is built only when none is cached and the AST is
+// rebuilt when missing or built with a different effort. Does nothing
+// (beyond the trace message) when there are no statements.
+void Loop::printCode(int effort) const {
+  fprintf(stderr,"\nloop.cc Loop::printCode( effort %d )\n", effort );
+  const int stmt_total = stmt.size();
+  if (stmt_total == 0)
+    return;
+  const int out_dims = stmt[0].xform.n_out();
+
+  if (last_compute_cg_ != NULL) {
+    fprintf(stderr, "Loop::printCode(), last_compute_cg_ NOT NULL\n");
+  } else {
+    fprintf(stderr, "Loop::printCode(), last_compute_cg_ == NULL\n");
+
+    std::vector<Relation> iter_spaces(stmt_total);
+    std::vector<Relation> transforms(stmt_total);
+    for (int s = 0; s < stmt_total; ++s) {
+      iter_spaces[s] = stmt[s].IS;
+      transforms[s] = stmt[s].xform;
+    }
+    // Pad the known-conditions set up to the transformed dimensionality.
+    Relation padded_known = Extend_Set(copy(this->known),
+                                       out_dims - this->known.n_set());
+
+    last_compute_cg_ = new CodeGen(transforms, iter_spaces, padded_known);
+    // Any AST from a previous generator is stale now.
+    delete last_compute_cgr_;
+    last_compute_cgr_ = NULL;
+  }
+
+  const bool ast_is_current =
+      (last_compute_cgr_ != NULL) && (last_compute_effort_ == effort);
+  if (!ast_is_current) {
+    delete last_compute_cgr_;
+    last_compute_cgr_ = last_compute_cg_->buildAST(effort);
+    last_compute_effort_ = effort;
+  }
+
+  std::string text = last_compute_cgr_->printString(
+      uninterpreted_symbols_stringrepr);
+  fprintf(stderr, "leaving Loop::printCode()\n");
+  std::cout << text << std::endl;
+}
+
+// Prints "s<i>: " followed by the transformed iteration space of every
+// statement (as returned by getNewIS(i)), after naming its input variables
+// with CodeGen::loop_var_name_prefix plus their 1-based position.
+void Loop::printIterationSpace() const {
+  int s = 0;
+  while (s < stmt.size()) {
+    std::cout << "s" << s << ": ";
+
+    Relation space = getNewIS(s);
+    int v = 1;
+    while (v <= space.n_inp()) {
+      space.name_input_var(v, CodeGen::loop_var_name_prefix + to_string(v));
+      ++v;
+    }
+    space.setup_names();
+    space.print();
+
+    ++s;
+  }
+}
+
+// Prints the dependence graph `dep` to std::cout, or a one-line notice when
+// it has no edges.
+void Loop::printDependenceGraph() const {
+  if (dep.edgeCount() == 0) {
+    std::cout << "no dependence exists" << std::endl;
+    return;
+  }
+
+  std::cout << "dependence graph:" << std::endl;
+  std::cout << dep;
+}
+
+// Returns getNewIS(i) for every statement i, in statement order.
+std::vector<Relation> Loop::getNewIS() const {
+  const int stmt_total = stmt.size();
+  std::vector<Relation> spaces(stmt_total);
+
+  int s = 0;
+  while (s < stmt_total) {
+    spaces[s] = getNewIS(s);
+    ++s;
+  }
+
+  return spaces;
+}
+
+// pragmas are tied to loops only ???
+// Attaches a pragma with text `pragmaText` to loop `level` of statement
+// `stmt_num` by calling CreatePragmaAttribute() on the IR's output builder.
+//
+// Throws std::invalid_argument when `stmt_num` does not name an existing
+// statement. `level` is passed through to the builder unchecked.
+void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) {
+  // check sanity of parameters (both bounds, so stmt[stmt_num] is valid)
+  if (stmt_num < 0 || stmt_num >= static_cast<int>(stmt.size()))
+    throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+
+  CG_outputBuilder *ocg = ir->builder();
+  CG_outputRepr *code = stmt[stmt_num].code;
+  ocg->CreatePragmaAttribute(code, level, pragmaText);
+}
+
+
+/*
+ void Loop::prefetch(int stmt_num, int level, const std::string &arrName, const std::string &indexName, int offset, int hint) {
+ // check sanity of parameters
+ if(stmt_num < 0)
+ throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *code = stmt[stmt_num].code;
+ ocg->CreatePrefetchAttribute(code, level, arrName, indexName, int offset, hint);
+ }
+*/
+
+// Attaches a prefetch attribute for array `arrName` with the given `hint`
+// to loop `level` of statement `stmt_num` by calling
+// CreatePrefetchAttribute() on the IR's output builder.
+//
+// Throws std::invalid_argument when `stmt_num` does not name an existing
+// statement. `level` and `hint` are passed through unchecked.
+void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) {
+  // check sanity of parameters (both bounds, so stmt[stmt_num] is valid)
+  if (stmt_num < 0 || stmt_num >= static_cast<int>(stmt.size()))
+    throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+
+  CG_outputBuilder *ocg = ir->builder();
+  CG_outputRepr *code = stmt[stmt_num].code;
+  ocg->CreatePrefetchAttribute(code, level, arrName, hint);
+}
+
+// Returns a vector with one entry per output dimension of the statement's
+// transformation. Even positions (0, 2, 4, ...) hold the constant obtained
+// via get_const() for that output dimension; odd positions stay 0.
+std::vector<int> Loop::getLexicalOrder(int stmt_num) const {
+  assert(stmt_num < stmt.size());
+
+  const int out_dims = stmt[stmt_num].xform.n_out();
+  std::vector<int> order(out_dims, 0);
+
+  int pos = 0;
+  while (pos < out_dims) {
+    order[pos] = get_const(stmt[stmt_num].xform, pos, Output_Var);
+    pos += 2;
+  }
+
+  return order;
+}
+
+// Find the sub loop nest specified by stmt_num and level.
+// Only statements whose iteration space is upper-bound satisfiable, that
+// have at least `level` loop levels, and whose lexical-order constants for
+// levels 1..level equal those of `stmt_num` are returned.
+std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const {
+  assert(stmt_num >= 0 && stmt_num < stmt.size());
+  assert(level > 0 && level <= stmt[stmt_num].loop_level.size());
+
+  // Start from every candidate that is satisfiable and deep enough.
+  std::set<int> candidates;
+  for (int s = 0; s < stmt.size(); ++s) {
+    if (!const_cast<Loop *>(this)->stmt[s].IS.is_upper_bound_satisfiable())
+      continue;
+    if (!(stmt[s].loop_level.size() >= level))
+      continue;
+    candidates.insert(s);
+  }
+
+  // Narrow down level by level, dropping statements that diverge.
+  for (int lvl = 1; lvl <= level; ++lvl) {
+    const int wanted = getLexicalOrder(stmt_num, lvl);
+    std::set<int>::iterator cur = candidates.begin();
+    while (cur != candidates.end()) {
+      std::set<int>::iterator here = cur;
+      ++cur;  // advance before a possible erase invalidates `here`
+      const int found = getLexicalOrder(*here, lvl);
+      if (found != wanted)
+        candidates.erase(here);
+    }
+  }
+
+  return candidates;
+}
+
+// Returns the constant that the statement's transformation assigns to
+// output variable 2*level - 1 (the constant dimension in front of loop
+// `level`). It scans the equalities of the transformation's single conjunct
+// for one that has coefficient +/-1 on that variable and mentions no other
+// variable, and solves it for the variable.
+//
+// Throws loop_error when no such equality exists.
+int Loop::getLexicalOrder(int stmt_num, int level) const {
+  assert(stmt_num >= 0 && stmt_num < stmt.size());
+  assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1);
+
+  const int const_pos = 2 * level - 1;
+  Relation &xf = const_cast<Loop *>(this)->stmt[stmt_num].xform;
+
+  for (EQ_Iterator eq(xf.single_conjunct()->EQs()); eq; eq++) {
+    if (abs((*eq).get_coef(xf.output_var(const_pos))) != 1)
+      continue;
+
+    // The equality must involve the target variable only.
+    bool only_target_var = true;
+    for (Constr_Vars_Iter var(*eq); var; var++) {
+      if (var.curr_var() != xf.output_var(const_pos)) {
+        only_target_var = false;
+        break;
+      }
+    }
+    if (!only_target_var)
+      continue;
+
+    // coef * v + c = 0  =>  v = -c for coef = 1, v = c for coef = -1.
+    const int constant = static_cast<int>((*eq).get_const());
+    if ((*eq).get_coef(xf.output_var(const_pos)) > 0)
+      return -constant;
+    return constant;
+  }
+
+  throw loop_error(
+      "can't find lexical order for statement " + to_string(stmt_num)
+      + "'s loop level " + to_string(level));
+}
+
+// Returns the statements whose lexical-order vector equals `lex` at every
+// even position 0, 2, ..., up to and including `dim`. A negative `dim`
+// selects every statement.
+std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const {
+  const int stmt_total = stmt.size();
+  std::set<int> matching;
+
+  for (int s = 0; s < stmt_total; ++s) {
+    bool same_prefix = true;
+
+    if (dim >= 0) {
+      const std::vector<int> other = getLexicalOrder(s);
+      for (int pos = 0; pos <= dim; pos += 2) {
+        if (lex[pos] != other[pos]) {
+          same_prefix = false;
+          break;
+        }
+      }
+    }
+
+    if (same_prefix)
+      matching.insert(s);
+  }
+
+  return matching;
+}
+
+// Adds `amount` to the lexical-order constant at position `dim` of every
+// statement that (a) agrees with `lex` on all positions before `dim` and
+// (b) sits at or after lex[dim] when shifting up (amount > 0), or at or
+// before lex[dim] when shifting down (amount < 0). No-op for amount == 0.
+void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) {
+  if (amount == 0)
+    return;
+
+  const int stmt_total = stmt.size();
+  for (int s = 0; s < stmt_total; ++s) {
+    const std::vector<int> cur = getLexicalOrder(s);
+
+    // Only statements sharing the enclosing prefix are affected.
+    bool same_prefix = true;
+    for (int pos = 0; pos < dim; ++pos) {
+      if (cur[pos] != lex[pos]) {
+        same_prefix = false;
+        break;
+      }
+    }
+    if (!same_prefix)
+      continue;
+
+    // Leave statements on the far side of the reference point untouched.
+    const bool before_ref_on_up_shift = (amount > 0) && (cur[dim] < lex[dim]);
+    const bool after_ref_on_down_shift = (amount < 0) && (cur[dim] > lex[dim]);
+    if (before_ref_on_up_shift || after_ref_on_down_shift)
+      continue;
+
+    assign_const(stmt[s].xform, dim, cur[dim] + amount);
+  }
+}
+
+// Partitions the statements in `active` into groups for loop `level`.
+//
+// If any active statement has no IR statement node (ir_stmt_node == NULL),
+// statements are grouped by the constant of output dimension 2*(level-1) of
+// their transformation, and groups are returned in increasing constant
+// order.
+//
+// Otherwise each statement's IR ancestors are walked (starting at the
+// parent of its statement node, then moving from loop node to loop node)
+// until a node whose payload equals level-1 is found. Statements with no
+// such ancestor each form a singleton group, returned first; the rest are
+// grouped by that ancestor node, in the map's pointer order.
+std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,
+                                                     int level) {
+  std::vector<std::set<int> > groups;
+
+  bool lex_order_already_set = false;
+  for (std::set<int>::iterator it = active.begin(); it != active.end(); it++)
+    if (stmt[*it].ir_stmt_node == NULL)
+      lex_order_already_set = true;
+
+  if (lex_order_already_set) {
+    // Group by the already-assigned lexical constant at this level.
+    std::map<int, std::set<int> > by_lex_const;
+    for (std::set<int>::iterator it = active.begin(); it != active.end();
+         it++) {
+      const int key = get_const(stmt[*it].xform, 2 * (level - 1), Output_Var);
+      by_lex_const[key].insert(*it);
+    }
+
+    for (std::map<int, std::set<int> >::iterator grp = by_lex_const.begin();
+         grp != by_lex_const.end(); grp++)
+      groups.push_back(grp->second);
+
+    return groups;
+  }
+
+  // Group by the enclosing IR loop node at this level.
+  std::set<int> unnested;
+  std::map<ir_tree_node*, std::set<int> > by_loop_node;
+
+  for (std::set<int>::iterator it = active.begin(); it != active.end(); it++) {
+    ir_tree_node *node = stmt[*it].ir_stmt_node;
+    node = node->parent;
+
+    while ((node != NULL) && (node->payload != level - 1)) {
+      // Step up to the next enclosing loop node (or run off the root).
+      do {
+        node = node->parent;
+      } while (node != NULL && node->content->type() != IR_CONTROL_LOOP);
+    }
+
+    if (node == NULL)
+      unnested.insert(*it);
+    else
+      by_loop_node[node].insert(*it);
+  }
+
+  // Statements not nested at this level come first, one group each.
+  for (std::set<int>::iterator it = unnested.begin(); it != unnested.end();
+       it++) {
+    std::set<int> single;
+    single.insert(*it);
+    groups.push_back(single);
+  }
+
+  for (std::map<ir_tree_node*, std::set<int> >::iterator grp =
+           by_loop_node.begin(); grp != by_loop_node.end(); grp++)
+    groups.push_back(grp->second);
+
+  return groups;
+}
+
+// Helper for typed fusion: called after vertex `n` of `g` has been given a
+// group number in node_num[].
+//
+// For every successor m of n:
+//   - throws loop_error if m already has a group number;
+//   - raises cant_fuse_with[m] to node_num[n] when m is of a fusable type
+//     (types[m]) and the n->m edge carries at least one `false` value,
+//     otherwise to cant_fuse_with[n];
+//   - removes the n->m edge and, if m then has no incoming edge from any
+//     other vertex, appends m (and its type) to work_list / type_list.
+void update_successors(int n,
+                       int node_num[],
+                       int cant_fuse_with[],
+                       Graph<std::set<int>, bool> &g,
+                       std::list<int> &work_list,
+                       std::list<bool> &type_list,
+                       std::vector<bool> types) {
+  typedef Graph<std::set<int>, bool>::EdgeList EdgeList;
+
+  std::set<int> successors;
+  for (EdgeList::iterator edge = g.vertex[n].second.begin();
+       edge != g.vertex[n].second.end(); edge++) {
+    const int succ = edge->first;
+
+    if (node_num[succ] != -1)
+      throw loop_error("Graph input for fusion has cycles not a DAG!!");
+
+    // An edge value of `false` marks a fusion-preventing path.
+    std::vector<bool> edge_values = g.getEdge(n, succ);
+    bool has_bad_edge_path = false;
+    for (int k = 0; k < edge_values.size(); k++) {
+      if (!edge_values[k]) {
+        has_bad_edge_path = true;
+        break;
+      }
+    }
+
+    const int limit = (types[succ] && has_bad_edge_path)
+                          ? node_num[n]
+                          : cant_fuse_with[n];
+    cant_fuse_with[succ] = std::max(cant_fuse_with[succ], limit);
+
+    successors.insert(succ);
+  }
+
+  for (std::set<int>::iterator succ = successors.begin();
+       succ != successors.end(); succ++) {
+    g.disconnect(n, *succ);
+
+    bool has_incoming_edge = false;
+    for (int v = 0; v < g.vertex.size(); v++) {
+      if (v != *succ && g.hasEdge(v, *succ)) {
+        has_incoming_edge = true;
+        break;
+      }
+    }
+
+    if (!has_incoming_edge) {
+      work_list.push_back(*succ);
+      type_list.push_back(types[*succ]);
+    }
+  }
+}
+
+
+
+// Returns the smallest lexical-order constant at loop `level` among the
+// statements in `stmts`.
+//
+// Throws std::invalid_argument when `stmts` is empty (there is no minimum);
+// getLexicalOrder() may additionally throw loop_error.
+int Loop::getMinLexValue(std::set<int> stmts, int level) {
+  if (stmts.empty())
+    throw std::invalid_argument(
+        "cannot compute minimum lexical order of an empty statement set");
+
+  std::set<int>::iterator it = stmts.begin();
+  int min = getLexicalOrder(*it, level);
+
+  for (++it; it != stmts.end(); ++it) {
+    const int curr = getLexicalOrder(*it, level);
+    if (curr < min)
+      min = curr;
+  }
+
+  return min;
+}
+
+
+
+
+// Builds a graph with one vertex per statement group in `s` and a boolean
+// edge between groups derived from the statement-level dependences in `dep`.
+//
+// For each pair of groups (i, j) with i < j, every statement pair
+// (ii in s[i], jj in s[j]) is examined in both directions. A dependence
+// vector counts when it is a control dependence, or a data dependence for
+// which has_been_carried_at(dep_dim) holds. A counted data dependence for
+// which has_negative_been_carried_at(dep_dim) holds marks the direction as
+// "connected" (edge value false); any other counted dependence marks it as
+// having a "true" edge. An edge with value false takes precedence over one
+// with value true when the graph edge is finally added.
+//
+// Throws loop_error when a counted j->i dependence is found while an i->j
+// dependence has already been recorded for this group pair.
+// NOTE(review): the check is only made while scanning j->i, so an i->j
+// dependence discovered for a later statement pair than the j->i one does
+// not appear to trigger it -- confirm whether that is intended.
+//
+// `s` and `dep` are taken by value.
+Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(
+ std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) {
+ Graph<std::set<int>, bool> g;
+
+ // one vertex per statement group, in the order given
+ for (int i = 0; i < s.size(); i++)
+ g.insert(s[i]);
+
+ for (int i = 0; i < s.size(); i++) {
+
+ for (int j = i + 1; j < s.size(); j++) {
+ // flags accumulate over all statement pairs of groups i and j
+ bool has_true_edge_i_to_j = false;
+ bool has_true_edge_j_to_i = false;
+ bool is_connected_i_to_j = false;
+ bool is_connected_j_to_i = false;
+ for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end();
+ ii++) {
+
+ for (std::set<int>::iterator jj = s[j].begin();
+ jj != s[j].end(); jj++) {
+
+ // direction i -> j
+ std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence()
+ || (dvs[k].is_data_dependence()
+ && dvs[k].has_been_carried_at(dep_dim))) {
+
+ if (dvs[k].is_data_dependence()
+ && dvs[k].has_negative_been_carried_at(
+ dep_dim)) {
+ //g.connect(i, j, false);
+ is_connected_i_to_j = true;
+ // this break leaves only the k loop; the ii/jj scan continues
+ break;
+ } else {
+ //g.connect(i, j, true);
+
+ has_true_edge_i_to_j = true;
+ //break
+ }
+ }
+
+ //if (is_connected)
+
+ // break;
+ // if (has_true_edge_i_to_j && !is_connected_i_to_j)
+ // g.connect(i, j, true);
+ // direction j -> i
+ dvs = dep.getEdge(*jj, *ii);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].is_control_dependence()
+ || (dvs[k].is_data_dependence()
+ && dvs[k].has_been_carried_at(dep_dim))) {
+
+ // dependences in both directions between the two groups
+ if (is_connected_i_to_j || has_true_edge_i_to_j)
+ throw loop_error(
+ "Graph input for fusion has cycles not a DAG!!");
+
+ if (dvs[k].is_data_dependence()
+ && dvs[k].has_negative_been_carried_at(
+ dep_dim)) {
+ //g.connect(i, j, false);
+ is_connected_j_to_i = true;
+ // this break leaves only the k loop; the ii/jj scan continues
+ break;
+ } else {
+ //g.connect(i, j, true);
+
+ has_true_edge_j_to_i = true;
+ //break;
+ }
+ }
+
+ // if (is_connected)
+ //break;
+ // if (is_connected)
+ //break;
+ }
+
+ //if (is_connected)
+ // break;
+ }
+
+
+ // add at most one edge per direction; value false wins over true
+ if (is_connected_i_to_j)
+ g.connect(i, j, false);
+ else if (has_true_edge_i_to_j)
+ g.connect(i, j, true);
+
+ if (is_connected_j_to_i)
+ g.connect(j, i, false);
+ else if (has_true_edge_j_to_i)
+ g.connect(j, i, true);
+
+ }
+ }
+ return g;
+}
+
+
+
+// Runs update_successors() for vertex `n` and reports any loop_error it
+// raises as a fusion failure caused by a negative dependence.
+static void update_successors_or_fusion_error(
+    int n, int node_num[], int cant_fuse_with[],
+    Graph<std::set<int>, bool> &g, std::list<int> &work_list,
+    std::list<bool> &type_list, const std::vector<bool> &types) {
+  try {
+    update_successors(n, node_num, cant_fuse_with, g, work_list, type_list,
+                      types);
+  } catch (const loop_error &) {
+    throw loop_error(
+        "statements cannot be fused together due to negative dependence");
+  }
+}
+
+// Groups the vertices of dependence graph `g` into fused sets.
+//
+// Vertices are processed from a work list seeded with the vertices that
+// have no incoming edge; a successor is appended once its last incoming
+// edge has been removed by update_successors(). A vertex whose entry in
+// `types` is true joins an existing fused group when one is permitted by
+// cant_fuse_with[], otherwise it opens a new group. A vertex whose type is
+// false always opens a new group of its own.
+//
+// Returns the groups (sets of statement numbers) in creation order.
+// Throws loop_error when `g` is not a DAG (no root or incomplete topological
+// sort) or when update_successors() reports a conflict.
+//
+// `g` is taken by value because its edges are consumed during processing.
+std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g,
+                                               std::vector<bool> &types) {
+  const int num_vertices = static_cast<int>(g.vertex.size());
+
+  // A vertex is a root when no other vertex has an edge into it.
+  std::vector<bool> roots(num_vertices, true);
+  for (int i = 0; i < num_vertices; i++)
+    for (int j = i + 1; j < num_vertices; j++) {
+      if (g.hasEdge(i, j))
+        roots[j] = false;
+      if (g.hasEdge(j, i))
+        roots[i] = false;
+    }
+
+  std::list<int> work_list;
+  std::list<bool> type_list;
+
+  // Group numbers are 1-based and can reach num_vertices, and several of
+  // these tables are indexed by group number, so they need
+  // num_vertices + 1 slots (index 0 means "no group").
+  std::vector<int> cant_fuse_with(num_vertices + 1, 0);
+  std::vector<int> node_to_fused_nodes(num_vertices + 1, 0); // group -> representative vertex
+  std::vector<int> node_num(num_vertices + 1, -1);           // vertex -> group, -1 = unassigned
+  std::vector<int> next_fused(num_vertices + 1, 0);          // group -> next fusable group
+
+  int fused = 0;      // first fusable group, 0 if none yet
+  int lastfused = 0;  // most recently created fusable group
+  int lastnum = 0;    // number of groups created so far
+  std::vector<std::set<int> > s;
+
+  for (int i = 0; i < num_vertices; i++)
+    if (roots[i]) {
+      work_list.push_back(i);
+      type_list.push_back(types[i]);
+    }
+
+  // topological sort according to chun's permute algorithm
+  std::vector<std::set<int> > s2 = g.topoSort();
+  if (work_list.empty() || (s2.size() != g.vertex.size())) {
+    std::cout << s2.size() << "\t" << g.vertex.size() << std::endl;
+    throw loop_error("Input for fusion not a DAG!!");
+  }
+
+  while (!work_list.empty()) {
+    const int n = work_list.front();
+    const bool type = type_list.front();
+    work_list.pop_front();
+    type_list.pop_front();
+
+    if (type) {
+      // Earliest group this vertex may join (0 = none available).
+      int p;
+      if (cant_fuse_with[n] == 0)
+        p = fused;
+      else
+        p = next_fused[cant_fuse_with[n]];
+
+      if (p != 0) {
+        // Join group p.
+        const int rep_node = node_to_fused_nodes[p];
+        node_num[n] = node_num[rep_node];
+
+        update_successors_or_fusion_error(n, &node_num[0], &cant_fuse_with[0],
+                                          g, work_list, type_list, types);
+
+        for (std::set<int>::iterator it = g.vertex[n].first.begin();
+             it != g.vertex[n].first.end(); it++)
+          s[node_num[n] - 1].insert(*it);
+      } else {
+        // Open a new fusable group and link it after the previous one.
+        s.push_back(g.vertex[n].first);
+        lastnum = lastnum + 1;
+        node_num[n] = lastnum;
+        node_to_fused_nodes[node_num[n]] = n;
+
+        if (lastfused == 0) {
+          fused = lastnum;
+          lastfused = fused;
+        } else {
+          next_fused[lastfused] = lastnum;
+          lastfused = lastnum;
+        }
+
+        update_successors_or_fusion_error(n, &node_num[0], &cant_fuse_with[0],
+                                          g, work_list, type_list, types);
+      }
+    } else {
+      // Non-fusable vertex: always its own group, never linked for fusion.
+      s.push_back(g.vertex[n].first);
+      lastnum = lastnum + 1;
+      node_num[n] = lastnum;
+      node_to_fused_nodes[node_num[n]] = n;
+
+      update_successors_or_fusion_error(n, &node_num[0], &cant_fuse_with[0],
+                                        g, work_list, type_list, types);
+    }
+  }
+
+  return s;
+}
+
+
+
+
+// Assigns lexical-order constants at constant dimension `dim` (even,
+// 0-based) of the transformation of every statement in `active`, starting
+// from `starting_order`, and recurses into deeper constant dimensions.
+//
+// Steps:
+//   1. Validate arguments; all active statements must share the same
+//      lexical-order constants at the even positions before `dim`.
+//   2. Partition statements by (type, payload) of their loop level at
+//      level = (dim + 2) / 2; statements with fewer levels are kept apart.
+//      Partitions are further split by control dependence from the
+//      level-less statements.
+//   3. If this yields more than one partition, order the partitions by a
+//      topological sort of their dependences and assign constants
+//      (recursing at the same `dim` for multi-statement partitions).
+//   4. Otherwise group statements via sort_by_same_loops(), fuse groups
+//      with typed_fusion(), assign one constant per fused group and recurse
+//      at dim + 2; at the last constant dimension each statement simply
+//      gets its own consecutive constant starting from 0.
+//
+// Throws std::invalid_argument for bad `dim`/statement numbers or when the
+// statements are not in the same sub loop nest, and loop_error when the
+// partitions cannot be ordered.
+void Loop::setLexicalOrder(int dim, const std::set<int> &active,
+                           int starting_order, std::vector<std::vector<std::string> > idxNames) {
+  fprintf(stderr, "Loop::setLexicalOrder() %d idxNames active size %d starting_order %d\n",
+          static_cast<int>(idxNames.size()), static_cast<int>(active.size()), starting_order);
+  if (active.size() == 0)
+    return;
+
+  for (int i = 0; i < idxNames.size(); i++) {
+    const std::vector<std::string> &what = idxNames[i];
+    for (int j = 0; j < what.size(); j++) {
+      fprintf(stderr, "%2d %2d %s\n", i, j, what[j].c_str());
+    }
+  }
+
+  // check for sanity of parameters
+  if (dim < 0 || dim % 2 != 0)
+    throw std::invalid_argument(
+        "invalid constant loop level to set lexicographical order");
+  std::vector<int> lex;
+  int ref_stmt_num;  // first active statement; always set since active is non-empty
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+    if ((*i) < 0 || (*i) >= stmt.size())
+      throw std::invalid_argument(
+          "invalid statement number " + to_string(*i));
+    if (dim >= stmt[*i].xform.n_out())
+      throw std::invalid_argument(
+          "invalid constant loop level to set lexicographical order");
+    if (i == active.begin()) {
+      lex = getLexicalOrder(*i);
+      ref_stmt_num = *i;
+    } else {
+      std::vector<int> lex2 = getLexicalOrder(*i);
+      for (int j = 0; j < dim; j += 2)
+        if (lex[j] != lex2[j])
+          throw std::invalid_argument(
+              "statements are not in the same sub loop nest");
+    }
+  }
+
+  // separate statements by current loop level types
+  int level = (dim + 2) / 2;
+  std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type;
+  std::set<int> active_by_no_level;
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+    if (level > stmt[*i].loop_level.size())
+      active_by_no_level.insert(*i);
+    else
+      active_by_level_type[std::make_pair(
+          stmt[*i].loop_level[level - 1].type,
+          stmt[*i].loop_level[level - 1].payload)].insert(*i);
+  }
+
+  // further separate statements due to control dependences
+  std::vector<std::set<int> > active_by_level_type_splitted;
+  for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i =
+           active_by_level_type.begin(); i != active_by_level_type.end(); i++)
+    active_by_level_type_splitted.push_back(i->second);
+  for (std::set<int>::iterator i = active_by_no_level.begin();
+       i != active_by_no_level.end(); i++)
+    for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) {
+      std::set<int> controlled, not_controlled;
+      for (std::set<int>::iterator k =
+               active_by_level_type_splitted[j].begin();
+           k != active_by_level_type_splitted[j].end(); k++) {
+        std::vector<DependenceVector> dvs = dep.getEdge(*i, *k);
+        bool is_controlled = false;
+        for (int kk = 0; kk < dvs.size(); kk++)
+          // compare, do not assign: only real control dependences count
+          if (dvs[kk].type == DEP_CONTROL) {
+            is_controlled = true;
+            break;
+          }
+        if (is_controlled)
+          controlled.insert(*k);
+        else
+          not_controlled.insert(*k);
+      }
+      if (controlled.size() != 0 && not_controlled.size() != 0) {
+        active_by_level_type_splitted.erase(
+            active_by_level_type_splitted.begin() + j);
+        active_by_level_type_splitted.push_back(controlled);
+        active_by_level_type_splitted.push_back(not_controlled);
+      }
+    }
+
+  // set lexical order separating loops with different loop types first
+  if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) {
+    int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1;
+
+    // one vertex per partition, then one per level-less statement
+    Graph<std::set<int>, Empty> g;
+    for (std::vector<std::set<int> >::iterator i =
+             active_by_level_type_splitted.begin();
+         i != active_by_level_type_splitted.end(); i++)
+      g.insert(*i);
+    for (std::set<int>::iterator i = active_by_no_level.begin();
+         i != active_by_no_level.end(); i++) {
+      std::set<int> t;
+      t.insert(*i);
+      g.insert(t);
+    }
+    for (int i = 0; i < g.vertex.size(); i++)
+      for (int j = i + 1; j < g.vertex.size(); j++) {
+        // edge i -> j if any statement pair has a control dependence or a
+        // data dependence not carried before dep_dim
+        bool connected = false;
+        for (std::set<int>::iterator ii = g.vertex[i].first.begin();
+             ii != g.vertex[i].first.end(); ii++) {
+          for (std::set<int>::iterator jj = g.vertex[j].first.begin();
+               jj != g.vertex[j].first.end(); jj++) {
+            std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj);
+            for (int k = 0; k < dvs.size(); k++)
+              if (dvs[k].is_control_dependence()
+                  || (dvs[k].is_data_dependence()
+                      && !dvs[k].has_been_carried_before(dep_dim))) {
+                g.connect(i, j);
+                connected = true;
+                break;
+              }
+            if (connected)
+              break;
+          }
+          if (connected)
+            break;
+        }
+        // same test for the opposite direction, edge j -> i
+        connected = false;
+        for (std::set<int>::iterator ii = g.vertex[i].first.begin();
+             ii != g.vertex[i].first.end(); ii++) {
+          for (std::set<int>::iterator jj = g.vertex[j].first.begin();
+               jj != g.vertex[j].first.end(); jj++) {
+            std::vector<DependenceVector> dvs = dep.getEdge(*jj, *ii);
+            for (int k = 0; k < dvs.size(); k++)
+              if (dvs[k].is_control_dependence()
+                  || (dvs[k].is_data_dependence()
+                      && !dvs[k].has_been_carried_before(dep_dim))) {
+                g.connect(j, i);
+                connected = true;
+                break;
+              }
+            if (connected)
+              break;
+          }
+          if (connected)
+            break;
+        }
+      }
+
+    std::vector<std::set<int> > s = g.topoSort();
+    if (s.size() != g.vertex.size())
+      throw loop_error(
+          "cannot separate statements with different loop types at loop level "
+          + to_string(level));
+
+    // assign lexical order
+    int order = starting_order;
+    for (int i = 0; i < s.size(); i++) {
+      std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first;
+      int sz = cur_scc.size();
+      if (sz == 1) {
+        int cur_stmt = *(cur_scc.begin());
+        assign_const(stmt[cur_stmt].xform, dim, order);
+        // a lone statement gets 0 in all deeper constant dimensions
+        for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2)
+          assign_const(stmt[cur_stmt].xform, j, 0);
+        order++;
+      } else { // recurse !
+        fprintf(stderr, "Loop:setLexicalOrder() recursing\n");
+        setLexicalOrder(dim, cur_scc, order, idxNames);
+        order += sz;
+      }
+    }
+  }
+  else { // set lexical order separating single iteration statements and loops
+
+    // NOTE(review): the classification below is computed but its results are
+    // not consumed by the live code that follows; it is retained because it
+    // calls getNewIS()/is_single_iteration(), whose effects are not visible
+    // from here.
+    std::set<int> true_singles;
+    std::set<int> nonsingles;
+    std::map<coef_t, std::set<int> > fake_singles;
+    std::set<int> fake_singles_;
+
+    // sort out statements that do not require loops
+    for (std::set<int>::iterator i = active.begin(); i != active.end();
+         i++) {
+      Relation cur_IS = getNewIS(*i);
+      if (is_single_iteration(cur_IS, dim + 1)) {
+        bool is_all_single = true;
+        for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2)
+          if (!is_single_iteration(cur_IS, j)) {
+            is_all_single = false;
+            break;
+          }
+        if (is_all_single)
+          true_singles.insert(*i);
+        else {
+          fake_singles_.insert(*i);
+          try {
+            fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert(*i);
+          } catch (const std::exception &e) {
+            fake_singles[posInfinity].insert(*i);
+          }
+        }
+      } else
+        nonsingles.insert(*i);
+    }
+
+    // split nonsingles forcibly according to negative dependences present (loop unfusible)
+    int dep_dim = get_dep_dim_of(ref_stmt_num, level);
+
+    if (dim < stmt[ref_stmt_num].xform.n_out() - 1) {
+
+      std::vector<std::set<int> > s = sort_by_same_loops(active, level);
+
+      if (s.size() > 1) {
+        // every group is treated as fusable
+        std::vector<bool> types;
+        for (int i = 0; i < s.size(); i++)
+          types.push_back(true);
+
+        Graph<std::set<int>, bool> g = construct_induced_graph_at_level(
+            s, dep, dep_dim);
+        s = typed_fusion(g, types);
+      }
+
+      int order = starting_order;
+      for (int i = 0; i < s.size(); i++) {
+
+        for (std::set<int>::iterator it = s[i].begin();
+             it != s[i].end(); it++) {
+          assign_const(stmt[*it].xform, dim, order);
+          stmt[*it].xform.simplify();
+        }
+
+        if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) { // recurse !
+          fprintf(stderr, "Loop:setLexicalOrder() recursing\n");
+          setLexicalOrder(dim + 2, s[i], order, idxNames);
+        }
+
+        order++;
+      }
+    } else {
+      // last constant dimension: number the statements consecutively from 0
+      int dummy_order = 0;
+      for (std::set<int>::iterator i = active.begin(); i != active.end();
+           i++) {
+        assign_const(stmt[*i].xform, dim, dummy_order++);
+        stmt[*i].xform.simplify();
+      }
+    }
+  }
+
+  fprintf(stderr, "LEAVING Loop::setLexicalOrder() %d idxNames\n",
+          static_cast<int>(idxNames.size()));
+  for (int i = 0; i < idxNames.size(); i++) {
+    const std::vector<std::string> &what = idxNames[i];
+    for (int j = 0; j < what.size(); j++) {
+      fprintf(stderr, "%2d %2d %s\n", i, j, what[j].c_str());
+    }
+  }
+}
+
+
+
+// Applies the pending transformation to every statement by delegating to
+// apply_xform(std::set<int>&) with the full set of statement numbers.
+void Loop::apply_xform() {
+  std::set<int> all_stmts;
+  int s = 0;
+  while (s < stmt.size()) {
+    all_stmts.insert(s);
+    ++s;
+  }
+  apply_xform(all_stmts);
+}
+
+// Applies the pending transformation to the single statement `stmt_num` by
+// delegating to apply_xform(std::set<int>&) with a one-element set.
+void Loop::apply_xform(int stmt_num) {
+  fprintf(stderr, "apply_xform( %d )\n", stmt_num);
+  std::set<int> only_this(&stmt_num, &stmt_num + 1);
+  apply_xform(only_this);
+}
+
+ // Folds the current transformation relation (xform) of every statement in
+ // `active` into that statement's code and iteration space, then replaces the
+ // xform with a straight 1-1 mapping that only carries the lexical-order
+ // constants.
+ //
+ // For each active statement the function
+ //   1. builds `mapping`, which selects the even-numbered outputs of xform,
+ //   2. names the mapping outputs tmp_loop_var_name_prefix + counter,
+ //   3. records uninterpreted function symbols (global variables with arity
+ //      > 0) found in the iteration space, but only if none are recorded yet
+ //      for that statement,
+ //   4. substitutes the old loop variables in the statement code and in the
+ //      recorded symbol arguments,
+ //   5. sets IS to the range of `mapping` restricted to the old IS,
+ //   6. installs the new xform.
+ // Afterwards tmp_loop_var_name_counter is advanced by the largest loop depth
+ // seen.  Trace output is written to stderr and stdout along the way.
+ void Loop::apply_xform(std::set<int> &active) {
+   fflush(stdout);
+   fprintf(stderr, "loop.cc apply_xform( set )\n");
+
+   int max_n = 0;
+
+   omega::CG_outputBuilder *ocg = ir->builder();
+   for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+     int n = stmt[*i].loop_level.size();
+     if (n > max_n)
+       max_n = n;
+
+     // saved now; used for the constant dimensions of the new xform built at
+     // the end of this iteration
+     std::vector<int> lex = getLexicalOrder(*i);
+
+     // output j of the mapping equals input 2*j, i.e. the loop dimensions
+     omega::Relation mapping(2 * n + 1, n);
+     omega::F_And *f_root = mapping.add_and();
+     for (int j = 1; j <= n; j++) {
+       omega::EQ_Handle h = f_root->add_EQ();
+       h.update_coef(mapping.output_var(j), 1);
+       h.update_coef(mapping.input_var(2 * j), -1);
+     }
+     mapping = omega::Composition(mapping, stmt[*i].xform);
+     mapping.simplify();
+
+     // match omega input/output variables to variable names in the code
+     for (int j = 1; j <= stmt[*i].IS.n_set(); j++)
+       mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name());
+     for (int j = 1; j <= n; j++)
+       mapping.name_output_var(j,
+                               tmp_loop_var_name_prefix
+                               + omega::to_string(
+                                   tmp_loop_var_name_counter + j - 1));
+     mapping.setup_names();
+     mapping.print(); // "{[I] -> [_t1] : I = _t1 }
+     fflush(stdout);
+
+     // NOTE(review): `known` is only consumed by the disabled outputStatement
+     // call below; it is kept so the sequence of omega calls is unchanged.
+     omega::Relation known = Extend_Set(copy(this->known),
+                                        mapping.n_out() - this->known.n_set());
+     //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL));
+
+     // builder supplied by the IR (this is probably CG_chillBuilder)
+     omega::CG_outputBuilder *ocgr = ir->builder();
+
+     // The string builder used to be created with `new` for every statement
+     // and was never released; automatic storage removes that leak.
+     // NOTE(review): assumes representations produced by the string builder
+     // do not refer back to the builder once created -- confirm.
+     omega::CG_stringBuilder string_builder;
+     omega::CG_stringBuilder *ocgs = &string_builder;
+
+     if (uninterpreted_symbols[*i].size() == 0) {
+       // names of the function symbols already handled for this statement
+       std::set<std::string> globals;
+
+       for (omega::DNF_Iterator di(stmt[*i].IS.query_DNF()); di; di++) {
+         for (omega::Constraint_Iterator e(*di); e; e++) {
+           for (omega::Constr_Vars_Iter cvi(*e); cvi; cvi++) {
+             omega::Variable_ID v = cvi.curr_var();
+             if (v->kind() == omega::Global_Var
+                 && v->get_global_var()->arity() > 0
+                 && globals.find(v->name()) == globals.end()) {
+               omega::Global_Var_ID g = v->get_global_var();
+               globals.insert(v->name());
+               std::vector<omega::CG_outputRepr *> reprs;
+               std::vector<omega::CG_outputRepr *> reprs2;
+
+               // the symbol's arguments are the first arity() set variables,
+               // once per builder
+               for (int l = 1; l <= g->arity(); l++) {
+                 omega::CG_outputRepr *temp = ocgr->CreateIdent(
+                     stmt[*i].IS.set_var(l)->name());
+                 omega::CG_outputRepr *temp2 = ocgs->CreateIdent(
+                     stmt[*i].IS.set_var(l)->name());
+
+                 reprs.push_back(temp);
+                 reprs2.push_back(temp2);
+               }
+               uninterpreted_symbols[*i].insert(
+                   std::pair<std::string,
+                             std::vector<omega::CG_outputRepr *> >(
+                       v->get_global_var()->base_name(),
+                       reprs));
+               uninterpreted_symbols_stringrepr[*i].insert(
+                   std::pair<std::string,
+                             std::vector<omega::CG_outputRepr *> >(
+                       v->get_global_var()->base_name(),
+                       reprs2));
+             }
+           }
+         }
+       }
+     }
+
+     // names of the loop variables before the transformation is applied
+     std::vector<std::string> loop_vars;
+     for (int j = 1; j <= stmt[*i].IS.n_set(); j++) {
+       loop_vars.push_back(stmt[*i].IS.set_var(j)->name());
+     }
+     for (int j = 0; j<loop_vars.size(); j++) {
+       fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str());
+     }
+
+     // replacement expressions for the old loop variables, obtained from the
+     // inverse of the mapping
+     std::vector<CG_outputRepr *> subs = output_substitutions(ocg,
+                                                              Inverse(copy(mapping)),
+                                                              std::vector<std::pair<CG_outputRepr *, int> >(
+                                                                  mapping.n_out(),
+                                                                  std::make_pair(
+                                                                      static_cast<CG_outputRepr *>(NULL), 0)),
+                                                              uninterpreted_symbols[*i]);
+
+     // pristine copy of the substitutions; cloned again for every symbol
+     // argument rewritten below
+     std::vector<CG_outputRepr *> subs2;
+     for (int l = 0; l < subs.size(); l++)
+       subs2.push_back(subs[l]->clone());
+
+     fprintf(stderr, "%d uninterpreted symbols\n", (int)uninterpreted_symbols.size());
+     for (int j = 0; j<loop_vars.size(); j++) {
+       fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str());
+     }
+
+     // rewrite the arguments of every recorded symbol in terms of the new
+     // loop variables
+     int count = 0;
+     for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it =
+              uninterpreted_symbols[*i].begin();
+          it != uninterpreted_symbols[*i].end(); it++) {
+       fprintf(stderr, "\ncount %d\n", count);
+
+       std::vector<CG_outputRepr *> reprs_ = it->second;
+       fprintf(stderr, "%d reprs_\n", (int)reprs_.size());
+
+       std::vector<CG_outputRepr *> reprs_2;
+       for (int k = 0; k < reprs_.size(); k++) {
+         fprintf(stderr, "k %d\n", k);
+         // renamed from `subs` so it no longer shadows the outer vector
+         std::vector<CG_outputRepr *> subs_copy;
+         for (int l = 0; l < subs2.size(); l++) {
+           fprintf(stderr, "l %d\n", l);
+           subs_copy.push_back(subs2[l]->clone());
+         }
+
+         fprintf(stderr, "clone\n");
+         CG_outputRepr *c = reprs_[k]->clone();
+         c->dump(); fflush(stdout);
+
+         fprintf(stderr, "createsub\n");
+         CG_outputRepr *s = ocgr->CreateSubstitutedStmt(0, c,
+                                                        loop_vars, subs_copy, true);
+
+         fprintf(stderr, "push back\n");
+         reprs_2.push_back( s );
+       }
+
+       it->second = reprs_2;
+       count++;
+       fprintf(stderr, "bottom\n");
+     }
+
+     // same substitutions again, produced by the string builder
+     std::vector<CG_outputRepr *> subs3 = output_substitutions(
+         ocgs, Inverse(copy(mapping)),
+         std::vector<std::pair<CG_outputRepr *, int> >(
+             mapping.n_out(),
+             std::make_pair(
+                 static_cast<CG_outputRepr *>(NULL), 0)),
+         uninterpreted_symbols_stringrepr[*i]);
+
+     for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it =
+              uninterpreted_symbols_stringrepr[*i].begin();
+          it != uninterpreted_symbols_stringrepr[*i].end(); it++) {
+
+       std::vector<CG_outputRepr *> reprs_ = it->second;
+       std::vector<CG_outputRepr *> reprs_2;
+       // NOTE(review): argument k is replaced by substitution k directly,
+       // which presumes subs3 has at least as many entries as the symbol has
+       // arguments -- confirm.
+       for (int k = 0; k < reprs_.size(); k++)
+         reprs_2.push_back(subs3[k]->clone());
+
+       it->second = reprs_2;
+     }
+
+     fprintf(stderr, "loop.cc stmt[*i].code =\n");
+     stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars,
+                                                subs);
+
+     stmt[*i].IS = omega::Range(Restrict_Domain(mapping, stmt[*i].IS));
+     stmt[*i].IS.simplify();
+
+     // replace original transformation relation with straight 1-1 mapping
+     mapping = Relation(n, 2 * n + 1);
+     f_root = mapping.add_and();
+     // even outputs carry the loop variables ...
+     for (int j = 1; j <= n; j++) {
+       EQ_Handle h = f_root->add_EQ();
+       h.update_coef(mapping.output_var(2 * j), 1);
+       h.update_coef(mapping.input_var(j), -1);
+     }
+     // ... odd outputs are pinned to the saved lexical-order constants
+     for (int j = 1; j <= 2 * n + 1; j += 2) {
+       EQ_Handle h = f_root->add_EQ();
+       h.update_coef(mapping.output_var(j), 1);
+       h.update_const(-lex[j - 1]);
+     }
+     stmt[*i].xform = mapping;
+   }
+
+   // reserve the temporary names handed out above
+   tmp_loop_var_name_counter += max_n;
+   fflush(stdout);
+   fprintf(stderr, "loop.cc LEAVING apply_xform( set )\n\n");
+ }
+
+
+
+
+ // Intersects the loop's `known` set with `cond`.
+ // Whichever of the two has fewer set variables is extended to the size of the
+ // other before the intersection.  The cached code generation objects are
+ // deleted and reset first.
+ void Loop::addKnown(const Relation &cond) {
+   // drop the saved codegen computation
+   delete last_compute_cgr_;
+   last_compute_cgr_ = NULL;
+   delete last_compute_cg_;
+   last_compute_cg_ = NULL;
+   fprintf(stderr, "Loop::addKnown(), SETTING last_compute_cg_ = NULL\n");
+
+   const int known_vars = this->known.n_set();
+   Relation extra = copy(cond);
+   const int extra_vars = extra.n_set();
+
+   // positive: `known` is the smaller set; negative: the copy of `cond` is
+   const int gap = extra_vars - known_vars;
+   if (gap > 0)
+     this->known = Extend_Set(this->known, gap);
+   else if (gap < 0)
+     extra = Extend_Set(extra, -gap);
+
+   this->known = Intersection(this->known, extra);
+ }
+
+ // Disconnects the dependence edge from statement `stmt_num_from` to statement
+ // `stmt_num_to` in the dependence graph.
+ // Throws std::invalid_argument when either statement number is not a valid
+ // index into `stmt`.
+ void Loop::removeDependence(int stmt_num_from, int stmt_num_to) {
+   // Validate source first, then destination, so the reported number matches
+   // the first offending argument.  Negative numbers are rejected as well
+   // (the comparison against size() alone already did so through the
+   // signed-to-unsigned conversion).
+   const int endpoints[2] = { stmt_num_from, stmt_num_to };
+   for (int e = 0; e < 2; e++) {
+     const int candidate = endpoints[e];
+     if (candidate < 0 || candidate >= stmt.size())
+       throw std::invalid_argument(
+           "invalid statement number " + to_string(candidate));
+   }
+
+   dep.disconnect(stmt_num_from, stmt_num_to);
+ }
+
+ // Writes one line per statement to std::cout.
+ // The line starts with "s<k>: " (k is the 1-based statement number).  For each
+ // loop level it then prints the lexical-order entry at index 2*level (if the
+ // lexical order is long enough) followed by "(dim:<payload>)",
+ // "(tile:<payload>)" or "(unknown)" depending on the level type, and a space.
+ // The remaining lexical-order entries, taken at every second index starting
+ // at 2*depth, are printed last, separated by spaces.
+ void Loop::dump() const {
+   for (int s = 0; s < stmt.size(); s++) {
+     const std::vector<int> order = getLexicalOrder(s);
+     std::cout << "s" << s + 1 << ": ";
+
+     const int depth = stmt[s].loop_level.size();
+     for (int lvl = 0; lvl < depth; lvl++) {
+       if (2 * lvl < order.size())
+         std::cout << order[2 * lvl];
+
+       if (stmt[s].loop_level[lvl].type == LoopLevelOriginal)
+         std::cout << "(dim:" << stmt[s].loop_level[lvl].payload << ")";
+       else if (stmt[s].loop_level[lvl].type == LoopLevelTile)
+         std::cout << "(tile:" << stmt[s].loop_level[lvl].payload << ")";
+       else
+         std::cout << "(unknown)";
+
+       std::cout << ' ';
+     }
+
+     for (int pos = 2 * depth; pos < order.size(); pos += 2) {
+       std::cout << order[pos];
+       // no trailing separator after the very last lexical-order entry
+       if (pos != order.size() - 1)
+         std::cout << ' ';
+     }
+     std::cout << std::endl;
+   }
+ }
+
+ // Applies the affine transformation described by matrix `T` to every
+ // statement of the loop nest.
+ //
+ // `T` must have num_dep_dim rows; each row has either num_dep_dim entries
+ // (linear part only) or num_dep_dim + 1 entries (linear part followed by a
+ // constant offset).  Every statement must have exactly num_dep_dim loop
+ // levels, all of type LoopLevelOriginal.
+ //
+ // Effects: the saved code generation objects are deleted, each statement's
+ // xform is composed with the relation built from `T`, the bounds of the
+ // W2R/R2W/W2W/R2R dependence vectors are recomputed, and the lexical order is
+ // reassigned for all statements.
+ //
+ // Returns true (also when there are no statements at all).
+ // Throws std::invalid_argument when the loop nest or the matrix does not meet
+ // the requirements above.
+ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {
+   if (stmt.size() == 0)
+     return true;
+
+   // check for sanity of parameters
+   for (int i = 0; i < stmt.size(); i++) {
+     if (stmt[i].loop_level.size() != num_dep_dim)
+       throw std::invalid_argument(
+           "nonsingular loop transformations must be applied to original perfect loop nest");
+     for (int j = 0; j < stmt[i].loop_level.size(); j++)
+       if (stmt[i].loop_level[j].type != LoopLevelOriginal)
+         throw std::invalid_argument(
+             "nonsingular loop transformations must be applied to original perfect loop nest");
+   }
+   if (T.size() != num_dep_dim)
+     throw std::invalid_argument("invalid transformation matrix");
+   // Validate the width of every row of T.  This loop used to run over the
+   // number of statements, which indexed past the end of T when there were
+   // more statements than rows and left rows unchecked when there were fewer.
+   for (int i = 0; i < T.size(); i++)
+     if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim)
+       throw std::invalid_argument("invalid transformation matrix");
+
+   // invalidate saved codegen computation
+   delete last_compute_cgr_;
+   last_compute_cgr_ = NULL;
+   delete last_compute_cg_;
+   last_compute_cg_ = NULL;
+   fprintf(stderr, "Loop::nonsingular(), SETTING last_compute_cg_ = NULL\n");
+
+   // build relation from matrix
+   Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1);
+   F_And *f_root = mapping.add_and();
+   // even positions: output 2(i+1) = sum_j T[i][j] * input 2(j+1) [+ offset]
+   for (int i = 0; i < num_dep_dim; i++) {
+     EQ_Handle h = f_root->add_EQ();
+     h.update_coef(mapping.output_var(2 * (i + 1)), -1);
+     for (int j = 0; j < num_dep_dim; j++)
+       if (T[i][j] != 0)
+         h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]);
+     if (T[i].size() == num_dep_dim + 1)
+       h.update_const(T[i][num_dep_dim]);
+   }
+   // odd positions are passed through unchanged
+   for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) {
+     EQ_Handle h = f_root->add_EQ();
+     h.update_coef(mapping.output_var(i), -1);
+     h.update_coef(mapping.input_var(i), 1);
+   }
+
+   // update transformation relations
+   for (int i = 0; i < stmt.size(); i++)
+     stmt[i].xform = Composition(copy(mapping), stmt[i].xform);
+
+   // update dependence graph
+   for (int i = 0; i < dep.vertex.size(); i++)
+     for (DependenceGraph::EdgeList::iterator j =
+              dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+          j++) {
+       // work on a copy of the edge's vectors and store it back afterwards
+       std::vector<DependenceVector> dvs = j->second;
+       for (int k = 0; k < dvs.size(); k++) {
+         DependenceVector &dv = dvs[k];
+         switch (dv.type) {
+         case DEP_W2R:
+         case DEP_R2W:
+         case DEP_W2W:
+         case DEP_R2R: {
+           std::vector<coef_t> lbounds(num_dep_dim), ubounds(
+               num_dep_dim);
+           for (int p = 0; p < num_dep_dim; p++) {
+             coef_t lb = 0;
+             coef_t ub = 0;
+             // Accumulate row p of T against the old bounds.  A positive
+             // coefficient pairs lower with lower and upper with upper; a
+             // negative one swaps them.  Once a bound is infinite it stays so.
+             for (int q = 0; q < num_dep_dim; q++) {
+               if (T[p][q] > 0) {
+                 if (lb == -posInfinity
+                     || dv.lbounds[q] == -posInfinity)
+                   lb = -posInfinity;
+                 else
+                   lb += T[p][q] * dv.lbounds[q];
+                 if (ub == posInfinity
+                     || dv.ubounds[q] == posInfinity)
+                   ub = posInfinity;
+                 else
+                   ub += T[p][q] * dv.ubounds[q];
+               } else if (T[p][q] < 0) {
+                 if (lb == -posInfinity
+                     || dv.ubounds[q] == posInfinity)
+                   lb = -posInfinity;
+                 else
+                   lb += T[p][q] * dv.ubounds[q];
+                 if (ub == posInfinity
+                     || dv.lbounds[q] == -posInfinity)
+                   ub = posInfinity;
+                 else
+                   ub += T[p][q] * dv.lbounds[q];
+               }
+             }
+             // NOTE(review): the constant offset of the row is added to both
+             // bounds here.  If these bounds are dependence distances, an
+             // offset applied to source and sink alike would cancel out --
+             // confirm that this is intended.
+             if (T[p].size() == num_dep_dim + 1) {
+               if (lb != -posInfinity)
+                 lb += T[p][num_dep_dim];
+               if (ub != posInfinity)
+                 ub += T[p][num_dep_dim];
+             }
+             lbounds[p] = lb;
+             ubounds[p] = ub;
+           }
+           dv.lbounds = lbounds;
+           dv.ubounds = ubounds;
+
+           break;
+         }
+         default:
+           ;
+         }
+       }
+       j->second = dvs;
+     }
+
+   // set constant loop values
+   std::set<int> active;
+   for (int i = 0; i < stmt.size(); i++)
+     active.insert(i);
+   setLexicalOrder(0, active);
+
+   return true;
+ }
+
+
+ // Checks dependence vector `dv` between statements `i` and `j` against the
+ // statements' lexical order.
+ //
+ // For a non-scalar dependence the length of the common prefix of the two
+ // lexical orders is halved to obtain a number of levels:
+ //   - zero levels: returns true;
+ //   - otherwise the first of those levels whose lower bound in `dv` is
+ //     non-zero decides: positive gives true, negative gives false.
+ // If nothing decided (or `dv` is a scalar dependence) the result is `before`.
+ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j,
+     const DependenceVector &dv, bool before) {
+   std::vector<int> lex_i = getLexicalOrder(i);
+   std::vector<int> lex_j = getLexicalOrder(j);
+
+   if (!dv.is_scalar_dependence) {
+     // Length of the common prefix.  Both vectors are bounded here; the scan
+     // previously only checked lex_i's length while also indexing lex_j.
+     int last_dim = 0;
+     while (last_dim < lex_i.size() && last_dim < lex_j.size()
+            && lex_i[last_dim] == lex_j[last_dim])
+       last_dim++;
+
+     last_dim = last_dim / 2;
+     if (last_dim == 0)
+       return true;
+
+     // `level` replaces a loop variable that shadowed parameter `i`; the
+     // extra bound keeps the index inside dv.lbounds.
+     for (int level = 0; level < last_dim && level < dv.lbounds.size();
+          level++) {
+       if (dv.lbounds[level] > 0)
+         return true;
+       else if (dv.lbounds[level] < 0)
+         return false;
+     }
+   }
+
+   return before;
+ }
+
+// Manu:: reduction operation
+
+void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
+ std::string arrName, int memory_type, int padding_alignment,
+ int assign_then_accumulate, int padding_stride) {
+
+ //std::cout << "In scalar_expand function: " << stmt_num << ", " << arrName << "\n";
+ //std::cout.flush();
+
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+ // check for sanity of parameters
+ bool found_non_constant_size_dimension = false;
+
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument(
+ "invalid statement number " + to_string(stmt_num));
+ //Anand: adding check for privatized levels
+ //if (arrName != "RHS")
+ // throw std::invalid_argument(
+ // "invalid 3rd argument: only 'RHS' supported " + arrName);
+ for (int i = 0; i < levels.size(); i++) {
+ if (levels[i] <= 0 || levels[i] > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument(
+ "1invalid loop level " + to_string(levels[i]));
+
+ if (i > 0) {
+ if (levels[i] < levels[i - 1])
+ throw std::invalid_argument(
+ "loop levels must be in ascending order");
+ }
+ }
+ //end --adding check for privatized levels
+
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+ fprintf(stderr, "Loop::scalar_expand(), SETTING last_compute_cg_ = NULL\n");
+
+ fprintf(stderr, "\nloop.cc finding array accesses in stmt %d of the code\n",stmt_num );
+ std::vector<IR_ArrayRef *> access = ir->FindArrayRef(stmt[stmt_num].code);
+ fprintf(stderr, "loop.cc L2726 %d access\n", access.size());
+
+ IR_ArraySymbol *sym = NULL;
+ fprintf(stderr, "arrName %s\n", arrName.c_str());
+ if (arrName == "RHS") {
+ fprintf(stderr, "sym RHS\n");
+ sym = access[0]->symbol();
+ }
+ else {
+ fprintf(stderr, "looking for array %s in access\n", arrName.c_str());
+ for (int k = 0; k < access.size(); k++) { // BUH
+
+ //fprintf(stderr, "access[%d] = %s ", k, access[k]->getTypeString()); access[k]->print(0,stderr); fprintf(stderr, "\n");
+
+ std::string name = access[k]->symbol()->name();
+ //fprintf(stderr, "comparing %s to %s\n", name.c_str(), arrName.c_str());
+
+ if (access[k]->symbol()->name() == arrName) {
+ fprintf(stderr, "found it sym access[ k=%d ]\n", k);
+ sym = access[k]->symbol();
+ }
+ }
+ }
+ if (!sym) fprintf(stderr, "DIDN'T FIND IT\n");
+ fprintf(stderr, "sym %p\n", sym);
+
+ // collect array references by name
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ int dim = 2 * levels[levels.size() - 1] - 1;
+ std::set<int> same_loop = getStatements(lex, dim - 1);
+
+ //Anand: shifting this down
+ // assign_const(stmt[newStmt_num].xform, 2*level+1, 1);
+
+ // std::cout << " before temp array name \n ";
+ // create a temporary variable
+ IR_Symbol *tmp_sym;
+
+ // get the loop upperbound, that would be the size of the temp array.
+ omega::coef_t lb[levels.size()], ub[levels.size()], size[levels.size()];
+
+ //Anand Adding apply xform so that tiled loop bounds are reflected
+ fprintf(stderr, "Adding apply xform so that tiled loop bounds are reflected\n");
+ apply_xform(same_loop);
+ fprintf(stderr, "loop.cc, back from apply_xform()\n");
+
+ //Anand commenting out the following 4 lines
+ /* copy(stmt[stmt_num].IS).query_variable_bounds(
+ copy(stmt[stmt_num].IS).set_var(level), lb, ub);
+ std::cout << "Upper Bound = " << ub << "\n";
+ std::cout << "lower Bound = " << lb << "\n";
+ */
+ // testing testing -- Manu ////////////////////////////////////////////////
+ /*
+ // int n_dim = sym->n_dim();
+ // std::cout << "------- n_dim ----------- " << n_dim << "\n";
+ std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(stmt[stmt_num].IS, stmt[stmt_num].IS.set_var(level));
+ omega::coef_t index_stride;
+ if (result.second != NULL) {
+ index_stride = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(stmt[stmt_num].IS.set_var(level))));
+ std::cout << "simplest_stride :: " << index_stride << ", " << result.first.get_coef(result.second) << ", " << result.first.get_coef(stmt[stmt_num].IS.set_var(level))<< "\n";
+ }
+ Relation bound;
+ // bound = get_loop_bound(stmt[stmt_num].IS, level);
+ bound = SimpleHull(stmt[stmt_num].IS,true, true);
+ bound.print();
+
+ bound = copy(stmt[stmt_num].IS);
+ for (int i = 1; i < level; i++) {
+ bound = Project(bound, i, Set_Var);
+ std::cout << "-------------------------------\n";
+ bound.print();
+ }
+
+ bound.simplify();
+ bound.print();
+ // bound = get_loop_bound(bound, level);
+
+ copy(bound).query_variable_bounds(copy(bound).set_var(level), lb, ub);
+ std::cout << "Upper Bound = " << ub << "\n";
+ std::cout << "lower Bound = " << lb << "\n";
+
+ result = find_simplest_stride(bound, bound.set_var(level));
+ if (result.second != NULL)
+ index_stride = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level))));
+ else
+ index_stride = 1;
+ std::cout << "simplest_stride 11:: " << index_stride << "\n";
+ */
+ ////////////////////////////////////////////////////////////////////////////////
+ ///////////////////////////// copied datacopy code here /////////////////////////////////////////////
+
+ //std::cout << "In scalar_expand function 2: " << stmt_num << ", " << arrName << "\n";
+ //std::cout.flush();
+
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+
+
+ int n_dim = levels.size();
+ Relation copy_is = copy(stmt[stmt_num].IS);
+ // extract temporary array information
+ CG_outputBuilder *ocg1 = ir->builder();
+ std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL
+ std::vector<coef_t> index_stride(n_dim);
+ std::vector<bool> is_index_eq(n_dim, false);
+ std::vector<std::pair<int, CG_outputRepr *> > index_sz(0);
+ Relation reduced_copy_is = copy(copy_is);
+ std::vector<CG_outputRepr *> size_repr;
+ std::vector<int> size_int;
+ Relation xform = copy(stmt[stmt_num].xform);
+ for (int i = 0; i < n_dim; i++) {
+
+ dim = 2 * levels[i] - 1;
+ //Anand: Commenting out the lines below: not required
+ // if (i != 0)
+ // reduced_copy_is = Project(reduced_copy_is, level - 1 + i, Set_Var);
+ Relation bound = get_loop_bound(copy(reduced_copy_is), levels[i] - 1);
+
+ // extract stride
+ std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
+ bound.set_var(levels[i]));
+ if (result.second != NULL)
+ index_stride[i] = abs(result.first.get_coef(result.second))
+ / gcd(abs(result.first.get_coef(result.second)),
+ abs(
+ result.first.get_coef(
+ bound.set_var(levels[i]))));
+ else
+ index_stride[i] = 1;
+ // std::cout << "simplest_stride 11:: " << index_stride[i] << "\n";
+
+ // check if this array index requires loop
+ Conjunct *c = bound.query_DNF()->single_conjunct();
+ for (EQ_Iterator ei(c->EQs()); ei; ei++) {
+ if ((*ei).has_wildcards())
+ continue;
+
+ int coef = (*ei).get_coef(bound.set_var(levels[i]));
+ if (coef != 0) {
+ int sign = 1;
+ if (coef < 0) {
+ coef = -coef;
+ sign = -1;
+ }
+
+ CG_outputRepr *op = NULL;
+ for (Constr_Vars_Iter ci(*ei); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var: {
+ if ((*ci).var != bound.set_var(levels[i]))
+ if ((*ci).coef * sign == 1)
+ op = ocg1->CreateMinus(op,
+ ocg1->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef * sign == -1)
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef * sign > 1) {
+ op = ocg1->CreateMinus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(
+ abs((*ci).coef)),
+ ocg1->CreateIdent(
+ (*ci).var->name())));
+ }
+ else
+ // (*ci).coef*sign < -1
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(
+ abs((*ci).coef)),
+ ocg1->CreateIdent(
+ (*ci).var->name())));
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ if ((*ci).coef * sign == 1)
+ op = ocg1->CreateMinus(op,
+ ocg1->CreateIdent(g->base_name()));
+ else if ((*ci).coef * sign == -1)
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateIdent(g->base_name()));
+ else if ((*ci).coef * sign > 1)
+ op = ocg1->CreateMinus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(abs((*ci).coef)),
+ ocg1->CreateIdent(g->base_name())));
+ else
+ // (*ci).coef*sign < -1
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(abs((*ci).coef)),
+ ocg1->CreateIdent(g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error("unsupported array index expression");
+ }
+ }
+ if ((*ei).get_const() != 0)
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateInt(-sign * ((*ei).get_const())));
+ if (coef != 1)
+ op = ocg1->CreateIntegerFloor(op, ocg1->CreateInt(coef));
+
+ index_lb[i] = op;
+ is_index_eq[i] = true;
+ break;
+ }
+ }
+ if (is_index_eq[i])
+ continue;
+
+ // separate lower and upper bounds
+ std::vector<GEQ_Handle> lb_list, ub_list;
+ std::set<Variable_ID> excluded_floor_vars;
+ excluded_floor_vars.insert(bound.set_var(levels[i]));
+ for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
+ int coef = (*gi).get_coef(bound.set_var(levels[i]));
+ if (coef != 0 && (*gi).has_wildcards()) {
+ bool clean_bound = true;
+ GEQ_Handle h;
+ for (Constr_Vars_Iter cvi(*gi, true); gi; gi++)
+ if (!find_floor_definition(bound, (*cvi).var,
+ excluded_floor_vars).first) {
+ clean_bound = false;
+ break;
+ }
+ else
+ h= find_floor_definition(bound, (*cvi).var,
+ excluded_floor_vars).second;
+
+ if (!clean_bound)
+ continue;
+ else{
+ if (coef > 0)
+ lb_list.push_back(h);
+ else if (coef < 0)
+ ub_list.push_back(h);
+ continue;
+ }
+
+ }
+
+ if (coef > 0)
+ lb_list.push_back(*gi);
+ else if (coef < 0)
+ ub_list.push_back(*gi);
+ }
+ if (lb_list.size() == 0 || ub_list.size() == 0)
+ throw loop_error("failed to calcuate array footprint size");
+
+ // build lower bound representation
+ std::vector<CG_outputRepr *> lb_repr_list;
+ /* for (int j = 0; j < lb_list.size(); j++){
+ if(this->known.n_set() == 0)
+ lb_repr_list.push_back(output_lower_bound_repr(ocg1, lb_list[j], bound.set_var(level-1+i+1), result.first, result.second, bound, Relation::True(bound.n_set()), std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0))));
+ else
+ lb_repr_list.push_back(output_lower_bound_repr(ocg1, lb_list[j], bound.set_var(level-1+i+1), result.first, result.second, bound, this->known, std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0))));
+
+ }
+ */
+ if (lb_repr_list.size() > 1)
+ index_lb[i] = ocg1->CreateInvoke("max", lb_repr_list);
+ else if (lb_repr_list.size() == 1)
+ index_lb[i] = lb_repr_list[0];
+
+ // build temporary array size representation
+ {
+ Relation cal(copy_is.n_set(), 1);
+ F_And *f_root = cal.add_and();
+ for (int j = 0; j < ub_list.size(); j++)
+ for (int k = 0; k < lb_list.size(); k++) {
+ GEQ_Handle h = f_root->add_GEQ();
+
+ for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error(
+ "cannot calculate temporay array size statically");
+ }
+ }
+ h.update_const(ub_list[j].get_const());
+
+ for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error(
+ "cannot calculate temporay array size statically");
+ }
+ }
+ h.update_const(lb_list[k].get_const());
+
+ h.update_const(1);
+ h.update_coef(cal.output_var(1), -1);
+ }
+
+ cal = Restrict_Domain(cal, copy(copy_is));
+ for (int j = 1; j <= cal.n_inp(); j++) {
+ cal = Project(cal, j, Input_Var);
+ }
+ cal.simplify();
+
+ // pad temporary array size
+ // TODO: for variable array size, create padding formula
+ //int padding_stride = 0;
+ Conjunct *c = cal.query_DNF()->single_conjunct();
+ bool is_index_bound_const = false;
+ if (padding_stride != 0 && i == n_dim - 1) {
+ //size = (size + index_stride[i] - 1) / index_stride[i];
+ size_repr.push_back(ocg1->CreateInt(padding_stride));
+ } else {
+ for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const;
+ gi++)
+ if ((*gi).is_const(cal.output_var(1))) {
+ coef_t size = (*gi).get_const()
+ / (-(*gi).get_coef(cal.output_var(1)));
+
+ if (padding_alignment > 1 && i == n_dim - 1) { // align to boundary for data packing
+ int residue = size % padding_alignment;
+ if (residue)
+ size = size + padding_alignment - residue;
+ }
+
+ index_sz.push_back(
+ std::make_pair(i, ocg1->CreateInt(size)));
+ is_index_bound_const = true;
+ size_int.push_back(size);
+ size_repr.push_back(ocg1->CreateInt(size));
+
+ // std::cout << "============================== size :: "
+ // << size << "\n";
+
+ }
+
+ if (!is_index_bound_const) {
+
+ found_non_constant_size_dimension = true;
+ Conjunct *c = bound.query_DNF()->single_conjunct();
+ for (GEQ_Iterator gi(c->GEQs());
+ gi && !is_index_bound_const; gi++) {
+ int coef = (*gi).get_coef(bound.set_var(levels[i]));
+ if (coef < 0) {
+
+ size_repr.push_back(
+ ocg1->CreatePlus(
+ output_upper_bound_repr(ocg1, *gi,
+ bound.set_var(levels[i]),
+ bound,
+ std::vector<
+ std::pair<
+ CG_outputRepr *,
+ int> >(
+ bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
+ uninterpreted_symbols[stmt_num]),
+ ocg1->CreateInt(1)));
+
+ /*CG_outputRepr *op = NULL;
+ for (Constr_Vars_Iter ci(*gi); ci; ci++) {
+ if ((*ci).var != cal.output_var(1)) {
+ switch ((*ci).var->kind()) {
+ case Global_Var: {
+ Global_Var_ID g =
+ (*ci).var->get_global_var();
+ if ((*ci).coef == 1)
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateIdent(
+ g->base_name()));
+ else if ((*ci).coef == -1)
+ op = ocg1->CreateMinus(op,
+ ocg1->CreateIdent(
+ g->base_name()));
+ else if ((*ci).coef > 1)
+ op =
+ ocg1->CreatePlus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(
+ (*ci).coef),
+ ocg1->CreateIdent(
+ g->base_name())));
+ else
+ // (*ci).coef < -1
+ op =
+ ocg1->CreateMinus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(
+ -(*ci).coef),
+ ocg1->CreateIdent(
+ g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error(
+ "failed to generate array index bound code");
+ }
+ }
+ }
+ int c = (*gi).get_const();
+ if (c > 0)
+ op = ocg1->CreatePlus(op, ocg1->CreateInt(c));
+ else if (c < 0)
+ op = ocg1->CreateMinus(op, ocg1->CreateInt(-c));
+ */
+ /* if (padding_stride != 0) {
+ if (i == fastest_changing_dimension) {
+ coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[i] / g;
+ if (t1 != 1)
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ op = ocg->CreateTimes(op, ocg->CreateInt(t2));
+ }
+ else if (index_stride[i] != 1) {
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i]));
+ }
+ }
+ */
+ //index_sz.push_back(std::make_pair(i, op));
+ //break;
+ }
+ }
+ }
+ }
+ }
+ //size[i] = ub[i];
+
+ }
+ /////////////////////////////////////////////////////////////////////////////////////////////////////
+ //
+
+ //Anand: Creating IS of new statement
+
+ //for(int l = dim; l < stmt[stmt_num].xform.n_out(); l+=2)
+ //std::cout << "In scalar_expand function 3: " << stmt_num << ", " << arrName << "\n";
+ //std::cout.flush();
+
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+
+ shiftLexicalOrder(lex, dim + 1, 1);
+ Statement s = stmt[stmt_num];
+ s.ir_stmt_node = NULL;
+ int newStmt_num = stmt.size();
+
+ fprintf(stderr, "loop.cc L3249 adding stmt %d\n", stmt.size());
+ stmt.push_back(s);
+
+ fprintf(stderr, "uninterpreted_symbols.push_back() newStmt_num %d\n", newStmt_num);
+ uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]);
+ stmt[newStmt_num].code = stmt[stmt_num].code->clone();
+ stmt[newStmt_num].IS = copy(stmt[stmt_num].IS);
+ stmt[newStmt_num].xform = xform;
+ stmt[newStmt_num].reduction = stmt[stmt_num].reduction;
+ stmt[newStmt_num].reductionOp = stmt[stmt_num].reductionOp;
+
+
+ //fprintf(stderr, "\nafter clone, %d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+
+
+ //assign_const(stmt[newStmt_num].xform, stmt[stmt_num].xform.n_out(), 1);//Anand: change from 2*level + 1 to stmt[stmt_num].xform.size()
+ //Anand-End creating IS of new statement
+
+ CG_outputRepr * tmpArrSz;
+ CG_outputBuilder *ocg = ir->builder();
+
+ //for(int k =0; k < levels.size(); k++ )
+ // size_repr.push_back(ocg->CreateInt(size[k]));//Anand: copying apply_xform functionality to prevent IS modification
+ //due to side effects with uninterpreted function symbols and failures in omega
+
+ //int n = stmt[stmt_num].loop_level.size();
+
+ /*Relation mapping(2 * n + 1, n);
+ F_And *f_root = mapping.add_and();
+ for (int j = 1; j <= n; j++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.output_var(j), 1);
+ h.update_coef(mapping.input_var(2 * j), -1);
+ }
+ mapping = Composition(mapping, copy(stmt[stmt_num].xform));
+ mapping.simplify();
+
+ // match omega input/output variables to variable names in the code
+ for (int j = 1; j <= stmt[stmt_num].IS.n_set(); j++)
+ mapping.name_input_var(j, stmt[stmt_num].IS.set_var(j)->name());
+ for (int j = 1; j <= n; j++)
+ mapping.name_output_var(j,
+ tmp_loop_var_name_prefix
+ + to_string(tmp_loop_var_name_counter + j - 1));
+ mapping.setup_names();
+
+ Relation size_ = omega::Range(Restrict_Domain(mapping, copy(stmt[stmt_num].IS)));
+ size_.simplify();
+ */
+
+ //Anand -commenting out tmp sym creation as symbol may have more than one dimension
+ //tmp_sym = ir->CreateArraySymbol(tmpArrSz, sym);
+ std::vector<CG_outputRepr *> lhs_index;
+ CG_outputRepr *arr_ref_repr;
+ arr_ref_repr = ocg->CreateIdent(
+ stmt[stmt_num].IS.set_var(levels[levels.size() - 1])->name());
+
+ CG_outputRepr *total_size = size_repr[0];
+ fprintf(stderr, "total_size = "); total_size->dump(); fflush(stdout);
+
+ for (int i = 1; i < size_repr.size(); i++) {
+ fprintf(stderr, "total_size now "); total_size->dump(); fflush(stdout); fprintf(stderr, " times something\n\n");
+
+ total_size = ocg->CreateTimes(total_size->clone(),
+ size_repr[i]->clone());
+
+ }
+
+ // COMMENT NEEDED
+ //fprintf(stderr, "\nloop.cc COMMENT NEEDED\n");
+ for (int k = levels.size() - 2; k >= 0; k--) {
+ CG_outputRepr *temp_repr =ocg->CreateIdent(stmt[stmt_num].IS.set_var(levels[k])->name());
+ for (int l = k + 1; l < levels.size(); l++) {
+ //fprintf(stderr, "\nloop.cc CREATETIMES\n");
+ temp_repr = ocg->CreateTimes(temp_repr->clone(),
+ size_repr[l]->clone());
+ }
+
+ //fprintf(stderr, "\nloop.cc CREATEPLUS\n");
+ arr_ref_repr = ocg->CreatePlus(arr_ref_repr->clone(),
+ temp_repr->clone());
+ }
+
+
+ //fprintf(stderr, "loop.cc, about to die\n");
+ std::vector<CG_outputRepr *> to_push;
+ to_push.push_back(total_size);
+
+ if (!found_non_constant_size_dimension) {
+ fprintf(stderr, "constant size dimension\n");
+ tmp_sym = ir->CreateArraySymbol(sym, to_push, memory_type);
+ }
+ else {
+ fprintf(stderr, "NON constant size dimension?\n");
+ //tmp_sym = ir->CreatePointerSymbol(sym, to_push);
+ tmp_sym = ir->CreatePointerSymbol(sym, to_push);
+
+ static_cast<IR_PointerSymbol *>(tmp_sym)->set_size(0, total_size); // ??
+ ptr_variables.push_back(static_cast<IR_PointerSymbol *>(tmp_sym));
+ fprintf(stderr, "ptr_variables now has %d entries\n", ptr_variables.size());
+ }
+
+ // add tmp_sym to Loop symtables ??
+
+
+ // std::cout << " temp array name == " << tmp_sym->name().c_str() << "\n";
+
+ // get loop index variable at the given "level"
+ // Relation R = omega::Range(Restrict_Domain(copy(stmt[stmt_num].xform), copy(stmt[stmt_num].IS)));
+ // stmt[stmt_num].IS.print();
+ //stmt[stmt_num].IS.
+ // std::cout << stmt[stmt_num].IS.n_set() << std::endl;
+ // std::string v = stmt[stmt_num].IS.set_var(level)->name();
+ // std::cout << "loop index variable is '" << v.c_str() << "'\n";
+
+ // create a reference for the temporary array
+ fprintf(stderr, "create a reference for the temporary array\n");
+ //std::cout << "In scalar_expand function 4: " << stmt_num << ", " << arrName << "\n";
+ //std::cout.flush();
+
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+
+
+ std::vector<CG_outputRepr *> to_push2;
+ to_push2.push_back(arr_ref_repr); // can have only one entry
+
+ //lhs_index[0] = ocg->CreateIdent(v);
+
+
+ IR_ArrayRef *tmp_array_ref;
+ IR_PointerArrayRef * tmp_ptr_array_ref; // was IR_PointerArrayref
+
+ if (!found_non_constant_size_dimension) {
+ fprintf(stderr, "constant size\n");
+
+ tmp_array_ref = ir->CreateArrayRef(
+ static_cast<IR_ArraySymbol *>(tmp_sym), to_push2);
+ }
+ else {
+ fprintf(stderr, "NON constant size\n");
+ tmp_ptr_array_ref = ir->CreatePointerArrayRef(
+ static_cast<IR_PointerSymbol *>(tmp_sym), to_push2);
+ // TODO static_cast<IR_PointerSymbol *>(tmp_sym), to_push2);
+ }
+ fflush(stdout);
+
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+
+ //std::string stemp;
+ //stemp = tmp_array_ref->name();
+ //std::cout << "Created array reference --> " << stemp.c_str() << "\n";
+
+ // get the RHS expression
+ fprintf(stderr, "get the RHS expression arrName %s\n", arrName.c_str());
+
+ CG_outputRepr *rhs;
+ if (arrName == "RHS") {
+ rhs = ir->GetRHSExpression(stmt[stmt_num].code);
+
+ std::vector<IR_ArrayRef *> symbols = ir->FindArrayRef(rhs);
+ }
+ std::set<std::string> sym_names;
+
+ //for (int i = 0; i < symbols.size(); i++)
+ // sym_names.insert(symbols[i]->symbol()->name());
+
+ fflush(stdout);
+
+ //fprintf(stderr, "\nbefore if (arrName == RHS)\n%d statements\n", stmt.size()); // problem is after here
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+ if (arrName == "RHS") {
+
+ std::vector<IR_ArrayRef *> symbols = ir->FindArrayRef(rhs);
+
+ for (int i = 0; i < symbols.size(); i++)
+ sym_names.insert(symbols[i]->symbol()->name());
+ }
+ else {
+
+ fprintf(stderr, "finding array refs in stmt_num %d\n", stmt_num);
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
+ fprintf(stderr, "\n%d refs\n", refs.size());
+
+
+ bool found = false;
+
+ for (int j = 0; j < refs.size(); j++) {
+ CG_outputRepr* to_replace;
+
+ fprintf(stderr, "j %d build new assignment statement with temporary array\n",j);
+ // build new assignment statement with temporary array
+ if (!found_non_constant_size_dimension) {
+ to_replace = tmp_array_ref->convert();
+ } else {
+ to_replace = tmp_ptr_array_ref->convert();
+ }
+ //fprintf(stderr, "to_replace %p\n", to_replace);
+ //CG_chillRepr *CR = (CG_chillRepr *) to_replace;
+ //CR->Dump();
+
+ if (refs[j]->name() == arrName) {
+ fflush(stdout);
+ fprintf(stderr, "loop.cc L353\n"); // problem is after here
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+
+ sym_names.insert(refs[j]->symbol()->name());
+
+ if (!found) {
+ if (!found_non_constant_size_dimension) {
+ fprintf(stderr, "constant size2\n");
+ omega::CG_outputRepr * t = tmp_array_ref->convert();
+ omega::CG_outputRepr * r = refs[j]->convert()->clone();
+ //CR = (CG_chillRepr *) t;
+ //CR->Dump();
+ //CR = (CG_chillRepr *) r;
+ //CR->Dump();
+
+ //fprintf(stderr, "lhs t %p lhs r %p\n", t, r);
+ stmt[newStmt_num].code =
+ ir->builder()->CreateAssignment(0,
+ t, // tmp_array_ref->convert(),
+ r); // refs[j]->convert()->clone()
+ }
+ else {
+ fprintf(stderr, "NON constant size2\n");
+ omega::CG_outputRepr * t = tmp_ptr_array_ref->convert(); // this fails
+ omega::CG_outputRepr * r = refs[j]->convert()->clone();
+
+ //omega::CG_chillRepr *CR = (omega::CG_chillRepr *) t;
+ //CR->Dump();
+ //CR = (omega::CG_chillRepr *) r;
+ //CR->Dump();
+
+ //fprintf(stderr, "lhs t %p lhs r %p\n", t, r);
+ stmt[newStmt_num].code =
+ ir->builder()->CreateAssignment(0,
+ t, // tmp_ptr_array_ref->convert(),
+ r ); // refs[j]->convert()->clone());
+ }
+ found = true;
+
+ }
+
+ // refs[j] has no parent?
+ fprintf(stderr, "replacing refs[%d]\n", j );
+ ir->ReplaceExpression(refs[j], to_replace);
+ }
+
+ }
+
+ }
+ //ToDo need to update the dependence graph
+ //Anand adding dependence graph update
+ fprintf(stderr, "adding dependence graph update\n"); // problem is before here
+ //fprintf(stderr, "\n%d statements\n", stmt.size());
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "%2d ", i);
+ // ((CG_chillRepr *)stmt[i].code)->Dump();
+ //}
+ //fprintf(stderr, "\n");
+
+ dep.insert();
+
+ //Anand:Copying Dependence checks from datacopy code, might need to be a separate function/module
+ // in the future
+
+ /*for (int i = 0; i < newStmt_num; i++) {
+ std::vector<std::vector<DependenceVector> > D;
+
+ for (DependenceGraph::EdgeList::iterator j =
+ dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+ ) {
+ if (same_loop.find(i) != same_loop.end()
+ && same_loop.find(j->first) == same_loop.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL
+ && sym_names.find(dv.sym->name()) != sym_names.end()
+ && (dv.type == DEP_R2R || dv.type == DEP_R2W))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ dep.connect(newStmt_num, j->first, dvs1);
+ } else if (same_loop.find(i) == same_loop.end()
+ && same_loop.find(j->first) != same_loop.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL
+ && sym_names.find(dv.sym->name()) != sym_names.end()
+ && (dv.type == DEP_R2R || dv.type == DEP_W2R))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ D.push_back(dvs1);
+ }
+
+ if (j->second.size() == 0)
+ dep.vertex[i].second.erase(j++);
+ else
+ j++;
+ }
+
+ for (int j = 0; j < D.size(); j++)
+ dep.connect(i, newStmt_num, D[j]);
+ }
+ */
+ //Anand--end dependence check
+ if (arrName == "RHS") {
+
+ // build new assignment statement with temporary array
+ if (!found_non_constant_size_dimension) {
+ if (assign_then_accumulate) {
+ stmt[newStmt_num].code = ir->builder()->CreateAssignment(0,
+ tmp_array_ref->convert(), rhs);
+ fprintf(stderr, "ir->ReplaceRHSExpression( stmt_ num %d )\n", stmt_num);
+ ir->ReplaceRHSExpression(stmt[stmt_num].code, tmp_array_ref);
+ } else {
+ CG_outputRepr *temp = tmp_array_ref->convert()->clone();
+ if (ir->QueryExpOperation(stmt[stmt_num].code)
+ != IR_OP_PLUS_ASSIGNMENT)
+ throw ir_error(
+ "Statement is not a += accumulation statement");
+
+ fprintf(stderr, "replacing in a +=\n");
+ stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0,
+ temp->clone(), rhs);
+
+ CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code);
+
+ CG_outputRepr *assignment = ir->builder()->CreateAssignment(0,
+ lhs, temp->clone());
+ Statement init_ = stmt[newStmt_num]; // copy ??
+ init_.ir_stmt_node = NULL;
+
+ init_.code = stmt[newStmt_num].code->clone();
+ init_.IS = copy(stmt[newStmt_num].IS);
+ init_.xform = copy(stmt[newStmt_num].xform);
+ init_.has_inspector = false; // ??
+
+ Relation mapping(init_.IS.n_set(), init_.IS.n_set());
+
+ F_And *f_root = mapping.add_and();
+
+ for (int i = 1; i <= mapping.n_inp(); i++) {
+ EQ_Handle h = f_root->add_EQ();
+ //if (i < levels[0]) {
+ if (i <= levels[levels.size() - 1]) {
+ h.update_coef(mapping.input_var(i), 1);
+ h.update_coef(mapping.output_var(i), -1);
+ } else {
+ h.update_const(-1);
+ h.update_coef(mapping.output_var(i), 1);
+ }
+
+ /*else {
+ int j;
+ for (j = 0; j < levels.size(); j++)
+ if (i == levels[j])
+ break;
+
+ if (j == levels.size()) {
+
+ h.update_coef(mapping.output_var(i), 1);
+ h.update_const(-1);
+
+ } else {
+
+
+ h.update_coef(mapping.input_var(i), 1);
+ h.update_coef(mapping.output_var(i), -1);
+
+
+ }
+ */
+ //}
+ }
+
+ mapping.simplify();
+ // match omega input/output variables to variable names in the code
+ for (int j = 1; j <= init_.IS.n_set(); j++)
+ mapping.name_output_var(j, init_.IS.set_var(j)->name());
+ for (int j = 1; j <= init_.IS.n_set(); j++)
+ mapping.name_input_var(j, init_.IS.set_var(j)->name());
+
+ mapping.setup_names();
+
+ init_.IS = omega::Range(
+ omega::Restrict_Domain(mapping, init_.IS));
+ std::vector<int> lex = getLexicalOrder(newStmt_num);
+ int dim = 2 * levels[0] - 1;
+ //init_.IS.print();
+ // init_.xform.print();
+ //stmt[newStmt_num].xform.print();
+ // shiftLexicalOrder(lex, dim + 1, 1);
+ shiftLexicalOrder(lex, dim + 1, 1);
+ init_.reduction = stmt[newStmt_num].reduction;
+ init_.reductionOp = stmt[newStmt_num].reductionOp;
+
+ init_.code = ir->builder()->CreateAssignment(0, temp->clone(),
+ ir->builder()->CreateInt(0));
+
+ fprintf(stderr, "loop.cc L3693 adding stmt %d\n", stmt.size());
+ stmt.push_back(init_);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[newStmt_num]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[newStmt_num]);
+ stmt[stmt_num].code = assignment;
+ }
+ } else {
+ if (assign_then_accumulate) {
+ stmt[newStmt_num].code = ir->builder()->CreateAssignment(0,
+ tmp_ptr_array_ref->convert(), rhs);
+ ir->ReplaceRHSExpression(stmt[stmt_num].code,
+ tmp_ptr_array_ref);
+ } else {
+ CG_outputRepr *temp = tmp_ptr_array_ref->convert()->clone();
+ if (ir->QueryExpOperation(stmt[stmt_num].code)
+ != IR_OP_PLUS_ASSIGNMENT)
+ throw ir_error(
+ "Statement is not a += accumulation statement");
+ stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0,
+ temp->clone(), rhs);
+
+ CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code);
+
+ CG_outputRepr *assignment = ir->builder()->CreateAssignment(0,
+ lhs, temp->clone());
+
+ stmt[stmt_num].code = assignment;
+ }
+ // call function to replace rhs with temporary array
+ }
+ }
+
+ //std::cout << "End of scalar_expand function!! \n";
+
+ // if(arrName == "RHS"){
+ DependenceVector dv;
+ std::vector<DependenceVector> E;
+ dv.lbounds = std::vector<omega::coef_t>(4);
+ dv.ubounds = std::vector<omega::coef_t>(4);
+ dv.type = DEP_W2R;
+
+ for (int k = 0; k < 4; k++) {
+ dv.lbounds[k] = 0;
+ dv.ubounds[k] = 0;
+
+ }
+
+ //std::vector<IR_ArrayRef*> array_refs = ir->FindArrayRef(stmt[newStmt_num].code);
+ dv.sym = tmp_sym->clone();
+
+ E.push_back(dv);
+
+ dep.connect(newStmt_num, stmt_num, E);
+ // }
+
+}
+
+
+
+
+// Builds the iteration space (IS) and the transformation relation (XFORM) for
+// a nest made of outer_loop_bounds.size() outer loops, one additional loop
+// whose bounds are expressed through `index_name`, and
+// zero_loop_bounds.size() further loops.
+//
+// ocg                    builder used for the argument expressions and the
+//                        macro bodies created here.
+// outer_loop_bounds      sets folded into IS for positions 1..size() through
+//                        replace_set_var_as_another_set_var().
+// index_name             base name of the bound symbols and of the array
+//                        referenced by the generated macros.
+// zero_loop_bounds       sets folded into IS for the positions after the new
+//                        loop; the map key is handed to the helper as its
+//                        last argument.
+// uninterpreted_symbols, uninterpreted_symbols_string
+//                        receive the argument lists registered for
+//                        index_name + "_" and index_name + "__".
+// this_loop              loop whose IR gets the macros (CreateDefineMacro)
+//                        and which gets the extra known constraint.
+//
+// Returns the pair (IS, XFORM).
+std::pair<Relation, Relation> createCSRstyleISandXFORM(CG_outputBuilder *ocg,
+ std::vector<Relation> &outer_loop_bounds, std::string index_name,
+ std::map<int, Relation> &zero_loop_bounds,
+ std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols,
+ std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols_string,
+ Loop *this_loop) {
+
+ // One set variable per loop; XFORM maps them to 2*n+1 output variables.
+ Relation IS(outer_loop_bounds.size() + 1 + zero_loop_bounds.size());
+ Relation XFORM(outer_loop_bounds.size() + 1 + zero_loop_bounds.size(),
+ 2 * (outer_loop_bounds.size() + 1 + zero_loop_bounds.size()) + 1);
+
+ // f_r_ is not referenced again below; add_and() is still invoked on IS.
+ F_And * f_r_ = IS.add_and();
+ F_And * f_root = XFORM.add_and();
+
+ // Copy variable names from the first outer bound, or otherwise from the
+ // first zero-loop bound.
+ // NOTE(review): both loops read set_var(1..IS.n_set()) of the source
+ // relation, so it presumably has at least that many set variables - confirm.
+ if (outer_loop_bounds.size() > 0) {
+ for (int it = 0; it < IS.n_set(); it++) {
+ IS.name_set_var(it + 1,
+ const_cast<Relation &>(outer_loop_bounds[0]).set_var(it + 1)->name());
+ XFORM.name_input_var(it + 1,
+ const_cast<Relation &>(outer_loop_bounds[0]).set_var(it + 1)->name());
+
+ }
+ } else if (zero_loop_bounds.size() > 0) {
+ for (int it = 0; it < IS.n_set(); it++) {
+ IS.name_set_var(it + 1,
+ const_cast<Relation &>(zero_loop_bounds.begin()->second).set_var(
+ it + 1)->name());
+ XFORM.name_input_var(it + 1,
+ const_cast<Relation &>(zero_loop_bounds.begin()->second).set_var(
+ it + 1)->name());
+
+ }
+
+ }
+
+ // Fold each outer bound i into IS at position i + 1.
+ for (int i = 0; i < outer_loop_bounds.size(); i++)
+ IS = replace_set_var_as_another_set_var(IS, outer_loop_bounds[i], i + 1,
+ i + 1);
+
+ // Fold the zero-loop bounds into the positions after the new loop
+ // (outer_loop_bounds.size() + 2 onwards, since count starts at 1).
+ int count = 1;
+ for (std::map<int, Relation>::iterator i = zero_loop_bounds.begin();
+ i != zero_loop_bounds.end(); i++, count++)
+ IS = replace_set_var_as_another_set_var(IS, i->second,
+ outer_loop_bounds.size() + 1 + count, i->first);
+
+ if (outer_loop_bounds.size() > 0) {
+ // With outer loops present, the new loop variable
+ // v = set_var(outer_loop_bounds.size() + 1) is bounded by two arity-1
+ // symbols: index_name_ (lower) and index_name__ (upper).
+ Free_Var_Decl *lb = new Free_Var_Decl(index_name + "_", 1); // index_
+ Variable_ID csr_lb = IS.get_local(lb, Input_Tuple);
+
+ Free_Var_Decl *ub = new Free_Var_Decl(index_name + "__", 1); // index__
+ Variable_ID csr_ub = IS.get_local(ub, Input_Tuple);
+
+ //lower bound
+
+ // v - index_name_ >= 0
+ F_And * f_r = IS.and_with_and();
+ GEQ_Handle lower_bound = f_r->add_GEQ();
+ lower_bound.update_coef(csr_lb, -1);
+ lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1);
+
+ //upper bound
+
+ // index_name__ - v - 1 >= 0
+ GEQ_Handle upper_bound = f_r->add_GEQ();
+ upper_bound.update_coef(csr_ub, 1);
+ upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1);
+ upper_bound.update_const(-1);
+
+ // NOTE(review): ocgs is allocated with new and is not deleted anywhere in
+ // this function.
+ omega::CG_stringBuilder *ocgs = new CG_stringBuilder;
+
+ // reprs/reprs3 hold the argument built with ocg, reprs2/reprs4 the same
+ // argument built with the string builder.
+ std::vector<omega::CG_outputRepr *> reprs;
+ std::vector<omega::CG_outputRepr *> reprs2;
+
+ std::vector<omega::CG_outputRepr *> reprs3;
+ std::vector<omega::CG_outputRepr *> reprs4;
+
+ // The single argument of index_name_ is the innermost outer loop variable.
+ reprs.push_back(
+ ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
+ reprs2.push_back(
+ ocgs->CreateIdent(
+ IS.set_var(outer_loop_bounds.size())->name()));
+ uninterpreted_symbols.insert(
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "_", reprs));
+ uninterpreted_symbols_string.insert(
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "_", reprs2));
+
+ // Macro parameter list: the outer loop variable name wrapped in parentheses.
+ std::string arg = "(" + IS.set_var(outer_loop_bounds.size())->name()
+ + ")";
+ std::vector< std::string > argvec;
+ argvec.push_back( arg );
+
+ // Body of macro index_name_: index_name[<outer loop variable>].
+ CG_outputRepr *repr = ocg->CreateArrayRefExpression(index_name,
+ ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
+
+ //fprintf(stderr, "( VECTOR _)\n");
+ //fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "_").c_str());
+ this_loop->ir->CreateDefineMacro(index_name + "_", argvec, repr);
+
+ // Record index_name__ - index_name_ - 1 >= 0 as a known fact of the loop.
+ Relation known_(copy(IS).n_set());
+ known_.copy_names(copy(IS));
+ known_.setup_names();
+ Variable_ID index_lb = known_.get_local(lb, Input_Tuple);
+ Variable_ID index_ub = known_.get_local(ub, Input_Tuple);
+ F_And *fr = known_.add_and();
+ GEQ_Handle g = fr->add_GEQ();
+ g.update_coef(index_ub, 1);
+ g.update_coef(index_lb, -1);
+ g.update_const(-1);
+ this_loop->addKnown(known_);
+
+ // Same argument again, this time registered for index_name__.
+ reprs3.push_back(
+
+ ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
+ reprs4.push_back(
+
+ ocgs->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
+
+ // Body of macro index_name__: index_name[<outer loop variable> + 1].
+ CG_outputRepr *repr2 = ocg->CreateArrayRefExpression(index_name,
+ ocg->CreatePlus(
+ ocg->CreateIdent(
+ IS.set_var(outer_loop_bounds.size())->name()),
+ ocg->CreateInt(1)));
+
+ //fprintf(stderr, "( VECTOR __)\n");
+ //fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "__").c_str());
+
+ this_loop->ir->CreateDefineMacro(index_name + "__", argvec, repr2);
+
+ uninterpreted_symbols.insert(
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "__", reprs3));
+ uninterpreted_symbols_string.insert(
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "__", reprs4));
+ } else {
+ // No outer loops: bound the first set variable v by the plain symbol
+ // index_name, i.e. index_name - v - 1 >= 0 and v >= 0.
+ Free_Var_Decl *ub = new Free_Var_Decl(index_name);
+ Variable_ID csr_ub = IS.get_local(ub);
+ F_And * f_r = IS.and_with_and();
+ GEQ_Handle upper_bound = f_r->add_GEQ();
+ upper_bound.update_coef(csr_ub, 1);
+ upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1);
+ upper_bound.update_const(-1);
+
+ GEQ_Handle lower_bound = f_r->add_GEQ();
+ lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1);
+
+ }
+
+ // XFORM: every even output variable equals the matching input variable ...
+ for (int j = 1; j <= XFORM.n_inp(); j++) {
+ omega::EQ_Handle h = f_root->add_EQ();
+ h.update_coef(XFORM.output_var(2 * j), 1);
+ h.update_coef(XFORM.input_var(j), -1);
+ }
+
+ // ... and every odd output variable is fixed to 0.
+ for (int j = 1; j <= XFORM.n_out(); j += 2) {
+ omega::EQ_Handle h = f_root->add_EQ();
+ h.update_coef(XFORM.output_var(j), 1);
+ }
+
+ if (_DEBUG_) {
+ IS.print();
+ XFORM.print();
+
+ }
+
+ return std::pair<Relation, Relation>(IS, XFORM);
+
+}
+
+// Builds an iteration space (IS) and transformation relation (XFORM) that
+// contain only the loop positions listed in `loops`.
+//
+// ir                     only referenced by the commented-out section below.
+// is, xform              relations the names and constraints are taken from.
+// loops                  1-based positions in `is` to keep; entry k becomes
+//                        position k + 1 of the result.
+// lex_order              supplies the constants for the odd output variables
+//                        of XFORM.
+// known                  extended to is.n_set() variables and intersected
+//                        with `is` before constraints are transferred.
+// uninterpreted_symbols  only referenced by the commented-out section below.
+//
+// Returns the pair (IS, XFORM).
+std::pair<Relation, Relation> construct_reduced_IS_And_XFORM(IR_Code *ir,
+ const Relation &is, const Relation &xform, const std::vector<int> loops,
+ std::vector<int> &lex_order, Relation &known,
+ std::map<std::string, std::vector<CG_outputRepr *> > &uninterpreted_symbols) {
+
+ Relation IS(loops.size());
+ Relation XFORM(loops.size(), 2 * loops.size() + 1);
+ int count_ = 1;
+ // new position (1-based) -> original position in `is`
+ std::map<int, int> pos_mapping;
+
+ int n = is.n_set();
+ Relation is_and_known = Intersection(copy(is),
+ Extend_Set(copy(known), n - known.n_set()));
+
+ // Carry the variable names of the kept positions over to IS and XFORM.
+ for (int it = 0; it < loops.size(); it++, count_++) {
+ IS.name_set_var(count_,
+ const_cast<Relation &>(is).set_var(loops[it])->name());
+ XFORM.name_input_var(count_,
+ const_cast<Relation &>(xform).input_var(loops[it])->name());
+ XFORM.name_output_var(2 * count_,
+ const_cast<Relation &>(xform).output_var((loops[it]) * 2)->name());
+ XFORM.name_output_var(2 * count_ - 1,
+ const_cast<Relation &>(xform).output_var((loops[it]) * 2 - 1)->name());
+ pos_mapping.insert(std::pair<int, int>(count_, loops[it]));
+ }
+
+ // The trailing output variable takes the name of xform's last output.
+ XFORM.name_output_var(2 * loops.size() + 1,
+ const_cast<Relation &>(xform).output_var(is.n_set() * 2 + 1)->name());
+
+ // f_r is not referenced again below; add_and() is still invoked on IS.
+ F_And * f_r = IS.add_and();
+ // Transfer the constraints of each kept position from is_and_known into IS.
+ for (std::map<int, int>::iterator it = pos_mapping.begin();
+ it != pos_mapping.end(); it++)
+ IS = replace_set_var_as_another_set_var(IS, is_and_known, it->first,
+ it->second);
+ /*
+ for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it2 =
+ uninterpreted_symbols.begin();
+ it2 != uninterpreted_symbols.end(); it2++) {
+ std::vector<CG_outputRepr *> reprs_ = it2->second;
+ //std::vector<CG_outputRepr *> reprs_2;
+
+ for (int k = 0; k < reprs_.size(); k++) {
+ std::vector<IR_ScalarRef *> refs = ir->FindScalarRef(reprs_[k]);
+ bool exception_found = false;
+ for (int m = 0; m < refs.size(); m++){
+
+ if (refs[m]->name()
+ == const_cast<Relation &>(is).set_var(it->second)->name())
+ try {
+ ir->ReplaceExpression(refs[m],
+ ir->builder()->CreateIdent(
+ IS.set_var(it->first)->name()));
+ } catch (ir_error &e) {
+
+ reprs_[k] = ir->builder()->CreateIdent(
+ IS.set_var(it->first)->name());
+ exception_found = true;
+ }
+ if(exception_found)
+ break;
+ }
+
+ }
+ it2->second = reprs_;
+ }
+
+ }
+ */
+ CHILL_DEBUG_BEGIN
+ std::cout << "relation debug" << std::endl;
+ IS.print();
+ CHILL_DEBUG_END
+
+ F_And *f_root = XFORM.add_and();
+
+ count_ = 1;
+
+ // Even output variables equal the matching input variable.
+ for (int j = 1; j <= loops.size(); j++) {
+ omega::EQ_Handle h = f_root->add_EQ();
+ h.update_coef(XFORM.output_var(2 * j), 1);
+ h.update_coef(XFORM.input_var(j), -1);
+ }
+ // Odd output variable 2*k-1 is fixed to lex_order[2*k-2].
+ // NOTE(review): lex_order is indexed by the new position k, not by the
+ // original position loops[k-1] - confirm this is intended.
+ for (int j = 0; j < loops.size(); j++, count_++) {
+ omega::EQ_Handle h = f_root->add_EQ();
+ h.update_coef(XFORM.output_var(count_ * 2 - 1), 1);
+ h.update_const(-lex_order[count_ * 2 - 2]);
+ }
+
+ // The trailing output variable is fixed to lex_order[xform.n_out() - 1].
+ omega::EQ_Handle h = f_root->add_EQ();
+ h.update_coef(XFORM.output_var((loops.size()) * 2 + 1), 1);
+ h.update_const(-lex_order[xform.n_out() - 1]);
+
+ CHILL_DEBUG_BEGIN
+ std::cout << "relation debug" << std::endl;
+ IS.print();
+ XFORM.print();
+ CHILL_DEBUG_END
+
+ return std::pair<Relation, Relation>(IS, XFORM);
+
+}
+
+// Returns the names of all scalar references that ir->FindScalarRef() reports
+// for `repr`, leaving out every name that is present in `ignore`.
+std::set<std::string> inspect_repr_for_scalars(IR_Code *ir,
+    CG_outputRepr * repr, std::set<std::string> ignore) {
+
+  std::set<std::string> names;
+  std::vector<IR_ScalarRef *> scalar_refs = ir->FindScalarRef(repr);
+
+  for (std::vector<IR_ScalarRef *>::iterator ref = scalar_refs.begin();
+       ref != scalar_refs.end(); ++ref) {
+    // Skip anything the caller asked us to ignore.
+    if (ignore.count((*ref)->name()) != 0)
+      continue;
+    names.insert((*ref)->name());
+  }
+
+  return names;
+}
+
+// Collects the names of the scalar variables that appear in the argument
+// expressions of the function symbols bounding set variable `pos` of R.
+//
+// Every GEQ constraint of R that has a non-zero coefficient on set variable
+// `pos` and passes is_const_except_for_global() is scanned; for each global
+// variable with arity > 0 found in it, the argument expressions registered
+// under its base name in `uninterpreted_symbols` are searched with
+// ir->FindScalarRef().
+//
+// Throws loop_error if R is not a set, or if such a symbol has no entry in
+// `uninterpreted_symbols` (previously this dereferenced map::end()).
+std::set<std::string> inspect_loop_bounds(IR_Code *ir, const Relation &R,
+    int pos,
+    std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols) {
+
+  if (!R.is_set())
+    throw loop_error("Input R has to be a set not a relation!");
+
+  typedef std::map<std::string, std::vector<omega::CG_outputRepr *> > SymbolArgs;
+
+  // Argument expressions of every function symbol met in a bounding constraint.
+  std::vector<CG_outputRepr *> arg_reprs;
+
+  Variable_ID bound_var = const_cast<Relation &>(R).set_var(pos);
+  for (DNF_Iterator di(const_cast<Relation &>(R).query_DNF()); di; di++) {
+    for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) {
+      if ((*gi).get_coef(bound_var) == 0
+          || !(*gi).is_const_except_for_global(bound_var))
+        continue;
+
+      for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) {
+        Variable_ID term_var = cvi.curr_var();
+        if (term_var->kind() != Global_Var)
+          continue;
+
+        Global_Var_ID g = term_var->get_global_var();
+        if (g->arity() <= 0)
+          continue;
+
+        std::string s = g->base_name();
+        SymbolArgs::iterator entry = uninterpreted_symbols.find(s);
+        if (entry == uninterpreted_symbols.end())
+          throw loop_error(
+              "no arguments registered for uninterpreted symbol " + s);
+
+        std::copy(entry->second.begin(), entry->second.end(),
+            std::back_inserter(arg_reprs));
+      }
+    }
+  }
+
+  std::set<std::string> vars;
+  for (int i = 0; i < arg_reprs.size(); i++) {
+    std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef(arg_reprs[i]);
+
+    for (int j = 0; j < scalars.size(); j++)
+      vars.insert(scalars[j]->name());
+  }
+  return vars;
+}
+
+// Builds the statement `count += (ub - lb) + 1`, where ub and lb are derived
+// from the function symbols that bound set variable `pos` of R.
+//
+// Every GEQ constraint of R that has a non-zero coefficient on set variable
+// `pos` and passes is_const_except_for_global() is scanned. A global variable
+// with arity > 0 and a positive coefficient yields the upper bound, any other
+// coefficient yields the lower bound; each bound is an invocation of the
+// symbol with the arguments registered in `uninterpreted_symbols`. The
+// negated constant of the constraint is then added to the lower bound if the
+// constraint supplied one, otherwise to the upper bound.
+//
+// Throws loop_error if R is not a set or a symbol has no registered
+// arguments, and ir_error if more than one candidate exists for a bound or a
+// bound could not be found at all (previously a NULL dereference).
+CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R,
+    int pos, CG_outputRepr * count,
+    std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols) {
+
+  if (!R.is_set())
+    throw loop_error("Input R has to be a set not a relation!");
+
+  typedef std::map<std::string, std::vector<omega::CG_outputRepr *> > SymbolArgs;
+
+  CG_outputRepr *ub = NULL;
+  CG_outputRepr *lb = NULL;
+
+  Variable_ID loop_var = const_cast<Relation &>(R).set_var(pos);
+  for (DNF_Iterator di(const_cast<Relation &>(R).query_DNF()); di; di++) {
+    for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) {
+      if ((*gi).get_coef(loop_var) == 0
+          || !(*gi).is_const_except_for_global(loop_var))
+        continue;
+
+      // Which bound(s) this particular constraint contributed.
+      bool found_ub = false;
+      bool found_lb = false;
+
+      for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) {
+        Variable_ID term_var = cvi.curr_var();
+        if (term_var->kind() != Global_Var)
+          continue;
+
+        Global_Var_ID g = term_var->get_global_var();
+        if (g->arity() <= 0)
+          continue;
+
+        std::string s = g->base_name();
+        SymbolArgs::iterator entry = uninterpreted_symbols.find(s);
+        if (entry == uninterpreted_symbols.end())
+          throw loop_error(
+              "no arguments registered for uninterpreted symbol " + s);
+
+        // The sign of the symbol's own coefficient decides the bound kind.
+        if ((*gi).get_coef(term_var) > 0) {
+          if (ub != NULL)
+            throw ir_error("bound expression too complex!");
+          ub = ir->builder()->CreateInvoke(s, entry->second);
+          found_ub = true;
+        } else {
+          if (lb != NULL)
+            throw ir_error("bound expression too complex!");
+          lb = ir->builder()->CreateInvoke(s, entry->second);
+          found_lb = true;
+        }
+      }
+
+      // Fold the constraint's constant into the bound it produced; the lower
+      // bound takes precedence when one constraint produced both.
+      if (found_lb)
+        lb = ir->builder()->CreatePlus(lb->clone(),
+            ir->builder()->CreateInt(-(*gi).get_const()));
+      else if (found_ub)
+        ub = ir->builder()->CreatePlus(ub->clone(),
+            ir->builder()->CreateInt(-(*gi).get_const()));
+    }
+  }
+
+  if (ub == NULL || lb == NULL)
+    throw ir_error("could not determine both bounds for counting loop!");
+
+  return ir->builder()->CreatePlusAssignment(0, count,
+      ir->builder()->CreatePlus(
+          ir->builder()->CreateMinus(ub->clone(), lb->clone()),
+          ir->builder()->CreateInt(1)));
+}
+
+
+
+// Appends the key of every entry of `index_refs` to `names`.
+// Throws loop_error when an entry lists more than one dependency and its
+// first dependency is not the empty string.
+static void append_guard_index_names(
+    const std::map<std::string, std::vector<std::string> > &index_refs,
+    std::vector<std::string> &names) {
+  for (std::map<std::string, std::vector<std::string> >::const_iterator j =
+      index_refs.begin(); j != index_refs.end(); j++) {
+    const std::vector<std::string> &deps = j->second;
+    // size() > 1 is the original "size() != 1" test minus the empty case,
+    // for which reading deps[0] would be out of bounds.
+    if (deps.size() > 1 && deps[0] != "")
+      throw loop_error(
+          "indirect array references not allowed in guard!");
+    names.push_back(j->first);
+  }
+}
+
+// Walks expression `exp` and returns a map from each referenced name to the
+// names used to index it:
+//   - an array reference maps its name to the names found in its index
+//     expressions (all dimensions concatenated);
+//   - a plain variable maps its name to a vector holding one empty string;
+//   - constants contribute nothing;
+//   - any other operation contributes the union of its operands' maps
+//     (on duplicate names the entry inserted first is kept).
+//
+// Throws loop_error if an array expression is neither an IR_ArrayRef nor an
+// IR_PointerArrayRef, or if an index expression fails the check in
+// append_guard_index_names().
+std::map<std::string, std::vector<std::string> > recurse_on_exp_for_arrays(
+    IR_Code * ir, CG_outputRepr * exp) {
+
+  typedef std::map<std::string, std::vector<std::string> > RefMap;
+
+  RefMap arr_index_to_ref;
+  switch (ir->QueryExpOperation(exp)) {
+
+  case IR_OP_ARRAY_VARIABLE: {
+    IR_ArrayRef *ref = dynamic_cast<IR_ArrayRef *>(ir->Repr2Ref(exp));
+    IR_PointerArrayRef *ref_ =
+        dynamic_cast<IR_PointerArrayRef *>(ir->Repr2Ref(exp));
+    if (ref == NULL && ref_ == NULL)
+      throw loop_error("Array symbol unidentifiable!");
+
+    // Names appearing in the index expression of every dimension.
+    std::vector<std::string> index_names;
+    std::string array_name;
+
+    if (ref != NULL) {
+      for (int i = 0; i < ref->n_dim(); i++)
+        append_guard_index_names(
+            recurse_on_exp_for_arrays(ir, ref->index(i)), index_names);
+      array_name = ref->name();
+    } else {
+      for (int i = 0; i < ref_->n_dim(); i++)
+        append_guard_index_names(
+            recurse_on_exp_for_arrays(ir, ref_->index(i)), index_names);
+      array_name = ref_->name();
+    }
+
+    arr_index_to_ref.insert(
+        std::pair<std::string, std::vector<std::string> >(array_name,
+            index_names));
+    break;
+  }
+  case IR_OP_PLUS:
+  case IR_OP_MINUS:
+  case IR_OP_MULTIPLY:
+  case IR_OP_DIVIDE: {
+    // Binary operators: merge the maps of both operands, left one first.
+    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
+    RefMap a0 = recurse_on_exp_for_arrays(ir, v[0]);
+    RefMap a1 = recurse_on_exp_for_arrays(ir, v[1]);
+    arr_index_to_ref.insert(a0.begin(), a0.end());
+    arr_index_to_ref.insert(a1.begin(), a1.end());
+    break;
+  }
+  case IR_OP_POSITIVE:
+  case IR_OP_NEGATIVE: {
+    // Unary operators: forward the operand's map.
+    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
+    RefMap a0 = recurse_on_exp_for_arrays(ir, v[0]);
+    arr_index_to_ref.insert(a0.begin(), a0.end());
+    break;
+  }
+  case IR_OP_VARIABLE: {
+    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
+    IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0]));
+
+    // A plain variable is recorded with a single empty-string entry.
+    std::string s = ref->name();
+    std::vector<std::string> to_insert;
+    to_insert.push_back("");
+    arr_index_to_ref.insert(
+        std::pair<std::string, std::vector<std::string> >(s, to_insert));
+    break;
+  }
+  case IR_OP_CONSTANT:
+    break;
+
+  default: {
+    // Any other operation: merge the maps of all operands in order.
+    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
+
+    for (int i = 0; i < v.size(); i++) {
+      RefMap a0 = recurse_on_exp_for_arrays(ir, v[i]);
+      arr_index_to_ref.insert(a0.begin(), a0.end());
+    }
+    break;
+  }
+  }
+  return arr_index_to_ref;
+}
+
+
+
+// Collects the conditions of all if-statements found in `code`, descending
+// through blocks, loop bodies and both branches of each if.
+//
+// For an if-statement the result lists its own condition first, then the
+// guards found in the then-branch, then those found in the else-branch.
+// Control types other than if, block and loop contribute nothing.
+std::vector<CG_outputRepr *> find_guards(IR_Code *ir, IR_Control *code) {
+  CHILL_DEBUG_PRINT("find_guards()\n");
+  std::vector<CG_outputRepr *> guards;
+  switch (code->type()) {
+  case IR_CONTROL_IF: {
+    CHILL_DEBUG_PRINT("find_guards() it's an if\n");
+    // Cast once instead of once per member access.
+    IR_If *if_node = dynamic_cast<IR_If*>(code);
+    CG_outputRepr *cond = if_node->condition();
+
+    std::vector<CG_outputRepr *> then_guards;
+    std::vector<CG_outputRepr *> else_guards;
+
+    IR_Control *then_blk = if_node->then_body();
+    if (then_blk != NULL) {
+      CHILL_DEBUG_PRINT("recursing on then\n");
+      then_guards = find_guards(ir, then_blk);
+    }
+
+    IR_Control *else_blk = if_node->else_body();
+    if (else_blk != NULL) {
+      // The original message said "then" here as well (copy-paste slip).
+      CHILL_DEBUG_PRINT("recursing on else\n");
+      else_guards = find_guards(ir, else_blk);
+    }
+
+    guards.push_back(cond);
+    guards.insert(guards.end(), then_guards.begin(), then_guards.end());
+    guards.insert(guards.end(), else_guards.begin(), else_guards.end());
+    break;
+  }
+  case IR_CONTROL_BLOCK: {
+    CHILL_DEBUG_PRINT("it's a control block\n");
+    IR_Block* IRCB = dynamic_cast<IR_Block*>(code);
+    CHILL_DEBUG_PRINT("calling ir->FindOneLevelControlStructure(IRCB);\n");
+    std::vector<IR_Control *> stmts = ir->FindOneLevelControlStructure(IRCB);
+
+    for (int i = 0; i < stmts.size(); i++) {
+      std::vector<CG_outputRepr *> nested = find_guards(ir, stmts[i]);
+      guards.insert(guards.end(), nested.begin(), nested.end());
+    }
+    break;
+  }
+  case IR_CONTROL_LOOP: {
+    CHILL_DEBUG_PRINT("it's a control loop\n");
+    std::vector<CG_outputRepr *> body_guards = find_guards(ir,
+        dynamic_cast<IR_Loop*>(code)->body());
+    guards.insert(guards.end(), body_guards.begin(), body_guards.end());
+    break;
+  } // loop
+  default:
+    // Other control kinds carry no guards.
+    break;
+  } // switch
+  return guards;
+}
+
+// Ordering predicate for std::sort: entry `i` goes before entry `j` when its
+// vector holds fewer non-empty strings than the vector of `j`.
+bool sort_helper(std::pair<std::string, std::vector<std::string> > i,
+    std::pair<std::string, std::vector<std::string> > j) {
+  typedef std::vector<std::string>::const_iterator NameIter;
+
+  int named_in_i = 0;
+  for (NameIter it = i.second.begin(); it != i.second.end(); ++it)
+    named_in_i += it->empty() ? 0 : 1;
+
+  int named_in_j = 0;
+  for (NameIter it = j.second.begin(); it != j.second.end(); ++it)
+    named_in_j += it->empty() ? 0 : 1;
+
+  return named_in_i < named_in_j;
+}
+
+// Ordering predicate: compares two pairs by their second member only.
+bool sort_helper_2(std::pair<int, int> i, std::pair<int, int> j) {
+  const int lhs_key = i.second;
+  const int rhs_key = j.second;
+  return rhs_key > lhs_key;
+}
+
+// Derives an ordered list of names from `input`, a map from a name to the
+// names it is indexed by (an entry holding only "" denotes a plain scalar).
+//
+// Entries are sorted with sort_helper (fewest non-empty index names first).
+// If the last entry after sorting has non-empty index names, those names
+// seed the result; the remaining entries are then visited from the back:
+// an entry with index names must have each of its leading names matched in
+// the result, otherwise loop_error is thrown, and an entry without index
+// names has its own name appended if not yet present. If the last entry has
+// no index names, all entry names are returned in reverse sorted order.
+//
+// An empty `input` yields an empty result (previously an out-of-range read).
+std::vector<std::string> construct_iteration_order(
+    std::map<std::string, std::vector<std::string> > & input) {
+  typedef std::pair<std::string, std::vector<std::string> > Entry;
+
+  std::vector<std::string> arrays;
+  if (input.empty())
+    return arrays;
+
+  std::vector<Entry> input_aid;
+  for (std::map<std::string, std::vector<std::string> >::iterator j =
+      input.begin(); j != input.end(); j++)
+    input_aid.push_back(Entry(j->first, j->second));
+
+  std::sort(input_aid.begin(), input_aid.end(), sort_helper);
+
+  // Seed the order with the index names of the entry that has the most.
+  const std::vector<std::string> &widest = input_aid.back().second;
+  for (int j = 0; j < widest.size(); j++)
+    if (widest[j] != "")
+      arrays.push_back(widest[j]);
+
+  if (arrays.size() > 0) {
+    for (int i = input_aid.size() - 2; i >= 0; i--) {
+
+      int max_count = 0;
+      for (int j = 0; j < input_aid[i].second.size(); j++)
+        if (input_aid[i].second[j] != "")
+          max_count++;
+
+      if (max_count > 0) {
+        // NOTE(review): only the first max_count elements of `arrays` are
+        // compared, exactly as in the original - confirm whether the whole
+        // of `arrays` was meant to be searched.
+        for (int j = 0; j < max_count; j++) {
+          std::string s = input_aid[i].second[j];
+          bool found = false;
+          for (int k = 0; k < max_count; k++)
+            if (s == arrays[k])
+              found = true;
+          if (!found)
+            throw loop_error("guard condition not solvable");
+        }
+      } else {
+        // Scalar entry: append its name unless already listed.
+        bool found = false;
+        for (int k = 0; k < arrays.size(); k++)
+          if (arrays[k] == input_aid[i].first)
+            found = true;
+        if (!found)
+          arrays.push_back(input_aid[i].first);
+      }
+    }
+  } else {
+    for (int i = input_aid.size() - 1; i >= 0; i--)
+      arrays.push_back(input_aid[i].first);
+  }
+  return arrays;
+}
+
+
+
diff --git a/src/transformations/loop_basic.cc b/src/transformations/loop_basic.cc
new file mode 100644
index 0000000..a058598
--- /dev/null
+++ b/src/transformations/loop_basic.cc
@@ -0,0 +1,1839 @@
+/*
+ * loop_basic.cc
+ *
+ * Created on: Nov 12, 2012
+ * Author: anand
+ */
+
+#include "loop.hh"
+#include "chill_error.hh"
+#include <omega.h>
+#include "omegatools.hh"
+#include <string.h>
+
+#include <code_gen/CG_utils.h>
+
+using namespace omega;
+
+// Convenience overload: gathers the indices of all statements in `stmt` and
+// forwards them, together with `pi`, to permute(active, pi).
+void Loop::permute(const std::vector<int> &pi) {
+  std::set<int> all_stmts;
+  int s = 0;
+  while (s < stmt.size()) {
+    all_stmts.insert(s);
+    ++s;
+  }
+
+  permute(all_stmts, pi);
+}
+
+// Calls setLexicalOrder(0, ...) on the set of every statement index.
+// Presumably this resets the statements to their original lexical order
+// (inferred from the name; setLexicalOrder is defined elsewhere).
+void Loop::original() {
+  std::set<int> every_stmt;
+  int s = 0;
+  while (s < stmt.size()) {
+    every_stmt.insert(s);
+    ++s;
+  }
+  setLexicalOrder(0, every_stmt);
+  //apply_xform();
+}
+// Permutes a contiguous band of loops, starting at loop level `level`, for
+// the statements that share a loop nest with statement `stmt_num`.
+//
+//   stmt_num  reference statement; must index into stmt.
+//   level     0 selects every statement and permutes from level 1.
+//             Otherwise the affected statements are those returned by
+//             getStatements() for stmt_num's lexical order at dimension
+//             2*level-2.
+//   pi        permutation of the 1-based levels level..level+pi.size()-1;
+//             the loop at new level (level + k) is the old level pi[k].
+//
+// Throws std::invalid_argument for a bad statement number, loop level or
+// permutation, and loop_error when the dependence graph or the loop level
+// information cannot be updated.
+//
+// Steps: validate, invalidate cached codegen results, compose the
+// permutation onto each statement's xform, permute the dependence vectors,
+// rebuild loop_level, then re-establish the lexical order.
+//
+// NOTE(review): when none of the permuted levels is LoopLevelOriginal the
+// function returns right after updating xform, without touching loop_level
+// or calling setLexicalOrder.  Kept as in the original; confirm intent.
+void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) {
+  // check for sanity of parameters
+  int starting_order;
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument(
+        "invalid statement number " + to_string(stmt_num));
+  std::set<int> active;
+  if (level < 0 || level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument("3invalid loop level " + to_string(level));
+  else if (level == 0) {
+    for (int i = 0; i < stmt.size(); i++)
+      active.insert(i);
+    level = 1;
+    starting_order = 0;
+  } else {
+    std::vector<int> lex = getLexicalOrder(stmt_num);
+    active = getStatements(lex, 2 * level - 2);
+    starting_order = lex[2 * level - 2];
+    lex[2 * level - 2]++;
+    shiftLexicalOrder(lex, 2 * level - 2, active.size() - 1);
+  }
+  // pi_inverse[old - level] = new level of the loop that was at `old`;
+  // 0 means "not assigned yet", which also detects duplicates in pi.
+  std::vector<int> pi_inverse(pi.size(), 0);
+  for (int i = 0; i < pi.size(); i++) {
+    if (pi[i] >= level + pi.size() || pi[i] < level
+        || pi_inverse[pi[i] - level] != 0)
+      throw std::invalid_argument("invalid permuation");
+    pi_inverse[pi[i] - level] = level + i;
+  }
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
+    if (level + pi.size() - 1 > stmt[*i].loop_level.size())
+      throw std::invalid_argument(
+          "invalid permutation for statement " + to_string(*i));
+
+  // invalidate saved codegen computation
+  delete last_compute_cgr_;
+  last_compute_cgr_ = NULL;
+  delete last_compute_cg_;
+  last_compute_cg_ = NULL;
+
+  // Update transformation relations
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+    int n = stmt[*i].xform.n_out();
+    Relation mapping(n, n);
+    F_And *f_root = mapping.add_and();
+    // everything in front of the permuted band is left untouched
+    for (int j = 1; j <= 2 * level - 2; j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(j), 1);
+      h.update_coef(mapping.input_var(j), -1);
+    }
+    // loop variables inside the band: new level j <- old level pi[j-level]
+    for (int j = level; j <= level + pi.size() - 1; j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(2 * j), 1);
+      h.update_coef(mapping.input_var(2 * pi[j - level]), -1);
+    }
+    // the odd (constant) positions inside the band stay in place
+    for (int j = level; j <= level + pi.size() - 1; j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(2 * j - 1), 1);
+      h.update_coef(mapping.input_var(2 * j - 1), -1);
+    }
+    // everything behind the band is left untouched
+    for (int j = 2 * (level + pi.size() - 1) + 1; j <= n; j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(j), 1);
+      h.update_coef(mapping.input_var(j), -1);
+    }
+    stmt[*i].xform = Composition(mapping, stmt[*i].xform);
+    stmt[*i].xform.simplify();
+  }
+
+  // get the permuation for dependence vectors
+  std::vector<int> t;
+  for (int i = 0; i < pi.size(); i++)
+    if (stmt[stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal)
+      t.push_back(stmt[stmt_num].loop_level[pi[i] - 1].payload);
+  int max_dep_dim = -1;
+  int min_dep_dim = dep.num_dim();
+  for (int i = 0; i < t.size(); i++) {
+    if (t[i] > max_dep_dim)
+      max_dep_dim = t[i];
+    if (t[i] < min_dep_dim)
+      min_dep_dim = t[i];
+  }
+  if (min_dep_dim > max_dep_dim)
+    return;
+  // the touched dependence dimensions must form one contiguous range
+  if (max_dep_dim - min_dep_dim + 1 != t.size())
+    throw loop_error("cannot update the dependence graph after permuation");
+  std::vector<int> dep_pi(dep.num_dim());
+  for (int i = 0; i < min_dep_dim; i++)
+    dep_pi[i] = i;
+  for (int i = min_dep_dim; i <= max_dep_dim; i++)
+    dep_pi[i] = t[i - min_dep_dim];
+  for (int i = max_dep_dim + 1; i < dep.num_dim(); i++)
+    dep_pi[i] = i;
+
+  dep.permute(dep_pi, active);
+
+  // update the dependence graph
+  DependenceGraph g(dep.num_dim());
+  for (int i = 0; i < dep.vertex.size(); i++)
+    g.insert();
+  for (int i = 0; i < dep.vertex.size(); i++)
+    for (DependenceGraph::EdgeList::iterator j =
+        dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+        j++) {
+      if ((active.find(i) != active.end()
+          && active.find(j->first) != active.end())) {
+        // both ends permuted: reorder the distance bounds
+        std::vector<DependenceVector> dv = j->second;
+        for (int k = 0; k < dv.size(); k++) {
+          switch (dv[k].type) {
+          case DEP_W2R:
+          case DEP_R2W:
+          case DEP_W2W:
+          case DEP_R2R: {
+            std::vector<coef_t> lbounds(dep.num_dim());
+            std::vector<coef_t> ubounds(dep.num_dim());
+            for (int d = 0; d < dep.num_dim(); d++) {
+              lbounds[d] = dv[k].lbounds[dep_pi[d]];
+              ubounds[d] = dv[k].ubounds[dep_pi[d]];
+            }
+            dv[k].lbounds = lbounds;
+            dv[k].ubounds = ubounds;
+            break;
+          }
+          case DEP_CONTROL: {
+            break;
+          }
+          default:
+            throw loop_error("unknown dependence type");
+          }
+        }
+        g.connect(i, j->first, dv);
+      } else if (active.find(i) == active.end()
+          && active.find(j->first) == active.end()) {
+        // neither end permuted: copy unchanged
+        std::vector<DependenceVector> dv = j->second;
+        g.connect(i, j->first, dv);
+      } else {
+        // exactly one end permuted: distances in the moved dimensions
+        // become unknown
+        std::vector<DependenceVector> dv = j->second;
+        for (int k = 0; k < dv.size(); k++)
+          switch (dv[k].type) {
+          case DEP_W2R:
+          case DEP_R2W:
+          case DEP_W2W:
+          case DEP_R2R: {
+            for (int d = 0; d < dep.num_dim(); d++)
+              if (dep_pi[d] != d) {
+                dv[k].lbounds[d] = -posInfinity;
+                dv[k].ubounds[d] = posInfinity;
+              }
+            break;
+          }
+          case DEP_CONTROL:
+            break;
+          default:
+            throw loop_error("unknown dependence type");
+          }
+        g.connect(i, j->first, dv);
+      }
+    }
+  dep = g;
+
+  // update loop level information
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+    int cur_dep_dim = min_dep_dim;
+    std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size());
+    for (int j = 1; j <= stmt[*i].loop_level.size(); j++)
+      if (j >= level && j < level + pi.size()) {
+        switch (stmt[*i].loop_level[pi_inverse[j - level] - 1].type) {
+        case LoopLevelOriginal:
+          new_loop_level[j - 1].type = LoopLevelOriginal;
+          new_loop_level[j - 1].payload = cur_dep_dim++;
+          new_loop_level[j - 1].parallel_level =
+              stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level;
+          break;
+        case LoopLevelTile: {
+          new_loop_level[j - 1].type = LoopLevelTile;
+          int ref_level = stmt[*i].loop_level[pi_inverse[j - level]
+              - 1].payload;
+          if (ref_level >= level && ref_level < level + pi.size())
+            new_loop_level[j - 1].payload = pi_inverse[ref_level
+                - level];
+          else
+            new_loop_level[j - 1].payload = ref_level;
+          // Take parallel_level from the same source entry as the type and
+          // payload.  The previous code read slot j-1 here, unlike the
+          // LoopLevelOriginal case above and the set-based overload.
+          new_loop_level[j - 1].parallel_level =
+              stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level;
+          break;
+        }
+        default:
+          throw loop_error(
+              "unknown loop level information for statement "
+                  + to_string(*i));
+        }
+      } else {
+        switch (stmt[*i].loop_level[j - 1].type) {
+        case LoopLevelOriginal:
+          new_loop_level[j - 1].type = LoopLevelOriginal;
+          new_loop_level[j - 1].payload =
+              stmt[*i].loop_level[j - 1].payload;
+          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+              - 1].parallel_level;
+          break;
+        case LoopLevelTile: {
+          // a tile loop outside the band may still refer to a level inside
+          new_loop_level[j - 1].type = LoopLevelTile;
+          int ref_level = stmt[*i].loop_level[j - 1].payload;
+          if (ref_level >= level && ref_level < level + pi.size())
+            new_loop_level[j - 1].payload = pi_inverse[ref_level
+                - level];
+          else
+            new_loop_level[j - 1].payload = ref_level;
+          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+              - 1].parallel_level;
+          break;
+        }
+        default:
+          throw loop_error(
+              "unknown loop level information for statement "
+                  + to_string(*i));
+        }
+      }
+    stmt[*i].loop_level = new_loop_level;
+  }
+
+  setLexicalOrder(2 * level - 2, active, starting_order);
+}
+// Permutes a contiguous band of loops for the statements in `active`.
+//
+//   active  statement indices to transform; nothing happens if empty.
+//   pi      permutation of 1-based loop levels.  The band starts at the
+//           smallest value in pi and spans pi.size() levels; the loop at new
+//           level (start + k) is the old level pi[k].  Nothing happens if
+//           pi is empty.
+//
+// All active statements must agree with the first one on the lexical order
+// in front of the band and on the type and payload of every band level.
+//
+// Throws std::invalid_argument for a bad statement, a malformed permutation,
+// a statement with fewer loop levels than the band requires, or statements
+// that are not in the same subloop.  Throws loop_error when the dependence
+// graph or the loop level information cannot be updated.
+//
+// NOTE(review): when none of the permuted levels is LoopLevelOriginal the
+// function returns right after updating xform, without touching loop_level
+// or calling setLexicalOrder.  Kept as in the original; confirm intent.
+void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
+  if (active.size() == 0 || pi.size() == 0)
+    return;
+
+  // check for sanity of parameters
+  int level = pi[0];
+  for (int i = 1; i < pi.size(); i++)
+    if (pi[i] < level)
+      level = pi[i];
+  if (level < 1)
+    throw std::invalid_argument("invalid permuation");
+  // reverse_pi[old - level] = new level of the loop that was at `old`;
+  // a slot still 0 afterwards means pi is not a permutation.
+  std::vector<int> reverse_pi(pi.size(), 0);
+  for (int i = 0; i < pi.size(); i++)
+    if (pi[i] >= level + pi.size())
+      throw std::invalid_argument("invalid permutation");
+    else
+      reverse_pi[pi[i] - level] = i + level;
+  for (int i = 0; i < reverse_pi.size(); i++)
+    if (reverse_pi[i] == 0)
+      throw std::invalid_argument("invalid permuation");
+  int ref_stmt_num;
+  std::vector<int> lex;
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+    if (*i < 0 || *i >= stmt.size())
+      throw std::invalid_argument("invalid statement " + to_string(*i));
+    // Every statement, including the reference one, needs enough loop
+    // levels for the band.  The reference statement used to skip this
+    // check and its loop_level was then indexed unchecked further down.
+    if (level + pi.size() - 1 > stmt[*i].loop_level.size())
+      throw std::invalid_argument("invalid permuation");
+    if (i == active.begin()) {
+      ref_stmt_num = *i;
+      lex = getLexicalOrder(*i);
+    } else {
+      std::vector<int> lex2 = getLexicalOrder(*i);
+      for (int j = 0; j < 2 * level - 3; j += 2)
+        if (lex[j] != lex2[j])
+          throw std::invalid_argument(
+              "statements to permute must be in the same subloop");
+      for (int j = 0; j < pi.size(); j++)
+        if (!(stmt[*i].loop_level[level + j - 1].type
+            == stmt[ref_stmt_num].loop_level[level + j - 1].type
+            && stmt[*i].loop_level[level + j - 1].payload
+                == stmt[ref_stmt_num].loop_level[level + j - 1].payload))
+          throw std::invalid_argument(
+              "permuted loops must have the same loop level types");
+    }
+  }
+  // invalidate saved codegen computation
+  delete last_compute_cgr_;
+  last_compute_cgr_ = NULL;
+  delete last_compute_cg_;
+  last_compute_cg_ = NULL;
+
+  // Update transformation relations
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+    int n = stmt[*i].xform.n_out();
+    Relation mapping(n, n);
+    F_And *f_root = mapping.add_and();
+    // odd (constant) positions are never moved
+    for (int j = 1; j <= n; j += 2) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(j), 1);
+      h.update_coef(mapping.input_var(j), -1);
+    }
+    // loop variables inside the band: new level (level+j) <- old level pi[j]
+    for (int j = 0; j < pi.size(); j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(2 * (level + j)), 1);
+      h.update_coef(mapping.input_var(2 * pi[j]), -1);
+    }
+    // loop variables in front of the band
+    for (int j = 1; j < level; j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(2 * j), 1);
+      h.update_coef(mapping.input_var(2 * j), -1);
+    }
+    // loop variables behind the band
+    for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.output_var(2 * j), 1);
+      h.update_coef(mapping.input_var(2 * j), -1);
+    }
+
+    stmt[*i].xform = Composition(mapping, stmt[*i].xform);
+    stmt[*i].xform.simplify();
+  }
+
+  // get the permuation for dependence vectors
+  std::vector<int> t;
+  for (int i = 0; i < pi.size(); i++)
+    if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal)
+      t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload);
+  int max_dep_dim = -1;
+  int min_dep_dim = num_dep_dim;
+  for (int i = 0; i < t.size(); i++) {
+    if (t[i] > max_dep_dim)
+      max_dep_dim = t[i];
+    if (t[i] < min_dep_dim)
+      min_dep_dim = t[i];
+  }
+  if (min_dep_dim > max_dep_dim)
+    return;
+  // the touched dependence dimensions must form one contiguous range
+  if (max_dep_dim - min_dep_dim + 1 != t.size())
+    throw loop_error("cannot update the dependence graph after permuation");
+  std::vector<int> dep_pi(num_dep_dim);
+  for (int i = 0; i < min_dep_dim; i++)
+    dep_pi[i] = i;
+  for (int i = min_dep_dim; i <= max_dep_dim; i++)
+    dep_pi[i] = t[i - min_dep_dim];
+  for (int i = max_dep_dim + 1; i < num_dep_dim; i++)
+    dep_pi[i] = i;
+
+  dep.permute(dep_pi, active);
+
+  // update the dependence graph
+  DependenceGraph g(dep.num_dim());
+  for (int i = 0; i < dep.vertex.size(); i++)
+    g.insert();
+  for (int i = 0; i < dep.vertex.size(); i++)
+    for (DependenceGraph::EdgeList::iterator j =
+        dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+        j++) { //
+      if ((active.find(i) != active.end()
+          && active.find(j->first) != active.end())) {
+        // both ends permuted: reorder the distance bounds
+        std::vector<DependenceVector> dv = j->second;
+        for (int k = 0; k < dv.size(); k++) {
+          switch (dv[k].type) {
+          case DEP_W2R:
+          case DEP_R2W:
+          case DEP_W2W:
+          case DEP_R2R: {
+            std::vector<coef_t> lbounds(num_dep_dim);
+            std::vector<coef_t> ubounds(num_dep_dim);
+            for (int d = 0; d < num_dep_dim; d++) {
+              lbounds[d] = dv[k].lbounds[dep_pi[d]];
+              ubounds[d] = dv[k].ubounds[dep_pi[d]];
+            }
+            dv[k].lbounds = lbounds;
+            dv[k].ubounds = ubounds;
+            break;
+          }
+          case DEP_CONTROL: {
+            break;
+          }
+          default:
+            throw loop_error("unknown dependence type");
+          }
+        }
+        g.connect(i, j->first, dv);
+      } else if (active.find(i) == active.end()
+          && active.find(j->first) == active.end()) {
+        // neither end permuted: copy unchanged
+        std::vector<DependenceVector> dv = j->second;
+        g.connect(i, j->first, dv);
+      } else {
+        // exactly one end permuted: distances in the moved dimensions
+        // become unknown
+        std::vector<DependenceVector> dv = j->second;
+        for (int k = 0; k < dv.size(); k++)
+          switch (dv[k].type) {
+          case DEP_W2R:
+          case DEP_R2W:
+          case DEP_W2W:
+          case DEP_R2R: {
+            for (int d = 0; d < num_dep_dim; d++)
+              if (dep_pi[d] != d) {
+                dv[k].lbounds[d] = -posInfinity;
+                dv[k].ubounds[d] = posInfinity;
+              }
+            break;
+          }
+          case DEP_CONTROL:
+            break;
+          default:
+            throw loop_error("unknown dependence type");
+          }
+        g.connect(i, j->first, dv);
+      }
+    }
+  dep = g;
+
+  // update loop level information
+  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
+    int cur_dep_dim = min_dep_dim;
+    std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size());
+    for (int j = 1; j <= stmt[*i].loop_level.size(); j++)
+      if (j >= level && j < level + pi.size()) {
+        switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) {
+        case LoopLevelOriginal:
+          new_loop_level[j - 1].type = LoopLevelOriginal;
+          new_loop_level[j - 1].payload = cur_dep_dim++;
+          new_loop_level[j - 1].parallel_level =
+              stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level;
+          break;
+        case LoopLevelTile: {
+          new_loop_level[j - 1].type = LoopLevelTile;
+          int ref_level = stmt[*i].loop_level[reverse_pi[j - level]-1].payload;
+          if (ref_level >= level && ref_level < level + pi.size())
+            new_loop_level[j - 1].payload = reverse_pi[ref_level
+                - level];
+          else
+            new_loop_level[j - 1].payload = ref_level;
+          new_loop_level[j - 1].parallel_level =
+              stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level;
+          break;
+        }
+        default:
+          throw loop_error(
+              "unknown loop level information for statement "
+                  + to_string(*i));
+        }
+      } else {
+        switch (stmt[*i].loop_level[j - 1].type) {
+        case LoopLevelOriginal:
+          new_loop_level[j - 1].type = LoopLevelOriginal;
+          new_loop_level[j - 1].payload =
+              stmt[*i].loop_level[j - 1].payload;
+          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+              - 1].parallel_level;
+          break;
+        case LoopLevelTile: {
+          // a tile loop outside the band may still refer to a level inside
+          new_loop_level[j - 1].type = LoopLevelTile;
+          int ref_level = stmt[*i].loop_level[j - 1].payload;
+          if (ref_level >= level && ref_level < level + pi.size())
+            new_loop_level[j - 1].payload = reverse_pi[ref_level
+                - level];
+          else
+            new_loop_level[j - 1].payload = ref_level;
+          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+              - 1].parallel_level;
+          break;
+        }
+        default:
+          throw loop_error(
+              "unknown loop level information for statement "
+                  + to_string(*i));
+        }
+      }
+    stmt[*i].loop_level = new_loop_level;
+  }
+
+  setLexicalOrder(2 * level - 2, active);
+}
+
+
+// Inserts the pair (name, size) into array_dims.
+void Loop::set_array_size(std::string name, int size ){
+  const std::pair<std::string, int> entry(name, size);
+  array_dims.insert(entry);
+}
+
+
+// Returns true when constraint `changed` is not the same as constraint
+// `orig`: either their leading variables have different names or, walking
+// both variable lists in lock step, a coefficient, the constant term or a
+// variable name differs.  `position` receives get_position() of the leading
+// variable of `orig`.
+template<typename ConstraintHandle>
+static bool split_constraint_differs(ConstraintHandle orig,
+    ConstraintHandle changed, int &position) {
+  Constr_Vars_Iter cvi1(orig);
+  Constr_Vars_Iter cvi2(changed);
+  position = (*cvi1).var->get_position();
+  if (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()) != 0)
+    return true;
+  for (; cvi1 && cvi2; cvi1++, cvi2++)
+    if ((*cvi1).coef != (*cvi2).coef
+        || orig.get_const() != changed.get_const()
+        || strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()) != 0)
+      return true;
+  return false;
+}
+
+// Throws loop_error when any non-control dependence leaving one of the first
+// `num_stmt` vertices of `dep` has a possibly negative distance in dimension
+// `dep_dim` and is not marked quasi.
+static void split_reject_negative_dependence(DependenceGraph &dep,
+    int num_stmt, int dep_dim) {
+  for (int s = 0; s < num_stmt; s++)
+    for (DependenceGraph::EdgeList::iterator e =
+        dep.vertex[s].second.begin(); e != dep.vertex[s].second.end();
+        e++)
+      for (int k = 0; k < e->second.size(); k++) {
+        DependenceVector dv = e->second[k];
+        if (dv.type != DEP_CONTROL && dv.hasNegative(dep_dim) && !dv.quasi)
+          throw loop_error(
+              "loop error: Split is illegal, dependence violation!");
+      }
+}
+
+// Splits the loop at `level` that encloses statement `stmt_num` by `cond`.
+//
+// `cond` is simplified, its equalities are rewritten as inequalities, and
+// each resulting inequality is handled in turn.  For every statement in the
+// same loop (per getStatements), the iteration space is restricted to the
+// part satisfying the inequality.  If the complementary part is satisfiable
+// together with `known`, a clone of the statement is appended for it and is
+// placed lexically right after or right before the original, depending on
+// the sign of the coefficient of the innermost variable in the inequality.
+// The dependence graph gets a vertex and edges for every clone.
+//
+// Returns the indices of the new statements that were cloned from stmt_num.
+// Throws std::invalid_argument for a bad statement number or level, and
+// loop_error when the split would violate a dependence.
+//
+// Changes relative to the previous revision:
+//  - The inequality comparison loops advanced the outer condition iterator
+//    `gi` instead of their own iterator.
+//  - Three of the four tile-chain walks were statements without effect and
+//    never terminated on a tile level.  The fourth used the tile payload as
+//    a 0-based index, while the payload is a 1-based level (see permute).
+//    All four now go through get_dep_dim_of(), which this function already
+//    uses below.
+//    NOTE(review): this assumes get_dep_dim_of() follows tile levels to
+//    the underlying dependence dimension and returns -1 when there is
+//    none -- confirm against its definition.
+//  - For part1 the inequality comparison was nested inside the equality
+//    loop; it now runs once per conjunct pair, as it already did for part2.
+//  - The size_t passed to "%d" in the debug print is cast to int.
+std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
+  // check for sanity of parameters
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+  if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument("4invalid loop level " + to_string(level));
+
+  std::set<int> result;
+  int dim = 2 * level - 1;
+  std::vector<int> lex = getLexicalOrder(stmt_num);
+  std::set<int> same_loop = getStatements(lex, dim - 1);
+
+  Relation cond2 = copy(cond);
+  cond2.simplify();
+  cond2 = EQs_to_GEQs(cond2);
+  Conjunct *c = cond2.single_conjunct();
+  int cur_lex = lex[dim - 1];
+
+  for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
+    int max_level = (*gi).max_tuple_pos();
+    Relation single_cond(max_level);
+    single_cond.and_with_GEQ(*gi);
+
+    // TODO: should decide where to place newly created statements with
+    // complementary split condition from dependence graph.
+    bool place_after;
+    if (max_level == 0)
+      place_after = true;
+    else if ((*gi).get_coef(cond2.set_var(max_level)) < 0)
+      place_after = true;
+    else
+      place_after = false;
+
+    // original statements with split condition,
+    // new statements with complement of split condition
+    int old_num_stmt = stmt.size();
+    std::map<int, int> what_stmt_num;
+    apply_xform(same_loop);
+    for (std::set<int>::iterator i = same_loop.begin();
+        i != same_loop.end(); i++) {
+      int n = stmt[*i].IS.n_set();
+      Relation part1, part2;
+      if (max_level > n) {
+        // the condition mentions variables this statement does not have
+        part1 = copy(stmt[*i].IS);
+        part2 = Relation::False(0);
+      } else {
+        part1 = Intersection(copy(stmt[*i].IS),
+            Extend_Set(copy(single_cond), n - max_level));
+        part2 = Intersection(copy(stmt[*i].IS),
+            Extend_Set(Complement(copy(single_cond)),
+                n - max_level));
+      }
+
+      //split dependence check
+      if (max_level > level) {
+        // Compare the constraints of the unsplit iteration space with those
+        // of each part, pairwise in iteration order.  A constraint that
+        // changed must not sit on a dimension that carries a possibly
+        // negative dependence.
+        Relation *parts[2] = { &part1, &part2 };
+        for (int p = 0; p < 2; p++) {
+          DNF_Iterator di_orig(stmt[*i].IS.query_DNF());
+          DNF_Iterator di_part(parts[p]->query_DNF());
+          for (; di_orig && di_part; di_orig++, di_part++) {
+            int position;
+
+            EQ_Iterator ei_orig = (*di_orig)->EQs();
+            EQ_Iterator ei_part = (*di_part)->EQs();
+            for (; ei_orig && ei_part; ei_orig++, ei_part++)
+              if (split_constraint_differs(*ei_orig, *ei_part, position)) {
+                int changed_dep_dim = get_dep_dim_of(*i, position);
+                if (changed_dep_dim != -1)
+                  split_reject_negative_dependence(dep, stmt.size(),
+                      changed_dep_dim);
+              }
+
+            GEQ_Iterator gi_orig = (*di_orig)->GEQs();
+            GEQ_Iterator gi_part = (*di_part)->GEQs();
+            for (; gi_orig && gi_part; gi_orig++, gi_part++)
+              if (split_constraint_differs(*gi_orig, *gi_part, position)) {
+                int changed_dep_dim = get_dep_dim_of(*i, position);
+                if (changed_dep_dim != -1)
+                  split_reject_negative_dependence(dep, stmt.size(),
+                      changed_dep_dim);
+              }
+          }
+        }
+      }
+
+      stmt[*i].IS = part1;
+
+      // Intersect the complement with `known`, extending whichever of the
+      // two has fewer set variables.
+      int n1 = part2.n_set();
+      int m = this->known.n_set();
+      Relation test;
+      if(m > n1)
+        test = Intersection(copy(this->known),
+            Extend_Set(copy(part2), m - part2.n_set()));
+      else
+        test = Intersection(copy(part2),
+            Extend_Set(copy(this->known), n1 - this->known.n_set()));
+
+      if (test.is_upper_bound_satisfiable()) {
+        Statement new_stmt;
+        new_stmt.code = stmt[*i].code->clone();
+        new_stmt.IS = part2;
+        new_stmt.xform = copy(stmt[*i].xform);
+        new_stmt.ir_stmt_node = NULL;
+        new_stmt.loop_level = stmt[*i].loop_level;
+
+        new_stmt.has_inspector = stmt[*i].has_inspector;
+        new_stmt.reduction = stmt[*i].reduction;
+        new_stmt.reductionOp = stmt[*i].reductionOp;
+
+        stmt_nesting_level_.push_back(stmt_nesting_level_[*i]);
+
+
+        if (place_after)
+          assign_const(new_stmt.xform, dim - 1, cur_lex + 1);
+        else
+          assign_const(new_stmt.xform, dim - 1, cur_lex - 1);
+
+        fprintf(stderr, "loop_basic.cc L828 adding stmt %d\n",
+            static_cast<int>(stmt.size()));
+        stmt.push_back(new_stmt);
+
+        uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
+        uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]);
+        dep.insert();
+        what_stmt_num[*i] = stmt.size() - 1;
+        if (*i == stmt_num)
+          result.insert(stmt.size() - 1);
+      }
+
+    }
+    // make adjacent lexical number available for new statements
+    if (place_after) {
+      lex[dim - 1] = cur_lex + 1;
+      shiftLexicalOrder(lex, dim - 1, 1);
+    } else {
+      lex[dim - 1] = cur_lex - 1;
+      shiftLexicalOrder(lex, dim - 1, -1);
+    }
+    // update dependence graph
+    int dep_dim = get_dep_dim_of(stmt_num, level);
+    for (int i = 0; i < old_num_stmt; i++) {
+      // Edges that start at vertex i itself are collected here and added
+      // after the walk over vertex i's edge list has finished.
+      std::vector<std::pair<int, std::vector<DependenceVector> > > D;
+
+      for (DependenceGraph::EdgeList::iterator j =
+          dep.vertex[i].second.begin();
+          j != dep.vertex[i].second.end(); j++) {
+        if (same_loop.find(i) != same_loop.end()) {
+          if (same_loop.find(j->first) != same_loop.end()) {
+            // both ends were split: clone -> clone keeps the vectors
+            if (what_stmt_num.find(i) != what_stmt_num.end()
+                && what_stmt_num.find(j->first)
+                    != what_stmt_num.end())
+              dep.connect(what_stmt_num[i],
+                  what_stmt_num[j->first], j->second);
+            if (place_after
+                && what_stmt_num.find(j->first)
+                    != what_stmt_num.end()) {
+              // original -> clone: distance in the split dimension is
+              // no longer known
+              std::vector<DependenceVector> dvs;
+              for (int k = 0; k < j->second.size(); k++) {
+                DependenceVector dv = j->second[k];
+                if (dv.is_data_dependence() && dep_dim != -1) {
+                  dv.lbounds[dep_dim] = -posInfinity;
+                  dv.ubounds[dep_dim] = posInfinity;
+                }
+                dvs.push_back(dv);
+              }
+              if (dvs.size() > 0)
+                D.push_back(
+                    std::make_pair(what_stmt_num[j->first],
+                        dvs));
+            } else if (!place_after
+                && what_stmt_num.find(i)
+                    != what_stmt_num.end()) {
+              // clone -> original: distance in the split dimension is
+              // no longer known
+              std::vector<DependenceVector> dvs;
+              for (int k = 0; k < j->second.size(); k++) {
+                DependenceVector dv = j->second[k];
+                if (dv.is_data_dependence() && dep_dim != -1) {
+                  dv.lbounds[dep_dim] = -posInfinity;
+                  dv.ubounds[dep_dim] = posInfinity;
+                }
+                dvs.push_back(dv);
+              }
+              if (dvs.size() > 0)
+                dep.connect(what_stmt_num[i], j->first, dvs);
+
+            }
+          } else {
+            // split source, untouched sink
+            if (what_stmt_num.find(i) != what_stmt_num.end())
+              dep.connect(what_stmt_num[i], j->first, j->second);
+          }
+        } else if (same_loop.find(j->first) != same_loop.end()) {
+          // untouched source, split sink
+          if (what_stmt_num.find(j->first) != what_stmt_num.end())
+            D.push_back(
+                std::make_pair(what_stmt_num[j->first],
+                    j->second));
+        }
+      }
+
+      for (int j = 0; j < D.size(); j++)
+        dep.connect(i, D[j].first, D[j].second);
+    }
+
+  }
+
+  return result;
+}
+
+// Skews the loop at `level` for the statements in `stmt_nums`.
+//
+//   stmt_nums    statements to transform; nothing happens if empty.
+//   level        1-based loop level whose index is replaced.
+//   skew_amount  coefficients: the new index at `level` equals the sum over
+//                j of skew_amount[j] * (index of loop level j + 1).  Entries
+//                beyond a statement's loop depth must be zero.
+//
+// Throws std::invalid_argument for a bad statement number, level or skewing
+// formula, and loop_error when the skewed dimension would carry a possibly
+// negative, non-quasi dependence.
+//
+// The dependence graph is only updated when `level` is a LoopLevelOriginal
+// loop of the first statement in stmt_nums.
+//
+// The bound assignment and legality check after the accumulation loop used
+// to appear twice back to back; the second copy has been removed.
+// NOTE(review): this assumes isCarried/hasPositive/hasNegative are pure
+// queries, so that the repeated copy had no additional effect.
+void Loop::skew(const std::set<int> &stmt_nums, int level,
+    const std::vector<int> &skew_amount) {
+  if (stmt_nums.size() == 0)
+    return;
+
+  // check for sanity of parameters
+  int ref_stmt_num = *(stmt_nums.begin());
+  for (std::set<int>::const_iterator i = stmt_nums.begin();
+      i != stmt_nums.end(); i++) {
+    if (*i < 0 || *i >= stmt.size())
+      throw std::invalid_argument(
+          "invalid statement number " + to_string(*i));
+    if (level < 1 || level > stmt[*i].loop_level.size())
+      throw std::invalid_argument(
+          "5invalid loop level " + to_string(level));
+    for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++)
+      if (skew_amount[j] != 0)
+        throw std::invalid_argument("invalid skewing formula");
+  }
+
+  // invalidate saved codegen computation
+  delete last_compute_cgr_;
+  last_compute_cgr_ = NULL;
+  delete last_compute_cg_;
+  last_compute_cg_ = NULL;
+
+  // set trasformation relations
+  for (std::set<int>::const_iterator i = stmt_nums.begin();
+      i != stmt_nums.end(); i++) {
+    int n = stmt[*i].xform.n_out();
+    Relation r(n, n);
+    F_And *f_root = r.add_and();
+    // identity on every position except the skewed loop variable
+    for (int j = 1; j <= n; j++)
+      if (j != 2 * level) {
+        EQ_Handle h = f_root->add_EQ();
+        h.update_coef(r.input_var(j), 1);
+        h.update_coef(r.output_var(j), -1);
+      }
+    // skewed variable = linear combination of the input loop variables
+    EQ_Handle h = f_root->add_EQ();
+    h.update_coef(r.output_var(2 * level), -1);
+    for (int j = 0; j < skew_amount.size(); j++)
+      if (skew_amount[j] != 0)
+        h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]);
+
+    stmt[*i].xform = Composition(r, stmt[*i].xform);
+    stmt[*i].xform.simplify();
+  }
+
+  // update dependence graph
+  if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
+    int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload;
+    for (std::set<int>::const_iterator i = stmt_nums.begin();
+        i != stmt_nums.end(); i++)
+      for (DependenceGraph::EdgeList::iterator j =
+          dep.vertex[*i].second.begin();
+          j != dep.vertex[*i].second.end(); j++)
+        if (stmt_nums.find(j->first) != stmt_nums.end()) {
+          // dependence between skewed statements
+          std::vector<DependenceVector> dvs = j->second;
+          for (int k = 0; k < dvs.size(); k++) {
+            DependenceVector &dv = dvs[k];
+            if (dv.is_data_dependence()) {
+              // Accumulate the new distance range term by term.  A
+              // positive coefficient maps lower bound to lower bound, a
+              // negative one swaps the roles of the two bounds.
+              coef_t lb = 0;
+              coef_t ub = 0;
+              for (int kk = 0; kk < skew_amount.size(); kk++) {
+                int cur_dep_dim = get_dep_dim_of(*i, kk + 1);
+                if (skew_amount[kk] > 0) {
+                  if (lb != -posInfinity
+                      && stmt[*i].loop_level[kk].type == LoopLevelOriginal
+                      && dv.lbounds[cur_dep_dim] != -posInfinity)
+                    lb += skew_amount[kk] * dv.lbounds[cur_dep_dim];
+                  else {
+                    if (cur_dep_dim != -1
+                        && !(dv.lbounds[cur_dep_dim] == 0
+                            && dv.ubounds[cur_dep_dim]== 0))
+                      lb = -posInfinity;
+                  }
+                  if (ub != posInfinity
+                      && stmt[*i].loop_level[kk].type == LoopLevelOriginal
+                      && dv.ubounds[cur_dep_dim] != posInfinity)
+                    ub += skew_amount[kk] * dv.ubounds[cur_dep_dim];
+                  else {
+                    if (cur_dep_dim != -1
+                        && !(dv.lbounds[cur_dep_dim] == 0
+                            && dv.ubounds[cur_dep_dim] == 0))
+                      ub = posInfinity;
+                  }
+                } else if (skew_amount[kk] < 0) {
+                  if (lb != -posInfinity
+                      && stmt[*i].loop_level[kk].type == LoopLevelOriginal
+                      && dv.ubounds[cur_dep_dim] != posInfinity)
+                    lb += skew_amount[kk] * dv.ubounds[cur_dep_dim];
+                  else {
+                    if (cur_dep_dim != -1
+                        && !(dv.lbounds[cur_dep_dim] == 0
+                            && dv.ubounds[cur_dep_dim] == 0))
+                      lb = -posInfinity;
+                  }
+                  if (ub != posInfinity
+                      && stmt[*i].loop_level[kk].type == LoopLevelOriginal
+                      && dv.lbounds[cur_dep_dim] != -posInfinity)
+                    ub += skew_amount[kk] * dv.lbounds[cur_dep_dim];
+                  else {
+                    if (cur_dep_dim != -1
+                        && !(dv.lbounds[cur_dep_dim] == 0
+                            && dv.ubounds[cur_dep_dim] == 0))
+                      ub = posInfinity;
+                  }
+                }
+              }
+              dv.lbounds[dep_dim] = lb;
+              dv.ubounds[dep_dim] = ub;
+              // a quasi dependence that is now carried with a positive
+              // distance is no longer treated as quasi
+              if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim))
+                  && dv.quasi)
+                dv.quasi = false;
+
+              if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim))
+                  && !dv.quasi)
+                throw loop_error(
+                    "loop error: Skewing is illegal, dependence violation!");
+            }
+          }
+          j->second = dvs;
+        } else {
+          // dependence from skewed statement to unskewed statement becomes jumbled,
+          // put distance value at skewed dimension to unknown
+          std::vector<DependenceVector> dvs = j->second;
+          for (int k = 0; k < dvs.size(); k++) {
+            DependenceVector &dv = dvs[k];
+            if (dv.is_data_dependence()) {
+              dv.lbounds[dep_dim] = -posInfinity;
+              dv.ubounds[dep_dim] = posInfinity;
+            }
+          }
+          j->second = dvs;
+        }
+    for (int i = 0; i < dep.vertex.size(); i++)
+      if (stmt_nums.find(i) == stmt_nums.end())
+        for (DependenceGraph::EdgeList::iterator j =
+            dep.vertex[i].second.begin();
+            j != dep.vertex[i].second.end(); j++)
+          if (stmt_nums.find(j->first) != stmt_nums.end()) {
+            // dependence from unskewed statement to skewed statement becomes jumbled,
+            // put distance value at skewed dimension to unknown
+            std::vector<DependenceVector> dvs = j->second;
+            for (int k = 0; k < dvs.size(); k++) {
+              DependenceVector &dv = dvs[k];
+              if (dv.is_data_dependence()) {
+                dv.lbounds[dep_dim] = -posInfinity;
+                dv.ubounds[dep_dim] = posInfinity;
+              }
+            }
+            j->second = dvs;
+          }
+  }
+}
+
+
+// Shift the loop at `level` by the constant `shift_amount` for every statement
+// in `stmt_nums`, then adjust the dependence bounds on edges that connect a
+// shifted statement with an unshifted one.
+//
+// An empty statement set returns at once; a zero shift returns after the
+// arguments were validated.  Throws std::invalid_argument for a statement
+// number outside [0, stmt.size()) or for a level that is not a loop level of
+// one of the statements.
+void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) {
+  typedef std::set<int>::const_iterator StmtIter;
+  typedef DependenceGraph::EdgeList::iterator EdgeIter;
+
+  if (stmt_nums.empty())
+    return;
+
+  // validate statement numbers and the loop level
+  for (StmtIter s = stmt_nums.begin(); s != stmt_nums.end(); ++s) {
+    if (*s < 0 || *s >= stmt.size())
+      throw std::invalid_argument(
+          "invalid statement number " + to_string(*s));
+    if (level < 1 || level > stmt[*s].loop_level.size())
+      throw std::invalid_argument(
+          "6invalid loop level " + to_string(level));
+  }
+
+  // a zero shift changes nothing
+  if (shift_amount == 0)
+    return;
+
+  // the first statement of the set decides how the dependence graph is updated
+  const int ref_stmt_num = *stmt_nums.begin();
+
+  // drop the saved codegen computation
+  delete last_compute_cgr_;
+  last_compute_cgr_ = NULL;
+  delete last_compute_cg_;
+  last_compute_cg_ = NULL;
+
+  // set transformation relations: compose each xform with a mapping that is
+  // the identity on every variable except variable 2*level, whose equality
+  // also carries the constant shift_amount
+  for (StmtIter s = stmt_nums.begin(); s != stmt_nums.end(); ++s) {
+    const int n_vars = stmt[*s].xform.n_out();
+
+    Relation offset(n_vars, n_vars);
+    F_And *conj = offset.add_and();
+    for (int v = 1; v <= n_vars; ++v) {
+      EQ_Handle eq = conj->add_EQ();
+      eq.update_coef(offset.input_var(v), 1);
+      eq.update_coef(offset.output_var(v), -1);
+      if (v == 2 * level)
+        eq.update_const(shift_amount);
+    }
+
+    stmt[*s].xform = Composition(offset, stmt[*s].xform);
+    stmt[*s].xform.simplify();
+  }
+
+  // the dependence graph is only touched for an original loop level
+  if (stmt[ref_stmt_num].loop_level[level - 1].type != LoopLevelOriginal)
+    return;
+
+  const int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload;
+
+  // dependence from shifted statement to unshifted statement:
+  // finite bounds are decreased by shift_amount
+  for (StmtIter s = stmt_nums.begin(); s != stmt_nums.end(); ++s) {
+    for (EdgeIter e = dep.vertex[*s].second.begin();
+         e != dep.vertex[*s].second.end(); ++e) {
+      if (stmt_nums.find(e->first) != stmt_nums.end())
+        continue;
+
+      std::vector<DependenceVector> updated = e->second;
+      for (int k = 0; k < updated.size(); ++k) {
+        DependenceVector &dv = updated[k];
+        if (!dv.is_data_dependence())
+          continue;
+        if (dv.lbounds[dep_dim] != -posInfinity)
+          dv.lbounds[dep_dim] -= shift_amount;
+        if (dv.ubounds[dep_dim] != posInfinity)
+          dv.ubounds[dep_dim] -= shift_amount;
+      }
+      e->second = updated;
+    }
+  }
+
+  // dependence from unshifted statement to shifted statement:
+  // finite bounds are increased by shift_amount
+  for (int src = 0; src < dep.vertex.size(); ++src) {
+    if (stmt_nums.find(src) != stmt_nums.end())
+      continue;
+
+    for (EdgeIter e = dep.vertex[src].second.begin();
+         e != dep.vertex[src].second.end(); ++e) {
+      if (stmt_nums.find(e->first) == stmt_nums.end())
+        continue;
+
+      std::vector<DependenceVector> updated = e->second;
+      for (int k = 0; k < updated.size(); ++k) {
+        DependenceVector &dv = updated[k];
+        if (!dv.is_data_dependence())
+          continue;
+        if (dv.lbounds[dep_dim] != -posInfinity)
+          dv.lbounds[dep_dim] += shift_amount;
+        if (dv.ubounds[dep_dim] != posInfinity)
+          dv.ubounds[dep_dim] += shift_amount;
+      }
+      e->second = updated;
+    }
+  }
+}
+
+// Scale the loop at `level` by `scale_amount` for the statements in
+// `stmt_nums`.  Implemented as a call to skew() with a coefficient vector of
+// length `level` that is zero everywhere except for `scale_amount` in its
+// last position.
+//
+// Throws std::invalid_argument when `level` is smaller than 1; anything
+// skew() throws propagates unchanged.
+void Loop::scale(const std::set<int> &stmt_nums, int level, int scale_amount) {
+  // `level` both sizes and indexes the coefficient vector, so it has to be
+  // checked before the vector is built: level == 0 would write to
+  // skew_amount[-1], and a negative level would be converted to a huge
+  // unsigned element count.
+  if (level < 1)
+    throw std::invalid_argument(
+        "invalid loop level " + to_string(level));
+
+  std::vector<int> skew_amount(level, 0);
+  skew_amount[level - 1] = scale_amount;
+  skew(stmt_nums, level, skew_amount);
+}
+
+void Loop::reverse(const std::set<int> &stmt_nums, int level) { // reverse the loop at `level` for `stmt_nums`
+ scale(stmt_nums, level, -1); // implemented as scale() with a factor of -1; arguments are passed through unchecked
+}
+
+// Fuse the loops at `level` that contain the statements in `stmt_nums`.
+//
+// Fewer than two statements is a no-op.  The pending transformations are
+// applied first (apply_xform()).  All statements must agree on the even
+// lexical-order entries in front of this level, otherwise
+// std::invalid_argument is thrown; the same exception reports an out-of-range
+// statement number or a non-positive level.  If every statement already has
+// the same lexical value at this level nothing else is done.
+//
+// Fusion is refused with loop_error when, between two of the groups to be
+// fused, there is a dependence vector that is carried at the dependence
+// dimension of this level and has a negative component there, or when
+// construct_induced_graph_at_level()/typed_fusion() report a loop_error.
+// On success the constant at xform position 2*level-2 of every statement in
+// the enclosing loop is renumbered according to the groups returned by
+// typed_fusion().
+//
+// NOTE(review): the upper-bound check on `level` is commented out below, so a
+// level larger than the nesting depth is not rejected here -- confirm that
+// this is intentional before relying on it.
+void Loop::fuse(const std::set<int> &stmt_nums, int level) {
+  if (stmt_nums.size() == 0 || stmt_nums.size() == 1)
+    return;
+
+  // invalidate saved codegen computation
+  delete last_compute_cgr_;
+  last_compute_cgr_ = NULL;
+  delete last_compute_cg_;
+  last_compute_cg_ = NULL;
+
+  int dim = 2 * level - 1;
+  // check for sanity of parameters
+  std::vector<int> ref_lex;
+  // the loop below overwrites this in its first iteration; the initializer
+  // only makes sure the variable never holds an indeterminate value
+  int ref_stmt_num = *(stmt_nums.begin());
+  apply_xform();
+  for (std::set<int>::const_iterator i = stmt_nums.begin();
+       i != stmt_nums.end(); i++) {
+    if (*i < 0 || *i >= stmt.size()) {
+      // size() is a size_t; cast it so that it matches the %d conversion
+      fprintf(stderr, "statement number %d should be in [0, %d)\n", *i,
+              static_cast<int>(stmt.size()));
+      throw std::invalid_argument(
+          "FUSE invalid statement number " + to_string(*i));
+    }
+    if (level <= 0
+        // || (level > (stmt[*i].xform.n_out() - 1) / 2
+        // || level > stmt[*i].loop_level.size())
+        ) {
+      fprintf(stderr, "FUSE level %d ", level);
+      fprintf(stderr, "must be greater than zero and \n");
+      fprintf(stderr, "must NOT be greater than (%d - 1)/2 == %d and\n",
+              stmt[*i].xform.n_out(), (stmt[*i].xform.n_out() - 1) / 2);
+      fprintf(stderr, "must NOT be greater than %d\n",
+              static_cast<int>(stmt[*i].loop_level.size()));
+      throw std::invalid_argument(
+          "FUSE invalid loop level " + to_string(level));
+    }
+    if (ref_lex.size() == 0) {
+      ref_lex = getLexicalOrder(*i);
+      ref_stmt_num = *i;
+    } else {
+      std::vector<int> lex = getLexicalOrder(*i);
+      for (int j = 0; j < dim - 1; j += 2)
+        if (lex[j] != ref_lex[j])
+          throw std::invalid_argument(
+              "statements for fusion must be in the same level-"
+              + to_string(level - 1) + " subloop");
+    }
+  }
+
+  // collect lexicographical order values from to-be-fused statements
+  std::set<int> lex_values;
+  for (std::set<int>::const_iterator i = stmt_nums.begin();
+       i != stmt_nums.end(); i++) {
+    std::vector<int> lex = getLexicalOrder(*i);
+    lex_values.insert(lex[dim - 1]);
+  }
+  // a single lexical value means the statements already share one loop
+  if (lex_values.size() == 1)
+    return;
+
+  // negative dependence would prevent fusion
+  int dep_dim = get_dep_dim_of(ref_stmt_num, level);
+
+  // compare every pair of distinct lexical values; `a` and `b` are the
+  // statement sets getStatements() returns for the two values
+  for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end();
+       i++) {
+    ref_lex[dim - 1] = *i;
+    std::set<int> a = getStatements(ref_lex, dim - 1);
+    std::set<int>::iterator j = i;
+    j++;
+    for (; j != lex_values.end(); j++) {
+      ref_lex[dim - 1] = *j;
+      std::set<int> b = getStatements(ref_lex, dim - 1);
+      for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++)
+        for (std::set<int>::iterator jj = b.begin(); jj != b.end();
+             jj++) {
+          std::vector<DependenceVector> dvs;
+          dvs = dep.getEdge(*ii, *jj);
+          for (int k = 0; k < dvs.size(); k++)
+            if (dvs[k].isCarried(dep_dim)
+                && dvs[k].hasNegative(dep_dim))
+              throw loop_error(
+                  "loop error: statements " + to_string(*ii)
+                  + " and " + to_string(*jj)
+                  + " cannot be fused together due to negative dependence");
+          dvs = dep.getEdge(*jj, *ii);
+          for (int k = 0; k < dvs.size(); k++)
+            if (dvs[k].isCarried(dep_dim)
+                && dvs[k].hasNegative(dep_dim))
+              throw loop_error(
+                  "loop error: statements " + to_string(*jj)
+                  + " and " + to_string(*ii)
+                  + " cannot be fused together due to negative dependence");
+        }
+    }
+  }
+
+  // all statements of the enclosing loop, grouped by sort_by_same_loops()
+  std::set<int> same_loop = getStatements(ref_lex, dim - 3);
+
+  std::vector<std::set<int> > s = sort_by_same_loops(same_loop, level);
+
+  // s2[i] is true when group s[i] contains one of the requested statements
+  std::vector<bool> s2;
+
+  for (int i = 0; i < s.size(); i++) {
+    s2.push_back(false);
+  }
+
+  for (std::set<int>::iterator kk = stmt_nums.begin(); kk != stmt_nums.end();
+       kk++)
+    for (int i = 0; i < s.size(); i++)
+      if (s[i].find(*kk) != s[i].end()) {
+
+        s2[i] = true;
+      }
+
+  try {
+
+    //Dependence Check for Ordering Constraint
+    //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5,
+    // dep, dep_dim);
+
+    Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s, dep,
+        dep_dim);
+    // NOTE(review): unconditional dump of the graph to stdout; looks like
+    // leftover debugging output -- confirm before removing
+    std::cout << g;
+    s = typed_fusion(g, s2);
+  } catch (const loop_error &e) {
+
+    throw loop_error(
+        "statements cannot be fused together due to negative dependence");
+
+  }
+
+  // renumber the groups returned by typed_fusion() in order
+  int order = 0;
+  for (int i = 0; i < s.size(); i++) {
+    for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); it++) {
+      assign_const(stmt[*it].xform, 2 * level - 2, order);
+    }
+    order++;
+  }
+
+
+  //plan for selective typed fusion
+
+  /*
+    1. sort the lex values of the statements
+    2. construct induced graph on sorted statements
+    3. pick a node from the graph, check if it is before/after from the candidate set for fusion
+    equal-> set the max fused node of this node to be the start/target node for fusion
+    before -> augment and continue
+
+    4. once target node identified and is on work queue update successors and other nodes to start node
+    5. augment and continue
+    6. if all candidate nodes dont end up in start node throw error
+    7. Get nodes and update lexical values
+
+  */
+}
+
+
+/*
+ * Distribute the loop at `level` so that the statements in `stmt_nums` end up
+ * in separate loops wherever the dependences allow it.
+ *
+ * Fewer than two statements is a no-op.  Throws std::invalid_argument for a
+ * statement number outside [0, stmt.size()), for a level below 1 or above
+ * either (xform.n_out() - 1) / 2 or loop_level.size() of a statement, and
+ * when the statements do not agree on the even lexical-order entries up to
+ * and including this level.  Throws loop_error when the requested statements
+ * all fall into one group of the topological sort, i.e. nothing can be
+ * separated.
+ *
+ * On success the constant at xform position dim-1 (dim = 2*level-1) of every
+ * statement in the loop is set to its group's position, counting up from the
+ * loop's previous lexical value.  The dependence graph is left unchanged.
+ */
+void Loop::distribute(const std::set<int> &stmt_nums, int level) {
+ if (stmt_nums.size() == 0 || stmt_nums.size() == 1)
+ return;
+ fprintf(stderr, "Loop::distribute()\n");
+
+
+ // invalidate saved codegen computation
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+ int dim = 2 * level - 1;
+ int ref_stmt_num; // assigned in the first iteration of the loop below
+ // check for sanity of parameters; the first statement visited becomes the
+ // reference whose lexical order (ref_lex) all others are compared against
+ std::vector<int> ref_lex;
+ for (std::set<int>::const_iterator i = stmt_nums.begin();
+ i != stmt_nums.end(); i++) {
+ if (*i < 0 || *i >= stmt.size())
+ throw std::invalid_argument(
+ "invalid statement number " + to_string(*i));
+
+ if (level < 1
+ || (level > (stmt[*i].xform.n_out() - 1) / 2
+ || level > stmt[*i].loop_level.size()))
+ throw std::invalid_argument(
+ "8invalid loop level " + to_string(level));
+ if (ref_lex.size() == 0) {
+ ref_lex = getLexicalOrder(*i);
+ ref_stmt_num = *i;
+ } else {
+ std::vector<int> lex = getLexicalOrder(*i);
+ for (int j = 0; j <= dim - 1; j += 2)
+ if (lex[j] != ref_lex[j])
+ throw std::invalid_argument(
+ "statements for distribution must be in the same level-"
+ + to_string(level) + " subloop");
+ }
+ }
+
+ // find SCC in the to-be-distributed loop: graph g has one vertex per
+ // statement returned by getStatements(ref_lex, dim - 1) and an edge i -> j
+ // whenever some dependence vector between them is carried at dep_dim
+ int dep_dim = get_dep_dim_of(ref_stmt_num, level);
+ std::set<int> same_loop = getStatements(ref_lex, dim - 1);
+ Graph<int, Empty> g;
+ for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end();
+ i++)
+ g.insert(*i);
+ for (int i = 0; i < g.vertex.size(); i++)
+ for (int j = i + 1; j < g.vertex.size(); j++) {
+ std::vector<DependenceVector> dvs;
+ dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].isCarried(dep_dim)) {
+ g.connect(i, j);
+ break;
+ }
+ dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].isCarried(dep_dim)) {
+ g.connect(j, i);
+ break;
+ }
+ }
+ std::vector<std::set<int> > s = g.topoSort(); // each set holds vertex indices of g
+ // find statements that cannot be distributed due to dependence cycle:
+ // g2 keeps, per group of s, only the statements the caller asked for
+ Graph<std::set<int>, Empty> g2;
+ for (int i = 0; i < s.size(); i++) {
+ std::set<int> t;
+ for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++)
+ if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end())
+ t.insert(g.vertex[*j].first);
+ if (!t.empty())
+ g2.insert(t);
+ }
+ for (int i = 0; i < g2.vertex.size(); i++)
+ for (int j = i + 1; j < g2.vertex.size(); j++)
+ for (std::set<int>::iterator ii = g2.vertex[i].first.begin();
+ ii != g2.vertex[i].first.end(); ii++)
+ for (std::set<int>::iterator jj = g2.vertex[j].first.begin();
+ jj != g2.vertex[j].first.end(); jj++) {
+ std::vector<DependenceVector> dvs;
+ dvs = dep.getEdge(*ii, *jj);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].isCarried(dep_dim)) {
+ g2.connect(i, j);
+ break;
+ }
+ dvs = dep.getEdge(*jj, *ii);
+ for (int k = 0; k < dvs.size(); k++)
+ if (dvs[k].isCarried(dep_dim)) {
+ g2.connect(j, i);
+ break;
+ }
+ }
+ std::vector<std::set<int> > s2 = g2.topoSort(); // each set holds vertex indices of g2
+ // nothing to distribute
+ if (s2.size() == 1)
+ throw loop_error(
+ "loop error: no statement can be distributed due to dependence cycle");
+ std::vector<std::set<int> > s3; // s3[k]: statement numbers of the k-th distributed loop
+ for (int i = 0; i < s2.size(); i++) {
+ std::set<int> t;
+ for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++)
+ std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(),
+ g2.vertex[*j].first.end(), inserter(t, t.begin()));
+ s3.push_back(t);
+ }
+ // associate other affected statements with the right distributed statements:
+ // a statement that was not requested joins the first group containing a
+ // member it has a carried dependence edge to; failing that, the last group
+ // with a member that has a carried edge to it, and group 0 if none does
+ for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end();
+ i++)
+ if (stmt_nums.find(*i) == stmt_nums.end()) {
+ bool is_inserted = false;
+ int potential_insertion_point = 0;
+ for (int j = 0; j < s3.size(); j++) {
+ for (std::set<int>::iterator k = s3[j].begin();
+ k != s3[j].end(); k++) {
+ std::vector<DependenceVector> dvs;
+ dvs = dep.getEdge(*i, *k);
+ for (int kk = 0; kk < dvs.size(); kk++)
+ if (dvs[kk].isCarried(dep_dim)) {
+ s3[j].insert(*i);
+ is_inserted = true;
+ break; // leaves only the kk loop; the scan over s3[j] continues
+ }
+ dvs = dep.getEdge(*k, *i);
+ for (int kk = 0; kk < dvs.size(); kk++)
+ if (dvs[kk].isCarried(dep_dim))
+ potential_insertion_point = j;
+ }
+ if (is_inserted)
+ break;
+ }
+ if (!is_inserted)
+ s3[potential_insertion_point].insert(*i);
+ }
+ // set lexicographical order after distribution: groups are numbered
+ // upwards from the loop's current value at position dim - 1
+ int order = ref_lex[dim - 1];
+ shiftLexicalOrder(ref_lex, dim - 1, s3.size() - 1);
+ for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end();
+ i++) {
+ for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++)
+ assign_const(stmt[*j].xform, dim - 1, order);
+ order++;
+ }
+ // no need to update dependence graph
+
+ return;
+}
+
+
+
+
+// Return the entries of `arr_refs` for which ir->parent_is_array() is false,
+// keeping only the first of any entries whose referenced objects compare
+// equal with operator==.  The result holds the same pointers as `arr_refs`;
+// nothing is copied or freed.
+std::vector<IR_ArrayRef *> FindOuterArrayRefs(IR_Code *ir,
+    std::vector<IR_ArrayRef *> &arr_refs) {
+  typedef std::vector<IR_ArrayRef *>::iterator RefIter;
+
+  std::vector<IR_ArrayRef *> outer;
+  for (RefIter cand = arr_refs.begin(); cand != arr_refs.end(); ++cand) {
+    // skip references whose parent is an array
+    if (ir->parent_is_array(*cand))
+      continue;
+
+    // linear scan for an equal reference that was collected earlier
+    bool duplicate = false;
+    for (RefIter kept = outer.begin(); kept != outer.end() && !duplicate;
+         ++kept)
+      duplicate = (**kept == **cand);
+
+    if (!duplicate)
+      outer.push_back(*cand);
+  }
+  return outer;
+}
+
+
+
+
+
+// For every array reference in `arr`, follow the first subscript through any
+// nested array references until it is no longer an array variable.  If what
+// remains is a plain variable, its name must appear in `index` (otherwise
+// ir_error is thrown) and a one-element vector holding that name is appended
+// to the result unless the name was recorded before.  References with more
+// than one dimension are rejected with ir_error.  Progress is logged to
+// stderr.
+std::vector<std::vector<std::string> > constructInspectorVariables(IR_Code *ir,
+    std::set<IR_ArrayRef *> &arr, std::vector<std::string> &index) {
+
+  fprintf(stderr, "constructInspectorVariables()\n");
+
+  std::vector<std::vector<std::string> > result;
+
+  std::set<IR_ArrayRef *>::iterator a = arr.begin();
+  for (; a != arr.end(); ++a) {
+    CG_outputRepr *expr = (*a)->index(0);
+
+    if ((*a)->n_dim() > 1)
+      throw ir_error(
+          "multi-dimensional array support non-existent for flattening currently");
+
+    // peel off nested array references, always following subscript 0
+    while (ir->QueryExpOperation(expr) == IR_OP_ARRAY_VARIABLE) {
+      std::vector<CG_outputRepr *> operands = ir->QueryExpOperand(expr);
+      IR_ArrayRef *inner =
+          static_cast<IR_ArrayRef *>(ir->Repr2Ref(operands[0]));
+      expr = inner->index(0);
+    }
+
+    // anything that is not a plain variable is ignored
+    if (ir->QueryExpOperation(expr) != IR_OP_VARIABLE)
+      continue;
+
+    std::vector<CG_outputRepr *> operands = ir->QueryExpOperand(expr);
+    IR_ScalarRef *scalar =
+        static_cast<IR_ScalarRef *>(ir->Repr2Ref(operands[0]));
+    const std::string var_name = scalar->name();
+
+    // the variable has to be one of the known index names
+    bool is_index = false;
+    for (size_t j = 0; j < index.size() && !is_index; ++j)
+      is_index = (index[j] == var_name);
+    if (!is_index)
+      throw ir_error("Non index variable in array expression");
+
+    // record each name only once
+    bool recorded = false;
+    for (size_t k = 0; k < result.size() && !recorded; ++k)
+      recorded = (result[k][0] == var_name);
+    if (!recorded) {
+      result.push_back(std::vector<std::string>(1, var_name));
+      fprintf(stderr, "adding index %s\n", var_name.c_str());
+    }
+  }
+
+  return result;
+
+}
+
+/*std::vector<CG_outputRepr *> constructInspectorData(IR_Code *ir, std::vector<std::vector<std::string> > &indices){
+
+ std::vector<CG_outputRepr *> to_return;
+
+ for(int i =0; i < indices.size(); i++)
+ ir->CreateVariableDeclaration(indices[i][0]);
+ return to_return;
+ }
+
+
+ CG_outputRepr* constructInspectorFunction(IR_Code* ir, std::vector<std::vector<std::string> > &indices){
+
+ CG_outputRepr *to_return;
+
+
+
+ return to_return;
+ }
+
+*/
+
+// For every entry of `indices` that holds more than one name, build the
+// assignment
+//   instance.<first name>[count_var] =
+//       <first name>[ ... <second-to-last name>[ instance.<last name>[count_var] ] ... ]
+// and append it to a statement list.  Returns the list, or NULL when no
+// entry has more than one name.
+CG_outputRepr * checkAndGenerateIndirectMappings(CG_outputBuilder * ocg,
+    std::vector<std::vector<std::string> > &indices,
+    CG_outputRepr * instance, CG_outputRepr * class_def,
+    CG_outputRepr * count_var) {
+
+  CG_outputRepr *stmts = NULL;
+
+  for (int i = 0; i < indices.size(); i++) {
+    std::vector<std::string> &chain = indices[i];
+    if (chain.size() <= 1)
+      continue;
+
+    // innermost operand: the member named by the last entry, at count_var
+    std::string innermost = chain[chain.size() - 1];
+    CG_outputRepr *rhs = ocg->CreateArrayRefExpression(
+        ocg->CreateDotExpression(instance,
+            ocg->lookup_member_data(class_def, innermost, instance)),
+        count_var);
+
+    // wrap it in the remaining names, from second-to-last down to the first
+    int j = chain.size() - 2;
+    while (j >= 0) {
+      rhs = ocg->CreateArrayRefExpression(chain[j], rhs);
+      --j;
+    }
+
+    // target: the member named by the first entry, at count_var
+    CG_outputRepr *lhs = ocg->CreateArrayRefExpression(
+        ocg->CreateDotExpression(instance,
+            ocg->lookup_member_data(class_def, chain[0], instance)),
+        count_var);
+
+    stmts = ocg->StmtListAppend(stmts,
+        ocg->CreateAssignment(0, lhs, rhs));
+  }
+
+  return stmts;
+
+}
+
+// For every entry of `indices`, ask the output builder to create a pointer
+// named "<prefix_name>_<first name of the entry>".  Progress is logged to
+// stderr.
+//
+// The code that would turn those pointers into assignments is disabled, so
+// the returned statement list is always NULL, and `instance` and `class_def`
+// are currently unused.  Every entry of `indices` is expected to hold at
+// least one name.
+CG_outputRepr *generatePointerAssignments(CG_outputBuilder *ocg,
+    std::string prefix_name,
+    std::vector<std::vector<std::string> > &indices,
+    CG_outputRepr *instance,
+    CG_outputRepr *class_def) {
+
+  fprintf(stderr, "generatePointerAssignments()\n");
+  CG_outputRepr *list = NULL;
+
+  // size() is a size_t; cast it so that it matches the %d conversion
+  fprintf(stderr, "prefix '%s', %d indices\n", prefix_name.c_str(),
+          static_cast<int>(indices.size()));
+  for (int i = 0; i < indices.size(); i++) {
+
+    std::string s = prefix_name + "_" + indices[i][0];
+
+    fprintf(stderr, "s %s\n", s.c_str());
+
+    // create a variable definition for a pointer to int with this name
+    // that seems to be the only actual result of this routine ...
+    //chillAST_VarDecl *vd = new chillAST_VarDecl( "int", prefix_name.c_str(), "*", NULL);
+    //vd->print(); printf("\n"); fflush(stdout);
+    //vd->dump(); printf("\n"); fflush(stdout);
+
+    // the returned expression is not used while the assignment below stays
+    // disabled; the call itself is kept
+    (void) ocg->CreatePointer(s);
+
+    //CG_outputRepr *rhs = ocg->CreateDotExpression(instance,
+    //    ocg->lookup_member_data(class_def, indices[i][0], instance));
+
+    //CG_outputRepr *ptr_assignment = ocg->CreateAssignment(0, ptr_exp, rhs);
+
+    //list = ocg->StmtListAppend(list, ptr_assignment);
+
+  }
+
+  fprintf(stderr, "generatePointerAssignments() DONE\n\n");
+  return list;
+}
+
+// Normalize the loop at `loop_level` of statement `stmt_num` with respect to
+// its lower bound.
+//
+// The pending transformation of the statement is applied first.  The loop
+// must have unit stride and its bound must be a single satisfiable,
+// non-trivial conjunct, otherwise loop_error is thrown.  A lower bound that
+// is the constant 0 counts as already normalized and nothing is changed.  A
+// lower bound that is constant apart from global variables is handled by
+// rewriting output variable 2*loop_level of the statement's xform; any other
+// lower bound is rejected with loop_error.
+//
+// Throws std::invalid_argument for an invalid statement number or loop level.
+//
+// NOTE(review): the rewrite adds one equality "out = in - g" per global
+// variable g of the lower bound and ignores both the coefficient of g and the
+// constant term.  For a non-zero lower bound without any global variable the
+// output variable is left unconstrained.  Confirm the intended semantics.
+void Loop::normalize(int stmt_num, int loop_level) {
+
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument(
+        "invalid statement number " + to_string(stmt_num));
+
+  if (loop_level <= 0)
+    throw std::invalid_argument(
+        "12invalid loop level " + to_string(loop_level));
+  if (loop_level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument(
+        "there is no loop level " + to_string(loop_level)
+        + " for statement " + to_string(stmt_num));
+
+  // invalidate saved codegen computation, as the other transformations do
+  // before they modify a statement's xform
+  delete last_compute_cgr_;
+  last_compute_cgr_ = NULL;
+  delete last_compute_cg_;
+  last_compute_cg_ = NULL;
+
+  apply_xform(stmt_num);
+
+  Relation r = copy(stmt[stmt_num].IS);
+
+  Relation bound = get_loop_bound(r, loop_level, this->known);
+  if (!bound.has_single_conjunct() || !bound.is_satisfiable()
+      || bound.is_tautology())
+    throw loop_error("unable to extract loop bound for normalize");
+
+  // extract the loop stride
+  coef_t stride;
+  std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
+      bound.set_var(loop_level));
+  if (result.second == NULL)
+    stride = 1;
+  else
+    stride = abs(result.first.get_coef(result.second))
+        / gcd(abs(result.first.get_coef(result.second)),
+            abs(result.first.get_coef(bound.set_var(loop_level))));
+
+  if (stride != 1)
+    throw loop_error(
+        "normalize currently only handles unit stride, non unit stride present in loop bounds");
+
+  // pick the lower bound: the last inequality in which the loop variable has
+  // a positive coefficient
+  GEQ_Handle lb;
+  bool found_lb = false;
+
+  Conjunct *c = bound.query_DNF()->single_conjunct();
+  for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
+    coef_t coef = (*gi).get_coef(bound.set_var(loop_level));
+    if (coef > 0) {
+      lb = *gi;
+      found_lb = true;
+    }
+  }
+
+  // without this check a default-constructed handle would be queried below
+  if (!found_lb)
+    throw loop_error("unable to extract loop lower bound for normalize");
+
+  //Loop bound already zero
+  //Nothing to do.
+  if (lb.is_const(bound.set_var(loop_level)) && lb.get_const() == 0)
+    return;
+
+  if (lb.is_const_except_for_global(bound.set_var(loop_level))) {
+
+    int n = stmt[stmt_num].xform.n_out();
+
+    // identity on every variable except 2*loop_level, which stays free here
+    // and is constrained by the equalities added further down
+    Relation mapping(n, n);
+    F_And *f_root = mapping.add_and();
+    for (int j = 1; j <= n; j++)
+      if (j != 2 * loop_level) {
+        EQ_Handle h = f_root->add_EQ();
+        h.update_coef(mapping.input_var(j), 1);
+        h.update_coef(mapping.output_var(j), -1);
+      }
+
+    stmt[stmt_num].xform = Composition(mapping, stmt[stmt_num].xform);
+    stmt[stmt_num].xform.simplify();
+
+    for (Constr_Vars_Iter ci(lb); ci; ci++) {
+      if ((*ci).var->kind() == Global_Var) {
+        Global_Var_ID g = (*ci).var->get_global_var();
+        Variable_ID v;
+        if (g->arity() == 0)
+          v = stmt[stmt_num].xform.get_local(g);
+        else
+          v = stmt[stmt_num].xform.get_local(g,
+              (*ci).var->function_of());
+
+        F_And *f_super_root = stmt[stmt_num].xform.and_with_and();
+        F_Exists *f_exists = f_super_root->add_exists();
+        F_And *f_and = f_exists->add_and();
+
+        // output(2*loop_level) - input(loop_level) + v == 0
+        EQ_Handle h = f_and->add_EQ();
+        h.update_coef(stmt[stmt_num].xform.output_var(2 * loop_level),
+            1);
+        h.update_coef(stmt[stmt_num].xform.input_var(loop_level), -1);
+        h.update_coef(v, 1);
+
+        stmt[stmt_num].xform.simplify();
+      }
+
+    }
+
+  } else
+    throw loop_error("loop bounds too complex for normalize!");
+
+}
+
diff --git a/src/transformations/loop_datacopy.cc b/src/transformations/loop_datacopy.cc
new file mode 100644
index 0000000..12d74fd
--- /dev/null
+++ b/src/transformations/loop_datacopy.cc
@@ -0,0 +1,1369 @@
+/*****************************************************************************
+ Copyright (C) 2008 University of Southern California
+ Copyright (C) 2009-2010 University of Utah
+ All Rights Reserved.
+
+ Purpose:
+ Various data copy schemes.
+
+ Notes:
+
+ History:
+ 02/20/09 Created by Chun Chen by splitting original datacopy from loop.cc
+*****************************************************************************/
+
+#include <code_gen/codegen.h>
+#include <code_gen/CG_utils.h>
+#include "loop.hh"
+#include "omegatools.hh"
+#include "ir_code.hh"
+#include "chill_error.hh"
+
+using namespace omega;
+
+//
+// data copy function by referring arrays by numbers.
+// e.g. A[i] = A[i-1] + B[i]
+//      parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i]
+//
+// Each element of array_ref_nums pairs a statement number with the numbers of
+// the array references to copy in that statement.  This is the same operation
+// as datacopy_privatized() on reference numbers with an empty list of
+// privatized levels, so the validation, the conversion from numbers to
+// references and the copy itself are delegated to that overload; exceptions
+// and return value are exactly those of the delegate.
+//
+bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level,
+                    bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+  return datacopy_privatized(array_ref_nums, level, std::vector<int>(),
+                             allow_extra_read, fastest_changing_dimension,
+                             padding_stride, padding_alignment, memory_type);
+}
+
+//
+// data copy function by referring arrays by name.
+// e.g. A[i] = A[i-1] + B[i]
+//      parameter array_name=A means to copy data touched by A[i-1] and A[i]
+//
+// All statements returned by getStatements() for the loop at `level` around
+// stmt_num are searched; statements without a reference to array_name are
+// left out of the copy.  Throws std::invalid_argument for an invalid
+// statement number or loop level and when no reference with that name exists.
+//
+bool Loop::datacopy(int stmt_num, int level, const std::string &array_name,
+                    bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+
+  fflush(stdout);
+
+  // check for sanity of parameters
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+  if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument("invalid loop level " + to_string(level));
+
+  // collect array references by name
+  std::vector<int> lex = getLexicalOrder(stmt_num);
+  int dim = 2*level - 1;
+  std::set<int> same_loop = getStatements(lex, dim-1);
+
+  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
+    std::vector<IR_ArrayRef *> t;
+    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+    // keep the references to the requested array, free all others
+    for (int j = 0; j < refs.size(); j++)
+      if (refs[j]->name() == array_name)
+        t.push_back(refs[j]);
+      else
+        delete refs[j];
+    if (t.size() != 0)
+      selected_refs.push_back(std::make_pair(*i, t));
+  }
+
+  if (selected_refs.size() == 0)
+    throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
+
+  // do the copy
+  return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+}
+
+
+//
+// data copy function by referring arrays by name, with privatized loop levels.
+//
+// All statements returned by getStatements() for the loop at `level` around
+// stmt_num are handed to the low level routine, each with its (possibly
+// empty) list of references to array_name.  Throws std::invalid_argument for
+// an invalid statement number or loop level and when no statement references
+// array_name at all.
+//
+bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels,
+                               bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+  //fprintf(stderr, "Loop::datacopy_privatized()\n");
+
+  // check for sanity of parameters
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+  if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument("invalid loop level " + to_string(level));
+
+  // collect array references by name
+  std::vector<int> lex = getLexicalOrder(stmt_num);
+  int dim = 2*level - 1;
+  std::set<int> same_loop = getStatements(lex, dim-1);
+
+  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+  bool found_any = false;
+  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
+    // every statement of the loop gets an entry, even without a match
+    selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>()));
+
+    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+    for (int j = 0; j < refs.size(); j++)
+      if (refs[j]->name() == array_name) {
+        selected_refs[selected_refs.size()-1].second.push_back(refs[j]);
+        found_any = true;
+      }
+      else
+        delete refs[j];
+  }
+  // Counting entries is not enough: they exist for statements without any
+  // matching reference too, and the low level routine dereferences a NULL
+  // array symbol when it is handed no reference at all.
+  if (!found_any)
+    throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
+
+  // do the copy
+  return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+}
+
+
+//
+// data copy function by referring arrays by numbers, with privatized loop levels.
+//
+// Each element of array_ref_nums pairs a statement number with the numbers of
+// the array references to copy in that statement (positions in the list
+// returned by ir->FindArrayRef() for the statement's code); elements with an
+// empty list are skipped.  All statements must be located in the subloop of
+// the first one.  Throws std::invalid_argument for an invalid statement
+// number, loop level or reference number, for a statement outside that
+// subloop, and when nothing is left to copy.
+//
+bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+  //fprintf(stderr, "Loop::datacopy_privatized2()\n");
+
+  // check for sanity of parameters
+  std::set<int> same_loop;
+  for (int i = 0; i < array_ref_nums.size(); i++) {
+    int stmt_num = array_ref_nums[i].first;
+    if (stmt_num < 0 || stmt_num >= stmt.size())
+      throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+    if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+      throw std::invalid_argument("invalid loop level " + to_string(level));
+    if (i == 0) {
+      std::vector<int> lex = getLexicalOrder(stmt_num);
+      same_loop = getStatements(lex, 2*level-2);
+    }
+    else if (same_loop.find(stmt_num) == same_loop.end())
+      throw std::invalid_argument("array references for data copy must be located in the same subloop");
+  }
+
+  // convert array reference numbering scheme to actual array references
+  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+  for (int i = 0; i < array_ref_nums.size(); i++) {
+    if (array_ref_nums[i].second.size() == 0)
+      continue;
+
+    int stmt_num = array_ref_nums[i].first;
+    selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>()));
+    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
+    std::vector<bool> selected(refs.size(), false);
+    for (int j = 0; j < array_ref_nums[i].second.size(); j++) {
+      int ref_num = array_ref_nums[i].second[j];
+      if (ref_num < 0 || ref_num >= refs.size()) {
+        // free everything found for this statement; this covers the
+        // pointers already stored in the current entry as well
+        for (int k = 0; k < refs.size(); k++)
+          delete refs[k];
+        // free what was selected for earlier statements, which would leak
+        // otherwise; the set makes sure a reference that was selected more
+        // than once is deleted only once
+        std::set<IR_ArrayRef *> earlier;
+        for (int s = 0; s + 1 < selected_refs.size(); s++)
+          earlier.insert(selected_refs[s].second.begin(), selected_refs[s].second.end());
+        for (std::set<IR_ArrayRef *>::iterator p = earlier.begin(); p != earlier.end(); p++)
+          delete *p;
+        throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
+      }
+      selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]);
+      selected[ref_num] = true;
+    }
+    // references that were not asked for are not needed any more
+    for (int j = 0; j < refs.size(); j++)
+      if (!selected[j])
+        delete refs[j];
+  }
+  if (selected_refs.size() == 0)
+    throw std::invalid_argument("found no array references to copy");
+
+  // do the copy
+  return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+}
+
+
+//
+// Implement low level datacopy function with lots of options.
+//
+
+bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs,
+ int level,
+ const std::vector<int> &privatized_levels,
+ bool allow_extra_read,
+ int fastest_changing_dimension,
+ int padding_stride,
+ int padding_alignment,
+ int memory_type) {
+
+ //fprintf(stderr, "\nLoop::datacopy_privatized3() *****\n");
+ //fprintf(stderr, "privatized_levels.size() %d\n", privatized_levels.size());
+ //fprintf(stderr, "level %d\n", level);
+
+ if (stmt_refs.size() == 0)
+ return true;
+
+ // check for sanity of parameters
+ IR_ArraySymbol *sym = NULL;
+ std::vector<int> lex;
+ std::set<int> active;
+ if (level <= 0)
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ if (i == 0) {
+ if (privatized_levels[i] < level)
+ throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level));
+ }
+ else if (privatized_levels[i] <= privatized_levels[i-1])
+ throw std::invalid_argument("privatized loop levels must be in ascending order");
+ }
+ for (int i = 0; i < stmt_refs.size(); i++) {
+ int stmt_num = stmt_refs[i].first;
+ active.insert(stmt_num);
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+ if (privatized_levels.size() != 0) {
+ if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num));
+ }
+ else {
+ if (level > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num));
+ }
+ for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ if (sym == NULL) {
+ sym = stmt_refs[i].second[j]->symbol();
+ lex = getLexicalOrder(stmt_num);
+ }
+ else {
+ IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol();
+ if (t->name() != sym->name()) {
+ delete t;
+ delete sym;
+ throw std::invalid_argument("try to copy data from different arrays");
+ }
+ delete t;
+ }
+ }
+ }
+
+ //fprintf(stderr, "sym %p\n", sym);
+ if (!sym) {
+ fprintf(stderr, "sym NULL, gonna die\n"); int *i=0; int j=i[0];
+ }
+
+ if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim()))
+ throw std::invalid_argument("invalid fastest changing dimension for the array to be copied");
+ if (padding_stride < 0)
+ throw std::invalid_argument("invalid temporary array stride requirement");
+ if (padding_alignment == -1 || padding_alignment == 0)
+ throw std::invalid_argument("invalid temporary array alignment requirement");
+
+ int dim = 2*level - 1;
+ int n_dim = sym->n_dim();
+
+
+ if (fastest_changing_dimension == -1)
+ switch (sym->layout_type()) {
+ case IR_ARRAY_LAYOUT_ROW_MAJOR:
+ fastest_changing_dimension = n_dim - 1;
+ break;
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
+ fastest_changing_dimension = 0;
+ break;
+ default:
+ throw loop_error("unsupported array layout");
+ }
+ // OK, parameter sanity checked
+
+
+ // invalidate saved codegen computation
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+
+ // build iteration spaces for all reads and for all writes separately
+ //fprintf(stderr, "dp3: before apply_xform() ARRAY REFS\n");
+ //for (int i = 0; i < stmt_refs.size(); i++) {
+ // for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ // IR_ArrayRef *AR = stmt_refs[i].second[j];
+ // fprintf(stderr, "array ref ij %d %d ", i, j); AR->Dump(); fprintf(stderr, "\n");
+ // }
+ //}
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "stmt %d = ", i);
+ // stmt[i].code->dump();
+ // fprintf(stderr, "\n");
+ //}
+
+ apply_xform(active);
+ //fprintf(stderr, "dp3: back from apply_xform() ARRAY REFS\n");
+
+ //for (int i = 0; i < stmt_refs.size(); i++) {
+ // for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ // IR_ArrayRef *AR = stmt_refs[i].second[j];
+ // fprintf(stderr, "array ref ij %d %d ", i, j);
+ // AR->Dump();
+ // fprintf(stderr, "\n");
+ // }
+ //}
+
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "stmt %d = ", i);
+ // stmt[i].code->dump();
+ // fprintf(stderr, "\n");
+ //}
+
+
+ bool has_write_refs = false;
+ bool has_read_refs = false;
+ Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim);
+ Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim);
+ //fprintf(stderr, "\n\ni range: 0-%d\n", -1 + stmt_refs.size());
+ int stmt_num = stmt_refs[0].first;
+ for (int i = 0; i < stmt_refs.size(); i++) {
+ int stmt_num = stmt_refs[i].first;
+
+ //fprintf(stderr, "j range: 0-%d\n", -1 + stmt_refs[i].second.size());
+
+ for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ //fprintf(stderr, "ij %d %d\n", i, j);
+
+ Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim);
+ for (int k = 1; k <= mapping.n_inp(); k++)
+ mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name());
+ mapping.setup_names();
+ mapping.print(); fflush(stdout); // "{[I] -> [_t1] : I = _t1 }
+
+ F_And *f_root = mapping.add_and();
+ for (int k = 1; k <= level-1; k++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(k), 1);
+ h.update_coef(mapping.output_var(k), -1);
+ }
+ for (int k = 0; k < privatized_levels.size(); k++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(privatized_levels[k]), 1);
+ h.update_coef(mapping.output_var(level+k), -1);
+ }
+ for (int k = 0; k < n_dim; k++) {
+ IR_ArrayRef *AR = stmt_refs[i].second[j];
+ //fprintf(stderr, "array ref ");
+ AR->Dump();
+
+ CG_outputRepr *repr = stmt_refs[i].second[j]->index(k);
+ //fprintf(stderr, "k %d j %d repr ", k, j); repr->dump(); fflush(stdout);
+
+ exp2formula(ir,
+ mapping,
+ f_root,
+ freevar,
+ repr,
+ mapping.output_var(level-1+privatized_levels.size()+k+1),
+ 'w',
+ IR_COND_EQ,
+ false,
+ uninterpreted_symbols[stmt_num],
+ uninterpreted_symbols_stringrepr[stmt_num]);
+ repr->clear();
+ delete repr;
+ }
+ Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set()))));
+ if (stmt_refs[i].second[j]->is_write()) {
+ has_write_refs = true;
+ wo_copy_is = Union(wo_copy_is, r);
+ wo_copy_is.simplify(2, 4);
+
+
+ }
+ else {
+ has_read_refs = true;
+ ro_copy_is = Union(ro_copy_is, r);
+ ro_copy_is.simplify(2, 4);
+
+ }
+ }
+ }
+
+ //fprintf(stderr, "dp3: simplify\n");
+ // simplify read and write footprint iteration space
+ {
+ if (allow_extra_read)
+ ro_copy_is = SimpleHull(ro_copy_is, true, true);
+ else
+ ro_copy_is = ConvexRepresentation(ro_copy_is);
+
+ wo_copy_is = ConvexRepresentation(wo_copy_is);
+ if (wo_copy_is.number_of_conjuncts() > 1) {
+ Relation t = SimpleHull(wo_copy_is, true, true);
+ if (Must_Be_Subset(copy(t), copy(ro_copy_is)))
+ wo_copy_is = t;
+ else if (Must_Be_Subset(copy(wo_copy_is), copy(ro_copy_is)))
+ wo_copy_is = ro_copy_is;
+ }
+ }
+
+ // make copy statement variable names match the ones in the original statements which
+ // already have the same names due to apply_xform
+ {
+ int ref_stmt = *active.begin();
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
+ if (stmt[*i].IS.n_set() > stmt[ref_stmt].IS.n_set())
+ ref_stmt = *i;
+ for (int i = 1; i < level; i++) {
+ std::string s = stmt[ref_stmt].IS.input_var(i)->name();
+ wo_copy_is.name_set_var(i, s);
+ ro_copy_is.name_set_var(i, s);
+ }
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name();
+ wo_copy_is.name_set_var(level+i, s);
+ ro_copy_is.name_set_var(level+i, s);
+ }
+ for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) {
+ std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size());
+ wo_copy_is.name_set_var(i, s);
+ ro_copy_is.name_set_var(i, s);
+ }
+ tmp_loop_var_name_counter += n_dim;
+ wo_copy_is.setup_names();
+ ro_copy_is.setup_names();
+ }
+
+ //fprintf(stderr, "\ndp3: build merged\n");
+ // build merged footprint iteration space for calculating temporary array size
+ Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true);
+
+ // extract temporary array information
+ CG_outputBuilder *ocg = ir->builder();
+ std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL
+ std::vector<coef_t> index_stride(n_dim);
+ std::vector<bool> is_index_eq(n_dim, false);
+ std::vector<std::pair<int, CG_outputRepr *> > index_sz(0);
+ Relation reduced_copy_is = copy(copy_is);
+
+ for (int i = 0; i < n_dim; i++) {
+ //fprintf(stderr, "i %d/%d\n", i, n_dim);
+ if (i != 0)
+ reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var);
+ Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i);
+
+ //fprintf(stderr, "dp3: extract stride\n");
+ // extract stride
+ std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1));
+ if (result.second != NULL)
+ index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1))));
+ else
+ index_stride[i] = 1;
+ //fprintf(stderr, "dp3: index_stride[%d] = %d\n", i, index_stride[i]);
+
+ // check if this array index requires loop
+ Conjunct *c = bound.query_DNF()->single_conjunct();
+ for (EQ_Iterator ei(c->EQs()); ei; ei++) {
+ //fprintf(stderr, "dp3: for\n");
+ if ((*ei).has_wildcards())
+ continue;
+
+ //fprintf(stderr, "dp3: no wildcards\n");
+ int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1));
+ if (coef != 0) {
+ //fprintf(stderr, "coef != 0\n");
+ int sign = 1;
+ if (coef < 0) {
+ //fprintf(stderr, "coef < 0\n");
+ coef = -coef;
+ sign = -1;
+ }
+
+ CG_outputRepr *op = NULL;
+ for (Constr_Vars_Iter ci(*ei); ci; ci++) {
+ //fprintf(stderr, "dp3: ci\n");
+ switch ((*ci).var->kind()) {
+ case Input_Var:
+ {
+ //fprintf(stderr, "dp3: Input_Var\n");
+ if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) {
+ //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign);
+
+ if ((*ci).coef*sign == 1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef*sign == -1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef*sign > 1)
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name())));
+ else // (*ci).coef*sign < -1
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name())));
+ }
+ break;
+ }
+ case Global_Var:
+ {
+ //fprintf(stderr, "dp3: Global_Var\n");
+ Global_Var_ID g = (*ci).var->get_global_var();
+ if ((*ci).coef*sign == 1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef*sign == -1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef*sign > 1)
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name())));
+ else // (*ci).coef*sign < -1
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error("unsupported array index expression");
+ }
+ }
+ if ((*ei).get_const() != 0)
+ op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const())));
+ if (coef != 1)
+ op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef));
+
+ index_lb[i] = op;
+ is_index_eq[i] = true;
+ break;
+ }
+ }
+ if (is_index_eq[i])
+ continue;
+
+ //fprintf(stderr, "dp3: separate lower and upper bounds\n");
+ // separate lower and upper bounds
+ std::vector<GEQ_Handle> lb_list, ub_list;
+ std::set<Variable_ID> excluded_floor_vars;
+ excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1));
+ for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
+ int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1));
+ if (coef != 0 && (*gi).has_wildcards()) {
+ bool clean_bound = true;
+ GEQ_Handle h;
+ for (Constr_Vars_Iter cvi(*gi, true); gi; gi++)
+ if (!find_floor_definition(bound, (*cvi).var, excluded_floor_vars).first) {
+ clean_bound = false;
+ break;
+ }
+ if (!clean_bound)
+ continue;
+ }
+
+ if (coef > 0)
+ lb_list.push_back(*gi);
+ else if (coef < 0)
+ ub_list.push_back(*gi);
+ }
+ if (lb_list.size() == 0 || ub_list.size() == 0)
+ throw loop_error("failed to calcuate array footprint size");
+
+ //fprintf(stderr, "dp3: build lower bound representation\n");
+ // build lower bound representation
+ std::vector<CG_outputRepr *> lb_repr_list;
+ for (int j = 0; j < lb_list.size(); j++){
+ if(this->known.n_set() == 0) {
+ lb_repr_list.push_back(output_lower_bound_repr(ocg,
+ lb_list[j],
+ bound.set_var(level-1+privatized_levels.size()+i+1),
+ result.first,
+ result.second,
+ bound,
+ Relation::True(bound.n_set()),
+ std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)),
+ uninterpreted_symbols[stmt_num]));
+ }
+ else {
+ lb_repr_list.push_back(output_lower_bound_repr(ocg,
+ lb_list[j],
+ bound.set_var(level-1+privatized_levels.size()+i+1),
+ result.first,
+ result.second,
+ bound,
+ this->known,
+ std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)),
+ uninterpreted_symbols[stmt_num]));
+ }
+ }
+ if (lb_repr_list.size() > 1) {
+ //fprintf(stderr, "loop_datacopy.cc dp3 createInvoke( max )\n");
+ index_lb[i] = ocg->CreateInvoke("max", lb_repr_list);
+ }
+ else if (lb_repr_list.size() == 1)
+ index_lb[i] = lb_repr_list[0];
+
+ //fprintf(stderr, "dp3: build temporary array size representation\n");
+ // build temporary array size representation
+ {
+ Relation cal(copy_is.n_set(), 1);
+ F_And *f_root = cal.add_and();
+ for (int j = 0; j < ub_list.size(); j++)
+ for (int k = 0; k < lb_list.size(); k++) {
+ GEQ_Handle h = f_root->add_GEQ();
+
+ for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var:
+ {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var:
+ {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot calculate temporay array size statically");
+ }
+ }
+ h.update_const(ub_list[j].get_const());
+
+ for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var:
+ {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var:
+ {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot calculate temporay array size statically");
+ }
+ }
+ h.update_const(lb_list[k].get_const());
+
+ h.update_const(1);
+ h.update_coef(cal.output_var(1), -1);
+ }
+
+ cal = Restrict_Domain(cal, copy(copy_is));
+ for (int j = 1; j <= cal.n_inp(); j++)
+ cal = Project(cal, j, Input_Var);
+ cal.simplify();
+
+ //fprintf(stderr, "dp3: pad temporary array size\n");
+ // pad temporary array size
+ // TODO: for variable array size, create padding formula
+ Conjunct *c = cal.query_DNF()->single_conjunct();
+ bool is_index_bound_const = false;
+ for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++)
+ if ((*gi).is_const(cal.output_var(1))) {
+ coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1)));
+ if (padding_stride != 0) {
+ size = (size + index_stride[i] - 1) / index_stride[i];
+ if (i == fastest_changing_dimension)
+ size = size * padding_stride;
+ }
+ if (i == fastest_changing_dimension) {
+ if (padding_alignment > 1) { // align to boundary for data packing
+ int residue = size % padding_alignment;
+ if (residue)
+ size = size+padding_alignment-residue;
+ }
+ else if (padding_alignment < -1) { // un-alignment for memory bank conflicts
+ while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1)
+ size++;
+ }
+ }
+ index_sz.push_back(std::make_pair(i, ocg->CreateInt(size)));
+ is_index_bound_const = true;
+ }
+
+ if (!is_index_bound_const) {
+ for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) {
+ int coef = (*gi).get_coef(cal.output_var(1));
+ if (coef < 0) {
+ CG_outputRepr *op = NULL;
+ for (Constr_Vars_Iter ci(*gi); ci; ci++) {
+ if ((*ci).var != cal.output_var(1)) {
+ switch((*ci).var->kind()) {
+ case Global_Var:
+ {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ if ((*ci).coef == 1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef == -1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef > 1)
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name())));
+ else // (*ci).coef < -1
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error("failed to generate array index bound code");
+ }
+ }
+ }
+ int c = (*gi).get_const();
+ if (c > 0)
+ op = ocg->CreatePlus(op, ocg->CreateInt(c));
+ else if (c < 0)
+ op = ocg->CreateMinus(op, ocg->CreateInt(-c));
+ if (padding_stride != 0) {
+ if (i == fastest_changing_dimension) {
+ coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[i] / g;
+ if (t1 != 1)
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ op = ocg->CreateTimes(op, ocg->CreateInt(t2));
+ }
+ else if (index_stride[i] != 1) {
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i]));
+ }
+ }
+
+ index_sz.push_back(std::make_pair(i, op));
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ //fprintf(stderr, "dp3: change the temporary array index order\n");
+ // change the temporary array index order
+ for (int i = 0; i < index_sz.size(); i++) {
+ if (index_sz[i].first == fastest_changing_dimension)
+ switch (sym->layout_type()) {
+ case IR_ARRAY_LAYOUT_ROW_MAJOR:
+ std::swap(index_sz[index_sz.size()-1], index_sz[i]);
+ break;
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
+ std::swap(index_sz[0], index_sz[i]);
+ break;
+ default:
+ throw loop_error("unsupported array layout");
+ }
+ }
+
+ //fprintf(stderr, "dp3: declare temporary array or scalar\n");
+ // declare temporary array or scalar
+ IR_Symbol *tmp_sym;
+ if (index_sz.size() == 0) {
+ //fprintf(stderr, "tmp_sym is a scalar\n");
+ tmp_sym = ir->CreateScalarSymbol(sym, memory_type);
+ }
+ else {
+ //fprintf(stderr, "tmp_sym is an array\n");
+ std::vector<CG_outputRepr *> tmp_array_size(index_sz.size());
+ for (int i = 0; i < index_sz.size(); i++) {
+ tmp_array_size[i] = index_sz[i].second->clone();
+ index_sz[i].second->dump(); // THIS PRINTF
+ }
+ tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type);
+ }
+
+ //fprintf(stderr, "dp3: create temporary array read initialization code\n");
+ // create temporary array read initialization code
+ CG_outputRepr *copy_code_read;
+ if (has_read_refs) {
+ //fprintf(stderr, "has read refs\n");
+ if (index_sz.size() == 0) {
+ //fprintf(stderr, "if\n");
+
+ //fprintf(stderr, "tmp sym %s\n", tmp_sym->name().c_str());
+ IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); // create ref from symbol
+ // tmp_scalar_ref is incomplete
+
+ std::vector<CG_outputRepr *> rhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++) {
+ //fprintf(stderr, "i %d\n", i);
+ if (is_index_eq[i])
+ rhs_index[i] = index_lb[i]->clone();
+ else
+ rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ }
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
+
+ // IR_ScalarRef tmp_scalar_ref has no actual reference yet. It only has the variable definition.
+ copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert());
+ //fprintf(stderr, "if ends\n");
+ }
+ else {
+ //fprintf(stderr, "else\n");
+ std::vector<CG_outputRepr *> lhs_index(index_sz.size());
+ for (int i = 0; i < index_sz.size(); i++) {
+ int cur_index_num = index_sz[i].first;
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone());
+ if (padding_stride != 0) {
+ if (i == n_dim-1) {
+ coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[cur_index_num] / g;
+ if (t1 != 1)
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
+ }
+ else if (index_stride[cur_index_num] != 1) {
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
+ }
+ }
+
+ if (ir->ArrayIndexStartAt() != 0)
+ cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
+ lhs_index[i] = cur_index_repr;
+ }
+
+ //fprintf(stderr, "dp3: making tmp_array_ref\n");
+ IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index);
+ //fprintf(stderr, "dp3: DONE making tmp_array_ref\n");
+
+ std::vector<CG_outputRepr *> rhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++)
+ if (is_index_eq[i])
+ rhs_index[i] = index_lb[i]->clone();
+ else
+ rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
+
+ //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment\n");
+ //copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert());
+ CG_outputRepr *lhs = tmp_array_ref->convert();
+ CG_outputRepr *rhs = copied_array_ref->convert();
+ copy_code_read = ir->builder()->CreateAssignment(0, lhs, rhs); //tmp_array_ref->convert(), copied_array_ref->convert());
+ //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment DONE\n\n");
+ }
+ } // has read refs
+
+ //fprintf(stderr, "dp3: create temporary array write back code\n");
+ // create temporary array write back code
+ CG_outputRepr *copy_code_write;
+ if (has_write_refs) {
+ //fprintf(stderr, "has_write_refs\n");
+ if (index_sz.size() == 0) {
+ //fprintf(stderr, "index_sz.size() == 0\n");
+ IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym));
+
+ std::vector<CG_outputRepr *> rhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++)
+ if (is_index_eq[i])
+ rhs_index[i] = index_lb[i]->clone();
+ else
+ rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
+
+ copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert());
+ }
+ else {
+ //fprintf(stderr, "index_sz.size() NOT = 0\n");
+
+ std::vector<CG_outputRepr *> lhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++)
+ if (is_index_eq[i])
+ lhs_index[i] = index_lb[i]->clone();
+ else
+ lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index);
+
+ std::vector<CG_outputRepr *> rhs_index(index_sz.size());
+ for (int i = 0; i < index_sz.size(); i++) {
+ int cur_index_num = index_sz[i].first;
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone());
+ if (padding_stride != 0) {
+ if (i == n_dim-1) {
+ coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[cur_index_num] / g;
+ if (t1 != 1)
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
+ }
+ else if (index_stride[cur_index_num] != 1) {
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
+ }
+ }
+
+ if (ir->ArrayIndexStartAt() != 0)
+ cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
+ rhs_index[i] = cur_index_repr;
+ }
+ IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index);
+
+ copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert());
+ }
+ } // has write refs
+
+ // now we can remove those loops for array indexes that are
+ // dependent on others
+ //fprintf(stderr, "dp3: now we can remove those loops\n");
+ if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) {
+ Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size());
+ F_And *f_root = mapping.add_and();
+ for (int i = 1; i <= level-1+privatized_levels.size(); i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(i), 1);
+ h.update_coef(mapping.output_var(i), -1);
+ }
+
+ int cur_index = 0;
+ std::vector<int> mapped_index(index_sz.size());
+ for (int i = 0; i < n_dim; i++)
+ if (!is_index_eq[i]) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1);
+ switch (sym->layout_type()) {
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR: {
+ h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1);
+ mapped_index[index_sz.size()-cur_index-1] = i;
+ break;
+ }
+ case IR_ARRAY_LAYOUT_ROW_MAJOR: {
+ h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1);
+ mapped_index[cur_index] = i;
+ break;
+ }
+ default:
+ throw loop_error("unsupported array layout");
+ }
+ cur_index++;
+ }
+
+ wo_copy_is = omega::Range(Restrict_Domain(copy(mapping), wo_copy_is));
+ ro_copy_is = omega::Range(Restrict_Domain(copy(mapping), ro_copy_is));
+ for (int i = 1; i <= level-1+privatized_levels.size(); i++) {
+ wo_copy_is.name_set_var(i, copy_is.set_var(i)->name());
+ ro_copy_is.name_set_var(i, copy_is.set_var(i)->name());
+ }
+ for (int i = 0; i < index_sz.size(); i++) {
+ wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name());
+ ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name());
+ }
+ wo_copy_is.setup_names();
+ ro_copy_is.setup_names();
+ }
+
+ // insert read copy statement
+ //fprintf(stderr, "dp3: insert read copy statement\n");
+
+ int old_num_stmt = stmt.size();
+ int ro_copy_stmt_num = -1;
+ if (has_read_refs) {
+ Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1);
+ {
+ F_And *f_root = copy_xform.add_and();
+ for (int i = 1; i <= ro_copy_is.n_set(); i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.input_var(i), 1);
+ h.update_coef(copy_xform.output_var(2*i), -1);
+ }
+ for (int i = 1; i <= dim; i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), -1);
+ h.update_const(lex[i-1]);
+ }
+ for (int i = dim+2; i <= copy_xform.n_out(); i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), 1);
+ }
+ }
+
+ Statement copy_stmt_read;
+ copy_stmt_read.IS = ro_copy_is;
+ copy_stmt_read.xform = copy_xform;
+ copy_stmt_read.code = copy_code_read;
+ //fprintf(stderr, "dp3: copy_stmt_read.code = \n");
+ copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set());
+ copy_stmt_read.ir_stmt_node = NULL;
+ copy_stmt_read.has_inspector = false;
+ for (int i = 0; i < level-1; i++) {
+ copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type;
+ if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile &&
+ stmt[*(active.begin())].loop_level[i].payload >= level) {
+ int j;
+ for (j = 0; j < privatized_levels.size(); j++)
+ if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload)
+ break;
+ if (j == privatized_levels.size())
+ copy_stmt_read.loop_level[i].payload = -1;
+ else
+ copy_stmt_read.loop_level[i].payload = level + j;
+ }
+ else
+ copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload;
+ copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level;
+ }
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
+ copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
+ copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
+ }
+ int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1);
+ for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) {
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+ for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) {
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+
+
+ shiftLexicalOrder(lex, dim-1, 1);
+
+ fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size());
+ stmt.push_back(copy_stmt_read);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[*(active.begin())]);
+ ro_copy_stmt_num = stmt.size() - 1;
+ dep.insert();
+ }
+
+ //fprintf(stderr, "dp3: insert write copy statement\n");
+ // insert write copy statement
+ int wo_copy_stmt_num = -1;
+ if (has_write_refs) {
+ Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1);
+ {
+ F_And *f_root = copy_xform.add_and();
+ for (int i = 1; i <= wo_copy_is.n_set(); i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.input_var(i), 1);
+ h.update_coef(copy_xform.output_var(2*i), -1);
+ }
+ for (int i = 1; i <= dim; i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), -1);
+ h.update_const(lex[i-1]);
+ }
+ for (int i = dim+2; i <= copy_xform.n_out(); i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), 1);
+ }
+ }
+
+ Statement copy_stmt_write;
+ copy_stmt_write.IS = wo_copy_is;
+ copy_stmt_write.xform = copy_xform;
+ copy_stmt_write.code = copy_code_write;
+ copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set());
+ copy_stmt_write.ir_stmt_node = NULL;
+ copy_stmt_write.has_inspector = false;
+
+ for (int i = 0; i < level-1; i++) {
+ copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type;
+ if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile &&
+ stmt[*(active.begin())].loop_level[i].payload >= level) {
+ int j;
+ for (j = 0; j < privatized_levels.size(); j++)
+ if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload)
+ break;
+ if (j == privatized_levels.size())
+ copy_stmt_write.loop_level[i].payload = -1;
+ else
+ copy_stmt_write.loop_level[i].payload = level + j;
+ }
+ else
+ copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload;
+ copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level;
+ }
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
+ copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
+ copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
+ }
+ int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1);
+ for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) {
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+ for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) {
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+ lex[dim-1]++;
+ shiftLexicalOrder(lex, dim-1, -2);
+
+ fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size());
+ stmt.push_back(copy_stmt_write);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[*(active.begin())]);
+ wo_copy_stmt_num = stmt.size() - 1;
+ dep.insert();
+ }
+
+ //fprintf(stderr, "replace original array accesses with temporary array accesses\n");
+ // replace original array accesses with temporary array accesses
+ for (int i =0; i < stmt_refs.size(); i++)
+ for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ if (index_sz.size() == 0) {
+ IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym));
+ //fprintf(stderr, "dp3: loop_datacopy.cc calling ReplaceExpression i%d j%d\n", i, j);
+ ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert());
+ }
+ else {
+ std::vector<CG_outputRepr *> index_repr(index_sz.size());
+ for (int k = 0; k < index_sz.size(); k++) {
+ int cur_index_num = index_sz[k].first;
+
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone());
+ if (padding_stride != 0) {
+ if (k == n_dim-1) {
+ coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[cur_index_num] / g;
+ if (t1 != 1)
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
+ }
+ else if (index_stride[cur_index_num] != 1) {
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
+ }
+ }
+
+ if (ir->ArrayIndexStartAt() != 0)
+ cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
+ index_repr[k] = cur_index_repr;
+ }
+
+ IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr);
+ //fprintf(stderr, "loop_datacopy.cc ir->ReplaceExpression( ... )\n");
+ ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert());
+ }
+ }
+
+ // update dependence graph
+ //fprintf(stderr, "update dependence graph\n");
+
+ int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1;
+ if (ro_copy_stmt_num != -1) {
+ for (int i = 0; i < old_num_stmt; i++) {
+ std::vector<std::vector<DependenceVector> > D;
+
+ for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) {
+ if (active.find(i) != active.end() && active.find(j->first) == active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ dep.connect(ro_copy_stmt_num, j->first, dvs1);
+ }
+ else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ D.push_back(dvs1);
+ }
+
+ if (j->second.size() == 0)
+ dep.vertex[i].second.erase(j++);
+ else
+ j++;
+ }
+
+ for (int j = 0; j < D.size(); j++)
+ dep.connect(i, ro_copy_stmt_num, D[j]);
+ }
+
+ // insert dependences from copy statement loop to copied statements
+ //fprintf(stderr, "insert dependences from copy statement loop to copied statements\n");
+
+ DependenceVector dv;
+ dv.type = DEP_W2R;
+ dv.sym = tmp_sym->clone();
+ dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
+ dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
+ for (int i = dep_dim; i < dep.num_dim(); i++) {
+ dv.lbounds[i] = -posInfinity;
+ dv.ubounds[i] = posInfinity;
+ }
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
+ dep.connect(ro_copy_stmt_num, *i, dv);
+ }
+
+ if (wo_copy_stmt_num != -1) {
+ for (int i = 0; i < old_num_stmt; i++) {
+ std::vector<std::vector<DependenceVector> > D;
+
+ for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) {
+ if (active.find(i) != active.end() && active.find(j->first) == active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ dep.connect(wo_copy_stmt_num, j->first, dvs1);
+ }
+ else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ D.push_back(dvs1);
+ }
+
+ if (j->second.size() == 0)
+ dep.vertex[i].second.erase(j++);
+ else
+ j++;
+ }
+
+ for (int j = 0; j < D.size(); j++)
+ dep.connect(i, wo_copy_stmt_num, D[j]);
+ }
+
+ // insert dependences from copied statements to write statements
+ //fprintf(stderr, "dp3: insert dependences from copied statements to write statements\n");
+
+ DependenceVector dv;
+ dv.type = DEP_W2R;
+ dv.sym = tmp_sym->clone();
+ dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
+ dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
+ for (int i = dep_dim; i < dep.num_dim(); i++) {
+ dv.lbounds[i] = -posInfinity;
+ dv.ubounds[i] = posInfinity;
+ }
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
+ dep.connect(*i, wo_copy_stmt_num, dv);
+
+ }
+
+ // update variable name for dependences among copied statements
+ for (int i = 0; i < old_num_stmt; i++) {
+ if (active.find(i) != active.end())
+ for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++)
+ if (active.find(j->first) != active.end())
+ for (int k = 0; k < j->second.size(); k++) {
+ IR_Symbol *s = tmp_sym->clone();
+ j->second[k].sym = s;
+ }
+ }
+
+ // insert anti-dependence from write statement to read statement
+ if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1)
+ if (dep_dim >= 0) {
+ DependenceVector dv;
+ dv.type = DEP_R2W;
+ dv.sym = tmp_sym->clone();
+ dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
+ dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
+ for (int k = dep_dim; k < dep.num_dim(); k++) {
+ dv.lbounds[k] = -posInfinity;
+ dv.ubounds[k] = posInfinity;
+ }
+ for (int k = 0; k < dep_dim; k++) {
+ if (k != 0) {
+ dv.lbounds[k-1] = 0;
+ dv.ubounds[k-1] = 0;
+ }
+ dv.lbounds[k] = 1;
+ dv.ubounds[k] = posInfinity;
+ dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv);
+ }
+ }
+
+ //fprintf(stderr, "Loop::datacopy_privatized3() cleanup\n");
+ // cleanup
+ delete sym;
+ delete tmp_sym;
+ for (int i = 0; i < index_lb.size(); i++) {
+ index_lb[i]->clear();
+ delete index_lb[i];
+ }
+ for (int i = 0; i < index_sz.size(); i++) {
+ index_sz[i].second->clear();
+ delete index_sz[i].second;
+ }
+
+ return true;
+}
+
+
+
diff --git a/src/transformations/loop_extra.cc b/src/transformations/loop_extra.cc
new file mode 100644
index 0000000..dac05bf
--- /dev/null
+++ b/src/transformations/loop_extra.cc
@@ -0,0 +1,224 @@
+/*****************************************************************************
+ Copyright (C) 2010 University of Utah
+ All Rights Reserved.
+
+ Purpose:
+ Additional loop transformations.
+
+ Notes:
+
+ History:
+ 07/31/10 Created by Chun Chen
+*****************************************************************************/
+
+#include <code_gen/codegen.h>
+#include <code_gen/CG_utils.h>
+#include "loop.hh"
+#include "omegatools.hh"
+#include "ir_code.hh"
+#include "chill_error.hh"
+
+using namespace omega;
+
+
+// Moves the loop at `level` of statement `stmt_num` using a three-step
+// combination: tile that loop with a counted tile of size one, pass the
+// statements sharing the loop to shift() together with `absolute_position`,
+// and finally strip the tile loop again from every affected statement.
+void Loop::shift_to(int stmt_num, int level, int absolute_position) {
+  tile(stmt_num, level, 1, level, CountedTile);
+  std::vector<int> lex_order = getLexicalOrder(stmt_num);
+  std::set<int> affected = getStatements(lex_order, 2 * level - 2);
+  shift(affected, level, absolute_position);
+
+  // The tile size is one, so the tiled loop is unnecessary: remove two
+  // output dimensions from each xform and the matching loop_level entry.
+  const int kept_dims = 2 * level;        // outputs 1..kept_dims pass through
+  const int first_moved = kept_dims + 3;  // inputs from here slide down by two
+
+  std::set<int>::iterator it = affected.begin();
+  while (it != affected.end()) {
+    Statement &s = stmt[*it];
+    ++it;
+
+    const int n_out = s.xform.n_out();
+    Relation drop_tile(n_out, n_out - 2);
+    F_And *conj = drop_tile.add_and();
+
+    // identity on the leading dimensions
+    for (int d = 1; d <= kept_dims; d++) {
+      EQ_Handle eq = conj->add_EQ();
+      eq.update_coef(drop_tile.output_var(d), 1);
+      eq.update_coef(drop_tile.input_var(d), -1);
+    }
+    // inputs kept_dims+1 and kept_dims+2 get no equation; the rest move up
+    for (int d = first_moved; d <= n_out; d++) {
+      EQ_Handle eq = conj->add_EQ();
+      eq.update_coef(drop_tile.output_var(d - 2), 1);
+      eq.update_coef(drop_tile.input_var(d), -1);
+    }
+
+    s.xform = Composition(drop_tile, s.xform);
+    s.xform.simplify();
+
+    // Tile entries that refer to `level` or deeper must point one level
+    // shallower once the entry at `level` is erased below.
+    std::vector<LoopLevel> &levels = s.loop_level;
+    for (int idx = 0; idx < levels.size(); idx++) {
+      if (idx == level - 1)
+        continue;
+      if (levels[idx].type != LoopLevelTile)
+        continue;
+      if (levels[idx].payload >= level)
+        levels[idx].payload--;
+    }
+
+    levels.erase(levels.begin() + level - 1);
+  }
+}
+
+
+// Unrolls loop `level` of `stmt_num` by `unroll_amount` (no index names are
+// supplied), then calls unroll() with an amount of 0 on every statement that
+// the first call reported. Returns the statement set reported by that first
+// unroll() call.
+std::set<int> Loop::unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level) {
+  std::vector< std::vector<std::string> > no_index_names;
+  std::set<int> cleanup_stmts = unroll(stmt_num, level, unroll_amount, no_index_names, cleanup_split_level);
+
+  std::set<int>::iterator it = cleanup_stmts.begin();
+  while (it != cleanup_stmts.end()) {
+    unroll(*it, level, 0);
+    ++it;
+  }
+
+  return cleanup_stmts;
+}
+
+// Helper for Loop::peel. Scans the inequalities of `hull` for one that bounds
+// set variable `level`: a positive coefficient when `want_lower` is true, a
+// negative one otherwise. Inequalities without wildcards are preferred; if
+// none matches, an inequality with wildcards is accepted provided
+// find_floor_definition() succeeds for each of its wildcards. On success the
+// constraint is stored in `bound_eq` and true is returned.
+static bool find_peel_bound(Relation &hull, int level, bool want_lower,
+                            GEQ_Handle &bound_eq) {
+  // first pass: wildcard-free constraints
+  for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) {
+    if ((*e).has_wildcards())
+      continue;
+    coef_t c = (*e).get_coef(hull.set_var(level));
+    if (want_lower ? (c > 0) : (c < 0)) {
+      bound_eq = *e;
+      return true;
+    }
+  }
+
+  // second pass: constraints whose wildcards all have floor definitions
+  for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) {
+    if (!(*e).has_wildcards())
+      continue;
+    coef_t c = (*e).get_coef(hull.set_var(level));
+    if (!(want_lower ? (c > 0) : (c < 0)))
+      continue;
+
+    bool is_bound = true;
+    for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) {
+      std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var());
+      if (!result.first) {
+        is_bound = false;
+        break;
+      }
+    }
+    if (is_bound) {
+      bound_eq = *e;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
+// Helper for Loop::peel. Builds a set over `level` variables containing a
+// single inequality: a copy of `bound_eq` (taken from `hull`) whose constant
+// term is reduced by `offset`. Wildcards are re-created in the new relation
+// through replicate_floor_definition(); global variables are mapped with
+// get_local(). Throws loop_error for any other variable kind instead of
+// asserting, so the failure is also reported when assertions are disabled.
+static Relation make_peel_condition(Relation &hull, GEQ_Handle bound_eq,
+                                    int level, int offset) {
+  Relation r(level);
+  F_Exists *f_exists = r.add_and()->add_exists();
+  F_And *f_root = f_exists->add_and();
+  GEQ_Handle h = f_root->add_GEQ();
+  std::map<Variable_ID, Variable_ID> exists_mapping;
+
+  for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++)
+    switch (cvi.curr_var()->kind()) {
+    case Input_Var:
+      h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef());
+      break;
+    case Wildcard_Var: {
+      Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping);
+      h.update_coef(v, cvi.curr_coef());
+      break;
+    }
+    case Global_Var: {
+      Global_Var_ID g = cvi.curr_var()->get_global_var();
+      Variable_ID v;
+      if (g->arity() == 0)
+        v = r.get_local(g);
+      else
+        v = r.get_local(g, cvi.curr_var()->function_of());
+      h.update_coef(v, cvi.curr_coef());
+      break;
+    }
+    default:
+      throw loop_error("unsupported variable kind in peeling bound at loop level " + to_string(level));
+    }
+
+  h.update_const(bound_eq.get_const() - offset);
+  r.simplify();
+  return r;
+}
+
+
+// Peels iterations off loop `level` of statement `stmt_num` by repeated
+// calls to split().
+//
+//   peel_amount > 0 : the split conditions are derived from a lower bound
+//                     of the loop (constraint with a positive coefficient
+//                     on the loop variable);
+//   peel_amount < 0 : they are derived from an upper bound (negative
+//                     coefficient);
+//   peel_amount == 0: nothing is done.
+//
+// For i = 1..|peel_amount| the chosen bound constraint is copied with its
+// constant reduced by i and handed to split(stmt_num, level, ...).
+//
+// Throws std::invalid_argument for an out-of-range statement number or loop
+// level, and loop_error when no usable bound is found or the bound contains
+// a variable kind that cannot be translated.
+void Loop::peel(int stmt_num, int level, int peel_amount) {
+  // check for sanity of parameters
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+  if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument("invalid loop level " + to_string(level));
+
+  if (peel_amount == 0)
+    return;
+
+  // For every statement returned by getSubLoopNest(), keep only the loop
+  // variables of levels 1..level (set variable 2*j of the new iteration
+  // space becomes variable j), then take the hull of the results.
+  std::set<int> subloop = getSubLoopNest(stmt_num, level);
+  std::vector<Relation> Rs;
+  for (std::set<int>::iterator i = subloop.begin(); i != subloop.end(); i++) {
+    Relation r = getNewIS(*i);
+    Relation f(r.n_set(), level);
+    F_And *f_root = f.add_and();
+    for (int j = 1; j <= level; j++) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(f.input_var(2*j), 1);
+      h.update_coef(f.output_var(j), -1);
+    }
+    r = Composition(f, r);
+    r.simplify();
+    Rs.push_back(r);
+  }
+  Relation hull = SimpleHull(Rs);
+
+  const bool from_lower = peel_amount > 0;
+  GEQ_Handle bound_eq;
+  if (!find_peel_bound(hull, level, from_lower, bound_eq)) {
+    if (from_lower)
+      throw loop_error("can't find lower bound for peeling at loop level " + to_string(level));
+    else
+      throw loop_error("can't find upper bound for peeling at loop level " + to_string(level));
+  }
+
+  const int count = from_lower ? peel_amount : -peel_amount;
+  for (int i = 1; i <= count; i++) {
+    Relation cond = make_peel_condition(hull, bound_eq, level, i);
+    split(stmt_num, level, cond);
+  }
+}
+
diff --git a/src/transformations/loop_tile.cc b/src/transformations/loop_tile.cc
new file mode 100644
index 0000000..41c3e7f
--- /dev/null
+++ b/src/transformations/loop_tile.cc
@@ -0,0 +1,587 @@
+/*
+ * loop_tile.cc
+ *
+ * Created on: Nov 12, 2012
+ * Author: anand
+ */
+
+#include <code_gen/codegen.h>
+#include "loop.hh"
+#include "omegatools.hh"
+#include "ir_code.hh"
+#include "chill_error.hh"
+
+using namespace omega;
+
+
+
+
+// Tiles loop `level` of statement `stmt_num` and places the new
+// tile-controlling loop at `outer_level`.
+//
+// Parameters:
+//   stmt_num           statement whose loop nest is tiled; every statement
+//                      sharing the loop at outer_level is transformed too.
+//   level              loop level (starting from 1) to tile.
+//   tile_size          tile size; 0 selects the "no tiling" special case in
+//                      which the two inserted dimensions are pinned to 0.
+//   outer_level        level at which the tile-controlling loop is inserted,
+//                      1 <= outer_level <= level.
+//   method             StridedTile (controlling loop steps by tile_size from
+//                      the lower bound) or CountedTile (controlling loop
+//                      counts tiles from 0).
+//   alignment_offset   subtracted from the lower bound before alignment.
+//   alignment_multiple the (offset) lower bound is rounded down to a
+//                      multiple of this value; 1 means no rounding.
+//
+// Two output dimensions are inserted into each affected xform at position
+// outer_dim = 2*outer_level-1, and a LoopLevelTile entry is inserted into
+// each affected statement's loop_level at index outer_level-1.
+//
+// Throws std::invalid_argument for bad parameters and loop_error when a
+// dependence makes the tiling illegal or the loop bounds cannot be handled.
+void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
+                TilingMethodType method, int alignment_offset,
+                int alignment_multiple) {
+  // check for sanity of parameters
+  if (tile_size < 0)
+    throw std::invalid_argument("invalid tile size");
+  if (alignment_multiple < 1 || alignment_offset < 0)
+    throw std::invalid_argument("invalid alignment for tile");
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+  if (level <= 0)
+    throw std::invalid_argument("invalid loop level " + to_string(level));
+  if (level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument(
+      "there is no loop level " + to_string(level) + " for statement "
+      + to_string(stmt_num));
+  if (outer_level <= 0 || outer_level > level)
+    throw std::invalid_argument(
+      "invalid tile controlling loop level "
+      + to_string(outer_level));
+
+  // invalidate saved codegen computation
+  delete last_compute_cgr_;
+  last_compute_cgr_ = NULL;
+  delete last_compute_cg_;
+  last_compute_cg_ = NULL;
+
+  int dim = 2 * level - 1;
+  int outer_dim = 2 * outer_level - 1;
+  std::vector<int> lex = getLexicalOrder(stmt_num);
+  std::set<int> same_tiled_loop = getStatements(lex, dim - 1);
+  std::set<int> same_tile_controlling_loop = getStatements(lex,
+                                                           outer_dim - 1);
+
+  // legality check against the dependences among statements in the tiled loop
+  for (std::set<int>::iterator i = same_tiled_loop.begin();
+       i != same_tiled_loop.end(); i++) {
+    for (DependenceGraph::EdgeList::iterator j =
+           dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end();
+         j++) {
+      if (same_tiled_loop.find(j->first) != same_tiled_loop.end())
+        for (int k = 0; k < j->second.size(); k++) {
+          DependenceVector dv = j->second[k];
+          int dim2 = level - 1;
+          if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) {
+            // follow tile entries until a non-tile loop level is reached,
+            // then use that level's payload as the dependence dimension
+            while (stmt[*i].loop_level[dim2].type == LoopLevelTile) {
+              dim2 = stmt[*i].loop_level[dim2].payload - 1;
+            }
+            dim2 = stmt[*i].loop_level[dim2].payload;
+
+            if (dv.hasNegative(dim2) && (!dv.quasi)) {
+              for (int l = outer_level; l < level; l++)
+                if (stmt[*i].loop_level[l - 1].type
+                    != LoopLevelTile) {
+                  if (dv.isCarried(
+                        stmt[*i].loop_level[l - 1].payload)
+                      && dv.hasPositive(
+                        stmt[*i].loop_level[l - 1].payload))
+                    throw loop_error(
+                      "loop error: Tiling is illegal, dependence violation!");
+                } else {
+                  // NOTE(review): the original code had a
+                  // `while (type != LoopLevelTile)` loop here that could
+                  // never run in this branch (the type is LoopLevelTile)
+                  // and would not have terminated if it had. It has been
+                  // dropped without changing behavior. The intent was
+                  // presumably to follow the tile chain as done for dim2
+                  // above -- confirm before changing the check itself.
+                  int dim3 = stmt[*i].loop_level[l - 1].payload;
+                  if (dim3 < level - 1)
+                    if (dv.isCarried(dim3)
+                        && dv.hasPositive(dim3))
+                      throw loop_error(
+                        "loop error: Tiling is illegal, dependence violation!");
+                }
+            }
+          }
+        }
+    }
+  }
+  // special case for no tiling
+  if (tile_size == 0) {
+    for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
+         i != same_tile_controlling_loop.end(); i++) {
+      Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2);
+      F_And *f_root = r.add_and();
+      // outputs 1..2*outer_level-1 are copied from the inputs
+      for (int j = 1; j <= 2 * outer_level - 1; j++) {
+        EQ_Handle h = f_root->add_EQ();
+        h.update_coef(r.input_var(j), 1);
+        h.update_coef(r.output_var(j), -1);
+      }
+      // the two inserted output dimensions are fixed to 0
+      EQ_Handle h1 = f_root->add_EQ();
+      h1.update_coef(r.output_var(2 * outer_level), 1);
+      EQ_Handle h2 = f_root->add_EQ();
+      h2.update_coef(r.output_var(2 * outer_level + 1), 1);
+      // remaining inputs are shifted two positions to the right
+      for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) {
+        EQ_Handle h = f_root->add_EQ();
+        h.update_coef(r.input_var(j), 1);
+        h.update_coef(r.output_var(j + 2), -1);
+      }
+
+      stmt[*i].xform = Composition(copy(r), stmt[*i].xform);
+    }
+  }
+  // normal tiling
+  else {
+    std::set<int> private_stmt;
+    for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
+         i != same_tile_controlling_loop.end(); i++) {
+      // should test dim's value directly but it is ok for now
+      if (same_tiled_loop.find(*i) == same_tiled_loop.end()
+          && overflow.find(*i) != overflow.end())
+        private_stmt.insert(*i);
+    }
+
+    // extract the union of the iteration space to be considered
+    Relation hull;
+    {
+      std::vector<Relation> r_list;
+
+      for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
+           i != same_tile_controlling_loop.end(); i++)
+        if (private_stmt.find(*i) == private_stmt.end()) {
+          Relation r = getNewIS(*i);
+          for (int j = dim + 2; j <= r.n_set(); j++)
+            r = Project(r, r.set_var(j));
+          for (int j = outer_dim; j < dim; j++)
+            r = Project(r, j + 1, Set_Var);
+          for (int j = 0; j < outer_dim; j += 2)
+            r = Project(r, j + 1, Set_Var);
+          r.simplify(2, 4);
+          r_list.push_back(r);
+        }
+
+      hull = SimpleHull(r_list);
+    }
+
+    // extract the bound of the dimension to be tiled
+    Relation bound = get_loop_bound(hull, dim);
+    if (!bound.has_single_conjunct()) {
+      // further simplify the bound
+      hull = Approximate(hull);
+      bound = get_loop_bound(hull, dim);
+
+      // project away outer loop variables, innermost first, until the
+      // bound collapses to a single conjunct
+      int i = outer_dim - 2;
+      while (!bound.has_single_conjunct() && i >= 0) {
+        hull = Project(hull, i + 1, Set_Var);
+        bound = get_loop_bound(hull, dim);
+        i -= 2;
+      }
+
+      if (!bound.has_single_conjunct())
+        throw loop_error("cannot handle tile bounds");
+    }
+
+    // separate lower and upper bounds
+    std::vector<GEQ_Handle> lb_list, ub_list;
+    {
+      Conjunct *c = bound.query_DNF()->single_conjunct();
+      for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
+        int coef = (*gi).get_coef(bound.set_var(dim + 1));
+        if (coef < 0)
+          ub_list.push_back(*gi);
+        else if (coef > 0)
+          lb_list.push_back(*gi);
+      }
+    }
+    if (lb_list.size() == 0)
+      throw loop_error(
+        "unable to calculate tile controlling loop lower bound");
+    if (ub_list.size() == 0)
+      throw loop_error(
+        "unable to calculate tile controlling loop upper bound");
+
+    // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile
+    int simplest_lb = 0, simplest_ub = 0;
+    if (method == StridedTile) {
+      int best_cost = INT_MAX;
+      for (int i = 0; i < lb_list.size(); i++) {
+        int cost = 0;
+        for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) {
+          switch ((*ci).var->kind()) {
+          case Input_Var: {
+            cost += 5;
+            break;
+          }
+          case Global_Var: {
+            cost += 2;
+            break;
+          }
+          default:
+            cost += 15;
+            break;
+          }
+        }
+
+        if (cost < best_cost) {
+          best_cost = cost;
+          simplest_lb = i;
+        }
+      }
+    } else if (method == CountedTile) {
+      int best_cost = INT_MAX;
+      for (int i = 0; i < lb_list.size(); i++)
+        for (int j = 0; j < ub_list.size(); j++) {
+          // Coefficient sums of lb_list[i] + ub_list[j], grouped by
+          // variable kind. They are local to this (i, j) pair so the
+          // cost of one pair is not affected by the pairs examined
+          // before it.
+          std::map<Variable_ID, coef_t> s1, s2, s3;
+          int cost = 0;
+
+          for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) {
+            switch ((*ci).var->kind()) {
+            case Input_Var: {
+              s1[(*ci).var] += (*ci).coef;
+              break;
+            }
+            case Global_Var: {
+              s2[(*ci).var] += (*ci).coef;
+              break;
+            }
+            case Exists_Var:
+            case Wildcard_Var: {
+              s3[(*ci).var] += (*ci).coef;
+              break;
+            }
+            default:
+              cost = INT_MAX - 2;
+              break;
+            }
+          }
+
+          for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
+            switch ((*ci).var->kind()) {
+            case Input_Var: {
+              s1[(*ci).var] += (*ci).coef;
+              break;
+            }
+            case Global_Var: {
+              s2[(*ci).var] += (*ci).coef;
+              break;
+            }
+            case Exists_Var:
+            case Wildcard_Var: {
+              s3[(*ci).var] += (*ci).coef;
+              break;
+            }
+            default:
+              if (cost == INT_MAX - 2)
+                cost = INT_MAX - 1;
+              else
+                cost = INT_MAX - 3;
+              break;
+            }
+          }
+
+          // only variables whose coefficients do not cancel between the
+          // two bounds contribute to the cost
+          if (cost == 0) {
+            for (std::map<Variable_ID, coef_t>::iterator k =
+                   s1.begin(); k != s1.end(); k++)
+              if ((*k).second != 0)
+                cost += 5;
+            for (std::map<Variable_ID, coef_t>::iterator k =
+                   s2.begin(); k != s2.end(); k++)
+              if ((*k).second != 0)
+                cost += 2;
+            for (std::map<Variable_ID, coef_t>::iterator k =
+                   s3.begin(); k != s3.end(); k++)
+              if ((*k).second != 0)
+                cost += 15;
+          }
+
+          if (cost < best_cost) {
+            best_cost = cost;
+            simplest_lb = i;
+            simplest_ub = j;
+          }
+        }
+    }
+
+    // prepare the new transformation relations
+    for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
+         i != same_tile_controlling_loop.end(); i++) {
+      Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2);
+      F_And *f_root = r.add_and();
+      // outputs before outer_dim are copied unchanged
+      for (int j = 0; j < outer_dim - 1; j++) {
+        EQ_Handle h = f_root->add_EQ();
+        h.update_coef(r.output_var(j + 1), 1);
+        h.update_coef(r.input_var(j + 1), -1);
+      }
+
+      // inputs from outer_dim on move two positions to the right, so the
+      // tiled loop variable (input dim+1) becomes output dim+3
+      for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) {
+        EQ_Handle h = f_root->add_EQ();
+        h.update_coef(r.output_var(j + 3), 1);
+        h.update_coef(r.input_var(j + 1), -1);
+      }
+
+      // output outer_dim = lex[outer_dim-1]; output outer_dim+1 (the tile
+      // controlling loop) is constrained further below
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(r.output_var(outer_dim), 1);
+      h.update_const(-lex[outer_dim - 1]);
+
+      stmt[*i].xform = Composition(r, stmt[*i].xform);
+    }
+
+    // add tiling constraints.
+    for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
+         i != same_tile_controlling_loop.end(); i++) {
+      F_And *f_super_root = stmt[*i].xform.and_with_and();
+      F_Exists *f_exists = f_super_root->add_exists();
+      F_And *f_root = f_exists->add_and();
+
+      // create a lower bound variable for easy formula creation later
+      Variable_ID aligned_lb;
+      {
+        Variable_ID lb = f_exists->declare();
+        coef_t coef = lb_list[simplest_lb].get_coef(
+          bound.set_var(dim + 1));
+        if (coef == 1) { // e.g. if i >= m+5, then LB = m+5
+          EQ_Handle h = f_root->add_EQ();
+          h.update_coef(lb, 1);
+          for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) {
+            switch ((*ci).var->kind()) {
+            case Input_Var: {
+              int pos = (*ci).var->get_position();
+              if (pos != dim + 1)
+                h.update_coef(stmt[*i].xform.output_var(pos),
+                              (*ci).coef);
+              break;
+            }
+            case Global_Var: {
+              Global_Var_ID g = (*ci).var->get_global_var();
+              Variable_ID v;
+              if (g->arity() == 0)
+                v = stmt[*i].xform.get_local(g);
+              else
+                v = stmt[*i].xform.get_local(g,
+                                             (*ci).var->function_of());
+              h.update_coef(v, (*ci).coef);
+              break;
+            }
+            default:
+              throw loop_error("cannot handle tile bounds");
+            }
+          }
+          h.update_const(lb_list[simplest_lb].get_const());
+        } else { // e.g. if 2i >= m+5, then m+5 <= 2*LB < m+5+2
+          GEQ_Handle h1 = f_root->add_GEQ();
+          GEQ_Handle h2 = f_root->add_GEQ();
+          for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) {
+            switch ((*ci).var->kind()) {
+            case Input_Var: {
+              int pos = (*ci).var->get_position();
+              if (pos == dim + 1) {
+                h1.update_coef(lb, (*ci).coef);
+                h2.update_coef(lb, -(*ci).coef);
+              } else {
+                h1.update_coef(stmt[*i].xform.output_var(pos),
+                               (*ci).coef);
+                h2.update_coef(stmt[*i].xform.output_var(pos),
+                               -(*ci).coef);
+              }
+              break;
+            }
+            case Global_Var: {
+              Global_Var_ID g = (*ci).var->get_global_var();
+              Variable_ID v;
+              if (g->arity() == 0)
+                v = stmt[*i].xform.get_local(g);
+              else
+                v = stmt[*i].xform.get_local(g,
+                                             (*ci).var->function_of());
+              h1.update_coef(v, (*ci).coef);
+              h2.update_coef(v, -(*ci).coef);
+              break;
+            }
+            default:
+              throw loop_error("cannot handle tile bounds");
+            }
+          }
+          h1.update_const(lb_list[simplest_lb].get_const());
+          h2.update_const(-lb_list[simplest_lb].get_const());
+          h2.update_const(coef - 1);
+        }
+
+        // offset_lb = lb - alignment_offset
+        Variable_ID offset_lb;
+        if (alignment_offset == 0)
+          offset_lb = lb;
+        else {
+          EQ_Handle h = f_root->add_EQ();
+          offset_lb = f_exists->declare();
+          h.update_coef(offset_lb, 1);
+          h.update_coef(lb, -1);
+          h.update_const(alignment_offset);
+        }
+
+        if (alignment_multiple == 1) { // trivial
+          aligned_lb = offset_lb;
+        } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB
+          aligned_lb = f_exists->declare();
+          Variable_ID e = f_exists->declare();
+
+          EQ_Handle h = f_root->add_EQ();
+          h.update_coef(aligned_lb, 1);
+          h.update_coef(e, -alignment_multiple);
+
+          GEQ_Handle h1 = f_root->add_GEQ();
+          GEQ_Handle h2 = f_root->add_GEQ();
+          h1.update_coef(e, alignment_multiple);
+          h2.update_coef(e, -alignment_multiple);
+          h1.update_coef(offset_lb, -1);
+          h2.update_coef(offset_lb, 1);
+          h1.update_const(alignment_multiple - 1);
+        }
+      }
+
+      // create an upper bound variable for easy formula creation later
+      Variable_ID ub = f_exists->declare();
+      {
+        coef_t coef = -ub_list[simplest_ub].get_coef(
+          bound.set_var(dim + 1));
+        if (coef == 1) { // e.g. if i <= m+5, then UB = m+5
+          EQ_Handle h = f_root->add_EQ();
+          h.update_coef(ub, -1);
+          for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) {
+            switch ((*ci).var->kind()) {
+            case Input_Var: {
+              int pos = (*ci).var->get_position();
+              if (pos != dim + 1)
+                h.update_coef(stmt[*i].xform.output_var(pos),
+                              (*ci).coef);
+              break;
+            }
+            case Global_Var: {
+              Global_Var_ID g = (*ci).var->get_global_var();
+              Variable_ID v;
+              if (g->arity() == 0)
+                v = stmt[*i].xform.get_local(g);
+              else
+                v = stmt[*i].xform.get_local(g,
+                                             (*ci).var->function_of());
+              h.update_coef(v, (*ci).coef);
+              break;
+            }
+            default:
+              throw loop_error("cannot handle tile bounds");
+            }
+          }
+          h.update_const(ub_list[simplest_ub].get_const());
+        } else { // e.g. if 2i <= m+5, then m+5-2 < 2*UB <= m+5
+          GEQ_Handle h1 = f_root->add_GEQ();
+          GEQ_Handle h2 = f_root->add_GEQ();
+          for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) {
+            switch ((*ci).var->kind()) {
+            case Input_Var: {
+              int pos = (*ci).var->get_position();
+              if (pos == dim + 1) {
+                h1.update_coef(ub, -(*ci).coef);
+                h2.update_coef(ub, (*ci).coef);
+              } else {
+                h1.update_coef(stmt[*i].xform.output_var(pos),
+                               -(*ci).coef);
+                h2.update_coef(stmt[*i].xform.output_var(pos),
+                               (*ci).coef);
+              }
+              break;
+            }
+            case Global_Var: {
+              Global_Var_ID g = (*ci).var->get_global_var();
+              Variable_ID v;
+              if (g->arity() == 0)
+                v = stmt[*i].xform.get_local(g);
+              else
+                v = stmt[*i].xform.get_local(g,
+                                             (*ci).var->function_of());
+              h1.update_coef(v, -(*ci).coef);
+              h2.update_coef(v, (*ci).coef);
+              break;
+            }
+            default:
+              throw loop_error("cannot handle tile bounds");
+            }
+          }
+          h1.update_const(-ub_list[simplest_ub].get_const());
+          h2.update_const(ub_list[simplest_ub].get_const());
+          h1.update_const(coef - 1);
+        }
+      }
+
+      // insert tile controlling loop constraints
+      if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0
+        Variable_ID e = f_exists->declare();
+        GEQ_Handle h1 = f_root->add_GEQ();
+        h1.update_coef(e, 1);
+
+        EQ_Handle h2 = f_root->add_EQ();
+        h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1);
+        h2.update_coef(e, -tile_size);
+        h2.update_coef(aligned_lb, -1);
+      } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32)
+        GEQ_Handle h1 = f_root->add_GEQ();
+        h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1);
+
+        GEQ_Handle h2 = f_root->add_GEQ();
+        h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1),
+                       -tile_size);
+        h2.update_coef(aligned_lb, -1);
+        h2.update_coef(ub, 1);
+      }
+
+      // special care for private statements like overflow assignment
+      if (private_stmt.find(*i) != private_stmt.end()) { // e.g. ii <= UB
+        GEQ_Handle h = f_root->add_GEQ();
+        h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1);
+        h.update_coef(ub, 1);
+      }
+
+      // restrict original loop index inside the tile
+      else {
+        if (method == StridedTile) { // e.g. ii <= i < ii + tile_size
+          GEQ_Handle h1 = f_root->add_GEQ();
+          h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1);
+          h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1),
+                         -1);
+
+          GEQ_Handle h2 = f_root->add_GEQ();
+          h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1);
+          h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1);
+          h2.update_const(tile_size - 1);
+        } else if (method == CountedTile) { // e.g. LB+32*ii <= i < LB+32*ii+tile_size
+          GEQ_Handle h1 = f_root->add_GEQ();
+          h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1),
+                         -tile_size);
+          h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1);
+          h1.update_coef(aligned_lb, -1);
+
+          GEQ_Handle h2 = f_root->add_GEQ();
+          h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1),
+                         tile_size);
+          h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1);
+          h2.update_const(tile_size - 1);
+          h2.update_coef(aligned_lb, 1);
+        }
+      }
+    }
+  }
+
+  // update loop level information
+  for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
+       i != same_tile_controlling_loop.end(); i++) {
+    // existing tile entries pointing at outer_level or deeper shift by one
+    // because a new level is inserted at outer_level below
+    for (int j = 1; j <= stmt[*i].loop_level.size(); j++)
+      switch (stmt[*i].loop_level[j - 1].type) {
+      case LoopLevelOriginal:
+        break;
+      case LoopLevelTile:
+        if (stmt[*i].loop_level[j - 1].payload >= outer_level)
+          stmt[*i].loop_level[j - 1].payload++;
+        break;
+      default:
+        throw loop_error(
+          "unknown loop level type for statement "
+          + to_string(*i));
+      }
+
+    // the new tile controlling level refers to the tiled loop, which now
+    // sits one level deeper (level + 1)
+    LoopLevel ll;
+    ll.type = LoopLevelTile;
+    ll.payload = level + 1;
+    ll.parallel_level = 0;
+    stmt[*i].loop_level.insert(
+      stmt[*i].loop_level.begin() + (outer_level - 1), ll);
+  }
+}
+
diff --git a/src/transformations/loop_unroll.cc b/src/transformations/loop_unroll.cc
new file mode 100644
index 0000000..86ffd84
--- /dev/null
+++ b/src/transformations/loop_unroll.cc
@@ -0,0 +1,1222 @@
+/*
+ * loop_unroll.cc
+ *
+ * Created on: Nov 12, 2012
+ * Author: anand
+ */
+
+#include <code_gen/codegen.h>
+#include <code_gen/CG_utils.h>
+#include "loop.hh"
+#include "omegatools.hh"
+#include "ir_code.hh"
+#include "chill_error.hh"
+#include <math.h>
+
+using namespace omega;
+
+
+std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
+ std::vector<std::vector<std::string> > idxNames,
+ int cleanup_split_level) {
+ // check for sanity of parameters
+ // (unroll_amount == 0 is accepted here; the amount is then derived from the loop bounds below)
+ if (unroll_amount < 0)
+ throw std::invalid_argument(
+ "invalid unroll amount " + to_string(unroll_amount));
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+ if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+
+ if (cleanup_split_level == 0)
+ cleanup_split_level = level;
+ if (cleanup_split_level > level)
+ throw std::invalid_argument(
+ "cleanup code must be split at or outside the unrolled loop level "
+ + to_string(level));
+ if (cleanup_split_level <= 0)
+ throw std::invalid_argument(
+ "invalid split loop level " + to_string(cleanup_split_level));
+
+ // invalidate saved codegen computation
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+
+ int dim = 2 * level - 1;
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ std::set<int> same_loop = getStatements(lex, dim - 1);
+
+ // nothing to do
+ if (unroll_amount == 1)
+ return std::set<int>();
+
+ for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end();
+ i++) {
+ std::vector<std::pair<int, DependenceVector> > D;
+ int n = stmt[*i].xform.n_out();
+ for (DependenceGraph::EdgeList::iterator j =
+ dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end();
+ j++) {
+ if (same_loop.find(j->first) != same_loop.end())
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ int dim2 = level - 1;
+ if (dv.type != DEP_CONTROL) {
+
+ while (stmt[*i].loop_level[dim2].type == LoopLevelTile) {
+ dim2 = stmt[*i].loop_level[dim2].payload - 1;
+ }
+ dim2 = stmt[*i].loop_level[dim2].payload;
+
+ /*if (dv.isCarried(dim2)
+ && (dv.hasNegative(dim2) && !dv.quasi))
+ throw loop_error(
+ "loop error: Unrolling is illegal, dependence violation!");
+
+ if (dv.isCarried(dim2)
+ && (dv.hasPositive(dim2) && dv.quasi))
+ throw loop_error(
+ "loop error: Unrolling is illegal, dependence violation!");
+ */
+ bool safe = false;
+
+ if (dv.isCarried(dim2) && dv.hasPositive(dim2)) {
+ if (dv.quasi)
+ throw loop_error(
+ "loop error: a quasi dependence with a positive carried distance");
+ if (!dv.quasi) {
+ if (dv.lbounds[dim2] != posInfinity) {
+ //if (dv.lbounds[dim2] != negInfinity)
+ if (dv.lbounds[dim2] > unroll_amount)
+ safe = true;
+ } else
+ safe = true;
+ }/* else {
+ if (dv.ubounds[dim2] != negInfinity) {
+ if (dv.ubounds[dim2] != posInfinity)
+ if ((-(dv.ubounds[dim2])) > unroll_amount)
+ safe = true;
+ } else
+ safe = true;
+ }*/
+
+ if (!safe) {
+ for (int l = level + 1; l <= (n - 1) / 2; l++) {
+ int dim3 = l - 1;
+
+ if (stmt[*i].loop_level[dim3].type
+ != LoopLevelTile)
+ dim3 =
+ stmt[*i].loop_level[dim3].payload;
+ else {
+ while (stmt[*i].loop_level[dim3].type
+ == LoopLevelTile) {
+ dim3 =
+ stmt[*i].loop_level[dim3].payload
+ - 1;
+ }
+ dim3 =
+ stmt[*i].loop_level[dim3].payload;
+ }
+
+ if (dim3 > dim2) {
+
+ if (dv.hasPositive(dim3))
+ break;
+ else if (dv.hasNegative(dim3))
+ throw loop_error(
+ "loop error: Unrolling is illegal, dependence violation!");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // extract the intersection of the iteration space to be considered
+ Relation hull = Relation::True(level);
+ apply_xform(same_loop);
+ for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end();
+ i++) {
+ if (stmt[*i].IS.is_upper_bound_satisfiable()) {
+ Relation mapping(stmt[*i].IS.n_set(), level);
+ F_And *f_root = mapping.add_and();
+ for (int j = 1; j <= level; j++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(j), 1);
+ h.update_coef(mapping.output_var(j), -1);
+ }
+ hull = Intersection(hull,
+ omega::Range(Restrict_Domain(mapping, copy(stmt[*i].IS))));
+ hull.simplify(2, 4);
+
+ }
+ }
+ for (int i = 1; i <= level; i++) {
+ std::string name = tmp_loop_var_name_prefix + to_string(i);
+ hull.name_set_var(i, name);
+ }
+ hull.setup_names();
+
+ // extract the exact loop bound of the dimension to be unrolled
+ if (is_single_loop_iteration(hull, level, this->known))
+ return std::set<int>();
+ Relation bound = get_loop_bound(hull, level, this->known);
+ if (!bound.has_single_conjunct() || !bound.is_satisfiable()
+ || bound.is_tautology())
+ throw loop_error("unable to extract loop bound for unrolling");
+
+ // extract the loop stride
+ coef_t stride;
+ std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
+ bound.set_var(level));
+ if (result.second == NULL)
+ stride = 1;
+ else
+ stride = abs(result.first.get_coef(result.second))
+ / gcd(abs(result.first.get_coef(result.second)),
+ abs(result.first.get_coef(bound.set_var(level))));
+
+ // separate lower and upper bounds
+ std::vector<GEQ_Handle> lb_list, ub_list;
+ {
+ Conjunct *c = bound.query_DNF()->single_conjunct();
+ for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
+ int coef = (*gi).get_coef(bound.set_var(level));
+ if (coef < 0)
+ ub_list.push_back(*gi);
+ else if (coef > 0)
+ lb_list.push_back(*gi);
+ }
+ }
+
+ // simplify overflow expression for each pair of upper and lower bounds
+ std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table(
+ lb_list.size(),
+ std::vector<std::map<Variable_ID, int> >(ub_list.size(),
+ std::map<Variable_ID, int>()));
+ bool is_overflow_simplifiable = true;
+ for (int i = 0; i < lb_list.size(); i++) {
+ if (!is_overflow_simplifiable)
+ break;
+
+ for (int j = 0; j < ub_list.size(); j++) {
+ // lower bound or upper bound has non-unit coefficient, can't simplify
+ if (ub_list[j].get_coef(bound.set_var(level)) != -1
+ || lb_list[i].get_coef(bound.set_var(level)) != 1) {
+ is_overflow_simplifiable = false;
+ break;
+ }
+
+ for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var: {
+ if ((*ci).var != bound.set_var(level))
+ overflow_table[i][j][(*ci).var] += (*ci).coef;
+
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = bound.get_local(g);
+ else
+ v = bound.get_local(g, (*ci).var->function_of());
+ overflow_table[i][j][(*ci).var] += (*ci).coef;
+ break;
+ }
+ default:
+ throw loop_error("failed to calculate overflow amount");
+ }
+ }
+ overflow_table[i][j][NULL] += ub_list[j].get_const();
+
+ for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var: {
+ if ((*ci).var != bound.set_var(level)) {
+ overflow_table[i][j][(*ci).var] += (*ci).coef;
+ if (overflow_table[i][j][(*ci).var] == 0)
+ overflow_table[i][j].erase(
+ overflow_table[i][j].find((*ci).var));
+ }
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = bound.get_local(g);
+ else
+ v = bound.get_local(g, (*ci).var->function_of());
+ overflow_table[i][j][(*ci).var] += (*ci).coef;
+ if (overflow_table[i][j][(*ci).var] == 0)
+ overflow_table[i][j].erase(
+ overflow_table[i][j].find((*ci).var));
+ break;
+ }
+ default:
+ throw loop_error("failed to calculate overflow amount");
+ }
+ }
+ overflow_table[i][j][NULL] += lb_list[i].get_const();
+
+ overflow_table[i][j][NULL] += stride;
+ if (unroll_amount == 0
+ || (overflow_table[i][j].size() == 1
+ && overflow_table[i][j][NULL] / stride
+ < unroll_amount))
+ unroll_amount = overflow_table[i][j][NULL] / stride;
+ }
+ }
+
+ // loop iteration count can't be determined, bail out gracefully
+ if (unroll_amount == 0)
+ return std::set<int>();
+
+ // further simplify the overflow calculation by reducing coefficients modulo unroll_amount
+ if (is_overflow_simplifiable) {
+ for (int i = 0; i < lb_list.size(); i++)
+ for (int j = 0; j < ub_list.size(); j++)
+ if (stride == 1) {
+ for (std::map<Variable_ID, int>::iterator k =
+ overflow_table[i][j].begin();
+ k != overflow_table[i][j].end();)
+ if ((*k).first != NULL) {
+ int t = int_mod_hat((*k).second, unroll_amount);
+ if (t == 0) {
+ overflow_table[i][j].erase(k++);
+ } else {
+ int t2 = hull.query_variable_mod((*k).first,
+ unroll_amount);
+ if (t2 != INT_MAX) {
+ overflow_table[i][j][NULL] += t * t2;
+ overflow_table[i][j].erase(k++);
+ } else {
+ (*k).second = t;
+ k++;
+ }
+ }
+ } else
+ k++;
+
+ overflow_table[i][j][NULL] = int_mod_hat(
+ overflow_table[i][j][NULL], unroll_amount);
+
+ // Since we don't have MODULO instruction in SUIF yet (only MOD),
+ // make all coef positive in the final formula
+ for (std::map<Variable_ID, int>::iterator k =
+ overflow_table[i][j].begin();
+ k != overflow_table[i][j].end(); k++)
+ if ((*k).second < 0)
+ (*k).second += unroll_amount;
+ }
+ }
+
+ // build overflow statement
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *overflow_code = NULL;
+ Relation cond_upper(level), cond_lower(level);
+ Relation overflow_constraint(0);
+ F_And *overflow_constraint_root = overflow_constraint.add_and();
+ std::vector<Free_Var_Decl *> over_var_list;
+ if (is_overflow_simplifiable && lb_list.size() == 1) {
+ for (int i = 0; i < ub_list.size(); i++) {
+ if (overflow_table[0][i].size() == 1) {
+ // upper splitting condition
+ GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
+ h.update_const(
+ ((overflow_table[0][i][NULL] / stride) % unroll_amount)
+ * -stride);
+ } else {
+ // upper splitting condition
+ std::string over_name = overflow_var_name_prefix
+ + to_string(overflow_var_name_counter++);
+ Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
+ over_var_list.push_back(over_free_var);
+ GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
+ h.update_coef(cond_upper.get_local(over_free_var), -stride);
+
+ // insert constraint 0 <= overflow < unroll_amount
+ Variable_ID v = overflow_constraint.get_local(over_free_var);
+ GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
+ h1.update_coef(v, 1);
+ GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
+ h2.update_coef(v, -1);
+ h2.update_const(unroll_amount - 1);
+
+ // create overflow assignment
+ bound.setup_names(); // hack to fix omega relation variable names issue
+ CG_outputRepr *rhs = NULL;
+ bool is_split_illegal = false;
+ for (std::map<Variable_ID, int>::iterator j =
+ overflow_table[0][i].begin();
+ j != overflow_table[0][i].end(); j++)
+ if ((*j).first != NULL) {
+ if ((*j).first->kind() == Input_Var
+ && (*j).first->get_position()
+ >= cleanup_split_level)
+ is_split_illegal = true;
+
+ CG_outputRepr *t = ocg->CreateIdent((*j).first->name());
+ if ((*j).second != 1)
+ t = ocg->CreateTimes(ocg->CreateInt((*j).second),
+ t);
+ rhs = ocg->CreatePlus(rhs, t);
+ } else if ((*j).second != 0)
+ rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second));
+
+ if (is_split_illegal) {
+ rhs->clear();
+ delete rhs;
+ throw loop_error(
+ "cannot split cleanup code at loop level "
+ + to_string(cleanup_split_level)
+ + " due to overflow variable data dependence");
+ }
+
+ if (stride != 1)
+ rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride));
+ rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
+
+ CG_outputRepr *lhs = ocg->CreateIdent(over_name);
+ init_code = ocg->StmtListAppend(init_code,
+ ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
+ lhs = ocg->CreateIdent(over_name);
+ overflow_code = ocg->StmtListAppend(overflow_code,
+ ocg->CreateAssignment(0, lhs, rhs));
+ }
+ }
+
+ // lower splitting condition
+ GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]);
+ } else if (is_overflow_simplifiable && ub_list.size() == 1) {
+ for (int i = 0; i < lb_list.size(); i++) {
+
+ if (overflow_table[i][0].size() == 1) {
+ // lower splitting condition
+ GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
+ h.update_const(overflow_table[i][0][NULL] * -stride);
+ } else {
+ // lower splitting condition
+ std::string over_name = overflow_var_name_prefix
+ + to_string(overflow_var_name_counter++);
+ Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
+ over_var_list.push_back(over_free_var);
+ GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
+ h.update_coef(cond_lower.get_local(over_free_var), -stride);
+
+ // insert constraint 0 <= overflow < unroll_amount
+ Variable_ID v = overflow_constraint.get_local(over_free_var);
+ GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
+ h1.update_coef(v, 1);
+ GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
+ h2.update_coef(v, -1);
+ h2.update_const(unroll_amount - 1);
+
+ // create overflow assignment
+ bound.setup_names(); // hack to fix omega relation variable names issue
+ CG_outputRepr *rhs = NULL;
+ for (std::map<Variable_ID, int>::iterator j =
+ overflow_table[0][i].begin();
+ j != overflow_table[0][i].end(); j++)
+ if ((*j).first != NULL) {
+ CG_outputRepr *t = ocg->CreateIdent((*j).first->name());
+ if ((*j).second != 1)
+ t = ocg->CreateTimes(ocg->CreateInt((*j).second),
+ t);
+ rhs = ocg->CreatePlus(rhs, t);
+ } else if ((*j).second != 0)
+ rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second));
+
+ if (stride != 1)
+ rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride));
+ rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
+
+ CG_outputRepr *lhs = ocg->CreateIdent(over_name);
+ init_code = ocg->StmtListAppend(init_code,
+ ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
+ lhs = ocg->CreateIdent(over_name);
+ overflow_code = ocg->StmtListAppend(overflow_code,
+ ocg->CreateAssignment(0, lhs, rhs));
+ }
+ }
+
+ // upper splitting condition
+ GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]);
+ } else {
+ std::string over_name = overflow_var_name_prefix
+ + to_string(overflow_var_name_counter++);
+ Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
+ over_var_list.push_back(over_free_var);
+
+ std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list;
+ for (int i = 0; i < lb_list.size(); i++) {
+ lb_repr_list.push_back(
+ output_lower_bound_repr(ocg,
+ lb_list[i],
+ bound.set_var(dim + 1), result.first, result.second,
+ bound, Relation::True(bound.n_set()),
+ std::vector<std::pair<CG_outputRepr *, int> >(
+ bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
+ uninterpreted_symbols[stmt_num]));
+ GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
+ }
+ for (int i = 0; i < ub_list.size(); i++) {
+ ub_repr_list.push_back(
+ output_upper_bound_repr(ocg, ub_list[i],
+ bound.set_var(dim + 1), bound,
+ std::vector<std::pair<CG_outputRepr *, int> >(
+ bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
+ uninterpreted_symbols[stmt_num]));
+ GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
+ h.update_coef(cond_upper.get_local(over_free_var), -stride);
+ }
+
+ CG_outputRepr *lbRepr, *ubRepr;
+ if (lb_repr_list.size() > 1) {
+ //fprintf(stderr, "loop_unroll.cc createInvoke( max )\n");
+ lbRepr = ocg->CreateInvoke("max", lb_repr_list);
+ }
+ else if (lb_repr_list.size() == 1) {
+ lbRepr = lb_repr_list[0];
+ }
+
+ if (ub_repr_list.size() > 1) {
+ //fprintf(stderr, "loop_unroll.cc createInvoke( min )\n");
+ ubRepr = ocg->CreateInvoke("min", ub_repr_list);
+ }
+ else if (ub_repr_list.size() == 1) {
+ ubRepr = ub_repr_list[0];
+ }
+
+ // create overflow assignment
+ CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr),
+ ocg->CreateInt(1));
+ if (stride != 1)
+ rhs = ocg->CreateIntegerFloor(rhs, ocg->CreateInt(stride));
+ rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
+ CG_outputRepr *lhs = ocg->CreateIdent(over_name);
+ init_code = ocg->StmtListAppend(init_code,
+ ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
+ lhs = ocg->CreateIdent(over_name);
+ overflow_code = ocg->CreateAssignment(0, lhs, rhs);
+
+ // insert constraint 0 <= overflow < unroll_amount
+ Variable_ID v = overflow_constraint.get_local(over_free_var);
+ GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
+ h1.update_coef(v, 1);
+ GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
+ h2.update_coef(v, -1);
+ h2.update_const(unroll_amount - 1);
+ }
+
+ // insert overflow statement
+ int overflow_stmt_num = -1;
+ if (overflow_code != NULL) {
+ // build iteration space for overflow statement
+ Relation mapping(level, cleanup_split_level - 1);
+ F_And *f_root = mapping.add_and();
+ for (int i = 1; i < cleanup_split_level; i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.output_var(i), 1);
+ h.update_coef(mapping.input_var(i), -1);
+ }
+ Relation overflow_IS = omega::Range(Restrict_Domain(mapping, copy(hull)));
+ for (int i = 1; i < cleanup_split_level; i++)
+ overflow_IS.name_set_var(i, hull.set_var(i)->name());
+ overflow_IS.setup_names();
+
+ // build dumb transformation relation for overflow statement
+ Relation overflow_xform(cleanup_split_level - 1,
+ 2 * (cleanup_split_level - 1) + 1);
+ f_root = overflow_xform.add_and();
+ for (int i = 1; i <= cleanup_split_level - 1; i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(overflow_xform.output_var(2 * i), 1);
+ h.update_coef(overflow_xform.input_var(i), -1);
+
+ h = f_root->add_EQ();
+ h.update_coef(overflow_xform.output_var(2 * i - 1), 1);
+ h.update_const(-lex[2 * i - 2]);
+ }
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(
+ overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1),
+ 1);
+ h.update_const(-lex[2 * (cleanup_split_level - 1)]);
+
+ shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1);
+ Statement overflow_stmt;
+
+ overflow_stmt.code = overflow_code;
+ overflow_stmt.IS = overflow_IS;
+ overflow_stmt.xform = overflow_xform;
+ overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1);
+ overflow_stmt.ir_stmt_node = NULL;
+ for (int i = 0; i < level - 1; i++) {
+ overflow_stmt.loop_level[i].type =
+ stmt[stmt_num].loop_level[i].type;
+ if (stmt[stmt_num].loop_level[i].type == LoopLevelTile
+ && stmt[stmt_num].loop_level[i].payload >= level)
+ overflow_stmt.loop_level[i].payload = -1;
+ else
+ overflow_stmt.loop_level[i].payload =
+ stmt[stmt_num].loop_level[i].payload;
+ overflow_stmt.loop_level[i].parallel_level =
+ stmt[stmt_num].loop_level[i].parallel_level;
+ }
+
+ fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size());
+ stmt.push_back(overflow_stmt);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]);
+ dep.insert();
+ overflow_stmt_num = stmt.size() - 1;
+ overflow[overflow_stmt_num] = over_var_list;
+
+ // update the global known information on overflow variable
+ this->known = Intersection(this->known,
+ Extend_Set(copy(overflow_constraint),
+ this->known.n_set() - overflow_constraint.n_set()));
+
+ // update dependence graph
+ DependenceVector dv;
+ dv.type = DEP_CONTROL;
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++)
+ dep.connect(overflow_stmt_num, *i, dv);
+ dv.type = DEP_W2W;
+ {
+ IR_ScalarSymbol *overflow_sym = NULL;
+ std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef(overflow_code);
+ for (int i = scalars.size() - 1; i >= 0; i--)
+ if (scalars[i]->is_write()) {
+ overflow_sym = scalars[i]->symbol();
+ break;
+ }
+ for (int i = scalars.size() - 1; i >= 0; i--)
+ delete scalars[i];
+ dv.sym = overflow_sym;
+ }
+ dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
+ dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
+ int dep_dim = get_last_dep_dim_before(stmt_num, level);
+ for (int i = dep_dim + 1; i < dep.num_dim(); i++) {
+ dv.lbounds[i] = -posInfinity;
+ dv.ubounds[i] = posInfinity;
+ }
+ for (int i = 0; i <= dep_dim; i++) {
+ if (i != 0) {
+ dv.lbounds[i - 1] = 0;
+ dv.ubounds[i - 1] = 0;
+ }
+ dv.lbounds[i] = 1;
+ dv.ubounds[i] = posInfinity;
+ dep.connect(overflow_stmt_num, overflow_stmt_num, dv);
+ }
+ }
+
+ // split the loop so it can be fully unrolled
+ std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper);
+ std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower);
+ new_stmts.insert(new_stmts2.begin(), new_stmts2.end());
+
+ // check if unrolled statements can be trivially lumped together as one statement
+ bool can_be_lumped = true;
+ if (can_be_lumped) {
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++)
+ if (*i != stmt_num) {
+ if (stmt[*i].loop_level.size()
+ != stmt[stmt_num].loop_level.size()) {
+ can_be_lumped = false;
+ break;
+ }
+ for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++)
+ if (!(stmt[*i].loop_level[j].type
+ == stmt[stmt_num].loop_level[j].type
+ && stmt[*i].loop_level[j].payload
+ == stmt[stmt_num].loop_level[j].payload)) {
+ can_be_lumped = false;
+ break;
+ }
+ if (!can_be_lumped)
+ break;
+ std::vector<int> lex2 = getLexicalOrder(*i);
+ for (int j = 2 * level; j < lex.size() - 1; j += 2)
+ if (lex[j] != lex2[j]) {
+ can_be_lumped = false;
+ break;
+ }
+ if (!can_be_lumped)
+ break;
+ }
+ }
+ if (can_be_lumped) {
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++)
+ if (is_inner_loop_depend_on_level(stmt[*i].IS, level,
+ this->known)) {
+ can_be_lumped = false;
+ break;
+ }
+ }
+ if (can_be_lumped) {
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++)
+ if (*i != stmt_num) {
+ if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS))
+ && Must_Be_Subset(copy(stmt[stmt_num].IS),
+ copy(stmt[*i].IS)))) {
+ can_be_lumped = false;
+ break;
+ }
+ }
+ }
+ if (can_be_lumped) {
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++) {
+ for (DependenceGraph::EdgeList::iterator j =
+ dep.vertex[*i].second.begin();
+ j != dep.vertex[*i].second.end(); j++)
+ if (same_loop.find(j->first) != same_loop.end()) {
+ for (int k = 0; k < j->second.size(); k++)
+ if (j->second[k].type == DEP_CONTROL
+ || j->second[k].type == DEP_UNKNOWN) {
+ can_be_lumped = false;
+ break;
+ }
+ if (!can_be_lumped)
+ break;
+ }
+ if (!can_be_lumped)
+ break;
+ }
+ }
+
+ // insert unrolled statements
+ int old_num_stmt = stmt.size();
+ if (!can_be_lumped) {
+ std::map<int, std::vector<int> > what_stmt_num;
+
+ for (int j = 1; j < unroll_amount; j++) {
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++) {
+ Statement new_stmt;
+
+ std::vector<std::string> loop_vars;
+ std::vector<CG_outputRepr *> subs;
+ loop_vars.push_back(stmt[*i].IS.set_var(level)->name());
+ subs.push_back(
+ ocg->CreatePlus(
+ ocg->CreateIdent(
+ stmt[*i].IS.set_var(level)->name()),
+ ocg->CreateInt(j * stride)));
+ new_stmt.code = ocg->CreateSubstitutedStmt(0,
+ stmt[*i].code->clone(), loop_vars, subs);
+
+ new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride);
+ add_loop_stride(new_stmt.IS, bound, level - 1,
+ unroll_amount * stride);
+
+ new_stmt.xform = copy(stmt[*i].xform);
+
+ new_stmt.loop_level = stmt[*i].loop_level;
+ new_stmt.ir_stmt_node = NULL;
+
+ fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size());
+ stmt.push_back(new_stmt);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]);
+ dep.insert();
+ what_stmt_num[*i].push_back(stmt.size() - 1);
+ }
+ }
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++)
+ add_loop_stride(stmt[*i].IS, bound, level - 1,
+ unroll_amount * stride);
+
+ // update dependence graph
+ if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
+ int dep_dim = stmt[stmt_num].loop_level[level - 1].payload;
+ int new_stride = unroll_amount * stride;
+ for (int i = 0; i < old_num_stmt; i++) {
+ std::vector<std::pair<int, DependenceVector> > D;
+
+ for (DependenceGraph::EdgeList::iterator j =
+ dep.vertex[i].second.begin();
+ j != dep.vertex[i].second.end();) {
+ if (same_loop.find(i) != same_loop.end()) {
+ if (same_loop.find(j->first) != same_loop.end()) {
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.type == DEP_CONTROL
+ || dv.type == DEP_UNKNOWN) {
+ D.push_back(std::make_pair(j->first, dv));
+ for (int kk = 0; kk < unroll_amount - 1;
+ kk++)
+ if (what_stmt_num[i][kk] != -1
+ && what_stmt_num[j->first][kk]
+ != -1)
+ dep.connect(what_stmt_num[i][kk],
+ what_stmt_num[j->first][kk],
+ dv);
+ } else {
+ coef_t lb = dv.lbounds[dep_dim];
+ coef_t ub = dv.ubounds[dep_dim];
+ if (ub == lb
+ && int_mod(lb,
+ static_cast<coef_t>(new_stride))
+ == 0) {
+ D.push_back(
+ std::make_pair(j->first, dv));
+ for (int kk = 0; kk < unroll_amount - 1;
+ kk++)
+ if (what_stmt_num[i][kk] != -1
+ && what_stmt_num[j->first][kk]
+ != -1)
+ dep.connect(
+ what_stmt_num[i][kk],
+ what_stmt_num[j->first][kk],
+ dv);
+ } else if (lb == -posInfinity
+ && ub == posInfinity) {
+ D.push_back(
+ std::make_pair(j->first, dv));
+ for (int kk = 0; kk < unroll_amount;
+ kk++)
+ if (kk == 0)
+ D.push_back(
+ std::make_pair(j->first,
+ dv));
+ else if (what_stmt_num[j->first][kk
+ - 1] != -1)
+ D.push_back(
+ std::make_pair(
+ what_stmt_num[j->first][kk
+ - 1],
+ dv));
+ for (int t = 0; t < unroll_amount - 1;
+ t++)
+ if (what_stmt_num[i][t] != -1)
+ for (int kk = 0;
+ kk < unroll_amount;
+ kk++)
+ if (kk == 0)
+ dep.connect(
+ what_stmt_num[i][t],
+ j->first, dv);
+ else if (what_stmt_num[j->first][kk
+ - 1] != -1)
+ dep.connect(
+ what_stmt_num[i][t],
+ what_stmt_num[j->first][kk
+ - 1],
+ dv);
+ } else {
+ for (int kk = 0; kk < unroll_amount;
+ kk++) {
+ if (lb != -posInfinity) {
+ if (kk * stride
+ < int_mod(lb,
+ static_cast<coef_t>(new_stride)))
+ dv.lbounds[dep_dim] =
+ floor(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride
+ + new_stride;
+ else
+ dv.lbounds[dep_dim] =
+ floor(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
+ }
+ if (ub != posInfinity) {
+ if (kk * stride
+ > int_mod(ub,
+ static_cast<coef_t>(new_stride)))
+ dv.ubounds[dep_dim] =
+ floor(
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride
+ - new_stride;
+ else
+ dv.ubounds[dep_dim] =
+ floor(
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride;
+ }
+ if (dv.ubounds[dep_dim]
+ >= dv.lbounds[dep_dim]) {
+ if (kk == 0)
+ D.push_back(
+ std::make_pair(
+ j->first,
+ dv));
+ else if (what_stmt_num[j->first][kk
+ - 1] != -1)
+ D.push_back(
+ std::make_pair(
+ what_stmt_num[j->first][kk
+ - 1],
+ dv));
+ }
+ }
+ for (int t = 0; t < unroll_amount - 1;
+ t++)
+ if (what_stmt_num[i][t] != -1)
+ for (int kk = 0;
+ kk < unroll_amount;
+ kk++) {
+ if (lb != -posInfinity) {
+ if (kk * stride
+ < int_mod(
+ lb + t
+ + 1,
+ static_cast<coef_t>(new_stride)))
+ dv.lbounds[dep_dim] =
+ floor(
+ static_cast<double>(lb
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride
+ + new_stride;
+ else
+ dv.lbounds[dep_dim] =
+ floor(
+ static_cast<double>(lb
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride;
+ }
+ if (ub != posInfinity) {
+ if (kk * stride
+ > int_mod(
+ ub + t
+ + 1,
+ static_cast<coef_t>(new_stride)))
+ dv.ubounds[dep_dim] =
+ floor(
+ static_cast<double>(ub
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride
+ - new_stride;
+ else
+ dv.ubounds[dep_dim] =
+ floor(
+ static_cast<double>(ub
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride;
+ }
+ if (dv.ubounds[dep_dim]
+ >= dv.lbounds[dep_dim]) {
+ if (kk == 0)
+ dep.connect(
+ what_stmt_num[i][t],
+ j->first,
+ dv);
+ else if (what_stmt_num[j->first][kk
+ - 1] != -1)
+ dep.connect(
+ what_stmt_num[i][t],
+ what_stmt_num[j->first][kk
+ - 1],
+ dv);
+ }
+ }
+ }
+ }
+ }
+
+ dep.vertex[i].second.erase(j++);
+ } else {
+ for (int kk = 0; kk < unroll_amount - 1; kk++)
+ if (what_stmt_num[i][kk] != -1)
+ dep.connect(what_stmt_num[i][kk], j->first,
+ j->second);
+
+ j++;
+ }
+ } else {
+ if (same_loop.find(j->first) != same_loop.end())
+ for (int k = 0; k < j->second.size(); k++)
+ for (int kk = 0; kk < unroll_amount - 1; kk++)
+ if (what_stmt_num[j->first][kk] != -1)
+ D.push_back(
+ std::make_pair(
+ what_stmt_num[j->first][kk],
+ j->second[k]));
+ j++;
+ }
+ }
+
+ for (int j = 0; j < D.size(); j++)
+ dep.connect(i, D[j].first, D[j].second);
+ }
+ }
+
+ // reset lexical order for the unrolled loop body
+ std::set<int> new_same_loop;
+
+ int count = 0;
+
+ for (std::map<int, std::vector<int> >::iterator i =
+ what_stmt_num.begin(); i != what_stmt_num.end(); i++) {
+
+ new_same_loop.insert(i->first);
+ for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2)
+ assign_const(stmt[i->first].xform, k,
+ get_const(stmt[(what_stmt_num.begin())->first].xform, k,
+ Output_Var) + count);
+ count++;
+ for (int j = 0; j < i->second.size(); j++) {
+ new_same_loop.insert(i->second[j]);
+ for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k +=
+ 2)
+ assign_const(stmt[i->second[j]].xform, k,
+ get_const(
+ stmt[(what_stmt_num.begin())->first].xform,
+ k, Output_Var) + count);
+ count++;
+ }
+ }
+ setLexicalOrder(dim + 1, new_same_loop, 0, idxNames);
+ } else {
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++)
+ add_loop_stride(stmt[*i].IS, bound, level - 1,
+ unroll_amount * stride);
+
+ int max_level = stmt[stmt_num].loop_level.size();
+ std::vector<std::pair<int, int> > stmt_order;
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++)
+ stmt_order.push_back(
+ std::make_pair(
+ get_const(stmt[*i].xform, 2 * max_level,
+ Output_Var), *i));
+ sort(stmt_order.begin(), stmt_order.end());
+
+ Statement new_stmt;
+ new_stmt.code = NULL;
+ for (int j = 1; j < unroll_amount; j++) {
+ for (int i = 0; i < stmt_order.size(); i++) {
+ std::vector<std::string> loop_vars;
+ std::vector<CG_outputRepr *> subs;
+
+ //fprintf(stderr, "loop_unroll.cc, will replace '%s with '%s+%d' ??\n",
+ // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(),
+ // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), j * stride);
+
+ loop_vars.push_back(
+ stmt[stmt_order[i].second].IS.set_var(level)->name());
+ subs.push_back(
+ ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()),
+ ocg->CreateInt(j * stride))); // BUG HERE
+ //fprintf(stderr, "loop_unroll.cc subs now has %d parts\n", subs.size());
+ //for (int k=0; k< subs.size(); k++) //fprintf(stderr, "subs[%d] = 0x%x\n", k, subs[k]);
+
+ //fprintf(stderr, "ij %d %d ", i, j);
+ //fprintf(stderr, "old src was =\n");
+ //stmt[stmt_order[i].second].code->dump(); fflush(stdout); //fprintf(stderr, "\n");
+
+
+
+ CG_outputRepr *code = ocg->CreateSubstitutedStmt(0,
+ stmt[stmt_order[i].second].code->clone(),
+ loop_vars,
+ subs);
+
+ //fprintf(stderr, "old src is =\n");
+ //stmt[stmt_order[i].second].code->dump(); fflush(stdout); //fprintf(stderr, "\n");
+
+ //fprintf(stderr, "substituted copy is =\n");
+ //code->dump(); //fprintf(stderr, "\n\n");
+
+
+ new_stmt.code = ocg->StmtListAppend(new_stmt.code, code);
+ //fprintf(stderr, "appended code =\n");
+ //new_stmt.code->dump();
+
+ }
+ }
+
+
+
+ //fprintf(stderr, "new_stmt.IS = \n");
+ new_stmt.IS = copy(stmt[stmt_num].IS);
+ new_stmt.xform = copy(stmt[stmt_num].xform);
+ assign_const(new_stmt.xform, 2 * max_level,
+ stmt_order[stmt_order.size() - 1].first + 1);
+ new_stmt.loop_level = stmt[stmt_num].loop_level;
+ new_stmt.ir_stmt_node = NULL;
+
+ new_stmt.has_inspector = false; // ?? or from copied stmt?
+ if (stmt[stmt_num].has_inspector) fprintf(stderr, "OLD STMT HAS INSPECTOR\n");
+ else fprintf(stderr, "OLD STMT DOES NOT HAVE INSPECTOR\n");
+
+ fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size());
+ stmt.push_back(new_stmt);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]);
+ dep.insert();
+
+ //fprintf(stderr, "update dependence graph\n");
+ // update dependence graph
+ if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
+ int dep_dim = stmt[stmt_num].loop_level[level - 1].payload;
+ int new_stride = unroll_amount * stride;
+ for (int i = 0; i < old_num_stmt; i++) {
+ std::vector<std::pair<int, std::vector<DependenceVector> > > D;
+
+ for (DependenceGraph::EdgeList::iterator j =
+ dep.vertex[i].second.begin();
+ j != dep.vertex[i].second.end();) {
+ if (same_loop.find(i) != same_loop.end()) {
+ if (same_loop.find(j->first) != same_loop.end()) {
+ std::vector<DependenceVector> dvs11, dvs12, dvs22,
+ dvs21;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.type == DEP_CONTROL
+ || dv.type == DEP_UNKNOWN) {
+ if (i == j->first) {
+ dvs11.push_back(dv);
+ dvs22.push_back(dv);
+ } else
+ throw loop_error(
+ "unrolled statements lumped together illegally");
+ } else {
+ coef_t lb = dv.lbounds[dep_dim];
+ coef_t ub = dv.ubounds[dep_dim];
+ if (ub == lb
+ && int_mod(lb,
+ static_cast<coef_t>(new_stride))
+ == 0) {
+ dvs11.push_back(dv);
+ dvs22.push_back(dv);
+ } else {
+ if (lb != -posInfinity)
+ dv.lbounds[dep_dim] = ceil(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
+ if (ub != posInfinity)
+ dv.ubounds[dep_dim] = floor(
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride;
+ if (dv.ubounds[dep_dim]
+ >= dv.lbounds[dep_dim])
+ dvs11.push_back(dv);
+
+ if (lb != -posInfinity)
+ dv.lbounds[dep_dim] = ceil(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
+ if (ub != posInfinity)
+ dv.ubounds[dep_dim] = ceil(
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride;
+ if (dv.ubounds[dep_dim]
+ >= dv.lbounds[dep_dim])
+ dvs21.push_back(dv);
+
+ if (lb != -posInfinity)
+ dv.lbounds[dep_dim] = floor(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
+ if (ub != posInfinity)
+ dv.ubounds[dep_dim] = floor(
+ static_cast<double>(ub
+ - stride)
+ / new_stride)
+ * new_stride;
+ if (dv.ubounds[dep_dim]
+ >= dv.lbounds[dep_dim])
+ dvs12.push_back(dv);
+
+ if (lb != -posInfinity)
+ dv.lbounds[dep_dim] = floor(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
+ if (ub != posInfinity)
+ dv.ubounds[dep_dim] = ceil(
+ static_cast<double>(ub
+ - stride)
+ / new_stride)
+ * new_stride;
+ if (dv.ubounds[dep_dim]
+ >= dv.lbounds[dep_dim])
+ dvs22.push_back(dv);
+ }
+ }
+ }
+ if (dvs11.size() > 0)
+ D.push_back(std::make_pair(i, dvs11));
+ if (dvs22.size() > 0)
+ dep.connect(old_num_stmt, old_num_stmt, dvs22);
+ if (dvs12.size() > 0)
+ D.push_back(
+ std::make_pair(old_num_stmt, dvs12));
+ if (dvs21.size() > 0)
+ dep.connect(old_num_stmt, i, dvs21);
+
+ dep.vertex[i].second.erase(j++);
+ } else {
+ dep.connect(old_num_stmt, j->first, j->second);
+ j++;
+ }
+ } else {
+ if (same_loop.find(j->first) != same_loop.end())
+ D.push_back(
+ std::make_pair(old_num_stmt, j->second));
+ j++;
+ }
+ }
+
+ for (int j = 0; j < D.size(); j++)
+ dep.connect(i, D[j].first, D[j].second);
+ }
+ }
+ }
+
+ //fprintf(stderr, " loop_unroll.cc returning new_stmts\n");
+ return new_stmts;
+}
+
+