diff options
Diffstat (limited to 'src/transformations')
-rw-r--r-- | src/transformations/loop.cc | 4433 | ||||
-rw-r--r-- | src/transformations/loop_basic.cc | 1839 | ||||
-rw-r--r-- | src/transformations/loop_datacopy.cc | 1369 | ||||
-rw-r--r-- | src/transformations/loop_extra.cc | 224 | ||||
-rw-r--r-- | src/transformations/loop_tile.cc | 587 | ||||
-rw-r--r-- | src/transformations/loop_unroll.cc | 1222 |
6 files changed, 9674 insertions, 0 deletions
diff --git a/src/transformations/loop.cc b/src/transformations/loop.cc new file mode 100644 index 0000000..570bc90 --- /dev/null +++ b/src/transformations/loop.cc @@ -0,0 +1,4433 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Core loop transformation functionality. + + Notes: + "level" (starting from 1) means loop level and it corresponds to "dim" + (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., + c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 + in transformed iteration space, and variable 4 in Omega relation. + All c's are constant numbers only and they will not show up as actual loops. + Formula: + dim = 2*level - 1 + var = dim + 1 + + History: + 10/2005 Created by Chun Chen. + 09/2009 Expand tile functionality, -chun + 10/2009 Initialize unfusible loop nest without bailing out, -chun +*****************************************************************************/ + +#include <limits.h> +#include <math.h> +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include <code_gen/CG_stringRepr.h> +#include <code_gen/CG_chillRepr.h> // Mark. Bad idea. 
TODO +#include <iostream> +#include <algorithm> +#include <map> +#include "loop.hh" +#include "omegatools.hh" +#include "irtools.hh" +#include "chill_error.hh" +#include <string.h> +#include <list> +#include <chilldebug.h> + +// TODO +#define _DEBUG_ true + + + +using namespace omega; + +const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change +const std::string Loop::overflow_var_name_prefix = std::string("over"); + +void echocontroltype( const IR_Control *control ) { + switch(control->type()) { + case IR_CONTROL_BLOCK: { + CHILL_DEBUG_PRINT("IR_CONTROL_BLOCK\n"); + break; + } + case IR_CONTROL_LOOP: { + CHILL_DEBUG_PRINT("IR_CONTROL_LOOP\n"); + break; + } + case IR_CONTROL_IF: { + CHILL_DEBUG_PRINT("IR_CONTROL_IF\n"); + break; + } + default: + CHILL_DEBUG_PRINT("just a bunch of statements?\n"); + + } // switch +} + +omega::Relation Loop::getNewIS(int stmt_num) const { + + omega::Relation result; + + if (stmt[stmt_num].xform.is_null()) { + omega::Relation known = omega::Extend_Set(omega::copy(this->known), + stmt[stmt_num].IS.n_set() - this->known.n_set()); + result = omega::Intersection(omega::copy(stmt[stmt_num].IS), known); + } else { + omega::Relation known = omega::Extend_Set(omega::copy(this->known), + stmt[stmt_num].xform.n_out() - this->known.n_set()); + result = omega::Intersection( + omega::Range( + omega::Restrict_Domain( + omega::copy(stmt[stmt_num].xform), + omega::copy(stmt[stmt_num].IS))), known); + } + + result.simplify(2, 4); + + return result; +} + + + +void Loop::reduce(int stmt_num, + std::vector<int> &level, + int param, + std::string func_name, + std::vector<int> &seq_levels, + std::vector<int> cudaized_levels, + int bound_level) { + + // illegal instruction?? 
fprintf(stderr, " Loop::reduce( stmt %d, param %d, func_name (encrypted)...)\n", stmt, param); // , func_name.c_str()); + + //std::cout << "Reducing stmt# " << stmt_num << " at level " << level << "\n"; + //ir->printStmt(stmt[stmt_num].code); + + if (stmt[stmt_num].reduction != 1) { + CHILL_DEBUG_PRINT("Cannot reduce this statement\n"); + return; + } + CHILL_DEBUG_PRINT("CAN reduce this statment?\n"); + + /*for (int i = 0; i < level.size(); i++) + if (stmt[stmt_num].loop_level[level[i] - 1].segreducible != true) { + std::cout << "Cannot reduce this statement\n"; + return; + } + for (int i = 0; i < seq_levels.size(); i++) + if (stmt[stmt_num].loop_level[seq_levels[i] - 1].segreducible != true) { + std::cout << "Cannot reduce this statement\n"; + return; + } + */ + // std::pair<int, std::string> to_insert(level, func_name); + // reduced_statements.insert(std::pair<int, std::pair<int, std::string> >(stmt_num, to_insert )); + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + fprintf(stderr, "set last_compute_cg_ = NULL;\n"); + + omega::CG_outputBuilder *ocg = ir->builder(); + + omega::CG_outputRepr *funCallRepr; + std::vector<omega::CG_outputRepr *> arg_repr_list; + apply_xform(stmt_num); + std::vector<IR_ArrayRef *> access = ir->FindArrayRef(stmt[stmt_num].code); + std::set<std::string> names; + for (int i = 0; i < access.size(); i++) { + std::vector<IR_ArrayRef *> access2; + for (int j = 0; j < access[i]->n_dim(); j++) { + std::vector<IR_ArrayRef *> access3 = ir->FindArrayRef( + access[i]->index(j)); + access2.insert(access2.end(), access3.begin(), access3.end()); + } + if (access2.size() == 0) { + if (names.find(access[i]->name()) == names.end()) { + arg_repr_list.push_back( + ocg->CreateAddressOf(access[i]->convert())); + names.insert(access[i]->name()); + if (access[i]->is_write()) + reduced_write_refs.insert(access[i]->name()); + } + } else { + if 
(names.find(access[i]->name()) == names.end()) { + arg_repr_list.push_back(ocg->CreateAddressOf(ocg->CreateArrayRefExpression(ocg->CreateIdent(access[i]->name()), + ocg->CreateInt(0)))); + names.insert(access[i]->name()); + if (access[i]->is_write()) + reduced_write_refs.insert(access[i]->name()); + } + } + } + + for (int i = 0; i < seq_levels.size(); i++) + arg_repr_list.push_back( + ocg->CreateIdent( + stmt[stmt_num].IS.set_var(seq_levels[i])->name())); + + if (bound_level != -1) { + + omega::Relation new_IS = copy(stmt[stmt_num].IS); + new_IS.copy_names(stmt[stmt_num].IS); + new_IS.setup_names(); + new_IS.simplify(); + int dim = bound_level; + //omega::Relation r = getNewIS(stmt_num); + for (int j = dim + 1; j <= new_IS.n_set(); j++) + new_IS = omega::Project(new_IS, new_IS.set_var(j)); + + new_IS.simplify(2, 4); + + omega::Relation bound_ = get_loop_bound(copy(new_IS), dim - 1); + omega::Variable_ID v = bound_.set_var(dim); + std::vector<omega::CG_outputRepr *> ubList; + for (omega::GEQ_Iterator e( + const_cast<omega::Relation &>(bound_).single_conjunct()->GEQs()); + e; e++) { + if ((*e).get_coef(v) < 0) { + // && (*e).is_const_except_for_global(v)) + omega::CG_outputRepr *UPPERBOUND = + omega::output_upper_bound_repr(ir->builder(), *e, v, + bound_, + std::vector< + std::pair<omega::CG_outputRepr *, int> >( + bound_.n_set(), + std::make_pair( + static_cast<omega::CG_outputRepr *>(NULL), + 0)), uninterpreted_symbols[stmt_num]); + if (UPPERBOUND != NULL) + ubList.push_back(UPPERBOUND); + + } + + } + + omega::CG_outputRepr * ubRepr; + if (ubList.size() > 1) { + + ubRepr = ir->builder()->CreateInvoke("min", ubList); + arg_repr_list.push_back(ubRepr); + } else if (ubList.size() == 1) + arg_repr_list.push_back(ubList[0]); + } + + funCallRepr = ocg->CreateInvoke(func_name, arg_repr_list); + stmt[stmt_num].code = funCallRepr; + for (int i = 0; i < level.size(); i++) { + //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr 
*>(mapping.n_out(), NULL)); + std::vector<std::string> loop_vars; + loop_vars.push_back(stmt[stmt_num].IS.set_var(level[i])->name()); + + std::vector<omega::CG_outputRepr *> subs; + subs.push_back(ocg->CreateInt(0)); + + stmt[stmt_num].code = ocg->CreateSubstitutedStmt(0, stmt[stmt_num].code, + loop_vars, subs); + + } + + omega::Relation new_IS = copy(stmt[stmt_num].IS); + new_IS.copy_names(stmt[stmt_num].IS); + new_IS.setup_names(); + new_IS.simplify(); + int old_size = new_IS.n_set(); + + omega::Relation R = omega::copy(stmt[stmt_num].IS); + R.copy_names(stmt[stmt_num].IS); + R.setup_names(); + + for (int i = level.size() - 1; i >= 0; i--) { + int j; + + for (j = 0; j < cudaized_levels.size(); j++) { + if (cudaized_levels[j] == level[i]) + break; + + } + + if (j == cudaized_levels.size()) { + R = omega::Project(R, level[i], omega::Input_Var); + R.simplify(); + + } + // + + } + + omega::F_And *f_Root = R.and_with_and(); + for (int i = level.size() - 1; i >= 0; i--) { + int j; + + for (j = 0; j < cudaized_levels.size(); j++) { + if (cudaized_levels[j] == level[i]) + break; + + } + + if (j == cudaized_levels.size()) { + + omega::EQ_Handle h = f_Root->add_EQ(); + + h.update_coef(R.set_var(level[i]), 1); + h.update_const(-1); + } + // + + } + + R.simplify(); + stmt[stmt_num].IS = R; +} + + + + + + +//----------------------------------------------------------------------------- +// Class Loop +//----------------------------------------------------------------------------- +// --begin Anand: Added from CHiLL 0.2 + +bool Loop::isInitialized() const { + return stmt.size() != 0 && !stmt[0].xform.is_null(); +} + +//--end Anand: added from CHiLL 0.2 + +bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, + std::vector<ir_tree_node *> &ir_stmt) { + + CHILL_DEBUG_PRINT("extract_ir_stmts()\n"); + CHILL_DEBUG_PRINT("ir_tree has %d statements\n", ir_tree.size()); + + ir_stmt = extract_ir_stmts(ir_tree); + + CHILL_DEBUG_PRINT("nesting level stmt size = %d\n", 
(int)ir_stmt.size()); + stmt_nesting_level_.resize(ir_stmt.size()); + + std::vector<int> stmt_nesting_level(ir_stmt.size()); + + CHILL_DEBUG_PRINT("%d statements?\n", (int)ir_stmt.size()); + + // find out how deeply nested each statement is. (how can these be different?) + for (int i = 0; i < ir_stmt.size(); i++) { + fprintf(stderr, "i %d\n", i); + ir_stmt[i]->payload = i; + int t = 0; + ir_tree_node *itn = ir_stmt[i]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + t++; + } + stmt_nesting_level_[i] = t; + stmt_nesting_level[i] = t; + CHILL_DEBUG_PRINT("stmt_nesting_level[%d] = %d\n", i, t); + } + + if (actual_code.size() == 0) + actual_code = std::vector<CG_outputRepr*>(ir_stmt.size()); + + stmt = std::vector<Statement>(ir_stmt.size()); + CHILL_DEBUG_PRINT("in init_loop, made %d stmts\n", (int)ir_stmt.size()); + + uninterpreted_symbols = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr * > > >(ir_stmt.size()); + uninterpreted_symbols_stringrepr = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr * > > >(ir_stmt.size()); + + int n_dim = -1; + int max_loc; + //std::vector<std::string> index; + for (int i = 0; i < ir_stmt.size(); i++) { + int max_nesting_level = -1; + int loc; + + // find the max nesting level and remember the statement that was at that level + for (int j = 0; j < ir_stmt.size(); j++) { + if (stmt_nesting_level[j] > max_nesting_level) { + max_nesting_level = stmt_nesting_level[j]; + loc = j; + } + } + + CHILL_DEBUG_PRINT("max nesting level %d at location %d\n", max_nesting_level, loc); + + // most deeply nested statement acting as a reference point + if (n_dim == -1) { + CHILL_DEBUG_PRINT("n_dim now max_nesting_level %d\n", max_nesting_level); + n_dim = max_nesting_level; + max_loc = loc; + + index = std::vector<std::string>(n_dim); + + ir_tree_node *itn = ir_stmt[loc]; + CHILL_DEBUG_PRINT("itn = stmt[%d]\n", loc); + int cur_dim = n_dim - 1; + while (itn->parent 
!= NULL) { + CHILL_DEBUG_PRINT("parent\n"); + + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + CHILL_DEBUG_PRINT("IR_CONTROL_LOOP cur_dim %d\n", cur_dim); + IR_Loop *IRL = static_cast<IR_Loop *>(itn->content); + index[cur_dim] = IRL->index()->name(); + CHILL_DEBUG_PRINT("index[%d] = '%s'\n", cur_dim, index[cur_dim].c_str()); + itn->payload = cur_dim--; + } + } + } + + CHILL_DEBUG_PRINT("align loops by names,\n"); + // align loops by names, temporary solution + ir_tree_node *itn = ir_stmt[loc]; // defined outside loops?? + int depth = stmt_nesting_level_[loc] - 1; + + for (int t = depth; t >= 0; t--) { + int y = t; + itn = ir_stmt[loc]; + + while ((itn->parent != NULL) && (y >= 0)) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + y--; + } + + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + CG_outputBuilder *ocg = ir->builder(); + + itn->payload = depth - t; + + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + + std::vector<CG_outputRepr *> index_expr; + std::vector<std::string> old_index; + CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); + index_expr.push_back(repl); + old_index.push_back( + static_cast<IR_Loop *>(itn->content)->index()->name()); + code = ocg->CreateSubstitutedStmt(0, code, old_index, + index_expr); + + replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); + //stmt[loc].code = code; + + } + } + + CHILL_DEBUG_PRINT("set relation variable names ****\n"); + // set relation variable names + + // this finds the loop variables for loops enclosing this statement and puts + // them in an Omega Relation (just their names, which could fail) + + CHILL_DEBUG_PRINT("Relation r(%d)\n", n_dim); + Relation r(n_dim); + F_And *f_root = r.add_and(); + itn = ir_stmt[loc]; + int temp_depth = depth; + while (itn->parent != NULL) { + + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + fprintf(stderr, "it's a loop. 
temp_depth %d\n", temp_depth); + fprintf(stderr, "r.name_set_var( %d, %s )\n", itn->payload + 1, index[temp_depth].c_str()); + r.name_set_var(itn->payload + 1, index[temp_depth]); + + temp_depth--; + } + //static_cast<IR_Loop *>(itn->content)->index()->name()); + } + fprintf(stderr, "Relation r "); r.print(); fflush(stdout); + //fprintf(stderr, "f_root "); f_root->print(stderr); fprintf(stderr, "\n"); + + /*while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); + }*/ + + + + + fprintf(stderr, "extract information from loop/if structures\n"); + // extract information from loop/if structures + std::vector<bool> processed(n_dim, false); + std::vector<std::string> vars_to_be_reversed; + + std::vector<std::string> insp_lb; + std::vector<std::string> insp_ub; + + itn = ir_stmt[loc]; + while (itn->parent != NULL) { // keep heading upward + itn = itn->parent; + + switch (itn->content->type()) { + case IR_CONTROL_LOOP: { + fprintf(stderr, "loop.cc l 462 IR_CONTROL_LOOP\n"); + IR_Loop *lp = static_cast<IR_Loop *>(itn->content); + Variable_ID v = r.set_var(itn->payload + 1); + int c; + + try { + c = lp->step_size(); + //fprintf(stderr, "step size %d\n", c); + if (c > 0) { + CG_outputRepr *lb = lp->lower_bound(); + fprintf(stderr, "loop.cc, got the lower bound. it is:\n"); + lb->dump(); printf("\n"); fflush(stdout); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + + CG_outputRepr *ub = lp->upper_bound(); + //fprintf(stderr, "loop.cc, got the upper bound. 
it is:\n"); + //ub->dump(); printf("\n"); fflush(stdout); + + + + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + else + throw ir_error("loop condition not supported"); + + + if ((ir->QueryExpOperation(lp->lower_bound()) + == IR_OP_ARRAY_VARIABLE) + && (ir->QueryExpOperation(lp->lower_bound()) + == ir->QueryExpOperation( + lp->upper_bound()))) { + + fprintf(stderr, "loop.cc lower and upper are both IR_OP_ARRAY_VARIABLE?\n"); + + std::vector<CG_outputRepr *> v = + ir->QueryExpOperand(lp->lower_bound()); + IR_ArrayRef *ref = + static_cast<IR_ArrayRef *>(ir->Repr2Ref( + v[0])); + std::string s0 = ref->name(); + std::vector<CG_outputRepr *> v2 = + ir->QueryExpOperand(lp->upper_bound()); + IR_ArrayRef *ref2 = + static_cast<IR_ArrayRef *>(ir->Repr2Ref( + v2[0])); + std::string s1 = ref2->name(); + + if (s0 == s1) { + insp_lb.push_back(s0); + insp_ub.push_back(s1); + + } + + } + + + } else if (c < 0) { + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *lb = lp->lower_bound(); + lb = ocg->CreateMinus(NULL, lb); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + CG_outputRepr *ub = lp->upper_bound(); + ub = ocg->CreateMinus(NULL, ub); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_GE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + else if (cond == IR_COND_GT) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LT, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + else + throw ir_error("loop condition not supported"); + + vars_to_be_reversed.push_back(lp->index()->name()); + } else + throw ir_error("loop step size zero"); + } catch (const ir_error &e) { + actual_code[loc] = + 
static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + + // check for loop increment or decrement that is not 1 + //fprintf(stderr, "abs(c)\n"); + if (abs(c) != 1) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(); + F_And *f_and = f_exists->add_and(); + Stride_Handle h = f_and->add_stride(abs(c)); + if (c > 0) + h.update_coef(e, 1); + else + h.update_coef(e, -1); + h.update_coef(v, -1); + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, + true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + } + + processed[itn->payload] = true; + break; + } + + + case IR_CONTROL_IF: { + fprintf(stderr, "IR_CONTROL_IF\n"); + IR_If *theif = static_cast<IR_If *>(itn->content); + + CG_outputRepr *cond = + static_cast<IR_If *>(itn->content)->condition(); + + try { + if (itn->payload % 2 == 1) + exp2constraint(ir, r, f_root, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + else { + F_Not *f_not = f_root->add_not(); + F_And *f_and = f_not->add_and(); + exp2constraint(ir, r, f_and, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + } + } catch (const ir_error &e) { + std::vector<ir_tree_node *> *t; + if (itn->parent == NULL) + t = &ir_tree; + else + t = &(itn->parent->children); + int id = itn->payload; + int i = t->size() - 1; + while (i >= 0) { + if ((*t)[i] == itn) { + for (int j = 0; j < itn->children.size(); j++) + delete itn->children[j]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + } else if ((*t)[i]->payload >> 1 == id >> 1) { + delete (*t)[i]; + t->erase(t->begin() + i); + } + i--; + } + return false; + } + + break; + } + default: + //fprintf(stderr, "default?\n"); + for 
(int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + } + + + //fprintf(stderr, "add information for missing loops n_dim(%d)\n", n_dim); + // add information for missing loops + for (int j = 0; j < n_dim; j++) + if (!processed[j]) { + ir_tree_node *itn = ir_stmt[max_loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP + && itn->payload == j) + break; + } + + Variable_ID v = r.set_var(j + 1); + if (loc < max_loc) { + + CG_outputBuilder *ocg = ir->builder(); + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, + false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + + /* if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->lower_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->lower_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, false); + + + + } + + } + */ + + } else { // loc > max_loc + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, + false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + /*if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->upper_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + 
static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->upper_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, false); + + + } + } + */ + } + } + + r.setup_names(); + r.simplify(); + + // THIS IS MISSING IN PROTONU's + for (int j = 0; j < insp_lb.size(); j++) { + + std::string lb = insp_lb[j] + "_"; + std::string ub = lb + "_"; + + Global_Var_ID u, l; + bool found_ub = false; + bool found_lb = false; + for (DNF_Iterator di(copy(r).query_DNF()); di; di++) + for (Constraint_Iterator ci = (*di)->constraints(); ci; ci++) + + for (Constr_Vars_Iter cvi(*ci); cvi; cvi++) { + Variable_ID v = cvi.curr_var(); + if (v->kind() == Global_Var) + if (v->get_global_var()->arity() > 0) { + + std::string name = + v->get_global_var()->base_name(); + if (name == lb) { + l = v->get_global_var(); + found_lb = true; + } else if (name == ub) { + u = v->get_global_var(); + found_ub = true; + } + } + + } + + if (found_lb && found_ub) { + Relation known_(copy(r).n_set()); + known_.copy_names(copy(r)); + known_.setup_names(); + Variable_ID index_lb = known_.get_local(l, Input_Tuple); + Variable_ID index_ub = known_.get_local(u, Input_Tuple); + F_And *fr = known_.add_and(); + GEQ_Handle g = fr->add_GEQ(); + g.update_coef(index_ub, 1); + g.update_coef(index_lb, -1); + g.update_const(-1); + addKnown(known_); + + } + + } + + + fprintf(stderr, "loop.cc L441 insert the statement\n"); + // insert the statement + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> reverse_expr; + 
// vars_to_be_reversed is 0-based: build exactly one negated replacement per reversed index so reverse_expr[k] pairs with vars_to_be_reversed[k] + for (int j = 0; j < vars_to_be_reversed.size(); j++) { + CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); + repl = ocg->CreateMinus(NULL, repl); + reverse_expr.push_back(repl); + } + fprintf(stderr, "loop.cc before extract\n"); + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + fprintf(stderr, "code = ocg->CreateSubstitutedStmt(...)\n"); + ((CG_chillRepr *)code)->Dump(); fflush(stdout); + + code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed, + reverse_expr); + fprintf(stderr, "stmt\n"); + ((CG_chillRepr *)code)->Dump(); fflush(stdout); + + stmt[loc].code = code; + stmt[loc].IS = r; + + //Anand: Add Information on uninterpreted function constraints to + //Known relation + + fprintf(stderr, "loop.cc stmt[%d].loop_level has size n_dim %d\n", loc, n_dim); + + stmt[loc].loop_level = std::vector<LoopLevel>(n_dim); + stmt[loc].ir_stmt_node = ir_stmt[loc]; + stmt[loc].has_inspector = false; + fprintf(stderr, "for int i < n_dim(%d)\n", n_dim); + for (int ii = 0; ii < n_dim; ii++) { + stmt[loc].loop_level[ii].type = LoopLevelOriginal; + stmt[loc].loop_level[ii].payload = ii; + stmt[loc].loop_level[ii].parallel_level = 0; + } + fprintf(stderr, "whew\n"); + + stmt_nesting_level[loc] = -1; + } + dump(); + fprintf(stderr, " loop.cc Loop::init_loop() END\n\n"); + + return true; +} + + + +Loop::Loop(const IR_Control *control) { + + CHILL_DEBUG_PRINT("control type is %d ", control->type()); + echocontroltype(control); + + CHILL_DEBUG_PRINT("2set last_compute_cg_ = NULL; \n"); + last_compute_cgr_ = NULL; + last_compute_cg_ = NULL; + + ir = const_cast<IR_Code *>(control->ir_); // point to the CHILL IR that this loop came from + if (ir == 0) { + CHILL_DEBUG_PRINT("ir gotten from control = 0x%lx\n", (long)ir); + CHILL_DEBUG_PRINT("loop.cc GONNA DIE SOON *******************************\n\n"); + } + + init_code = NULL; + cleanup_code = NULL; + tmp_loop_var_name_counter = 1; + overflow_var_name_counter = 1; + known = 
Relation::True(0); + + CHILL_DEBUG_PRINT("calling build_ir_tree()\n"); + CHILL_DEBUG_PRINT("about to clone control\n"); + ir_tree = build_ir_tree(control->clone(), NULL); + //fprintf(stderr,"in Loop::Loop. ir_tree has %ld parts\n", ir_tree.size()); + + // std::vector<ir_tree_node *> ir_stmt; + //fprintf(stderr, "loop.cc after build_ir_tree() %ld statements\n", stmt.size()); + + int count = 0; + //fprintf(stderr, "before init_loops, %d freevar\n", freevar.size()); + //fprintf(stderr, "count %d\n", count++); + //fprintf(stderr, "loop.cc before init_loop, %ld statements\n", stmt.size()); + while (!init_loop(ir_tree, ir_stmt)) { + //fprintf(stderr, "count %d\n", count++); + } + fprintf(stderr, "after init_loop, %d freevar\n", (int)freevar.size()); + + + fprintf(stderr, "loop.cc after init_loop, %d statements\n", (int)stmt.size()); + for (int i = 0; i < stmt.size(); i++) { + std::map<int, CG_outputRepr*>::iterator it = replace.find(i); + + if (it != replace.end()) + stmt[i].code = it->second; + else + stmt[i].code = stmt[i].code; + } + + if (stmt.size() != 0) + dep = DependenceGraph(stmt[0].IS.n_set()); + else + dep = DependenceGraph(0); + // init the dependence graph + for (int i = 0; i < stmt.size(); i++) + dep.insert(); + + fprintf(stderr, "this really REALLY needs some comments\n"); + // this really REALLY needs some comments + for (int i = 0; i < stmt.size(); i++) { + fprintf(stderr, "i %d\n", i); + stmt[i].reduction = 0; // Manu -- initialization + for (int j = i; j < stmt.size(); j++) { + fprintf(stderr, "j %d\n", j); + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = test_data_dependences( + ir, + stmt[i].code, + stmt[i].IS, + stmt[j].code, + stmt[j].IS, + freevar, + index, + stmt_nesting_level_[i], + stmt_nesting_level_[j], + uninterpreted_symbols[ i ], + uninterpreted_symbols_stringrepr[ i ]); + + fprintf(stderr, "dv.first.size() %d\n", (int)dv.first.size()); + for (int k = 0; k < dv.first.size(); k++) { + fprintf(stderr, "k1 
%d\n", k); + if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], + true)) + dep.connect(i, j, dv.first[k]); + else { + dep.connect(j, i, dv.first[k].reverse()); + } + + } + + for (int k = 0; k < dv.second.size(); k++) { + fprintf(stderr, "k2 %d\n", k); + if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], + false)) + dep.connect(j, i, dv.second[k]); + else { + dep.connect(i, j, dv.second[k].reverse()); + } + } + } + } + + fprintf(stderr, "\n\n*** LOTS OF REDUCTIONS ***\n\n"); + + // TODO: Reduction check + // Manu:: Initial implementation / algorithm + std::set<int> reducCand = std::set<int>(); + std::vector<int> canReduce = std::vector<int>(); + // size() returns size_t; cast to int so the %d conversions below are well-defined + fprintf(stderr, "\ni range %d\n", (int)stmt.size()); + for (int i = 0; i < stmt.size(); i++) { + fprintf(stderr, "i %d\n", i); + if (!dep.hasEdge(i, i)) { + continue; + } + fprintf(stderr, "dep.hasEdge(%d, %d)\n", i, i); + + // for each statement check if it has all the three dependences (RAW, WAR, WAW) + // If there is such a statement, it is a reduction candidate. Mark all reduction candidates. 
+ std::vector<DependenceVector> tdv = dep.getEdge(i, i); + fprintf(stderr, "tdv size %d\n", (int)tdv.size()); + for (int j = 0; j < tdv.size(); j++) { + fprintf(stderr, "ij %d %d\n", i, j); + if (tdv[j].is_reduction_cand) { + fprintf(stderr, "reducCand.insert( %d )\n", i); + reducCand.insert(i); + } + } + } + + fprintf(stderr, "loop.cc reducCand.size() %d\n", (int)reducCand.size()); + bool reduc; + std::set<int>::iterator it; + int counter = 0; + for (it = reducCand.begin(); it != reducCand.end(); it++) { + fprintf(stderr, "counter %d\n", counter); + reduc = true; + for (int j = 0; j < stmt.size(); j++) { + fprintf(stderr, "j %d\n", j); + if ((*it != j) + && (stmt_nesting_level_[*it] < stmt_nesting_level_[j])) { + if (dep.hasEdge(*it, j) || dep.hasEdge(j, *it)) { + fprintf(stderr, "counter %d j %d reduc = false\n", counter, j); + reduc = false; + break; + } + } + counter += 1; + } + + if (reduc) { + fprintf(stderr, "canReduce.push_back()\n"); + canReduce.push_back(*it); + stmt[*it].reduction = 2; // First, assume that reduction is possible with some processing + } + } + + + // If reduction is possible without processing, update the value of the reduction variable to 1 + fprintf(stderr, "loop.cc canReduce.size() %d\n", (int)canReduce.size()); + for (int i = 0; i < canReduce.size(); i++) { + // Here, assuming that stmtType returns 1 when there is a single statement within stmt[i] + if (stmtType(ir, stmt[canReduce[i]].code) == 1) { + stmt[canReduce[i]].reduction = 1; + IR_OPERATION_TYPE opType; + opType = getReductionOperator(ir, stmt[canReduce[i]].code); + stmt[canReduce[i]].reductionOp = opType; + } + } + + // printing out stuff for debugging + + if (DEP_DEBUG) { + std::cout << "STATEMENTS THAT CAN BE REDUCED: \n"; + for (int i = 0; i < canReduce.size(); i++) { + std::cout << "------- " << canReduce[i] << " ------- " + << stmt[canReduce[i]].reduction << "\n"; + ir->printStmt(stmt[canReduce[i]].code); // Manu + if (stmt[canReduce[i]].reductionOp == IR_OP_PLUS) + std::cout << 
"Reduction type:: + \n"; + else if (stmt[canReduce[i]].reductionOp == IR_OP_MINUS) + std::cout << "Reduction type:: - \n"; + else if (stmt[canReduce[i]].reductionOp == IR_OP_MULTIPLY) + std::cout << "Reduction type:: * \n"; + else if (stmt[canReduce[i]].reductionOp == IR_OP_DIVIDE) + std::cout << "Reduction type:: / \n"; + else + std::cout << "Unknown reduction type\n"; + } + } + // cleanup the IR tree + + fprintf(stderr, "init dumb transformation relations\n"); + + // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] + for (int i = 0; i < stmt.size(); i++) { + int n = stmt[i].IS.n_set(); + stmt[i].xform = Relation(n, 2 * n + 1); + F_And *f_root = stmt[i].xform.add_and(); + + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(2 * j), 1); + h.update_coef(stmt[i].xform.input_var(j), -1); + } + + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(j), 1); + } + stmt[i].xform.simplify(); + } + //fprintf(stderr, "done with dumb\n"); + + if (stmt.size() != 0) + num_dep_dim = stmt[0].IS.n_set(); + else + num_dep_dim = 0; + // debug + /*for (int i = 0; i < stmt.size(); i++) { + std::cout << i << ": "; + //stmt[i].xform.print(); + stmt[i].IS.print(); + std::cout << std::endl; + + }*/ + //end debug + fprintf(stderr, " at bottom of Loop::Loop, printCode\n"); + printCode(); // this dies TODO figure out why +} + +Loop::~Loop() { + + delete last_compute_cgr_; + delete last_compute_cg_; + + for (int i = 0; i < stmt.size(); i++) + if (stmt[i].code != NULL) { + stmt[i].code->clear(); + delete stmt[i].code; + } + + for (int i = 0; i < ir_tree.size(); i++) + delete ir_tree[i]; + + if (init_code != NULL) { + init_code->clear(); + delete init_code; + } + if (cleanup_code != NULL) { + cleanup_code->clear(); + delete cleanup_code; + } +} + + + + +int Loop::get_dep_dim_of(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + 
throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1 || level > stmt[stmt_num].loop_level.size()) + return -1; + + int trip_count = 0; + while (true) { + switch (stmt[stmt_num].loop_level[level - 1].type) { + case LoopLevelOriginal: + return stmt[stmt_num].loop_level[level - 1].payload; + case LoopLevelTile: + level = stmt[stmt_num].loop_level[level - 1].payload; + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + throw loop_error("incorrect loop level information for statement " + + to_string(stmt_num)); + break; + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(stmt_num)); + } + trip_count++; + if (trip_count >= stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + } +} + +int Loop::get_last_dep_dim_before(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + level = stmt[stmt_num].loop_level.size() + 1; + + for (int i = level - 1; i >= 1; i--) + if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) + return stmt[stmt_num].loop_level[i - 1].payload; + + return -1; +} + +void Loop::print_internal_loop_structure() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * 
stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +void Loop::debugRelations() const { + const int m = stmt.size(); + { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; // const stucks + } + + printf("\nxforms:\n"); + for (int i = 0; i < m; i++) { xforms[i].print(); printf("\n"); } + printf("\nIS:\n"); + for (int i = 0; i < m; i++) { IS[i].print(); printf("\n"); } + fflush(stdout); + } +} + + +CG_outputRepr *Loop::getCode(int effort) const { + fprintf(stderr,"\nloop.cc Loop::getCode( effort %d )\n", effort ); + + const int m = stmt.size(); + if (m == 0) + return NULL; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + fprintf(stderr, "Loop::getCode() last_compute_cg_ == NULL\n"); + + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + + debugRelations(); + + + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + printf("\nknown:\n"); known.print(); printf("\n\n"); fflush(stdout); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + else { + fprintf(stderr, "Loop::getCode() last_compute_cg_ NOT NULL\n"); + } + + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::vector<CG_outputRepr *> stmts(m); + fprintf(stderr, "%d stmts\n", m); + for (int i = 0; i < m; i++) + stmts[i] = stmt[i].code; + CG_outputBuilder *ocg = ir->builder(); + + fprintf(stderr, "calling last_compute_cgr_->printRepr()\n"); + CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts, + uninterpreted_symbols); + + if (init_code != 
NULL) + repr = ocg->StmtListAppend(init_code->clone(), repr); + if (cleanup_code != NULL) + repr = ocg->StmtListAppend(repr, cleanup_code->clone()); + + fprintf(stderr,"\nloop.cc Loop::getCode( effort %d ) DONE\n", effort ); + return repr; +} + + + + +void Loop::printCode(int effort) const { + fprintf(stderr,"\nloop.cc Loop::printCode( effort %d )\n", effort ); + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + fprintf(stderr, "Loop::printCode(), last_compute_cg_ == NULL\n"); + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + else fprintf(stderr, "Loop::printCode(), last_compute_cg_ NOT NULL\n"); + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::string repr = last_compute_cgr_->printString( + uninterpreted_symbols_stringrepr); + fprintf(stderr, "leaving Loop::printCode()\n"); + std::cout << repr << std::endl; +} + +void Loop::printIterationSpace() const { + for (int i = 0; i < stmt.size(); i++) { + std::cout << "s" << i << ": "; + Relation r = getNewIS(i); + for (int j = 1; j <= r.n_inp(); j++) + r.name_input_var(j, CodeGen::loop_var_name_prefix + to_string(j)); + r.setup_names(); + r.print(); + } +} + +void Loop::printDependenceGraph() const { + if (dep.edgeCount() == 0) + std::cout << "no dependence exists" << std::endl; + else { + std::cout << "dependence graph:" << std::endl; + std::cout << dep; + } +} + +std::vector<Relation> Loop::getNewIS() const { + const int m = stmt.size(); + + std::vector<Relation> new_IS(m); + for (int i = 0; i < m; i++) 
+ new_IS[i] = getNewIS(i); + + return new_IS; +} + +// pragmas are tied to loops only ??? +void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePragmaAttribute(code, level, pragmaText); +} + + +/* + void Loop::prefetch(int stmt_num, int level, const std::string &arrName, const std::string &indexName, int offset, int hint) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, indexName, int offset, hint); + } +*/ + +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, hint); +} + +std::vector<int> Loop::getLexicalOrder(int stmt_num) const { + assert(stmt_num < stmt.size()); + + const int n = stmt[stmt_num].xform.n_out(); + std::vector<int> lex(n, 0); + + for (int i = 0; i < n; i += 2) + lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); + + return lex; +} + +// find the sub loop nest specified by stmt_num and level, +// only iteration space satisfiable statements returned. 
+std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); + + std::set<int> working; + for (int i = 0; i < stmt.size(); i++) + if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable() + && stmt[i].loop_level.size() >= level) + working.insert(i); + + for (int i = 1; i <= level; i++) { + int a = getLexicalOrder(stmt_num, i); + for (std::set<int>::iterator j = working.begin(); j != working.end();) { + int b = getLexicalOrder(*j, i); + if (b != a) + working.erase(j++); + else + ++j; + } + } + + return working; +} + +int Loop::getLexicalOrder(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); + + Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform; + for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++) + if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { + bool is_const = true; + for (Constr_Vars_Iter cvi(*e); cvi; cvi++) + if (cvi.curr_var() != r.output_var(2 * level - 1)) { + is_const = false; + break; + } + if (is_const) { + int t = static_cast<int>((*e).get_const()); + return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? 
-t : t; + } + } + + throw loop_error( + "can't find lexical order for statement " + to_string(stmt_num) + + "'s loop level " + to_string(level)); +} + +std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { + const int m = stmt.size(); + + std::set<int> same_loops; + for (int i = 0; i < m; i++) { + if (dim < 0) + same_loops.insert(i); + else { + std::vector<int> a_lex = getLexicalOrder(i); + int j; + for (j = 0; j <= dim; j += 2) + if (lex[j] != a_lex[j]) + break; + if (j > dim) + same_loops.insert(i); + } + + } + + return same_loops; +} + +void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { + const int m = stmt.size(); + + if (amount == 0) + return; + + for (int i = 0; i < m; i++) { + std::vector<int> lex2 = getLexicalOrder(i); + + bool need_shift = true; + + for (int j = 0; j < dim; j++) + if (lex2[j] != lex[j]) { + need_shift = false; + break; + } + + if (!need_shift) + continue; + + if (amount > 0) { + if (lex2[dim] < lex[dim]) + continue; + } else if (amount < 0) { + if (lex2[dim] > lex[dim]) + continue; + } + + assign_const(stmt[i].xform, dim, lex2[dim] + amount); + } +} + +std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, + int level) { + + std::set<int> not_nested_at_this_level; + std::map<ir_tree_node*, std::set<int> > sorted_by_loop; + std::map<int, std::set<int> > sorted_by_lex_order; + std::vector<std::set<int> > to_return; + bool lex_order_already_set = false; + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + if (stmt[*it].ir_stmt_node == NULL) + lex_order_already_set = true; + } + + if (lex_order_already_set) { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + std::map<int, std::set<int> >::iterator it2 = + sorted_by_lex_order.find( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var)); + + if (it2 != sorted_by_lex_order.end()) + it2->second.insert(*it); + else { + + std::set<int> to_insert; + + 
to_insert.insert(*it); + + sorted_by_lex_order.insert( + std::pair<int, std::set<int> >( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var), to_insert)); + + } + + } + + for (std::map<int, std::set<int> >::iterator it2 = + sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); + it2++) + to_return.push_back(it2->second); + + } else { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + ir_tree_node* itn = stmt[*it].ir_stmt_node; + itn = itn->parent; + //while (itn->content->type() != IR_CONTROL_LOOP && itn != NULL) + // itn = itn->parent; + + while ((itn != NULL) && (itn->payload != level - 1)) { + itn = itn->parent; + while (itn != NULL && itn->content->type() != IR_CONTROL_LOOP ) + itn = itn->parent; + } + + if (itn == NULL) + not_nested_at_this_level.insert(*it); + else { + std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.find(itn); + + if (it2 != sorted_by_loop.end()) + it2->second.insert(*it); + else { + std::set<int> to_insert; + + to_insert.insert(*it); + + sorted_by_loop.insert( + std::pair<ir_tree_node*, std::set<int> >(itn, + to_insert)); + + } + + } + + } + if (not_nested_at_this_level.size() > 0) { + for (std::set<int>::iterator it = not_nested_at_this_level.begin(); + it != not_nested_at_this_level.end(); it++) { + std::set<int> temp; + temp.insert(*it); + to_return.push_back(temp); + + } + } + for (std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++) + to_return.push_back(it2->second); + } + return to_return; +} + +void update_successors(int n, + int node_num[], + int cant_fuse_with[], + Graph<std::set<int>, bool> &g, + std::list<int> &work_list, + std::list<bool> &type_list, + std::vector<bool> types) { + + std::set<int> disconnect; + for (Graph<std::set<int>, bool>::EdgeList::iterator i = + g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { + int m = i->first; + + if (node_num[m] != -1) + throw 
loop_error("Graph input for fusion has cycles not a DAG!!"); + + std::vector<bool> check_ = g.getEdge(n, m); + + bool has_bad_edge_path = false; + for (int i = 0; i < check_.size(); i++) + if (!check_[i]) { + has_bad_edge_path = true; + break; + } + if (!types[m]) { + cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]); + } else { + if (has_bad_edge_path) + cant_fuse_with[m] = std::max(cant_fuse_with[m], node_num[n]); + else + cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]); + } + disconnect.insert(m); + } + + + for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end(); + i++) { + g.disconnect(n, *i); + + bool no_incoming_edges = true; + for (int j = 0; j < g.vertex.size(); j++) + if (j != *i) + if (g.hasEdge(j, *i)) { + no_incoming_edges = false; + break; + } + + if (no_incoming_edges) { + work_list.push_back(*i); + type_list.push_back(types[*i]); + } + } +} + + + +int Loop::getMinLexValue(std::set<int> stmts, int level) { + + int min; + + std::set<int>::iterator it = stmts.begin(); + min = getLexicalOrder(*it, level); + + for (; it != stmts.end(); it++) { + int curr = getLexicalOrder(*it, level); + if (curr < min) + min = curr; + } + + return min; +} + + + + +Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( + std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) { + Graph<std::set<int>, bool> g; + + for (int i = 0; i < s.size(); i++) + g.insert(s[i]); + + for (int i = 0; i < s.size(); i++) { + + for (int j = i + 1; j < s.size(); j++) { + bool has_true_edge_i_to_j = false; + bool has_true_edge_j_to_i = false; + bool is_connected_i_to_j = false; + bool is_connected_j_to_i = false; + for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end(); + ii++) { + + for (std::set<int>::iterator jj = s[j].begin(); + jj != s[j].end(); jj++) { + + std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || 
(dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_i_to_j = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_i_to_j = true; + //break + } + } + + //if (is_connected) + + // break; + // if (has_true_edge_i_to_j && !is_connected_i_to_j) + // g.connect(i, j, true); + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (is_connected_i_to_j || has_true_edge_i_to_j) + throw loop_error( + "Graph input for fusion has cycles not a DAG!!"); + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_j_to_i = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_j_to_i = true; + //break; + } + } + + // if (is_connected) + //break; + // if (is_connected) + //break; + } + + //if (is_connected) + // break; + } + + + if (is_connected_i_to_j) + g.connect(i, j, false); + else if (has_true_edge_i_to_j) + g.connect(i, j, true); + + if (is_connected_j_to_i) + g.connect(j, i, false); + else if (has_true_edge_j_to_i) + g.connect(j, i, true); + + } + } + return g; +} + + + +std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g, + std::vector<bool> &types) { + + bool roots[g.vertex.size()]; + + for (int i = 0; i < g.vertex.size(); i++) + roots[i] = true; + + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + + if (g.hasEdge(i, j)) + roots[j] = false; + + if (g.hasEdge(j, i)) + roots[i] = false; + + } + + std::list<int> work_list; + std::list<bool> type_list; + int cant_fuse_with[g.vertex.size()]; + int fused = 0; + int lastfused = 0; + int lastnum = 0; + std::vector<std::set<int> > s; + //Each Fused set's representative 
node + + int node_to_fused_nodes[g.vertex.size()]; + int node_num[g.vertex.size()]; + int next[g.vertex.size()]; + + for (int i = 0; i < g.vertex.size(); i++) { + if (roots[i] == true) { + work_list.push_back(i); + type_list.push_back(types[i]); + } + cant_fuse_with[i] = 0; + node_to_fused_nodes[i] = 0; + node_num[i] = -1; + next[i] = 0; + } + + + // topological sort according to chun's permute algorithm + // std::vector<std::set<int> > s = g.topoSort(); + std::vector<std::set<int> > s2 = g.topoSort(); + if (work_list.empty() || (s2.size() != g.vertex.size())) { + + std::cout << s2.size() << "\t" << g.vertex.size() << std::endl; + throw loop_error("Input for fusion not a DAG!!"); + + + } + int fused_nodes_counter = 0; + while (!work_list.empty()) { + int n = work_list.front(); + bool type = type_list.front(); + //int n_ = g.vertex[n].first; + work_list.pop_front(); + type_list.pop_front(); + int node; + /*if (cant_fuse_with[n] == 0) + node = 0; + else + node = cant_fuse_with[n]; + */ + int p; + if (type) { + //if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) { + if (cant_fuse_with[n] == 0) + p = fused; + else + p = next[cant_fuse_with[n]]; + + if (p != 0) { + int rep_node = node_to_fused_nodes[p]; + node_num[n] = node_num[rep_node]; + + try { + update_successors(n, node_num, cant_fuse_with, g, work_list, + type_list, types); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + } + for (std::set<int>::iterator it = g.vertex[n].first.begin(); + it != g.vertex[n].first.end(); it++) + s[node_num[n] - 1].insert(*it); + } else { + //std::set<int> new_node; + //new_node.insert(n_); + s.push_back(g.vertex[n].first); + lastnum = lastnum + 1; + node_num[n] = lastnum; + node_to_fused_nodes[node_num[n]] = n; + + if (lastfused == 0) { + fused = lastnum; + lastfused = fused; + } else { + next[lastfused] = lastnum; + lastfused = lastnum; + + } + + try { + update_successors(n, node_num, 
cant_fuse_with, g, work_list, + type_list, types); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + } + fused_nodes_counter++; + } + + } else { + s.push_back(g.vertex[n].first); + lastnum = lastnum + 1; + node_num[n] = lastnum; + node_to_fused_nodes[node_num[n]] = n; + + try { + update_successors(n, node_num, cant_fuse_with, g, work_list, + type_list, types); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + } + //fused_nodes_counter++; + + } + + } + + return s; +} + + + + +void Loop::setLexicalOrder(int dim, const std::set<int> &active, + int starting_order, std::vector<std::vector<std::string> > idxNames) { + fprintf(stderr, "Loop::setLexicalOrder() %d idxNames active size %d starting_order %d\n", idxNames.size(), active.size(), starting_order); + if (active.size() == 0) + return; + + for (int i=0; i< idxNames.size(); i++) { + std::vector<std::string> what = idxNames[i]; + for (int j=0; j<what.size(); j++) { + fprintf(stderr, "%2d %2d %s\n", i,j, what[j].c_str()); + } + } + + // check for sanity of parameters + if (dim < 0 || dim % 2 != 0) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + std::vector<int> lex; + int ref_stmt_num; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if ((*i) < 0 || (*i) >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (dim >= stmt[*i].xform.n_out()) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + if (i == active.begin()) { + lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < dim; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements are not in the same sub loop nest"); + } + } + + // separate statements by 
current loop level types + int level = (dim + 2) / 2; + std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; + std::set<int> active_by_no_level; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (level > stmt[*i].loop_level.size()) + active_by_no_level.insert(*i); + else + active_by_level_type[std::make_pair( + stmt[*i].loop_level[level - 1].type, + stmt[*i].loop_level[level - 1].payload)].insert(*i); + } + + // further separate statements due to control dependences + std::vector<std::set<int> > active_by_level_type_splitted; + for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = + active_by_level_type.begin(); i != active_by_level_type.end(); i++) + active_by_level_type_splitted.push_back(i->second); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) + for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { + std::set<int> controlled, not_controlled; + for (std::set<int>::iterator k = + active_by_level_type_splitted[j].begin(); + k != active_by_level_type_splitted[j].end(); k++) { + std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); + bool is_controlled = false; + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].type = DEP_CONTROL) { + is_controlled = true; + break; + } + if (is_controlled) + controlled.insert(*k); + else + not_controlled.insert(*k); + } + if (controlled.size() != 0 && not_controlled.size() != 0) { + active_by_level_type_splitted.erase( + active_by_level_type_splitted.begin() + j); + active_by_level_type_splitted.push_back(controlled); + active_by_level_type_splitted.push_back(not_controlled); + } + } + + // set lexical order separating loops with different loop types first + if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { + int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + + Graph<std::set<int>, Empty> g; + for (std::vector<std::set<int> >::iterator i 
= + active_by_level_type_splitted.begin(); + i != active_by_level_type_splitted.end(); i++) + g.insert(*i); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, + *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, + *ii); + // find the sub loop nest specified by stmt_num and level, + // only iteration space satisfiable statements returned. 
+ for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + std::vector<std::set<int> > s = g.topoSort(); + if (s.size() != g.vertex.size()) + throw loop_error( + "cannot separate statements with different loop types at loop level " + + to_string(level)); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; + int sz = cur_scc.size(); + if (sz == 1) { + int cur_stmt = *(cur_scc.begin()); + assign_const(stmt[cur_stmt].xform, dim, order); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + order++; + } else { // recurse ! + fprintf(stderr, "Loop:setLexicalOrder() recursing\n"); + setLexicalOrder(dim, cur_scc, order, idxNames); + order += sz; + } + } + } + else { // set lexical order separating single iteration statements and loops + + std::set<int> true_singles; + std::set<int> nonsingles; + std::map<coef_t, std::set<int> > fake_singles; + std::set<int> fake_singles_; + + // sort out statements that do not require loops + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) { + Relation cur_IS = getNewIS(*i); + if (is_single_iteration(cur_IS, dim + 1)) { + bool is_all_single = true; + for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) + if (!is_single_iteration(cur_IS, j)) { + is_all_single = false; + break; + } + if (is_all_single) + true_singles.insert(*i); + else { + fake_singles_.insert(*i); + try { + fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( + *i); + } catch (const std::exception &e) { + fake_singles[posInfinity].insert(*i); + } + } + } else + nonsingles.insert(*i); + } + + + // split nonsingles forcibly according to negative dependences 
present (loop unfusible) + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + + if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { + + bool dummy_level_found = false; + + std::vector<std::set<int> > s; + + s = sort_by_same_loops(active, level); + bool further_levels_exist = false; + + if (!idxNames.empty()) + if (level <= idxNames[ref_stmt_num].size()) + if (idxNames[ref_stmt_num][level - 1].length() == 0) { + // && s.size() == 1) { + int order1 = 0; + dummy_level_found = true; + + for (int i = level; i < idxNames[ref_stmt_num].size(); + i++) + if (idxNames[ref_stmt_num][i].length() > 0) + further_levels_exist = true; + + } + + //if (!dummy_level_found) { + + if (s.size() > 1) { + + std::vector<bool> types; + for (int i = 0; i < s.size(); i++) + types.push_back(true); + + Graph<std::set<int>, bool> g = construct_induced_graph_at_level( + s, dep, dep_dim); + s = typed_fusion(g, types); + } + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + + for (std::set<int>::iterator it = s[i].begin(); + it != s[i].end(); it++) { + assign_const(stmt[*it].xform, dim, order); + stmt[*it].xform.simplify(); + } + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) { // recurse ! 
+ fprintf(stderr, "Loop:setLexicalOrder() recursing\n"); + setLexicalOrder(dim + 2, s[i], order, idxNames); + } + + order++; + } + //} + /* else { + + int order1 = 0; + int order = 0; + for (std::set<int>::iterator i = active.begin(); + i != active.end(); i++) { + if (!further_levels_exist) + assign_const(stmt[*i].xform, dim, order1++); + else + assign_const(stmt[*i].xform, dim, order1); + + } + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist) + setLexicalOrder(dim + 2, active, order, idxNames); + } + */ + } else { + int dummy_order = 0; + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) { + assign_const(stmt[*i].xform, dim, dummy_order++); + stmt[*i].xform.simplify(); + } + } + /*for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i+1; j < g2.vertex.size(); j++) { + std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(i, j); + break; + } + dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(j, i); + break; + } + } + + std::vector<std::set<int> > s2 = g2.packed_topoSort(); + + std::vector<std::set<int> > splitted_nonsingles; + for (int i = 0; i < s2.size(); i++) { + std::set<int> cur_scc; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + cur_scc.insert(g2.vertex[*j].first); + splitted_nonsingles.push_back(cur_scc); + } + */ + //convert to dependence graph for grouped statements + //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + /*int order = 0; + for (std::set<int>::iterator j = active.begin(); j != active.end(); + j++) { + std::set<int> continuous; + std::cout<< 
active.size()<<std::endl; + while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) { + continuous.insert(*j); + j++; + } + + printf("continuous size is %d\n", continuous.size()); + + + + if (continuous.size() > 0) { + std::vector<std::set<int> > s = typed_fusion(continuous, dep, + dep_dim); + + for (int i = 0; i < s.size(); i++) { + for (std::set<int>::iterator l = s[i].begin(); + l != s[i].end(); l++) { + assign_const(stmt[*l].xform, dim + 2, order); + setLexicalOrder(dim + 2, s[i]); + } + order++; + } + } + + if (j != active.end()) { + assign_const(stmt[*j].xform, dim + 2, order); + + for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2) + assign_const(stmt[*j].xform, k, 0); + order++; + } + + if( j == active.end()) + break; + } + */ + + + // assign lexical order + /*int order = starting_order; + for (int i = 0; i < s.size(); i++) { + // translate each SCC into original statements + std::set<int> cur_scc; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + copy(s[i].begin(), s[i].end(), + inserter(cur_scc, cur_scc.begin())); + + // now assign the constant + for (std::set<int>::iterator j = cur_scc.begin(); + j != cur_scc.end(); j++) + assign_const(stmt[*j].xform, dim, order); + + if (cur_scc.size() > 1) + setLexicalOrder(dim + 2, cur_scc); + else if (cur_scc.size() == 1) { + int cur_stmt = *(cur_scc.begin()); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + } + + if (cur_scc.size() > 0) + order++; + } + */ + } + + fprintf(stderr, "LEAVING Loop::setLexicalOrder() %d idxNames\n", idxNames.size()); + for (int i=0; i< idxNames.size(); i++) { + std::vector<std::string> what = idxNames[i]; + for (int j=0; j<what.size(); j++) { + fprintf(stderr, "%2d %2d %s\n", i,j, what[j].c_str()); + } + } +} + + + +void Loop::apply_xform() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + apply_xform(active); +} + +void Loop::apply_xform(int 
stmt_num) { + fprintf(stderr, "apply_xform( %d )\n", stmt_num); + std::set<int> active; + active.insert(stmt_num); + apply_xform(active); +} + +void Loop::apply_xform(std::set<int> &active) { + fflush(stdout); + fprintf(stderr, "loop.cc apply_xform( set )\n"); + + int max_n = 0; + + omega::CG_outputBuilder *ocg = ir->builder(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].loop_level.size(); + if (n > max_n) + max_n = n; + + std::vector<int> lex = getLexicalOrder(*i); + + omega::Relation mapping(2 * n + 1, n); + omega::F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + omega::EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + mapping = omega::Composition(mapping, stmt[*i].xform); + mapping.simplify(); + + // match omega input/output variables to variable names in the code + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); + for (int j = 1; j <= n; j++) + mapping.name_output_var(j, + tmp_loop_var_name_prefix + + omega::to_string( + tmp_loop_var_name_counter + j - 1)); + mapping.setup_names(); + mapping.print(); // "{[I] -> [_t1] : I = _t1 } + fflush(stdout); + + omega::Relation known = Extend_Set(copy(this->known), + mapping.n_out() - this->known.n_set()); + //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); + + omega::CG_outputBuilder *ocgr = ir->builder(); + + + //this is probably CG_chillBuilder; + + omega::CG_stringBuilder *ocgs = new omega::CG_stringBuilder; + if (uninterpreted_symbols[*i].size() == 0) { + + + std::set<std::string> globals; + + for (omega::DNF_Iterator di(stmt[*i].IS.query_DNF()); di; di++) { + + for (omega::Constraint_Iterator e(*di); e; e++) { + for (omega::Constr_Vars_Iter cvi(*e); cvi; cvi++) { + omega::Variable_ID v = cvi.curr_var(); + if (v->kind() == 
omega::Global_Var + && v->get_global_var()->arity() > 0 + && globals.find(v->name()) == globals.end()) { + omega::Global_Var_ID g = v->get_global_var(); + globals.insert(v->name()); + std::vector<omega::CG_outputRepr *> reprs; + std::vector<omega::CG_outputRepr *> reprs2; + + for (int l = 1; l <= g->arity(); l++) { + omega::CG_outputRepr *temp = ocgr->CreateIdent( + stmt[*i].IS.set_var(l)->name()); + omega::CG_outputRepr *temp2 = ocgs->CreateIdent( + stmt[*i].IS.set_var(l)->name()); + + reprs.push_back(temp); + reprs2.push_back(temp2); + } + uninterpreted_symbols[*i].insert( + std::pair<std::string, + std::vector<omega::CG_outputRepr *> >( + v->get_global_var()->base_name(), + reprs)); + uninterpreted_symbols_stringrepr[*i].insert( + std::pair<std::string, + std::vector<omega::CG_outputRepr *> >( + v->get_global_var()->base_name(), + reprs2)); + } + } + } + } + } + + std::vector<std::string> loop_vars; + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) { + loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); + } + for (int j = 0; j<loop_vars.size(); j++) { + fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str()); + } + std::vector<CG_outputRepr *> subs = output_substitutions(ocg, + Inverse(copy(mapping)), + std::vector<std::pair<CG_outputRepr *, int> >( + mapping.n_out(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), 0)), + uninterpreted_symbols[*i]); + + std::vector<CG_outputRepr *> subs2; + for (int l = 0; l < subs.size(); l++) + subs2.push_back(subs[l]->clone()); + + fprintf(stderr, "%d uninterpreted symbols\n", (int)uninterpreted_symbols.size()); + for (int j = 0; j<loop_vars.size(); j++) { + fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str()); + } + + + int count = 0; + for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it = + uninterpreted_symbols[*i].begin(); + it != uninterpreted_symbols[*i].end(); it++) { + fprintf(stderr, "\ncount %d\n", count); + + std::vector<CG_outputRepr *> reprs_ = it->second; + 
fprintf(stderr, "%d reprs_\n", (int)reprs_.size()); + + std::vector<CG_outputRepr *> reprs_2; + for (int k = 0; k < reprs_.size(); k++) { + fprintf(stderr, "k %d\n", k); + std::vector<CG_outputRepr *> subs; + for (int l = 0; l < subs2.size(); l++) { + fprintf(stderr, "l %d\n", l); + subs.push_back(subs2[l]->clone()); + } + + fprintf(stderr, "clone\n"); + CG_outputRepr *c = reprs_[k]->clone(); + c->dump(); fflush(stdout); + + fprintf(stderr, "createsub\n"); + CG_outputRepr *s = ocgr->CreateSubstitutedStmt(0, c, + loop_vars, subs, true); + + fprintf(stderr, "push back\n"); + reprs_2.push_back( s ); + + } + + it->second = reprs_2; + count++; + fprintf(stderr, "bottom\n"); + } + + std::vector<CG_outputRepr *> subs3 = output_substitutions( + ocgs, Inverse(copy(mapping)), + std::vector<std::pair<CG_outputRepr *, int> >( + mapping.n_out(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), 0)), + uninterpreted_symbols_stringrepr[*i]); + + for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it = + uninterpreted_symbols_stringrepr[*i].begin(); + it != uninterpreted_symbols_stringrepr[*i].end(); it++) { + + std::vector<CG_outputRepr *> reprs_ = it->second; + std::vector<CG_outputRepr *> reprs_2; + for (int k = 0; k < reprs_.size(); k++) { + std::vector<CG_outputRepr *> subs; + /* for (int l = 0; l < subs3.size(); l++) + subs.push_back(subs3[l]->clone()); + reprs_2.push_back( + ocgs->CreateSubstitutedStmt(0, reprs_[k]->clone(), + loop_vars, subs)); + */ + reprs_2.push_back(subs3[k]->clone()); + } + + it->second = reprs_2; + + } + + + fprintf(stderr, "loop.cc stmt[*i].code =\n"); + //stmt[*i].code->dump(); + //fprintf(stderr, "\n"); + stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars, + subs); + //fprintf(stderr, "loop.cc substituted code =\n"); + //stmt[*i].code->dump(); + //fprintf(stderr, "\n"); + + stmt[*i].IS = omega::Range(Restrict_Domain(mapping, stmt[*i].IS)); + stmt[*i].IS.simplify(); + + // replace original transformation 
relation with straight 1-1 mapping + //fprintf(stderr, "replace original transformation relation with straight 1-1 mapping\n"); + mapping = Relation(n, 2 * n + 1); + f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_const(-lex[j - 1]); + } + stmt[*i].xform = mapping; + + //fprintf(stderr, "\ncode is: \n"); + //stmt[*i].code->dump(); + //fprintf(stderr, "\n\n"); + + } + + tmp_loop_var_name_counter += max_n; + fflush(stdout); + fprintf(stderr, "loop.cc LEAVING apply_xform( set )\n\n"); + //for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + // fprintf(stderr, "\nloop.cc stmt[i].code =\n"); + // stmt[*i].code->dump(); + // fprintf(stderr, "\n\n"); + //} + +} + + + + +void Loop::addKnown(const Relation &cond) { + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + fprintf(stderr, "Loop::addKnown(), SETTING last_compute_cg_ = NULL\n"); + + int n1 = this->known.n_set(); + + Relation r = copy(cond); + int n2 = r.n_set(); + + if (n1 < n2) + this->known = Extend_Set(this->known, n2 - n1); + else if (n1 > n2) + r = Extend_Set(r, n1 - n2); + + this->known = Intersection(this->known, r); +} + +void Loop::removeDependence(int stmt_num_from, int stmt_num_to) { + // check for sanity of parameters + if (stmt_num_from >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_from)); + if (stmt_num_to >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_to)); + + dep.disconnect(stmt_num_from, stmt_num_to); +} + +void Loop::dump() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = 
getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { + if (stmt.size() == 0) + return true; + + // check for sanity of parameters + for (int i = 0; i < stmt.size(); i++) { + if (stmt[i].loop_level.size() != num_dep_dim) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + for (int j = 0; j < stmt[i].loop_level.size(); j++) + if (stmt[i].loop_level[j].type != LoopLevelOriginal) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + } + if (T.size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + for (int i = 0; i < stmt.size(); i++) + if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + fprintf(stderr, "Loop::nonsingular(), SETTING last_compute_cg_ = NULL\n"); + + // build relation from matrix + Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); + F_And *f_root = mapping.add_and(); + for (int i = 0; i < num_dep_dim; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (i + 1)), -1); + for (int j 
= 0; j < num_dep_dim; j++) + if (T[i][j] != 0) + h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); + if (T[i].size() == num_dep_dim + 1) + h.update_const(T[i][num_dep_dim]); + } + for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), -1); + h.update_coef(mapping.input_var(i), 1); + } + + // update transformation relations + for (int i = 0; i < stmt.size(); i++) + stmt[i].xform = Composition(copy(mapping), stmt[i].xform); + + // update dependence graph + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + switch (dv.type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim), ubounds( + num_dep_dim); + for (int p = 0; p < num_dep_dim; p++) { + coef_t lb = 0; + coef_t ub = 0; + for (int q = 0; q < num_dep_dim; q++) { + if (T[p][q] > 0) { + if (lb == -posInfinity + || dv.lbounds[q] == -posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.lbounds[q]; + if (ub == posInfinity + || dv.ubounds[q] == posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.ubounds[q]; + } else if (T[p][q] < 0) { + if (lb == -posInfinity + || dv.ubounds[q] == posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.ubounds[q]; + if (ub == posInfinity + || dv.lbounds[q] == -posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.lbounds[q]; + } + } + if (T[p].size() == num_dep_dim + 1) { + if (lb != -posInfinity) + lb += T[p][num_dep_dim]; + if (ub != posInfinity) + ub += T[p][num_dep_dim]; + } + lbounds[p] = lb; + ubounds[p] = ub; + } + dv.lbounds = lbounds; + dv.ubounds = ubounds; + + break; + } + default: + ; + } + } + j->second = dvs; + } + + // set constant loop values + std::set<int> active; + for 
(int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); + + return true; +} + + +bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, + const DependenceVector &dv, bool before) { + std::vector<int> lex_i = getLexicalOrder(i); + std::vector<int> lex_j = getLexicalOrder(j); + int last_dim; + if (!dv.is_scalar_dependence) { + for (last_dim = 0; + last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]); + last_dim++) + ; + last_dim = last_dim / 2; + if (last_dim == 0) + return true; + + for (int i = 0; i < last_dim; i++) { + if (dv.lbounds[i] > 0) + return true; + else if (dv.lbounds[i] < 0) + return false; + } + } + if (before) + return true; + + return false; + +} + +// Manu:: reduction operation + +void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels, + std::string arrName, int memory_type, int padding_alignment, + int assign_then_accumulate, int padding_stride) { + + //std::cout << "In scalar_expand function: " << stmt_num << ", " << arrName << "\n"; + //std::cout.flush(); + + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + // check for sanity of parameters + bool found_non_constant_size_dimension = false; + + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num)); + //Anand: adding check for privatized levels + //if (arrName != "RHS") + // throw std::invalid_argument( + // "invalid 3rd argument: only 'RHS' supported " + arrName); + for (int i = 0; i < levels.size(); i++) { + if (levels[i] <= 0 || levels[i] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "1invalid loop level " + to_string(levels[i])); + + if (i > 0) { + if (levels[i] < levels[i - 1]) + throw std::invalid_argument( + "loop levels must be in ascending order"); + } + } + //end --adding 
check for privatized levels + + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + fprintf(stderr, "Loop::scalar_expand(), SETTING last_compute_cg_ = NULL\n"); + + fprintf(stderr, "\nloop.cc finding array accesses in stmt %d of the code\n",stmt_num ); + std::vector<IR_ArrayRef *> access = ir->FindArrayRef(stmt[stmt_num].code); + fprintf(stderr, "loop.cc L2726 %d access\n", access.size()); + + IR_ArraySymbol *sym = NULL; + fprintf(stderr, "arrName %s\n", arrName.c_str()); + if (arrName == "RHS") { + fprintf(stderr, "sym RHS\n"); + sym = access[0]->symbol(); + } + else { + fprintf(stderr, "looking for array %s in access\n", arrName.c_str()); + for (int k = 0; k < access.size(); k++) { // BUH + + //fprintf(stderr, "access[%d] = %s ", k, access[k]->getTypeString()); access[k]->print(0,stderr); fprintf(stderr, "\n"); + + std::string name = access[k]->symbol()->name(); + //fprintf(stderr, "comparing %s to %s\n", name.c_str(), arrName.c_str()); + + if (access[k]->symbol()->name() == arrName) { + fprintf(stderr, "found it sym access[ k=%d ]\n", k); + sym = access[k]->symbol(); + } + } + } + if (!sym) fprintf(stderr, "DIDN'T FIND IT\n"); + fprintf(stderr, "sym %p\n", sym); + + // collect array references by name + std::vector<int> lex = getLexicalOrder(stmt_num); + int dim = 2 * levels[levels.size() - 1] - 1; + std::set<int> same_loop = getStatements(lex, dim - 1); + + //Anand: shifting this down + // assign_const(stmt[newStmt_num].xform, 2*level+1, 1); + + // std::cout << " before temp array name \n "; + // create a temporary variable + IR_Symbol *tmp_sym; + + // get the loop upperbound, that would be the size of the temp array. 
+ omega::coef_t lb[levels.size()], ub[levels.size()], size[levels.size()]; + + //Anand Adding apply xform so that tiled loop bounds are reflected + fprintf(stderr, "Adding apply xform so that tiled loop bounds are reflected\n"); + apply_xform(same_loop); + fprintf(stderr, "loop.cc, back from apply_xform()\n"); + + //Anand commenting out the folowing 4 lines + /* copy(stmt[stmt_num].IS).query_variable_bounds( + copy(stmt[stmt_num].IS).set_var(level), lb, ub); + std::cout << "Upper Bound = " << ub << "\n"; + std::cout << "lower Bound = " << lb << "\n"; + */ + // testing testing -- Manu //////////////////////////////////////////////// + /* + // int n_dim = sym->n_dim(); + // std::cout << "------- n_dim ----------- " << n_dim << "\n"; + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(stmt[stmt_num].IS, stmt[stmt_num].IS.set_var(level)); + omega::coef_t index_stride; + if (result.second != NULL) { + index_stride = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(stmt[stmt_num].IS.set_var(level)))); + std::cout << "simplest_stride :: " << index_stride << ", " << result.first.get_coef(result.second) << ", " << result.first.get_coef(stmt[stmt_num].IS.set_var(level))<< "\n"; + } + Relation bound; + // bound = get_loop_bound(stmt[stmt_num].IS, level); + bound = SimpleHull(stmt[stmt_num].IS,true, true); + bound.print(); + + bound = copy(stmt[stmt_num].IS); + for (int i = 1; i < level; i++) { + bound = Project(bound, i, Set_Var); + std::cout << "-------------------------------\n"; + bound.print(); + } + + bound.simplify(); + bound.print(); + // bound = get_loop_bound(bound, level); + + copy(bound).query_variable_bounds(copy(bound).set_var(level), lb, ub); + std::cout << "Upper Bound = " << ub << "\n"; + std::cout << "lower Bound = " << lb << "\n"; + + result = find_simplest_stride(bound, bound.set_var(level)); + if (result.second != NULL) + index_stride = 
abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level)))); + else + index_stride = 1; + std::cout << "simplest_stride 11:: " << index_stride << "\n"; + */ + //////////////////////////////////////////////////////////////////////////////// + ///////////////////////////// copied datacopy code here ///////////////////////////////////////////// + + //std::cout << "In scalar_expand function 2: " << stmt_num << ", " << arrName << "\n"; + //std::cout.flush(); + + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + + + int n_dim = levels.size(); + Relation copy_is = copy(stmt[stmt_num].IS); + // extract temporary array information + CG_outputBuilder *ocg1 = ir->builder(); + std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL + std::vector<coef_t> index_stride(n_dim); + std::vector<bool> is_index_eq(n_dim, false); + std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); + Relation reduced_copy_is = copy(copy_is); + std::vector<CG_outputRepr *> size_repr; + std::vector<int> size_int; + Relation xform = copy(stmt[stmt_num].xform); + for (int i = 0; i < n_dim; i++) { + + dim = 2 * levels[i] - 1; + //Anand: Commenting out the lines below: not required + // if (i != 0) + // reduced_copy_is = Project(reduced_copy_is, level - 1 + i, Set_Var); + Relation bound = get_loop_bound(copy(reduced_copy_is), levels[i] - 1); + + // extract stride + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, + bound.set_var(levels[i])); + if (result.second != NULL) + index_stride[i] = abs(result.first.get_coef(result.second)) + / gcd(abs(result.first.get_coef(result.second)), + abs( + result.first.get_coef( + bound.set_var(levels[i])))); + else + index_stride[i] = 1; + // std::cout << "simplest_stride 11:: " << index_stride[i] << 
"\n"; + + // check if this array index requires loop + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (EQ_Iterator ei(c->EQs()); ei; ei++) { + if ((*ei).has_wildcards()) + continue; + + int coef = (*ei).get_coef(bound.set_var(levels[i])); + if (coef != 0) { + int sign = 1; + if (coef < 0) { + coef = -coef; + sign = -1; + } + + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*ei); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(levels[i])) + if ((*ci).coef * sign == 1) + op = ocg1->CreateMinus(op, + ocg1->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign == -1) + op = ocg1->CreatePlus(op, + ocg1->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign > 1) { + op = ocg1->CreateMinus(op, + ocg1->CreateTimes( + ocg1->CreateInt( + abs((*ci).coef)), + ocg1->CreateIdent( + (*ci).var->name()))); + } + else + // (*ci).coef*sign < -1 + op = ocg1->CreatePlus(op, + ocg1->CreateTimes( + ocg1->CreateInt( + abs((*ci).coef)), + ocg1->CreateIdent( + (*ci).var->name()))); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef * sign == 1) + op = ocg1->CreateMinus(op, + ocg1->CreateIdent(g->base_name())); + else if ((*ci).coef * sign == -1) + op = ocg1->CreatePlus(op, + ocg1->CreateIdent(g->base_name())); + else if ((*ci).coef * sign > 1) + op = ocg1->CreateMinus(op, + ocg1->CreateTimes( + ocg1->CreateInt(abs((*ci).coef)), + ocg1->CreateIdent(g->base_name()))); + else + // (*ci).coef*sign < -1 + op = ocg1->CreatePlus(op, + ocg1->CreateTimes( + ocg1->CreateInt(abs((*ci).coef)), + ocg1->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); + } + } + if ((*ei).get_const() != 0) + op = ocg1->CreatePlus(op, + ocg1->CreateInt(-sign * ((*ei).get_const()))); + if (coef != 1) + op = ocg1->CreateIntegerFloor(op, ocg1->CreateInt(coef)); + + index_lb[i] = op; + is_index_eq[i] = true; + break; + } + } + if 
(is_index_eq[i]) + continue; + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + std::set<Variable_ID> excluded_floor_vars; + excluded_floor_vars.insert(bound.set_var(levels[i])); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(levels[i])); + if (coef != 0 && (*gi).has_wildcards()) { + bool clean_bound = true; + GEQ_Handle h; + for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) + if (!find_floor_definition(bound, (*cvi).var, + excluded_floor_vars).first) { + clean_bound = false; + break; + } + else + h= find_floor_definition(bound, (*cvi).var, + excluded_floor_vars).second; + + if (!clean_bound) + continue; + else{ + if (coef > 0) + lb_list.push_back(h); + else if (coef < 0) + ub_list.push_back(h); + continue; + } + + } + + if (coef > 0) + lb_list.push_back(*gi); + else if (coef < 0) + ub_list.push_back(*gi); + } + if (lb_list.size() == 0 || ub_list.size() == 0) + throw loop_error("failed to calcuate array footprint size"); + + // build lower bound representation + std::vector<CG_outputRepr *> lb_repr_list; + /* for (int j = 0; j < lb_list.size(); j++){ + if(this->known.n_set() == 0) + lb_repr_list.push_back(output_lower_bound_repr(ocg1, lb_list[j], bound.set_var(level-1+i+1), result.first, result.second, bound, Relation::True(bound.n_set()), std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); + else + lb_repr_list.push_back(output_lower_bound_repr(ocg1, lb_list[j], bound.set_var(level-1+i+1), result.first, result.second, bound, this->known, std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); + + } + */ + if (lb_repr_list.size() > 1) + index_lb[i] = ocg1->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + index_lb[i] = lb_repr_list[0]; + + // build temporary array size representation + { + Relation cal(copy_is.n_set(), 1); + F_And *f_root = 
cal.add_and(); + for (int j = 0; j < ub_list.size(); j++) + for (int k = 0; k < lb_list.size(); k++) { + GEQ_Handle h = f_root->add_GEQ(); + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error( + "cannot calculate temporay array size statically"); + } + } + h.update_const(ub_list[j].get_const()); + + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error( + "cannot calculate temporay array size statically"); + } + } + h.update_const(lb_list[k].get_const()); + + h.update_const(1); + h.update_coef(cal.output_var(1), -1); + } + + cal = Restrict_Domain(cal, copy(copy_is)); + for (int j = 1; j <= cal.n_inp(); j++) { + cal = Project(cal, j, Input_Var); + } + cal.simplify(); + + // pad temporary array size + // TODO: for variable array size, create padding formula + //int padding_stride = 0; + Conjunct *c = cal.query_DNF()->single_conjunct(); + bool is_index_bound_const = false; + if (padding_stride != 0 && i == n_dim - 1) { + //size = (size + index_stride[i] - 1) / index_stride[i]; + size_repr.push_back(ocg1->CreateInt(padding_stride)); + } else { + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; + gi++) + if 
((*gi).is_const(cal.output_var(1))) { + coef_t size = (*gi).get_const() + / (-(*gi).get_coef(cal.output_var(1))); + + if (padding_alignment > 1 && i == n_dim - 1) { // align to boundary for data packing + int residue = size % padding_alignment; + if (residue) + size = size + padding_alignment - residue; + } + + index_sz.push_back( + std::make_pair(i, ocg1->CreateInt(size))); + is_index_bound_const = true; + size_int.push_back(size); + size_repr.push_back(ocg1->CreateInt(size)); + + // std::cout << "============================== size :: " + // << size << "\n"; + + } + + if (!is_index_bound_const) { + + found_non_constant_size_dimension = true; + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); + gi && !is_index_bound_const; gi++) { + int coef = (*gi).get_coef(bound.set_var(levels[i])); + if (coef < 0) { + + size_repr.push_back( + ocg1->CreatePlus( + output_upper_bound_repr(ocg1, *gi, + bound.set_var(levels[i]), + bound, + std::vector< + std::pair< + CG_outputRepr *, + int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)), + uninterpreted_symbols[stmt_num]), + ocg1->CreateInt(1))); + + /*CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + if ((*ci).var != cal.output_var(1)) { + switch ((*ci).var->kind()) { + case Global_Var: { + Global_Var_ID g = + (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg1->CreatePlus(op, + ocg1->CreateIdent( + g->base_name())); + else if ((*ci).coef == -1) + op = ocg1->CreateMinus(op, + ocg1->CreateIdent( + g->base_name())); + else if ((*ci).coef > 1) + op = + ocg1->CreatePlus(op, + ocg1->CreateTimes( + ocg1->CreateInt( + (*ci).coef), + ocg1->CreateIdent( + g->base_name()))); + else + // (*ci).coef < -1 + op = + ocg1->CreateMinus(op, + ocg1->CreateTimes( + ocg1->CreateInt( + -(*ci).coef), + ocg1->CreateIdent( + g->base_name()))); + break; + } + default: + throw loop_error( + "failed to generate array index bound code"); + } + } + } + 
int c = (*gi).get_const(); + if (c > 0) + op = ocg1->CreatePlus(op, ocg1->CreateInt(c)); + else if (c < 0) + op = ocg1->CreateMinus(op, ocg1->CreateInt(-c)); + */ + /* if (padding_stride != 0) { + if (i == fastest_changing_dimension) { + coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[i] / g; + if (t1 != 1) + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + op = ocg->CreateTimes(op, ocg->CreateInt(t2)); + } + else if (index_stride[i] != 1) { + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } + } + */ + //index_sz.push_back(std::make_pair(i, op)); + //break; + } + } + } + } + } + //size[i] = ub[i]; + + } + ///////////////////////////////////////////////////////////////////////////////////////////////////// + // + + //Anand: Creating IS of new statement + + //for(int l = dim; l < stmt[stmt_num].xform.n_out(); l+=2) + //std::cout << "In scalar_expand function 3: " << stmt_num << ", " << arrName << "\n"; + //std::cout.flush(); + + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + + shiftLexicalOrder(lex, dim + 1, 1); + Statement s = stmt[stmt_num]; + s.ir_stmt_node = NULL; + int newStmt_num = stmt.size(); + + fprintf(stderr, "loop.cc L3249 adding stmt %d\n", stmt.size()); + stmt.push_back(s); + + fprintf(stderr, "uninterpreted_symbols.push_back() newStmt_num %d\n", newStmt_num); + uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); + stmt[newStmt_num].code = stmt[stmt_num].code->clone(); + stmt[newStmt_num].IS = copy(stmt[stmt_num].IS); + stmt[newStmt_num].xform = xform; + 
stmt[newStmt_num].reduction = stmt[stmt_num].reduction; + stmt[newStmt_num].reductionOp = stmt[stmt_num].reductionOp; + + + //fprintf(stderr, "\nafter clone, %d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + + + //assign_const(stmt[newStmt_num].xform, stmt[stmt_num].xform.n_out(), 1);//Anand: change from 2*level + 1 to stmt[stmt_num].xform.size() + //Anand-End creating IS of new statement + + CG_outputRepr * tmpArrSz; + CG_outputBuilder *ocg = ir->builder(); + + //for(int k =0; k < levels.size(); k++ ) + // size_repr.push_back(ocg->CreateInt(size[k]));//Anand: copying apply_xform functionality to prevent IS modification + //due to side effects with uninterpreted function symbols and failures in omega + + //int n = stmt[stmt_num].loop_level.size(); + + /*Relation mapping(2 * n + 1, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + mapping = Composition(mapping, copy(stmt[stmt_num].xform)); + mapping.simplify(); + + // match omega input/output variables to variable names in the code + for (int j = 1; j <= stmt[stmt_num].IS.n_set(); j++) + mapping.name_input_var(j, stmt[stmt_num].IS.set_var(j)->name()); + for (int j = 1; j <= n; j++) + mapping.name_output_var(j, + tmp_loop_var_name_prefix + + to_string(tmp_loop_var_name_counter + j - 1)); + mapping.setup_names(); + + Relation size_ = omega::Range(Restrict_Domain(mapping, copy(stmt[stmt_num].IS))); + size_.simplify(); + */ + + //Anand -commenting out tmp sym creation as symbol may have more than one dimension + //tmp_sym = ir->CreateArraySymbol(tmpArrSz, sym); + std::vector<CG_outputRepr *> lhs_index; + CG_outputRepr *arr_ref_repr; + arr_ref_repr = ocg->CreateIdent( + stmt[stmt_num].IS.set_var(levels[levels.size() - 1])->name()); + + 
CG_outputRepr *total_size = size_repr[0]; + fprintf(stderr, "total_size = "); total_size->dump(); fflush(stdout); + + for (int i = 1; i < size_repr.size(); i++) { + fprintf(stderr, "total_size now "); total_size->dump(); fflush(stdout); fprintf(stderr, " times something\n\n"); + + total_size = ocg->CreateTimes(total_size->clone(), + size_repr[i]->clone()); + + } + + // COMMENT NEEDED + //fprintf(stderr, "\nloop.cc COMMENT NEEDED\n"); + for (int k = levels.size() - 2; k >= 0; k--) { + CG_outputRepr *temp_repr =ocg->CreateIdent(stmt[stmt_num].IS.set_var(levels[k])->name()); + for (int l = k + 1; l < levels.size(); l++) { + //fprintf(stderr, "\nloop.cc CREATETIMES\n"); + temp_repr = ocg->CreateTimes(temp_repr->clone(), + size_repr[l]->clone()); + } + + //fprintf(stderr, "\nloop.cc CREATEPLUS\n"); + arr_ref_repr = ocg->CreatePlus(arr_ref_repr->clone(), + temp_repr->clone()); + } + + + //fprintf(stderr, "loop.cc, about to die\n"); + std::vector<CG_outputRepr *> to_push; + to_push.push_back(total_size); + + if (!found_non_constant_size_dimension) { + fprintf(stderr, "constant size dimension\n"); + tmp_sym = ir->CreateArraySymbol(sym, to_push, memory_type); + } + else { + fprintf(stderr, "NON constant size dimension?\n"); + //tmp_sym = ir->CreatePointerSymbol(sym, to_push); + tmp_sym = ir->CreatePointerSymbol(sym, to_push); + + static_cast<IR_PointerSymbol *>(tmp_sym)->set_size(0, total_size); // ?? + ptr_variables.push_back(static_cast<IR_PointerSymbol *>(tmp_sym)); + fprintf(stderr, "ptr_variables now has %d entries\n", ptr_variables.size()); + } + + // add tmp_sym to Loop symtables ?? + + + // std::cout << " temp array name == " << tmp_sym->name().c_str() << "\n"; + + // get loop index variable at the given "level" + // Relation R = omega::Range(Restrict_Domain(copy(stmt[stmt_num].xform), copy(stmt[stmt_num].IS))); + // stmt[stmt_num].IS.print(); + //stmt[stmt_num].IS. 
+ // std::cout << stmt[stmt_num].IS.n_set() << std::endl; + // std::string v = stmt[stmt_num].IS.set_var(level)->name(); + // std::cout << "loop index variable is '" << v.c_str() << "'\n"; + + // create a reference for the temporary array + fprintf(stderr, "create a reference for the temporary array\n"); + //std::cout << "In scalar_expand function 4: " << stmt_num << ", " << arrName << "\n"; + //std::cout.flush(); + + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + + + std::vector<CG_outputRepr *> to_push2; + to_push2.push_back(arr_ref_repr); // can have only one entry + + //lhs_index[0] = ocg->CreateIdent(v); + + + IR_ArrayRef *tmp_array_ref; + IR_PointerArrayRef * tmp_ptr_array_ref; // was IR_PointerArrayref + + if (!found_non_constant_size_dimension) { + fprintf(stderr, "constant size\n"); + + tmp_array_ref = ir->CreateArrayRef( + static_cast<IR_ArraySymbol *>(tmp_sym), to_push2); + } + else { + fprintf(stderr, "NON constant size\n"); + tmp_ptr_array_ref = ir->CreatePointerArrayRef( + static_cast<IR_PointerSymbol *>(tmp_sym), to_push2); + // TODO static_cast<IR_PointerSymbol *>(tmp_sym), to_push2); + } + fflush(stdout); + + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + + //std::string stemp; + //stemp = tmp_array_ref->name(); + //std::cout << "Created array reference --> " << stemp.c_str() << "\n"; + + // get the RHS expression + fprintf(stderr, "get the RHS expression arrName %s\n", arrName.c_str()); + + CG_outputRepr *rhs; + if (arrName == "RHS") { + rhs = ir->GetRHSExpression(stmt[stmt_num].code); + + std::vector<IR_ArrayRef *> symbols = ir->FindArrayRef(rhs); + } + std::set<std::string> sym_names; + + //for (int i = 0; i < symbols.size(); 
i++) + // sym_names.insert(symbols[i]->symbol()->name()); + + fflush(stdout); + + //fprintf(stderr, "\nbefore if (arrName == RHS)\n%d statements\n", stmt.size()); // problem is after here + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + if (arrName == "RHS") { + + std::vector<IR_ArrayRef *> symbols = ir->FindArrayRef(rhs); + + for (int i = 0; i < symbols.size(); i++) + sym_names.insert(symbols[i]->symbol()->name()); + } + else { + + fprintf(stderr, "finding array refs in stmt_num %d\n", stmt_num); + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + fprintf(stderr, "\n%d refs\n", refs.size()); + + + bool found = false; + + for (int j = 0; j < refs.size(); j++) { + CG_outputRepr* to_replace; + + fprintf(stderr, "j %d build new assignment statement with temporary array\n",j); + // build new assignment statement with temporary array + if (!found_non_constant_size_dimension) { + to_replace = tmp_array_ref->convert(); + } else { + to_replace = tmp_ptr_array_ref->convert(); + } + //fprintf(stderr, "to_replace %p\n", to_replace); + //CG_chillRepr *CR = (CG_chillRepr *) to_replace; + //CR->Dump(); + + if (refs[j]->name() == arrName) { + fflush(stdout); + fprintf(stderr, "loop.cc L353\n"); // problem is after here + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + + sym_names.insert(refs[j]->symbol()->name()); + + if (!found) { + if (!found_non_constant_size_dimension) { + fprintf(stderr, "constant size2\n"); + omega::CG_outputRepr * t = tmp_array_ref->convert(); + omega::CG_outputRepr 
* r = refs[j]->convert()->clone(); + //CR = (CG_chillRepr *) t; + //CR->Dump(); + //CR = (CG_chillRepr *) r; + //CR->Dump(); + + //fprintf(stderr, "lhs t %p lhs r %p\n", t, r); + stmt[newStmt_num].code = + ir->builder()->CreateAssignment(0, + t, // tmp_array_ref->convert(), + r); // refs[j]->convert()->clone() + } + else { + fprintf(stderr, "NON constant size2\n"); + omega::CG_outputRepr * t = tmp_ptr_array_ref->convert(); // this fails + omega::CG_outputRepr * r = refs[j]->convert()->clone(); + + //omega::CG_chillRepr *CR = (omega::CG_chillRepr *) t; + //CR->Dump(); + //CR = (omega::CG_chillRepr *) r; + //CR->Dump(); + + //fprintf(stderr, "lhs t %p lhs r %p\n", t, r); + stmt[newStmt_num].code = + ir->builder()->CreateAssignment(0, + t, // tmp_ptr_array_ref->convert(), + r ); // refs[j]->convert()->clone()); + } + found = true; + + } + + // refs[j] has no parent? + fprintf(stderr, "replacing refs[%d]\n", j ); + ir->ReplaceExpression(refs[j], to_replace); + } + + } + + } + //ToDo need to update the dependence graph + //Anand adding dependence graph update + fprintf(stderr, "adding dependence graph update\n"); // problem is before here + //fprintf(stderr, "\n%d statements\n", stmt.size()); + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "%2d ", i); + // ((CG_chillRepr *)stmt[i].code)->Dump(); + //} + //fprintf(stderr, "\n"); + + dep.insert(); + + //Anand:Copying Dependence checks from datacopy code, might need to be a separate function/module + // in the future + + /*for (int i = 0; i < newStmt_num; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + ) { + if (same_loop.find(i) != same_loop.end() + && same_loop.find(j->first) == same_loop.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL + && sym_names.find(dv.sym->name()) != 
sym_names.end() + && (dv.type == DEP_R2R || dv.type == DEP_R2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(newStmt_num, j->first, dvs1); + } else if (same_loop.find(i) == same_loop.end() + && same_loop.find(j->first) != same_loop.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL + && sym_names.find(dv.sym->name()) != sym_names.end() + && (dv.type == DEP_R2R || dv.type == DEP_W2R)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, newStmt_num, D[j]); + } + */ + //Anand--end dependence check + if (arrName == "RHS") { + + // build new assignment statement with temporary array + if (!found_non_constant_size_dimension) { + if (assign_then_accumulate) { + stmt[newStmt_num].code = ir->builder()->CreateAssignment(0, + tmp_array_ref->convert(), rhs); + fprintf(stderr, "ir->ReplaceRHSExpression( stmt_ num %d )\n", stmt_num); + ir->ReplaceRHSExpression(stmt[stmt_num].code, tmp_array_ref); + } else { + CG_outputRepr *temp = tmp_array_ref->convert()->clone(); + if (ir->QueryExpOperation(stmt[stmt_num].code) + != IR_OP_PLUS_ASSIGNMENT) + throw ir_error( + "Statement is not a += accumulation statement"); + + fprintf(stderr, "replacing in a +=\n"); + stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0, + temp->clone(), rhs); + + CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code); + + CG_outputRepr *assignment = ir->builder()->CreateAssignment(0, + lhs, temp->clone()); + Statement init_ = stmt[newStmt_num]; // copy ?? 
+ init_.ir_stmt_node = NULL; + + init_.code = stmt[newStmt_num].code->clone(); + init_.IS = copy(stmt[newStmt_num].IS); + init_.xform = copy(stmt[newStmt_num].xform); + init_.has_inspector = false; // ?? + + Relation mapping(init_.IS.n_set(), init_.IS.n_set()); + + F_And *f_root = mapping.add_and(); + + for (int i = 1; i <= mapping.n_inp(); i++) { + EQ_Handle h = f_root->add_EQ(); + //if (i < levels[0]) { + if (i <= levels[levels.size() - 1]) { + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } else { + h.update_const(-1); + h.update_coef(mapping.output_var(i), 1); + } + + /*else { + int j; + for (j = 0; j < levels.size(); j++) + if (i == levels[j]) + break; + + if (j == levels.size()) { + + h.update_coef(mapping.output_var(i), 1); + h.update_const(-1); + + } else { + + + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + + + } + */ + //} + } + + mapping.simplify(); + // match omega input/output variables to variable names in the code + for (int j = 1; j <= init_.IS.n_set(); j++) + mapping.name_output_var(j, init_.IS.set_var(j)->name()); + for (int j = 1; j <= init_.IS.n_set(); j++) + mapping.name_input_var(j, init_.IS.set_var(j)->name()); + + mapping.setup_names(); + + init_.IS = omega::Range( + omega::Restrict_Domain(mapping, init_.IS)); + std::vector<int> lex = getLexicalOrder(newStmt_num); + int dim = 2 * levels[0] - 1; + //init_.IS.print(); + // init_.xform.print(); + //stmt[newStmt_num].xform.print(); + // shiftLexicalOrder(lex, dim + 1, 1); + shiftLexicalOrder(lex, dim + 1, 1); + init_.reduction = stmt[newStmt_num].reduction; + init_.reductionOp = stmt[newStmt_num].reductionOp; + + init_.code = ir->builder()->CreateAssignment(0, temp->clone(), + ir->builder()->CreateInt(0)); + + fprintf(stderr, "loop.cc L3693 adding stmt %d\n", stmt.size()); + stmt.push_back(init_); + + uninterpreted_symbols.push_back(uninterpreted_symbols[newStmt_num]); + 
uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[newStmt_num]); + stmt[stmt_num].code = assignment; + } + } else { + if (assign_then_accumulate) { + stmt[newStmt_num].code = ir->builder()->CreateAssignment(0, + tmp_ptr_array_ref->convert(), rhs); + ir->ReplaceRHSExpression(stmt[stmt_num].code, + tmp_ptr_array_ref); + } else { + CG_outputRepr *temp = tmp_ptr_array_ref->convert()->clone(); + if (ir->QueryExpOperation(stmt[stmt_num].code) + != IR_OP_PLUS_ASSIGNMENT) + throw ir_error( + "Statement is not a += accumulation statement"); + stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0, + temp->clone(), rhs); + + CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code); + + CG_outputRepr *assignment = ir->builder()->CreateAssignment(0, + lhs, temp->clone()); + + stmt[stmt_num].code = assignment; + } + // call function to replace rhs with temporary array + } + } + + //std::cout << "End of scalar_expand function!! \n"; + + // if(arrName == "RHS"){ + DependenceVector dv; + std::vector<DependenceVector> E; + dv.lbounds = std::vector<omega::coef_t>(4); + dv.ubounds = std::vector<omega::coef_t>(4); + dv.type = DEP_W2R; + + for (int k = 0; k < 4; k++) { + dv.lbounds[k] = 0; + dv.ubounds[k] = 0; + + } + + //std::vector<IR_ArrayRef*> array_refs = ir->FindArrayRef(stmt[newStmt_num].code); + dv.sym = tmp_sym->clone(); + + E.push_back(dv); + + dep.connect(newStmt_num, stmt_num, E); + // } + +} + + + + +std::pair<Relation, Relation> createCSRstyleISandXFORM(CG_outputBuilder *ocg, + std::vector<Relation> &outer_loop_bounds, std::string index_name, + std::map<int, Relation> &zero_loop_bounds, + std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols, + std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols_string, + Loop *this_loop) { + + Relation IS(outer_loop_bounds.size() + 1 + zero_loop_bounds.size()); + Relation XFORM(outer_loop_bounds.size() + 1 + zero_loop_bounds.size(), + 2 * 
(outer_loop_bounds.size() + 1 + zero_loop_bounds.size()) + 1); + + F_And * f_r_ = IS.add_and(); + F_And * f_root = XFORM.add_and(); + + if (outer_loop_bounds.size() > 0) { + for (int it = 0; it < IS.n_set(); it++) { + IS.name_set_var(it + 1, + const_cast<Relation &>(outer_loop_bounds[0]).set_var(it + 1)->name()); + XFORM.name_input_var(it + 1, + const_cast<Relation &>(outer_loop_bounds[0]).set_var(it + 1)->name()); + + } + } else if (zero_loop_bounds.size() > 0) { + for (int it = 0; it < IS.n_set(); it++) { + IS.name_set_var(it + 1, + const_cast<Relation &>(zero_loop_bounds.begin()->second).set_var( + it + 1)->name()); + XFORM.name_input_var(it + 1, + const_cast<Relation &>(zero_loop_bounds.begin()->second).set_var( + it + 1)->name()); + + } + + } + + for (int i = 0; i < outer_loop_bounds.size(); i++) + IS = replace_set_var_as_another_set_var(IS, outer_loop_bounds[i], i + 1, + i + 1); + + int count = 1; + for (std::map<int, Relation>::iterator i = zero_loop_bounds.begin(); + i != zero_loop_bounds.end(); i++, count++) + IS = replace_set_var_as_another_set_var(IS, i->second, + outer_loop_bounds.size() + 1 + count, i->first); + + if (outer_loop_bounds.size() > 0) { + Free_Var_Decl *lb = new Free_Var_Decl(index_name + "_", 1); // index_ + Variable_ID csr_lb = IS.get_local(lb, Input_Tuple); + + Free_Var_Decl *ub = new Free_Var_Decl(index_name + "__", 1); // index__ + Variable_ID csr_ub = IS.get_local(ub, Input_Tuple); + + //lower bound + + F_And * f_r = IS.and_with_and(); + GEQ_Handle lower_bound = f_r->add_GEQ(); + lower_bound.update_coef(csr_lb, -1); + lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1); + + //upper bound + + GEQ_Handle upper_bound = f_r->add_GEQ(); + upper_bound.update_coef(csr_ub, 1); + upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1); + upper_bound.update_const(-1); + + omega::CG_stringBuilder *ocgs = new CG_stringBuilder; + + std::vector<omega::CG_outputRepr *> reprs; + std::vector<omega::CG_outputRepr *> 
reprs2; + + std::vector<omega::CG_outputRepr *> reprs3; + std::vector<omega::CG_outputRepr *> reprs4; + + reprs.push_back( + ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); + reprs2.push_back( + ocgs->CreateIdent( + IS.set_var(outer_loop_bounds.size())->name())); + uninterpreted_symbols.insert( + std::pair<std::string, std::vector<CG_outputRepr *> >( + index_name + "_", reprs)); + uninterpreted_symbols_string.insert( + std::pair<std::string, std::vector<CG_outputRepr *> >( + index_name + "_", reprs2)); + + std::string arg = "(" + IS.set_var(outer_loop_bounds.size())->name() + + ")"; + std::vector< std::string > argvec; + argvec.push_back( arg ); + + CG_outputRepr *repr = ocg->CreateArrayRefExpression(index_name, + ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); + + //fprintf(stderr, "( VECTOR _)\n"); + //fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "_").c_str()); + this_loop->ir->CreateDefineMacro(index_name + "_", argvec, repr); + + Relation known_(copy(IS).n_set()); + known_.copy_names(copy(IS)); + known_.setup_names(); + Variable_ID index_lb = known_.get_local(lb, Input_Tuple); + Variable_ID index_ub = known_.get_local(ub, Input_Tuple); + F_And *fr = known_.add_and(); + GEQ_Handle g = fr->add_GEQ(); + g.update_coef(index_ub, 1); + g.update_coef(index_lb, -1); + g.update_const(-1); + this_loop->addKnown(known_); + + reprs3.push_back( + + ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); + reprs4.push_back( + + ocgs->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); + + CG_outputRepr *repr2 = ocg->CreateArrayRefExpression(index_name, + ocg->CreatePlus( + ocg->CreateIdent( + IS.set_var(outer_loop_bounds.size())->name()), + ocg->CreateInt(1))); + + //fprintf(stderr, "( VECTOR __)\n"); + //fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "__").c_str()); + + this_loop->ir->CreateDefineMacro(index_name + "__", argvec, repr2); + + 
uninterpreted_symbols.insert( + std::pair<std::string, std::vector<CG_outputRepr *> >( + index_name + "__", reprs3)); + uninterpreted_symbols_string.insert( + std::pair<std::string, std::vector<CG_outputRepr *> >( + index_name + "__", reprs4)); + } else { + Free_Var_Decl *ub = new Free_Var_Decl(index_name); + Variable_ID csr_ub = IS.get_local(ub); + F_And * f_r = IS.and_with_and(); + GEQ_Handle upper_bound = f_r->add_GEQ(); + upper_bound.update_coef(csr_ub, 1); + upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1); + upper_bound.update_const(-1); + + GEQ_Handle lower_bound = f_r->add_GEQ(); + lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1); + + } + + for (int j = 1; j <= XFORM.n_inp(); j++) { + omega::EQ_Handle h = f_root->add_EQ(); + h.update_coef(XFORM.output_var(2 * j), 1); + h.update_coef(XFORM.input_var(j), -1); + } + + for (int j = 1; j <= XFORM.n_out(); j += 2) { + omega::EQ_Handle h = f_root->add_EQ(); + h.update_coef(XFORM.output_var(j), 1); + } + + if (_DEBUG_) { + IS.print(); + XFORM.print(); + + } + + return std::pair<Relation, Relation>(IS, XFORM); + +} + +std::pair<Relation, Relation> construct_reduced_IS_And_XFORM(IR_Code *ir, + const Relation &is, const Relation &xform, const std::vector<int> loops, + std::vector<int> &lex_order, Relation &known, + std::map<std::string, std::vector<CG_outputRepr *> > &uninterpreted_symbols) { + + Relation IS(loops.size()); + Relation XFORM(loops.size(), 2 * loops.size() + 1); + int count_ = 1; + std::map<int, int> pos_mapping; + + int n = is.n_set(); + Relation is_and_known = Intersection(copy(is), + Extend_Set(copy(known), n - known.n_set())); + + for (int it = 0; it < loops.size(); it++, count_++) { + IS.name_set_var(count_, + const_cast<Relation &>(is).set_var(loops[it])->name()); + XFORM.name_input_var(count_, + const_cast<Relation &>(xform).input_var(loops[it])->name()); + XFORM.name_output_var(2 * count_, + const_cast<Relation &>(xform).output_var((loops[it]) * 
2)->name()); + XFORM.name_output_var(2 * count_ - 1, + const_cast<Relation &>(xform).output_var((loops[it]) * 2 - 1)->name()); + pos_mapping.insert(std::pair<int, int>(count_, loops[it])); + } + + XFORM.name_output_var(2 * loops.size() + 1, + const_cast<Relation &>(xform).output_var(is.n_set() * 2 + 1)->name()); + + F_And * f_r = IS.add_and(); + for (std::map<int, int>::iterator it = pos_mapping.begin(); + it != pos_mapping.end(); it++) + IS = replace_set_var_as_another_set_var(IS, is_and_known, it->first, + it->second); + /* + for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it2 = + uninterpreted_symbols.begin(); + it2 != uninterpreted_symbols.end(); it2++) { + std::vector<CG_outputRepr *> reprs_ = it2->second; + //std::vector<CG_outputRepr *> reprs_2; + + for (int k = 0; k < reprs_.size(); k++) { + std::vector<IR_ScalarRef *> refs = ir->FindScalarRef(reprs_[k]); + bool exception_found = false; + for (int m = 0; m < refs.size(); m++){ + + if (refs[m]->name() + == const_cast<Relation &>(is).set_var(it->second)->name()) + try { + ir->ReplaceExpression(refs[m], + ir->builder()->CreateIdent( + IS.set_var(it->first)->name())); + } catch (ir_error &e) { + + reprs_[k] = ir->builder()->CreateIdent( + IS.set_var(it->first)->name()); + exception_found = true; + } + if(exception_found) + break; + } + + } + it2->second = reprs_; + } + + } + */ + CHILL_DEBUG_BEGIN + std::cout << "relation debug" << std::endl; + IS.print(); + CHILL_DEBUG_END + + F_And *f_root = XFORM.add_and(); + + count_ = 1; + + for (int j = 1; j <= loops.size(); j++) { + omega::EQ_Handle h = f_root->add_EQ(); + h.update_coef(XFORM.output_var(2 * j), 1); + h.update_coef(XFORM.input_var(j), -1); + } + for (int j = 0; j < loops.size(); j++, count_++) { + omega::EQ_Handle h = f_root->add_EQ(); + h.update_coef(XFORM.output_var(count_ * 2 - 1), 1); + h.update_const(-lex_order[count_ * 2 - 2]); + } + + omega::EQ_Handle h = f_root->add_EQ(); + h.update_coef(XFORM.output_var((loops.size()) * 2 + 
1), 1); + h.update_const(-lex_order[xform.n_out() - 1]); + + CHILL_DEBUG_BEGIN + std::cout << "relation debug" << std::endl; + IS.print(); + XFORM.print(); + CHILL_DEBUG_END + + return std::pair<Relation, Relation>(IS, XFORM); + +} + +std::set<std::string> inspect_repr_for_scalars(IR_Code *ir, + CG_outputRepr * repr, std::set<std::string> ignore) { + + std::vector<IR_ScalarRef *> refs = ir->FindScalarRef(repr); + std::set<std::string> loop_vars; + + for (int i = 0; i < refs.size(); i++) + if (ignore.find(refs[i]->name()) == ignore.end()) + loop_vars.insert(refs[i]->name()); + + return loop_vars; + +} + +std::set<std::string> inspect_loop_bounds(IR_Code *ir, const Relation &R, + int pos, + std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols) { + + if (!R.is_set()) + throw loop_error("Input R has to be a set not a relation!"); + + std::set<std::string> vars; + + std::vector<CG_outputRepr *> refs; + Variable_ID v = const_cast<Relation &>(R).set_var(pos); + for (DNF_Iterator di(const_cast<Relation &>(R).query_DNF()); di; di++) { + for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) { + if ((*gi).get_coef(v) != 0 && (*gi).is_const_except_for_global(v)) { + for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) { + Variable_ID v = cvi.curr_var(); + switch (v->kind()) { + + case Global_Var: { + Global_Var_ID g = v->get_global_var(); + Variable_ID v2; + if (g->arity() > 0) { + + std::string s = g->base_name(); + std::copy( + uninterpreted_symbols.find(s)->second.begin(), + uninterpreted_symbols.find(s)->second.end(), + back_inserter(refs)); + + } + + break; + } + default: + break; + } + } + + } + } + } + + for (int i = 0; i < refs.size(); i++) { + std::vector<IR_ScalarRef *> refs_ = ir->FindScalarRef(refs[i]); + + for (int j = 0; j < refs_.size(); j++) + vars.insert(refs_[j]->name()); + + } + return vars; +} + +CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R, + int pos, CG_outputRepr * count, + std::map<std::string, 
std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols) { + + if (!R.is_set()) + throw loop_error("Input R has to be a set not a relation!"); + + CG_outputRepr *ub, *lb; + ub = NULL; + lb = NULL; + std::vector<CG_outputRepr *> refs; + Variable_ID v = const_cast<Relation &>(R).set_var(pos); + for (DNF_Iterator di(const_cast<Relation &>(R).query_DNF()); di; di++) { + for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) { + if ((*gi).get_coef(v) != 0 && (*gi).is_const_except_for_global(v)) { + bool same_ge_1 = false; + bool same_ge_2 = false; + for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) { + Variable_ID v = cvi.curr_var(); + switch (v->kind()) { + + case Global_Var: { + Global_Var_ID g = v->get_global_var(); + Variable_ID v2; + if (g->arity() > 0) { + + std::string s = g->base_name(); + + if ((*gi).get_coef(v) > 0) { + if (ub != NULL) + throw ir_error( + "bound expression too complex!"); + + ub = ir->builder()->CreateInvoke(s, + uninterpreted_symbols.find(s)->second); + //ub = ir->builder()->CreateMinus(ub->clone(), ir->builder()->CreateInt(-(*gi).get_const())); + same_ge_1 = true; + + } else { + if (lb != NULL) + throw ir_error( + "bound expression too complex!"); + lb = ir->builder()->CreateInvoke(s, + uninterpreted_symbols.find(s)->second); + same_ge_2 = true; + + } + } + + break; + } + default: + break; + } + } + + if (same_ge_1 && same_ge_2) + lb = ir->builder()->CreatePlus(lb->clone(), + ir->builder()->CreateInt(-(*gi).get_const())); + else if (same_ge_1) + ub = ir->builder()->CreatePlus(ub->clone(), + ir->builder()->CreateInt(-(*gi).get_const())); + else if (same_ge_2) + lb = ir->builder()->CreatePlus(lb->clone(), + ir->builder()->CreateInt(-(*gi).get_const())); + } + } + + } + + return ir->builder()->CreatePlusAssignment(0, count, + ir->builder()->CreatePlus( + ir->builder()->CreateMinus(ub->clone(), lb->clone()), + ir->builder()->CreateInt(1))); +} + + + +std::map<std::string, std::vector<std::string> > recurse_on_exp_for_arrays( + IR_Code * ir, 
CG_outputRepr * exp) { + + std::map<std::string, std::vector<std::string> > arr_index_to_ref; + switch (ir->QueryExpOperation(exp)) { + + case IR_OP_ARRAY_VARIABLE: { + IR_ArrayRef *ref = dynamic_cast<IR_ArrayRef *>(ir->Repr2Ref(exp)); + IR_PointerArrayRef *ref_ = + dynamic_cast<IR_PointerArrayRef *>(ir->Repr2Ref(exp)); + if (ref == NULL && ref_ == NULL) + throw loop_error("Array symbol unidentifiable!"); + + if (ref != NULL) { + std::vector<std::string> s0; + + for (int i = 0; i < ref->n_dim(); i++) { + CG_outputRepr * index = ref->index(i); + std::map<std::string, std::vector<std::string> > a0 = + recurse_on_exp_for_arrays(ir, index); + std::vector<std::string> s; + for (std::map<std::string, std::vector<std::string> >::iterator j = + a0.begin(); j != a0.end(); j++) { + if (j->second.size() != 1 && (j->second)[0] != "") + throw loop_error( + "indirect array references not allowed in guard!"); + s.push_back(j->first); + } + std::copy(s.begin(), s.end(), back_inserter(s0)); + } + arr_index_to_ref.insert( + std::pair<std::string, std::vector<std::string> >( + ref->name(), s0)); + } else { + std::vector<std::string> s0; + for (int i = 0; i < ref_->n_dim(); i++) { + CG_outputRepr * index = ref_->index(i); + std::map<std::string, std::vector<std::string> > a0 = + recurse_on_exp_for_arrays(ir, index); + std::vector<std::string> s; + for (std::map<std::string, std::vector<std::string> >::iterator j = + a0.begin(); j != a0.end(); j++) { + if (j->second.size() != 1 && (j->second)[0] != "") + throw loop_error( + "indirect array references not allowed in guard!"); + s.push_back(j->first); + } + std::copy(s.begin(), s.end(), back_inserter(s0)); + } + arr_index_to_ref.insert( + std::pair<std::string, std::vector<std::string> >( + ref_->name(), s0)); + } + break; + } + case IR_OP_PLUS: + case IR_OP_MINUS: + case IR_OP_MULTIPLY: + case IR_OP_DIVIDE: { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp); + std::map<std::string, std::vector<std::string> > a0 = + 
recurse_on_exp_for_arrays(ir, v[0]); + std::map<std::string, std::vector<std::string> > a1 = + recurse_on_exp_for_arrays(ir, v[1]); + arr_index_to_ref.insert(a0.begin(), a0.end()); + arr_index_to_ref.insert(a1.begin(), a1.end()); + break; + + } + case IR_OP_POSITIVE: + case IR_OP_NEGATIVE: { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp); + std::map<std::string, std::vector<std::string> > a0 = + recurse_on_exp_for_arrays(ir, v[0]); + + arr_index_to_ref.insert(a0.begin(), a0.end()); + break; + + } + case IR_OP_VARIABLE: { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp); + IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0])); + + std::string s = ref->name(); + std::vector<std::string> to_insert; + to_insert.push_back(""); + arr_index_to_ref.insert( + std::pair<std::string, std::vector<std::string> >(s, + to_insert)); + break; + } + case IR_OP_CONSTANT: + break; + + default: { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp); + + for (int i = 0; i < v.size(); i++) { + std::map<std::string, std::vector<std::string> > a0 = + recurse_on_exp_for_arrays(ir, v[i]); + + arr_index_to_ref.insert(a0.begin(), a0.end()); + } + + break; + } + } + return arr_index_to_ref; +} + + + +std::vector<CG_outputRepr *> find_guards(IR_Code *ir, IR_Control *code) { + CHILL_DEBUG_PRINT("find_guards()\n"); + std::vector<CG_outputRepr *> guards; + switch (code->type()) { + case IR_CONTROL_IF: { + CHILL_DEBUG_PRINT("find_guards() it's an if\n"); + CG_outputRepr *cond = dynamic_cast<IR_If*>(code)->condition(); + + std::vector<CG_outputRepr *> then_body; + std::vector<CG_outputRepr *> else_body; + IR_Block *ORTB = dynamic_cast<IR_If*>(code)->then_body(); + if (ORTB != NULL) { + CHILL_DEBUG_PRINT("recursing on then\n"); + then_body = find_guards(ir, ORTB); + //dynamic_cast<IR_If*>(code)->then_body()); + } + if (dynamic_cast<IR_If*>(code)->else_body() != NULL) { + CHILL_DEBUG_PRINT("recursing on then\n"); + else_body = find_guards(ir, + 
dynamic_cast<IR_If*>(code)->else_body()); + } + + guards.push_back(cond); + if (then_body.size() > 0) + std::copy(then_body.begin(), then_body.end(), + back_inserter(guards)); + if (else_body.size() > 0) + std::copy(else_body.begin(), else_body.end(), + back_inserter(guards)); + break; + } + case IR_CONTROL_BLOCK: { + CHILL_DEBUG_PRINT("it's a control block\n"); + IR_Block* IRCB = dynamic_cast<IR_Block*>(code); + CHILL_DEBUG_PRINT("calling ir->FindOneLevelControlStructure(IRCB);\n"); + std::vector<IR_Control *> stmts = ir->FindOneLevelControlStructure(IRCB); + + for (int i = 0; i < stmts.size(); i++) { + std::vector<CG_outputRepr *> stmt_repr = find_guards(ir, stmts[i]); + std::copy(stmt_repr.begin(), stmt_repr.end(), + back_inserter(guards)); + } + break; + } + case IR_CONTROL_LOOP: { + CHILL_DEBUG_PRINT("it's a control loop\n"); + std::vector<CG_outputRepr *> body = find_guards(ir, + dynamic_cast<IR_Loop*>(code)->body()); + if (body.size() > 0) + std::copy(body.begin(), body.end(), back_inserter(guards)); + break; + } // loop + } // switch + return guards; +} + +bool sort_helper(std::pair<std::string, std::vector<std::string> > i, + std::pair<std::string, std::vector<std::string> > j) { + int c1 = 0; + int c2 = 0; + for (int k = 0; k < i.second.size(); k++) + if (i.second[k] != "") + c1++; + + for (int k = 0; k < j.second.size(); k++) + if (j.second[k] != "") + c2++; + return (c1 < c2); + +} + +bool sort_helper_2(std::pair<int, int> i, std::pair<int, int> j) { + + return (i.second < j.second); + +} + +std::vector<std::string> construct_iteration_order( + std::map<std::string, std::vector<std::string> > & input) { + std::vector<std::string> arrays; + std::vector<std::string> scalars; + std::vector<std::pair<std::string, std::vector<std::string> > > input_aid; + + for (std::map<std::string, std::vector<std::string> >::iterator j = + input.begin(); j != input.end(); j++) + input_aid.push_back( + std::pair<std::string, std::vector<std::string> >(j->first, + 
j->second)); + + std::sort(input_aid.begin(), input_aid.end(), sort_helper); + + for (int j = 0; j < input_aid[input_aid.size() - 1].second.size(); j++) + if (input_aid[input_aid.size() - 1].second[j] != "") { + arrays.push_back(input_aid[input_aid.size() - 1].second[j]); + + } + + if (arrays.size() > 0) { + for (int i = input_aid.size() - 2; i >= 0; i--) { + + int max_count = 0; + for (int j = 0; j < input_aid[i].second.size(); j++) + if (input_aid[i].second[j] != "") { + max_count++; + } + if (max_count > 0) { + for (int j = 0; j < max_count; j++) { + std::string s = input_aid[i].second[j]; + bool found = false; + for (int k = 0; k < max_count; k++) + if (s == arrays[k]) + found = true; + if (!found) + throw loop_error("guard condition not solvable"); + } + } else { + bool found = false; + for (int k = 0; k < arrays.size(); k++) + if (arrays[k] == input_aid[i].first) + found = true; + if (!found) + arrays.push_back(input_aid[i].first); + } + } + } else { + + for (int i = input_aid.size() - 1; i >= 0; i--) { + arrays.push_back(input_aid[i].first); + } + } + return arrays; +} + + + diff --git a/src/transformations/loop_basic.cc b/src/transformations/loop_basic.cc new file mode 100644 index 0000000..a058598 --- /dev/null +++ b/src/transformations/loop_basic.cc @@ -0,0 +1,1839 @@ +/* + * loop_basic.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include "loop.hh" +#include "chill_error.hh" +#include <omega.h> +#include "omegatools.hh" +#include <string.h> + +#include <code_gen/CG_utils.h> + +using namespace omega; + +void Loop::permute(const std::vector<int> &pi) { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + + permute(active, pi); +} + +void Loop::original() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); + //apply_xform(); +} +void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) { + // check for sanity of parameters + int 
starting_order; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num)); + std::set<int> active; + if (level < 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("3invalid loop level " + to_string(level)); + else if (level == 0) { + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + level = 1; + starting_order = 0; + } else { + std::vector<int> lex = getLexicalOrder(stmt_num); + active = getStatements(lex, 2 * level - 2); + starting_order = lex[2 * level - 2]; + lex[2 * level - 2]++; + shiftLexicalOrder(lex, 2 * level - 2, active.size() - 1); + } + std::vector<int> pi_inverse(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) { + if (pi[i] >= level + pi.size() || pi[i] < level + || pi_inverse[pi[i] - level] != 0) + throw std::invalid_argument("invalid permuation"); + pi_inverse[pi[i] - level] = level + i; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + if (level + pi.size() - 1 > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid permutation for statement " + to_string(*i)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // Update transformation relations + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= 2 * level - 2; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = level; j <= level + pi.size() - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * pi[j - level]), -1); + } + for (int j = level; j <= level + pi.size() - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + 
h.update_coef(mapping.output_var(2 * j - 1), 1); + h.update_coef(mapping.input_var(2 * j - 1), -1); + } + for (int j = 2 * (level + pi.size() - 1) + 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) + t.push_back(stmt[stmt_num].loop_level[pi[i] - 1].payload); + int max_dep_dim = -1; + int min_dep_dim = dep.num_dim(); + for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(dep.num_dim()); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i - min_dep_dim]; + for (int i = max_dep_dim + 1; i < dep.num_dim(); i++) + dep_pi[i] = i; + + dep.permute(dep_pi, active); + + // update the dependence graph + DependenceGraph g(dep.num_dim()); + for (int i = 0; i < dep.vertex.size(); i++) + g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + if ((active.find(i) != active.end() + && active.find(j->first) != active.end())) { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(dep.num_dim()); + std::vector<coef_t> ubounds(dep.num_dim()); + for (int d = 0; d < dep.num_dim(); d++) { + lbounds[d] = 
dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < dep.num_dim(); d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level + pi.size()) { + switch (stmt[*i].loop_level[pi_inverse[j - level] - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[pi_inverse[j - level] + - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + 
"unknown loop level information for statement " + + to_string(*i)); + } + } else { + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2 * level - 2, active, starting_order); +} +void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) { + if (active.size() == 0 || pi.size() == 0) + return; + + // check for sanity of parameters + int level = pi[0]; + for (int i = 1; i < pi.size(); i++) + if (pi[i] < level) + level = pi[i]; + if (level < 1) + throw std::invalid_argument("invalid permuation"); + std::vector<int> reverse_pi(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) + if (pi[i] >= level + pi.size()) + throw std::invalid_argument("invalid permutation"); + else + reverse_pi[pi[i] - level] = i + level; + for (int i = 0; i < reverse_pi.size(); i++) + if (reverse_pi[i] == 0) + throw std::invalid_argument("invalid permuation"); + int ref_stmt_num; + std::vector<int> lex; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(*i)); + if (i == active.begin()) { + ref_stmt_num = *i; + lex = getLexicalOrder(*i); + } else { + if (level + 
pi.size() - 1 > stmt[*i].loop_level.size()) + throw std::invalid_argument("invalid permuation"); + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < 2 * level - 3; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements to permute must be in the same subloop"); + for (int j = 0; j < pi.size(); j++) + if (!(stmt[*i].loop_level[level + j - 1].type + == stmt[ref_stmt_num].loop_level[level + j - 1].type + && stmt[*i].loop_level[level + j - 1].payload + == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) + throw std::invalid_argument( + "permuted loops must have the same loop level types"); + } + } + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // Update transformation relations + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 0; j < pi.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (level + j)), 1); + h.update_coef(mapping.input_var(2 * pi[j]), -1); + } + for (int j = 1; j < level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == 
LoopLevelOriginal) + t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload); + int max_dep_dim = -1; + int min_dep_dim = num_dep_dim; + for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(num_dep_dim); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i - min_dep_dim]; + for (int i = max_dep_dim + 1; i < num_dep_dim; i++) + dep_pi[i] = i; + + dep.permute(dep_pi, active); + + // update the dependence graph + DependenceGraph g(dep.num_dim()); + for (int i = 0; i < dep.vertex.size(); i++) + g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { // + if ((active.find(i) != active.end() + && active.find(j->first) != active.end())) { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim); + std::vector<coef_t> ubounds(num_dep_dim); + for (int d = 0; d < num_dep_dim; d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { 
+ case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < num_dep_dim; d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level + pi.size()) { + switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[reverse_pi[j - level]-1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } else { + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + 
pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2 * level - 2, active); +} + + +void Loop::set_array_size(std::string name, int size ){ + array_dims.insert(std::pair<std::string, int >(name, size)); +} + + +std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("4invalid loop level " + to_string(level)); + + std::set<int> result; + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + Relation cond2 = copy(cond); + cond2.simplify(); + cond2 = EQs_to_GEQs(cond2); + Conjunct *c = cond2.single_conjunct(); + int cur_lex = lex[dim - 1]; + + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int max_level = (*gi).max_tuple_pos(); + Relation single_cond(max_level); + single_cond.and_with_GEQ(*gi); + + // TODO: should decide where to place newly created statements with + // complementary split condition from dependence graph. 
+ bool place_after; + if (max_level == 0) + place_after = true; + else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) + place_after = true; + else + place_after = false; + + bool temp_place_after; // = place_after; + bool assigned = false; + int part1_to_part2; + int part2_to_part1; + // original statements with split condition, + // new statements with complement of split condition + int old_num_stmt = stmt.size(); + std::map<int, int> what_stmt_num; + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + int n = stmt[*i].IS.n_set(); + Relation part1, part2; + if (max_level > n) { + part1 = copy(stmt[*i].IS); + part2 = Relation::False(0); + } else { + part1 = Intersection(copy(stmt[*i].IS), + Extend_Set(copy(single_cond), n - max_level)); + part2 = Intersection(copy(stmt[*i].IS), + Extend_Set(Complement(copy(single_cond)), + n - max_level)); + } + + //split dependence check + + if (max_level > level) { + + DNF_Iterator di1(stmt[*i].IS.query_DNF()); + DNF_Iterator di2(part1.query_DNF()); + for (; di1 && di2; di1++, di2++) { + //printf("In next conjunct,\n"); + EQ_Iterator ei1 = (*di1)->EQs(); + EQ_Iterator ei2 = (*di2)->EQs(); + for (; ei1 && ei2; ei1++, ei2++) { + //printf(" In next equality constraint,\n"); + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + dimension = + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + 
for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + GEQ_Iterator gi1 = (*di1)->GEQs(); + GEQ_Iterator gi2 = (*di2)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = + stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); + j++) { + for (int k = 0; k < j->second.size(); + k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + DNF_Iterator di3(stmt[*i].IS.query_DNF()); + DNF_Iterator di4(part2.query_DNF()); // + for (; di3 && di4; di3++, di4++) { + EQ_Iterator ei1 = (*di3)->EQs(); + EQ_Iterator ei2 = (*di4)->EQs(); + for (; ei1 && ei2; 
ei1++, ei2++) { + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + GEQ_Iterator gi1 = (*di3)->GEQs(); + GEQ_Iterator gi2 = (*di4)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type // + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + 
for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + stmt[*i].IS = part1; + + int n1 = part2.n_set(); + int m = this->known.n_set(); + Relation test; + if(m > n1) + test = Intersection(copy(this->known), + Extend_Set(copy(part2), m - part2.n_set())); + else + test = Intersection(copy(part2), + Extend_Set(copy(this->known), n1 - this->known.n_set())); + + if (test.is_upper_bound_satisfiable()) { + Statement new_stmt; + new_stmt.code = stmt[*i].code->clone(); + new_stmt.IS = part2; + new_stmt.xform = copy(stmt[*i].xform); + new_stmt.ir_stmt_node = NULL; + new_stmt.loop_level = stmt[*i].loop_level; + + new_stmt.has_inspector = stmt[*i].has_inspector; + new_stmt.reduction = stmt[*i].reduction; + new_stmt.reductionOp = stmt[*i].reductionOp; + + stmt_nesting_level_.push_back(stmt_nesting_level_[*i]); + + + if (place_after) + assign_const(new_stmt.xform, dim - 1, cur_lex + 1); + else + assign_const(new_stmt.xform, dim - 1, cur_lex - 1); + + fprintf(stderr, "loop_basic.cc L828 adding stmt %d\n", stmt.size()); + stmt.push_back(new_stmt); + + uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); + dep.insert(); + what_stmt_num[*i] = stmt.size() - 1; + if (*i == stmt_num) + result.insert(stmt.size() - 1); + } + + } + // make adjacent lexical number available for new statements + if (place_after) { + lex[dim - 1] = cur_lex + 1; + shiftLexicalOrder(lex, dim - 1, 1); + } else { + lex[dim - 1] = cur_lex - 1; + shiftLexicalOrder(lex, dim - 1, 
-1); + } + // update dependence graph + int dep_dim = get_dep_dim_of(stmt_num, level); + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(i) != what_stmt_num.end() + && what_stmt_num.find(j->first) + != what_stmt_num.end()) + dep.connect(what_stmt_num[i], + what_stmt_num[j->first], j->second); + if (place_after + && what_stmt_num.find(j->first) + != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + D.push_back( + std::make_pair(what_stmt_num[j->first], + dvs)); + } else if (!place_after + && what_stmt_num.find(i) + != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + dep.connect(what_stmt_num[i], j->first, dvs); + + } + } else { + if (what_stmt_num.find(i) != what_stmt_num.end()) + dep.connect(what_stmt_num[i], j->first, j->second); + } + } else if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(j->first) != what_stmt_num.end()) + D.push_back( + std::make_pair(what_stmt_num[j->first], + j->second)); + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + + } + + return result; +} + +void Loop::skew(const std::set<int> &stmt_nums, int level, + const std::vector<int> 
&skew_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "5invalid loop level " + to_string(level)); + for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + throw std::invalid_argument("invalid skewing formula"); + } + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) + if (j != 2 * level) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(2 * level), -1); + for (int j = 0; j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence between skewed statements + std::vector<DependenceVector> dvs = j->second; + for 
(int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + coef_t lb = 0; + coef_t ub = 0; + for (int kk = 0; kk < skew_amount.size(); kk++) { + int cur_dep_dim = get_dep_dim_of(*i, kk + 1); + if (skew_amount[kk] > 0) { + if (lb != -posInfinity + && stmt[*i].loop_level[kk].type == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] != -posInfinity) + lb += skew_amount[kk] * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] == 0 + && dv.ubounds[cur_dep_dim]== 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] != posInfinity) + ub += skew_amount[kk] * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] == 0 + && dv.ubounds[cur_dep_dim] == 0)) + ub = posInfinity; + } + } else if (skew_amount[kk] < 0) { + if (lb != -posInfinity + && stmt[*i].loop_level[kk].type == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] != posInfinity) + lb += skew_amount[kk] * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] == 0 + && dv.ubounds[cur_dep_dim] == 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] != -posInfinity) + ub += skew_amount[kk] * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] == 0 + && dv.ubounds[cur_dep_dim] == 0)) + ub = posInfinity; + } + } + } + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim)) + && dv.quasi) + dv.quasi = false; + + if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim)) + && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + if ((dv.isCarried(dep_dim) + && dv.hasPositive(dep_dim)) && dv.quasi) + dv.quasi = false; + + if 
((dv.isCarried(dep_dim) + && dv.hasNegative(dep_dim)) && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + } + } + j->second = dvs; + } else { + // dependence from skewed statement to unskewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unskewed statement to skewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + } +} + + +void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "6invalid loop level " + to_string(level)); + } + + // do nothing + if (shift_amount == 0) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + 
// set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + if (j == 2 * level) + h.update_const(shift_amount); + } + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) == stmt_nums.end()) { + // dependence from shifted statement to unshifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] -= shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] -= shift_amount; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unshifted statement to shifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] += shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] += shift_amount; + } + } + 
j->second = dvs; + } + } +} + +void Loop::scale(const std::set<int> &stmt_nums, int level, int scale_amount) { + std::vector<int> skew_amount(level, 0); + skew_amount[level - 1] = scale_amount; + skew(stmt_nums, level, skew_amount); +} + +void Loop::reverse(const std::set<int> &stmt_nums, int level) { + scale(stmt_nums, level, -1); +} + +void Loop::fuse(const std::set<int> &stmt_nums, int level) { + if (stmt_nums.size() == 0 || stmt_nums.size() == 1) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + // check for sanity of parameters + std::vector<int> ref_lex; + int ref_stmt_num; + apply_xform(); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) { + fprintf(stderr, "statement number %d should be in [0, %d)\n", *i, stmt.size()); + throw std::invalid_argument( + "FUSE invalid statement number " + to_string(*i)); + } + if (level <= 0 + // || (level > (stmt[*i].xform.n_out() - 1) / 2 + // || level > stmt[*i].loop_level.size()) + ) { + fprintf(stderr, "FUSE level %d ", level); + fprintf(stderr, "must be greater than zero and \n"); + fprintf(stderr, "must NOT be greater than (%d - 1)/2 == %d and\n", stmt[*i].xform.n_out(), (stmt[*i].xform.n_out() - 1) / 2); + fprintf(stderr, "must NOT be greater than %d\n", stmt[*i].loop_level.size()); + throw std::invalid_argument( + "FUSE invalid loop level " + to_string(level)); + } + if (ref_lex.size() == 0) { + ref_lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex = getLexicalOrder(*i); + for (int j = 0; j < dim - 1; j += 2) + if (lex[j] != ref_lex[j]) + throw std::invalid_argument( + "statements for fusion must be in the same level-" + + to_string(level - 1) + " subloop"); + } + } + + // collect lexicographical order values from to-be-fused statements + std::set<int> lex_values; + 
for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + std::vector<int> lex = getLexicalOrder(*i); + lex_values.insert(lex[dim - 1]); + } + if (lex_values.size() == 1) + return; + // negative dependence would prevent fusion + + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + + for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); + i++) { + ref_lex[dim - 1] = *i; + std::set<int> a = getStatements(ref_lex, dim - 1); + std::set<int>::iterator j = i; + j++; + for (; j != lex_values.end(); j++) { + ref_lex[dim - 1] = *j; + std::set<int> b = getStatements(ref_lex, dim - 1); + for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++) + for (std::set<int>::iterator jj = b.begin(); jj != b.end(); + jj++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim) + && dvs[k].hasNegative(dep_dim)) + throw loop_error( + "loop error: statements " + to_string(*ii) + + " and " + to_string(*jj) + + " cannot be fused together due to negative dependence"); + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim) + && dvs[k].hasNegative(dep_dim)) + throw loop_error( + "loop error: statements " + to_string(*jj) + + " and " + to_string(*ii) + + " cannot be fused together due to negative dependence"); + } + } + } + + std::set<int> same_loop = getStatements(ref_lex, dim - 3); + + std::vector<std::set<int> > s = sort_by_same_loops(same_loop, level); + + std::vector<bool> s2; + + for (int i = 0; i < s.size(); i++) { + s2.push_back(false); + } + + for (std::set<int>::iterator kk = stmt_nums.begin(); kk != stmt_nums.end(); + kk++) + for (int i = 0; i < s.size(); i++) + if (s[i].find(*kk) != s[i].end()) { + + s2[i] = true; + } + + try { + + //Dependence Check for Ordering Constraint + //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5, + // dep, dep_dim); + + 
Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s, dep, + dep_dim); + std::cout << g; + s = typed_fusion(g, s2); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + } + + int order = 0; + for (int i = 0; i < s.size(); i++) { + for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); it++) { + assign_const(stmt[*it].xform, 2 * level - 2, order); + } + order++; + } + + + //plan for selective typed fusion + + /* + 1. sort the lex values of the statements + 2. construct induced graph on sorted statements + 3. pick a node from the graph, check if it is before/after from the candidate set for fusion + equal-> set the max fused node of this node to be the start/target node for fusion + before -> augment and continue + + 4. once target node identified and is on work queue update successors and other nodes to start node + 5. augment and continue + 6. if all candidate nodes dont end up in start node throw error + 7. 
Get nodes and update lexical values + + */ + + /* for (std::set<int>::iterator kk = stmt_nums.begin(); kk != stmt_nums.end(); + kk++) + for (int i = 0; i < s.size(); i++) + if (s[i].find(*kk) != s[i].end()) { + s1.insert(s[i].begin(), s[i].end()); + s2.insert(i); + } + + s3.push_back(s1); + for (int i = 0; i < s.size(); i++) + if (s2.find(i) == s2.end()) { + s3.push_back(s[i]); + s4.insert(s[i].begin(), s[i].end()); + } + try { + std::vector<std::set<int> > s5; + s5.push_back(s1); + s5.push_back(s4); + + //Dependence Check for Ordering Constraint + //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5, + // dep, dep_dim); + + Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s3, dep, + dep_dim); + std::cout<< g; + s = typed_fusion(g); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + } + + if (s3.size() == s.size()) { + int order = 0; + for (int i = 0; i < s.size(); i++) { + + for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); + it++) { + + assign_const(stmt[*it].xform, 2 * level - 2, order); + + } + + order++; + } + } else if (s3.size() > s.size()) { + + int order = 0; + for (int j = 0; j < s.size(); j++) { + std::set<int>::iterator it3; + for (it3 = s1.begin(); it3 != s1.end(); it3++) { + if (s[j].find(*it3) != s[j].end()) + break; + } + if (it3 != s1.end()) { + for (std::set<int>::iterator it = s1.begin(); it != s1.end(); + it++) + assign_const(stmt[*it].xform, 2 * level - 2, order); + + order++; + + } + + for (int i = 0; i < s3.size(); i++) { + std::set<int>::iterator it2; + + for (it2 = s3[i].begin(); it2 != s3[i].end(); it2++) { + if (s[j].find(*it2) != s[j].end()) + break; + } + + if (it2 != s3[i].end()) { + for (std::set<int>::iterator it = s3[i].begin(); + it != s3[i].end(); it++) + assign_const(stmt[*it].xform, 2 * level - 2, order); + + order++; + + } + } + } + + } else + throw loop_error("Typed Fusion Error"); + */ +} + + + 
+void Loop::distribute(const std::set<int> &stmt_nums, int level) { + if (stmt_nums.size() == 0 || stmt_nums.size() == 1) + return; + fprintf(stderr, "Loop::distribute()\n"); + + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + int dim = 2 * level - 1; + int ref_stmt_num; + // check for sanity of parameters + std::vector<int> ref_lex; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + + if (level < 1 + || (level > (stmt[*i].xform.n_out() - 1) / 2 + || level > stmt[*i].loop_level.size())) + throw std::invalid_argument( + "8invalid loop level " + to_string(level)); + if (ref_lex.size() == 0) { + ref_lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex = getLexicalOrder(*i); + for (int j = 0; j <= dim - 1; j += 2) + if (lex[j] != ref_lex[j]) + throw std::invalid_argument( + "statements for distribution must be in the same level-" + + to_string(level) + " subloop"); + } + } + + // find SCC in the to-be-distributed loop + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + std::set<int> same_loop = getStatements(ref_lex, dim - 1); + Graph<int, Empty> g; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) + g.insert(*i); + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g.connect(i, j); + break; + } + dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g.connect(j, i); + break; + } + } + std::vector<std::set<int> > s = g.topoSort(); + // find statements that 
cannot be distributed due to dependence cycle + Graph<std::set<int>, Empty> g2; + for (int i = 0; i < s.size(); i++) { + std::set<int> t; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) + t.insert(g.vertex[*j].first); + if (!t.empty()) + g2.insert(t); + } + for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i + 1; j < g2.vertex.size(); j++) + for (std::set<int>::iterator ii = g2.vertex[i].first.begin(); + ii != g2.vertex[i].first.end(); ii++) + for (std::set<int>::iterator jj = g2.vertex[j].first.begin(); + jj != g2.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g2.connect(i, j); + break; + } + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g2.connect(j, i); + break; + } + } + std::vector<std::set<int> > s2 = g2.topoSort(); + // nothing to distribute + if (s2.size() == 1) + throw loop_error( + "loop error: no statement can be distributed due to dependence cycle"); + std::vector<std::set<int> > s3; + for (int i = 0; i < s2.size(); i++) { + std::set<int> t; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), + g2.vertex[*j].first.end(), inserter(t, t.begin())); + s3.push_back(t); + } + // associate other affected statements with the right distributed statements + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) + if (stmt_nums.find(*i) == stmt_nums.end()) { + bool is_inserted = false; + int potential_insertion_point = 0; + for (int j = 0; j < s3.size(); j++) { + for (std::set<int>::iterator k = s3[j].begin(); + k != s3[j].end(); k++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*i, *k); + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].isCarried(dep_dim)) { + 
s3[j].insert(*i); + is_inserted = true; + break; + } + dvs = dep.getEdge(*k, *i); + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].isCarried(dep_dim)) + potential_insertion_point = j; + } + if (is_inserted) + break; + } + if (!is_inserted) + s3[potential_insertion_point].insert(*i); + } + // set lexicographical order after distribution + int order = ref_lex[dim - 1]; + shiftLexicalOrder(ref_lex, dim - 1, s3.size() - 1); + for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end(); + i++) { + for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++) + assign_const(stmt[*j].xform, dim - 1, order); + order++; + } + // no need to update dependence graph + + return; +} + + + + +std::vector<IR_ArrayRef *> FindOuterArrayRefs(IR_Code *ir, + std::vector<IR_ArrayRef *> &arr_refs) { + std::vector<IR_ArrayRef *> to_return; + for (int i = 0; i < arr_refs.size(); i++) + if (!ir->parent_is_array(arr_refs[i])) { + int j; + for (j = 0; j < to_return.size(); j++) + if (*to_return[j] == *arr_refs[i]) + break; + if (j == to_return.size()) + to_return.push_back(arr_refs[i]); + } + return to_return; +} + + + + + +std::vector<std::vector<std::string> > constructInspectorVariables(IR_Code *ir, + std::set<IR_ArrayRef *> &arr, std::vector<std::string> &index) { + + fprintf(stderr, "constructInspectorVariables()\n"); + + std::vector<std::vector<std::string> > to_return; + + for (std::set<IR_ArrayRef *>::iterator i = arr.begin(); i != arr.end(); + i++) { + + std::vector<std::string> per_index; + + CG_outputRepr *subscript = (*i)->index(0); + + if ((*i)->n_dim() > 1) + throw ir_error( + "multi-dimensional array support non-existent for flattening currently"); + + while (ir->QueryExpOperation(subscript) == IR_OP_ARRAY_VARIABLE) { + + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(subscript); + + IR_ArrayRef *ref = static_cast<IR_ArrayRef *>(ir->Repr2Ref(v[0])); + //per_index.push_back(ref->name()); + + subscript = ref->index(0); + + } + + if 
(ir->QueryExpOperation(subscript) == IR_OP_VARIABLE) { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(subscript); + IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0])); + per_index.push_back(ref->name()); + int j; + for (j = 0; j < index.size(); j++) + if (index[j] == ref->name()) + break; + + if (j == index.size()) + throw ir_error("Non index variable in array expression"); + + int k; + for (k = 0; k < to_return.size(); k++) + if (to_return[k][0] == ref->name()) + break; + if (k == to_return.size()) { + to_return.push_back(per_index); + fprintf(stderr, "adding index %s\n", ref->name().c_str()); + } + + } + + } + + return to_return; + +} + +/*std::vector<CG_outputRepr *> constructInspectorData(IR_Code *ir, std::vector<std::vector<std::string> > &indices){ + + std::vector<CG_outputRepr *> to_return; + + for(int i =0; i < indices.size(); i++) + ir->CreateVariableDeclaration(indices[i][0]); + return to_return; + } + + + CG_outputRepr* constructInspectorFunction(IR_Code* ir, std::vector<std::vector<std::string> > &indices){ + + CG_outputRepr *to_return; + + + + return to_return; + } + +*/ + +CG_outputRepr * checkAndGenerateIndirectMappings(CG_outputBuilder * ocg, + std::vector<std::vector<std::string> > &indices, + CG_outputRepr * instance, CG_outputRepr * class_def, + CG_outputRepr * count_var) { + + CG_outputRepr *to_return = NULL; + + for (int i = 0; i < indices.size(); i++) + if (indices[i].size() > 1) { + std::string index = indices[i][indices[i].size() - 1]; + CG_outputRepr *rep = ocg->CreateArrayRefExpression( + ocg->CreateDotExpression(instance, + ocg->lookup_member_data(class_def, index, instance)), + count_var); + for (int j = indices[i].size() - 2; j >= 0; j--) + rep = ocg->CreateArrayRefExpression(indices[i][j], rep); + + CG_outputRepr *lhs = ocg->CreateArrayRefExpression( + ocg->CreateDotExpression(instance, + ocg->lookup_member_data(class_def, indices[i][0], instance)), + count_var); + + to_return = ocg->StmtListAppend(to_return, + 
ocg->CreateAssignment(0, lhs, rep)); + + } + + return to_return; + +} + +CG_outputRepr *generatePointerAssignments(CG_outputBuilder *ocg, + std::string prefix_name, + std::vector<std::vector<std::string> > &indices, + CG_outputRepr *instance, + CG_outputRepr *class_def) { + + fprintf(stderr, "generatePointerAssignments()\n"); + CG_outputRepr *list = NULL; + + fprintf(stderr, "prefix '%s', %d indices\n", prefix_name.c_str(), indices.size()); + for (int i = 0; i < indices.size(); i++) { + + std::string s = prefix_name + "_" + indices[i][0]; + + fprintf(stderr, "s %s\n", s.c_str()); + + // create a variable definition for a pointer to int with this name + // that seems to be the only actual result of this routine ... + //chillAST_VarDecl *vd = new chillAST_VarDecl( "int", prefix_name.c_str(), "*", NULL); + //vd->print(); printf("\n"); fflush(stdout); + //vd->dump(); printf("\n"); fflush(stdout); + + CG_outputRepr *ptr_exp = ocg->CreatePointer(s); // but dropped on the floor. unused + //fprintf(stderr, "ptr_exp created\n"); + + //CG_outputRepr *rhs = ocg->CreateDotExpression(instance, + // ocg->lookup_member_data(class_def, indices[i][0], instance)); + + //CG_outputRepr *ptr_assignment = ocg->CreateAssignment(0, ptr_exp, rhs); + + //list = ocg->StmtListAppend(list, ptr_assignment); + + } + + fprintf(stderr, "generatePointerAssignments() DONE\n\n"); + return list; +} + +void Loop::normalize(int stmt_num, int loop_level) { + + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument( + + "invalid statement number " + to_string(stmt_num)); + + if (loop_level <= 0) + throw std::invalid_argument( + "12invalid loop level " + to_string(loop_level)); + if (loop_level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "there is no loop level " + to_string(loop_level) + + " for statement " + to_string(stmt_num)); + + apply_xform(stmt_num); + + Relation r = copy(stmt[stmt_num].IS); + + Relation bound = get_loop_bound(r, loop_level, 
this->known); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() + || bound.is_tautology()) + throw loop_error("unable to extract loop bound for normalize"); + + // extract the loop stride + coef_t stride; + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, + bound.set_var(loop_level)); + if (result.second == NULL) + stride = 1; + else + stride = abs(result.first.get_coef(result.second)) + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(loop_level)))); + + if (stride != 1) + throw loop_error( + "normalize currently only handles unit stride, non unit stride present in loop bounds"); + + GEQ_Handle lb; + + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(loop_level)); + if (coef > 0) + lb = *gi; + } + + //Loop bound already zero + //Nothing to do. + if (lb.is_const(bound.set_var(loop_level)) && lb.get_const() == 0) + return; + + if (lb.is_const_except_for_global(bound.set_var(loop_level))) { + + int n = stmt[stmt_num].xform.n_out(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) + if (j != 2 * loop_level) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + + stmt[stmt_num].xform = Composition(r, stmt[stmt_num].xform); + stmt[stmt_num].xform.simplify(); + + for (Constr_Vars_Iter ci(lb); ci; ci++) { + if ((*ci).var->kind() == Global_Var) { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[stmt_num].xform.get_local(g); + else + v = stmt[stmt_num].xform.get_local(g, + (*ci).var->function_of()); + + F_And *f_super_root = stmt[stmt_num].xform.and_with_and(); + F_Exists *f_exists = f_super_root->add_exists(); + F_And *f_root = f_exists->add_and(); + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[stmt_num].xform.output_var(2 * loop_level), + 1); + 
h.update_coef(stmt[stmt_num].xform.input_var(loop_level), -1); + h.update_coef(v, 1); + + stmt[stmt_num].xform.simplify(); + } + + } + + } else + throw loop_error("loop bounds too complex for normalize!"); + +} + diff --git a/src/transformations/loop_datacopy.cc b/src/transformations/loop_datacopy.cc new file mode 100644 index 0000000..12d74fd --- /dev/null +++ b/src/transformations/loop_datacopy.cc @@ -0,0 +1,1369 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Various data copy schemes. + + Notes: + + History: + 02/20/09 Created by Chun Chen by splitting original datacopy from loop.cc +*****************************************************************************/ + +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +// +// data copy function by referring arrays by numbers. +// e.g. 
A[i] = A[i-1] + B[i] +// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + //fprintf(stderr, "Loop::datacopy()\n"); + + // check for sanity of parameters + std::set<int> same_loop; + for (int i = 0; i < array_ref_nums.size(); i++) { + int stmt_num = array_ref_nums[i].first; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (i == 0) { + std::vector<int> lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, 2*level-2); + } + else if (same_loop.find(stmt_num) == same_loop.end()) + throw std::invalid_argument("array references for data copy must be located in the same subloop"); + } + + // convert array reference numbering scheme to actual array references + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (int i = 0; i < array_ref_nums.size(); i++) { + if (array_ref_nums[i].second.size() == 0) + continue; + + int stmt_num = array_ref_nums[i].first; + selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + std::vector<bool> selected(refs.size(), false); + for (int j = 0; j < array_ref_nums[i].second.size(); j++) { + int ref_num = array_ref_nums[i].second[j]; + if (ref_num < 0 || ref_num >= refs.size()) { + for (int k = 0; k < refs.size(); k++) + delete refs[k]; + throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + } + selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + 
selected[ref_num] = true; + } + for (int j = 0; j < refs.size(); j++) + if (!selected[j]) + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references to copy"); + + // do the copy + bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; +} + +// +// data copy function by referring arrays by name. +// e.g. A[i] = A[i-1] + B[i] +// parameter array_name=A means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + + fflush(stdout); + //fprintf(stderr, "Loop::datacopy2()\n"); + //fprintf(stderr, "array name %s stmt num %d\n", array_name.c_str(), stmt_num); + + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + // collect array references by name + std::vector<int> lex = getLexicalOrder(stmt_num); + int dim = 2*level - 1; + std::set<int> same_loop = getStatements(lex, dim-1); + + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + std::vector<IR_ArrayRef *> t; + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + for (int j = 0; j < refs.size(); j++) + if (refs[j]->name() == array_name) + t.push_back(refs[j]); + else + delete refs[j]; + if (t.size() != 0) + selected_refs.push_back(std::make_pair(*i, t)); + } + + //fprintf(stderr, "selected refs:\n"); + //for (int i=0; i<selected_refs.size(); i++) { + // //fprintf(stderr, "%d 0x%x ", 
selected_refs[i].first, selected_refs[i].second[0]); + // selected_refs[i].second[0]->Dump(); printf("\n"); fflush(stdout); + //} + + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); + + IR_ArrayRef *AR = selected_refs[0].second[0]; + //IR_roseArrayRef *RAR = (IR_roseArrayRef *)AR; + //fprintf(stderr, "before datacopy_privatized, "); + //AR->Dump(); + + // do the copy + //fprintf(stderr, "\nLoop::datacopy2 calling privatized\n"); + + bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + + //AR = selected_refs[0].second[0]; + //fprintf(stderr, "after datacopy_privatized, "); + //AR->Dump(); + + return whatever; +} + + +bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + //fprintf(stderr, "Loop::datacopy_privatized()\n"); + + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + // collect array references by name + std::vector<int> lex = getLexicalOrder(stmt_num); + int dim = 2*level - 1; + std::set<int> same_loop = getStatements(lex, dim-1); + + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>())); + + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + for (int j = 0; j < refs.size(); j++) + if (refs[j]->name() == array_name) 
+ selected_refs[selected_refs.size()-1].second.push_back(refs[j]); + else + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); + + // do the copy + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; +} + + +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + //fprintf(stderr, "Loop::datacopy_privatized2()\n"); + + // check for sanity of parameters + std::set<int> same_loop; + for (int i = 0; i < array_ref_nums.size(); i++) { + int stmt_num = array_ref_nums[i].first; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (i == 0) { + std::vector<int> lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, 2*level-2); + } + else if (same_loop.find(stmt_num) == same_loop.end()) + throw std::invalid_argument("array references for data copy must be located in the same subloop"); + } + + // convert array reference numbering scheme to actual array references + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (int i = 0; i < array_ref_nums.size(); i++) { + if (array_ref_nums[i].second.size() == 0) + continue; + + int stmt_num = array_ref_nums[i].first; + selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + std::vector<bool> selected(refs.size(), false); + 
for (int j = 0; j < array_ref_nums[i].second.size(); j++) { + int ref_num = array_ref_nums[i].second[j]; + if (ref_num < 0 || ref_num >= refs.size()) { + for (int k = 0; k < refs.size(); k++) + delete refs[k]; + throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + } + selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected[ref_num] = true; + } + for (int j = 0; j < refs.size(); j++) + if (!selected[j]) + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references to copy"); + + // do the copy + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; +} + + +// +// Implement low level datacopy function with lots of options. +// + +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, + int level, + const std::vector<int> &privatized_levels, + bool allow_extra_read, + int fastest_changing_dimension, + int padding_stride, + int padding_alignment, + int memory_type) { + + //fprintf(stderr, "\nLoop::datacopy_privatized3() *****\n"); + //fprintf(stderr, "privatized_levels.size() %d\n", privatized_levels.size()); + //fprintf(stderr, "level %d\n", level); + + if (stmt_refs.size() == 0) + return true; + + // check for sanity of parameters + IR_ArraySymbol *sym = NULL; + std::vector<int> lex; + std::set<int> active; + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + for (int i = 0; i < privatized_levels.size(); i++) { + if (i == 0) { + if (privatized_levels[i] < level) + throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); + } + else if (privatized_levels[i] <= privatized_levels[i-1]) + throw std::invalid_argument("privatized loop levels must be in 
ascending order"); + } + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + active.insert(stmt_num); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (privatized_levels.size() != 0) { + if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); + } + else { + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); + } + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (sym == NULL) { + sym = stmt_refs[i].second[j]->symbol(); + lex = getLexicalOrder(stmt_num); + } + else { + IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); + if (t->name() != sym->name()) { + delete t; + delete sym; + throw std::invalid_argument("try to copy data from different arrays"); + } + delete t; + } + } + } + + //fprintf(stderr, "sym %p\n", sym); + if (!sym) { + fprintf(stderr, "sym NULL, gonna die\n"); int *i=0; int j=i[0]; + } + + if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) + throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); + if (padding_stride < 0) + throw std::invalid_argument("invalid temporary array stride requirement"); + if (padding_alignment == -1 || padding_alignment == 0) + throw std::invalid_argument("invalid temporary array alignment requirement"); + + int dim = 2*level - 1; + int n_dim = sym->n_dim(); + + + if (fastest_changing_dimension == -1) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported 
array layout"); + } + // OK, parameter sanity checked + + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // build iteration spaces for all reads and for all writes separately + //fprintf(stderr, "dp3: before apply_xform() ARRAY REFS\n"); + //for (int i = 0; i < stmt_refs.size(); i++) { + // for (int j = 0; j < stmt_refs[i].second.size(); j++) { + // IR_ArrayRef *AR = stmt_refs[i].second[j]; + // fprintf(stderr, "array ref ij %d %d ", i, j); AR->Dump(); fprintf(stderr, "\n"); + // } + //} + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "stmt %d = ", i); + // stmt[i].code->dump(); + // fprintf(stderr, "\n"); + //} + + apply_xform(active); + //fprintf(stderr, "dp3: back from apply_xform() ARRAY REFS\n"); + + //for (int i = 0; i < stmt_refs.size(); i++) { + // for (int j = 0; j < stmt_refs[i].second.size(); j++) { + // IR_ArrayRef *AR = stmt_refs[i].second[j]; + // fprintf(stderr, "array ref ij %d %d ", i, j); + // AR->Dump(); + // fprintf(stderr, "\n"); + // } + //} + + //for (int i=0; i<stmt.size(); i++) { + // fprintf(stderr, "stmt %d = ", i); + // stmt[i].code->dump(); + // fprintf(stderr, "\n"); + //} + + + bool has_write_refs = false; + bool has_read_refs = false; + Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + //fprintf(stderr, "\n\ni range: 0-%d\n", -1 + stmt_refs.size()); + int stmt_num = stmt_refs[0].first; + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + + //fprintf(stderr, "j range: 0-%d\n", -1 + stmt_refs[i].second.size()); + + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + //fprintf(stderr, "ij %d %d\n", i, j); + + Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); + for (int k = 1; k <= mapping.n_inp(); k++) + mapping.name_input_var(k, 
stmt[stmt_num].IS.set_var(k)->name()); + mapping.setup_names(); + mapping.print(); fflush(stdout); // "{[I] -> [_t1] : I = _t1 } + + F_And *f_root = mapping.add_and(); + for (int k = 1; k <= level-1; k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(k), 1); + h.update_coef(mapping.output_var(k), -1); + } + for (int k = 0; k < privatized_levels.size(); k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(privatized_levels[k]), 1); + h.update_coef(mapping.output_var(level+k), -1); + } + for (int k = 0; k < n_dim; k++) { + IR_ArrayRef *AR = stmt_refs[i].second[j]; + //fprintf(stderr, "array ref "); + AR->Dump(); + + CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); + //fprintf(stderr, "k %d j %d repr ", k, j); repr->dump(); fflush(stdout); + + exp2formula(ir, + mapping, + f_root, + freevar, + repr, + mapping.output_var(level-1+privatized_levels.size()+k+1), + 'w', + IR_COND_EQ, + false, + uninterpreted_symbols[stmt_num], + uninterpreted_symbols_stringrepr[stmt_num]); + repr->clear(); + delete repr; + } + Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + if (stmt_refs[i].second[j]->is_write()) { + has_write_refs = true; + wo_copy_is = Union(wo_copy_is, r); + wo_copy_is.simplify(2, 4); + + + } + else { + has_read_refs = true; + ro_copy_is = Union(ro_copy_is, r); + ro_copy_is.simplify(2, 4); + + } + } + } + + //fprintf(stderr, "dp3: simplify\n"); + // simplify read and write footprint iteration space + { + if (allow_extra_read) + ro_copy_is = SimpleHull(ro_copy_is, true, true); + else + ro_copy_is = ConvexRepresentation(ro_copy_is); + + wo_copy_is = ConvexRepresentation(wo_copy_is); + if (wo_copy_is.number_of_conjuncts() > 1) { + Relation t = SimpleHull(wo_copy_is, true, true); + if (Must_Be_Subset(copy(t), copy(ro_copy_is))) + wo_copy_is = t; + else if (Must_Be_Subset(copy(wo_copy_is), 
copy(ro_copy_is))) + wo_copy_is = ro_copy_is; + } + } + + // make copy statement variable names match the ones in the original statements which + // already have the same names due to apply_xform + { + int ref_stmt = *active.begin(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + if (stmt[*i].IS.n_set() > stmt[ref_stmt].IS.n_set()) + ref_stmt = *i; + for (int i = 1; i < level; i++) { + std::string s = stmt[ref_stmt].IS.input_var(i)->name(); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + for (int i = 0; i < privatized_levels.size(); i++) { + std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name(); + wo_copy_is.name_set_var(level+i, s); + ro_copy_is.name_set_var(level+i, s); + } + for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { + std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + tmp_loop_var_name_counter += n_dim; + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + //fprintf(stderr, "\ndp3: build merged\n"); + // build merged footprint iteration space for calculating temporary array size + Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true); + + // extract temporary array information + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL + std::vector<coef_t> index_stride(n_dim); + std::vector<bool> is_index_eq(n_dim, false); + std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); + Relation reduced_copy_is = copy(copy_is); + + for (int i = 0; i < n_dim; i++) { + //fprintf(stderr, "i %d/%d\n", i, n_dim); + if (i != 0) + reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); + + 
//fprintf(stderr, "dp3: extract stride\n"); + // extract stride + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1)); + if (result.second != NULL) + index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)))); + else + index_stride[i] = 1; + //fprintf(stderr, "dp3: index_stride[%d] = %d\n", i, index_stride[i]); + + // check if this array index requires loop + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (EQ_Iterator ei(c->EQs()); ei; ei++) { + //fprintf(stderr, "dp3: for\n"); + if ((*ei).has_wildcards()) + continue; + + //fprintf(stderr, "dp3: no wildcards\n"); + int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0) { + //fprintf(stderr, "coef != 0\n"); + int sign = 1; + if (coef < 0) { + //fprintf(stderr, "coef < 0\n"); + coef = -coef; + sign = -1; + } + + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*ei); ci; ci++) { + //fprintf(stderr, "dp3: ci\n"); + switch ((*ci).var->kind()) { + case Input_Var: + { + //fprintf(stderr, "dp3: Input_Var\n"); + if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) { + //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign); + + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + } + break; + } + case Global_Var: + { + //fprintf(stderr, "dp3: Global_Var\n"); + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef*sign 
== 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); + } + } + if ((*ei).get_const() != 0) + op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + if (coef != 1) + op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef)); + + index_lb[i] = op; + is_index_eq[i] = true; + break; + } + } + if (is_index_eq[i]) + continue; + + //fprintf(stderr, "dp3: separate lower and upper bounds\n"); + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + std::set<Variable_ID> excluded_floor_vars; + excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1)); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0 && (*gi).has_wildcards()) { + bool clean_bound = true; + GEQ_Handle h; + for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) + if (!find_floor_definition(bound, (*cvi).var, excluded_floor_vars).first) { + clean_bound = false; + break; + } + if (!clean_bound) + continue; + } + + if (coef > 0) + lb_list.push_back(*gi); + else if (coef < 0) + ub_list.push_back(*gi); + } + if (lb_list.size() == 0 || ub_list.size() == 0) + throw loop_error("failed to calcuate array footprint size"); + + //fprintf(stderr, "dp3: build lower bound representation\n"); + // build lower bound representation + std::vector<CG_outputRepr *> lb_repr_list; + for (int j = 0; j < lb_list.size(); j++){ + if(this->known.n_set() == 0) { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + 
lb_list[j], + bound.set_var(level-1+privatized_levels.size()+i+1), + result.first, + result.second, + bound, + Relation::True(bound.n_set()), + std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)), + uninterpreted_symbols[stmt_num])); + } + else { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + lb_list[j], + bound.set_var(level-1+privatized_levels.size()+i+1), + result.first, + result.second, + bound, + this->known, + std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)), + uninterpreted_symbols[stmt_num])); + } + } + if (lb_repr_list.size() > 1) { + //fprintf(stderr, "loop_datacopy.cc dp3 createInvoke( max )\n"); + index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); + } + else if (lb_repr_list.size() == 1) + index_lb[i] = lb_repr_list[0]; + + //fprintf(stderr, "dp3: build temporary array size representation\n"); + // build temporary array size representation + { + Relation cal(copy_is.n_set(), 1); + F_And *f_root = cal.add_and(); + for (int j = 0; j < ub_list.size(); j++) + for (int k = 0; k < lb_list.size(); k++) { + GEQ_Handle h = f_root->add_GEQ(); + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(ub_list[j].get_const()); + + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case 
Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(lb_list[k].get_const()); + + h.update_const(1); + h.update_coef(cal.output_var(1), -1); + } + + cal = Restrict_Domain(cal, copy(copy_is)); + for (int j = 1; j <= cal.n_inp(); j++) + cal = Project(cal, j, Input_Var); + cal.simplify(); + + //fprintf(stderr, "dp3: pad temporary array size\n"); + // pad temporary array size + // TODO: for variable array size, create padding formula + Conjunct *c = cal.query_DNF()->single_conjunct(); + bool is_index_bound_const = false; + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) + if ((*gi).is_const(cal.output_var(1))) { + coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); + if (padding_stride != 0) { + size = (size + index_stride[i] - 1) / index_stride[i]; + if (i == fastest_changing_dimension) + size = size * padding_stride; + } + if (i == fastest_changing_dimension) { + if (padding_alignment > 1) { // align to boundary for data packing + int residue = size % padding_alignment; + if (residue) + size = size+padding_alignment-residue; + } + else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1) + size++; + } + } + index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); + is_index_bound_const = true; + } + + if (!is_index_bound_const) { + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { + int coef = (*gi).get_coef(cal.output_var(1)); + if (coef < 0) { + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + if ((*ci).var != cal.output_var(1)) { + switch((*ci).var->kind()) { + case Global_Var: + { + Global_Var_ID g = 
(*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); + } + } + } + int c = (*gi).get_const(); + if (c > 0) + op = ocg->CreatePlus(op, ocg->CreateInt(c)); + else if (c < 0) + op = ocg->CreateMinus(op, ocg->CreateInt(-c)); + if (padding_stride != 0) { + if (i == fastest_changing_dimension) { + coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[i] / g; + if (t1 != 1) + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + op = ocg->CreateTimes(op, ocg->CreateInt(t2)); + } + else if (index_stride[i] != 1) { + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } + } + + index_sz.push_back(std::make_pair(i, op)); + break; + } + } + } + } + } + + //fprintf(stderr, "dp3: change the temporary array index order\n"); + // change the temporary array index order + for (int i = 0; i < index_sz.size(); i++) { + if (index_sz[i].first == fastest_changing_dimension) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size()-1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); + } + } + + //fprintf(stderr, "dp3: declare temporary array or scalar\n"); + // declare temporary array or scalar + IR_Symbol *tmp_sym; + if 
(index_sz.size() == 0) { + //fprintf(stderr, "tmp_sym is a scalar\n"); + tmp_sym = ir->CreateScalarSymbol(sym, memory_type); + } + else { + //fprintf(stderr, "tmp_sym is an array\n"); + std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + tmp_array_size[i] = index_sz[i].second->clone(); + index_sz[i].second->dump(); // THIS PRINTF + } + tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); + } + + //fprintf(stderr, "dp3: create temporary array read initialization code\n"); + // create temporary array read initialization code + CG_outputRepr *copy_code_read; + if (has_read_refs) { + //fprintf(stderr, "has read refs\n"); + if (index_sz.size() == 0) { + //fprintf(stderr, "if\n"); + + //fprintf(stderr, "tmp sym %s\n", tmp_sym->name().c_str()); + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); // create ref from symbol + // tmp_scalar_ref is incomplete + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) { + //fprintf(stderr, "i %d\n", i); + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + } + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + // IR_ScalarRef tmp_scalar_ref has no actual reference yet. It only has the variable definition. 
+ copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); + //fprintf(stderr, "if ends\n"); + } + else { + //fprintf(stderr, "else\n"); + std::vector<CG_outputRepr *> lhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + lhs_index[i] = cur_index_repr; + } + + //fprintf(stderr, "dp3: making tmp_array_ref\n"); + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); + //fprintf(stderr, "dp3: DONE making tmp_array_ref\n"); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment\n"); + //copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + 
CG_outputRepr *lhs = tmp_array_ref->convert(); + CG_outputRepr *rhs = copied_array_ref->convert(); + copy_code_read = ir->builder()->CreateAssignment(0, lhs, rhs); //tmp_array_ref->convert(), copied_array_ref->convert()); + //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment DONE\n\n"); + } + } // has read refs + + //fprintf(stderr, "dp3: create temporary array write back code\n"); + // create temporary array write back code + CG_outputRepr *copy_code_write; + if (has_write_refs) { + //fprintf(stderr, "has_write_refs\n"); + if (index_sz.size() == 0) { + //fprintf(stderr, "index_sz.size() == 0\n"); + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); + } + else { + //fprintf(stderr, "index_sz.size() NOT = 0\n"); + + std::vector<CG_outputRepr *> lhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + lhs_index[i] = index_lb[i]->clone(); + else + lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); + + std::vector<CG_outputRepr *> rhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = 
gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + rhs_index[i] = cur_index_repr; + } + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); + } + } // has write refs + + // now we can remove those loops for array indexes that are + // dependent on others + //fprintf(stderr, "dp3: now we can remove those loops\n"); + if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { + Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + + int cur_index = 0; + std::vector<int> mapped_index(index_sz.size()); + for (int i = 0; i < n_dim; i++) + if (!is_index_eq[i]) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); + mapped_index[index_sz.size()-cur_index-1] = i; + break; + } + case 
IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); + } + cur_index++; + } + + wo_copy_is = omega::Range(Restrict_Domain(copy(mapping), wo_copy_is)); + ro_copy_is = omega::Range(Restrict_Domain(copy(mapping), ro_copy_is)); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + } + for (int i = 0; i < index_sz.size(); i++) { + wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + } + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // insert read copy statement + //fprintf(stderr, "dp3: insert read copy statement\n"); + + int old_num_stmt = stmt.size(); + int ro_copy_stmt_num = -1; + if (has_read_refs) { + Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= ro_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_read; + copy_stmt_read.IS = ro_copy_is; + copy_stmt_read.xform = copy_xform; + copy_stmt_read.code = copy_code_read; + //fprintf(stderr, "dp3: copy_stmt_read.code = \n"); + copy_stmt_read.loop_level = 
std::vector<LoopLevel>(ro_copy_is.n_set()); + copy_stmt_read.ir_stmt_node = NULL; + copy_stmt_read.has_inspector = false; + for (int i = 0; i < level-1; i++) { + copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_read.loop_level[i].payload = -1; + else + copy_stmt_read.loop_level[i].payload = level + j; + } + else + copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = 
-1; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + + shiftLexicalOrder(lex, dim-1, 1); + + fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size()); + stmt.push_back(copy_stmt_read); + + uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[*(active.begin())]); + ro_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + //fprintf(stderr, "dp3: insert write copy statement\n"); + // insert write copy statement + int wo_copy_stmt_num = -1; + if (has_write_refs) { + Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= wo_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_write; + copy_stmt_write.IS = wo_copy_is; + copy_stmt_write.xform = copy_xform; + copy_stmt_write.code = copy_code_write; + copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); + copy_stmt_write.ir_stmt_node = NULL; + copy_stmt_write.has_inspector = false; + + for (int i = 0; i < level-1; i++) { + copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_write.loop_level[i].payload = -1; + else + 
copy_stmt_write.loop_level[i].payload = level + j; + } + else + copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + lex[dim-1]++; + shiftLexicalOrder(lex, dim-1, -2); + + fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size()); + stmt.push_back(copy_stmt_write); + + uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[*(active.begin())]); + wo_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + //fprintf(stderr, "replace original array accesses with temporary array accesses\n"); + // replace 
original array accesses with temporary array accesses + for (int i =0; i < stmt_refs.size(); i++) + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + //fprintf(stderr, "dp3: loop_datacopy.cc calling ReplaceExpression i%d j%d\n", i, j); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> index_repr(index_sz.size()); + for (int k = 0; k < index_sz.size(); k++) { + int cur_index_num = index_sz[k].first; + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (k == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + index_repr[k] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); + //fprintf(stderr, "loop_datacopy.cc ir->ReplaceExpression( ... 
)\n"); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); + } + } + + // update dependence graph + //fprintf(stderr, "update dependence graph\n"); + + int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; + if (ro_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(ro_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, ro_copy_stmt_num, D[j]); + } + + // insert dependences from copy statement loop to copied statements + //fprintf(stderr, "insert dependences from copy statement loop to copied statements\n"); + + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int i = dep_dim; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = 
posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(ro_copy_stmt_num, *i, dv); + } + + if (wo_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(wo_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, wo_copy_stmt_num, D[j]); + } + + // insert dependences from copied statements to write statements + //fprintf(stderr, "dp3: insert dependences from copied statements to write statements\n"); + + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int i = dep_dim; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + 
dep.connect(*i, wo_copy_stmt_num, dv); + + } + + // update variable name for dependences among copied statements + for (int i = 0; i < old_num_stmt; i++) { + if (active.find(i) != active.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (active.find(j->first) != active.end()) + for (int k = 0; k < j->second.size(); k++) { + IR_Symbol *s = tmp_sym->clone(); + j->second[k].sym = s; + } + } + + // insert anti-dependence from write statement to read statement + if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) + if (dep_dim >= 0) { + DependenceVector dv; + dv.type = DEP_R2W; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int k = dep_dim; k < dep.num_dim(); k++) { + dv.lbounds[k] = -posInfinity; + dv.ubounds[k] = posInfinity; + } + for (int k = 0; k < dep_dim; k++) { + if (k != 0) { + dv.lbounds[k-1] = 0; + dv.ubounds[k-1] = 0; + } + dv.lbounds[k] = 1; + dv.ubounds[k] = posInfinity; + dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); + } + } + + //fprintf(stderr, "Loop::datacopy_privatized3() cleanup\n"); + // cleanup + delete sym; + delete tmp_sym; + for (int i = 0; i < index_lb.size(); i++) { + index_lb[i]->clear(); + delete index_lb[i]; + } + for (int i = 0; i < index_sz.size(); i++) { + index_sz[i].second->clear(); + delete index_sz[i].second; + } + + return true; +} + + + diff --git a/src/transformations/loop_extra.cc b/src/transformations/loop_extra.cc new file mode 100644 index 0000000..dac05bf --- /dev/null +++ b/src/transformations/loop_extra.cc @@ -0,0 +1,224 @@ +/***************************************************************************** + Copyright (C) 2010 University of Utah + All Rights Reserved. + + Purpose: + Additional loop transformations. 
+ + Notes: + + History: + 07/31/10 Created by Chun Chen +*****************************************************************************/ + +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + +void Loop::shift_to(int stmt_num, int level, int absolute_position) { + // combo + tile(stmt_num, level, 1, level, CountedTile); + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> active = getStatements(lex, 2*level-2); + shift(active, level, absolute_position); + + // remove unnecessary tiled loop since tile size is one + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n-2); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= 2*level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 2*level+3; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j-2), 1); + h.update_coef(mapping.input_var(j), -1); + } + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + + for (int j = 0; j < stmt[*i].loop_level.size(); j++) + if (j != level-1 && + stmt[*i].loop_level[j].type == LoopLevelTile && + stmt[*i].loop_level[j].payload >= level) + stmt[*i].loop_level[j].payload--; + + stmt[*i].loop_level.erase(stmt[*i].loop_level.begin()+level-1); + } +} + + +std::set<int> Loop::unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level) { + std::set<int> cleanup_stmts = unroll(stmt_num, level, unroll_amount,std::vector< std::vector<std::string> >(), cleanup_split_level); + for (std::set<int>::iterator i = cleanup_stmts.begin(); i != cleanup_stmts.end(); i++) + unroll(*i, level, 0); + + return cleanup_stmts; +} + +void Loop::peel(int stmt_num, int level, int peel_amount) { + // 
check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + if (peel_amount == 0) + return; + + std::set<int> subloop = getSubLoopNest(stmt_num, level); + std::vector<Relation> Rs; + for (std::set<int>::iterator i = subloop.begin(); i != subloop.end(); i++) { + Relation r = getNewIS(*i); + Relation f(r.n_set(), level); + F_And *f_root = f.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(f.input_var(2*j), 1); + h.update_coef(f.output_var(j), -1); + } + r = Composition(f, r); + r.simplify(); + Rs.push_back(r); + } + Relation hull = SimpleHull(Rs); + + if (peel_amount > 0) { + GEQ_Handle bound_eq; + bool found_bound = false; + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { + bound_eq = *e; + found_bound = true; + break; + } + if (!found_bound) + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { + bool is_bound = true; + for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { + std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); + if (!result.first) { + is_bound = false; + break; + } + } + if (is_bound) { + bound_eq = *e; + found_bound = true; + break; + } + } + if (!found_bound) + throw loop_error("can't find lower bound for peeling at loop level " + to_string(level)); + + for (int i = 1; i <= peel_amount; i++) { + Relation r(level); + F_Exists *f_exists = r.add_and()->add_exists(); + F_And *f_root = f_exists->add_and(); + GEQ_Handle h = f_root->add_GEQ(); + std::map<Variable_ID, Variable_ID> exists_mapping; + for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) + switch (cvi.curr_var()->kind()) { 
+ case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); + } + h.update_const(bound_eq.get_const() - i); + r.simplify(); + + split(stmt_num, level, r); + } + } + else { // peel_amount < 0 + GEQ_Handle bound_eq; + bool found_bound = false; + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { + bound_eq = *e; + found_bound = true; + break; + } + if (!found_bound) + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { + bool is_bound = true; + for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { + std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); + if (!result.first) { + is_bound = false; + break; + } + } + if (is_bound) { + bound_eq = *e; + found_bound = true; + break; + } + } + if (!found_bound) + throw loop_error("can't find upper bound for peeling at loop level " + to_string(level)); + + for (int i = 1; i <= -peel_amount; i++) { + Relation r(level); + F_Exists *f_exists = r.add_and()->add_exists(); + F_And *f_root = f_exists->add_and(); + GEQ_Handle h = f_root->add_GEQ(); + std::map<Variable_ID, Variable_ID> exists_mapping; + for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) + switch (cvi.curr_var()->kind()) { + case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, 
cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); + } + h.update_const(bound_eq.get_const() - i); + r.simplify(); + + split(stmt_num, level, r); + } + } +} + diff --git a/src/transformations/loop_tile.cc b/src/transformations/loop_tile.cc new file mode 100644 index 0000000..41c3e7f --- /dev/null +++ b/src/transformations/loop_tile.cc @@ -0,0 +1,587 @@ +/* + * loop_tile.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include <code_gen/codegen.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + + + +void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, + TilingMethodType method, int alignment_offset, int alignment_multiple) { + // check for sanity of parameters + if (tile_size < 0) + throw std::invalid_argument("invalid tile size"); + if (alignment_multiple < 1 || alignment_offset < 0) + throw std::invalid_argument("invalid alignment for tile"); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "there is no loop level " + to_string(level) + " for statement " + + to_string(stmt_num)); + if (outer_level <= 0 || outer_level > level) + throw std::invalid_argument( + "invalid tile controlling loop level " + + to_string(outer_level)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 
* level - 1; + int outer_dim = 2 * outer_level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_tiled_loop = getStatements(lex, dim - 1); + std::set<int> same_tile_controlling_loop = getStatements(lex, + outer_dim - 1); + + for (std::set<int>::iterator i = same_tiled_loop.begin(); + i != same_tiled_loop.end(); i++) { + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + j++) { + if (same_tiled_loop.find(j->first) != same_tiled_loop.end()) + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { + dim2 = stmt[*i].loop_level[dim2].payload - 1; + } + dim2 = stmt[*i].loop_level[dim2].payload; + + if (dv.hasNegative(dim2) && (!dv.quasi)) { + for (int l = outer_level; l < level; l++) + if (stmt[*i].loop_level[l - 1].type + != LoopLevelTile) { + if (dv.isCarried( + stmt[*i].loop_level[l - 1].payload) + && dv.hasPositive( + stmt[*i].loop_level[l - 1].payload)) + throw loop_error( + "loop error: Tiling is illegal, dependence violation!"); + } else { + + int dim3 = l - 1; + while (stmt[*i].loop_level[l - 1].type + != LoopLevelTile) { + dim3 = + stmt[*i].loop_level[l - 1].payload + - 1; + + } + + dim3 = stmt[*i].loop_level[l - 1].payload; + if (dim3 < level - 1) + if (dv.isCarried(dim3) + && dv.hasPositive(dim3)) + throw loop_error( + "loop error: Tiling is illegal, dependence violation!"); + } + } + } + } + } + } + // special case for no tiling + if (tile_size == 0) { + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 1; j <= 2 * outer_level - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + 
h.update_coef(r.output_var(j), -1); + } + EQ_Handle h1 = f_root->add_EQ(); + h1.update_coef(r.output_var(2 * outer_level), 1); + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(r.output_var(2 * outer_level + 1), 1); + for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j + 2), -1); + } + + stmt[*i].xform = Composition(copy(r), stmt[*i].xform); + } + } + // normal tiling + else { + std::set<int> private_stmt; + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + // should test dim's value directly but it is ok for now + if (same_tiled_loop.find(*i) == same_tiled_loop.end() + && overflow.find(*i) != overflow.end()) + private_stmt.insert(*i); + } + + // extract the union of the iteration space to be considered + Relation hull; + { + std::vector<Relation> r_list; + + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) + if (private_stmt.find(*i) == private_stmt.end()) { + Relation r = getNewIS(*i); + for (int j = dim + 2; j <= r.n_set(); j++) + r = Project(r, r.set_var(j)); + for (int j = outer_dim; j < dim; j++) + r = Project(r, j + 1, Set_Var); + for (int j = 0; j < outer_dim; j += 2) + r = Project(r, j + 1, Set_Var); + r.simplify(2, 4); + r_list.push_back(r); + } + + hull = SimpleHull(r_list); + } + + // extract the bound of the dimension to be tiled + Relation bound = get_loop_bound(hull, dim); + if (!bound.has_single_conjunct()) { + // further simplify the bound + hull = Approximate(hull); + bound = get_loop_bound(hull, dim); + + int i = outer_dim - 2; + while (!bound.has_single_conjunct() && i >= 0) { + hull = Project(hull, i + 1, Set_Var); + bound = get_loop_bound(hull, dim); + i -= 2; + } + + if (!bound.has_single_conjunct()) + throw loop_error("cannot handle tile bounds"); + } + + // separate lower and upper bounds + 
std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(dim + 1)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + if (lb_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop lower bound"); + if (ub_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop upper bound"); + + // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile + int simplest_lb = 0, simplest_ub = 0; + if (method == StridedTile) { + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) { + int cost = 0; + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + cost += 5; + break; + } + case Global_Var: { + cost += 2; + break; + } + default: + cost += 15; + break; + } + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + } + } + } else if (method == CountedTile) { + std::map<Variable_ID, coef_t> s1, s2, s3; + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) { + int cost = 0; + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + cost = INT_MAX - 2; + break; + } + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + if (cost == INT_MAX - 2) + cost = INT_MAX - 1; + else + cost = 
INT_MAX - 3; + break; + } + } + + if (cost == 0) { + for (std::map<Variable_ID, coef_t>::iterator k = + s1.begin(); k != s1.end(); k++) + if ((*k).second != 0) + cost += 5; + for (std::map<Variable_ID, coef_t>::iterator k = + s2.begin(); k != s2.end(); k++) + if ((*k).second != 0) + cost += 2; + for (std::map<Variable_ID, coef_t>::iterator k = + s3.begin(); k != s3.end(); k++) + if ((*k).second != 0) + cost += 15; + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + simplest_ub = j; + } + } + } + + // prepare the new transformation relations + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 0; j < outer_dim - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 1), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 3), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(outer_dim), 1); + h.update_const(-lex[outer_dim - 1]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + } + + // add tiling constraints. + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + F_And *f_super_root = stmt[*i].xform.and_with_and(); + F_Exists *f_exists = f_super_root->add_exists(); + F_And *f_root = f_exists->add_and(); + + // create a lower bound variable for easy formula creation later + Variable_ID aligned_lb; + { + Variable_ID lb = f_exists->declare(); + coef_t coef = lb_list[simplest_lb].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. 
if i >= m+5, then LB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lb, 1); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(lb_list[simplest_lb].get_const()); + } else { // e.g. if 2i >= m+5, then m+5 <= 2*LB < m+5+2 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(lb, (*ci).coef); + h2.update_coef(lb, -(*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, (*ci).coef); + h2.update_coef(v, -(*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(lb_list[simplest_lb].get_const()); + h2.update_const(-lb_list[simplest_lb].get_const()); + h2.update_const(coef - 1); + } + + Variable_ID offset_lb; + if (alignment_offset == 0) + offset_lb = lb; + else { + EQ_Handle h = f_root->add_EQ(); + offset_lb = f_exists->declare(); + h.update_coef(offset_lb, 1); + h.update_coef(lb, -1); + h.update_const(alignment_offset); + } 
+ + if (alignment_multiple == 1) { // trivial + aligned_lb = offset_lb; + } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB + aligned_lb = f_exists->declare(); + Variable_ID e = f_exists->declare(); + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(aligned_lb, 1); + h.update_coef(e, -alignment_multiple); + + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + h1.update_coef(e, alignment_multiple); + h2.update_coef(e, -alignment_multiple); + h1.update_coef(offset_lb, -1); + h2.update_coef(offset_lb, 1); + h1.update_const(alignment_multiple - 1); + } + } + + // create an upper bound variable for easy formula creation later + Variable_ID ub = f_exists->declare(); + { + coef_t coef = -ub_list[simplest_ub].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(ub, -1); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(ub_list[simplest_ub].get_const()); + } else { // e.g. 
if 2i <= m+5, then m+5-2 < 2*UB <= m+5 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(ub, -(*ci).coef); + h2.update_coef(ub, (*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, -(*ci).coef); + h2.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(-ub_list[simplest_ub].get_const()); + h2.update_const(ub_list[simplest_ub].get_const()); + h1.update_const(coef - 1); + } + } + + // insert tile controlling loop constraints + if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 + Variable_ID e = f_exists->declare(); + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(e, 1); + + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_coef(e, -tile_size); + h2.update_coef(aligned_lb, -1); + } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h2.update_coef(aligned_lb, -1); + h2.update_coef(ub, 1); + } + + // special care for private statements like overflow assignment + if (private_stmt.find(*i) != private_stmt.end()) { // e.g. 
ii <= UB + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); + h.update_coef(ub, 1); + } + + // restrict original loop index inside the tile + else { + if (method == StridedTile) { // e.g. ii <= i < ii + tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_const(tile_size - 1); + } else if (method == CountedTile) { // e.g. LB+32*ii <= i < LB+32*ii+tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(aligned_lb, -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + tile_size); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_const(tile_size - 1); + h2.update_coef(aligned_lb, 1); + } + } + } + } + + // update loop level information + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + break; + case LoopLevelTile: + if (stmt[*i].loop_level[j - 1].payload >= outer_level) + stmt[*i].loop_level[j - 1].payload++; + break; + default: + throw loop_error( + "unknown loop level type for statement " + + to_string(*i)); + } + + LoopLevel ll; + ll.type = LoopLevelTile; + ll.payload = level + 1; + ll.parallel_level = 0; + stmt[*i].loop_level.insert( + stmt[*i].loop_level.begin() + (outer_level - 1), ll); + } +} + diff --git a/src/transformations/loop_unroll.cc b/src/transformations/loop_unroll.cc new file mode 100644 index 0000000..86ffd84 --- 
/dev/null +++ b/src/transformations/loop_unroll.cc @@ -0,0 +1,1222 @@ +/* + * loop_unroll.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" +#include <math.h> + +using namespace omega; + + +std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, + std::vector<std::vector<std::string> > idxNames, + int cleanup_split_level) { + // check for sanity of parameters + // check for sanity of parameters + if (unroll_amount < 0) + throw std::invalid_argument( + "invalid unroll amount " + to_string(unroll_amount)); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + if (cleanup_split_level == 0) + cleanup_split_level = level; + if (cleanup_split_level > level) + throw std::invalid_argument( + "cleanup code must be split at or outside the unrolled loop level " + + to_string(level)); + if (cleanup_split_level <= 0) + throw std::invalid_argument( + "invalid split loop level " + to_string(cleanup_split_level)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + // nothing to do + if (unroll_amount == 1) + return std::set<int>(); + + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) { + std::vector<std::pair<int, DependenceVector> > D; + int n = stmt[*i].xform.n_out(); + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + j++) { + if (same_loop.find(j->first) != 
same_loop.end()) + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if (dv.type != DEP_CONTROL) { + + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { + dim2 = stmt[*i].loop_level[dim2].payload - 1; + } + dim2 = stmt[*i].loop_level[dim2].payload; + + /*if (dv.isCarried(dim2) + && (dv.hasNegative(dim2) && !dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + + if (dv.isCarried(dim2) + && (dv.hasPositive(dim2) && dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + */ + bool safe = false; + + if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { + if (dv.quasi) + throw loop_error( + "loop error: a quasi dependence with a positive carried distance"); + if (!dv.quasi) { + if (dv.lbounds[dim2] != posInfinity) { + //if (dv.lbounds[dim2] != negInfinity) + if (dv.lbounds[dim2] > unroll_amount) + safe = true; + } else + safe = true; + }/* else { + if (dv.ubounds[dim2] != negInfinity) { + if (dv.ubounds[dim2] != posInfinity) + if ((-(dv.ubounds[dim2])) > unroll_amount) + safe = true; + } else + safe = true; + }*/ + + if (!safe) { + for (int l = level + 1; l <= (n - 1) / 2; l++) { + int dim3 = l - 1; + + if (stmt[*i].loop_level[dim3].type + != LoopLevelTile) + dim3 = + stmt[*i].loop_level[dim3].payload; + else { + while (stmt[*i].loop_level[dim3].type + == LoopLevelTile) { + dim3 = + stmt[*i].loop_level[dim3].payload + - 1; + } + dim3 = + stmt[*i].loop_level[dim3].payload; + } + + if (dim3 > dim2) { + + if (dv.hasPositive(dim3)) + break; + else if (dv.hasNegative(dim3)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + } + } + } + } + } + } + } + } + // extract the intersection of the iteration space to be considered + Relation hull = Relation::True(level); + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) { + if 
(stmt[*i].IS.is_upper_bound_satisfiable()) { + Relation mapping(stmt[*i].IS.n_set(), level); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(j), 1); + h.update_coef(mapping.output_var(j), -1); + } + hull = Intersection(hull, + omega::Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); + hull.simplify(2, 4); + + } + } + for (int i = 1; i <= level; i++) { + std::string name = tmp_loop_var_name_prefix + to_string(i); + hull.name_set_var(i, name); + } + hull.setup_names(); + + // extract the exact loop bound of the dimension to be unrolled + if (is_single_loop_iteration(hull, level, this->known)) + return std::set<int>(); + Relation bound = get_loop_bound(hull, level, this->known); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() + || bound.is_tautology()) + throw loop_error("unable to extract loop bound for unrolling"); + + // extract the loop stride + coef_t stride; + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, + bound.set_var(level)); + if (result.second == NULL) + stride = 1; + else + stride = abs(result.first.get_coef(result.second)) + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(level)))); + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + + // simplify overflow expression for each pair of upper and lower bounds + std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table( + lb_list.size(), + std::vector<std::map<Variable_ID, int> >(ub_list.size(), + std::map<Variable_ID, int>())); + bool is_overflow_simplifiable = true; + for (int i = 0; i < lb_list.size(); i++) { + if 
(!is_overflow_simplifiable) + break; + + for (int j = 0; j < ub_list.size(); j++) { + // lower bound or upper bound has non-unit coefficient, can't simplify + if (ub_list[j].get_coef(bound.set_var(level)) != -1 + || lb_list[i].get_coef(bound.set_var(level)) != 1) { + is_overflow_simplifiable = false; + break; + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) + overflow_table[i][j][(*ci).var] += (*ci).coef; + + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += ub_list[j].get_const(); + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) { + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += lb_list[i].get_const(); + + overflow_table[i][j][NULL] += stride; + if (unroll_amount == 0 + || (overflow_table[i][j].size() == 1 + && overflow_table[i][j][NULL] / stride + < unroll_amount)) + unroll_amount = overflow_table[i][j][NULL] / stride; + } + } + + // loop 
iteration count can't be determined, bail out gracefully + if (unroll_amount == 0) + return std::set<int>(); + + // further simply overflow calculation using coefficients' modular + if (is_overflow_simplifiable) { + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) + if (stride == 1) { + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end();) + if ((*k).first != NULL) { + int t = int_mod_hat((*k).second, unroll_amount); + if (t == 0) { + overflow_table[i][j].erase(k++); + } else { + int t2 = hull.query_variable_mod((*k).first, + unroll_amount); + if (t2 != INT_MAX) { + overflow_table[i][j][NULL] += t * t2; + overflow_table[i][j].erase(k++); + } else { + (*k).second = t; + k++; + } + } + } else + k++; + + overflow_table[i][j][NULL] = int_mod_hat( + overflow_table[i][j][NULL], unroll_amount); + + // Since we don't have MODULO instruction in SUIF yet (only MOD), + // make all coef positive in the final formula + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end(); k++) + if ((*k).second < 0) + (*k).second += unroll_amount; + } + } + + // build overflow statement + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *overflow_code = NULL; + Relation cond_upper(level), cond_lower(level); + Relation overflow_constraint(0); + F_And *overflow_constraint_root = overflow_constraint.add_and(); + std::vector<Free_Var_Decl *> over_var_list; + if (is_overflow_simplifiable && lb_list.size() == 1) { + for (int i = 0; i < ub_list.size(); i++) { + if (overflow_table[0][i].size() == 1) { + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_const( + ((overflow_table[0][i][NULL] / stride) % unroll_amount) + * -stride); + } else { + // upper splitting condition + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new 
Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); // hack to fix omega relation variable names issue + CG_outputRepr *rhs = NULL; + bool is_split_illegal = false; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + if ((*j).first->kind() == Input_Var + && (*j).first->get_position() + >= cleanup_split_level) + is_split_illegal = true; + + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (is_split_illegal) { + rhs->clear(); + delete rhs; + throw loop_error( + "cannot split cleanup code at loop level " + + to_string(cleanup_split_level) + + " due to overflow variable data dependence"); + } + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); + } else if (is_overflow_simplifiable && 
ub_list.size() == 1) { + for (int i = 0; i < lb_list.size(); i++) { + + if (overflow_table[i][0].size() == 1) { + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_const(overflow_table[i][0][NULL] * -stride); + } else { + // lower splitting condition + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_coef(cond_lower.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); // hack to fix omega relation variable names issue + CG_outputRepr *rhs = NULL; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // upper splitting condition + GEQ_Handle h = 
cond_upper.and_with_GEQ(ub_list[0]); + } else { + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + + std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list; + for (int i = 0; i < lb_list.size(); i++) { + lb_repr_list.push_back( + output_lower_bound_repr(ocg, + lb_list[i], + bound.set_var(dim + 1), result.first, result.second, + bound, Relation::True(bound.n_set()), + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)), + uninterpreted_symbols[stmt_num])); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + } + for (int i = 0; i < ub_list.size(); i++) { + ub_repr_list.push_back( + output_upper_bound_repr(ocg, ub_list[i], + bound.set_var(dim + 1), bound, + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)), + uninterpreted_symbols[stmt_num])); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + } + + CG_outputRepr *lbRepr, *ubRepr; + if (lb_repr_list.size() > 1) { + //fprintf(stderr, "loop_unroll.cc createInvoke( max )\n"); + lbRepr = ocg->CreateInvoke("max", lb_repr_list); + } + else if (lb_repr_list.size() == 1) { + lbRepr = lb_repr_list[0]; + } + + if (ub_repr_list.size() > 1) { + //fprintf(stderr, "loop_unroll.cc createInvoke( min )\n"); + ubRepr = ocg->CreateInvoke("min", ub_repr_list); + } + else if (ub_repr_list.size() == 1) { + ubRepr = ub_repr_list[0]; + } + + // create overflow assignment + CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), + ocg->CreateInt(1)); + if (stride != 1) + rhs = ocg->CreateIntegerFloor(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = 
ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->CreateAssignment(0, lhs, rhs); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + } + + // insert overflow statement + int overflow_stmt_num = -1; + if (overflow_code != NULL) { + // build iteration space for overflow statement + Relation mapping(level, cleanup_split_level - 1); + F_And *f_root = mapping.add_and(); + for (int i = 1; i < cleanup_split_level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_coef(mapping.input_var(i), -1); + } + Relation overflow_IS = omega::Range(Restrict_Domain(mapping, copy(hull))); + for (int i = 1; i < cleanup_split_level; i++) + overflow_IS.name_set_var(i, hull.set_var(i)->name()); + overflow_IS.setup_names(); + + // build dumb transformation relation for overflow statement + Relation overflow_xform(cleanup_split_level - 1, + 2 * (cleanup_split_level - 1) + 1); + f_root = overflow_xform.add_and(); + for (int i = 1; i <= cleanup_split_level - 1; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i), 1); + h.update_coef(overflow_xform.input_var(i), -1); + + h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i - 1), 1); + h.update_const(-lex[2 * i - 2]); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef( + overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), + 1); + h.update_const(-lex[2 * (cleanup_split_level - 1)]); + + shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1); + Statement overflow_stmt; + + overflow_stmt.code = overflow_code; + overflow_stmt.IS = overflow_IS; + overflow_stmt.xform = overflow_xform; + 
overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1); + overflow_stmt.ir_stmt_node = NULL; + for (int i = 0; i < level - 1; i++) { + overflow_stmt.loop_level[i].type = + stmt[stmt_num].loop_level[i].type; + if (stmt[stmt_num].loop_level[i].type == LoopLevelTile + && stmt[stmt_num].loop_level[i].payload >= level) + overflow_stmt.loop_level[i].payload = -1; + else + overflow_stmt.loop_level[i].payload = + stmt[stmt_num].loop_level[i].payload; + overflow_stmt.loop_level[i].parallel_level = + stmt[stmt_num].loop_level[i].parallel_level; + } + + fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size()); + stmt.push_back(overflow_stmt); + + uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); + dep.insert(); + overflow_stmt_num = stmt.size() - 1; + overflow[overflow_stmt_num] = over_var_list; + + // update the global known information on overflow variable + this->known = Intersection(this->known, + Extend_Set(copy(overflow_constraint), + this->known.n_set() - overflow_constraint.n_set())); + + // update dependence graph + DependenceVector dv; + dv.type = DEP_CONTROL; + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + dep.connect(overflow_stmt_num, *i, dv); + dv.type = DEP_W2W; + { + IR_ScalarSymbol *overflow_sym = NULL; + std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef(overflow_code); + for (int i = scalars.size() - 1; i >= 0; i--) + if (scalars[i]->is_write()) { + overflow_sym = scalars[i]->symbol(); + break; + } + for (int i = scalars.size() - 1; i >= 0; i--) + delete scalars[i]; + dv.sym = overflow_sym; + } + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + int dep_dim = get_last_dep_dim_before(stmt_num, level); + for (int i = dep_dim + 1; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (int i = 0; 
i <= dep_dim; i++) { + if (i != 0) { + dv.lbounds[i - 1] = 0; + dv.ubounds[i - 1] = 0; + } + dv.lbounds[i] = 1; + dv.ubounds[i] = posInfinity; + dep.connect(overflow_stmt_num, overflow_stmt_num, dv); + } + } + + // split the loop so it can be fully unrolled + std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper); + std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower); + new_stmts.insert(new_stmts2.begin(), new_stmts2.end()); + + // check if unrolled statements can be trivially lumped together as one statement + bool can_be_lumped = true; + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (stmt[*i].loop_level.size() + != stmt[stmt_num].loop_level.size()) { + can_be_lumped = false; + break; + } + for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) + if (!(stmt[*i].loop_level[j].type + == stmt[stmt_num].loop_level[j].type + && stmt[*i].loop_level[j].payload + == stmt[stmt_num].loop_level[j].payload)) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 2 * level; j < lex.size() - 1; j += 2) + if (lex[j] != lex2[j]) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (is_inner_loop_depend_on_level(stmt[*i].IS, level, + this->known)) { + can_be_lumped = false; + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) + && Must_Be_Subset(copy(stmt[stmt_num].IS), + copy(stmt[*i].IS)))) { + can_be_lumped = false; + break; + } + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + for 
(DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) + if (j->second[k].type == DEP_CONTROL + || j->second[k].type == DEP_UNKNOWN) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + if (!can_be_lumped) + break; + } + } + + // insert unrolled statements + int old_num_stmt = stmt.size(); + if (!can_be_lumped) { + std::map<int, std::vector<int> > what_stmt_num; + + for (int j = 1; j < unroll_amount; j++) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + Statement new_stmt; + + std::vector<std::string> loop_vars; + std::vector<CG_outputRepr *> subs; + loop_vars.push_back(stmt[*i].IS.set_var(level)->name()); + subs.push_back( + ocg->CreatePlus( + ocg->CreateIdent( + stmt[*i].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); + new_stmt.code = ocg->CreateSubstitutedStmt(0, + stmt[*i].code->clone(), loop_vars, subs); + + new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); + add_loop_stride(new_stmt.IS, bound, level - 1, + unroll_amount * stride); + + new_stmt.xform = copy(stmt[*i].xform); + + new_stmt.loop_level = stmt[*i].loop_level; + new_stmt.ir_stmt_node = NULL; + + fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size()); + stmt.push_back(new_stmt); + + uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); + dep.insert(); + what_stmt_num[*i].push_back(stmt.size() - 1); + } + } + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = 
unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + || dv.type == DEP_UNKNOWN) { + D.push_back(std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect(what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect( + what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else if (lb == -posInfinity + && ub == posInfinity) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount; + kk++) + if (kk == 0) + D.push_back( + std::make_pair(j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } else { + for (int kk = 0; kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod(lb, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = 
+ floor( + static_cast<double>(lb) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod(ub, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) + / new_stride) + * new_stride + - new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) + / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + D.push_back( + std::make_pair( + j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); + } + } + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod( + lb + t + + 1, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod( + ub + t + + 1, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride + - new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, + dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } + } + } + } + } + + dep.vertex[i].second.erase(j++); + } else { + for (int kk = 0; kk < unroll_amount - 1; kk++) + if 
(what_stmt_num[i][kk] != -1) + dep.connect(what_stmt_num[i][kk], j->first, + j->second); + + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + for (int k = 0; k < j->second.size(); k++) + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[j->first][kk] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk], + j->second[k])); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + + // reset lexical order for the unrolled loop body + std::set<int> new_same_loop; + + int count = 0; + + for (std::map<int, std::vector<int> >::iterator i = + what_stmt_num.begin(); i != what_stmt_num.end(); i++) { + + new_same_loop.insert(i->first); + for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2) + assign_const(stmt[i->first].xform, k, + get_const(stmt[(what_stmt_num.begin())->first].xform, k, + Output_Var) + count); + count++; + for (int j = 0; j < i->second.size(); j++) { + new_same_loop.insert(i->second[j]); + for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k += + 2) + assign_const(stmt[i->second[j]].xform, k, + get_const( + stmt[(what_stmt_num.begin())->first].xform, + k, Output_Var) + count); + count++; + } + } + setLexicalOrder(dim + 1, new_same_loop, 0, idxNames); + } else { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + int max_level = stmt[stmt_num].loop_level.size(); + std::vector<std::pair<int, int> > stmt_order; + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + stmt_order.push_back( + std::make_pair( + get_const(stmt[*i].xform, 2 * max_level, + Output_Var), *i)); + sort(stmt_order.begin(), stmt_order.end()); + + Statement new_stmt; + new_stmt.code = NULL; + for (int j = 1; j < unroll_amount; j++) { + for (int i = 0; i < stmt_order.size(); i++) { + std::vector<std::string> loop_vars; + 
std::vector<CG_outputRepr *> subs; + + //fprintf(stderr, "loop_unroll.cc, will replace '%s with '%s+%d' ??\n", + // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), + // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), j * stride); + + loop_vars.push_back( + stmt[stmt_order[i].second].IS.set_var(level)->name()); + subs.push_back( + ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); // BUG HERE + //fprintf(stderr, "loop_unroll.cc subs now has %d parts\n", subs.size()); + //for (int k=0; k< subs.size(); k++) //fprintf(stderr, "subs[%d] = 0x%x\n", k, subs[k]); + + //fprintf(stderr, "ij %d %d ", i, j); + //fprintf(stderr, "old src was =\n"); + //stmt[stmt_order[i].second].code->dump(); fflush(stdout); //fprintf(stderr, "\n"); + + + + CG_outputRepr *code = ocg->CreateSubstitutedStmt(0, + stmt[stmt_order[i].second].code->clone(), + loop_vars, + subs); + + //fprintf(stderr, "old src is =\n"); + //stmt[stmt_order[i].second].code->dump(); fflush(stdout); //fprintf(stderr, "\n"); + + //fprintf(stderr, "substituted copy is =\n"); + //code->dump(); //fprintf(stderr, "\n\n"); + + + new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); + //fprintf(stderr, "appended code =\n"); + //new_stmt.code->dump(); + + } + } + + + + //fprintf(stderr, "new_stmt.IS = \n"); + new_stmt.IS = copy(stmt[stmt_num].IS); + new_stmt.xform = copy(stmt[stmt_num].xform); + assign_const(new_stmt.xform, 2 * max_level, + stmt_order[stmt_order.size() - 1].first + 1); + new_stmt.loop_level = stmt[stmt_num].loop_level; + new_stmt.ir_stmt_node = NULL; + + new_stmt.has_inspector = false; // ?? or from copied stmt? 
+ if (stmt[stmt_num].has_inspector) fprintf(stderr, "OLD STMT HAS INSPECTOR\n"); + else fprintf(stderr, "OLD STMT DOES NOT HAVE INSPECTOR\n"); + + fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size()); + stmt.push_back(new_stmt); + + uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); + dep.insert(); + + //fprintf(stderr, "update dependence graph\n"); + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + std::vector<DependenceVector> dvs11, dvs12, dvs22, + dvs21; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + || dv.type == DEP_UNKNOWN) { + if (i == j->first) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else + throw loop_error( + "unrolled statements lumped together illegally"); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else { + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs11.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + 
if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs21.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs12.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs22.push_back(dv); + } + } + } + if (dvs11.size() > 0) + D.push_back(std::make_pair(i, dvs11)); + if (dvs22.size() > 0) + dep.connect(old_num_stmt, old_num_stmt, dvs22); + if (dvs12.size() > 0) + D.push_back( + std::make_pair(old_num_stmt, dvs12)); + if (dvs21.size() > 0) + dep.connect(old_num_stmt, i, dvs21); + + dep.vertex[i].second.erase(j++); + } else { + dep.connect(old_num_stmt, j->first, j->second); + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + D.push_back( + std::make_pair(old_num_stmt, j->second)); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + } + + //fprintf(stderr, " loop_unroll.cc returning new_stmts\n"); + return new_stmts; +} + + |