From 699861922d5349ffa98b518f34016b2be2ca368d Mon Sep 17 00:00:00 2001 From: Tuowen Zhao Date: Fri, 23 Sep 2016 10:59:54 -0600 Subject: more changes --- src/transformations/loop.cc | 2791 +++++++++++++++++----------------- src/transformations/loop_basic.cc | 858 ++++++----- src/transformations/loop_datacopy.cc | 811 +++++----- src/transformations/loop_extra.cc | 124 +- src/transformations/loop_tile.cc | 420 +++-- src/transformations/loop_unroll.cc | 642 ++++---- 6 files changed, 2832 insertions(+), 2814 deletions(-) (limited to 'src/transformations') diff --git a/src/transformations/loop.cc b/src/transformations/loop.cc index 570bc90..10dc7bb 100644 --- a/src/transformations/loop.cc +++ b/src/transformations/loop.cc @@ -43,36 +43,36 @@ #define _DEBUG_ true - using namespace omega; -const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change +const std::string Loop::tmp_loop_var_name_prefix = std::string( + "chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change const std::string Loop::overflow_var_name_prefix = std::string("over"); -void echocontroltype( const IR_Control *control ) { - switch(control->type()) { - case IR_CONTROL_BLOCK: { - CHILL_DEBUG_PRINT("IR_CONTROL_BLOCK\n"); - break; - } - case IR_CONTROL_LOOP: { - CHILL_DEBUG_PRINT("IR_CONTROL_LOOP\n"); - break; - } - case IR_CONTROL_IF: { - CHILL_DEBUG_PRINT("IR_CONTROL_IF\n"); - break; - } - default: - CHILL_DEBUG_PRINT("just a bunch of statements?\n"); - +void echocontroltype(const IR_Control *control) { + switch (control->type()) { + case IR_CONTROL_BLOCK: { + CHILL_DEBUG_PRINT("IR_CONTROL_BLOCK\n"); + break; + } + case IR_CONTROL_LOOP: { + CHILL_DEBUG_PRINT("IR_CONTROL_LOOP\n"); + break; + } + case IR_CONTROL_IF: { + CHILL_DEBUG_PRINT("IR_CONTROL_IF\n"); + break; + } + default: + CHILL_DEBUG_PRINT("just a bunch of statements?\n"); + } // switch } omega::Relation Loop::getNewIS(int stmt_num) const { - + omega::Relation result; - + if (stmt[stmt_num].xform.is_null()) { omega::Relation known = omega::Extend_Set(omega::copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set()); @@ -81,32 +81,31 @@ omega::Relation Loop::getNewIS(int stmt_num) const { omega::Relation known = omega::Extend_Set(omega::copy(this->known), stmt[stmt_num].xform.n_out() - this->known.n_set()); result = omega::Intersection( - omega::Range( - omega::Restrict_Domain( - omega::copy(stmt[stmt_num].xform), - omega::copy(stmt[stmt_num].IS))), known); + omega::Range( + omega::Restrict_Domain( + omega::copy(stmt[stmt_num].xform), + omega::copy(stmt[stmt_num].IS))), known); } - + result.simplify(2, 4); - + return result; } - -void Loop::reduce(int stmt_num, - std::vector &level, +void Loop::reduce(int stmt_num, + std::vector &level, int param, - std::string func_name, + std::string func_name, std::vector &seq_levels, - std::vector cudaized_levels, + std::vector cudaized_levels, int bound_level) { // illegal instruction?? fprintf(stderr, " Loop::reduce( stmt %d, param %d, func_name (encrypted)...)\n", stmt, param); // , func_name.c_str()); - + //std::cout << "Reducing stmt# " << stmt_num << " at level " << level << "\n"; //ir->printStmt(stmt[stmt_num].code); - + if (stmt[stmt_num].reduction != 1) { CHILL_DEBUG_PRINT("Cannot reduce this statement\n"); return; @@ -132,9 +131,9 @@ void Loop::reduce(int stmt_num, delete last_compute_cg_; last_compute_cg_ = NULL; fprintf(stderr, "set last_compute_cg_ = NULL;\n"); - + omega::CG_outputBuilder *ocg = ir->builder(); - + omega::CG_outputRepr *funCallRepr; std::vector arg_repr_list; apply_xform(stmt_num); @@ -144,13 +143,13 @@ void Loop::reduce(int stmt_num, std::vector access2; for (int j = 0; j < access[i]->n_dim(); j++) { std::vector access3 = ir->FindArrayRef( - access[i]->index(j)); + access[i]->index(j)); access2.insert(access2.end(), access3.begin(), access3.end()); } if (access2.size() == 0) { if (names.find(access[i]->name()) == names.end()) { arg_repr_list.push_back( - ocg->CreateAddressOf(access[i]->convert())); + ocg->CreateAddressOf(access[i]->convert())); names.insert(access[i]->name()); if (access[i]->is_write()) reduced_write_refs.insert(access[i]->name()); @@ -165,14 +164,14 @@ void Loop::reduce(int stmt_num, } } } - + for (int i = 0; i < seq_levels.size(); i++) arg_repr_list.push_back( - ocg->CreateIdent( - stmt[stmt_num].IS.set_var(seq_levels[i])->name())); - + ocg->CreateIdent( + stmt[stmt_num].IS.set_var(seq_levels[i])->name())); + if (bound_level != -1) { - + omega::Relation new_IS = copy(stmt[stmt_num].IS); new_IS.copy_names(stmt[stmt_num].IS); new_IS.setup_names(); @@ -181,106 +180,106 @@ void Loop::reduce(int stmt_num, //omega::Relation r = getNewIS(stmt_num); for (int j = dim + 1; j <= new_IS.n_set(); j++) new_IS = omega::Project(new_IS, new_IS.set_var(j)); - + new_IS.simplify(2, 4); - + omega::Relation bound_ = get_loop_bound(copy(new_IS), dim - 1); omega::Variable_ID v = bound_.set_var(dim); std::vector ubList; for (omega::GEQ_Iterator e( - const_cast(bound_).single_conjunct()->GEQs()); + const_cast(bound_).single_conjunct()->GEQs()); e; e++) { if ((*e).get_coef(v) < 0) { // && (*e).is_const_except_for_global(v)) omega::CG_outputRepr *UPPERBOUND = - omega::output_upper_bound_repr(ir->builder(), *e, v, - bound_, - std::vector< - std::pair >( - bound_.n_set(), - std::make_pair( - static_cast(NULL), - 0)), uninterpreted_symbols[stmt_num]); + omega::output_upper_bound_repr(ir->builder(), *e, v, + bound_, + std::vector< + std::pair >( + bound_.n_set(), + std::make_pair( + static_cast(NULL), + 0)), uninterpreted_symbols[stmt_num]); if (UPPERBOUND != NULL) ubList.push_back(UPPERBOUND); - + } - + } - - omega::CG_outputRepr * ubRepr; + + omega::CG_outputRepr *ubRepr; if (ubList.size() > 1) { - + ubRepr = ir->builder()->CreateInvoke("min", ubList); arg_repr_list.push_back(ubRepr); } else if (ubList.size() == 1) arg_repr_list.push_back(ubList[0]); } - + funCallRepr = ocg->CreateInvoke(func_name, arg_repr_list); stmt[stmt_num].code = funCallRepr; for (int i = 0; i < level.size(); i++) { //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector(mapping.n_out(), NULL)); std::vector loop_vars; loop_vars.push_back(stmt[stmt_num].IS.set_var(level[i])->name()); - + std::vector subs; subs.push_back(ocg->CreateInt(0)); - + stmt[stmt_num].code = ocg->CreateSubstitutedStmt(0, stmt[stmt_num].code, loop_vars, subs); - + } - + omega::Relation new_IS = copy(stmt[stmt_num].IS); new_IS.copy_names(stmt[stmt_num].IS); new_IS.setup_names(); new_IS.simplify(); int old_size = new_IS.n_set(); - + omega::Relation R = omega::copy(stmt[stmt_num].IS); R.copy_names(stmt[stmt_num].IS); R.setup_names(); - + for (int i = level.size() - 1; i >= 0; i--) { int j; - + for (j = 0; j < cudaized_levels.size(); j++) { if (cudaized_levels[j] == level[i]) break; - + } - + if (j == cudaized_levels.size()) { R = omega::Project(R, level[i], omega::Input_Var); R.simplify(); - + } // - + } - + omega::F_And *f_Root = R.and_with_and(); for (int i = level.size() - 1; i >= 0; i--) { int j; - + for (j = 0; j < cudaized_levels.size(); j++) { if (cudaized_levels[j] == level[i]) break; - + } - + if (j == cudaized_levels.size()) { - + omega::EQ_Handle h = f_Root->add_EQ(); - + h.update_coef(R.set_var(level[i]), 1); h.update_const(-1); } // - + } - + R.simplify(); stmt[stmt_num].IS = R; } @@ -303,22 +302,22 @@ bool Loop::isInitialized() const { bool Loop::init_loop(std::vector &ir_tree, std::vector &ir_stmt) { - + CHILL_DEBUG_PRINT("extract_ir_stmts()\n"); CHILL_DEBUG_PRINT("ir_tree has %d statements\n", ir_tree.size()); ir_stmt = extract_ir_stmts(ir_tree); - - CHILL_DEBUG_PRINT("nesting level stmt size = %d\n", (int)ir_stmt.size()); + + CHILL_DEBUG_PRINT("nesting level stmt size = %d\n", (int) ir_stmt.size()); stmt_nesting_level_.resize(ir_stmt.size()); - + std::vector stmt_nesting_level(ir_stmt.size()); - - CHILL_DEBUG_PRINT("%d statements?\n", (int)ir_stmt.size()); - + + CHILL_DEBUG_PRINT("%d statements?\n", (int) ir_stmt.size()); + // find out how deeply nested each statement is. (how can these be different?) for (int i = 0; i < ir_stmt.size(); i++) { - fprintf(stderr, "i %d\n", i); + fprintf(stderr, "i %d\n", i); ir_stmt[i]->payload = i; int t = 0; ir_tree_node *itn = ir_stmt[i]; @@ -331,23 +330,24 @@ bool Loop::init_loop(std::vector &ir_tree, stmt_nesting_level[i] = t; CHILL_DEBUG_PRINT("stmt_nesting_level[%d] = %d\n", i, t); } - + if (actual_code.size() == 0) - actual_code = std::vector(ir_stmt.size()); - + actual_code = std::vector(ir_stmt.size()); + stmt = std::vector(ir_stmt.size()); - CHILL_DEBUG_PRINT("in init_loop, made %d stmts\n", (int)ir_stmt.size()); - - uninterpreted_symbols = std::vector > >(ir_stmt.size()); - uninterpreted_symbols_stringrepr = std::vector > >(ir_stmt.size()); - + CHILL_DEBUG_PRINT("in init_loop, made %d stmts\n", (int) ir_stmt.size()); + + uninterpreted_symbols = std::vector > >(ir_stmt.size()); + uninterpreted_symbols_stringrepr = std::vector > >( + ir_stmt.size()); + int n_dim = -1; int max_loc; //std::vector index; for (int i = 0; i < ir_stmt.size(); i++) { int max_nesting_level = -1; int loc; - + // find the max nesting level and remember the statement that was at that level for (int j = 0; j < ir_stmt.size(); j++) { if (stmt_nesting_level[j] > max_nesting_level) { @@ -355,23 +355,23 @@ bool Loop::init_loop(std::vector &ir_tree, loc = j; } } - + CHILL_DEBUG_PRINT("max nesting level %d at location %d\n", max_nesting_level, loc); - + // most deeply nested statement acting as a reference point if (n_dim == -1) { CHILL_DEBUG_PRINT("n_dim now max_nesting_level %d\n", max_nesting_level); n_dim = max_nesting_level; max_loc = loc; - + index = std::vector(n_dim); - + ir_tree_node *itn = ir_stmt[loc]; CHILL_DEBUG_PRINT("itn = stmt[%d]\n", loc); int cur_dim = n_dim - 1; while (itn->parent != NULL) { CHILL_DEBUG_PRINT("parent\n"); - + itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) { CHILL_DEBUG_PRINT("IR_CONTROL_LOOP cur_dim %d\n", cur_dim); @@ -382,258 +382,264 @@ bool Loop::init_loop(std::vector &ir_tree, } } } - + CHILL_DEBUG_PRINT("align loops by names,\n"); // align loops by names, temporary solution ir_tree_node *itn = ir_stmt[loc]; // defined outside loops?? int depth = stmt_nesting_level_[loc] - 1; - + for (int t = depth; t >= 0; t--) { int y = t; itn = ir_stmt[loc]; - + while ((itn->parent != NULL) && (y >= 0)) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) y--; } - + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { CG_outputBuilder *ocg = ir->builder(); - + itn->payload = depth - t; - + CG_outputRepr *code = - static_cast(ir_stmt[loc]->content)->extract(); - + static_cast(ir_stmt[loc]->content)->extract(); + std::vector index_expr; std::vector old_index; CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); index_expr.push_back(repl); old_index.push_back( - static_cast(itn->content)->index()->name()); + static_cast(itn->content)->index()->name()); code = ocg->CreateSubstitutedStmt(0, code, old_index, index_expr); - - replace.insert(std::pair(loc, code)); + + replace.insert(std::pair(loc, code)); //stmt[loc].code = code; - + } } - + CHILL_DEBUG_PRINT("set relation variable names ****\n"); // set relation variable names - + // this finds the loop variables for loops enclosing this statement and puts // them in an Omega Relation (just their names, which could fail) - + CHILL_DEBUG_PRINT("Relation r(%d)\n", n_dim); Relation r(n_dim); F_And *f_root = r.add_and(); itn = ir_stmt[loc]; int temp_depth = depth; while (itn->parent != NULL) { - + itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) { - fprintf(stderr, "it's a loop. temp_depth %d\n", temp_depth); + fprintf(stderr, "it's a loop. temp_depth %d\n", temp_depth); fprintf(stderr, "r.name_set_var( %d, %s )\n", itn->payload + 1, index[temp_depth].c_str()); r.name_set_var(itn->payload + 1, index[temp_depth]); - + temp_depth--; } //static_cast(itn->content)->index()->name()); } - fprintf(stderr, "Relation r "); r.print(); fflush(stdout); + fprintf(stderr, "Relation r "); + r.print(); + fflush(stdout); //fprintf(stderr, "f_root "); f_root->print(stderr); fprintf(stderr, "\n"); - + /*while (itn->parent != NULL) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) r.name_set_var(itn->payload+1, static_cast(itn->content)->index()->name()); }*/ - - - - - fprintf(stderr, "extract information from loop/if structures\n"); + + + + + fprintf(stderr, "extract information from loop/if structures\n"); // extract information from loop/if structures std::vector processed(n_dim, false); std::vector vars_to_be_reversed; - + std::vector insp_lb; std::vector insp_ub; - + itn = ir_stmt[loc]; while (itn->parent != NULL) { // keep heading upward itn = itn->parent; - + switch (itn->content->type()) { - case IR_CONTROL_LOOP: { - fprintf(stderr, "loop.cc l 462 IR_CONTROL_LOOP\n"); - IR_Loop *lp = static_cast(itn->content); - Variable_ID v = r.set_var(itn->payload + 1); - int c; - - try { - c = lp->step_size(); - //fprintf(stderr, "step size %d\n", c); - if (c > 0) { + case IR_CONTROL_LOOP: { + fprintf(stderr, "loop.cc l 462 IR_CONTROL_LOOP\n"); + IR_Loop *lp = static_cast(itn->content); + Variable_ID v = r.set_var(itn->payload + 1); + int c; + + try { + c = lp->step_size(); + //fprintf(stderr, "step size %d\n", c); + if (c > 0) { + CG_outputRepr *lb = lp->lower_bound(); + fprintf(stderr, "loop.cc, got the lower bound. it is:\n"); + lb->dump(); + printf("\n"); + fflush(stdout); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); + + CG_outputRepr *ub = lp->upper_bound(); + //fprintf(stderr, "loop.cc, got the upper bound. it is:\n"); + //ub->dump(); printf("\n"); fflush(stdout); + + + + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + cond, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); + else + throw ir_error("loop condition not supported"); + + + if ((ir->QueryExpOperation(lp->lower_bound()) + == IR_OP_ARRAY_VARIABLE) + && (ir->QueryExpOperation(lp->lower_bound()) + == ir->QueryExpOperation( + lp->upper_bound()))) { + + fprintf(stderr, "loop.cc lower and upper are both IR_OP_ARRAY_VARIABLE?\n"); + + std::vector v = + ir->QueryExpOperand(lp->lower_bound()); + IR_ArrayRef *ref = + static_cast(ir->Repr2Ref( + v[0])); + std::string s0 = ref->name(); + std::vector v2 = + ir->QueryExpOperand(lp->upper_bound()); + IR_ArrayRef *ref2 = + static_cast(ir->Repr2Ref( + v2[0])); + std::string s1 = ref2->name(); + + if (s0 == s1) { + insp_lb.push_back(s0); + insp_ub.push_back(s1); + + } + + } + + + } else if (c < 0) { + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *lb = lp->lower_bound(); + lb = ocg->CreateMinus(NULL, lb); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); + CG_outputRepr *ub = lp->upper_bound(); + ub = ocg->CreateMinus(NULL, ub); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_GE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LE, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); + else if (cond == IR_COND_GT) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LT, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); + else + throw ir_error("loop condition not supported"); + + vars_to_be_reversed.push_back(lp->index()->name()); + } else + throw ir_error("loop step size zero"); + } catch (const ir_error &e) { + actual_code[loc] = + static_cast(ir_stmt[loc]->content)->extract(); + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector(); + itn->content = itn->content->convert(); + return false; + } + + // check for loop increment or decrement that is not 1 + //fprintf(stderr, "abs(c)\n"); + if (abs(c) != 1) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(); + F_And *f_and = f_exists->add_and(); + Stride_Handle h = f_and->add_stride(abs(c)); + if (c > 0) + h.update_coef(e, 1); + else + h.update_coef(e, -1); + h.update_coef(v, -1); CG_outputRepr *lb = lp->lower_bound(); - fprintf(stderr, "loop.cc, got the lower bound. it is:\n"); - lb->dump(); printf("\n"); fflush(stdout); - - exp2formula(ir, r, f_root, freevar, lb, v, 's', - IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - - CG_outputRepr *ub = lp->upper_bound(); - //fprintf(stderr, "loop.cc, got the upper bound. it is:\n"); - //ub->dump(); printf("\n"); fflush(stdout); - - - - IR_CONDITION_TYPE cond = lp->stop_cond(); - if (cond == IR_COND_LT || cond == IR_COND_LE) - exp2formula(ir, r, f_root, freevar, ub, v, 's', - cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, + true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); + } + + processed[itn->payload] = true; + break; + } + + + case IR_CONTROL_IF: { + fprintf(stderr, "IR_CONTROL_IF\n"); + IR_If *theif = static_cast(itn->content); + + CG_outputRepr *cond = + static_cast(itn->content)->condition(); + + try { + if (itn->payload % 2 == 1) + exp2constraint(ir, r, f_root, freevar, cond, true, uninterpreted_symbols[i], + uninterpreted_symbols_stringrepr[i]); + else { + F_Not *f_not = f_root->add_not(); + F_And *f_and = f_not->add_and(); + exp2constraint(ir, r, f_and, freevar, cond, true, uninterpreted_symbols[i], + uninterpreted_symbols_stringrepr[i]); + } + } catch (const ir_error &e) { + std::vector *t; + if (itn->parent == NULL) + t = &ir_tree; else - throw ir_error("loop condition not supported"); - - - if ((ir->QueryExpOperation(lp->lower_bound()) - == IR_OP_ARRAY_VARIABLE) - && (ir->QueryExpOperation(lp->lower_bound()) - == ir->QueryExpOperation( - lp->upper_bound()))) { - - fprintf(stderr, "loop.cc lower and upper are both IR_OP_ARRAY_VARIABLE?\n"); - - std::vector v = - ir->QueryExpOperand(lp->lower_bound()); - IR_ArrayRef *ref = - static_cast(ir->Repr2Ref( - v[0])); - std::string s0 = ref->name(); - std::vector v2 = - ir->QueryExpOperand(lp->upper_bound()); - IR_ArrayRef *ref2 = - static_cast(ir->Repr2Ref( - v2[0])); - std::string s1 = ref2->name(); - - if (s0 == s1) { - insp_lb.push_back(s0); - insp_ub.push_back(s1); - + t = &(itn->parent->children); + int id = itn->payload; + int i = t->size() - 1; + while (i >= 0) { + if ((*t)[i] == itn) { + for (int j = 0; j < itn->children.size(); j++) + delete itn->children[j]; + itn->children = std::vector(); + itn->content = itn->content->convert(); + } else if ((*t)[i]->payload >> 1 == id >> 1) { + delete (*t)[i]; + t->erase(t->begin() + i); } - + i--; } - - - } else if (c < 0) { - CG_outputBuilder *ocg = ir->builder(); - CG_outputRepr *lb = lp->lower_bound(); - lb = ocg->CreateMinus(NULL, lb); - exp2formula(ir, r, f_root, freevar, lb, v, 's', - IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - CG_outputRepr *ub = lp->upper_bound(); - ub = ocg->CreateMinus(NULL, ub); - IR_CONDITION_TYPE cond = lp->stop_cond(); - if (cond == IR_COND_GE) - exp2formula(ir, r, f_root, freevar, ub, v, 's', - IR_COND_LE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - else if (cond == IR_COND_GT) - exp2formula(ir, r, f_root, freevar, ub, v, 's', - IR_COND_LT, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - else - throw ir_error("loop condition not supported"); - - vars_to_be_reversed.push_back(lp->index()->name()); - } else - throw ir_error("loop step size zero"); - } catch (const ir_error &e) { - actual_code[loc] = - static_cast(ir_stmt[loc]->content)->extract(); + return false; + } + + break; + } + default: + //fprintf(stderr, "default?\n"); for (int i = 0; i < itn->children.size(); i++) delete itn->children[i]; itn->children = std::vector(); itn->content = itn->content->convert(); return false; - } - - // check for loop increment or decrement that is not 1 - //fprintf(stderr, "abs(c)\n"); - if (abs(c) != 1) { - F_Exists *f_exists = f_root->add_exists(); - Variable_ID e = f_exists->declare(); - F_And *f_and = f_exists->add_and(); - Stride_Handle h = f_and->add_stride(abs(c)); - if (c > 0) - h.update_coef(e, 1); - else - h.update_coef(e, -1); - h.update_coef(v, -1); - CG_outputRepr *lb = lp->lower_bound(); - exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, - true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - } - - processed[itn->payload] = true; - break; - } - - - case IR_CONTROL_IF: { - fprintf(stderr, "IR_CONTROL_IF\n"); - IR_If *theif = static_cast(itn->content); - - CG_outputRepr *cond = - static_cast(itn->content)->condition(); - - try { - if (itn->payload % 2 == 1) - exp2constraint(ir, r, f_root, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - else { - F_Not *f_not = f_root->add_not(); - F_And *f_and = f_not->add_and(); - exp2constraint(ir, r, f_and, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - } - } catch (const ir_error &e) { - std::vector *t; - if (itn->parent == NULL) - t = &ir_tree; - else - t = &(itn->parent->children); - int id = itn->payload; - int i = t->size() - 1; - while (i >= 0) { - if ((*t)[i] == itn) { - for (int j = 0; j < itn->children.size(); j++) - delete itn->children[j]; - itn->children = std::vector(); - itn->content = itn->content->convert(); - } else if ((*t)[i]->payload >> 1 == id >> 1) { - delete (*t)[i]; - t->erase(t->begin() + i); - } - i--; - } - return false; - } - - break; - } - default: - //fprintf(stderr, "default?\n"); - for (int i = 0; i < itn->children.size(); i++) - delete itn->children[i]; - itn->children = std::vector(); - itn->content = itn->content->convert(); - return false; } } - - + + //fprintf(stderr, "add information for missing loops n_dim(%d)\n", n_dim); // add information for missing loops for (int j = 0; j < n_dim; j++) @@ -645,18 +651,18 @@ bool Loop::init_loop(std::vector &ir_tree, && itn->payload == j) break; } - + Variable_ID v = r.set_var(j + 1); if (loc < max_loc) { - + CG_outputBuilder *ocg = ir->builder(); - + CG_outputRepr *lb = - static_cast(itn->content)->lower_bound(); - + static_cast(itn->content)->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, - false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); - + false, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); + /* if (ir->QueryExpOperation( static_cast(itn->content)->lower_bound()) == IR_OP_VARIABLE) { @@ -684,15 +690,15 @@ bool Loop::init_loop(std::vector &ir_tree, } */ - + } else { // loc > max_loc - + CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *ub = - static_cast(itn->content)->upper_bound(); - + static_cast(itn->content)->upper_bound(); + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, - false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]); + false, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]); /*if (ir->QueryExpOperation( static_cast(itn->content)->upper_bound()) == IR_OP_VARIABLE) { @@ -724,29 +730,29 @@ bool Loop::init_loop(std::vector &ir_tree, */ } } - + r.setup_names(); r.simplify(); - + // THIS IS MISSING IN PROTONU's for (int j = 0; j < insp_lb.size(); j++) { - + std::string lb = insp_lb[j] + "_"; std::string ub = lb + "_"; - + Global_Var_ID u, l; bool found_ub = false; bool found_lb = false; for (DNF_Iterator di(copy(r).query_DNF()); di; di++) for (Constraint_Iterator ci = (*di)->constraints(); ci; ci++) - + for (Constr_Vars_Iter cvi(*ci); cvi; cvi++) { Variable_ID v = cvi.curr_var(); if (v->kind() == Global_Var) if (v->get_global_var()->arity() > 0) { - + std::string name = - v->get_global_var()->base_name(); + v->get_global_var()->base_name(); if (name == lb) { l = v->get_global_var(); found_lb = true; @@ -755,9 +761,9 @@ bool Loop::init_loop(std::vector &ir_tree, found_ub = true; } } - + } - + if (found_lb && found_ub) { Relation known_(copy(r).n_set()); known_.copy_names(copy(r)); @@ -770,12 +776,12 @@ bool Loop::init_loop(std::vector &ir_tree, g.update_coef(index_lb, -1); g.update_const(-1); addKnown(known_); - + } - + } - - + + fprintf(stderr, "loop.cc L441 insert the statement\n"); // insert the statement CG_outputBuilder *ocg = ir->builder(); @@ -785,36 +791,38 @@ bool Loop::init_loop(std::vector &ir_tree, repl = ocg->CreateMinus(NULL, repl); reverse_expr.push_back(repl); } - fprintf(stderr, "loop.cc before extract\n"); + fprintf(stderr, "loop.cc before extract\n"); CG_outputRepr *code = - static_cast(ir_stmt[loc]->content)->extract(); + static_cast(ir_stmt[loc]->content)->extract(); fprintf(stderr, "code = ocg->CreateSubstitutedStmt(...)\n"); - ((CG_chillRepr *)code)->Dump(); fflush(stdout); - + ((CG_chillRepr *) code)->Dump(); + fflush(stdout); + code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed, reverse_expr); fprintf(stderr, "stmt\n"); - ((CG_chillRepr *)code)->Dump(); fflush(stdout); + ((CG_chillRepr *) code)->Dump(); + fflush(stdout); stmt[loc].code = code; stmt[loc].IS = r; - + //Anand: Add Information on uninterpreted function constraints to //Known relation - - fprintf(stderr, "loop.cc stmt[%d].loop_level has size n_dim %d\n", loc, n_dim); + + fprintf(stderr, "loop.cc stmt[%d].loop_level has size n_dim %d\n", loc, n_dim); stmt[loc].loop_level = std::vector(n_dim); stmt[loc].ir_stmt_node = ir_stmt[loc]; stmt[loc].has_inspector = false; - fprintf(stderr, "for int i < n_dim(%d)\n", n_dim); + fprintf(stderr, "for int i < n_dim(%d)\n", n_dim); for (int ii = 0; ii < n_dim; ii++) { stmt[loc].loop_level[ii].type = LoopLevelOriginal; stmt[loc].loop_level[ii].payload = ii; stmt[loc].loop_level[ii].parallel_level = 0; } - fprintf(stderr, "whew\n"); - + fprintf(stderr, "whew\n"); + stmt_nesting_level[loc] = -1; } dump(); @@ -824,36 +832,35 @@ bool Loop::init_loop(std::vector &ir_tree, } - Loop::Loop(const IR_Control *control) { - - CHILL_DEBUG_PRINT("control type is %d ", control->type()); + + CHILL_DEBUG_PRINT("control type is %d \n", control->type()); echocontroltype(control); - CHILL_DEBUG_PRINT("2set last_compute_cg_ = NULL; \n"); + CHILL_DEBUG_PRINT("set last_compute_cg_ = NULL; \n"); last_compute_cgr_ = NULL; last_compute_cg_ = NULL; ir = const_cast(control->ir_); // point to the CHILL IR that this loop came from - if (ir == 0) { - CHILL_DEBUG_PRINT("ir gotten from control = 0x%x\n", (long)ir); - CHILL_DEBUG_PRINT("loop.cc GONNA DIE SOON *******************************\n\n"); + if (ir == 0) { + CHILL_DEBUG_PRINT("ir gotten from control = 0x%x\n", (long) ir); + CHILL_DEBUG_PRINT("GONNA DIE SOON *******************************\n\n"); } - + init_code = NULL; cleanup_code = NULL; tmp_loop_var_name_counter = 1; overflow_var_name_counter = 1; known = Relation::True(0); - + CHILL_DEBUG_PRINT("calling build_ir_tree()\n"); CHILL_DEBUG_PRINT("about to clone control\n"); ir_tree = build_ir_tree(control->clone(), NULL); //fprintf(stderr,"in Loop::Loop. ir_tree has %ld parts\n", ir_tree.size()); - + // std::vector ir_stmt; //fprintf(stderr, "loop.cc after build_ir_tree() %ld statements\n", stmt.size()); - + int count = 0; //fprintf(stderr, "before init_loops, %d freevar\n", freevar.size()); //fprintf(stderr, "count %d\n", count++); @@ -861,19 +868,19 @@ Loop::Loop(const IR_Control *control) { while (!init_loop(ir_tree, ir_stmt)) { //fprintf(stderr, "count %d\n", count++); } - fprintf(stderr, "after init_loop, %d freevar\n", (int)freevar.size()); - - - fprintf(stderr, "loop.cc after init_loop, %d statements\n", (int)stmt.size()); + fprintf(stderr, "after init_loop, %d freevar\n", (int) freevar.size()); + + + fprintf(stderr, "loop.cc after init_loop, %d statements\n", (int) stmt.size()); for (int i = 0; i < stmt.size(); i++) { - std::map::iterator it = replace.find(i); - + std::map::iterator it = replace.find(i); + if (it != replace.end()) stmt[i].code = it->second; else stmt[i].code = stmt[i].code; } - + if (stmt.size() != 0) dep = DependenceGraph(stmt[0].IS.n_set()); else @@ -881,42 +888,42 @@ Loop::Loop(const IR_Control *control) { // init the dependence graph for (int i = 0; i < stmt.size(); i++) dep.insert(); - - fprintf(stderr, "this really REALLY needs some comments\n"); + + fprintf(stderr, "this really REALLY needs some comments\n"); // this really REALLY needs some comments for (int i = 0; i < stmt.size(); i++) { - fprintf(stderr, "i %d\n", i); + fprintf(stderr, "i %d\n", i); stmt[i].reduction = 0; // Manu -- initialization for (int j = i; j < stmt.size(); j++) { - fprintf(stderr, "j %d\n", j); + fprintf(stderr, "j %d\n", j); std::pair, - std::vector > dv = test_data_dependences( - ir, - stmt[i].code, - stmt[i].IS, - stmt[j].code, - stmt[j].IS, - freevar, - index, - stmt_nesting_level_[i], - stmt_nesting_level_[j], - uninterpreted_symbols[ i ], - uninterpreted_symbols_stringrepr[ i ]); - - fprintf(stderr, "dv.first.size() %d\n", (int)dv.first.size()); + std::vector > dv = test_data_dependences( + ir, + stmt[i].code, + stmt[i].IS, + stmt[j].code, + stmt[j].IS, + freevar, + index, + stmt_nesting_level_[i], + stmt_nesting_level_[j], + uninterpreted_symbols[i], + uninterpreted_symbols_stringrepr[i]); + + fprintf(stderr, "dv.first.size() %d\n", (int) dv.first.size()); for (int k = 0; k < dv.first.size(); k++) { - fprintf(stderr, "k1 %d\n", k); + fprintf(stderr, "k1 %d\n", k); if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], true)) dep.connect(i, j, dv.first[k]); else { dep.connect(j, i, dv.first[k].reverse()); } - + } - - for (int k = 0; k < dv.second.size(); k++) { - fprintf(stderr, "k2 %d\n", k); + + for (int k = 0; k < dv.second.size(); k++) { + fprintf(stderr, "k2 %d\n", k); if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], false)) dep.connect(j, i, dv.second[k]); @@ -926,64 +933,64 @@ Loop::Loop(const IR_Control *control) { } } } - - fprintf(stderr, "\n\n*** LOTS OF REDUCTIONS ***\n\n"); - + + fprintf(stderr, "\n\n*** LOTS OF REDUCTIONS ***\n\n"); + // TODO: Reduction check // Manu:: Initial implementation / algorithm std::set reducCand = std::set(); std::vector canReduce = std::vector(); - fprintf(stderr, "\ni range %d\n", stmt.size()); + fprintf(stderr, "\ni range %d\n", stmt.size()); for (int i = 0; i < stmt.size(); i++) { - fprintf(stderr, "i %d\n", i); + fprintf(stderr, "i %d\n", i); if (!dep.hasEdge(i, i)) { continue; } - fprintf(stderr, "dep.hasEdge(%d, %d)\n", i, i); + fprintf(stderr, "dep.hasEdge(%d, %d)\n", i, i); // for each statement check if it has all the three dependences (RAW, WAR, WAW) // If there is such a statement, it is a reduction candidate. Mark all reduction candidates. std::vector tdv = dep.getEdge(i, i); - fprintf(stderr, "tdv size %d\n", tdv.size()); + fprintf(stderr, "tdv size %d\n", tdv.size()); for (int j = 0; j < tdv.size(); j++) { - fprintf(stderr, "ij %d %d\n", i, j); - if (tdv[j].is_reduction_cand) { - fprintf(stderr, "reducCand.insert( %d )\n", i); + fprintf(stderr, "ij %d %d\n", i, j); + if (tdv[j].is_reduction_cand) { + fprintf(stderr, "reducCand.insert( %d )\n", i); reducCand.insert(i); } } } - - fprintf(stderr, "loop.cc reducCand.size() %d\n", reducCand.size()); + + fprintf(stderr, "loop.cc reducCand.size() %d\n", reducCand.size()); bool reduc; std::set::iterator it; - int counter = 0; + int counter = 0; for (it = reducCand.begin(); it != reducCand.end(); it++) { - fprintf(stderr, "counter %d\n", counter); + fprintf(stderr, "counter %d\n", counter); reduc = true; for (int j = 0; j < stmt.size(); j++) { - fprintf(stderr, "j %d\n", j); + fprintf(stderr, "j %d\n", j); if ((*it != j) && (stmt_nesting_level_[*it] < stmt_nesting_level_[j])) { if (dep.hasEdge(*it, j) || dep.hasEdge(j, *it)) { - fprintf(stderr, "counter %d j %d reduc = false\n", counter, j); + fprintf(stderr, "counter %d j %d reduc = false\n", counter, j); reduc = false; break; } } counter += 1; } - + if (reduc) { - fprintf(stderr, "canReduce.push_back()\n"); + fprintf(stderr, "canReduce.push_back()\n"); canReduce.push_back(*it); stmt[*it].reduction = 2; // First, assume that reduction is possible with some processing } } - - + + // If reduction is possible without processing, update the value of the reduction variable to 1 - fprintf(stderr, "loop.cc canReduce.size() %d\n", canReduce.size()); + fprintf(stderr, "loop.cc canReduce.size() %d\n", canReduce.size()); for (int i = 0; i < canReduce.size(); i++) { // Here, assuming that stmtType returns 1 when there is a single statement within stmt[i] if (stmtType(ir, stmt[canReduce[i]].code) == 1) { @@ -993,9 +1000,9 @@ Loop::Loop(const IR_Control *control) { stmt[canReduce[i]].reductionOp = opType; } } - + // printing out stuff for debugging - + if (DEP_DEBUG) { std::cout << "STATEMENTS THAT CAN BE REDUCED: \n"; for (int i = 0; i < canReduce.size(); i++) { @@ -1015,21 +1022,21 @@ Loop::Loop(const IR_Control *control) { } } // cleanup the IR tree - - fprintf(stderr, "init dumb transformation relations\n"); + + fprintf(stderr, "init dumb transformation relations\n"); // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] for (int i = 0; i < stmt.size(); i++) { int n = stmt[i].IS.n_set(); stmt[i].xform = Relation(n, 2 * n + 1); F_And *f_root = stmt[i].xform.add_and(); - + for (int j = 1; j <= n; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(stmt[i].xform.output_var(2 * j), 1); h.update_coef(stmt[i].xform.input_var(j), -1); } - + for (int j = 1; j <= 2 * n + 1; j += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(stmt[i].xform.output_var(j), 1); @@ -1037,7 +1044,7 @@ Loop::Loop(const IR_Control *control) { stmt[i].xform.simplify(); } //fprintf(stderr, "done with dumb\n"); - + if (stmt.size() != 0) num_dep_dim = stmt[0].IS.n_set(); else @@ -1056,19 +1063,19 @@ Loop::Loop(const IR_Control *control) { } Loop::~Loop() { - + delete last_compute_cgr_; delete last_compute_cg_; - + for (int i = 0; i < stmt.size(); i++) if (stmt[i].code != NULL) { stmt[i].code->clear(); delete stmt[i].code; } - + for (int i = 0; i < ir_tree.size(); i++) delete ir_tree[i]; - + if (init_code != NULL) { init_code->clear(); delete init_code; @@ -1080,54 +1087,52 @@ Loop::~Loop() { } - - int Loop::get_dep_dim_of(int stmt_num, int level) const { if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invaid statement " + to_string(stmt_num)); - + if (level < 1 || level > stmt[stmt_num].loop_level.size()) return -1; - + int trip_count = 0; while (true) { switch (stmt[stmt_num].loop_level[level - 1].type) { - case LoopLevelOriginal: - return stmt[stmt_num].loop_level[level - 1].payload; - case LoopLevelTile: - level = stmt[stmt_num].loop_level[level - 1].payload; - if (level < 1) - return -1; - if (level > stmt[stmt_num].loop_level.size()) - throw loop_error("incorrect loop level information for statement " - + to_string(stmt_num)); - break; - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(stmt_num)); + case LoopLevelOriginal: + return stmt[stmt_num].loop_level[level - 1].payload; + case LoopLevelTile: + level = stmt[stmt_num].loop_level[level - 1].payload; + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + throw loop_error("incorrect loop level information for statement " + + to_string(stmt_num)); + break; + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(stmt_num)); } trip_count++; if (trip_count >= stmt[stmt_num].loop_level.size()) throw loop_error( - "incorrect loop level information for statement " - + to_string(stmt_num)); + "incorrect loop level information for statement " + + to_string(stmt_num)); } } int Loop::get_last_dep_dim_before(int stmt_num, int level) const { if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invaid statement " + to_string(stmt_num)); - + if (level < 1) return -1; if (level > stmt[stmt_num].loop_level.size()) level = stmt[stmt_num].loop_level.size() + 1; - + for (int i = level - 1; i >= 1; i--) if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) return stmt[stmt_num].loop_level[i - 1].payload; - + return -1; } @@ -1139,14 +1144,14 @@ void Loop::print_internal_loop_structure() const { if (2 * j < lex.size()) std::cout << lex[2 * j]; switch (stmt[i].loop_level[j].type) { - case LoopLevelOriginal: - std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; - break; - case LoopLevelTile: - std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; - break; - default: - std::cout << "(unknown)"; + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; } std::cout << ' '; } @@ -1159,96 +1164,102 @@ void Loop::print_internal_loop_structure() const { } } -void Loop::debugRelations() const { - const int m = stmt.size(); +void Loop::debugRelations() const { + const int m = stmt.size(); { std::vector IS(m); std::vector xforms(m); - + for (int i = 0; i < m; i++) { IS[i] = stmt[i].IS; xforms[i] = stmt[i].xform; // const stucks } - - printf("\nxforms:\n"); - for (int i = 0; i < m; i++) { xforms[i].print(); printf("\n"); } - printf("\nIS:\n"); - for (int i = 0; i < m; i++) { IS[i].print(); printf("\n"); } - fflush(stdout); + + printf("\nxforms:\n"); + for (int i = 0; i < m; i++) { + xforms[i].print(); + printf("\n"); + } + printf("\nIS:\n"); + for (int i = 0; i < m; i++) { + IS[i].print(); + printf("\n"); + } + fflush(stdout); } } CG_outputRepr *Loop::getCode(int effort) const { - fprintf(stderr,"\nloop.cc Loop::getCode( effort %d )\n", effort ); - + fprintf(stderr, "\nloop.cc Loop::getCode( effort %d )\n", effort); + const int m = stmt.size(); if (m == 0) return NULL; const int n = stmt[0].xform.n_out(); - + if (last_compute_cg_ == NULL) { - fprintf(stderr, "Loop::getCode() last_compute_cg_ == NULL\n"); - + fprintf(stderr, "Loop::getCode() last_compute_cg_ == NULL\n"); + std::vector IS(m); std::vector xforms(m); for (int i = 0; i < m; i++) { IS[i] = stmt[i].IS; xforms[i] = stmt[i].xform; } - - debugRelations(); - - + + debugRelations(); + + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - printf("\nknown:\n"); known.print(); printf("\n\n"); fflush(stdout); - + printf("\nknown:\n"); + known.print(); + printf("\n\n"); + fflush(stdout); + last_compute_cg_ = new CodeGen(xforms, IS, known); delete last_compute_cgr_; last_compute_cgr_ = NULL; - } - else { - fprintf(stderr, "Loop::getCode() last_compute_cg_ NOT NULL\n"); + } else { + fprintf(stderr, "Loop::getCode() last_compute_cg_ NOT NULL\n"); } - + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { delete last_compute_cgr_; last_compute_cgr_ = last_compute_cg_->buildAST(effort); last_compute_effort_ = effort; } - + std::vector stmts(m); - fprintf(stderr, "%d stmts\n", m); + fprintf(stderr, "%d stmts\n", m); for (int i = 0; i < m; i++) stmts[i] = stmt[i].code; CG_outputBuilder *ocg = ir->builder(); - fprintf(stderr, "calling last_compute_cgr_->printRepr()\n"); - CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts, + fprintf(stderr, "calling last_compute_cgr_->printRepr()\n"); + CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts, uninterpreted_symbols); - + if (init_code != NULL) repr = ocg->StmtListAppend(init_code->clone(), repr); if (cleanup_code != NULL) repr = ocg->StmtListAppend(repr, cleanup_code->clone()); - - fprintf(stderr,"\nloop.cc Loop::getCode( effort %d ) DONE\n", effort ); + + fprintf(stderr, "\nloop.cc Loop::getCode( effort %d ) DONE\n", effort); return repr; } - - void Loop::printCode(int effort) const { - fprintf(stderr,"\nloop.cc Loop::printCode( effort %d )\n", effort ); + fprintf(stderr, "\nloop.cc Loop::printCode( effort %d )\n", effort); const int m = stmt.size(); if (m == 0) return; const int n = stmt[0].xform.n_out(); - + if (last_compute_cg_ == NULL) { - fprintf(stderr, "Loop::printCode(), last_compute_cg_ == NULL\n"); + fprintf(stderr, "Loop::printCode(), last_compute_cg_ == NULL\n"); std::vector IS(m); std::vector xforms(m); for (int i = 0; i < m; i++) { @@ -1256,22 +1267,21 @@ void Loop::printCode(int effort) const { xforms[i] = stmt[i].xform; } Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - + last_compute_cg_ = new CodeGen(xforms, IS, known); delete last_compute_cgr_; last_compute_cgr_ = NULL; - } - else fprintf(stderr, "Loop::printCode(), last_compute_cg_ NOT NULL\n"); - + } else fprintf(stderr, "Loop::printCode(), last_compute_cg_ NOT NULL\n"); + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { delete last_compute_cgr_; last_compute_cgr_ = last_compute_cg_->buildAST(effort); last_compute_effort_ = effort; } - + std::string repr = last_compute_cgr_->printString( - uninterpreted_symbols_stringrepr); - fprintf(stderr, "leaving Loop::printCode()\n"); + uninterpreted_symbols_stringrepr); + fprintf(stderr, "leaving Loop::printCode()\n"); std::cout << repr << std::endl; } @@ -1297,20 +1307,20 @@ void Loop::printDependenceGraph() const { std::vector Loop::getNewIS() const { const int m = stmt.size(); - + std::vector new_IS(m); for (int i = 0; i < m; i++) new_IS[i] = getNewIS(i); - + return new_IS; } // pragmas are tied to loops only ??? void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { // check sanity of parameters - if(stmt_num < 0) + if (stmt_num < 0) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - + CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *code = stmt[stmt_num].code; ocg->CreatePragmaAttribute(code, level, pragmaText); @@ -1331,9 +1341,9 @@ void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) { // check sanity of parameters - if(stmt_num < 0) + if (stmt_num < 0) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - + CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *code = stmt[stmt_num].code; ocg->CreatePrefetchAttribute(code, level, arrName, hint); @@ -1341,13 +1351,13 @@ void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hin std::vector Loop::getLexicalOrder(int stmt_num) const { assert(stmt_num < stmt.size()); - + const int n = stmt[stmt_num].xform.n_out(); std::vector lex(n, 0); - + for (int i = 0; i < n; i += 2) lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); - + return lex; } @@ -1356,13 +1366,13 @@ std::vector Loop::getLexicalOrder(int stmt_num) const { std::set Loop::getSubLoopNest(int stmt_num, int level) const { assert(stmt_num >= 0 && stmt_num < stmt.size()); assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); - + std::set working; for (int i = 0; i < stmt.size(); i++) if (const_cast(this)->stmt[i].IS.is_upper_bound_satisfiable() && stmt[i].loop_level.size() >= level) working.insert(i); - + for (int i = 1; i <= level; i++) { int a = getLexicalOrder(stmt_num, i); for (std::set::iterator j = working.begin(); j != working.end();) { @@ -1373,14 +1383,14 @@ std::set Loop::getSubLoopNest(int stmt_num, int level) const { ++j; } } - + return working; } int Loop::getLexicalOrder(int stmt_num, int level) const { assert(stmt_num >= 0 && stmt_num < stmt.size()); - assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); - + assert(level > 0 && level <= stmt[stmt_num].loop_level.size() + 1); + Relation &r = const_cast(this)->stmt[stmt_num].xform; for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++) if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { @@ -1395,15 +1405,15 @@ int Loop::getLexicalOrder(int stmt_num, int level) const { return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t; } } - + throw loop_error( - "can't find lexical order for statement " + to_string(stmt_num) - + "'s loop level " + to_string(level)); + "can't find lexical order for statement " + to_string(stmt_num) + + "'s loop level " + to_string(level)); } std::set Loop::getStatements(const std::vector &lex, int dim) const { const int m = stmt.size(); - + std::set same_loops; for (int i = 0; i < m; i++) { if (dim < 0) @@ -1417,32 +1427,32 @@ std::set Loop::getStatements(const std::vector &lex, int dim) const { if (j > dim) same_loops.insert(i); } - + } - + return same_loops; } void Loop::shiftLexicalOrder(const std::vector &lex, int dim, int amount) { const int m = stmt.size(); - + if (amount == 0) return; - + for (int i = 0; i < m; i++) { std::vector lex2 = getLexicalOrder(i); - + bool need_shift = true; - + for (int j = 0; j < dim; j++) if (lex2[j] != lex[j]) { need_shift = false; break; } - + if (!need_shift) continue; - + if (amount > 0) { if (lex2[dim] < lex[dim]) continue; @@ -1450,94 +1460,94 @@ void Loop::shiftLexicalOrder(const std::vector &lex, int dim, int amount) { if (lex2[dim] > lex[dim]) continue; } - + assign_const(stmt[i].xform, dim, lex2[dim] + amount); } } std::vector > Loop::sort_by_same_loops(std::set active, int level) { - + std::set not_nested_at_this_level; - std::map > sorted_by_loop; + std::map > sorted_by_loop; std::map > sorted_by_lex_order; std::vector > to_return; bool lex_order_already_set = false; for (std::set::iterator it = active.begin(); it != active.end(); it++) { - + if (stmt[*it].ir_stmt_node == NULL) lex_order_already_set = true; } - + if (lex_order_already_set) { - + for (std::set::iterator it = active.begin(); it != active.end(); it++) { std::map >::iterator it2 = - sorted_by_lex_order.find( - get_const(stmt[*it].xform, 2 * (level - 1), - Output_Var)); - + sorted_by_lex_order.find( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var)); + if (it2 != sorted_by_lex_order.end()) it2->second.insert(*it); else { - + std::set to_insert; - + to_insert.insert(*it); - + sorted_by_lex_order.insert( - std::pair >( - get_const(stmt[*it].xform, 2 * (level - 1), - Output_Var), to_insert)); - + std::pair >( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var), to_insert)); + } - + } - + for (std::map >::iterator it2 = - sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); + sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); it2++) to_return.push_back(it2->second); - + } else { - + for (std::set::iterator it = active.begin(); it != active.end(); it++) { - - ir_tree_node* itn = stmt[*it].ir_stmt_node; + + ir_tree_node *itn = stmt[*it].ir_stmt_node; itn = itn->parent; //while (itn->content->type() != IR_CONTROL_LOOP && itn != NULL) // itn = itn->parent; - + while ((itn != NULL) && (itn->payload != level - 1)) { itn = itn->parent; - while (itn != NULL && itn->content->type() != IR_CONTROL_LOOP ) + while (itn != NULL && itn->content->type() != IR_CONTROL_LOOP) itn = itn->parent; } - + if (itn == NULL) not_nested_at_this_level.insert(*it); else { - std::map >::iterator it2 = - sorted_by_loop.find(itn); - + std::map >::iterator it2 = + sorted_by_loop.find(itn); + if (it2 != sorted_by_loop.end()) it2->second.insert(*it); else { std::set to_insert; - + to_insert.insert(*it); - + sorted_by_loop.insert( - std::pair >(itn, - to_insert)); - + std::pair >(itn, + to_insert)); + } - + } - + } if (not_nested_at_this_level.size() > 0) { for (std::set::iterator it = not_nested_at_this_level.begin(); @@ -1545,34 +1555,34 @@ std::vector > Loop::sort_by_same_loops(std::set active, std::set temp; temp.insert(*it); to_return.push_back(temp); - + } } - for (std::map >::iterator it2 = - sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++) + for (std::map >::iterator it2 = + sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++) to_return.push_back(it2->second); } return to_return; } -void update_successors(int n, - int node_num[], +void update_successors(int n, + int node_num[], int cant_fuse_with[], - Graph, bool> &g, + Graph, bool> &g, std::list &work_list, - std::list &type_list, + std::list &type_list, std::vector types) { - + std::set disconnect; for (Graph, bool>::EdgeList::iterator i = - g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { + g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { int m = i->first; - + if (node_num[m] != -1) throw loop_error("Graph input for fusion has cycles not a DAG!!"); - + std::vector check_ = g.getEdge(n, m); - + bool has_bad_edge_path = false; for (int i = 0; i < check_.size(); i++) if (!check_[i]) { @@ -1589,12 +1599,12 @@ void update_successors(int n, } disconnect.insert(m); } - - + + for (std::set::iterator i = disconnect.begin(); i != disconnect.end(); i++) { g.disconnect(n, *i); - + bool no_incoming_edges = true; for (int j = 0; j < g.vertex.size(); j++) if (j != *i) @@ -1602,7 +1612,7 @@ void update_successors(int n, no_incoming_edges = false; break; } - + if (no_incoming_edges) { work_list.push_back(*i); type_list.push_back(types[*i]); @@ -1611,35 +1621,32 @@ void update_successors(int n, } - int Loop::getMinLexValue(std::set stmts, int level) { - + int min; - + std::set::iterator it = stmts.begin(); min = getLexicalOrder(*it, level); - + for (; it != stmts.end(); it++) { int curr = getLexicalOrder(*it, level); if (curr < min) min = curr; } - + return min; } - - Graph, bool> Loop::construct_induced_graph_at_level( - std::vector > s, DependenceGraph dep, int dep_dim) { + std::vector > s, DependenceGraph dep, int dep_dim) { Graph, bool> g; - + for (int i = 0; i < s.size(); i++) g.insert(s[i]); - + for (int i = 0; i < s.size(); i++) { - + for (int j = i + 1; j < s.size(); j++) { bool has_true_edge_i_to_j = false; bool has_true_edge_j_to_i = false; @@ -1647,32 +1654,32 @@ Graph, bool> Loop::construct_induced_graph_at_level( bool is_connected_j_to_i = false; for (std::set::iterator ii = s[i].begin(); ii != s[i].end(); ii++) { - + for (std::set::iterator jj = s[j].begin(); jj != s[j].end(); jj++) { - + std::vector dvs = dep.getEdge(*ii, *jj); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && dvs[k].has_been_carried_at(dep_dim))) { - + if (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at( - dep_dim)) { + dep_dim)) { //g.connect(i, j, false); is_connected_i_to_j = true; break; } else { //g.connect(i, j, true); - + has_true_edge_i_to_j = true; //break } } - + //if (is_connected) - + // break; // if (has_true_edge_i_to_j && !is_connected_i_to_j) // g.connect(i, j, true); @@ -1681,72 +1688,71 @@ Graph, bool> Loop::construct_induced_graph_at_level( if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && dvs[k].has_been_carried_at(dep_dim))) { - + if (is_connected_i_to_j || has_true_edge_i_to_j) throw loop_error( - "Graph input for fusion has cycles not a DAG!!"); - + "Graph input for fusion has cycles not a DAG!!"); + if (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at( - dep_dim)) { + dep_dim)) { //g.connect(i, j, false); is_connected_j_to_i = true; break; } else { //g.connect(i, j, true); - + has_true_edge_j_to_i = true; //break; } } - + // if (is_connected) //break; // if (is_connected) //break; } - + //if (is_connected) // break; } - - + + if (is_connected_i_to_j) g.connect(i, j, false); else if (has_true_edge_i_to_j) g.connect(i, j, true); - + if (is_connected_j_to_i) g.connect(j, i, false); else if (has_true_edge_j_to_i) g.connect(j, i, true); - + } } return g; } - std::vector > Loop::typed_fusion(Graph, bool> g, std::vector &types) { - + bool roots[g.vertex.size()]; - + for (int i = 0; i < g.vertex.size(); i++) roots[i] = true; - + for (int i = 0; i < g.vertex.size(); i++) for (int j = i + 1; j < g.vertex.size(); j++) { - + if (g.hasEdge(i, j)) roots[j] = false; - + if (g.hasEdge(j, i)) roots[i] = false; - + } - + std::list work_list; std::list type_list; int cant_fuse_with[g.vertex.size()]; @@ -1755,11 +1761,11 @@ std::vector > Loop::typed_fusion(Graph, bool> g, int lastnum = 0; std::vector > s; //Each Fused set's representative node - + int node_to_fused_nodes[g.vertex.size()]; int node_num[g.vertex.size()]; int next[g.vertex.size()]; - + for (int i = 0; i < g.vertex.size(); i++) { if (roots[i] == true) { work_list.push_back(i); @@ -1770,17 +1776,17 @@ std::vector > Loop::typed_fusion(Graph, bool> g, node_num[i] = -1; next[i] = 0; } - - + + // topological sort according to chun's permute algorithm // std::vector > s = g.topoSort(); std::vector > s2 = g.topoSort(); if (work_list.empty() || (s2.size() != g.vertex.size())) { - + std::cout << s2.size() << "\t" << g.vertex.size() << std::endl; throw loop_error("Input for fusion not a DAG!!"); - - + + } int fused_nodes_counter = 0; while (!work_list.empty()) { @@ -1802,19 +1808,19 @@ std::vector > Loop::typed_fusion(Graph, bool> g, p = fused; else p = next[cant_fuse_with[n]]; - + if (p != 0) { int rep_node = node_to_fused_nodes[p]; node_num[n] = node_num[rep_node]; - + try { update_successors(n, node_num, cant_fuse_with, g, work_list, type_list, types); } catch (const loop_error &e) { - + throw loop_error( - "statements cannot be fused together due to negative dependence"); - + "statements cannot be fused together due to negative dependence"); + } for (std::set::iterator it = g.vertex[n].first.begin(); it != g.vertex[n].first.end(); it++) @@ -1826,81 +1832,80 @@ std::vector > Loop::typed_fusion(Graph, bool> g, lastnum = lastnum + 1; node_num[n] = lastnum; node_to_fused_nodes[node_num[n]] = n; - + if (lastfused == 0) { fused = lastnum; lastfused = fused; } else { next[lastfused] = lastnum; lastfused = lastnum; - + } - + try { update_successors(n, node_num, cant_fuse_with, g, work_list, type_list, types); } catch (const loop_error &e) { - + throw loop_error( - "statements cannot be fused together due to negative dependence"); - + "statements cannot be fused together due to negative dependence"); + } fused_nodes_counter++; } - + } else { s.push_back(g.vertex[n].first); lastnum = lastnum + 1; node_num[n] = lastnum; node_to_fused_nodes[node_num[n]] = n; - + try { update_successors(n, node_num, cant_fuse_with, g, work_list, type_list, types); } catch (const loop_error &e) { - + throw loop_error( - "statements cannot be fused together due to negative dependence"); - + "statements cannot be fused together due to negative dependence"); + } //fused_nodes_counter++; - + } - + } - + return s; } - - void Loop::setLexicalOrder(int dim, const std::set &active, int starting_order, std::vector > idxNames) { - fprintf(stderr, "Loop::setLexicalOrder() %d idxNames active size %d starting_order %d\n", idxNames.size(), active.size(), starting_order); + fprintf(stderr, "Loop::setLexicalOrder() %d idxNames active size %d starting_order %d\n", idxNames.size(), + active.size(), starting_order); if (active.size() == 0) return; - for (int i=0; i< idxNames.size(); i++) { + for (int i = 0; i < idxNames.size(); i++) { std::vector what = idxNames[i]; - for (int j=0; j lex; int ref_stmt_num; for (std::set::iterator i = active.begin(); i != active.end(); i++) { if ((*i) < 0 || (*i) >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(*i)); + "invalid statement number " + to_string(*i)); if (dim >= stmt[*i].xform.n_out()) throw std::invalid_argument( - "invalid constant loop level to set lexicographical order"); + "invalid constant loop level to set lexicographical order"); if (i == active.begin()) { lex = getLexicalOrder(*i); ref_stmt_num = *i; @@ -1909,10 +1914,10 @@ void Loop::setLexicalOrder(int dim, const std::set &active, for (int j = 0; j < dim; j += 2) if (lex[j] != lex2[j]) throw std::invalid_argument( - "statements are not in the same sub loop nest"); + "statements are not in the same sub loop nest"); } } - + // separate statements by current loop level types int level = (dim + 2) / 2; std::map, std::set > active_by_level_type; @@ -1922,21 +1927,21 @@ void Loop::setLexicalOrder(int dim, const std::set &active, active_by_no_level.insert(*i); else active_by_level_type[std::make_pair( - stmt[*i].loop_level[level - 1].type, - stmt[*i].loop_level[level - 1].payload)].insert(*i); + stmt[*i].loop_level[level - 1].type, + stmt[*i].loop_level[level - 1].payload)].insert(*i); } - + // further separate statements due to control dependences std::vector > active_by_level_type_splitted; for (std::map, std::set >::iterator i = - active_by_level_type.begin(); i != active_by_level_type.end(); i++) + active_by_level_type.begin(); i != active_by_level_type.end(); i++) active_by_level_type_splitted.push_back(i->second); for (std::set::iterator i = active_by_no_level.begin(); i != active_by_no_level.end(); i++) for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { std::set controlled, not_controlled; for (std::set::iterator k = - active_by_level_type_splitted[j].begin(); + active_by_level_type_splitted[j].begin(); k != active_by_level_type_splitted[j].end(); k++) { std::vector dvs = dep.getEdge(*i, *k); bool is_controlled = false; @@ -1952,19 +1957,19 @@ void Loop::setLexicalOrder(int dim, const std::set &active, } if (controlled.size() != 0 && not_controlled.size() != 0) { active_by_level_type_splitted.erase( - active_by_level_type_splitted.begin() + j); + active_by_level_type_splitted.begin() + j); active_by_level_type_splitted.push_back(controlled); active_by_level_type_splitted.push_back(not_controlled); } } - + // set lexical order separating loops with different loop types first if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; - + Graph, Empty> g; for (std::vector >::iterator i = - active_by_level_type_splitted.begin(); + active_by_level_type_splitted.begin(); i != active_by_level_type_splitted.end(); i++) g.insert(*i); for (std::set::iterator i = active_by_no_level.begin(); @@ -1986,7 +1991,7 @@ void Loop::setLexicalOrder(int dim, const std::set &active, if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before( - dep_dim))) { + dep_dim))) { g.connect(i, j); connected = true; break; @@ -2010,7 +2015,7 @@ void Loop::setLexicalOrder(int dim, const std::set &active, if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before( - dep_dim))) { + dep_dim))) { g.connect(j, i); connected = true; break; @@ -2022,13 +2027,13 @@ void Loop::setLexicalOrder(int dim, const std::set &active, break; } } - + std::vector > s = g.topoSort(); if (s.size() != g.vertex.size()) throw loop_error( - "cannot separate statements with different loop types at loop level " - + to_string(level)); - + "cannot separate statements with different loop types at loop level " + + to_string(level)); + // assign lexical order int order = starting_order; for (int i = 0; i < s.size(); i++) { @@ -2041,19 +2046,18 @@ void Loop::setLexicalOrder(int dim, const std::set &active, assign_const(stmt[cur_stmt].xform, j, 0); order++; } else { // recurse ! - fprintf(stderr, "Loop:setLexicalOrder() recursing\n"); + fprintf(stderr, "Loop:setLexicalOrder() recursing\n"); setLexicalOrder(dim, cur_scc, order, idxNames); order += sz; } } - } - else { // set lexical order separating single iteration statements and loops + } else { // set lexical order separating single iteration statements and loops std::set true_singles; std::set nonsingles; std::map > fake_singles; std::set fake_singles_; - + // sort out statements that do not require loops for (std::set::iterator i = active.begin(); i != active.end(); i++) { @@ -2071,7 +2075,7 @@ void Loop::setLexicalOrder(int dim, const std::set &active, fake_singles_.insert(*i); try { fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( - *i); + *i); } catch (const std::exception &e) { fake_singles[posInfinity].insert(*i); } @@ -2079,60 +2083,60 @@ void Loop::setLexicalOrder(int dim, const std::set &active, } else nonsingles.insert(*i); } - - + + // split nonsingles forcibly according to negative dependences present (loop unfusible) int dep_dim = get_dep_dim_of(ref_stmt_num, level); - + if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { - + bool dummy_level_found = false; - + std::vector > s; - + s = sort_by_same_loops(active, level); bool further_levels_exist = false; - + if (!idxNames.empty()) if (level <= idxNames[ref_stmt_num].size()) if (idxNames[ref_stmt_num][level - 1].length() == 0) { // && s.size() == 1) { int order1 = 0; dummy_level_found = true; - + for (int i = level; i < idxNames[ref_stmt_num].size(); i++) if (idxNames[ref_stmt_num][i].length() > 0) further_levels_exist = true; - + } - + //if (!dummy_level_found) { - + if (s.size() > 1) { - + std::vector types; for (int i = 0; i < s.size(); i++) types.push_back(true); - + Graph, bool> g = construct_induced_graph_at_level( - s, dep, dep_dim); + s, dep, dep_dim); s = typed_fusion(g, types); } int order = starting_order; for (int i = 0; i < s.size(); i++) { - + for (std::set::iterator it = s[i].begin(); it != s[i].end(); it++) { assign_const(stmt[*it].xform, dim, order); stmt[*it].xform.simplify(); } - + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) { // recurse ! - fprintf(stderr, "Loop:setLexicalOrder() recursing\n"); + fprintf(stderr, "Loop:setLexicalOrder() recursing\n"); setLexicalOrder(dim + 2, s[i], order, idxNames); } - + order++; } //} @@ -2231,8 +2235,8 @@ void Loop::setLexicalOrder(int dim, const std::set &active, break; } */ - - + + // assign lexical order /*int order = starting_order; for (int i = 0; i < s.size(); i++) { @@ -2261,17 +2265,16 @@ void Loop::setLexicalOrder(int dim, const std::set &active, */ } - fprintf(stderr, "LEAVING Loop::setLexicalOrder() %d idxNames\n", idxNames.size()); - for (int i=0; i< idxNames.size(); i++) { + fprintf(stderr, "LEAVING Loop::setLexicalOrder() %d idxNames\n", idxNames.size()); + for (int i = 0; i < idxNames.size(); i++) { std::vector what = idxNames[i]; - for (int j=0; j active; for (int i = 0; i < stmt.size(); i++) @@ -2280,26 +2283,26 @@ void Loop::apply_xform() { } void Loop::apply_xform(int stmt_num) { - fprintf(stderr, "apply_xform( %d )\n", stmt_num); + fprintf(stderr, "apply_xform( %d )\n", stmt_num); std::set active; active.insert(stmt_num); apply_xform(active); } void Loop::apply_xform(std::set &active) { - fflush(stdout); + fflush(stdout); fprintf(stderr, "loop.cc apply_xform( set )\n"); - + int max_n = 0; - + omega::CG_outputBuilder *ocg = ir->builder(); for (std::set::iterator i = active.begin(); i != active.end(); i++) { int n = stmt[*i].loop_level.size(); if (n > max_n) max_n = n; - + std::vector lex = getLexicalOrder(*i); - + omega::Relation mapping(2 * n + 1, n); omega::F_And *f_root = mapping.add_and(); for (int j = 1; j <= n; j++) { @@ -2309,7 +2312,7 @@ void Loop::apply_xform(std::set &active) { } mapping = omega::Composition(mapping, stmt[*i].xform); mapping.simplify(); - + // match omega input/output variables to variable names in the code for (int j = 1; j <= stmt[*i].IS.n_set(); j++) mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); @@ -2317,28 +2320,28 @@ void Loop::apply_xform(std::set &active) { mapping.name_output_var(j, tmp_loop_var_name_prefix + omega::to_string( - tmp_loop_var_name_counter + j - 1)); + tmp_loop_var_name_counter + j - 1)); mapping.setup_names(); mapping.print(); // "{[I] -> [_t1] : I = _t1 } - fflush(stdout); - + fflush(stdout); + omega::Relation known = Extend_Set(copy(this->known), mapping.n_out() - this->known.n_set()); //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector(mapping.n_out(), NULL)); - - omega::CG_outputBuilder *ocgr = ir->builder(); - - + + omega::CG_outputBuilder *ocgr = ir->builder(); + + //this is probably CG_chillBuilder; - + omega::CG_stringBuilder *ocgs = new omega::CG_stringBuilder; if (uninterpreted_symbols[*i].size() == 0) { - - + + std::set globals; - + for (omega::DNF_Iterator di(stmt[*i].IS.query_DNF()); di; di++) { - + for (omega::Constraint_Iterator e(*di); e; e++) { for (omega::Constr_Vars_Iter cvi(*e); cvi; cvi++) { omega::Variable_ID v = cvi.curr_var(); @@ -2349,105 +2352,106 @@ void Loop::apply_xform(std::set &active) { globals.insert(v->name()); std::vector reprs; std::vector reprs2; - + for (int l = 1; l <= g->arity(); l++) { omega::CG_outputRepr *temp = ocgr->CreateIdent( - stmt[*i].IS.set_var(l)->name()); + stmt[*i].IS.set_var(l)->name()); omega::CG_outputRepr *temp2 = ocgs->CreateIdent( - stmt[*i].IS.set_var(l)->name()); - + stmt[*i].IS.set_var(l)->name()); + reprs.push_back(temp); reprs2.push_back(temp2); } uninterpreted_symbols[*i].insert( - std::pair >( - v->get_global_var()->base_name(), - reprs)); + std::pair >( + v->get_global_var()->base_name(), + reprs)); uninterpreted_symbols_stringrepr[*i].insert( - std::pair >( - v->get_global_var()->base_name(), - reprs2)); + std::pair >( + v->get_global_var()->base_name(), + reprs2)); } } } } } - + std::vector loop_vars; for (int j = 1; j <= stmt[*i].IS.n_set(); j++) { loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); } - for (int j = 0; j subs = output_substitutions(ocg, Inverse(copy(mapping)), std::vector >( - mapping.n_out(), - std::make_pair( - static_cast(NULL), 0)), + mapping.n_out(), + std::make_pair( + static_cast(NULL), 0)), uninterpreted_symbols[*i]); - + std::vector subs2; for (int l = 0; l < subs.size(); l++) subs2.push_back(subs[l]->clone()); - - fprintf(stderr, "%d uninterpreted symbols\n", (int)uninterpreted_symbols.size()); - for (int j = 0; j >::iterator it = - uninterpreted_symbols[*i].begin(); + uninterpreted_symbols[*i].begin(); it != uninterpreted_symbols[*i].end(); it++) { - fprintf(stderr, "\ncount %d\n", count); - + fprintf(stderr, "\ncount %d\n", count); + std::vector reprs_ = it->second; - fprintf(stderr, "%d reprs_\n", (int)reprs_.size()); - + fprintf(stderr, "%d reprs_\n", (int) reprs_.size()); + std::vector reprs_2; for (int k = 0; k < reprs_.size(); k++) { - fprintf(stderr, "k %d\n", k); + fprintf(stderr, "k %d\n", k); std::vector subs; for (int l = 0; l < subs2.size(); l++) { - fprintf(stderr, "l %d\n", l); + fprintf(stderr, "l %d\n", l); subs.push_back(subs2[l]->clone()); } - + fprintf(stderr, "clone\n"); - CG_outputRepr *c = reprs_[k]->clone(); - c->dump(); fflush(stdout); - - fprintf(stderr, "createsub\n"); + CG_outputRepr *c = reprs_[k]->clone(); + c->dump(); + fflush(stdout); + + fprintf(stderr, "createsub\n"); CG_outputRepr *s = ocgr->CreateSubstitutedStmt(0, c, loop_vars, subs, true); - - fprintf(stderr, "push back\n"); - reprs_2.push_back( s ); - + + fprintf(stderr, "push back\n"); + reprs_2.push_back(s); + } - + it->second = reprs_2; count++; - fprintf(stderr, "bottom\n"); + fprintf(stderr, "bottom\n"); } - + std::vector subs3 = output_substitutions( - ocgs, Inverse(copy(mapping)), - std::vector >( - mapping.n_out(), - std::make_pair( - static_cast(NULL), 0)), - uninterpreted_symbols_stringrepr[*i]); - + ocgs, Inverse(copy(mapping)), + std::vector >( + mapping.n_out(), + std::make_pair( + static_cast(NULL), 0)), + uninterpreted_symbols_stringrepr[*i]); + for (std::map >::iterator it = - uninterpreted_symbols_stringrepr[*i].begin(); + uninterpreted_symbols_stringrepr[*i].begin(); it != uninterpreted_symbols_stringrepr[*i].end(); it++) { - + std::vector reprs_ = it->second; std::vector reprs_2; for (int k = 0; k < reprs_.size(); k++) { @@ -2460,13 +2464,13 @@ void Loop::apply_xform(std::set &active) { */ reprs_2.push_back(subs3[k]->clone()); } - + it->second = reprs_2; - + } - - - fprintf(stderr, "loop.cc stmt[*i].code =\n"); + + + fprintf(stderr, "loop.cc stmt[*i].code =\n"); //stmt[*i].code->dump(); //fprintf(stderr, "\n"); stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars, @@ -2474,10 +2478,10 @@ void Loop::apply_xform(std::set &active) { //fprintf(stderr, "loop.cc substituted code =\n"); //stmt[*i].code->dump(); //fprintf(stderr, "\n"); - + stmt[*i].IS = omega::Range(Restrict_Domain(mapping, stmt[*i].IS)); stmt[*i].IS.simplify(); - + // replace original transformation relation with straight 1-1 mapping //fprintf(stderr, "replace original transformation relation with straight 1-1 mapping\n"); mapping = Relation(n, 2 * n + 1); @@ -2493,46 +2497,44 @@ void Loop::apply_xform(std::set &active) { h.update_const(-lex[j - 1]); } stmt[*i].xform = mapping; - + //fprintf(stderr, "\ncode is: \n"); //stmt[*i].code->dump(); //fprintf(stderr, "\n\n"); - + } - + tmp_loop_var_name_counter += max_n; - fflush(stdout); - fprintf(stderr, "loop.cc LEAVING apply_xform( set )\n\n"); + fflush(stdout); + fprintf(stderr, "loop.cc LEAVING apply_xform( set )\n\n"); //for (std::set::iterator i = active.begin(); i != active.end(); i++) { // fprintf(stderr, "\nloop.cc stmt[i].code =\n"); // stmt[*i].code->dump(); // fprintf(stderr, "\n\n"); //} - -} - +} void Loop::addKnown(const Relation &cond) { - + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; fprintf(stderr, "Loop::addKnown(), SETTING last_compute_cg_ = NULL\n"); - + int n1 = this->known.n_set(); - + Relation r = copy(cond); int n2 = r.n_set(); - + if (n1 < n2) this->known = Extend_Set(this->known, n2 - n1); else if (n1 > n2) r = Extend_Set(r, n1 - n2); - + this->known = Intersection(this->known, r); } @@ -2540,11 +2542,11 @@ void Loop::removeDependence(int stmt_num_from, int stmt_num_to) { // check for sanity of parameters if (stmt_num_from >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(stmt_num_from)); + "invalid statement number " + to_string(stmt_num_from)); if (stmt_num_to >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(stmt_num_to)); - + "invalid statement number " + to_string(stmt_num_to)); + dep.disconnect(stmt_num_from, stmt_num_to); } @@ -2556,14 +2558,14 @@ void Loop::dump() const { if (2 * j < lex.size()) std::cout << lex[2 * j]; switch (stmt[i].loop_level[j].type) { - case LoopLevelOriginal: - std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; - break; - case LoopLevelTile: - std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; - break; - default: - std::cout << "(unknown)"; + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; } std::cout << ' '; } @@ -2579,16 +2581,16 @@ void Loop::dump() const { bool Loop::nonsingular(const std::vector > &T) { if (stmt.size() == 0) return true; - + // check for sanity of parameters for (int i = 0; i < stmt.size(); i++) { if (stmt[i].loop_level.size() != num_dep_dim) throw std::invalid_argument( - "nonsingular loop transformations must be applied to original perfect loop nest"); + "nonsingular loop transformations must be applied to original perfect loop nest"); for (int j = 0; j < stmt[i].loop_level.size(); j++) if (stmt[i].loop_level[j].type != LoopLevelOriginal) throw std::invalid_argument( - "nonsingular loop transformations must be applied to original perfect loop nest"); + "nonsingular loop transformations must be applied to original perfect loop nest"); } if (T.size() != num_dep_dim) throw std::invalid_argument("invalid transformation matrix"); @@ -2619,81 +2621,80 @@ bool Loop::nonsingular(const std::vector > &T) { h.update_coef(mapping.output_var(i), -1); h.update_coef(mapping.input_var(i), 1); } - + // update transformation relations for (int i = 0; i < stmt.size(); i++) stmt[i].xform = Composition(copy(mapping), stmt[i].xform); - + // update dependence graph for (int i = 0; i < dep.vertex.size(); i++) for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { std::vector dvs = j->second; for (int k = 0; k < dvs.size(); k++) { DependenceVector &dv = dvs[k]; switch (dv.type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - std::vector lbounds(num_dep_dim), ubounds( - num_dep_dim); - for (int p = 0; p < num_dep_dim; p++) { - coef_t lb = 0; - coef_t ub = 0; - for (int q = 0; q < num_dep_dim; q++) { - if (T[p][q] > 0) { - if (lb == -posInfinity - || dv.lbounds[q] == -posInfinity) - lb = -posInfinity; - else - lb += T[p][q] * dv.lbounds[q]; - if (ub == posInfinity - || dv.ubounds[q] == posInfinity) - ub = posInfinity; - else - ub += T[p][q] * dv.ubounds[q]; - } else if (T[p][q] < 0) { - if (lb == -posInfinity - || dv.ubounds[q] == posInfinity) - lb = -posInfinity; - else - lb += T[p][q] * dv.ubounds[q]; - if (ub == posInfinity - || dv.lbounds[q] == -posInfinity) - ub = posInfinity; - else - ub += T[p][q] * dv.lbounds[q]; + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector lbounds(num_dep_dim), ubounds( + num_dep_dim); + for (int p = 0; p < num_dep_dim; p++) { + coef_t lb = 0; + coef_t ub = 0; + for (int q = 0; q < num_dep_dim; q++) { + if (T[p][q] > 0) { + if (lb == -posInfinity + || dv.lbounds[q] == -posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.lbounds[q]; + if (ub == posInfinity + || dv.ubounds[q] == posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.ubounds[q]; + } else if (T[p][q] < 0) { + if (lb == -posInfinity + || dv.ubounds[q] == posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.ubounds[q]; + if (ub == posInfinity + || dv.lbounds[q] == -posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.lbounds[q]; + } } + if (T[p].size() == num_dep_dim + 1) { + if (lb != -posInfinity) + lb += T[p][num_dep_dim]; + if (ub != posInfinity) + ub += T[p][num_dep_dim]; + } + lbounds[p] = lb; + ubounds[p] = ub; } - if (T[p].size() == num_dep_dim + 1) { - if (lb != -posInfinity) - lb += T[p][num_dep_dim]; - if (ub != posInfinity) - ub += T[p][num_dep_dim]; - } - lbounds[p] = lb; - ubounds[p] = ub; + dv.lbounds = lbounds; + dv.ubounds = ubounds; + + break; } - dv.lbounds = lbounds; - dv.ubounds = ubounds; - - break; - } - default: - ; + default:; } } j->second = dvs; } - + // set constant loop values std::set active; for (int i = 0; i < stmt.size(); i++) active.insert(i); setLexicalOrder(0, active); - + return true; } @@ -2706,12 +2707,11 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, if (!dv.is_scalar_dependence) { for (last_dim = 0; last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]); - last_dim++) - ; + last_dim++); last_dim = last_dim / 2; if (last_dim == 0) return true; - + for (int i = 0; i < last_dim; i++) { if (dv.lbounds[i] > 0) return true; @@ -2721,9 +2721,9 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, } if (before) return true; - + return false; - + } // Manu:: reduction operation @@ -2731,7 +2731,7 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, void Loop::scalar_expand(int stmt_num, const std::vector &levels, std::string arrName, int memory_type, int padding_alignment, int assign_then_accumulate, int padding_stride) { - + //std::cout << "In scalar_expand function: " << stmt_num << ", " << arrName << "\n"; //std::cout.flush(); @@ -2744,10 +2744,10 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, // check for sanity of parameters bool found_non_constant_size_dimension = false; - + if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(stmt_num)); + "invalid statement number " + to_string(stmt_num)); //Anand: adding check for privatized levels //if (arrName != "RHS") // throw std::invalid_argument( @@ -2755,34 +2755,33 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, for (int i = 0; i < levels.size(); i++) { if (levels[i] <= 0 || levels[i] > stmt[stmt_num].loop_level.size()) throw std::invalid_argument( - "1invalid loop level " + to_string(levels[i])); - + "1invalid loop level " + to_string(levels[i])); + if (i > 0) { if (levels[i] < levels[i - 1]) throw std::invalid_argument( - "loop levels must be in ascending order"); + "loop levels must be in ascending order"); } } //end --adding check for privatized levels - + delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; fprintf(stderr, "Loop::scalar_expand(), SETTING last_compute_cg_ = NULL\n"); - fprintf(stderr, "\nloop.cc finding array accesses in stmt %d of the code\n",stmt_num ); + fprintf(stderr, "\nloop.cc finding array accesses in stmt %d of the code\n", stmt_num); std::vector access = ir->FindArrayRef(stmt[stmt_num].code); - fprintf(stderr, "loop.cc L2726 %d access\n", access.size()); + fprintf(stderr, "loop.cc L2726 %d access\n", access.size()); IR_ArraySymbol *sym = NULL; - fprintf(stderr, "arrName %s\n", arrName.c_str()); - if (arrName == "RHS") { - fprintf(stderr, "sym RHS\n"); + fprintf(stderr, "arrName %s\n", arrName.c_str()); + if (arrName == "RHS") { + fprintf(stderr, "sym RHS\n"); sym = access[0]->symbol(); - } - else { - fprintf(stderr, "looking for array %s in access\n", arrName.c_str()); + } else { + fprintf(stderr, "looking for array %s in access\n", arrName.c_str()); for (int k = 0; k < access.size(); k++) { // BUH //fprintf(stderr, "access[%d] = %s ", k, access[k]->getTypeString()); access[k]->print(0,stderr); fprintf(stderr, "\n"); @@ -2791,34 +2790,34 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //fprintf(stderr, "comparing %s to %s\n", name.c_str(), arrName.c_str()); if (access[k]->symbol()->name() == arrName) { - fprintf(stderr, "found it sym access[ k=%d ]\n", k); + fprintf(stderr, "found it sym access[ k=%d ]\n", k); sym = access[k]->symbol(); - } + } } } - if (!sym) fprintf(stderr, "DIDN'T FIND IT\n"); - fprintf(stderr, "sym %p\n", sym); + if (!sym) fprintf(stderr, "DIDN'T FIND IT\n"); + fprintf(stderr, "sym %p\n", sym); // collect array references by name std::vector lex = getLexicalOrder(stmt_num); int dim = 2 * levels[levels.size() - 1] - 1; std::set same_loop = getStatements(lex, dim - 1); - + //Anand: shifting this down // assign_const(stmt[newStmt_num].xform, 2*level+1, 1); - + // std::cout << " before temp array name \n "; // create a temporary variable IR_Symbol *tmp_sym; - + // get the loop upperbound, that would be the size of the temp array. omega::coef_t lb[levels.size()], ub[levels.size()], size[levels.size()]; - + //Anand Adding apply xform so that tiled loop bounds are reflected fprintf(stderr, "Adding apply xform so that tiled loop bounds are reflected\n"); apply_xform(same_loop); - fprintf(stderr, "loop.cc, back from apply_xform()\n"); - + fprintf(stderr, "loop.cc, back from apply_xform()\n"); + //Anand commenting out the folowing 4 lines /* copy(stmt[stmt_num].IS).query_variable_bounds( copy(stmt[stmt_num].IS).set_var(level), lb, ub); @@ -2890,32 +2889,32 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, std::vector size_int; Relation xform = copy(stmt[stmt_num].xform); for (int i = 0; i < n_dim; i++) { - + dim = 2 * levels[i] - 1; //Anand: Commenting out the lines below: not required // if (i != 0) // reduced_copy_is = Project(reduced_copy_is, level - 1 + i, Set_Var); Relation bound = get_loop_bound(copy(reduced_copy_is), levels[i] - 1); - + // extract stride std::pair result = find_simplest_stride(bound, bound.set_var(levels[i])); if (result.second != NULL) index_stride[i] = abs(result.first.get_coef(result.second)) - / gcd(abs(result.first.get_coef(result.second)), - abs( - result.first.get_coef( - bound.set_var(levels[i])))); + / gcd(abs(result.first.get_coef(result.second)), + abs( + result.first.get_coef( + bound.set_var(levels[i])))); else index_stride[i] = 1; // std::cout << "simplest_stride 11:: " << index_stride[i] << "\n"; - + // check if this array index requires loop Conjunct *c = bound.query_DNF()->single_conjunct(); for (EQ_Iterator ei(c->EQs()); ei; ei++) { if ((*ei).has_wildcards()) continue; - + int coef = (*ei).get_coef(bound.set_var(levels[i])); if (coef != 0) { int sign = 1; @@ -2923,59 +2922,58 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, coef = -coef; sign = -1; } - + CG_outputRepr *op = NULL; for (Constr_Vars_Iter ci(*ei); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - if ((*ci).var != bound.set_var(levels[i])) + case Input_Var: { + if ((*ci).var != bound.set_var(levels[i])) + if ((*ci).coef * sign == 1) + op = ocg1->CreateMinus(op, + ocg1->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign == -1) + op = ocg1->CreatePlus(op, + ocg1->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign > 1) { + op = ocg1->CreateMinus(op, + ocg1->CreateTimes( + ocg1->CreateInt( + abs((*ci).coef)), + ocg1->CreateIdent( + (*ci).var->name()))); + } else + // (*ci).coef*sign < -1 + op = ocg1->CreatePlus(op, + ocg1->CreateTimes( + ocg1->CreateInt( + abs((*ci).coef)), + ocg1->CreateIdent( + (*ci).var->name()))); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); if ((*ci).coef * sign == 1) op = ocg1->CreateMinus(op, - ocg1->CreateIdent((*ci).var->name())); + ocg1->CreateIdent(g->base_name())); else if ((*ci).coef * sign == -1) op = ocg1->CreatePlus(op, - ocg1->CreateIdent((*ci).var->name())); - else if ((*ci).coef * sign > 1) { + ocg1->CreateIdent(g->base_name())); + else if ((*ci).coef * sign > 1) op = ocg1->CreateMinus(op, ocg1->CreateTimes( - ocg1->CreateInt( - abs((*ci).coef)), - ocg1->CreateIdent( - (*ci).var->name()))); - } + ocg1->CreateInt(abs((*ci).coef)), + ocg1->CreateIdent(g->base_name()))); else // (*ci).coef*sign < -1 op = ocg1->CreatePlus(op, ocg1->CreateTimes( - ocg1->CreateInt( - abs((*ci).coef)), - ocg1->CreateIdent( - (*ci).var->name()))); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - if ((*ci).coef * sign == 1) - op = ocg1->CreateMinus(op, - ocg1->CreateIdent(g->base_name())); - else if ((*ci).coef * sign == -1) - op = ocg1->CreatePlus(op, - ocg1->CreateIdent(g->base_name())); - else if ((*ci).coef * sign > 1) - op = ocg1->CreateMinus(op, - ocg1->CreateTimes( - ocg1->CreateInt(abs((*ci).coef)), - ocg1->CreateIdent(g->base_name()))); - else - // (*ci).coef*sign < -1 - op = ocg1->CreatePlus(op, - ocg1->CreateTimes( - ocg1->CreateInt(abs((*ci).coef)), - ocg1->CreateIdent(g->base_name()))); - break; - } - default: - throw loop_error("unsupported array index expression"); + ocg1->CreateInt(abs((*ci).coef)), + ocg1->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); } } if ((*ei).get_const() != 0) @@ -2983,7 +2981,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, ocg1->CreateInt(-sign * ((*ei).get_const()))); if (coef != 1) op = ocg1->CreateIntegerFloor(op, ocg1->CreateInt(coef)); - + index_lb[i] = op; is_index_eq[i] = true; break; @@ -2991,7 +2989,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, } if (is_index_eq[i]) continue; - + // separate lower and upper bounds std::vector lb_list, ub_list; std::set excluded_floor_vars; @@ -3006,23 +3004,22 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, excluded_floor_vars).first) { clean_bound = false; break; - } - else - h= find_floor_definition(bound, (*cvi).var, - excluded_floor_vars).second; - + } else + h = find_floor_definition(bound, (*cvi).var, + excluded_floor_vars).second; + if (!clean_bound) continue; - else{ + else { if (coef > 0) lb_list.push_back(h); else if (coef < 0) ub_list.push_back(h); - continue; - } - + continue; + } + } - + if (coef > 0) lb_list.push_back(*gi); else if (coef < 0) @@ -3030,7 +3027,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, } if (lb_list.size() == 0 || ub_list.size() == 0) throw loop_error("failed to calcuate array footprint size"); - + // build lower bound representation std::vector lb_repr_list; /* for (int j = 0; j < lb_list.size(); j++){ @@ -3045,7 +3042,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, index_lb[i] = ocg1->CreateInvoke("max", lb_repr_list); else if (lb_repr_list.size() == 1) index_lb[i] = lb_repr_list[0]; - + // build temporary array size representation { Relation cal(copy_is.n_set(), 1); @@ -3053,65 +3050,65 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, for (int j = 0; j < ub_list.size(); j++) for (int k = 0; k < lb_list.size(); k++) { GEQ_Handle h = f_root->add_GEQ(); - + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error( - "cannot calculate temporay array size statically"); + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error( + "cannot calculate temporay array size statically"); } } h.update_const(ub_list[j].get_const()); - + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error( - "cannot calculate temporay array size statically"); + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error( + "cannot calculate temporay array size statically"); } } h.update_const(lb_list[k].get_const()); - + h.update_const(1); h.update_coef(cal.output_var(1), -1); } - + cal = Restrict_Domain(cal, copy(copy_is)); for (int j = 1; j <= cal.n_inp(); j++) { cal = Project(cal, j, Input_Var); } cal.simplify(); - + // pad temporary array size // TODO: for variable array size, create padding formula //int padding_stride = 0; @@ -3125,50 +3122,50 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, gi++) if ((*gi).is_const(cal.output_var(1))) { coef_t size = (*gi).get_const() - / (-(*gi).get_coef(cal.output_var(1))); - + / (-(*gi).get_coef(cal.output_var(1))); + if (padding_alignment > 1 && i == n_dim - 1) { // align to boundary for data packing int residue = size % padding_alignment; if (residue) size = size + padding_alignment - residue; } - + index_sz.push_back( - std::make_pair(i, ocg1->CreateInt(size))); + std::make_pair(i, ocg1->CreateInt(size))); is_index_bound_const = true; size_int.push_back(size); size_repr.push_back(ocg1->CreateInt(size)); - + // std::cout << "============================== size :: " // << size << "\n"; - + } - + if (!is_index_bound_const) { - + found_non_constant_size_dimension = true; Conjunct *c = bound.query_DNF()->single_conjunct(); for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { int coef = (*gi).get_coef(bound.set_var(levels[i])); if (coef < 0) { - + size_repr.push_back( - ocg1->CreatePlus( - output_upper_bound_repr(ocg1, *gi, - bound.set_var(levels[i]), - bound, - std::vector< - std::pair< - CG_outputRepr *, - int> >( - bound.n_set(), - std::make_pair( - static_cast(NULL), - 0)), - uninterpreted_symbols[stmt_num]), - ocg1->CreateInt(1))); - + ocg1->CreatePlus( + output_upper_bound_repr(ocg1, *gi, + bound.set_var(levels[i]), + bound, + std::vector< + std::pair< + CG_outputRepr *, + int> >( + bound.n_set(), + std::make_pair( + static_cast(NULL), + 0)), + uninterpreted_symbols[stmt_num]), + ocg1->CreateInt(1))); + /*CG_outputRepr *op = NULL; for (Constr_Vars_Iter ci(*gi); ci; ci++) { if ((*ci).var != cal.output_var(1)) { @@ -3238,13 +3235,13 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, } } //size[i] = ub[i]; - + } ///////////////////////////////////////////////////////////////////////////////////////////////////// // - + //Anand: Creating IS of new statement - + //for(int l = dim; l < stmt[stmt_num].xform.n_out(); l+=2) //std::cout << "In scalar_expand function 3: " << stmt_num << ", " << arrName << "\n"; //std::cout.flush(); @@ -3256,16 +3253,16 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //} //fprintf(stderr, "\n"); - + shiftLexicalOrder(lex, dim + 1, 1); Statement s = stmt[stmt_num]; s.ir_stmt_node = NULL; int newStmt_num = stmt.size(); - fprintf(stderr, "loop.cc L3249 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop.cc L3249 adding stmt %d\n", stmt.size()); stmt.push_back(s); - - fprintf(stderr, "uninterpreted_symbols.push_back() newStmt_num %d\n", newStmt_num); + + fprintf(stderr, "uninterpreted_symbols.push_back() newStmt_num %d\n", newStmt_num); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); stmt[newStmt_num].code = stmt[stmt_num].code->clone(); @@ -3286,16 +3283,16 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //assign_const(stmt[newStmt_num].xform, stmt[stmt_num].xform.n_out(), 1);//Anand: change from 2*level + 1 to stmt[stmt_num].xform.size() //Anand-End creating IS of new statement - - CG_outputRepr * tmpArrSz; + + CG_outputRepr *tmpArrSz; CG_outputBuilder *ocg = ir->builder(); - + //for(int k =0; k < levels.size(); k++ ) // size_repr.push_back(ocg->CreateInt(size[k]));//Anand: copying apply_xform functionality to prevent IS modification //due to side effects with uninterpreted function symbols and failures in omega - + //int n = stmt[stmt_num].loop_level.size(); - + /*Relation mapping(2 * n + 1, n); F_And *f_root = mapping.add_and(); for (int j = 1; j <= n; j++) { @@ -3318,64 +3315,68 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, Relation size_ = omega::Range(Restrict_Domain(mapping, copy(stmt[stmt_num].IS))); size_.simplify(); */ - + //Anand -commenting out tmp sym creation as symbol may have more than one dimension //tmp_sym = ir->CreateArraySymbol(tmpArrSz, sym); std::vector lhs_index; CG_outputRepr *arr_ref_repr; arr_ref_repr = ocg->CreateIdent( - stmt[stmt_num].IS.set_var(levels[levels.size() - 1])->name()); - + stmt[stmt_num].IS.set_var(levels[levels.size() - 1])->name()); + CG_outputRepr *total_size = size_repr[0]; - fprintf(stderr, "total_size = "); total_size->dump(); fflush(stdout); + fprintf(stderr, "total_size = "); + total_size->dump(); + fflush(stdout); for (int i = 1; i < size_repr.size(); i++) { - fprintf(stderr, "total_size now "); total_size->dump(); fflush(stdout); fprintf(stderr, " times something\n\n"); + fprintf(stderr, "total_size now "); + total_size->dump(); + fflush(stdout); + fprintf(stderr, " times something\n\n"); total_size = ocg->CreateTimes(total_size->clone(), size_repr[i]->clone()); - + } - + // COMMENT NEEDED //fprintf(stderr, "\nloop.cc COMMENT NEEDED\n"); for (int k = levels.size() - 2; k >= 0; k--) { - CG_outputRepr *temp_repr =ocg->CreateIdent(stmt[stmt_num].IS.set_var(levels[k])->name()); - for (int l = k + 1; l < levels.size(); l++) { + CG_outputRepr *temp_repr = ocg->CreateIdent(stmt[stmt_num].IS.set_var(levels[k])->name()); + for (int l = k + 1; l < levels.size(); l++) { //fprintf(stderr, "\nloop.cc CREATETIMES\n"); temp_repr = ocg->CreateTimes(temp_repr->clone(), size_repr[l]->clone()); } - + //fprintf(stderr, "\nloop.cc CREATEPLUS\n"); arr_ref_repr = ocg->CreatePlus(arr_ref_repr->clone(), temp_repr->clone()); } - + //fprintf(stderr, "loop.cc, about to die\n"); std::vector to_push; to_push.push_back(total_size); - if (!found_non_constant_size_dimension) { - fprintf(stderr, "constant size dimension\n"); + if (!found_non_constant_size_dimension) { + fprintf(stderr, "constant size dimension\n"); tmp_sym = ir->CreateArraySymbol(sym, to_push, memory_type); - } - else { - fprintf(stderr, "NON constant size dimension?\n"); + } else { + fprintf(stderr, "NON constant size dimension?\n"); //tmp_sym = ir->CreatePointerSymbol(sym, to_push); tmp_sym = ir->CreatePointerSymbol(sym, to_push); static_cast(tmp_sym)->set_size(0, total_size); // ?? ptr_variables.push_back(static_cast(tmp_sym)); - fprintf(stderr, "ptr_variables now has %d entries\n", ptr_variables.size()); + fprintf(stderr, "ptr_variables now has %d entries\n", ptr_variables.size()); } - + // add tmp_sym to Loop symtables ?? - + // std::cout << " temp array name == " << tmp_sym->name().c_str() << "\n"; - + // get loop index variable at the given "level" // Relation R = omega::Range(Restrict_Domain(copy(stmt[stmt_num].xform), copy(stmt[stmt_num].IS))); // stmt[stmt_num].IS.print(); @@ -3383,9 +3384,9 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, // std::cout << stmt[stmt_num].IS.n_set() << std::endl; // std::string v = stmt[stmt_num].IS.set_var(level)->name(); // std::cout << "loop index variable is '" << v.c_str() << "'\n"; - + // create a reference for the temporary array - fprintf(stderr, "create a reference for the temporary array\n"); + fprintf(stderr, "create a reference for the temporary array\n"); //std::cout << "In scalar_expand function 4: " << stmt_num << ", " << arrName << "\n"; //std::cout.flush(); @@ -3396,7 +3397,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //} //fprintf(stderr, "\n"); - + std::vector to_push2; to_push2.push_back(arr_ref_repr); // can have only one entry @@ -3405,21 +3406,20 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, IR_ArrayRef *tmp_array_ref; - IR_PointerArrayRef * tmp_ptr_array_ref; // was IR_PointerArrayref + IR_PointerArrayRef *tmp_ptr_array_ref; // was IR_PointerArrayref if (!found_non_constant_size_dimension) { fprintf(stderr, "constant size\n"); tmp_array_ref = ir->CreateArrayRef( - static_cast(tmp_sym), to_push2); - } - else { - fprintf(stderr, "NON constant size\n"); + static_cast(tmp_sym), to_push2); + } else { + fprintf(stderr, "NON constant size\n"); tmp_ptr_array_ref = ir->CreatePointerArrayRef( - static_cast(tmp_sym), to_push2); + static_cast(tmp_sym), to_push2); // TODO static_cast(tmp_sym), to_push2); } - fflush(stdout); + fflush(stdout); //fprintf(stderr, "\n%d statements\n", stmt.size()); //for (int i=0; i &levels, //std::string stemp; //stemp = tmp_array_ref->name(); //std::cout << "Created array reference --> " << stemp.c_str() << "\n"; - + // get the RHS expression - fprintf(stderr, "get the RHS expression arrName %s\n", arrName.c_str()); + fprintf(stderr, "get the RHS expression arrName %s\n", arrName.c_str()); CG_outputRepr *rhs; if (arrName == "RHS") { rhs = ir->GetRHSExpression(stmt[stmt_num].code); - + std::vector symbols = ir->FindArrayRef(rhs); } std::set sym_names; - + //for (int i = 0; i < symbols.size(); i++) // sym_names.insert(symbols[i]->symbol()->name()); - - fflush(stdout); + + fflush(stdout); //fprintf(stderr, "\nbefore if (arrName == RHS)\n%d statements\n", stmt.size()); // problem is after here //for (int i=0; i &levels, //fprintf(stderr, "\n"); if (arrName == "RHS") { - + std::vector symbols = ir->FindArrayRef(rhs); - + for (int i = 0; i < symbols.size(); i++) sym_names.insert(symbols[i]->symbol()->name()); - } - else { + } else { - fprintf(stderr, "finding array refs in stmt_num %d\n", stmt_num); + fprintf(stderr, "finding array refs in stmt_num %d\n", stmt_num); //fprintf(stderr, "\n%d statements\n", stmt.size()); //for (int i=0; i &levels, //fprintf(stderr, "\n"); std::vector refs = ir->FindArrayRef(stmt[stmt_num].code); - fprintf(stderr, "\n%d refs\n", refs.size()); + fprintf(stderr, "\n%d refs\n", refs.size()); + - bool found = false; for (int j = 0; j < refs.size(); j++) { - CG_outputRepr* to_replace; + CG_outputRepr *to_replace; - fprintf(stderr, "j %d build new assignment statement with temporary array\n",j); + fprintf(stderr, "j %d build new assignment statement with temporary array\n", j); // build new assignment statement with temporary array if (!found_non_constant_size_dimension) { to_replace = tmp_array_ref->convert(); @@ -3494,7 +3493,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //CR->Dump(); if (refs[j]->name() == arrName) { - fflush(stdout); + fflush(stdout); fprintf(stderr, "loop.cc L353\n"); // problem is after here //fprintf(stderr, "\n%d statements\n", stmt.size()); //for (int i=0; i &levels, // ((CG_chillRepr *)stmt[i].code)->Dump(); //} //fprintf(stderr, "\n"); - + sym_names.insert(refs[j]->symbol()->name()); - + if (!found) { - if (!found_non_constant_size_dimension) { - fprintf(stderr, "constant size2\n"); - omega::CG_outputRepr * t = tmp_array_ref->convert(); - omega::CG_outputRepr * r = refs[j]->convert()->clone(); + if (!found_non_constant_size_dimension) { + fprintf(stderr, "constant size2\n"); + omega::CG_outputRepr *t = tmp_array_ref->convert(); + omega::CG_outputRepr *r = refs[j]->convert()->clone(); //CR = (CG_chillRepr *) t; //CR->Dump(); //CR = (CG_chillRepr *) r; @@ -3518,14 +3517,13 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //fprintf(stderr, "lhs t %p lhs r %p\n", t, r); stmt[newStmt_num].code = - ir->builder()->CreateAssignment(0, - t, // tmp_array_ref->convert(), - r); // refs[j]->convert()->clone() - } - else { - fprintf(stderr, "NON constant size2\n"); - omega::CG_outputRepr * t = tmp_ptr_array_ref->convert(); // this fails - omega::CG_outputRepr * r = refs[j]->convert()->clone(); + ir->builder()->CreateAssignment(0, + t, // tmp_array_ref->convert(), + r); // refs[j]->convert()->clone() + } else { + fprintf(stderr, "NON constant size2\n"); + omega::CG_outputRepr *t = tmp_ptr_array_ref->convert(); // this fails + omega::CG_outputRepr *r = refs[j]->convert()->clone(); //omega::CG_chillRepr *CR = (omega::CG_chillRepr *) t; //CR->Dump(); @@ -3534,21 +3532,21 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //fprintf(stderr, "lhs t %p lhs r %p\n", t, r); stmt[newStmt_num].code = - ir->builder()->CreateAssignment(0, - t, // tmp_ptr_array_ref->convert(), - r ); // refs[j]->convert()->clone()); + ir->builder()->CreateAssignment(0, + t, // tmp_ptr_array_ref->convert(), + r); // refs[j]->convert()->clone()); } found = true; - + } - + // refs[j] has no parent? - fprintf(stderr, "replacing refs[%d]\n", j ); + fprintf(stderr, "replacing refs[%d]\n", j); ir->ReplaceExpression(refs[j], to_replace); } - + } - + } //ToDo need to update the dependence graph //Anand adding dependence graph update @@ -3561,10 +3559,10 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, //fprintf(stderr, "\n"); dep.insert(); - + //Anand:Copying Dependence checks from datacopy code, might need to be a separate function/module // in the future - + /*for (int i = 0; i < newStmt_num; i++) { std::vector > D; @@ -3615,41 +3613,41 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, */ //Anand--end dependence check if (arrName == "RHS") { - + // build new assignment statement with temporary array if (!found_non_constant_size_dimension) { if (assign_then_accumulate) { stmt[newStmt_num].code = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), rhs); - fprintf(stderr, "ir->ReplaceRHSExpression( stmt_ num %d )\n", stmt_num); + fprintf(stderr, "ir->ReplaceRHSExpression( stmt_ num %d )\n", stmt_num); ir->ReplaceRHSExpression(stmt[stmt_num].code, tmp_array_ref); } else { CG_outputRepr *temp = tmp_array_ref->convert()->clone(); if (ir->QueryExpOperation(stmt[stmt_num].code) != IR_OP_PLUS_ASSIGNMENT) throw ir_error( - "Statement is not a += accumulation statement"); + "Statement is not a += accumulation statement"); - fprintf(stderr, "replacing in a +=\n"); + fprintf(stderr, "replacing in a +=\n"); stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0, temp->clone(), rhs); - - CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code); - + + CG_outputRepr *lhs = ir->GetLHSExpression(stmt[stmt_num].code); + CG_outputRepr *assignment = ir->builder()->CreateAssignment(0, lhs, temp->clone()); Statement init_ = stmt[newStmt_num]; // copy ?? init_.ir_stmt_node = NULL; - + init_.code = stmt[newStmt_num].code->clone(); init_.IS = copy(stmt[newStmt_num].IS); init_.xform = copy(stmt[newStmt_num].xform); init_.has_inspector = false; // ?? Relation mapping(init_.IS.n_set(), init_.IS.n_set()); - + F_And *f_root = mapping.add_and(); - + for (int i = 1; i <= mapping.n_inp(); i++) { EQ_Handle h = f_root->add_EQ(); //if (i < levels[0]) { @@ -3660,7 +3658,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, h.update_const(-1); h.update_coef(mapping.output_var(i), 1); } - + /*else { int j; for (j = 0; j < levels.size(); j++) @@ -3683,18 +3681,18 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, */ //} } - + mapping.simplify(); // match omega input/output variables to variable names in the code for (int j = 1; j <= init_.IS.n_set(); j++) mapping.name_output_var(j, init_.IS.set_var(j)->name()); for (int j = 1; j <= init_.IS.n_set(); j++) mapping.name_input_var(j, init_.IS.set_var(j)->name()); - + mapping.setup_names(); - + init_.IS = omega::Range( - omega::Restrict_Domain(mapping, init_.IS)); + omega::Restrict_Domain(mapping, init_.IS)); std::vector lex = getLexicalOrder(newStmt_num); int dim = 2 * levels[0] - 1; //init_.IS.print(); @@ -3704,11 +3702,11 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, shiftLexicalOrder(lex, dim + 1, 1); init_.reduction = stmt[newStmt_num].reduction; init_.reductionOp = stmt[newStmt_num].reductionOp; - + init_.code = ir->builder()->CreateAssignment(0, temp->clone(), ir->builder()->CreateInt(0)); - fprintf(stderr, "loop.cc L3693 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop.cc L3693 adding stmt %d\n", stmt.size()); stmt.push_back(init_); uninterpreted_symbols.push_back(uninterpreted_symbols[newStmt_num]); @@ -3726,47 +3724,45 @@ void Loop::scalar_expand(int stmt_num, const std::vector &levels, if (ir->QueryExpOperation(stmt[stmt_num].code) != IR_OP_PLUS_ASSIGNMENT) throw ir_error( - "Statement is not a += accumulation statement"); + "Statement is not a += accumulation statement"); stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0, temp->clone(), rhs); - - CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code); - + + CG_outputRepr *lhs = ir->GetLHSExpression(stmt[stmt_num].code); + CG_outputRepr *assignment = ir->builder()->CreateAssignment(0, lhs, temp->clone()); - + stmt[stmt_num].code = assignment; } // call function to replace rhs with temporary array } } - + //std::cout << "End of scalar_expand function!! \n"; - + // if(arrName == "RHS"){ DependenceVector dv; std::vector E; dv.lbounds = std::vector(4); dv.ubounds = std::vector(4); dv.type = DEP_W2R; - + for (int k = 0; k < 4; k++) { dv.lbounds[k] = 0; dv.ubounds[k] = 0; - + } - + //std::vector array_refs = ir->FindArrayRef(stmt[newStmt_num].code); dv.sym = tmp_sym->clone(); - + E.push_back(dv); - + dep.connect(newStmt_num, stmt_num, E); // } - -} - +} std::pair createCSRstyleISandXFORM(CG_outputBuilder *ocg, @@ -3775,98 +3771,98 @@ std::pair createCSRstyleISandXFORM(CG_outputBuilder *ocg, std::map > &uninterpreted_symbols, std::map > &uninterpreted_symbols_string, Loop *this_loop) { - + Relation IS(outer_loop_bounds.size() + 1 + zero_loop_bounds.size()); Relation XFORM(outer_loop_bounds.size() + 1 + zero_loop_bounds.size(), 2 * (outer_loop_bounds.size() + 1 + zero_loop_bounds.size()) + 1); - - F_And * f_r_ = IS.add_and(); - F_And * f_root = XFORM.add_and(); - + + F_And *f_r_ = IS.add_and(); + F_And *f_root = XFORM.add_and(); + if (outer_loop_bounds.size() > 0) { for (int it = 0; it < IS.n_set(); it++) { IS.name_set_var(it + 1, const_cast(outer_loop_bounds[0]).set_var(it + 1)->name()); XFORM.name_input_var(it + 1, const_cast(outer_loop_bounds[0]).set_var(it + 1)->name()); - + } } else if (zero_loop_bounds.size() > 0) { for (int it = 0; it < IS.n_set(); it++) { IS.name_set_var(it + 1, const_cast(zero_loop_bounds.begin()->second).set_var( - it + 1)->name()); + it + 1)->name()); XFORM.name_input_var(it + 1, const_cast(zero_loop_bounds.begin()->second).set_var( - it + 1)->name()); - + it + 1)->name()); + } - + } - + for (int i = 0; i < outer_loop_bounds.size(); i++) IS = replace_set_var_as_another_set_var(IS, outer_loop_bounds[i], i + 1, i + 1); - + int count = 1; for (std::map::iterator i = zero_loop_bounds.begin(); i != zero_loop_bounds.end(); i++, count++) IS = replace_set_var_as_another_set_var(IS, i->second, outer_loop_bounds.size() + 1 + count, i->first); - + if (outer_loop_bounds.size() > 0) { Free_Var_Decl *lb = new Free_Var_Decl(index_name + "_", 1); // index_ Variable_ID csr_lb = IS.get_local(lb, Input_Tuple); - + Free_Var_Decl *ub = new Free_Var_Decl(index_name + "__", 1); // index__ Variable_ID csr_ub = IS.get_local(ub, Input_Tuple); - + //lower bound - - F_And * f_r = IS.and_with_and(); + + F_And *f_r = IS.and_with_and(); GEQ_Handle lower_bound = f_r->add_GEQ(); lower_bound.update_coef(csr_lb, -1); lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1); - + //upper bound - + GEQ_Handle upper_bound = f_r->add_GEQ(); upper_bound.update_coef(csr_ub, 1); upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1); upper_bound.update_const(-1); - + omega::CG_stringBuilder *ocgs = new CG_stringBuilder; - + std::vector reprs; std::vector reprs2; - + std::vector reprs3; std::vector reprs4; - + reprs.push_back( - ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); + ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); reprs2.push_back( - ocgs->CreateIdent( - IS.set_var(outer_loop_bounds.size())->name())); + ocgs->CreateIdent( + IS.set_var(outer_loop_bounds.size())->name())); uninterpreted_symbols.insert( - std::pair >( - index_name + "_", reprs)); + std::pair >( + index_name + "_", reprs)); uninterpreted_symbols_string.insert( - std::pair >( - index_name + "_", reprs2)); - + std::pair >( + index_name + "_", reprs2)); + std::string arg = "(" + IS.set_var(outer_loop_bounds.size())->name() - + ")"; - std::vector< std::string > argvec; - argvec.push_back( arg ); - + + ")"; + std::vector argvec; + argvec.push_back(arg); + CG_outputRepr *repr = ocg->CreateArrayRefExpression(index_name, ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); - + //fprintf(stderr, "( VECTOR _)\n"); //fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "_").c_str()); this_loop->ir->CreateDefineMacro(index_name + "_", argvec, repr); - + Relation known_(copy(IS).n_set()); known_.copy_names(copy(IS)); known_.setup_names(); @@ -3878,80 +3874,81 @@ std::pair createCSRstyleISandXFORM(CG_outputBuilder *ocg, g.update_coef(index_lb, -1); g.update_const(-1); this_loop->addKnown(known_); - + reprs3.push_back( - - ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); + + ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); reprs4.push_back( - - ocgs->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); - + + ocgs->CreateIdent(IS.set_var(outer_loop_bounds.size())->name())); + CG_outputRepr *repr2 = ocg->CreateArrayRefExpression(index_name, ocg->CreatePlus( - ocg->CreateIdent( - IS.set_var(outer_loop_bounds.size())->name()), - ocg->CreateInt(1))); - + ocg->CreateIdent( + IS.set_var(outer_loop_bounds.size())->name()), + ocg->CreateInt(1))); + //fprintf(stderr, "( VECTOR __)\n"); //fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "__").c_str()); - + this_loop->ir->CreateDefineMacro(index_name + "__", argvec, repr2); - + uninterpreted_symbols.insert( - std::pair >( - index_name + "__", reprs3)); + std::pair >( + index_name + "__", reprs3)); uninterpreted_symbols_string.insert( - std::pair >( - index_name + "__", reprs4)); + std::pair >( + index_name + "__", reprs4)); } else { Free_Var_Decl *ub = new Free_Var_Decl(index_name); Variable_ID csr_ub = IS.get_local(ub); - F_And * f_r = IS.and_with_and(); + F_And *f_r = IS.and_with_and(); GEQ_Handle upper_bound = f_r->add_GEQ(); upper_bound.update_coef(csr_ub, 1); upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1); upper_bound.update_const(-1); - + GEQ_Handle lower_bound = f_r->add_GEQ(); lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1); - + } - + for (int j = 1; j <= XFORM.n_inp(); j++) { omega::EQ_Handle h = f_root->add_EQ(); h.update_coef(XFORM.output_var(2 * j), 1); h.update_coef(XFORM.input_var(j), -1); } - + for (int j = 1; j <= XFORM.n_out(); j += 2) { omega::EQ_Handle h = f_root->add_EQ(); h.update_coef(XFORM.output_var(j), 1); } - + if (_DEBUG_) { IS.print(); XFORM.print(); - + } - + return std::pair(IS, XFORM); - + } std::pair construct_reduced_IS_And_XFORM(IR_Code *ir, - const Relation &is, const Relation &xform, const std::vector loops, + const Relation &is, const Relation &xform, + const std::vector loops, std::vector &lex_order, Relation &known, std::map > &uninterpreted_symbols) { - + Relation IS(loops.size()); Relation XFORM(loops.size(), 2 * loops.size() + 1); int count_ = 1; std::map pos_mapping; - + int n = is.n_set(); Relation is_and_known = Intersection(copy(is), Extend_Set(copy(known), n - known.n_set())); - + for (int it = 0; it < loops.size(); it++, count_++) { IS.name_set_var(count_, const_cast(is).set_var(loops[it])->name()); @@ -3963,11 +3960,11 @@ std::pair construct_reduced_IS_And_XFORM(IR_Code *ir, const_cast(xform).output_var((loops[it]) * 2 - 1)->name()); pos_mapping.insert(std::pair(count_, loops[it])); } - + XFORM.name_output_var(2 * loops.size() + 1, const_cast(xform).output_var(is.n_set() * 2 + 1)->name()); - - F_And * f_r = IS.add_and(); + + F_And *f_r = IS.add_and(); for (std::map::iterator it = pos_mapping.begin(); it != pos_mapping.end(); it++) IS = replace_set_var_as_another_set_var(IS, is_and_known, it->first, @@ -4012,9 +4009,9 @@ std::pair construct_reduced_IS_And_XFORM(IR_Code *ir, CHILL_DEBUG_END F_And *f_root = XFORM.add_and(); - + count_ = 1; - + for (int j = 1; j <= loops.size(); j++) { omega::EQ_Handle h = f_root->add_EQ(); h.update_coef(XFORM.output_var(2 * j), 1); @@ -4025,7 +4022,7 @@ std::pair construct_reduced_IS_And_XFORM(IR_Code *ir, h.update_coef(XFORM.output_var(count_ * 2 - 1), 1); h.update_const(-lex_order[count_ * 2 - 2]); } - + omega::EQ_Handle h = f_root->add_EQ(); h.update_coef(XFORM.output_var((loops.size()) * 2 + 1), 1); h.update_const(-lex_order[xform.n_out() - 1]); @@ -4035,34 +4032,34 @@ std::pair construct_reduced_IS_And_XFORM(IR_Code *ir, IS.print(); XFORM.print(); CHILL_DEBUG_END - + return std::pair(IS, XFORM); - + } std::set inspect_repr_for_scalars(IR_Code *ir, - CG_outputRepr * repr, std::set ignore) { - + CG_outputRepr *repr, std::set ignore) { + std::vector refs = ir->FindScalarRef(repr); std::set loop_vars; - + for (int i = 0; i < refs.size(); i++) if (ignore.find(refs[i]->name()) == ignore.end()) loop_vars.insert(refs[i]->name()); - + return loop_vars; - + } std::set inspect_loop_bounds(IR_Code *ir, const Relation &R, int pos, std::map > &uninterpreted_symbols) { - + if (!R.is_set()) throw loop_error("Input R has to be a set not a relation!"); - + std::set vars; - + std::vector refs; Variable_ID v = const_cast(R).set_var(pos); for (DNF_Iterator di(const_cast(R).query_DNF()); di; di++) { @@ -4071,48 +4068,48 @@ std::set inspect_loop_bounds(IR_Code *ir, const Relation &R, for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) { Variable_ID v = cvi.curr_var(); switch (v->kind()) { - - case Global_Var: { - Global_Var_ID g = v->get_global_var(); - Variable_ID v2; - if (g->arity() > 0) { - - std::string s = g->base_name(); - std::copy( - uninterpreted_symbols.find(s)->second.begin(), - uninterpreted_symbols.find(s)->second.end(), - back_inserter(refs)); - + + case Global_Var: { + Global_Var_ID g = v->get_global_var(); + Variable_ID v2; + if (g->arity() > 0) { + + std::string s = g->base_name(); + std::copy( + uninterpreted_symbols.find(s)->second.begin(), + uninterpreted_symbols.find(s)->second.end(), + back_inserter(refs)); + + } + + break; } - - break; - } - default: - break; + default: + break; } } - + } } } - + for (int i = 0; i < refs.size(); i++) { std::vector refs_ = ir->FindScalarRef(refs[i]); - + for (int j = 0; j < refs_.size(); j++) vars.insert(refs_[j]->name()); - + } return vars; } -CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R, - int pos, CG_outputRepr * count, - std::map > &uninterpreted_symbols) { - +CG_outputRepr *create_counting_loop_body(IR_Code *ir, const Relation &R, + int pos, CG_outputRepr *count, + std::map > &uninterpreted_symbols) { + if (!R.is_set()) throw loop_error("Input R has to be a set not a relation!"); - + CG_outputRepr *ub, *lb; ub = NULL; lb = NULL; @@ -4126,42 +4123,42 @@ CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R, for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) { Variable_ID v = cvi.curr_var(); switch (v->kind()) { - - case Global_Var: { - Global_Var_ID g = v->get_global_var(); - Variable_ID v2; - if (g->arity() > 0) { - - std::string s = g->base_name(); - - if ((*gi).get_coef(v) > 0) { - if (ub != NULL) - throw ir_error( - "bound expression too complex!"); - - ub = ir->builder()->CreateInvoke(s, - uninterpreted_symbols.find(s)->second); - //ub = ir->builder()->CreateMinus(ub->clone(), ir->builder()->CreateInt(-(*gi).get_const())); - same_ge_1 = true; - - } else { - if (lb != NULL) - throw ir_error( - "bound expression too complex!"); - lb = ir->builder()->CreateInvoke(s, - uninterpreted_symbols.find(s)->second); - same_ge_2 = true; - + + case Global_Var: { + Global_Var_ID g = v->get_global_var(); + Variable_ID v2; + if (g->arity() > 0) { + + std::string s = g->base_name(); + + if ((*gi).get_coef(v) > 0) { + if (ub != NULL) + throw ir_error( + "bound expression too complex!"); + + ub = ir->builder()->CreateInvoke(s, + uninterpreted_symbols.find(s)->second); + //ub = ir->builder()->CreateMinus(ub->clone(), ir->builder()->CreateInt(-(*gi).get_const())); + same_ge_1 = true; + + } else { + if (lb != NULL) + throw ir_error( + "bound expression too complex!"); + lb = ir->builder()->CreateInvoke(s, + uninterpreted_symbols.find(s)->second); + same_ge_2 = true; + + } } + + break; } - - break; - } - default: - break; + default: + break; } } - + if (same_ge_1 && same_ge_2) lb = ir->builder()->CreatePlus(lb->clone(), ir->builder()->CreateInt(-(*gi).get_const())); @@ -4173,181 +4170,179 @@ CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R, ir->builder()->CreateInt(-(*gi).get_const())); } } - + } - + return ir->builder()->CreatePlusAssignment(0, count, ir->builder()->CreatePlus( - ir->builder()->CreateMinus(ub->clone(), lb->clone()), - ir->builder()->CreateInt(1))); + ir->builder()->CreateMinus(ub->clone(), lb->clone()), + ir->builder()->CreateInt(1))); } - std::map > recurse_on_exp_for_arrays( - IR_Code * ir, CG_outputRepr * exp) { - + IR_Code *ir, CG_outputRepr *exp) { + std::map > arr_index_to_ref; switch (ir->QueryExpOperation(exp)) { - - case IR_OP_ARRAY_VARIABLE: { - IR_ArrayRef *ref = dynamic_cast(ir->Repr2Ref(exp)); - IR_PointerArrayRef *ref_ = - dynamic_cast(ir->Repr2Ref(exp)); - if (ref == NULL && ref_ == NULL) - throw loop_error("Array symbol unidentifiable!"); - - if (ref != NULL) { - std::vector s0; - - for (int i = 0; i < ref->n_dim(); i++) { - CG_outputRepr * index = ref->index(i); - std::map > a0 = - recurse_on_exp_for_arrays(ir, index); - std::vector s; - for (std::map >::iterator j = - a0.begin(); j != a0.end(); j++) { - if (j->second.size() != 1 && (j->second)[0] != "") - throw loop_error( - "indirect array references not allowed in guard!"); - s.push_back(j->first); + + case IR_OP_ARRAY_VARIABLE: { + IR_ArrayRef *ref = dynamic_cast(ir->Repr2Ref(exp)); + IR_PointerArrayRef *ref_ = + dynamic_cast(ir->Repr2Ref(exp)); + if (ref == NULL && ref_ == NULL) + throw loop_error("Array symbol unidentifiable!"); + + if (ref != NULL) { + std::vector s0; + + for (int i = 0; i < ref->n_dim(); i++) { + CG_outputRepr *index = ref->index(i); + std::map > a0 = + recurse_on_exp_for_arrays(ir, index); + std::vector s; + for (std::map >::iterator j = + a0.begin(); j != a0.end(); j++) { + if (j->second.size() != 1 && (j->second)[0] != "") + throw loop_error( + "indirect array references not allowed in guard!"); + s.push_back(j->first); + } + std::copy(s.begin(), s.end(), back_inserter(s0)); } - std::copy(s.begin(), s.end(), back_inserter(s0)); - } - arr_index_to_ref.insert( - std::pair >( - ref->name(), s0)); - } else { - std::vector s0; - for (int i = 0; i < ref_->n_dim(); i++) { - CG_outputRepr * index = ref_->index(i); - std::map > a0 = - recurse_on_exp_for_arrays(ir, index); - std::vector s; - for (std::map >::iterator j = - a0.begin(); j != a0.end(); j++) { - if (j->second.size() != 1 && (j->second)[0] != "") - throw loop_error( - "indirect array references not allowed in guard!"); - s.push_back(j->first); + arr_index_to_ref.insert( + std::pair >( + ref->name(), s0)); + } else { + std::vector s0; + for (int i = 0; i < ref_->n_dim(); i++) { + CG_outputRepr *index = ref_->index(i); + std::map > a0 = + recurse_on_exp_for_arrays(ir, index); + std::vector s; + for (std::map >::iterator j = + a0.begin(); j != a0.end(); j++) { + if (j->second.size() != 1 && (j->second)[0] != "") + throw loop_error( + "indirect array references not allowed in guard!"); + s.push_back(j->first); + } + std::copy(s.begin(), s.end(), back_inserter(s0)); } - std::copy(s.begin(), s.end(), back_inserter(s0)); + arr_index_to_ref.insert( + std::pair >( + ref_->name(), s0)); } - arr_index_to_ref.insert( - std::pair >( - ref_->name(), s0)); + break; } - break; - } - case IR_OP_PLUS: - case IR_OP_MINUS: - case IR_OP_MULTIPLY: - case IR_OP_DIVIDE: { - std::vector v = ir->QueryExpOperand(exp); - std::map > a0 = - recurse_on_exp_for_arrays(ir, v[0]); - std::map > a1 = - recurse_on_exp_for_arrays(ir, v[1]); - arr_index_to_ref.insert(a0.begin(), a0.end()); - arr_index_to_ref.insert(a1.begin(), a1.end()); - break; - - } - case IR_OP_POSITIVE: - case IR_OP_NEGATIVE: { - std::vector v = ir->QueryExpOperand(exp); - std::map > a0 = - recurse_on_exp_for_arrays(ir, v[0]); - - arr_index_to_ref.insert(a0.begin(), a0.end()); - break; - - } - case IR_OP_VARIABLE: { - std::vector v = ir->QueryExpOperand(exp); - IR_ScalarRef *ref = static_cast(ir->Repr2Ref(v[0])); - - std::string s = ref->name(); - std::vector to_insert; - to_insert.push_back(""); - arr_index_to_ref.insert( - std::pair >(s, - to_insert)); - break; - } - case IR_OP_CONSTANT: - break; - - default: { - std::vector v = ir->QueryExpOperand(exp); - - for (int i = 0; i < v.size(); i++) { + case IR_OP_PLUS: + case IR_OP_MINUS: + case IR_OP_MULTIPLY: + case IR_OP_DIVIDE: { + std::vector v = ir->QueryExpOperand(exp); std::map > a0 = - recurse_on_exp_for_arrays(ir, v[i]); - + recurse_on_exp_for_arrays(ir, v[0]); + std::map > a1 = + recurse_on_exp_for_arrays(ir, v[1]); arr_index_to_ref.insert(a0.begin(), a0.end()); + arr_index_to_ref.insert(a1.begin(), a1.end()); + break; + + } + case IR_OP_POSITIVE: + case IR_OP_NEGATIVE: { + std::vector v = ir->QueryExpOperand(exp); + std::map > a0 = + recurse_on_exp_for_arrays(ir, v[0]); + + arr_index_to_ref.insert(a0.begin(), a0.end()); + break; + + } + case IR_OP_VARIABLE: { + std::vector v = ir->QueryExpOperand(exp); + IR_ScalarRef *ref = static_cast(ir->Repr2Ref(v[0])); + + std::string s = ref->name(); + std::vector to_insert; + to_insert.push_back(""); + arr_index_to_ref.insert( + std::pair >(s, + to_insert)); + break; + } + case IR_OP_CONSTANT: + break; + + default: { + std::vector v = ir->QueryExpOperand(exp); + + for (int i = 0; i < v.size(); i++) { + std::map > a0 = + recurse_on_exp_for_arrays(ir, v[i]); + + arr_index_to_ref.insert(a0.begin(), a0.end()); + } + + break; } - - break; - } } return arr_index_to_ref; } - std::vector find_guards(IR_Code *ir, IR_Control *code) { CHILL_DEBUG_PRINT("find_guards()\n"); std::vector guards; switch (code->type()) { - case IR_CONTROL_IF: { - CHILL_DEBUG_PRINT("find_guards() it's an if\n"); - CG_outputRepr *cond = dynamic_cast(code)->condition(); - - std::vector then_body; - std::vector else_body; - IR_Block *ORTB = dynamic_cast(code)->then_body(); - if (ORTB != NULL) { - CHILL_DEBUG_PRINT("recursing on then\n"); - then_body = find_guards(ir, ORTB); - //dynamic_cast(code)->then_body()); - } - if (dynamic_cast(code)->else_body() != NULL) { - CHILL_DEBUG_PRINT("recursing on then\n"); - else_body = find_guards(ir, - dynamic_cast(code)->else_body()); + case IR_CONTROL_IF: { + CHILL_DEBUG_PRINT("find_guards() it's an if\n"); + CG_outputRepr *cond = dynamic_cast(code)->condition(); + + std::vector then_body; + std::vector else_body; + IR_Block *ORTB = dynamic_cast(code)->then_body(); + if (ORTB != NULL) { + CHILL_DEBUG_PRINT("recursing on then\n"); + then_body = find_guards(ir, ORTB); + //dynamic_cast(code)->then_body()); + } + if (dynamic_cast(code)->else_body() != NULL) { + CHILL_DEBUG_PRINT("recursing on then\n"); + else_body = find_guards(ir, + dynamic_cast(code)->else_body()); + } + + guards.push_back(cond); + if (then_body.size() > 0) + std::copy(then_body.begin(), then_body.end(), + back_inserter(guards)); + if (else_body.size() > 0) + std::copy(else_body.begin(), else_body.end(), + back_inserter(guards)); + break; } - - guards.push_back(cond); - if (then_body.size() > 0) - std::copy(then_body.begin(), then_body.end(), - back_inserter(guards)); - if (else_body.size() > 0) - std::copy(else_body.begin(), else_body.end(), - back_inserter(guards)); - break; - } - case IR_CONTROL_BLOCK: { - CHILL_DEBUG_PRINT("it's a control block\n"); - IR_Block* IRCB = dynamic_cast(code); - CHILL_DEBUG_PRINT("calling ir->FindOneLevelControlStructure(IRCB);\n"); - std::vector stmts = ir->FindOneLevelControlStructure(IRCB); - - for (int i = 0; i < stmts.size(); i++) { - std::vector stmt_repr = find_guards(ir, stmts[i]); - std::copy(stmt_repr.begin(), stmt_repr.end(), - back_inserter(guards)); + case IR_CONTROL_BLOCK: { + CHILL_DEBUG_PRINT("it's a control block\n"); + IR_Block *IRCB = dynamic_cast(code); + CHILL_DEBUG_PRINT("calling ir->FindOneLevelControlStructure(IRCB);\n"); + std::vector stmts = ir->FindOneLevelControlStructure(IRCB); + + for (int i = 0; i < stmts.size(); i++) { + std::vector stmt_repr = find_guards(ir, stmts[i]); + std::copy(stmt_repr.begin(), stmt_repr.end(), + back_inserter(guards)); + } + break; } - break; - } - case IR_CONTROL_LOOP: { - CHILL_DEBUG_PRINT("it's a control loop\n"); - std::vector body = find_guards(ir, - dynamic_cast(code)->body()); - if (body.size() > 0) - std::copy(body.begin(), body.end(), back_inserter(guards)); - break; - } // loop + case IR_CONTROL_LOOP: { + CHILL_DEBUG_PRINT("it's a control loop\n"); + std::vector body = find_guards(ir, + dynamic_cast(code)->body()); + if (body.size() > 0) + std::copy(body.begin(), body.end(), back_inserter(guards)); + break; + } // loop } // switch return guards; } @@ -4359,43 +4354,43 @@ bool sort_helper(std::pair > i, for (int k = 0; k < i.second.size(); k++) if (i.second[k] != "") c1++; - + for (int k = 0; k < j.second.size(); k++) if (j.second[k] != "") c2++; return (c1 < c2); - + } bool sort_helper_2(std::pair i, std::pair j) { - + return (i.second < j.second); - + } std::vector construct_iteration_order( - std::map > & input) { + std::map > &input) { std::vector arrays; std::vector scalars; std::vector > > input_aid; - + for (std::map >::iterator j = - input.begin(); j != input.end(); j++) + input.begin(); j != input.end(); j++) input_aid.push_back( - std::pair >(j->first, - j->second)); - + std::pair >(j->first, + j->second)); + std::sort(input_aid.begin(), input_aid.end(), sort_helper); - + for (int j = 0; j < input_aid[input_aid.size() - 1].second.size(); j++) if (input_aid[input_aid.size() - 1].second[j] != "") { arrays.push_back(input_aid[input_aid.size() - 1].second[j]); - + } - + if (arrays.size() > 0) { for (int i = input_aid.size() - 2; i >= 0; i--) { - + int max_count = 0; for (int j = 0; j < input_aid[i].second.size(); j++) if (input_aid[i].second[j] != "") { @@ -4421,7 +4416,7 @@ std::vector construct_iteration_order( } } } else { - + for (int i = input_aid.size() - 1; i >= 0; i--) { arrays.push_back(input_aid[i].first); } diff --git a/src/transformations/loop_basic.cc b/src/transformations/loop_basic.cc index a058598..1be0981 100644 --- a/src/transformations/loop_basic.cc +++ b/src/transformations/loop_basic.cc @@ -19,7 +19,7 @@ void Loop::permute(const std::vector &pi) { std::set active; for (int i = 0; i < stmt.size(); i++) active.insert(i); - + permute(active, pi); } @@ -30,12 +30,13 @@ void Loop::original() { setLexicalOrder(0, active); //apply_xform(); } + void Loop::permute(int stmt_num, int level, const std::vector &pi) { // check for sanity of parameters int starting_order; if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(stmt_num)); + "invalid statement number " + to_string(stmt_num)); std::set active; if (level < 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("3invalid loop level " + to_string(level)); @@ -61,14 +62,14 @@ void Loop::permute(int stmt_num, int level, const std::vector &pi) { for (std::set::iterator i = active.begin(); i != active.end(); i++) if (level + pi.size() - 1 > stmt[*i].loop_level.size()) throw std::invalid_argument( - "invalid permutation for statement " + to_string(*i)); - + "invalid permutation for statement " + to_string(*i)); + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + // Update transformation relations for (std::set::iterator i = active.begin(); i != active.end(); i++) { int n = stmt[*i].xform.n_out(); @@ -97,7 +98,7 @@ void Loop::permute(int stmt_num, int level, const std::vector &pi) { stmt[*i].xform = Composition(mapping, stmt[*i].xform); stmt[*i].xform.simplify(); } - + // get the permuation for dependence vectors std::vector t; for (int i = 0; i < pi.size(); i++) @@ -122,41 +123,41 @@ void Loop::permute(int stmt_num, int level, const std::vector &pi) { dep_pi[i] = t[i - min_dep_dim]; for (int i = max_dep_dim + 1; i < dep.num_dim(); i++) dep_pi[i] = i; - + dep.permute(dep_pi, active); - + // update the dependence graph DependenceGraph g(dep.num_dim()); for (int i = 0; i < dep.vertex.size(); i++) g.insert(); for (int i = 0; i < dep.vertex.size(); i++) for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { if ((active.find(i) != active.end() && active.find(j->first) != active.end())) { std::vector dv = j->second; for (int k = 0; k < dv.size(); k++) { switch (dv[k].type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - std::vector lbounds(dep.num_dim()); - std::vector ubounds(dep.num_dim()); - for (int d = 0; d < dep.num_dim(); d++) { - lbounds[d] = dv[k].lbounds[dep_pi[d]]; - ubounds[d] = dv[k].ubounds[dep_pi[d]]; + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector lbounds(dep.num_dim()); + std::vector ubounds(dep.num_dim()); + for (int d = 0; d < dep.num_dim(); d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; } - dv[k].lbounds = lbounds; - dv[k].ubounds = ubounds; - break; - } - case DEP_CONTROL: { - break; - } - default: - throw loop_error("unknown dependence type"); + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); } } g.connect(i, j->first, dv); @@ -168,27 +169,27 @@ void Loop::permute(int stmt_num, int level, const std::vector &pi) { std::vector dv = j->second; for (int k = 0; k < dv.size(); k++) switch (dv[k].type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - for (int d = 0; d < dep.num_dim(); d++) - if (dep_pi[d] != d) { - dv[k].lbounds[d] = -posInfinity; - dv[k].ubounds[d] = posInfinity; - } - break; - } - case DEP_CONTROL: - break; - default: - throw loop_error("unknown dependence type"); + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < dep.num_dim(); d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); } g.connect(i, j->first, dv); } } dep = g; - + // update loop level information for (std::set::iterator i = active.begin(); i != active.end(); i++) { int cur_dep_dim = min_dep_dim; @@ -196,66 +197,67 @@ void Loop::permute(int stmt_num, int level, const std::vector &pi) { for (int j = 1; j <= stmt[*i].loop_level.size(); j++) if (j >= level && j < level + pi.size()) { switch (stmt[*i].loop_level[pi_inverse[j - level] - 1].type) { - case LoopLevelOriginal: - new_loop_level[j - 1].type = LoopLevelOriginal; - new_loop_level[j - 1].payload = cur_dep_dim++; - new_loop_level[j - 1].parallel_level = - stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level; - break; - case LoopLevelTile: { - new_loop_level[j - 1].type = LoopLevelTile; - int ref_level = stmt[*i].loop_level[pi_inverse[j - level] - - 1].payload; - if (ref_level >= level && ref_level < level + pi.size()) - new_loop_level[j - 1].payload = pi_inverse[ref_level - - level]; - else - new_loop_level[j - 1].payload = ref_level; - new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - - 1].parallel_level; - break; - } - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(*i)); + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[pi_inverse[j - level] + - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); } } else { switch (stmt[*i].loop_level[j - 1].type) { - case LoopLevelOriginal: - new_loop_level[j - 1].type = LoopLevelOriginal; - new_loop_level[j - 1].payload = - stmt[*i].loop_level[j - 1].payload; - new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - - 1].parallel_level; - break; - case LoopLevelTile: { - new_loop_level[j - 1].type = LoopLevelTile; - int ref_level = stmt[*i].loop_level[j - 1].payload; - if (ref_level >= level && ref_level < level + pi.size()) - new_loop_level[j - 1].payload = pi_inverse[ref_level - - level]; - else - new_loop_level[j - 1].payload = ref_level; - new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - - 1].parallel_level; - break; - } - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(*i)); + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); } } stmt[*i].loop_level = new_loop_level; } - + setLexicalOrder(2 * level - 2, active, starting_order); } + void Loop::permute(const std::set &active, const std::vector &pi) { if (active.size() == 0 || pi.size() == 0) return; - + // check for sanity of parameters int level = pi[0]; for (int i = 1; i < pi.size(); i++) @@ -287,14 +289,14 @@ void Loop::permute(const std::set &active, const std::vector &pi) { for (int j = 0; j < 2 * level - 3; j += 2) if (lex[j] != lex2[j]) throw std::invalid_argument( - "statements to permute must be in the same subloop"); + "statements to permute must be in the same subloop"); for (int j = 0; j < pi.size(); j++) if (!(stmt[*i].loop_level[level + j - 1].type == stmt[ref_stmt_num].loop_level[level + j - 1].type && stmt[*i].loop_level[level + j - 1].payload - == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) + == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) throw std::invalid_argument( - "permuted loops must have the same loop level types"); + "permuted loops must have the same loop level types"); } } // invalidate saved codegen computation @@ -302,7 +304,7 @@ void Loop::permute(const std::set &active, const std::vector &pi) { last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + // Update transformation relations for (std::set::iterator i = active.begin(); i != active.end(); i++) { int n = stmt[*i].xform.n_out(); @@ -328,11 +330,11 @@ void Loop::permute(const std::set &active, const std::vector &pi) { h.update_coef(mapping.output_var(2 * j), 1); h.update_coef(mapping.input_var(2 * j), -1); } - + stmt[*i].xform = Composition(mapping, stmt[*i].xform); stmt[*i].xform.simplify(); } - + // get the permuation for dependence vectors std::vector t; for (int i = 0; i < pi.size(); i++) @@ -357,41 +359,41 @@ void Loop::permute(const std::set &active, const std::vector &pi) { dep_pi[i] = t[i - min_dep_dim]; for (int i = max_dep_dim + 1; i < num_dep_dim; i++) dep_pi[i] = i; - + dep.permute(dep_pi, active); - + // update the dependence graph DependenceGraph g(dep.num_dim()); for (int i = 0; i < dep.vertex.size(); i++) g.insert(); for (int i = 0; i < dep.vertex.size(); i++) for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { // if ((active.find(i) != active.end() && active.find(j->first) != active.end())) { std::vector dv = j->second; for (int k = 0; k < dv.size(); k++) { switch (dv[k].type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - std::vector lbounds(num_dep_dim); - std::vector ubounds(num_dep_dim); - for (int d = 0; d < num_dep_dim; d++) { - lbounds[d] = dv[k].lbounds[dep_pi[d]]; - ubounds[d] = dv[k].ubounds[dep_pi[d]]; + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector lbounds(num_dep_dim); + std::vector ubounds(num_dep_dim); + for (int d = 0; d < num_dep_dim; d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; } - dv[k].lbounds = lbounds; - dv[k].ubounds = ubounds; - break; - } - case DEP_CONTROL: { - break; - } - default: - throw loop_error("unknown dependence type"); + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); } } g.connect(i, j->first, dv); @@ -403,27 +405,27 @@ void Loop::permute(const std::set &active, const std::vector &pi) { std::vector dv = j->second; for (int k = 0; k < dv.size(); k++) switch (dv[k].type) { - case DEP_W2R: - case DEP_R2W: - case DEP_W2W: - case DEP_R2R: { - for (int d = 0; d < num_dep_dim; d++) - if (dep_pi[d] != d) { - dv[k].lbounds[d] = -posInfinity; - dv[k].ubounds[d] = posInfinity; - } - break; - } - case DEP_CONTROL: - break; - default: - throw loop_error("unknown dependence type"); + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < num_dep_dim; d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); } g.connect(i, j->first, dv); } } dep = g; - + // update loop level information for (std::set::iterator i = active.begin(); i != active.end(); i++) { int cur_dep_dim = min_dep_dim; @@ -431,65 +433,65 @@ void Loop::permute(const std::set &active, const std::vector &pi) { for (int j = 1; j <= stmt[*i].loop_level.size(); j++) if (j >= level && j < level + pi.size()) { switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { - case LoopLevelOriginal: - new_loop_level[j - 1].type = LoopLevelOriginal; - new_loop_level[j - 1].payload = cur_dep_dim++; - new_loop_level[j - 1].parallel_level = - stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; - break; - case LoopLevelTile: { - new_loop_level[j - 1].type = LoopLevelTile; - int ref_level = stmt[*i].loop_level[reverse_pi[j - level]-1].payload; - if (ref_level >= level && ref_level < level + pi.size()) - new_loop_level[j - 1].payload = reverse_pi[ref_level - - level]; - else - new_loop_level[j - 1].payload = ref_level; - new_loop_level[j - 1].parallel_level = - stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; - break; - } - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(*i)); + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[reverse_pi[j - level] - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); } } else { switch (stmt[*i].loop_level[j - 1].type) { - case LoopLevelOriginal: - new_loop_level[j - 1].type = LoopLevelOriginal; - new_loop_level[j - 1].payload = - stmt[*i].loop_level[j - 1].payload; - new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - - 1].parallel_level; - break; - case LoopLevelTile: { - new_loop_level[j - 1].type = LoopLevelTile; - int ref_level = stmt[*i].loop_level[j - 1].payload; - if (ref_level >= level && ref_level < level + pi.size()) - new_loop_level[j - 1].payload = reverse_pi[ref_level - - level]; - else - new_loop_level[j - 1].payload = ref_level; - new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j - - 1].parallel_level; - break; - } - default: - throw loop_error( - "unknown loop level information for statement " - + to_string(*i)); + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); } } stmt[*i].loop_level = new_loop_level; } - + setLexicalOrder(2 * level - 2, active); } -void Loop::set_array_size(std::string name, int size ){ - array_dims.insert(std::pair(name, size)); +void Loop::set_array_size(std::string name, int size) { + array_dims.insert(std::pair(name, size)); } @@ -499,23 +501,23 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { throw std::invalid_argument("invalid statement " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("4invalid loop level " + to_string(level)); - + std::set result; int dim = 2 * level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_loop = getStatements(lex, dim - 1); - + Relation cond2 = copy(cond); cond2.simplify(); cond2 = EQs_to_GEQs(cond2); Conjunct *c = cond2.single_conjunct(); int cur_lex = lex[dim - 1]; - + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { int max_level = (*gi).max_tuple_pos(); Relation single_cond(max_level); single_cond.and_with_GEQ(*gi); - + // TODO: should decide where to place newly created statements with // complementary split condition from dependence graph. bool place_after; @@ -525,7 +527,7 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { place_after = true; else place_after = false; - + bool temp_place_after; // = place_after; bool assigned = false; int part1_to_part2; @@ -549,11 +551,11 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { Extend_Set(Complement(copy(single_cond)), n - max_level)); } - + //split dependence check - + if (max_level > level) { - + DNF_Iterator di1(stmt[*i].IS.query_DNF()); DNF_Iterator di2(part1.query_DNF()); for (; di1 && di2; di1++, di2++) { @@ -569,34 +571,34 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { - + for (; cvi1 && cvi2; cvi1++, cvi2++) { - + if (((*cvi1).coef != (*cvi2).coef || (*ei1).get_const() - != (*ei2).get_const()) + != (*ei2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { - + same++; } } } if ((same != 0) || !identical) { - + dimension = dimension - 1; - + while (stmt[*i].loop_level[dimension].type == LoopLevelTile) dimension = - stmt[*i].loop_level[dimension].payload; - + stmt[*i].loop_level[dimension].payload; + dimension = stmt[*i].loop_level[dimension].payload; - + for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; @@ -604,19 +606,19 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { if (dv.hasNegative(dimension) && !dv.quasi) throw loop_error( - "loop error: Split is illegal, dependence violation!"); - + "loop error: Split is illegal, dependence violation!"); + } } } - + } - + GEQ_Iterator gi1 = (*di1)->GEQs(); GEQ_Iterator gi2 = (*di2)->GEQs(); - + for (; gi1 && gi2; gi++, gi2++) { - + Constr_Vars_Iter cvi1(*gi1); Constr_Vars_Iter cvi2(*gi2); int dimension = (*cvi1).var->get_position(); @@ -624,33 +626,33 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { - + for (; cvi1 && cvi2; cvi1++, cvi2++) { - + if (((*cvi1).coef != (*cvi2).coef || (*gi1).get_const() - != (*gi2).get_const()) + != (*gi2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { - + same++; } } } if ((same != 0) || !identical) { dimension = dimension - 1; - + while (stmt[*i].loop_level[dimension].type == LoopLevelTile) stmt[*i].loop_level[dimension].payload; - + dimension = - stmt[*i].loop_level[dimension].payload; - + stmt[*i].loop_level[dimension].payload; + for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); @@ -659,22 +661,22 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { if (dv.type != DEP_CONTROL) if (dv.hasNegative(dimension) && !dv.quasi) - + throw loop_error( - "loop error: Split is illegal, dependence violation!"); - + "loop error: Split is illegal, dependence violation!"); + } } } - + } - + } - + } - + } - + DNF_Iterator di3(stmt[*i].IS.query_DNF()); DNF_Iterator di4(part2.query_DNF()); // for (; di3 && di4; di3++, di4++) { @@ -688,52 +690,52 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { - + for (; cvi1 && cvi2; cvi1++, cvi2++) { - + if (((*cvi1).coef != (*cvi2).coef || (*ei1).get_const() - != (*ei2).get_const()) + != (*ei2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { - + same++; } } } if ((same != 0) || !identical) { dimension = dimension - 1; - + while (stmt[*i].loop_level[dimension].type == LoopLevelTile) stmt[*i].loop_level[dimension].payload; - + dimension = stmt[*i].loop_level[dimension].payload; - + for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.type != DEP_CONTROL) if (dv.hasNegative(dimension) && !dv.quasi) - + throw loop_error( - "loop error: Split is illegal, dependence violation!"); - + "loop error: Split is illegal, dependence violation!"); + } } } - + } - + } GEQ_Iterator gi1 = (*di3)->GEQs(); GEQ_Iterator gi2 = (*di4)->GEQs(); - + for (; gi1 && gi2; gi++, gi2++) { Constr_Vars_Iter cvi1(*gi1); Constr_Vars_Iter cvi2(*gi2); @@ -742,66 +744,66 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { bool identical = false; if (identical = !strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name())) { - + for (; cvi1 && cvi2; cvi1++, cvi2++) { - + if (((*cvi1).coef != (*cvi2).coef || (*gi1).get_const() - != (*gi2).get_const()) + != (*gi2).get_const()) || (strcmp((*cvi1).var->char_name(), (*cvi2).var->char_name()))) { - + same++; } } } if ((same != 0) || !identical) { dimension = dimension - 1; - + while (stmt[*i].loop_level[dimension].type // == LoopLevelTile) stmt[*i].loop_level[dimension].payload; - + dimension = stmt[*i].loop_level[dimension].payload; - + for (int i = 0; i < stmt.size(); i++) { std::vector > D; for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.type != DEP_CONTROL) if (dv.hasNegative(dimension) && !dv.quasi) - + throw loop_error( - "loop error: Split is illegal, dependence violation!"); - + "loop error: Split is illegal, dependence violation!"); + } } } - + } - + } - + } - + } - + stmt[*i].IS = part1; - + int n1 = part2.n_set(); int m = this->known.n_set(); Relation test; - if(m > n1) + if (m > n1) test = Intersection(copy(this->known), Extend_Set(copy(part2), m - part2.n_set())); else test = Intersection(copy(part2), Extend_Set(copy(this->known), n1 - this->known.n_set())); - + if (test.is_upper_bound_satisfiable()) { Statement new_stmt; new_stmt.code = stmt[*i].code->clone(); @@ -809,20 +811,20 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { new_stmt.xform = copy(stmt[*i].xform); new_stmt.ir_stmt_node = NULL; new_stmt.loop_level = stmt[*i].loop_level; - + new_stmt.has_inspector = stmt[*i].has_inspector; new_stmt.reduction = stmt[*i].reduction; new_stmt.reductionOp = stmt[*i].reductionOp; - + stmt_nesting_level_.push_back(stmt_nesting_level_[*i]); - - + + if (place_after) assign_const(new_stmt.xform, dim - 1, cur_lex + 1); else assign_const(new_stmt.xform, dim - 1, cur_lex - 1); - - fprintf(stderr, "loop_basic.cc L828 adding stmt %d\n", stmt.size()); + + fprintf(stderr, "loop_basic.cc L828 adding stmt %d\n", stmt.size()); stmt.push_back(new_stmt); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); @@ -832,7 +834,7 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { if (*i == stmt_num) result.insert(stmt.size() - 1); } - + } // make adjacent lexical number available for new statements if (place_after) { @@ -846,20 +848,20 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { int dep_dim = get_dep_dim_of(stmt_num, level); for (int i = 0; i < old_num_stmt; i++) { std::vector > > D; - + for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { if (what_stmt_num.find(i) != what_stmt_num.end() && what_stmt_num.find(j->first) - != what_stmt_num.end()) + != what_stmt_num.end()) dep.connect(what_stmt_num[i], what_stmt_num[j->first], j->second); if (place_after && what_stmt_num.find(j->first) - != what_stmt_num.end()) { + != what_stmt_num.end()) { std::vector dvs; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; @@ -871,11 +873,11 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { } if (dvs.size() > 0) D.push_back( - std::make_pair(what_stmt_num[j->first], - dvs)); + std::make_pair(what_stmt_num[j->first], + dvs)); } else if (!place_after && what_stmt_num.find(i) - != what_stmt_num.end()) { + != what_stmt_num.end()) { std::vector dvs; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; @@ -887,7 +889,7 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { } if (dvs.size() > 0) dep.connect(what_stmt_num[i], j->first, dvs); - + } } else { if (what_stmt_num.find(i) != what_stmt_num.end()) @@ -896,17 +898,17 @@ std::set Loop::split(int stmt_num, int level, const Relation &cond) { } else if (same_loop.find(j->first) != same_loop.end()) { if (what_stmt_num.find(j->first) != what_stmt_num.end()) D.push_back( - std::make_pair(what_stmt_num[j->first], - j->second)); + std::make_pair(what_stmt_num[j->first], + j->second)); } } - + for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } - + } - + return result; } @@ -914,28 +916,28 @@ void Loop::skew(const std::set &stmt_nums, int level, const std::vector &skew_amount) { if (stmt_nums.size() == 0) return; - + // check for sanity of parameters int ref_stmt_num = *(stmt_nums.begin()); for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { if (*i < 0 || *i >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(*i)); + "invalid statement number " + to_string(*i)); if (level < 1 || level > stmt[*i].loop_level.size()) throw std::invalid_argument( - "5invalid loop level " + to_string(level)); + "5invalid loop level " + to_string(level)); for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) if (skew_amount[j] != 0) throw std::invalid_argument("invalid skewing formula"); } - + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + // set trasformation relations for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { @@ -953,18 +955,18 @@ void Loop::skew(const std::set &stmt_nums, int level, for (int j = 0; j < skew_amount.size(); j++) if (skew_amount[j] != 0) h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); - + stmt[*i].xform = Composition(r, stmt[*i].xform); stmt[*i].xform.simplify(); } - + // update dependence graph if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) if (stmt_nums.find(j->first) != stmt_nums.end()) { // dependence between skewed statements @@ -984,7 +986,7 @@ void Loop::skew(const std::set &stmt_nums, int level, else { if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 - && dv.ubounds[cur_dep_dim]== 0)) + && dv.ubounds[cur_dep_dim] == 0)) lb = -posInfinity; } if (ub != posInfinity @@ -1022,24 +1024,24 @@ void Loop::skew(const std::set &stmt_nums, int level, } dv.lbounds[dep_dim] = lb; dv.ubounds[dep_dim] = ub; - if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim)) + if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim)) && dv.quasi) dv.quasi = false; - - if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim)) + + if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim)) && !dv.quasi) throw loop_error( - "loop error: Skewing is illegal, dependence violation!"); + "loop error: Skewing is illegal, dependence violation!"); dv.lbounds[dep_dim] = lb; dv.ubounds[dep_dim] = ub; if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim)) && dv.quasi) dv.quasi = false; - + if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim)) && !dv.quasi) throw loop_error( - "loop error: Skewing is illegal, dependence violation!"); + "loop error: Skewing is illegal, dependence violation!"); } } j->second = dvs; @@ -1059,7 +1061,7 @@ void Loop::skew(const std::set &stmt_nums, int level, for (int i = 0; i < dep.vertex.size(); i++) if (stmt_nums.find(i) == stmt_nums.end()) for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) if (stmt_nums.find(j->first) != stmt_nums.end()) { // dependence from unskewed statement to skewed statement becomes jumbled, @@ -1081,34 +1083,34 @@ void Loop::skew(const std::set &stmt_nums, int level, void Loop::shift(const std::set &stmt_nums, int level, int shift_amount) { if (stmt_nums.size() == 0) return; - + // check for sanity of parameters int ref_stmt_num = *(stmt_nums.begin()); for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { if (*i < 0 || *i >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(*i)); + "invalid statement number " + to_string(*i)); if (level < 1 || level > stmt[*i].loop_level.size()) throw std::invalid_argument( - "6invalid loop level " + to_string(level)); + "6invalid loop level " + to_string(level)); } - + // do nothing if (shift_amount == 0) return; - + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + // set trasformation relations for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { int n = stmt[*i].xform.n_out(); - + Relation r(n, n); F_And *f_root = r.add_and(); for (int j = 1; j <= n; j++) { @@ -1118,18 +1120,18 @@ void Loop::shift(const std::set &stmt_nums, int level, int shift_amount) { if (j == 2 * level) h.update_const(shift_amount); } - + stmt[*i].xform = Composition(r, stmt[*i].xform); stmt[*i].xform.simplify(); } - + // update dependence graph if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) if (stmt_nums.find(j->first) == stmt_nums.end()) { // dependence from shifted statement to unshifted statement @@ -1148,7 +1150,7 @@ void Loop::shift(const std::set &stmt_nums, int level, int shift_amount) { for (int i = 0; i < dep.vertex.size(); i++) if (stmt_nums.find(i) == stmt_nums.end()) for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) if (stmt_nums.find(j->first) != stmt_nums.end()) { // dependence from unshifted statement to shifted statement @@ -1180,35 +1182,36 @@ void Loop::reverse(const std::set &stmt_nums, int level) { void Loop::fuse(const std::set &stmt_nums, int level) { if (stmt_nums.size() == 0 || stmt_nums.size() == 1) return; - + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + int dim = 2 * level - 1; // check for sanity of parameters std::vector ref_lex; int ref_stmt_num; - apply_xform(); + apply_xform(); for (std::set::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { if (*i < 0 || *i >= stmt.size()) { - fprintf(stderr, "statement number %d should be in [0, %d)\n", *i, stmt.size()); + fprintf(stderr, "statement number %d should be in [0, %d)\n", *i, stmt.size()); throw std::invalid_argument( - "FUSE invalid statement number " + to_string(*i)); + "FUSE invalid statement number " + to_string(*i)); } if (level <= 0 - // || (level > (stmt[*i].xform.n_out() - 1) / 2 - // || level > stmt[*i].loop_level.size()) - ) { + // || (level > (stmt[*i].xform.n_out() - 1) / 2 + // || level > stmt[*i].loop_level.size()) + ) { fprintf(stderr, "FUSE level %d ", level); fprintf(stderr, "must be greater than zero and \n"); - fprintf(stderr, "must NOT be greater than (%d - 1)/2 == %d and\n", stmt[*i].xform.n_out(), (stmt[*i].xform.n_out() - 1) / 2); + fprintf(stderr, "must NOT be greater than (%d - 1)/2 == %d and\n", stmt[*i].xform.n_out(), + (stmt[*i].xform.n_out() - 1) / 2); fprintf(stderr, "must NOT be greater than %d\n", stmt[*i].loop_level.size()); throw std::invalid_argument( - "FUSE invalid loop level " + to_string(level)); + "FUSE invalid loop level " + to_string(level)); } if (ref_lex.size() == 0) { ref_lex = getLexicalOrder(*i); @@ -1218,11 +1221,11 @@ void Loop::fuse(const std::set &stmt_nums, int level) { for (int j = 0; j < dim - 1; j += 2) if (lex[j] != ref_lex[j]) throw std::invalid_argument( - "statements for fusion must be in the same level-" - + to_string(level - 1) + " subloop"); + "statements for fusion must be in the same level-" + + to_string(level - 1) + " subloop"); } } - + // collect lexicographical order values from to-be-fused statements std::set lex_values; for (std::set::const_iterator i = stmt_nums.begin(); @@ -1233,9 +1236,9 @@ void Loop::fuse(const std::set &stmt_nums, int level) { if (lex_values.size() == 1) return; // negative dependence would prevent fusion - + int dep_dim = get_dep_dim_of(ref_stmt_num, level); - + for (std::set::iterator i = lex_values.begin(); i != lex_values.end(); i++) { ref_lex[dim - 1] = *i; @@ -1254,25 +1257,25 @@ void Loop::fuse(const std::set &stmt_nums, int level) { if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) throw loop_error( - "loop error: statements " + to_string(*ii) - + " and " + to_string(*jj) - + " cannot be fused together due to negative dependence"); + "loop error: statements " + to_string(*ii) + + " and " + to_string(*jj) + + " cannot be fused together due to negative dependence"); dvs = dep.getEdge(*jj, *ii); for (int k = 0; k < dvs.size(); k++) if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) throw loop_error( - "loop error: statements " + to_string(*jj) - + " and " + to_string(*ii) - + " cannot be fused together due to negative dependence"); + "loop error: statements " + to_string(*jj) + + " and " + to_string(*ii) + + " cannot be fused together due to negative dependence"); } } } - + std::set same_loop = getStatements(ref_lex, dim - 3); - + std::vector > s = sort_by_same_loops(same_loop, level); - + std::vector s2; for (int i = 0; i < s.size(); i++) { @@ -1283,27 +1286,27 @@ void Loop::fuse(const std::set &stmt_nums, int level) { kk++) for (int i = 0; i < s.size(); i++) if (s[i].find(*kk) != s[i].end()) { - + s2[i] = true; } - + try { - + //Dependence Check for Ordering Constraint //Graph, bool> dummy = construct_induced_graph_at_level(s5, // dep, dep_dim); - + Graph, bool> g = construct_induced_graph_at_level(s, dep, dep_dim); std::cout << g; s = typed_fusion(g, s2); } catch (const loop_error &e) { - + throw loop_error( - "statements cannot be fused together due to negative dependence"); - + "statements cannot be fused together due to negative dependence"); + } - + int order = 0; for (int i = 0; i < s.size(); i++) { for (std::set::iterator it = s[i].begin(); it != s[i].end(); it++) { @@ -1314,7 +1317,7 @@ void Loop::fuse(const std::set &stmt_nums, int level) { //plan for selective typed fusion - + /* 1. sort the lex values of the statements 2. construct induced graph on sorted statements @@ -1419,7 +1422,6 @@ void Loop::fuse(const std::set &stmt_nums, int level) { } - void Loop::distribute(const std::set &stmt_nums, int level) { if (stmt_nums.size() == 0 || stmt_nums.size() == 1) return; @@ -1439,13 +1441,13 @@ void Loop::distribute(const std::set &stmt_nums, int level) { i != stmt_nums.end(); i++) { if (*i < 0 || *i >= stmt.size()) throw std::invalid_argument( - "invalid statement number " + to_string(*i)); - + "invalid statement number " + to_string(*i)); + if (level < 1 || (level > (stmt[*i].xform.n_out() - 1) / 2 || level > stmt[*i].loop_level.size())) throw std::invalid_argument( - "8invalid loop level " + to_string(level)); + "8invalid loop level " + to_string(level)); if (ref_lex.size() == 0) { ref_lex = getLexicalOrder(*i); ref_stmt_num = *i; @@ -1454,8 +1456,8 @@ void Loop::distribute(const std::set &stmt_nums, int level) { for (int j = 0; j <= dim - 1; j += 2) if (lex[j] != ref_lex[j]) throw std::invalid_argument( - "statements for distribution must be in the same level-" - + to_string(level) + " subloop"); + "statements for distribution must be in the same level-" + + to_string(level) + " subloop"); } } @@ -1517,7 +1519,7 @@ void Loop::distribute(const std::set &stmt_nums, int level) { // nothing to distribute if (s2.size() == 1) throw loop_error( - "loop error: no statement can be distributed due to dependence cycle"); + "loop error: no statement can be distributed due to dependence cycle"); std::vector > s3; for (int i = 0; i < s2.size(); i++) { std::set t; @@ -1564,16 +1566,14 @@ void Loop::distribute(const std::set &stmt_nums, int level) { order++; } // no need to update dependence graph - + return; } - - std::vector FindOuterArrayRefs(IR_Code *ir, std::vector &arr_refs) { - std::vector to_return; + std::vector to_return; for (int i = 0; i < arr_refs.size(); i++) if (!ir->parent_is_array(arr_refs[i])) { int j; @@ -1587,38 +1587,36 @@ std::vector FindOuterArrayRefs(IR_Code *ir, } - - - std::vector > constructInspectorVariables(IR_Code *ir, - std::set &arr, std::vector &index) { - - fprintf(stderr, "constructInspectorVariables()\n"); + std::set &arr, + std::vector &index) { + + fprintf(stderr, "constructInspectorVariables()\n"); std::vector > to_return; - + for (std::set::iterator i = arr.begin(); i != arr.end(); i++) { - + std::vector per_index; - + CG_outputRepr *subscript = (*i)->index(0); - + if ((*i)->n_dim() > 1) throw ir_error( - "multi-dimensional array support non-existent for flattening currently"); - + "multi-dimensional array support non-existent for flattening currently"); + while (ir->QueryExpOperation(subscript) == IR_OP_ARRAY_VARIABLE) { - + std::vector v = ir->QueryExpOperand(subscript); - + IR_ArrayRef *ref = static_cast(ir->Repr2Ref(v[0])); //per_index.push_back(ref->name()); - + subscript = ref->index(0); - + } - + if (ir->QueryExpOperation(subscript) == IR_OP_VARIABLE) { std::vector v = ir->QueryExpOperand(subscript); IR_ScalarRef *ref = static_cast(ir->Repr2Ref(v[0])); @@ -1627,25 +1625,25 @@ std::vector > constructInspectorVariables(IR_Code *ir, for (j = 0; j < index.size(); j++) if (index[j] == ref->name()) break; - + if (j == index.size()) throw ir_error("Non index variable in array expression"); - + int k; for (k = 0; k < to_return.size(); k++) if (to_return[k][0] == ref->name()) break; - if (k == to_return.size()) { + if (k == to_return.size()) { to_return.push_back(per_index); - fprintf(stderr, "adding index %s\n", ref->name().c_str()); + fprintf(stderr, "adding index %s\n", ref->name().c_str()); } - + } - + } - + return to_return; - + } /*std::vector constructInspectorData(IR_Code *ir, std::vector > &indices){ @@ -1669,99 +1667,99 @@ std::vector > constructInspectorVariables(IR_Code *ir, */ -CG_outputRepr * checkAndGenerateIndirectMappings(CG_outputBuilder * ocg, - std::vector > &indices, - CG_outputRepr * instance, CG_outputRepr * class_def, - CG_outputRepr * count_var) { - +CG_outputRepr *checkAndGenerateIndirectMappings(CG_outputBuilder *ocg, + std::vector > &indices, + CG_outputRepr *instance, CG_outputRepr *class_def, + CG_outputRepr *count_var) { + CG_outputRepr *to_return = NULL; - + for (int i = 0; i < indices.size(); i++) if (indices[i].size() > 1) { std::string index = indices[i][indices[i].size() - 1]; CG_outputRepr *rep = ocg->CreateArrayRefExpression( - ocg->CreateDotExpression(instance, - ocg->lookup_member_data(class_def, index, instance)), - count_var); + ocg->CreateDotExpression(instance, + ocg->lookup_member_data(class_def, index, instance)), + count_var); for (int j = indices[i].size() - 2; j >= 0; j--) rep = ocg->CreateArrayRefExpression(indices[i][j], rep); - + CG_outputRepr *lhs = ocg->CreateArrayRefExpression( - ocg->CreateDotExpression(instance, - ocg->lookup_member_data(class_def, indices[i][0], instance)), - count_var); - + ocg->CreateDotExpression(instance, + ocg->lookup_member_data(class_def, indices[i][0], instance)), + count_var); + to_return = ocg->StmtListAppend(to_return, ocg->CreateAssignment(0, lhs, rep)); - + } - + return to_return; - + } CG_outputRepr *generatePointerAssignments(CG_outputBuilder *ocg, std::string prefix_name, std::vector > &indices, - CG_outputRepr *instance, + CG_outputRepr *instance, CG_outputRepr *class_def) { - + fprintf(stderr, "generatePointerAssignments()\n"); CG_outputRepr *list = NULL; - fprintf(stderr, "prefix '%s', %d indices\n", prefix_name.c_str(), indices.size()); + fprintf(stderr, "prefix '%s', %d indices\n", prefix_name.c_str(), indices.size()); for (int i = 0; i < indices.size(); i++) { - + std::string s = prefix_name + "_" + indices[i][0]; - fprintf(stderr, "s %s\n", s.c_str()); - + fprintf(stderr, "s %s\n", s.c_str()); + // create a variable definition for a pointer to int with this name // that seems to be the only actual result of this routine ... //chillAST_VarDecl *vd = new chillAST_VarDecl( "int", prefix_name.c_str(), "*", NULL); //vd->print(); printf("\n"); fflush(stdout); //vd->dump(); printf("\n"); fflush(stdout); - + CG_outputRepr *ptr_exp = ocg->CreatePointer(s); // but dropped on the floor. unused //fprintf(stderr, "ptr_exp created\n"); - + //CG_outputRepr *rhs = ocg->CreateDotExpression(instance, // ocg->lookup_member_data(class_def, indices[i][0], instance)); - + //CG_outputRepr *ptr_assignment = ocg->CreateAssignment(0, ptr_exp, rhs); - + //list = ocg->StmtListAppend(list, ptr_assignment); - + } - + fprintf(stderr, "generatePointerAssignments() DONE\n\n"); return list; } void Loop::normalize(int stmt_num, int loop_level) { - + if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument( - - "invalid statement number " + to_string(stmt_num)); - + + "invalid statement number " + to_string(stmt_num)); + if (loop_level <= 0) throw std::invalid_argument( - "12invalid loop level " + to_string(loop_level)); + "12invalid loop level " + to_string(loop_level)); if (loop_level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument( - "there is no loop level " + to_string(loop_level) - + " for statement " + to_string(stmt_num)); - + "there is no loop level " + to_string(loop_level) + + " for statement " + to_string(stmt_num)); + apply_xform(stmt_num); - + Relation r = copy(stmt[stmt_num].IS); - + Relation bound = get_loop_bound(r, loop_level, this->known); if (!bound.has_single_conjunct() || !bound.is_satisfiable() || bound.is_tautology()) throw loop_error("unable to extract loop bound for normalize"); - + // extract the loop stride coef_t stride; std::pair result = find_simplest_stride(bound, @@ -1770,31 +1768,31 @@ void Loop::normalize(int stmt_num, int loop_level) { stride = 1; else stride = abs(result.first.get_coef(result.second)) - / gcd(abs(result.first.get_coef(result.second)), - abs(result.first.get_coef(bound.set_var(loop_level)))); - + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(loop_level)))); + if (stride != 1) throw loop_error( - "normalize currently only handles unit stride, non unit stride present in loop bounds"); - + "normalize currently only handles unit stride, non unit stride present in loop bounds"); + GEQ_Handle lb; - + Conjunct *c = bound.query_DNF()->single_conjunct(); for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { int coef = (*gi).get_coef(bound.set_var(loop_level)); if (coef > 0) lb = *gi; } - + //Loop bound already zero //Nothing to do. if (lb.is_const(bound.set_var(loop_level)) && lb.get_const() == 0) return; - + if (lb.is_const_except_for_global(bound.set_var(loop_level))) { - + int n = stmt[stmt_num].xform.n_out(); - + Relation r(n, n); F_And *f_root = r.add_and(); for (int j = 1; j <= n; j++) @@ -1803,10 +1801,10 @@ void Loop::normalize(int stmt_num, int loop_level) { h.update_coef(r.input_var(j), 1); h.update_coef(r.output_var(j), -1); } - + stmt[stmt_num].xform = Composition(r, stmt[stmt_num].xform); stmt[stmt_num].xform.simplify(); - + for (Constr_Vars_Iter ci(lb); ci; ci++) { if ((*ci).var->kind() == Global_Var) { Global_Var_ID g = (*ci).var->get_global_var(); @@ -1816,24 +1814,24 @@ void Loop::normalize(int stmt_num, int loop_level) { else v = stmt[stmt_num].xform.get_local(g, (*ci).var->function_of()); - + F_And *f_super_root = stmt[stmt_num].xform.and_with_and(); F_Exists *f_exists = f_super_root->add_exists(); F_And *f_root = f_exists->add_and(); - + EQ_Handle h = f_root->add_EQ(); h.update_coef(stmt[stmt_num].xform.output_var(2 * loop_level), 1); h.update_coef(stmt[stmt_num].xform.input_var(loop_level), -1); h.update_coef(v, 1); - + stmt[stmt_num].xform.simplify(); } - + } - + } else throw loop_error("loop bounds too complex for normalize!"); - + } diff --git a/src/transformations/loop_datacopy.cc b/src/transformations/loop_datacopy.cc index 12d74fd..69fbd5b 100644 --- a/src/transformations/loop_datacopy.cc +++ b/src/transformations/loop_datacopy.cc @@ -27,7 +27,8 @@ using namespace omega; // parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] // bool Loop::datacopy(const std::vector > > &array_ref_nums, int level, - bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, + int memory_type) { //fprintf(stderr, "Loop::datacopy()\n"); // check for sanity of parameters @@ -40,18 +41,17 @@ bool Loop::datacopy(const std::vector > > &array throw std::invalid_argument("invalid loop level " + to_string(level)); if (i == 0) { std::vector lex = getLexicalOrder(stmt_num); - same_loop = getStatements(lex, 2*level-2); - } - else if (same_loop.find(stmt_num) == same_loop.end()) + same_loop = getStatements(lex, 2 * level - 2); + } else if (same_loop.find(stmt_num) == same_loop.end()) throw std::invalid_argument("array references for data copy must be located in the same subloop"); } - + // convert array reference numbering scheme to actual array references std::vector > > selected_refs; for (int i = 0; i < array_ref_nums.size(); i++) { if (array_ref_nums[i].second.size() == 0) continue; - + int stmt_num = array_ref_nums[i].first; selected_refs.push_back(std::make_pair(stmt_num, std::vector())); std::vector refs = ir->FindArrayRef(stmt[stmt_num].code); @@ -61,9 +61,10 @@ bool Loop::datacopy(const std::vector > > &array if (ref_num < 0 || ref_num >= refs.size()) { for (int k = 0; k < refs.size(); k++) delete refs[k]; - throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + throw std::invalid_argument( + "invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); } - selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected_refs[selected_refs.size() - 1].second.push_back(refs[ref_num]); selected[ref_num] = true; } for (int j = 0; j < refs.size(); j++) @@ -72,9 +73,10 @@ bool Loop::datacopy(const std::vector > > &array } if (selected_refs.size() == 0) throw std::invalid_argument("found no array references to copy"); - + // do the copy - bool whatever = datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); return whatever; } @@ -84,9 +86,10 @@ bool Loop::datacopy(const std::vector > > &array // parameter array_name=A means to copy data touched by A[i-1] and A[i] // bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, - bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, + int memory_type) { - fflush(stdout); + fflush(stdout); //fprintf(stderr, "Loop::datacopy2()\n"); //fprintf(stderr, "array name %s stmt num %d\n", array_name.c_str(), stmt_num); @@ -95,23 +98,23 @@ bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); - + // collect array references by name std::vector lex = getLexicalOrder(stmt_num); - int dim = 2*level - 1; - std::set same_loop = getStatements(lex, dim-1); - + int dim = 2 * level - 1; + std::set same_loop = getStatements(lex, dim - 1); + std::vector > > selected_refs; for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { std::vector t; - std::vector refs = ir->FindArrayRef(stmt[*i].code); + std::vector refs = ir->FindArrayRef(stmt[*i].code); for (int j = 0; j < refs.size(); j++) if (refs[j]->name() == array_name) t.push_back(refs[j]); else delete refs[j]; if (t.size() != 0) - selected_refs.push_back(std::make_pair(*i, t)); + selected_refs.push_back(std::make_pair(*i, t)); } //fprintf(stderr, "selected refs:\n"); @@ -122,27 +125,30 @@ bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, if (selected_refs.size() == 0) throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); - + IR_ArrayRef *AR = selected_refs[0].second[0]; //IR_roseArrayRef *RAR = (IR_roseArrayRef *)AR; //fprintf(stderr, "before datacopy_privatized, "); //AR->Dump(); - + // do the copy //fprintf(stderr, "\nLoop::datacopy2 calling privatized\n"); - bool whatever = datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); //AR = selected_refs[0].second[0]; //fprintf(stderr, "after datacopy_privatized, "); //AR->Dump(); - + return whatever; } -bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector &privatized_levels, - bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, + const std::vector &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, + int padding_alignment, int memory_type) { //fprintf(stderr, "Loop::datacopy_privatized()\n"); // check for sanity of parameters @@ -150,33 +156,37 @@ bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); - + // collect array references by name std::vector lex = getLexicalOrder(stmt_num); - int dim = 2*level - 1; - std::set same_loop = getStatements(lex, dim-1); - + int dim = 2 * level - 1; + std::set same_loop = getStatements(lex, dim - 1); + std::vector > > selected_refs; for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { selected_refs.push_back(std::make_pair(*i, std::vector())); - - std::vector refs = ir->FindArrayRef(stmt[*i].code); + + std::vector refs = ir->FindArrayRef(stmt[*i].code); for (int j = 0; j < refs.size(); j++) if (refs[j]->name() == array_name) - selected_refs[selected_refs.size()-1].second.push_back(refs[j]); + selected_refs[selected_refs.size() - 1].second.push_back(refs[j]); else delete refs[j]; } if (selected_refs.size() == 0) throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); - + // do the copy - bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); return whatever; } -bool Loop::datacopy_privatized(const std::vector > > &array_ref_nums, int level, const std::vector &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +bool Loop::datacopy_privatized(const std::vector > > &array_ref_nums, int level, + const std::vector &privatized_levels, bool allow_extra_read, + int fastest_changing_dimension, int padding_stride, int padding_alignment, + int memory_type) { //fprintf(stderr, "Loop::datacopy_privatized2()\n"); // check for sanity of parameters @@ -189,18 +199,17 @@ bool Loop::datacopy_privatized(const std::vector throw std::invalid_argument("invalid loop level " + to_string(level)); if (i == 0) { std::vector lex = getLexicalOrder(stmt_num); - same_loop = getStatements(lex, 2*level-2); - } - else if (same_loop.find(stmt_num) == same_loop.end()) + same_loop = getStatements(lex, 2 * level - 2); + } else if (same_loop.find(stmt_num) == same_loop.end()) throw std::invalid_argument("array references for data copy must be located in the same subloop"); } - + // convert array reference numbering scheme to actual array references std::vector > > selected_refs; for (int i = 0; i < array_ref_nums.size(); i++) { if (array_ref_nums[i].second.size() == 0) continue; - + int stmt_num = array_ref_nums[i].first; selected_refs.push_back(std::make_pair(stmt_num, std::vector())); std::vector refs = ir->FindArrayRef(stmt[stmt_num].code); @@ -210,9 +219,10 @@ bool Loop::datacopy_privatized(const std::vector if (ref_num < 0 || ref_num >= refs.size()) { for (int k = 0; k < refs.size(); k++) delete refs[k]; - throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + throw std::invalid_argument( + "invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); } - selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected_refs[selected_refs.size() - 1].second.push_back(refs[ref_num]); selected[ref_num] = true; } for (int j = 0; j < refs.size(); j++) @@ -221,10 +231,11 @@ bool Loop::datacopy_privatized(const std::vector } if (selected_refs.size() == 0) throw std::invalid_argument("found no array references to copy"); - + // do the copy - bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); - return whatever; + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; } @@ -232,13 +243,13 @@ bool Loop::datacopy_privatized(const std::vector // Implement low level datacopy function with lots of options. // -bool Loop::datacopy_privatized(const std::vector > > &stmt_refs, +bool Loop::datacopy_privatized(const std::vector > > &stmt_refs, int level, const std::vector &privatized_levels, - bool allow_extra_read, + bool allow_extra_read, int fastest_changing_dimension, - int padding_stride, - int padding_alignment, + int padding_stride, + int padding_alignment, int memory_type) { //fprintf(stderr, "\nLoop::datacopy_privatized3() *****\n"); @@ -247,7 +258,7 @@ bool Loop::datacopy_privatized(const std::vector lex; @@ -258,8 +269,7 @@ bool Loop::datacopy_privatized(const std::vector= stmt.size()) throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); if (privatized_levels.size() != 0) { - if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) - throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); - } - else { + if (privatized_levels[privatized_levels.size() - 1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(privatized_levels[privatized_levels.size() - 1]) + " for statement " + + to_string(stmt_num)); + } else { if (level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); } @@ -279,8 +290,7 @@ bool Loop::datacopy_privatized(const std::vectorsymbol(); lex = getLexicalOrder(stmt_num); - } - else { + } else { IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); if (t->name() != sym->name()) { delete t; @@ -293,8 +303,10 @@ bool Loop::datacopy_privatized(const std::vector= -1 && fastest_changing_dimension < sym->n_dim())) @@ -303,31 +315,31 @@ bool Loop::datacopy_privatized(const std::vectorn_dim(); - + if (fastest_changing_dimension == -1) switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_ROW_MAJOR: - fastest_changing_dimension = n_dim - 1; - break; - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: - fastest_changing_dimension = 0; - break; - default: - throw loop_error("unsupported array layout"); + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported array layout"); } // OK, parameter sanity checked - + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + // build iteration spaces for all reads and for all writes separately //fprintf(stderr, "dp3: before apply_xform() ARRAY REFS\n"); //for (int i = 0; i < stmt_refs.size(); i++) { @@ -360,29 +372,30 @@ bool Loop::datacopy_privatized(const std::vectorname()); mapping.setup_names(); - mapping.print(); fflush(stdout); // "{[I] -> [_t1] : I = _t1 } + mapping.print(); + fflush(stdout); // "{[I] -> [_t1] : I = _t1 } F_And *f_root = mapping.add_and(); - for (int k = 1; k <= level-1; k++) { + for (int k = 1; k <= level - 1; k++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.input_var(k), 1); h.update_coef(mapping.output_var(k), -1); @@ -390,7 +403,7 @@ bool Loop::datacopy_privatized(const std::vectoradd_EQ(); h.update_coef(mapping.input_var(privatized_levels[k]), 1); - h.update_coef(mapping.output_var(level+k), -1); + h.update_coef(mapping.output_var(level + k), -1); } for (int k = 0; k < n_dim; k++) { IR_ArrayRef *AR = stmt_refs[i].second[j]; @@ -400,37 +413,39 @@ bool Loop::datacopy_privatized(const std::vectorindex(k); //fprintf(stderr, "k %d j %d repr ", k, j); repr->dump(); fflush(stdout); - exp2formula(ir, - mapping, - f_root, - freevar, - repr, - mapping.output_var(level-1+privatized_levels.size()+k+1), - 'w', - IR_COND_EQ, + exp2formula(ir, + mapping, + f_root, + freevar, + repr, + mapping.output_var(level - 1 + privatized_levels.size() + k + 1), + 'w', + IR_COND_EQ, false, uninterpreted_symbols[stmt_num], uninterpreted_symbols_stringrepr[stmt_num]); repr->clear(); delete repr; } - Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), + Extend_Set(copy(this->known), + stmt[stmt_num].IS.n_set() - + this->known.n_set())))); if (stmt_refs[i].second[j]->is_write()) { has_write_refs = true; wo_copy_is = Union(wo_copy_is, r); wo_copy_is.simplify(2, 4); - - - } - else { + + + } else { has_read_refs = true; ro_copy_is = Union(ro_copy_is, r); ro_copy_is.simplify(2, 4); - + } } } - + //fprintf(stderr, "dp3: simplify\n"); // simplify read and write footprint iteration space { @@ -438,7 +453,7 @@ bool Loop::datacopy_privatized(const std::vector 1) { Relation t = SimpleHull(wo_copy_is, true, true); @@ -448,7 +463,7 @@ bool Loop::datacopy_privatized(const std::vectorname(); - wo_copy_is.name_set_var(level+i, s); - ro_copy_is.name_set_var(level+i, s); + wo_copy_is.name_set_var(level + i, s); + ro_copy_is.name_set_var(level + i, s); } - for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { - std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); + for (int i = level + privatized_levels.size(); i < level + privatized_levels.size() + n_dim; i++) { + std::string s = + tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter + i - level - privatized_levels.size()); wo_copy_is.name_set_var(i, s); ro_copy_is.name_set_var(i, s); } @@ -475,11 +491,11 @@ bool Loop::datacopy_privatized(const std::vectorbuilder(); std::vector index_lb(n_dim); // initialized to NULL @@ -487,31 +503,35 @@ bool Loop::datacopy_privatized(const std::vector is_index_eq(n_dim, false); std::vector > index_sz(0); Relation reduced_copy_is = copy(copy_is); - + for (int i = 0; i < n_dim; i++) { //fprintf(stderr, "i %d/%d\n", i, n_dim); if (i != 0) - reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); - Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); - + reduced_copy_is = Project(reduced_copy_is, level - 1 + privatized_levels.size() + i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level - 1 + privatized_levels.size() + i); + //fprintf(stderr, "dp3: extract stride\n"); // extract stride - std::pair result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1)); + std::pair result = find_simplest_stride(bound, bound.set_var( + level - 1 + privatized_levels.size() + i + 1)); if (result.second != NULL) - index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)))); + index_stride[i] = abs(result.first.get_coef(result.second)) / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var( + level - 1 + privatized_levels.size() + i + + 1)))); else index_stride[i] = 1; //fprintf(stderr, "dp3: index_stride[%d] = %d\n", i, index_stride[i]); - + // check if this array index requires loop Conjunct *c = bound.query_DNF()->single_conjunct(); for (EQ_Iterator ei(c->EQs()); ei; ei++) { //fprintf(stderr, "dp3: for\n"); if ((*ei).has_wildcards()) continue; - + //fprintf(stderr, "dp3: no wildcards\n"); - int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + int coef = (*ei).get_coef(bound.set_var(level - 1 + privatized_levels.size() + i + 1)); if (coef != 0) { //fprintf(stderr, "coef != 0\n"); int sign = 1; @@ -520,51 +540,53 @@ bool Loop::datacopy_privatized(const std::vectorkind()) { - case Input_Var: - { - //fprintf(stderr, "dp3: Input_Var\n"); - if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) { - //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign); - - if ((*ci).coef*sign == 1) - op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); - else if ((*ci).coef*sign == -1) - op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); - else if ((*ci).coef*sign > 1) - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + case Input_Var: { + //fprintf(stderr, "dp3: Input_Var\n"); + if ((*ci).var != bound.set_var(level - 1 + privatized_levels.size() + i + 1)) { + //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign); + + if ((*ci).coef * sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent((*ci).var->name()))); + } + break; + } + case Global_Var: { + //fprintf(stderr, "dp3: Global_Var\n"); + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef * sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef * sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef * sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent(g->base_name()))); else // (*ci).coef*sign < -1 - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent(g->base_name()))); + break; } - break; - } - case Global_Var: - { - //fprintf(stderr, "dp3: Global_Var\n"); - Global_Var_ID g = (*ci).var->get_global_var(); - if ((*ci).coef*sign == 1) - op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef*sign == -1) - op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef*sign > 1) - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); - else // (*ci).coef*sign < -1 - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); - break; - } - default: - throw loop_error("unsupported array index expression"); + default: + throw loop_error("unsupported array index expression"); } } if ((*ei).get_const() != 0) - op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + op = ocg->CreatePlus(op, ocg->CreateInt(-sign * ((*ei).get_const()))); if (coef != 1) op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef)); - + index_lb[i] = op; is_index_eq[i] = true; break; @@ -572,14 +594,14 @@ bool Loop::datacopy_privatized(const std::vector lb_list, ub_list; std::set excluded_floor_vars; - excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1)); + excluded_floor_vars.insert(bound.set_var(level - 1 + privatized_levels.size() + i + 1)); for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { - int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + int coef = (*gi).get_coef(bound.set_var(level - 1 + privatized_levels.size() + i + 1)); if (coef != 0 && (*gi).has_wildcards()) { bool clean_bound = true; GEQ_Handle h; @@ -591,7 +613,7 @@ bool Loop::datacopy_privatized(const std::vector 0) lb_list.push_back(*gi); else if (coef < 0) @@ -599,41 +621,45 @@ bool Loop::datacopy_privatized(const std::vector lb_repr_list; - for (int j = 0; j < lb_list.size(); j++){ - if(this->known.n_set() == 0) { - lb_repr_list.push_back(output_lower_bound_repr(ocg, - lb_list[j], - bound.set_var(level-1+privatized_levels.size()+i+1), - result.first, - result.second, - bound, - Relation::True(bound.n_set()), - std::vector >(bound.n_set(), std::make_pair(static_cast(NULL), 0)), + for (int j = 0; j < lb_list.size(); j++) { + if (this->known.n_set() == 0) { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + lb_list[j], + bound.set_var(level - 1 + privatized_levels.size() + i + 1), + result.first, + result.second, + bound, + Relation::True(bound.n_set()), + std::vector >(bound.n_set(), + std::make_pair( + static_cast(NULL), + 0)), uninterpreted_symbols[stmt_num])); - } - else { - lb_repr_list.push_back(output_lower_bound_repr(ocg, - lb_list[j], - bound.set_var(level-1+privatized_levels.size()+i+1), - result.first, - result.second, - bound, - this->known, - std::vector >(bound.n_set(), std::make_pair(static_cast(NULL), 0)), + } else { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + lb_list[j], + bound.set_var(level - 1 + privatized_levels.size() + i + 1), + result.first, + result.second, + bound, + this->known, + std::vector >(bound.n_set(), + std::make_pair( + static_cast(NULL), + 0)), uninterpreted_symbols[stmt_num])); } } - if (lb_repr_list.size() > 1) { + if (lb_repr_list.size() > 1) { //fprintf(stderr, "loop_datacopy.cc dp3 createInvoke( max )\n"); index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); - } - else if (lb_repr_list.size() == 1) + } else if (lb_repr_list.size() == 1) index_lb[i] = lb_repr_list[0]; - + //fprintf(stderr, "dp3: build temporary array size representation\n"); // build temporary array size representation { @@ -642,66 +668,62 @@ bool Loop::datacopy_privatized(const std::vectoradd_GEQ(); - + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: - { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot calculate temporay array size statically"); + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); } } h.update_const(ub_list[j].get_const()); - + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: - { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot calculate temporay array size statically"); + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); } } h.update_const(lb_list[k].get_const()); - + h.update_const(1); h.update_coef(cal.output_var(1), -1); } - + cal = Restrict_Domain(cal, copy(copy_is)); for (int j = 1; j <= cal.n_inp(); j++) cal = Project(cal, j, Input_Var); cal.simplify(); - + //fprintf(stderr, "dp3: pad temporary array size\n"); // pad temporary array size // TODO: for variable array size, create padding formula @@ -719,9 +741,8 @@ bool Loop::datacopy_privatized(const std::vector 1) { // align to boundary for data packing int residue = size % padding_alignment; if (residue) - size = size+padding_alignment-residue; - } - else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + size = size + padding_alignment - residue; + } else if (padding_alignment < -1) { // un-alignment for memory bank conflicts while (gcd(size, static_cast(-padding_alignment)) != 1) size++; } @@ -729,7 +750,7 @@ bool Loop::datacopy_privatized(const std::vectorCreateInt(size))); is_index_bound_const = true; } - + if (!is_index_bound_const) { for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { int coef = (*gi).get_coef(cal.output_var(1)); @@ -737,22 +758,23 @@ bool Loop::datacopy_privatized(const std::vectorkind()) { - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - if ((*ci).coef == 1) - op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef == -1) - op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef > 1) - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); - else // (*ci).coef < -1 - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); - break; - } - default: - throw loop_error("failed to generate array index bound code"); + switch ((*ci).var->kind()) { + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), + ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), + ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); } } } @@ -766,16 +788,16 @@ bool Loop::datacopy_privatized(const std::vector(padding_stride)); coef_t t1 = index_stride[i] / g; if (t1 != 1) - op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1 - 1)), ocg->CreateInt(t1)); coef_t t2 = padding_stride / g; if (t2 != 1) op = ocg->CreateTimes(op, ocg->CreateInt(t2)); - } - else if (index_stride[i] != 1) { - op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } else if (index_stride[i] != 1) { + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i] - 1)), + ocg->CreateInt(index_stride[i])); } } - + index_sz.push_back(std::make_pair(i, op)); break; } @@ -783,20 +805,20 @@ bool Loop::datacopy_privatized(const std::vectorlayout_type()) { - case IR_ARRAY_LAYOUT_ROW_MAJOR: - std::swap(index_sz[index_sz.size()-1], index_sz[i]); - break; - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: - std::swap(index_sz[0], index_sz[i]); - break; - default: - throw loop_error("unsupported array layout"); + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size() - 1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); } } @@ -806,51 +828,53 @@ bool Loop::datacopy_privatized(const std::vectorCreateScalarSymbol(sym, memory_type); - } - else { + } else { //fprintf(stderr, "tmp_sym is an array\n"); std::vector tmp_array_size(index_sz.size()); - for (int i = 0; i < index_sz.size(); i++) { + for (int i = 0; i < index_sz.size(); i++) { tmp_array_size[i] = index_sz[i].second->clone(); index_sz[i].second->dump(); // THIS PRINTF } tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); } - + //fprintf(stderr, "dp3: create temporary array read initialization code\n"); // create temporary array read initialization code CG_outputRepr *copy_code_read; - if (has_read_refs) { + if (has_read_refs) { //fprintf(stderr, "has read refs\n"); if (index_sz.size() == 0) { - //fprintf(stderr, "if\n"); - + //fprintf(stderr, "if\n"); + //fprintf(stderr, "tmp sym %s\n", tmp_sym->name().c_str()); - IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast(tmp_sym)); // create ref from symbol + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef( + static_cast(tmp_sym)); // create ref from symbol // tmp_scalar_ref is incomplete std::vector rhs_index(n_dim); - for (int i = 0; i < index_lb.size(); i++) { + for (int i = 0; i < index_lb.size(); i++) { //fprintf(stderr, "i %d\n", i); if (is_index_eq[i]) rhs_index[i] = index_lb[i]->clone(); else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + rhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); } IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - + // IR_ScalarRef tmp_scalar_ref has no actual reference yet. It only has the variable definition. copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); //fprintf(stderr, "if ends\n"); - } - else { + } else { //fprintf(stderr, "else\n"); std::vector lhs_index(index_sz.size()); for (int i = 0; i < index_sz.size(); i++) { int cur_index_num = index_sz[i].first; - CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + CG_outputRepr *cur_index_repr = ocg->CreateMinus( + ocg->CreateIdent(copy_is.set_var(level - 1 + privatized_levels.size() + cur_index_num + 1)->name()), + index_lb[cur_index_num]->clone()); if (padding_stride != 0) { - if (i == n_dim-1) { + if (i == n_dim - 1) { coef_t g = gcd(index_stride[cur_index_num], static_cast(padding_stride)); coef_t t1 = index_stride[cur_index_num] / g; if (t1 != 1) @@ -858,74 +882,78 @@ bool Loop::datacopy_privatized(const std::vectorCreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { + } else if (index_stride[cur_index_num] != 1) { cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); } } - + if (ir->ArrayIndexStartAt() != 0) cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); lhs_index[i] = cur_index_repr; } - + //fprintf(stderr, "dp3: making tmp_array_ref\n"); IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast(tmp_sym), lhs_index); //fprintf(stderr, "dp3: DONE making tmp_array_ref\n"); - + std::vector rhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) if (is_index_eq[i]) rhs_index[i] = index_lb[i]->clone(); else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + rhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - + //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment\n"); //copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); CG_outputRepr *lhs = tmp_array_ref->convert(); CG_outputRepr *rhs = copied_array_ref->convert(); - copy_code_read = ir->builder()->CreateAssignment(0, lhs, rhs); //tmp_array_ref->convert(), copied_array_ref->convert()); + copy_code_read = ir->builder()->CreateAssignment(0, lhs, + rhs); //tmp_array_ref->convert(), copied_array_ref->convert()); //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment DONE\n\n"); } } // has read refs - + //fprintf(stderr, "dp3: create temporary array write back code\n"); // create temporary array write back code CG_outputRepr *copy_code_write; - if (has_write_refs) { + if (has_write_refs) { //fprintf(stderr, "has_write_refs\n"); if (index_sz.size() == 0) { //fprintf(stderr, "index_sz.size() == 0\n"); IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast(tmp_sym)); - + std::vector rhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) if (is_index_eq[i]) rhs_index[i] = index_lb[i]->clone(); else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + rhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); - } - else { + } else { //fprintf(stderr, "index_sz.size() NOT = 0\n"); - + std::vector lhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) if (is_index_eq[i]) lhs_index[i] = index_lb[i]->clone(); else - lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + lhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); - + std::vector rhs_index(index_sz.size()); for (int i = 0; i < index_sz.size(); i++) { int cur_index_num = index_sz[i].first; - CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + CG_outputRepr *cur_index_repr = ocg->CreateMinus( + ocg->CreateIdent(copy_is.set_var(level - 1 + privatized_levels.size() + cur_index_num + 1)->name()), + index_lb[cur_index_num]->clone()); if (padding_stride != 0) { - if (i == n_dim-1) { + if (i == n_dim - 1) { coef_t g = gcd(index_stride[cur_index_num], static_cast(padding_stride)); coef_t t1 = index_stride[cur_index_num] / g; if (t1 != 1) @@ -933,96 +961,98 @@ bool Loop::datacopy_privatized(const std::vectorCreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { + } else if (index_stride[cur_index_num] != 1) { cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); } } - + if (ir->ArrayIndexStartAt() != 0) cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); rhs_index[i] = cur_index_repr; } IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast(tmp_sym), rhs_index); - + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); } } // has write refs - + // now we can remove those loops for array indexes that are // dependent on others //fprintf(stderr, "dp3: now we can remove those loops\n"); if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { - Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + Relation mapping(level - 1 + privatized_levels.size() + n_dim, + level - 1 + privatized_levels.size() + index_sz.size()); F_And *f_root = mapping.add_and(); - for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + for (int i = 1; i <= level - 1 + privatized_levels.size(); i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.input_var(i), 1); h.update_coef(mapping.output_var(i), -1); } - + int cur_index = 0; std::vector mapped_index(index_sz.size()); for (int i = 0; i < n_dim; i++) if (!is_index_eq[i]) { EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + h.update_coef(mapping.input_var(level - 1 + privatized_levels.size() + i + 1), 1); switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { - h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); - mapped_index[index_sz.size()-cur_index-1] = i; - break; - } - case IR_ARRAY_LAYOUT_ROW_MAJOR: { - h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); - mapped_index[cur_index] = i; - break; - } - default: - throw loop_error("unsupported array layout"); + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level - 1 + privatized_levels.size() + index_sz.size() - cur_index), -1); + mapped_index[index_sz.size() - cur_index - 1] = i; + break; + } + case IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level - 1 + privatized_levels.size() + cur_index + 1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); } cur_index++; } - + wo_copy_is = omega::Range(Restrict_Domain(copy(mapping), wo_copy_is)); ro_copy_is = omega::Range(Restrict_Domain(copy(mapping), ro_copy_is)); - for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + for (int i = 1; i <= level - 1 + privatized_levels.size(); i++) { wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); } for (int i = 0; i < index_sz.size(); i++) { - wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); - ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + wo_copy_is.name_set_var(level - 1 + privatized_levels.size() + i + 1, + copy_is.set_var(level - 1 + privatized_levels.size() + mapped_index[i] + 1)->name()); + ro_copy_is.name_set_var(level - 1 + privatized_levels.size() + i + 1, + copy_is.set_var(level - 1 + privatized_levels.size() + mapped_index[i] + 1)->name()); } wo_copy_is.setup_names(); ro_copy_is.setup_names(); } - + // insert read copy statement //fprintf(stderr, "dp3: insert read copy statement\n"); - + int old_num_stmt = stmt.size(); int ro_copy_stmt_num = -1; if (has_read_refs) { - Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + Relation copy_xform(ro_copy_is.n_set(), 2 * ro_copy_is.n_set() + 1); { F_And *f_root = copy_xform.add_and(); for (int i = 1; i <= ro_copy_is.n_set(); i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.input_var(i), 1); - h.update_coef(copy_xform.output_var(2*i), -1); + h.update_coef(copy_xform.output_var(2 * i), -1); } - for (int i = 1; i <= dim; i+=2) { + for (int i = 1; i <= dim; i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), -1); - h.update_const(lex[i-1]); + h.update_const(lex[i - 1]); } - for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + for (int i = dim + 2; i <= copy_xform.n_out(); i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), 1); } } - + Statement copy_stmt_read; copy_stmt_read.IS = ro_copy_is; copy_stmt_read.xform = copy_xform; @@ -1031,7 +1061,7 @@ bool Loop::datacopy_privatized(const std::vector(ro_copy_is.n_set()); copy_stmt_read.ir_stmt_node = NULL; copy_stmt_read.has_inspector = false; - for (int i = 0; i < level-1; i++) { + for (int i = 0; i < level - 1; i++) { copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && stmt[*(active.begin())].loop_level[i].payload >= level) { @@ -1043,32 +1073,33 @@ bool Loop::datacopy_privatized(const std::vector(index_sz.size())); i++) { - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].payload = num_dep_dim - left_num_dim + i; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } for (int i = std::min(left_num_dim, static_cast(index_sz.size())); i < index_sz.size(); i++) { - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].payload = -1; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } - - - shiftLexicalOrder(lex, dim-1, 1); - fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size()); + + shiftLexicalOrder(lex, dim - 1, 1); + + fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size()); stmt.push_back(copy_stmt_read); uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]); @@ -1076,30 +1107,30 @@ bool Loop::datacopy_privatized(const std::vectoradd_EQ(); h.update_coef(copy_xform.input_var(i), 1); - h.update_coef(copy_xform.output_var(2*i), -1); + h.update_coef(copy_xform.output_var(2 * i), -1); } - for (int i = 1; i <= dim; i+=2) { + for (int i = 1; i <= dim; i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), -1); - h.update_const(lex[i-1]); + h.update_const(lex[i - 1]); } - for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + for (int i = dim + 2; i <= copy_xform.n_out(); i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), 1); } } - + Statement copy_stmt_write; copy_stmt_write.IS = wo_copy_is; copy_stmt_write.xform = copy_xform; @@ -1107,8 +1138,8 @@ bool Loop::datacopy_privatized(const std::vector(wo_copy_is.n_set()); copy_stmt_write.ir_stmt_node = NULL; copy_stmt_write.has_inspector = false; - - for (int i = 0; i < level-1; i++) { + + for (int i = 0; i < level - 1; i++) { copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && stmt[*(active.begin())].loop_level[i].payload >= level) { @@ -1120,31 +1151,32 @@ bool Loop::datacopy_privatized(const std::vector(index_sz.size())); i++) { - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].payload = num_dep_dim - left_num_dim + i; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } for (int i = std::min(left_num_dim, static_cast(index_sz.size())); i < index_sz.size(); i++) { - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].payload = -1; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } - lex[dim-1]++; - shiftLexicalOrder(lex, dim-1, -2); + lex[dim - 1]++; + shiftLexicalOrder(lex, dim - 1, -2); - fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size()); stmt.push_back(copy_stmt_write); uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]); @@ -1152,24 +1184,24 @@ bool Loop::datacopy_privatized(const std::vectorCreateScalarRef(static_cast(tmp_sym)); //fprintf(stderr, "dp3: loop_datacopy.cc calling ReplaceExpression i%d j%d\n", i, j); ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); - } - else { + } else { std::vector index_repr(index_sz.size()); for (int k = 0; k < index_sz.size(); k++) { int cur_index_num = index_sz[k].first; - - CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), + index_lb[cur_index_num]->clone()); if (padding_stride != 0) { - if (k == n_dim-1) { + if (k == n_dim - 1) { coef_t g = gcd(index_stride[cur_index_num], static_cast(padding_stride)); coef_t t1 = index_stride[cur_index_num] / g; if (t1 != 1) @@ -1177,23 +1209,22 @@ bool Loop::datacopy_privatized(const std::vectorCreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { + } else if (index_stride[cur_index_num] != 1) { cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); } } - + if (ir->ArrayIndexStartAt() != 0) cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); index_repr[k] = cur_index_repr; } - + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast(tmp_sym), index_repr); //fprintf(stderr, "loop_datacopy.cc ir->ReplaceExpression( ... )\n"); ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); } } - + // update dependence graph //fprintf(stderr, "update dependence graph\n"); @@ -1201,7 +1232,7 @@ bool Loop::datacopy_privatized(const std::vector > D; - + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (active.find(i) != active.end() && active.find(j->first) == active.end()) { std::vector dvs1, dvs2; @@ -1215,8 +1246,7 @@ bool Loop::datacopy_privatized(const std::vectorsecond = dvs2; if (dvs1.size() > 0) dep.connect(ro_copy_stmt_num, j->first, dvs1); - } - else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + } else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { std::vector dvs1, dvs2; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; @@ -1229,17 +1259,17 @@ bool Loop::datacopy_privatized(const std::vector 0) D.push_back(dvs1); } - + if (j->second.size() == 0) dep.vertex[i].second.erase(j++); else j++; } - + for (int j = 0; j < D.size(); j++) dep.connect(i, ro_copy_stmt_num, D[j]); } - + // insert dependences from copy statement loop to copied statements //fprintf(stderr, "insert dependences from copy statement loop to copied statements\n"); @@ -1255,11 +1285,11 @@ bool Loop::datacopy_privatized(const std::vector::iterator i = active.begin(); i != active.end(); i++) dep.connect(ro_copy_stmt_num, *i, dv); } - + if (wo_copy_stmt_num != -1) { for (int i = 0; i < old_num_stmt; i++) { std::vector > D; - + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (active.find(i) != active.end() && active.find(j->first) == active.end()) { std::vector dvs1, dvs2; @@ -1273,8 +1303,7 @@ bool Loop::datacopy_privatized(const std::vectorsecond = dvs2; if (dvs1.size() > 0) dep.connect(wo_copy_stmt_num, j->first, dvs1); - } - else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + } else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { std::vector dvs1, dvs2; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; @@ -1287,17 +1316,17 @@ bool Loop::datacopy_privatized(const std::vector 0) D.push_back(dvs1); } - + if (j->second.size() == 0) dep.vertex[i].second.erase(j++); else j++; } - + for (int j = 0; j < D.size(); j++) dep.connect(i, wo_copy_stmt_num, D[j]); } - + // insert dependences from copied statements to write statements //fprintf(stderr, "dp3: insert dependences from copied statements to write statements\n"); @@ -1312,9 +1341,9 @@ bool Loop::datacopy_privatized(const std::vector::iterator i = active.begin(); i != active.end(); i++) dep.connect(*i, wo_copy_stmt_num, dv); - + } - + // update variable name for dependences among copied statements for (int i = 0; i < old_num_stmt; i++) { if (active.find(i) != active.end()) @@ -1325,7 +1354,7 @@ bool Loop::datacopy_privatized(const std::vectorsecond[k].sym = s; } } - + // insert anti-dependence from write statement to read statement if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) if (dep_dim >= 0) { @@ -1340,15 +1369,15 @@ bool Loop::datacopy_privatized(const std::vectorclear(); delete index_sz[i].second; } - + return true; } diff --git a/src/transformations/loop_extra.cc b/src/transformations/loop_extra.cc index dac05bf..ee54815 100644 --- a/src/transformations/loop_extra.cc +++ b/src/transformations/loop_extra.cc @@ -25,43 +25,44 @@ void Loop::shift_to(int stmt_num, int level, int absolute_position) { // combo tile(stmt_num, level, 1, level, CountedTile); std::vector lex = getLexicalOrder(stmt_num); - std::set active = getStatements(lex, 2*level-2); + std::set active = getStatements(lex, 2 * level - 2); shift(active, level, absolute_position); - + // remove unnecessary tiled loop since tile size is one for (std::set::iterator i = active.begin(); i != active.end(); i++) { int n = stmt[*i].xform.n_out(); - Relation mapping(n, n-2); + Relation mapping(n, n - 2); F_And *f_root = mapping.add_and(); - for (int j = 1; j <= 2*level; j++) { + for (int j = 1; j <= 2 * level; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.output_var(j), 1); h.update_coef(mapping.input_var(j), -1); } - for (int j = 2*level+3; j <= n; j++) { + for (int j = 2 * level + 3; j <= n; j++) { EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.output_var(j-2), 1); + h.update_coef(mapping.output_var(j - 2), 1); h.update_coef(mapping.input_var(j), -1); } stmt[*i].xform = Composition(mapping, stmt[*i].xform); stmt[*i].xform.simplify(); - + for (int j = 0; j < stmt[*i].loop_level.size(); j++) - if (j != level-1 && + if (j != level - 1 && stmt[*i].loop_level[j].type == LoopLevelTile && stmt[*i].loop_level[j].payload >= level) stmt[*i].loop_level[j].payload--; - - stmt[*i].loop_level.erase(stmt[*i].loop_level.begin()+level-1); + + stmt[*i].loop_level.erase(stmt[*i].loop_level.begin() + level - 1); } } std::set Loop::unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level) { - std::set cleanup_stmts = unroll(stmt_num, level, unroll_amount,std::vector< std::vector >(), cleanup_split_level); + std::set cleanup_stmts = unroll(stmt_num, level, unroll_amount, std::vector >(), + cleanup_split_level); for (std::set::iterator i = cleanup_stmts.begin(); i != cleanup_stmts.end(); i++) unroll(*i, level, 0); - + return cleanup_stmts; } @@ -71,10 +72,10 @@ void Loop::peel(int stmt_num, int level, int peel_amount) { throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); - + if (peel_amount == 0) return; - + std::set subloop = getSubLoopNest(stmt_num, level); std::vector Rs; for (std::set::iterator i = subloop.begin(); i != subloop.end(); i++) { @@ -83,7 +84,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) { F_And *f_root = f.add_and(); for (int j = 1; j <= level; j++) { EQ_Handle h = f_root->add_EQ(); - h.update_coef(f.input_var(2*j), 1); + h.update_coef(f.input_var(2 * j), 1); h.update_coef(f.output_var(j), -1); } r = Composition(f, r); @@ -91,7 +92,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) { Rs.push_back(r); } Relation hull = SimpleHull(Rs); - + if (peel_amount > 0) { GEQ_Handle bound_eq; bool found_bound = false; @@ -120,7 +121,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) { } if (!found_bound) throw loop_error("can't find lower bound for peeling at loop level " + to_string(level)); - + for (int i = 1; i <= peel_amount; i++) { Relation r(level); F_Exists *f_exists = r.add_and()->add_exists(); @@ -129,34 +130,33 @@ void Loop::peel(int stmt_num, int level, int peel_amount) { std::map exists_mapping; for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) switch (cvi.curr_var()->kind()) { - case Input_Var: - h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); - break; - case Wildcard_Var: { - Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); - h.update_coef(v, cvi.curr_coef()); - break; - } - case Global_Var: { - Global_Var_ID g = cvi.curr_var()->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = r.get_local(g); - else - v = r.get_local(g, cvi.curr_var()->function_of()); - h.update_coef(v, cvi.curr_coef()); - break; - } - default: - assert(false); + case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); } h.update_const(bound_eq.get_const() - i); r.simplify(); - + split(stmt_num, level, r); } - } - else { // peel_amount < 0 + } else { // peel_amount < 0 GEQ_Handle bound_eq; bool found_bound = false; for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) @@ -184,7 +184,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) { } if (!found_bound) throw loop_error("can't find upper bound for peeling at loop level " + to_string(level)); - + for (int i = 1; i <= -peel_amount; i++) { Relation r(level); F_Exists *f_exists = r.add_and()->add_exists(); @@ -193,30 +193,30 @@ void Loop::peel(int stmt_num, int level, int peel_amount) { std::map exists_mapping; for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) switch (cvi.curr_var()->kind()) { - case Input_Var: - h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); - break; - case Wildcard_Var: { - Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); - h.update_coef(v, cvi.curr_coef()); - break; - } - case Global_Var: { - Global_Var_ID g = cvi.curr_var()->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = r.get_local(g); - else - v = r.get_local(g, cvi.curr_var()->function_of()); - h.update_coef(v, cvi.curr_coef()); - break; - } - default: - assert(false); + case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); } h.update_const(bound_eq.get_const() - i); r.simplify(); - + split(stmt_num, level, r); } } diff --git a/src/transformations/loop_tile.cc b/src/transformations/loop_tile.cc index 41c3e7f..0a1808b 100644 --- a/src/transformations/loop_tile.cc +++ b/src/transformations/loop_tile.cc @@ -14,8 +14,6 @@ using namespace omega; - - void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, TilingMethodType method, int alignment_offset, int alignment_multiple) { // check for sanity of parameters @@ -29,30 +27,30 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, throw std::invalid_argument("invalid loop level " + to_string(level)); if (level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument( - "there is no loop level " + to_string(level) + " for statement " - + to_string(stmt_num)); + "there is no loop level " + to_string(level) + " for statement " + + to_string(stmt_num)); if (outer_level <= 0 || outer_level > level) throw std::invalid_argument( - "invalid tile controlling loop level " - + to_string(outer_level)); - + "invalid tile controlling loop level " + + to_string(outer_level)); + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + int dim = 2 * level - 1; int outer_dim = 2 * outer_level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_tiled_loop = getStatements(lex, dim - 1); std::set same_tile_controlling_loop = getStatements(lex, outer_dim - 1); - + for (std::set::iterator i = same_tiled_loop.begin(); i != same_tiled_loop.end(); i++) { for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) { if (same_tiled_loop.find(j->first) != same_tiled_loop.end()) for (int k = 0; k < j->second.size(); k++) { @@ -63,34 +61,34 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, dim2 = stmt[*i].loop_level[dim2].payload - 1; } dim2 = stmt[*i].loop_level[dim2].payload; - + if (dv.hasNegative(dim2) && (!dv.quasi)) { for (int l = outer_level; l < level; l++) if (stmt[*i].loop_level[l - 1].type != LoopLevelTile) { if (dv.isCarried( - stmt[*i].loop_level[l - 1].payload) + stmt[*i].loop_level[l - 1].payload) && dv.hasPositive( - stmt[*i].loop_level[l - 1].payload)) + stmt[*i].loop_level[l - 1].payload)) throw loop_error( - "loop error: Tiling is illegal, dependence violation!"); + "loop error: Tiling is illegal, dependence violation!"); } else { - + int dim3 = l - 1; while (stmt[*i].loop_level[l - 1].type != LoopLevelTile) { dim3 = - stmt[*i].loop_level[l - 1].payload - - 1; - + stmt[*i].loop_level[l - 1].payload + - 1; + } - + dim3 = stmt[*i].loop_level[l - 1].payload; if (dim3 < level - 1) if (dv.isCarried(dim3) && dv.hasPositive(dim3)) throw loop_error( - "loop error: Tiling is illegal, dependence violation!"); + "loop error: Tiling is illegal, dependence violation!"); } } } @@ -117,11 +115,11 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(r.input_var(j), 1); h.update_coef(r.output_var(j + 2), -1); } - + stmt[*i].xform = Composition(copy(r), stmt[*i].xform); } } - // normal tiling + // normal tiling else { std::set private_stmt; for (std::set::iterator i = same_tile_controlling_loop.begin(); @@ -131,12 +129,12 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, && overflow.find(*i) != overflow.end()) private_stmt.insert(*i); } - + // extract the union of the iteration space to be considered Relation hull; { std::vector r_list; - + for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) if (private_stmt.find(*i) == private_stmt.end()) { @@ -150,28 +148,28 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, r.simplify(2, 4); r_list.push_back(r); } - + hull = SimpleHull(r_list); } - + // extract the bound of the dimension to be tiled Relation bound = get_loop_bound(hull, dim); if (!bound.has_single_conjunct()) { // further simplify the bound hull = Approximate(hull); bound = get_loop_bound(hull, dim); - + int i = outer_dim - 2; while (!bound.has_single_conjunct() && i >= 0) { hull = Project(hull, i + 1, Set_Var); bound = get_loop_bound(hull, dim); i -= 2; } - + if (!bound.has_single_conjunct()) throw loop_error("cannot handle tile bounds"); } - + // separate lower and upper bounds std::vector lb_list, ub_list; { @@ -186,11 +184,11 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } if (lb_list.size() == 0) throw loop_error( - "unable to calculate tile controlling loop lower bound"); + "unable to calculate tile controlling loop lower bound"); if (ub_list.size() == 0) throw loop_error( - "unable to calculate tile controlling loop upper bound"); - + "unable to calculate tile controlling loop upper bound"); + // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile int simplest_lb = 0, simplest_ub = 0; if (method == StridedTile) { @@ -199,20 +197,20 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, int cost = 0; for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - cost += 5; - break; - } - case Global_Var: { - cost += 2; - break; - } - default: - cost += 15; - break; + case Input_Var: { + cost += 5; + break; + } + case Global_Var: { + cost += 2; + break; + } + default: + cost += 15; + break; } } - + if (cost < best_cost) { best_cost = cost; simplest_lb = i; @@ -224,67 +222,67 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, for (int i = 0; i < lb_list.size(); i++) for (int j = 0; j < ub_list.size(); j++) { int cost = 0; - + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - s1[(*ci).var] += (*ci).coef; - break; - } - case Global_Var: { - s2[(*ci).var] += (*ci).coef; - break; - } - case Exists_Var: - case Wildcard_Var: { - s3[(*ci).var] += (*ci).coef; - break; - } - default: - cost = INT_MAX - 2; - break; + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + cost = INT_MAX - 2; + break; } } - + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - s1[(*ci).var] += (*ci).coef; - break; - } - case Global_Var: { - s2[(*ci).var] += (*ci).coef; - break; - } - case Exists_Var: - case Wildcard_Var: { - s3[(*ci).var] += (*ci).coef; - break; - } - default: - if (cost == INT_MAX - 2) - cost = INT_MAX - 1; - else - cost = INT_MAX - 3; - break; + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + if (cost == INT_MAX - 2) + cost = INT_MAX - 1; + else + cost = INT_MAX - 3; + break; } } - + if (cost == 0) { for (std::map::iterator k = - s1.begin(); k != s1.end(); k++) + s1.begin(); k != s1.end(); k++) if ((*k).second != 0) cost += 5; for (std::map::iterator k = - s2.begin(); k != s2.end(); k++) + s2.begin(); k != s2.end(); k++) if ((*k).second != 0) cost += 2; for (std::map::iterator k = - s3.begin(); k != s3.end(); k++) + s3.begin(); k != s3.end(); k++) if ((*k).second != 0) cost += 15; } - + if (cost < best_cost) { best_cost = cost; simplest_lb = i; @@ -292,7 +290,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } } } - + // prepare the new transformation relations for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { @@ -303,58 +301,58 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(r.output_var(j + 1), 1); h.update_coef(r.input_var(j + 1), -1); } - + for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(j + 3), 1); h.update_coef(r.input_var(j + 1), -1); } - + EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(outer_dim), 1); h.update_const(-lex[outer_dim - 1]); - + stmt[*i].xform = Composition(r, stmt[*i].xform); } - + // add tiling constraints. for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { F_And *f_super_root = stmt[*i].xform.and_with_and(); F_Exists *f_exists = f_super_root->add_exists(); F_And *f_root = f_exists->add_and(); - + // create a lower bound variable for easy formula creation later Variable_ID aligned_lb; { Variable_ID lb = f_exists->declare(); coef_t coef = lb_list[simplest_lb].get_coef( - bound.set_var(dim + 1)); + bound.set_var(dim + 1)); if (coef == 1) { // e.g. if i >= m+5, then LB = m+5 EQ_Handle h = f_root->add_EQ(); h.update_coef(lb, 1); for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos != dim + 1) - h.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h.update_const(lb_list[simplest_lb].get_const()); @@ -363,40 +361,40 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, GEQ_Handle h2 = f_root->add_GEQ(); for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos == dim + 1) { - h1.update_coef(lb, (*ci).coef); - h2.update_coef(lb, -(*ci).coef); - } else { - h1.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - h2.update_coef(stmt[*i].xform.output_var(pos), - -(*ci).coef); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(lb, (*ci).coef); + h2.update_coef(lb, -(*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + } + break; } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h1.update_coef(v, (*ci).coef); - h2.update_coef(v, -(*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, (*ci).coef); + h2.update_coef(v, -(*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h1.update_const(lb_list[simplest_lb].get_const()); h2.update_const(-lb_list[simplest_lb].get_const()); h2.update_const(coef - 1); } - + Variable_ID offset_lb; if (alignment_offset == 0) offset_lb = lb; @@ -407,17 +405,17 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(lb, -1); h.update_const(alignment_offset); } - + if (alignment_multiple == 1) { // trivial aligned_lb = offset_lb; } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB aligned_lb = f_exists->declare(); Variable_ID e = f_exists->declare(); - + EQ_Handle h = f_root->add_EQ(); h.update_coef(aligned_lb, 1); h.update_coef(e, -alignment_multiple); - + GEQ_Handle h1 = f_root->add_GEQ(); GEQ_Handle h2 = f_root->add_GEQ(); h1.update_coef(e, alignment_multiple); @@ -427,37 +425,37 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h1.update_const(alignment_multiple - 1); } } - + // create an upper bound variable for easy formula creation later Variable_ID ub = f_exists->declare(); { coef_t coef = -ub_list[simplest_ub].get_coef( - bound.set_var(dim + 1)); + bound.set_var(dim + 1)); if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 EQ_Handle h = f_root->add_EQ(); h.update_coef(ub, -1); for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos != dim + 1) - h.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h.update_const(ub_list[simplest_ub].get_const()); @@ -466,33 +464,33 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, GEQ_Handle h2 = f_root->add_GEQ(); for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos == dim + 1) { - h1.update_coef(ub, -(*ci).coef); - h2.update_coef(ub, (*ci).coef); - } else { - h1.update_coef(stmt[*i].xform.output_var(pos), - -(*ci).coef); - h2.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(ub, -(*ci).coef); + h2.update_coef(ub, (*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + } + break; } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h1.update_coef(v, -(*ci).coef); - h2.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, -(*ci).coef); + h2.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h1.update_const(-ub_list[simplest_ub].get_const()); @@ -500,13 +498,13 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h1.update_const(coef - 1); } } - + // insert tile controlling loop constraints if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 Variable_ID e = f_exists->declare(); GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(e, 1); - + EQ_Handle h2 = f_root->add_EQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); h2.update_coef(e, -tile_size); @@ -514,14 +512,14 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); - + GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -tile_size); h2.update_coef(aligned_lb, -1); h2.update_coef(ub, 1); } - + // special care for private statements like overflow assignment if (private_stmt.find(*i) != private_stmt.end()) { // e.g. ii <= UB GEQ_Handle h = f_root->add_GEQ(); @@ -529,14 +527,14 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(ub, 1); } - // restrict original loop index inside the tile + // restrict original loop index inside the tile else { if (method == StridedTile) { // e.g. ii <= i < ii + tile_size GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); - + GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); @@ -547,7 +545,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, -tile_size); h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); h1.update_coef(aligned_lb, -1); - + GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), tile_size); @@ -558,30 +556,30 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } } } - + // update loop level information for (std::set::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { for (int j = 1; j <= stmt[*i].loop_level.size(); j++) switch (stmt[*i].loop_level[j - 1].type) { - case LoopLevelOriginal: - break; - case LoopLevelTile: - if (stmt[*i].loop_level[j - 1].payload >= outer_level) - stmt[*i].loop_level[j - 1].payload++; - break; - default: - throw loop_error( - "unknown loop level type for statement " - + to_string(*i)); + case LoopLevelOriginal: + break; + case LoopLevelTile: + if (stmt[*i].loop_level[j - 1].payload >= outer_level) + stmt[*i].loop_level[j - 1].payload++; + break; + default: + throw loop_error( + "unknown loop level type for statement " + + to_string(*i)); } - + LoopLevel ll; ll.type = LoopLevelTile; ll.payload = level + 1; ll.parallel_level = 0; stmt[*i].loop_level.insert( - stmt[*i].loop_level.begin() + (outer_level - 1), ll); + stmt[*i].loop_level.begin() + (outer_level - 1), ll); } } diff --git a/src/transformations/loop_unroll.cc b/src/transformations/loop_unroll.cc index 86ffd84..3238d50 100644 --- a/src/transformations/loop_unroll.cc +++ b/src/transformations/loop_unroll.cc @@ -23,54 +23,54 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, // check for sanity of parameters if (unroll_amount < 0) throw std::invalid_argument( - "invalid unroll amount " + to_string(unroll_amount)); + "invalid unroll amount " + to_string(unroll_amount)); if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); - + if (cleanup_split_level == 0) cleanup_split_level = level; if (cleanup_split_level > level) throw std::invalid_argument( - "cleanup code must be split at or outside the unrolled loop level " - + to_string(level)); + "cleanup code must be split at or outside the unrolled loop level " + + to_string(level)); if (cleanup_split_level <= 0) throw std::invalid_argument( - "invalid split loop level " + to_string(cleanup_split_level)); - + "invalid split loop level " + to_string(cleanup_split_level)); + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + int dim = 2 * level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_loop = getStatements(lex, dim - 1); - + // nothing to do if (unroll_amount == 1) return std::set(); - + for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { std::vector > D; int n = stmt[*i].xform.n_out(); for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) { if (same_loop.find(j->first) != same_loop.end()) for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; int dim2 = level - 1; if (dv.type != DEP_CONTROL) { - + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { dim2 = stmt[*i].loop_level[dim2].payload - 1; } dim2 = stmt[*i].loop_level[dim2].payload; - + /*if (dv.isCarried(dim2) && (dv.hasNegative(dim2) && !dv.quasi)) throw loop_error( @@ -82,11 +82,11 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, "loop error: Unrolling is illegal, dependence violation!"); */ bool safe = false; - + if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { if (dv.quasi) throw loop_error( - "loop error: a quasi dependence with a positive carried distance"); + "loop error: a quasi dependence with a positive carried distance"); if (!dv.quasi) { if (dv.lbounds[dim2] != posInfinity) { //if (dv.lbounds[dim2] != negInfinity) @@ -102,33 +102,33 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, } else safe = true; }*/ - + if (!safe) { for (int l = level + 1; l <= (n - 1) / 2; l++) { int dim3 = l - 1; - + if (stmt[*i].loop_level[dim3].type != LoopLevelTile) dim3 = - stmt[*i].loop_level[dim3].payload; + stmt[*i].loop_level[dim3].payload; else { while (stmt[*i].loop_level[dim3].type == LoopLevelTile) { dim3 = - stmt[*i].loop_level[dim3].payload - - 1; + stmt[*i].loop_level[dim3].payload + - 1; } dim3 = - stmt[*i].loop_level[dim3].payload; + stmt[*i].loop_level[dim3].payload; } - + if (dim3 > dim2) { - + if (dv.hasPositive(dim3)) break; else if (dv.hasNegative(dim3)) throw loop_error( - "loop error: Unrolling is illegal, dependence violation!"); + "loop error: Unrolling is illegal, dependence violation!"); } } } @@ -153,7 +153,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, hull = Intersection(hull, omega::Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); hull.simplify(2, 4); - + } } for (int i = 1; i <= level; i++) { @@ -161,7 +161,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, hull.name_set_var(i, name); } hull.setup_names(); - + // extract the exact loop bound of the dimension to be unrolled if (is_single_loop_iteration(hull, level, this->known)) return std::set(); @@ -169,7 +169,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, if (!bound.has_single_conjunct() || !bound.is_satisfiable() || bound.is_tautology()) throw loop_error("unable to extract loop bound for unrolling"); - + // extract the loop stride coef_t stride; std::pair result = find_simplest_stride(bound, @@ -178,9 +178,9 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, stride = 1; else stride = abs(result.first.get_coef(result.second)) - / gcd(abs(result.first.get_coef(result.second)), - abs(result.first.get_coef(bound.set_var(level)))); - + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(level)))); + // separate lower and upper bounds std::vector lb_list, ub_list; { @@ -193,17 +193,17 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, lb_list.push_back(*gi); } } - + // simplify overflow expression for each pair of upper and lower bounds std::vector > > overflow_table( - lb_list.size(), - std::vector >(ub_list.size(), - std::map())); + lb_list.size(), + std::vector >(ub_list.size(), + std::map())); bool is_overflow_simplifiable = true; for (int i = 0; i < lb_list.size(); i++) { if (!is_overflow_simplifiable) break; - + for (int j = 0; j < ub_list.size(); j++) { // lower bound or upper bound has non-unit coefficient, can't simplify if (ub_list[j].get_coef(bound.set_var(level)) != -1 @@ -211,81 +211,81 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, is_overflow_simplifiable = false; break; } - + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - if ((*ci).var != bound.set_var(level)) + case Input_Var: { + if ((*ci).var != bound.set_var(level)) + overflow_table[i][j][(*ci).var] += (*ci).coef; + + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); overflow_table[i][j][(*ci).var] += (*ci).coef; - - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = bound.get_local(g); - else - v = bound.get_local(g, (*ci).var->function_of()); - overflow_table[i][j][(*ci).var] += (*ci).coef; - break; - } - default: - throw loop_error("failed to calculate overflow amount"); + break; + } + default: + throw loop_error("failed to calculate overflow amount"); } } overflow_table[i][j][NULL] += ub_list[j].get_const(); - + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - if ((*ci).var != bound.set_var(level)) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) { + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); overflow_table[i][j][(*ci).var] += (*ci).coef; if (overflow_table[i][j][(*ci).var] == 0) overflow_table[i][j].erase( - overflow_table[i][j].find((*ci).var)); + overflow_table[i][j].find((*ci).var)); + break; } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = bound.get_local(g); - else - v = bound.get_local(g, (*ci).var->function_of()); - overflow_table[i][j][(*ci).var] += (*ci).coef; - if (overflow_table[i][j][(*ci).var] == 0) - overflow_table[i][j].erase( - overflow_table[i][j].find((*ci).var)); - break; - } - default: - throw loop_error("failed to calculate overflow amount"); + default: + throw loop_error("failed to calculate overflow amount"); } } overflow_table[i][j][NULL] += lb_list[i].get_const(); - + overflow_table[i][j][NULL] += stride; if (unroll_amount == 0 || (overflow_table[i][j].size() == 1 && overflow_table[i][j][NULL] / stride - < unroll_amount)) + < unroll_amount)) unroll_amount = overflow_table[i][j][NULL] / stride; } } - + // loop iteration count can't be determined, bail out gracefully if (unroll_amount == 0) return std::set(); - + // further simply overflow calculation using coefficients' modular if (is_overflow_simplifiable) { for (int i = 0; i < lb_list.size(); i++) for (int j = 0; j < ub_list.size(); j++) if (stride == 1) { for (std::map::iterator k = - overflow_table[i][j].begin(); + overflow_table[i][j].begin(); k != overflow_table[i][j].end();) if ((*k).first != NULL) { int t = int_mod_hat((*k).second, unroll_amount); @@ -304,20 +304,20 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, } } else k++; - + overflow_table[i][j][NULL] = int_mod_hat( - overflow_table[i][j][NULL], unroll_amount); - + overflow_table[i][j][NULL], unroll_amount); + // Since we don't have MODULO instruction in SUIF yet (only MOD), // make all coef positive in the final formula for (std::map::iterator k = - overflow_table[i][j].begin(); + overflow_table[i][j].begin(); k != overflow_table[i][j].end(); k++) if ((*k).second < 0) (*k).second += unroll_amount; } } - + // build overflow statement CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *overflow_code = NULL; @@ -331,17 +331,17 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, // upper splitting condition GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_const( - ((overflow_table[0][i][NULL] / stride) % unroll_amount) - * -stride); + ((overflow_table[0][i][NULL] / stride) % unroll_amount) + * -stride); } else { // upper splitting condition std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); + + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_coef(cond_upper.get_local(over_free_var), -stride); - + // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); @@ -349,20 +349,20 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); - + // create overflow assignment bound.setup_names(); // hack to fix omega relation variable names issue CG_outputRepr *rhs = NULL; bool is_split_illegal = false; for (std::map::iterator j = - overflow_table[0][i].begin(); + overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) if ((*j).first != NULL) { if ((*j).first->kind() == Input_Var && (*j).first->get_position() - >= cleanup_split_level) + >= cleanup_split_level) is_split_illegal = true; - + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); if ((*j).second != 1) t = ocg->CreateTimes(ocg->CreateInt((*j).second), @@ -370,20 +370,20 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, rhs = ocg->CreatePlus(rhs, t); } else if ((*j).second != 0) rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); - + if (is_split_illegal) { rhs->clear(); delete rhs; throw loop_error( - "cannot split cleanup code at loop level " - + to_string(cleanup_split_level) - + " due to overflow variable data dependence"); + "cannot split cleanup code at loop level " + + to_string(cleanup_split_level) + + " due to overflow variable data dependence"); } - + if (stride != 1) rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); - + CG_outputRepr *lhs = ocg->CreateIdent(over_name); init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); @@ -392,12 +392,12 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, ocg->CreateAssignment(0, lhs, rhs)); } } - + // lower splitting condition GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); } else if (is_overflow_simplifiable && ub_list.size() == 1) { for (int i = 0; i < lb_list.size(); i++) { - + if (overflow_table[i][0].size() == 1) { // lower splitting condition GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); @@ -405,12 +405,12 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, } else { // lower splitting condition std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); + + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); h.update_coef(cond_lower.get_local(over_free_var), -stride); - + // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); @@ -418,12 +418,12 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); - + // create overflow assignment bound.setup_names(); // hack to fix omega relation variable names issue CG_outputRepr *rhs = NULL; for (std::map::iterator j = - overflow_table[0][i].begin(); + overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) if ((*j).first != NULL) { CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); @@ -433,11 +433,11 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, rhs = ocg->CreatePlus(rhs, t); } else if ((*j).second != 0) rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); - + if (stride != 1) rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); - + CG_outputRepr *lhs = ocg->CreateIdent(over_name); init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); @@ -446,61 +446,59 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, ocg->CreateAssignment(0, lhs, rhs)); } } - + // upper splitting condition GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); } else { std::string over_name = overflow_var_name_prefix - + to_string(overflow_var_name_counter++); + + to_string(overflow_var_name_counter++); Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); over_var_list.push_back(over_free_var); - + std::vector lb_repr_list, ub_repr_list; for (int i = 0; i < lb_list.size(); i++) { lb_repr_list.push_back( - output_lower_bound_repr(ocg, - lb_list[i], - bound.set_var(dim + 1), result.first, result.second, - bound, Relation::True(bound.n_set()), - std::vector >( - bound.n_set(), - std::make_pair( - static_cast(NULL), - 0)), - uninterpreted_symbols[stmt_num])); + output_lower_bound_repr(ocg, + lb_list[i], + bound.set_var(dim + 1), result.first, result.second, + bound, Relation::True(bound.n_set()), + std::vector >( + bound.n_set(), + std::make_pair( + static_cast(NULL), + 0)), + uninterpreted_symbols[stmt_num])); GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); } for (int i = 0; i < ub_list.size(); i++) { ub_repr_list.push_back( - output_upper_bound_repr(ocg, ub_list[i], - bound.set_var(dim + 1), bound, - std::vector >( - bound.n_set(), - std::make_pair( - static_cast(NULL), - 0)), - uninterpreted_symbols[stmt_num])); + output_upper_bound_repr(ocg, ub_list[i], + bound.set_var(dim + 1), bound, + std::vector >( + bound.n_set(), + std::make_pair( + static_cast(NULL), + 0)), + uninterpreted_symbols[stmt_num])); GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); h.update_coef(cond_upper.get_local(over_free_var), -stride); } - - CG_outputRepr *lbRepr, *ubRepr; + + CG_outputRepr *lbRepr, *ubRepr; if (lb_repr_list.size() > 1) { //fprintf(stderr, "loop_unroll.cc createInvoke( max )\n"); lbRepr = ocg->CreateInvoke("max", lb_repr_list); - } - else if (lb_repr_list.size() == 1) { + } else if (lb_repr_list.size() == 1) { lbRepr = lb_repr_list[0]; } - + if (ub_repr_list.size() > 1) { //fprintf(stderr, "loop_unroll.cc createInvoke( min )\n"); ubRepr = ocg->CreateInvoke("min", ub_repr_list); - } - else if (ub_repr_list.size() == 1) { + } else if (ub_repr_list.size() == 1) { ubRepr = ub_repr_list[0]; } - + // create overflow assignment CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), ocg->CreateInt(1)); @@ -512,7 +510,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); lhs = ocg->CreateIdent(over_name); overflow_code = ocg->CreateAssignment(0, lhs, rhs); - + // insert constraint 0 <= overflow < unroll_amount Variable_ID v = overflow_constraint.get_local(over_free_var); GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); @@ -521,7 +519,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, h2.update_coef(v, -1); h2.update_const(unroll_amount - 1); } - + // insert overflow statement int overflow_stmt_num = -1; if (overflow_code != NULL) { @@ -537,7 +535,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, for (int i = 1; i < cleanup_split_level; i++) overflow_IS.name_set_var(i, hull.set_var(i)->name()); overflow_IS.setup_names(); - + // build dumb transformation relation for overflow statement Relation overflow_xform(cleanup_split_level - 1, 2 * (cleanup_split_level - 1) + 1); @@ -546,20 +544,20 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, EQ_Handle h = f_root->add_EQ(); h.update_coef(overflow_xform.output_var(2 * i), 1); h.update_coef(overflow_xform.input_var(i), -1); - + h = f_root->add_EQ(); h.update_coef(overflow_xform.output_var(2 * i - 1), 1); h.update_const(-lex[2 * i - 2]); } EQ_Handle h = f_root->add_EQ(); h.update_coef( - overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), - 1); + overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), + 1); h.update_const(-lex[2 * (cleanup_split_level - 1)]); - + shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1); Statement overflow_stmt; - + overflow_stmt.code = overflow_code; overflow_stmt.IS = overflow_IS; overflow_stmt.xform = overflow_xform; @@ -567,18 +565,18 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, overflow_stmt.ir_stmt_node = NULL; for (int i = 0; i < level - 1; i++) { overflow_stmt.loop_level[i].type = - stmt[stmt_num].loop_level[i].type; + stmt[stmt_num].loop_level[i].type; if (stmt[stmt_num].loop_level[i].type == LoopLevelTile && stmt[stmt_num].loop_level[i].payload >= level) overflow_stmt.loop_level[i].payload = -1; else overflow_stmt.loop_level[i].payload = - stmt[stmt_num].loop_level[i].payload; + stmt[stmt_num].loop_level[i].payload; overflow_stmt.loop_level[i].parallel_level = - stmt[stmt_num].loop_level[i].parallel_level; + stmt[stmt_num].loop_level[i].parallel_level; } - - fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size()); + + fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size()); stmt.push_back(overflow_stmt); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); @@ -586,12 +584,12 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, dep.insert(); overflow_stmt_num = stmt.size() - 1; overflow[overflow_stmt_num] = over_var_list; - + // update the global known information on overflow variable this->known = Intersection(this->known, Extend_Set(copy(overflow_constraint), this->known.n_set() - overflow_constraint.n_set())); - + // update dependence graph DependenceVector dv; dv.type = DEP_CONTROL; @@ -628,12 +626,12 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, dep.connect(overflow_stmt_num, overflow_stmt_num, dv); } } - + // split the loop so it can be fully unrolled std::set new_stmts = split(stmt_num, cleanup_split_level, cond_upper); std::set new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower); new_stmts.insert(new_stmts2.begin(), new_stmts2.end()); - + // check if unrolled statements can be trivially lumped together as one statement bool can_be_lumped = true; if (can_be_lumped) { @@ -649,7 +647,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, if (!(stmt[*i].loop_level[j].type == stmt[stmt_num].loop_level[j].type && stmt[*i].loop_level[j].payload - == stmt[stmt_num].loop_level[j].payload)) { + == stmt[stmt_num].loop_level[j].payload)) { can_be_lumped = false; break; } @@ -690,7 +688,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) if (same_loop.find(j->first) != same_loop.end()) { for (int k = 0; k < j->second.size(); k++) @@ -706,38 +704,38 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, break; } } - + // insert unrolled statements int old_num_stmt = stmt.size(); if (!can_be_lumped) { std::map > what_stmt_num; - + for (int j = 1; j < unroll_amount; j++) { for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { Statement new_stmt; - + std::vector loop_vars; std::vector subs; loop_vars.push_back(stmt[*i].IS.set_var(level)->name()); subs.push_back( - ocg->CreatePlus( - ocg->CreateIdent( - stmt[*i].IS.set_var(level)->name()), - ocg->CreateInt(j * stride))); + ocg->CreatePlus( + ocg->CreateIdent( + stmt[*i].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); new_stmt.code = ocg->CreateSubstitutedStmt(0, stmt[*i].code->clone(), loop_vars, subs); - + new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); add_loop_stride(new_stmt.IS, bound, level - 1, unroll_amount * stride); - + new_stmt.xform = copy(stmt[*i].xform); - + new_stmt.loop_level = stmt[*i].loop_level; new_stmt.ir_stmt_node = NULL; - fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size()); stmt.push_back(new_stmt); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); @@ -750,16 +748,16 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, i != same_loop.end(); i++) add_loop_stride(stmt[*i].IS, bound, level - 1, unroll_amount * stride); - + // update dependence graph if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; int new_stride = unroll_amount * stride; for (int i = 0; i < old_num_stmt; i++) { std::vector > D; - + for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { @@ -772,7 +770,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, kk++) if (what_stmt_num[i][kk] != -1 && what_stmt_num[j->first][kk] - != -1) + != -1) dep.connect(what_stmt_num[i][kk], what_stmt_num[j->first][kk], dv); @@ -782,35 +780,35 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, if (ub == lb && int_mod(lb, static_cast(new_stride)) - == 0) { + == 0) { D.push_back( - std::make_pair(j->first, dv)); + std::make_pair(j->first, dv)); for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[i][kk] != -1 && what_stmt_num[j->first][kk] - != -1) + != -1) dep.connect( - what_stmt_num[i][kk], - what_stmt_num[j->first][kk], - dv); + what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); } else if (lb == -posInfinity && ub == posInfinity) { D.push_back( - std::make_pair(j->first, dv)); + std::make_pair(j->first, dv)); for (int kk = 0; kk < unroll_amount; kk++) if (kk == 0) D.push_back( - std::make_pair(j->first, - dv)); + std::make_pair(j->first, + dv)); else if (what_stmt_num[j->first][kk - 1] != -1) D.push_back( - std::make_pair( - what_stmt_num[j->first][kk - - 1], - dv)); + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); for (int t = 0; t < unroll_amount - 1; t++) if (what_stmt_num[i][t] != -1) @@ -819,15 +817,15 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, kk++) if (kk == 0) dep.connect( - what_stmt_num[i][t], - j->first, dv); + what_stmt_num[i][t], + j->first, dv); else if (what_stmt_num[j->first][kk - 1] != -1) dep.connect( - what_stmt_num[i][t], - what_stmt_num[j->first][kk - - 1], - dv); + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); } else { for (int kk = 0; kk < unroll_amount; kk++) { @@ -836,49 +834,49 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, < int_mod(lb, static_cast(new_stride))) dv.lbounds[dep_dim] = - floor( - static_cast(lb) - / new_stride) - * new_stride - + new_stride; + floor( + static_cast(lb) + / new_stride) + * new_stride + + new_stride; else dv.lbounds[dep_dim] = - floor( - static_cast(lb) - / new_stride) - * new_stride; + floor( + static_cast(lb) + / new_stride) + * new_stride; } if (ub != posInfinity) { if (kk * stride > int_mod(ub, static_cast(new_stride))) dv.ubounds[dep_dim] = - floor( - static_cast(ub) - / new_stride) - * new_stride - - new_stride; + floor( + static_cast(ub) + / new_stride) + * new_stride + - new_stride; else dv.ubounds[dep_dim] = - floor( - static_cast(ub) - / new_stride) - * new_stride; + floor( + static_cast(ub) + / new_stride) + * new_stride; } if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { if (kk == 0) D.push_back( - std::make_pair( - j->first, - dv)); + std::make_pair( + j->first, + dv)); else if (what_stmt_num[j->first][kk - 1] != -1) D.push_back( - std::make_pair( - what_stmt_num[j->first][kk - - 1], - dv)); + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); } } for (int t = 0; t < unroll_amount - 1; @@ -890,80 +888,80 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, if (lb != -posInfinity) { if (kk * stride < int_mod( - lb + t - + 1, - static_cast(new_stride))) + lb + t + + 1, + static_cast(new_stride))) dv.lbounds[dep_dim] = - floor( - static_cast(lb - + (t - + 1) - * stride) - / new_stride) - * new_stride - + new_stride; + floor( + static_cast(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride + + new_stride; else dv.lbounds[dep_dim] = - floor( - static_cast(lb - + (t - + 1) - * stride) - / new_stride) - * new_stride; + floor( + static_cast(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride; } if (ub != posInfinity) { if (kk * stride > int_mod( - ub + t - + 1, - static_cast(new_stride))) + ub + t + + 1, + static_cast(new_stride))) dv.ubounds[dep_dim] = - floor( - static_cast(ub - + (t - + 1) - * stride) - / new_stride) - * new_stride - - new_stride; + floor( + static_cast(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride + - new_stride; else dv.ubounds[dep_dim] = - floor( - static_cast(ub - + (t - + 1) - * stride) - / new_stride) - * new_stride; + floor( + static_cast(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride; } if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { if (kk == 0) dep.connect( - what_stmt_num[i][t], - j->first, - dv); + what_stmt_num[i][t], + j->first, + dv); else if (what_stmt_num[j->first][kk - 1] != -1) dep.connect( - what_stmt_num[i][t], - what_stmt_num[j->first][kk - - 1], - dv); + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); } } } } } - + dep.vertex[i].second.erase(j++); } else { for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[i][kk] != -1) dep.connect(what_stmt_num[i][kk], j->first, j->second); - + j++; } } else { @@ -972,26 +970,26 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, for (int kk = 0; kk < unroll_amount - 1; kk++) if (what_stmt_num[j->first][kk] != -1) D.push_back( - std::make_pair( - what_stmt_num[j->first][kk], - j->second[k])); + std::make_pair( + what_stmt_num[j->first][kk], + j->second[k])); j++; } } - + for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } } - + // reset lexical order for the unrolled loop body std::set new_same_loop; - + int count = 0; - + for (std::map >::iterator i = - what_stmt_num.begin(); i != what_stmt_num.end(); i++) { - + what_stmt_num.begin(); i != what_stmt_num.end(); i++) { + new_same_loop.insert(i->first); for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2) assign_const(stmt[i->first].xform, k, @@ -1001,11 +999,11 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, for (int j = 0; j < i->second.size(); j++) { new_same_loop.insert(i->second[j]); for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k += - 2) + 2) assign_const(stmt[i->second[j]].xform, k, get_const( - stmt[(what_stmt_num.begin())->first].xform, - k, Output_Var) + count); + stmt[(what_stmt_num.begin())->first].xform, + k, Output_Var) + count); count++; } } @@ -1015,20 +1013,20 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, i != same_loop.end(); i++) add_loop_stride(stmt[*i].IS, bound, level - 1, unroll_amount * stride); - + int max_level = stmt[stmt_num].loop_level.size(); std::vector > stmt_order; for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) stmt_order.push_back( - std::make_pair( - get_const(stmt[*i].xform, 2 * max_level, - Output_Var), *i)); + std::make_pair( + get_const(stmt[*i].xform, 2 * max_level, + Output_Var), *i)); sort(stmt_order.begin(), stmt_order.end()); - + Statement new_stmt; new_stmt.code = NULL; - for (int j = 1; j < unroll_amount; j++) { + for (int j = 1; j < unroll_amount; j++) { for (int i = 0; i < stmt_order.size(); i++) { std::vector loop_vars; std::vector subs; @@ -1036,12 +1034,12 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, //fprintf(stderr, "loop_unroll.cc, will replace '%s with '%s+%d' ??\n", // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), // stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), j * stride); - + loop_vars.push_back( - stmt[stmt_order[i].second].IS.set_var(level)->name()); + stmt[stmt_order[i].second].IS.set_var(level)->name()); subs.push_back( - ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()), - ocg->CreateInt(j * stride))); // BUG HERE + ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); // BUG HERE //fprintf(stderr, "loop_unroll.cc subs now has %d parts\n", subs.size()); //for (int k=0; k< subs.size(); k++) //fprintf(stderr, "subs[%d] = 0x%x\n", k, subs[k]); @@ -1052,7 +1050,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, CG_outputRepr *code = ocg->CreateSubstitutedStmt(0, - stmt[stmt_order[i].second].code->clone(), + stmt[stmt_order[i].second].code->clone(), loop_vars, subs); @@ -1069,7 +1067,7 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, } } - + //fprintf(stderr, "new_stmt.IS = \n"); @@ -1084,13 +1082,13 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, if (stmt[stmt_num].has_inspector) fprintf(stderr, "OLD STMT HAS INSPECTOR\n"); else fprintf(stderr, "OLD STMT DOES NOT HAVE INSPECTOR\n"); - fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size()); stmt.push_back(new_stmt); uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]); uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]); dep.insert(); - + //fprintf(stderr, "update dependence graph\n"); // update dependence graph if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { @@ -1098,14 +1096,14 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, int new_stride = unroll_amount * stride; for (int i = 0; i < old_num_stmt; i++) { std::vector > > D; - + for (DependenceGraph::EdgeList::iterator j = - dep.vertex[i].second.begin(); + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (same_loop.find(i) != same_loop.end()) { if (same_loop.find(j->first) != same_loop.end()) { std::vector dvs11, dvs12, dvs22, - dvs21; + dvs21; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; if (dv.type == DEP_CONTROL @@ -1115,71 +1113,71 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, dvs22.push_back(dv); } else throw loop_error( - "unrolled statements lumped together illegally"); + "unrolled statements lumped together illegally"); } else { coef_t lb = dv.lbounds[dep_dim]; coef_t ub = dv.ubounds[dep_dim]; if (ub == lb && int_mod(lb, static_cast(new_stride)) - == 0) { + == 0) { dvs11.push_back(dv); dvs22.push_back(dv); } else { if (lb != -posInfinity) dv.lbounds[dep_dim] = ceil( - static_cast(lb) - / new_stride) - * new_stride; + static_cast(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = floor( - static_cast(ub) - / new_stride) - * new_stride; + static_cast(ub) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs11.push_back(dv); - + if (lb != -posInfinity) dv.lbounds[dep_dim] = ceil( - static_cast(lb) - / new_stride) - * new_stride; + static_cast(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = ceil( - static_cast(ub) - / new_stride) - * new_stride; + static_cast(ub) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs21.push_back(dv); - + if (lb != -posInfinity) dv.lbounds[dep_dim] = floor( - static_cast(lb) - / new_stride) - * new_stride; + static_cast(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = floor( - static_cast(ub - - stride) - / new_stride) - * new_stride; + static_cast(ub + - stride) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs12.push_back(dv); - + if (lb != -posInfinity) dv.lbounds[dep_dim] = floor( - static_cast(lb) - / new_stride) - * new_stride; + static_cast(lb) + / new_stride) + * new_stride; if (ub != posInfinity) dv.ubounds[dep_dim] = ceil( - static_cast(ub - - stride) - / new_stride) - * new_stride; + static_cast(ub + - stride) + / new_stride) + * new_stride; if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) dvs22.push_back(dv); @@ -1192,10 +1190,10 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, dep.connect(old_num_stmt, old_num_stmt, dvs22); if (dvs12.size() > 0) D.push_back( - std::make_pair(old_num_stmt, dvs12)); + std::make_pair(old_num_stmt, dvs12)); if (dvs21.size() > 0) dep.connect(old_num_stmt, i, dvs21); - + dep.vertex[i].second.erase(j++); } else { dep.connect(old_num_stmt, j->first, j->second); @@ -1204,17 +1202,17 @@ std::set Loop::unroll(int stmt_num, int level, int unroll_amount, } else { if (same_loop.find(j->first) != same_loop.end()) D.push_back( - std::make_pair(old_num_stmt, j->second)); + std::make_pair(old_num_stmt, j->second)); j++; } } - + for (int j = 0; j < D.size(); j++) dep.connect(i, D[j].first, D[j].second); } } } - + //fprintf(stderr, " loop_unroll.cc returning new_stmts\n"); return new_stmts; } -- cgit v1.2.3-70-g09d2