summaryrefslogtreecommitdiff
path: root/src/transformations
diff options
context:
space:
mode:
authorTuowen Zhao <ztuowen@gmail.com>2016-09-23 10:59:54 -0600
committerTuowen Zhao <ztuowen@gmail.com>2016-09-23 10:59:54 -0600
commit699861922d5349ffa98b518f34016b2be2ca368d (patch)
treeb1172a41c89b382487772ef05c8a5ce70c068fa0 /src/transformations
parent16f097d5548e9b31ab4b0dc343afeb680b361e28 (diff)
downloadchill-699861922d5349ffa98b518f34016b2be2ca368d.tar.gz
chill-699861922d5349ffa98b518f34016b2be2ca368d.tar.bz2
chill-699861922d5349ffa98b518f34016b2be2ca368d.zip
more changes
Diffstat (limited to 'src/transformations')
-rw-r--r--src/transformations/loop.cc2791
-rw-r--r--src/transformations/loop_basic.cc858
-rw-r--r--src/transformations/loop_datacopy.cc811
-rw-r--r--src/transformations/loop_extra.cc124
-rw-r--r--src/transformations/loop_tile.cc420
-rw-r--r--src/transformations/loop_unroll.cc642
6 files changed, 2832 insertions, 2814 deletions
diff --git a/src/transformations/loop.cc b/src/transformations/loop.cc
index 570bc90..10dc7bb 100644
--- a/src/transformations/loop.cc
+++ b/src/transformations/loop.cc
@@ -43,36 +43,36 @@
#define _DEBUG_ true
-
using namespace omega;
-const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change
+const std::string Loop::tmp_loop_var_name_prefix = std::string(
+ "chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change
const std::string Loop::overflow_var_name_prefix = std::string("over");
-void echocontroltype( const IR_Control *control ) {
- switch(control->type()) {
- case IR_CONTROL_BLOCK: {
- CHILL_DEBUG_PRINT("IR_CONTROL_BLOCK\n");
- break;
- }
- case IR_CONTROL_LOOP: {
- CHILL_DEBUG_PRINT("IR_CONTROL_LOOP\n");
- break;
- }
- case IR_CONTROL_IF: {
- CHILL_DEBUG_PRINT("IR_CONTROL_IF\n");
- break;
- }
- default:
- CHILL_DEBUG_PRINT("just a bunch of statements?\n");
-
+void echocontroltype(const IR_Control *control) {
+ switch (control->type()) {
+ case IR_CONTROL_BLOCK: {
+ CHILL_DEBUG_PRINT("IR_CONTROL_BLOCK\n");
+ break;
+ }
+ case IR_CONTROL_LOOP: {
+ CHILL_DEBUG_PRINT("IR_CONTROL_LOOP\n");
+ break;
+ }
+ case IR_CONTROL_IF: {
+ CHILL_DEBUG_PRINT("IR_CONTROL_IF\n");
+ break;
+ }
+ default:
+ CHILL_DEBUG_PRINT("just a bunch of statements?\n");
+
} // switch
}
omega::Relation Loop::getNewIS(int stmt_num) const {
-
+
omega::Relation result;
-
+
if (stmt[stmt_num].xform.is_null()) {
omega::Relation known = omega::Extend_Set(omega::copy(this->known),
stmt[stmt_num].IS.n_set() - this->known.n_set());
@@ -81,32 +81,31 @@ omega::Relation Loop::getNewIS(int stmt_num) const {
omega::Relation known = omega::Extend_Set(omega::copy(this->known),
stmt[stmt_num].xform.n_out() - this->known.n_set());
result = omega::Intersection(
- omega::Range(
- omega::Restrict_Domain(
- omega::copy(stmt[stmt_num].xform),
- omega::copy(stmt[stmt_num].IS))), known);
+ omega::Range(
+ omega::Restrict_Domain(
+ omega::copy(stmt[stmt_num].xform),
+ omega::copy(stmt[stmt_num].IS))), known);
}
-
+
result.simplify(2, 4);
-
+
return result;
}
-
-void Loop::reduce(int stmt_num,
- std::vector<int> &level,
+void Loop::reduce(int stmt_num,
+ std::vector<int> &level,
int param,
- std::string func_name,
+ std::string func_name,
std::vector<int> &seq_levels,
- std::vector<int> cudaized_levels,
+ std::vector<int> cudaized_levels,
int bound_level) {
// illegal instruction?? fprintf(stderr, " Loop::reduce( stmt %d, param %d, func_name (encrypted)...)\n", stmt, param); // , func_name.c_str());
-
+
//std::cout << "Reducing stmt# " << stmt_num << " at level " << level << "\n";
//ir->printStmt(stmt[stmt_num].code);
-
+
if (stmt[stmt_num].reduction != 1) {
CHILL_DEBUG_PRINT("Cannot reduce this statement\n");
return;
@@ -132,9 +131,9 @@ void Loop::reduce(int stmt_num,
delete last_compute_cg_;
last_compute_cg_ = NULL;
fprintf(stderr, "set last_compute_cg_ = NULL;\n");
-
+
omega::CG_outputBuilder *ocg = ir->builder();
-
+
omega::CG_outputRepr *funCallRepr;
std::vector<omega::CG_outputRepr *> arg_repr_list;
apply_xform(stmt_num);
@@ -144,13 +143,13 @@ void Loop::reduce(int stmt_num,
std::vector<IR_ArrayRef *> access2;
for (int j = 0; j < access[i]->n_dim(); j++) {
std::vector<IR_ArrayRef *> access3 = ir->FindArrayRef(
- access[i]->index(j));
+ access[i]->index(j));
access2.insert(access2.end(), access3.begin(), access3.end());
}
if (access2.size() == 0) {
if (names.find(access[i]->name()) == names.end()) {
arg_repr_list.push_back(
- ocg->CreateAddressOf(access[i]->convert()));
+ ocg->CreateAddressOf(access[i]->convert()));
names.insert(access[i]->name());
if (access[i]->is_write())
reduced_write_refs.insert(access[i]->name());
@@ -165,14 +164,14 @@ void Loop::reduce(int stmt_num,
}
}
}
-
+
for (int i = 0; i < seq_levels.size(); i++)
arg_repr_list.push_back(
- ocg->CreateIdent(
- stmt[stmt_num].IS.set_var(seq_levels[i])->name()));
-
+ ocg->CreateIdent(
+ stmt[stmt_num].IS.set_var(seq_levels[i])->name()));
+
if (bound_level != -1) {
-
+
omega::Relation new_IS = copy(stmt[stmt_num].IS);
new_IS.copy_names(stmt[stmt_num].IS);
new_IS.setup_names();
@@ -181,106 +180,106 @@ void Loop::reduce(int stmt_num,
//omega::Relation r = getNewIS(stmt_num);
for (int j = dim + 1; j <= new_IS.n_set(); j++)
new_IS = omega::Project(new_IS, new_IS.set_var(j));
-
+
new_IS.simplify(2, 4);
-
+
omega::Relation bound_ = get_loop_bound(copy(new_IS), dim - 1);
omega::Variable_ID v = bound_.set_var(dim);
std::vector<omega::CG_outputRepr *> ubList;
for (omega::GEQ_Iterator e(
- const_cast<omega::Relation &>(bound_).single_conjunct()->GEQs());
+ const_cast<omega::Relation &>(bound_).single_conjunct()->GEQs());
e; e++) {
if ((*e).get_coef(v) < 0) {
// && (*e).is_const_except_for_global(v))
omega::CG_outputRepr *UPPERBOUND =
- omega::output_upper_bound_repr(ir->builder(), *e, v,
- bound_,
- std::vector<
- std::pair<omega::CG_outputRepr *, int> >(
- bound_.n_set(),
- std::make_pair(
- static_cast<omega::CG_outputRepr *>(NULL),
- 0)), uninterpreted_symbols[stmt_num]);
+ omega::output_upper_bound_repr(ir->builder(), *e, v,
+ bound_,
+ std::vector<
+ std::pair<omega::CG_outputRepr *, int> >(
+ bound_.n_set(),
+ std::make_pair(
+ static_cast<omega::CG_outputRepr *>(NULL),
+ 0)), uninterpreted_symbols[stmt_num]);
if (UPPERBOUND != NULL)
ubList.push_back(UPPERBOUND);
-
+
}
-
+
}
-
- omega::CG_outputRepr * ubRepr;
+
+ omega::CG_outputRepr *ubRepr;
if (ubList.size() > 1) {
-
+
ubRepr = ir->builder()->CreateInvoke("min", ubList);
arg_repr_list.push_back(ubRepr);
} else if (ubList.size() == 1)
arg_repr_list.push_back(ubList[0]);
}
-
+
funCallRepr = ocg->CreateInvoke(func_name, arg_repr_list);
stmt[stmt_num].code = funCallRepr;
for (int i = 0; i < level.size(); i++) {
//stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL));
std::vector<std::string> loop_vars;
loop_vars.push_back(stmt[stmt_num].IS.set_var(level[i])->name());
-
+
std::vector<omega::CG_outputRepr *> subs;
subs.push_back(ocg->CreateInt(0));
-
+
stmt[stmt_num].code = ocg->CreateSubstitutedStmt(0, stmt[stmt_num].code,
loop_vars, subs);
-
+
}
-
+
omega::Relation new_IS = copy(stmt[stmt_num].IS);
new_IS.copy_names(stmt[stmt_num].IS);
new_IS.setup_names();
new_IS.simplify();
int old_size = new_IS.n_set();
-
+
omega::Relation R = omega::copy(stmt[stmt_num].IS);
R.copy_names(stmt[stmt_num].IS);
R.setup_names();
-
+
for (int i = level.size() - 1; i >= 0; i--) {
int j;
-
+
for (j = 0; j < cudaized_levels.size(); j++) {
if (cudaized_levels[j] == level[i])
break;
-
+
}
-
+
if (j == cudaized_levels.size()) {
R = omega::Project(R, level[i], omega::Input_Var);
R.simplify();
-
+
}
//
-
+
}
-
+
omega::F_And *f_Root = R.and_with_and();
for (int i = level.size() - 1; i >= 0; i--) {
int j;
-
+
for (j = 0; j < cudaized_levels.size(); j++) {
if (cudaized_levels[j] == level[i])
break;
-
+
}
-
+
if (j == cudaized_levels.size()) {
-
+
omega::EQ_Handle h = f_Root->add_EQ();
-
+
h.update_coef(R.set_var(level[i]), 1);
h.update_const(-1);
}
//
-
+
}
-
+
R.simplify();
stmt[stmt_num].IS = R;
}
@@ -303,22 +302,22 @@ bool Loop::isInitialized() const {
bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
std::vector<ir_tree_node *> &ir_stmt) {
-
+
CHILL_DEBUG_PRINT("extract_ir_stmts()\n");
CHILL_DEBUG_PRINT("ir_tree has %d statements\n", ir_tree.size());
ir_stmt = extract_ir_stmts(ir_tree);
-
- CHILL_DEBUG_PRINT("nesting level stmt size = %d\n", (int)ir_stmt.size());
+
+ CHILL_DEBUG_PRINT("nesting level stmt size = %d\n", (int) ir_stmt.size());
stmt_nesting_level_.resize(ir_stmt.size());
-
+
std::vector<int> stmt_nesting_level(ir_stmt.size());
-
- CHILL_DEBUG_PRINT("%d statements?\n", (int)ir_stmt.size());
-
+
+ CHILL_DEBUG_PRINT("%d statements?\n", (int) ir_stmt.size());
+
// find out how deeply nested each statement is. (how can these be different?)
for (int i = 0; i < ir_stmt.size(); i++) {
- fprintf(stderr, "i %d\n", i);
+ fprintf(stderr, "i %d\n", i);
ir_stmt[i]->payload = i;
int t = 0;
ir_tree_node *itn = ir_stmt[i];
@@ -331,23 +330,24 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
stmt_nesting_level[i] = t;
CHILL_DEBUG_PRINT("stmt_nesting_level[%d] = %d\n", i, t);
}
-
+
if (actual_code.size() == 0)
- actual_code = std::vector<CG_outputRepr*>(ir_stmt.size());
-
+ actual_code = std::vector<CG_outputRepr *>(ir_stmt.size());
+
stmt = std::vector<Statement>(ir_stmt.size());
- CHILL_DEBUG_PRINT("in init_loop, made %d stmts\n", (int)ir_stmt.size());
-
- uninterpreted_symbols = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr * > > >(ir_stmt.size());
- uninterpreted_symbols_stringrepr = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr * > > >(ir_stmt.size());
-
+ CHILL_DEBUG_PRINT("in init_loop, made %d stmts\n", (int) ir_stmt.size());
+
+ uninterpreted_symbols = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr *> > >(ir_stmt.size());
+ uninterpreted_symbols_stringrepr = std::vector<std::map<std::string, std::vector<omega::CG_outputRepr *> > >(
+ ir_stmt.size());
+
int n_dim = -1;
int max_loc;
//std::vector<std::string> index;
for (int i = 0; i < ir_stmt.size(); i++) {
int max_nesting_level = -1;
int loc;
-
+
// find the max nesting level and remember the statement that was at that level
for (int j = 0; j < ir_stmt.size(); j++) {
if (stmt_nesting_level[j] > max_nesting_level) {
@@ -355,23 +355,23 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
loc = j;
}
}
-
+
CHILL_DEBUG_PRINT("max nesting level %d at location %d\n", max_nesting_level, loc);
-
+
// most deeply nested statement acting as a reference point
if (n_dim == -1) {
CHILL_DEBUG_PRINT("n_dim now max_nesting_level %d\n", max_nesting_level);
n_dim = max_nesting_level;
max_loc = loc;
-
+
index = std::vector<std::string>(n_dim);
-
+
ir_tree_node *itn = ir_stmt[loc];
CHILL_DEBUG_PRINT("itn = stmt[%d]\n", loc);
int cur_dim = n_dim - 1;
while (itn->parent != NULL) {
CHILL_DEBUG_PRINT("parent\n");
-
+
itn = itn->parent;
if (itn->content->type() == IR_CONTROL_LOOP) {
CHILL_DEBUG_PRINT("IR_CONTROL_LOOP cur_dim %d\n", cur_dim);
@@ -382,258 +382,264 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
}
}
}
-
+
CHILL_DEBUG_PRINT("align loops by names,\n");
// align loops by names, temporary solution
ir_tree_node *itn = ir_stmt[loc]; // defined outside loops??
int depth = stmt_nesting_level_[loc] - 1;
-
+
for (int t = depth; t >= 0; t--) {
int y = t;
itn = ir_stmt[loc];
-
+
while ((itn->parent != NULL) && (y >= 0)) {
itn = itn->parent;
if (itn->content->type() == IR_CONTROL_LOOP)
y--;
}
-
+
if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) {
CG_outputBuilder *ocg = ir->builder();
-
+
itn->payload = depth - t;
-
+
CG_outputRepr *code =
- static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
-
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+
std::vector<CG_outputRepr *> index_expr;
std::vector<std::string> old_index;
CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]);
index_expr.push_back(repl);
old_index.push_back(
- static_cast<IR_Loop *>(itn->content)->index()->name());
+ static_cast<IR_Loop *>(itn->content)->index()->name());
code = ocg->CreateSubstitutedStmt(0, code, old_index,
index_expr);
-
- replace.insert(std::pair<int, CG_outputRepr*>(loc, code));
+
+ replace.insert(std::pair<int, CG_outputRepr *>(loc, code));
//stmt[loc].code = code;
-
+
}
}
-
+
CHILL_DEBUG_PRINT("set relation variable names ****\n");
// set relation variable names
-
+
// this finds the loop variables for loops enclosing this statement and puts
// them in an Omega Relation (just their names, which could fail)
-
+
CHILL_DEBUG_PRINT("Relation r(%d)\n", n_dim);
Relation r(n_dim);
F_And *f_root = r.add_and();
itn = ir_stmt[loc];
int temp_depth = depth;
while (itn->parent != NULL) {
-
+
itn = itn->parent;
if (itn->content->type() == IR_CONTROL_LOOP) {
- fprintf(stderr, "it's a loop. temp_depth %d\n", temp_depth);
+ fprintf(stderr, "it's a loop. temp_depth %d\n", temp_depth);
fprintf(stderr, "r.name_set_var( %d, %s )\n", itn->payload + 1, index[temp_depth].c_str());
r.name_set_var(itn->payload + 1, index[temp_depth]);
-
+
temp_depth--;
}
//static_cast<IR_Loop *>(itn->content)->index()->name());
}
- fprintf(stderr, "Relation r "); r.print(); fflush(stdout);
+ fprintf(stderr, "Relation r ");
+ r.print();
+ fflush(stdout);
//fprintf(stderr, "f_root "); f_root->print(stderr); fprintf(stderr, "\n");
-
+
/*while (itn->parent != NULL) {
itn = itn->parent;
if (itn->content->type() == IR_CONTROL_LOOP)
r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name());
}*/
-
-
-
-
- fprintf(stderr, "extract information from loop/if structures\n");
+
+
+
+
+ fprintf(stderr, "extract information from loop/if structures\n");
// extract information from loop/if structures
std::vector<bool> processed(n_dim, false);
std::vector<std::string> vars_to_be_reversed;
-
+
std::vector<std::string> insp_lb;
std::vector<std::string> insp_ub;
-
+
itn = ir_stmt[loc];
while (itn->parent != NULL) { // keep heading upward
itn = itn->parent;
-
+
switch (itn->content->type()) {
- case IR_CONTROL_LOOP: {
- fprintf(stderr, "loop.cc l 462 IR_CONTROL_LOOP\n");
- IR_Loop *lp = static_cast<IR_Loop *>(itn->content);
- Variable_ID v = r.set_var(itn->payload + 1);
- int c;
-
- try {
- c = lp->step_size();
- //fprintf(stderr, "step size %d\n", c);
- if (c > 0) {
+ case IR_CONTROL_LOOP: {
+ fprintf(stderr, "loop.cc l 462 IR_CONTROL_LOOP\n");
+ IR_Loop *lp = static_cast<IR_Loop *>(itn->content);
+ Variable_ID v = r.set_var(itn->payload + 1);
+ int c;
+
+ try {
+ c = lp->step_size();
+ //fprintf(stderr, "step size %d\n", c);
+ if (c > 0) {
+ CG_outputRepr *lb = lp->lower_bound();
+ fprintf(stderr, "loop.cc, got the lower bound. it is:\n");
+ lb->dump();
+ printf("\n");
+ fflush(stdout);
+
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
+
+ CG_outputRepr *ub = lp->upper_bound();
+ //fprintf(stderr, "loop.cc, got the upper bound. it is:\n");
+ //ub->dump(); printf("\n"); fflush(stdout);
+
+
+
+ IR_CONDITION_TYPE cond = lp->stop_cond();
+ if (cond == IR_COND_LT || cond == IR_COND_LE)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ cond, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
+ else
+ throw ir_error("loop condition not supported");
+
+
+ if ((ir->QueryExpOperation(lp->lower_bound())
+ == IR_OP_ARRAY_VARIABLE)
+ && (ir->QueryExpOperation(lp->lower_bound())
+ == ir->QueryExpOperation(
+ lp->upper_bound()))) {
+
+ fprintf(stderr, "loop.cc lower and upper are both IR_OP_ARRAY_VARIABLE?\n");
+
+ std::vector<CG_outputRepr *> v =
+ ir->QueryExpOperand(lp->lower_bound());
+ IR_ArrayRef *ref =
+ static_cast<IR_ArrayRef *>(ir->Repr2Ref(
+ v[0]));
+ std::string s0 = ref->name();
+ std::vector<CG_outputRepr *> v2 =
+ ir->QueryExpOperand(lp->upper_bound());
+ IR_ArrayRef *ref2 =
+ static_cast<IR_ArrayRef *>(ir->Repr2Ref(
+ v2[0]));
+ std::string s1 = ref2->name();
+
+ if (s0 == s1) {
+ insp_lb.push_back(s0);
+ insp_ub.push_back(s1);
+
+ }
+
+ }
+
+
+ } else if (c < 0) {
+ CG_outputBuilder *ocg = ir->builder();
+ CG_outputRepr *lb = lp->lower_bound();
+ lb = ocg->CreateMinus(NULL, lb);
+ exp2formula(ir, r, f_root, freevar, lb, v, 's',
+ IR_COND_GE, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
+ CG_outputRepr *ub = lp->upper_bound();
+ ub = ocg->CreateMinus(NULL, ub);
+ IR_CONDITION_TYPE cond = lp->stop_cond();
+ if (cond == IR_COND_GE)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ IR_COND_LE, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
+ else if (cond == IR_COND_GT)
+ exp2formula(ir, r, f_root, freevar, ub, v, 's',
+ IR_COND_LT, true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
+ else
+ throw ir_error("loop condition not supported");
+
+ vars_to_be_reversed.push_back(lp->index()->name());
+ } else
+ throw ir_error("loop step size zero");
+ } catch (const ir_error &e) {
+ actual_code[loc] =
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+ for (int i = 0; i < itn->children.size(); i++)
+ delete itn->children[i];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ return false;
+ }
+
+ // check for loop increment or decrement that is not 1
+ //fprintf(stderr, "abs(c)\n");
+ if (abs(c) != 1) {
+ F_Exists *f_exists = f_root->add_exists();
+ Variable_ID e = f_exists->declare();
+ F_And *f_and = f_exists->add_and();
+ Stride_Handle h = f_and->add_stride(abs(c));
+ if (c > 0)
+ h.update_coef(e, 1);
+ else
+ h.update_coef(e, -1);
+ h.update_coef(v, -1);
CG_outputRepr *lb = lp->lower_bound();
- fprintf(stderr, "loop.cc, got the lower bound. it is:\n");
- lb->dump(); printf("\n"); fflush(stdout);
-
- exp2formula(ir, r, f_root, freevar, lb, v, 's',
- IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
-
- CG_outputRepr *ub = lp->upper_bound();
- //fprintf(stderr, "loop.cc, got the upper bound. it is:\n");
- //ub->dump(); printf("\n"); fflush(stdout);
-
-
-
- IR_CONDITION_TYPE cond = lp->stop_cond();
- if (cond == IR_COND_LT || cond == IR_COND_LE)
- exp2formula(ir, r, f_root, freevar, ub, v, 's',
- cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ,
+ true, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
+ }
+
+ processed[itn->payload] = true;
+ break;
+ }
+
+
+ case IR_CONTROL_IF: {
+ fprintf(stderr, "IR_CONTROL_IF\n");
+ IR_If *theif = static_cast<IR_If *>(itn->content);
+
+ CG_outputRepr *cond =
+ static_cast<IR_If *>(itn->content)->condition();
+
+ try {
+ if (itn->payload % 2 == 1)
+ exp2constraint(ir, r, f_root, freevar, cond, true, uninterpreted_symbols[i],
+ uninterpreted_symbols_stringrepr[i]);
+ else {
+ F_Not *f_not = f_root->add_not();
+ F_And *f_and = f_not->add_and();
+ exp2constraint(ir, r, f_and, freevar, cond, true, uninterpreted_symbols[i],
+ uninterpreted_symbols_stringrepr[i]);
+ }
+ } catch (const ir_error &e) {
+ std::vector<ir_tree_node *> *t;
+ if (itn->parent == NULL)
+ t = &ir_tree;
else
- throw ir_error("loop condition not supported");
-
-
- if ((ir->QueryExpOperation(lp->lower_bound())
- == IR_OP_ARRAY_VARIABLE)
- && (ir->QueryExpOperation(lp->lower_bound())
- == ir->QueryExpOperation(
- lp->upper_bound()))) {
-
- fprintf(stderr, "loop.cc lower and upper are both IR_OP_ARRAY_VARIABLE?\n");
-
- std::vector<CG_outputRepr *> v =
- ir->QueryExpOperand(lp->lower_bound());
- IR_ArrayRef *ref =
- static_cast<IR_ArrayRef *>(ir->Repr2Ref(
- v[0]));
- std::string s0 = ref->name();
- std::vector<CG_outputRepr *> v2 =
- ir->QueryExpOperand(lp->upper_bound());
- IR_ArrayRef *ref2 =
- static_cast<IR_ArrayRef *>(ir->Repr2Ref(
- v2[0]));
- std::string s1 = ref2->name();
-
- if (s0 == s1) {
- insp_lb.push_back(s0);
- insp_ub.push_back(s1);
-
+ t = &(itn->parent->children);
+ int id = itn->payload;
+ int i = t->size() - 1;
+ while (i >= 0) {
+ if ((*t)[i] == itn) {
+ for (int j = 0; j < itn->children.size(); j++)
+ delete itn->children[j];
+ itn->children = std::vector<ir_tree_node *>();
+ itn->content = itn->content->convert();
+ } else if ((*t)[i]->payload >> 1 == id >> 1) {
+ delete (*t)[i];
+ t->erase(t->begin() + i);
}
-
+ i--;
}
-
-
- } else if (c < 0) {
- CG_outputBuilder *ocg = ir->builder();
- CG_outputRepr *lb = lp->lower_bound();
- lb = ocg->CreateMinus(NULL, lb);
- exp2formula(ir, r, f_root, freevar, lb, v, 's',
- IR_COND_GE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
- CG_outputRepr *ub = lp->upper_bound();
- ub = ocg->CreateMinus(NULL, ub);
- IR_CONDITION_TYPE cond = lp->stop_cond();
- if (cond == IR_COND_GE)
- exp2formula(ir, r, f_root, freevar, ub, v, 's',
- IR_COND_LE, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
- else if (cond == IR_COND_GT)
- exp2formula(ir, r, f_root, freevar, ub, v, 's',
- IR_COND_LT, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
- else
- throw ir_error("loop condition not supported");
-
- vars_to_be_reversed.push_back(lp->index()->name());
- } else
- throw ir_error("loop step size zero");
- } catch (const ir_error &e) {
- actual_code[loc] =
- static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+ return false;
+ }
+
+ break;
+ }
+ default:
+ //fprintf(stderr, "default?\n");
for (int i = 0; i < itn->children.size(); i++)
delete itn->children[i];
itn->children = std::vector<ir_tree_node *>();
itn->content = itn->content->convert();
return false;
- }
-
- // check for loop increment or decrement that is not 1
- //fprintf(stderr, "abs(c)\n");
- if (abs(c) != 1) {
- F_Exists *f_exists = f_root->add_exists();
- Variable_ID e = f_exists->declare();
- F_And *f_and = f_exists->add_and();
- Stride_Handle h = f_and->add_stride(abs(c));
- if (c > 0)
- h.update_coef(e, 1);
- else
- h.update_coef(e, -1);
- h.update_coef(v, -1);
- CG_outputRepr *lb = lp->lower_bound();
- exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ,
- true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
- }
-
- processed[itn->payload] = true;
- break;
- }
-
-
- case IR_CONTROL_IF: {
- fprintf(stderr, "IR_CONTROL_IF\n");
- IR_If *theif = static_cast<IR_If *>(itn->content);
-
- CG_outputRepr *cond =
- static_cast<IR_If *>(itn->content)->condition();
-
- try {
- if (itn->payload % 2 == 1)
- exp2constraint(ir, r, f_root, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
- else {
- F_Not *f_not = f_root->add_not();
- F_And *f_and = f_not->add_and();
- exp2constraint(ir, r, f_and, freevar, cond, true,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
- }
- } catch (const ir_error &e) {
- std::vector<ir_tree_node *> *t;
- if (itn->parent == NULL)
- t = &ir_tree;
- else
- t = &(itn->parent->children);
- int id = itn->payload;
- int i = t->size() - 1;
- while (i >= 0) {
- if ((*t)[i] == itn) {
- for (int j = 0; j < itn->children.size(); j++)
- delete itn->children[j];
- itn->children = std::vector<ir_tree_node *>();
- itn->content = itn->content->convert();
- } else if ((*t)[i]->payload >> 1 == id >> 1) {
- delete (*t)[i];
- t->erase(t->begin() + i);
- }
- i--;
- }
- return false;
- }
-
- break;
- }
- default:
- //fprintf(stderr, "default?\n");
- for (int i = 0; i < itn->children.size(); i++)
- delete itn->children[i];
- itn->children = std::vector<ir_tree_node *>();
- itn->content = itn->content->convert();
- return false;
}
}
-
-
+
+
//fprintf(stderr, "add information for missing loops n_dim(%d)\n", n_dim);
// add information for missing loops
for (int j = 0; j < n_dim; j++)
@@ -645,18 +651,18 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
&& itn->payload == j)
break;
}
-
+
Variable_ID v = r.set_var(j + 1);
if (loc < max_loc) {
-
+
CG_outputBuilder *ocg = ir->builder();
-
+
CG_outputRepr *lb =
- static_cast<IR_Loop *>(itn->content)->lower_bound();
-
+ static_cast<IR_Loop *>(itn->content)->lower_bound();
+
exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ,
- false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
-
+ false, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
+
/* if (ir->QueryExpOperation(
static_cast<IR_Loop *>(itn->content)->lower_bound())
== IR_OP_VARIABLE) {
@@ -684,15 +690,15 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
}
*/
-
+
} else { // loc > max_loc
-
+
CG_outputBuilder *ocg = ir->builder();
CG_outputRepr *ub =
- static_cast<IR_Loop *>(itn->content)->upper_bound();
-
+ static_cast<IR_Loop *>(itn->content)->upper_bound();
+
exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ,
- false,uninterpreted_symbols[i],uninterpreted_symbols_stringrepr[i]);
+ false, uninterpreted_symbols[i], uninterpreted_symbols_stringrepr[i]);
/*if (ir->QueryExpOperation(
static_cast<IR_Loop *>(itn->content)->upper_bound())
== IR_OP_VARIABLE) {
@@ -724,29 +730,29 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
*/
}
}
-
+
r.setup_names();
r.simplify();
-
+
// THIS IS MISSING IN PROTONU's
for (int j = 0; j < insp_lb.size(); j++) {
-
+
std::string lb = insp_lb[j] + "_";
std::string ub = lb + "_";
-
+
Global_Var_ID u, l;
bool found_ub = false;
bool found_lb = false;
for (DNF_Iterator di(copy(r).query_DNF()); di; di++)
for (Constraint_Iterator ci = (*di)->constraints(); ci; ci++)
-
+
for (Constr_Vars_Iter cvi(*ci); cvi; cvi++) {
Variable_ID v = cvi.curr_var();
if (v->kind() == Global_Var)
if (v->get_global_var()->arity() > 0) {
-
+
std::string name =
- v->get_global_var()->base_name();
+ v->get_global_var()->base_name();
if (name == lb) {
l = v->get_global_var();
found_lb = true;
@@ -755,9 +761,9 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
found_ub = true;
}
}
-
+
}
-
+
if (found_lb && found_ub) {
Relation known_(copy(r).n_set());
known_.copy_names(copy(r));
@@ -770,12 +776,12 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
g.update_coef(index_lb, -1);
g.update_const(-1);
addKnown(known_);
-
+
}
-
+
}
-
-
+
+
fprintf(stderr, "loop.cc L441 insert the statement\n");
// insert the statement
CG_outputBuilder *ocg = ir->builder();
@@ -785,36 +791,38 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
repl = ocg->CreateMinus(NULL, repl);
reverse_expr.push_back(repl);
}
- fprintf(stderr, "loop.cc before extract\n");
+ fprintf(stderr, "loop.cc before extract\n");
CG_outputRepr *code =
- static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
+ static_cast<IR_Block *>(ir_stmt[loc]->content)->extract();
fprintf(stderr, "code = ocg->CreateSubstitutedStmt(...)\n");
- ((CG_chillRepr *)code)->Dump(); fflush(stdout);
-
+ ((CG_chillRepr *) code)->Dump();
+ fflush(stdout);
+
code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed,
reverse_expr);
fprintf(stderr, "stmt\n");
- ((CG_chillRepr *)code)->Dump(); fflush(stdout);
+ ((CG_chillRepr *) code)->Dump();
+ fflush(stdout);
stmt[loc].code = code;
stmt[loc].IS = r;
-
+
//Anand: Add Information on uninterpreted function constraints to
//Known relation
-
- fprintf(stderr, "loop.cc stmt[%d].loop_level has size n_dim %d\n", loc, n_dim);
+
+ fprintf(stderr, "loop.cc stmt[%d].loop_level has size n_dim %d\n", loc, n_dim);
stmt[loc].loop_level = std::vector<LoopLevel>(n_dim);
stmt[loc].ir_stmt_node = ir_stmt[loc];
stmt[loc].has_inspector = false;
- fprintf(stderr, "for int i < n_dim(%d)\n", n_dim);
+ fprintf(stderr, "for int i < n_dim(%d)\n", n_dim);
for (int ii = 0; ii < n_dim; ii++) {
stmt[loc].loop_level[ii].type = LoopLevelOriginal;
stmt[loc].loop_level[ii].payload = ii;
stmt[loc].loop_level[ii].parallel_level = 0;
}
- fprintf(stderr, "whew\n");
-
+ fprintf(stderr, "whew\n");
+
stmt_nesting_level[loc] = -1;
}
dump();
@@ -824,36 +832,35 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,
}
-
Loop::Loop(const IR_Control *control) {
-
- CHILL_DEBUG_PRINT("control type is %d ", control->type());
+
+ CHILL_DEBUG_PRINT("control type is %d \n", control->type());
echocontroltype(control);
- CHILL_DEBUG_PRINT("2set last_compute_cg_ = NULL; \n");
+ CHILL_DEBUG_PRINT("set last_compute_cg_ = NULL; \n");
last_compute_cgr_ = NULL;
last_compute_cg_ = NULL;
ir = const_cast<IR_Code *>(control->ir_); // point to the CHILL IR that this loop came from
- if (ir == 0) {
- CHILL_DEBUG_PRINT("ir gotten from control = 0x%x\n", (long)ir);
- CHILL_DEBUG_PRINT("loop.cc GONNA DIE SOON *******************************\n\n");
+ if (ir == 0) {
+ CHILL_DEBUG_PRINT("ir gotten from control = 0x%x\n", (long) ir);
+ CHILL_DEBUG_PRINT("GONNA DIE SOON *******************************\n\n");
}
-
+
init_code = NULL;
cleanup_code = NULL;
tmp_loop_var_name_counter = 1;
overflow_var_name_counter = 1;
known = Relation::True(0);
-
+
CHILL_DEBUG_PRINT("calling build_ir_tree()\n");
CHILL_DEBUG_PRINT("about to clone control\n");
ir_tree = build_ir_tree(control->clone(), NULL);
//fprintf(stderr,"in Loop::Loop. ir_tree has %ld parts\n", ir_tree.size());
-
+
// std::vector<ir_tree_node *> ir_stmt;
//fprintf(stderr, "loop.cc after build_ir_tree() %ld statements\n", stmt.size());
-
+
int count = 0;
//fprintf(stderr, "before init_loops, %d freevar\n", freevar.size());
//fprintf(stderr, "count %d\n", count++);
@@ -861,19 +868,19 @@ Loop::Loop(const IR_Control *control) {
while (!init_loop(ir_tree, ir_stmt)) {
//fprintf(stderr, "count %d\n", count++);
}
- fprintf(stderr, "after init_loop, %d freevar\n", (int)freevar.size());
-
-
- fprintf(stderr, "loop.cc after init_loop, %d statements\n", (int)stmt.size());
+ fprintf(stderr, "after init_loop, %d freevar\n", (int) freevar.size());
+
+
+ fprintf(stderr, "loop.cc after init_loop, %d statements\n", (int) stmt.size());
for (int i = 0; i < stmt.size(); i++) {
- std::map<int, CG_outputRepr*>::iterator it = replace.find(i);
-
+ std::map<int, CG_outputRepr *>::iterator it = replace.find(i);
+
if (it != replace.end())
stmt[i].code = it->second;
else
stmt[i].code = stmt[i].code;
}
-
+
if (stmt.size() != 0)
dep = DependenceGraph(stmt[0].IS.n_set());
else
@@ -881,42 +888,42 @@ Loop::Loop(const IR_Control *control) {
// init the dependence graph
for (int i = 0; i < stmt.size(); i++)
dep.insert();
-
- fprintf(stderr, "this really REALLY needs some comments\n");
+
+ fprintf(stderr, "this really REALLY needs some comments\n");
// this really REALLY needs some comments
for (int i = 0; i < stmt.size(); i++) {
- fprintf(stderr, "i %d\n", i);
+ fprintf(stderr, "i %d\n", i);
stmt[i].reduction = 0; // Manu -- initialization
for (int j = i; j < stmt.size(); j++) {
- fprintf(stderr, "j %d\n", j);
+ fprintf(stderr, "j %d\n", j);
std::pair<std::vector<DependenceVector>,
- std::vector<DependenceVector> > dv = test_data_dependences(
- ir,
- stmt[i].code,
- stmt[i].IS,
- stmt[j].code,
- stmt[j].IS,
- freevar,
- index,
- stmt_nesting_level_[i],
- stmt_nesting_level_[j],
- uninterpreted_symbols[ i ],
- uninterpreted_symbols_stringrepr[ i ]);
-
- fprintf(stderr, "dv.first.size() %d\n", (int)dv.first.size());
+ std::vector<DependenceVector> > dv = test_data_dependences(
+ ir,
+ stmt[i].code,
+ stmt[i].IS,
+ stmt[j].code,
+ stmt[j].IS,
+ freevar,
+ index,
+ stmt_nesting_level_[i],
+ stmt_nesting_level_[j],
+ uninterpreted_symbols[i],
+ uninterpreted_symbols_stringrepr[i]);
+
+ fprintf(stderr, "dv.first.size() %d\n", (int) dv.first.size());
for (int k = 0; k < dv.first.size(); k++) {
- fprintf(stderr, "k1 %d\n", k);
+ fprintf(stderr, "k1 %d\n", k);
if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k],
true))
dep.connect(i, j, dv.first[k]);
else {
dep.connect(j, i, dv.first[k].reverse());
}
-
+
}
-
- for (int k = 0; k < dv.second.size(); k++) {
- fprintf(stderr, "k2 %d\n", k);
+
+ for (int k = 0; k < dv.second.size(); k++) {
+ fprintf(stderr, "k2 %d\n", k);
if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k],
false))
dep.connect(j, i, dv.second[k]);
@@ -926,64 +933,64 @@ Loop::Loop(const IR_Control *control) {
}
}
}
-
- fprintf(stderr, "\n\n*** LOTS OF REDUCTIONS ***\n\n");
-
+
+ fprintf(stderr, "\n\n*** LOTS OF REDUCTIONS ***\n\n");
+
// TODO: Reduction check
// Manu:: Initial implementation / algorithm
std::set<int> reducCand = std::set<int>();
std::vector<int> canReduce = std::vector<int>();
- fprintf(stderr, "\ni range %d\n", stmt.size());
+ fprintf(stderr, "\ni range %d\n", stmt.size());
for (int i = 0; i < stmt.size(); i++) {
- fprintf(stderr, "i %d\n", i);
+ fprintf(stderr, "i %d\n", i);
if (!dep.hasEdge(i, i)) {
continue;
}
- fprintf(stderr, "dep.hasEdge(%d, %d)\n", i, i);
+ fprintf(stderr, "dep.hasEdge(%d, %d)\n", i, i);
// for each statement check if it has all the three dependences (RAW, WAR, WAW)
// If there is such a statement, it is a reduction candidate. Mark all reduction candidates.
std::vector<DependenceVector> tdv = dep.getEdge(i, i);
- fprintf(stderr, "tdv size %d\n", tdv.size());
+ fprintf(stderr, "tdv size %d\n", tdv.size());
for (int j = 0; j < tdv.size(); j++) {
- fprintf(stderr, "ij %d %d\n", i, j);
- if (tdv[j].is_reduction_cand) {
- fprintf(stderr, "reducCand.insert( %d )\n", i);
+ fprintf(stderr, "ij %d %d\n", i, j);
+ if (tdv[j].is_reduction_cand) {
+ fprintf(stderr, "reducCand.insert( %d )\n", i);
reducCand.insert(i);
}
}
}
-
- fprintf(stderr, "loop.cc reducCand.size() %d\n", reducCand.size());
+
+ fprintf(stderr, "loop.cc reducCand.size() %d\n", reducCand.size());
bool reduc;
std::set<int>::iterator it;
- int counter = 0;
+ int counter = 0;
for (it = reducCand.begin(); it != reducCand.end(); it++) {
- fprintf(stderr, "counter %d\n", counter);
+ fprintf(stderr, "counter %d\n", counter);
reduc = true;
for (int j = 0; j < stmt.size(); j++) {
- fprintf(stderr, "j %d\n", j);
+ fprintf(stderr, "j %d\n", j);
if ((*it != j)
&& (stmt_nesting_level_[*it] < stmt_nesting_level_[j])) {
if (dep.hasEdge(*it, j) || dep.hasEdge(j, *it)) {
- fprintf(stderr, "counter %d j %d reduc = false\n", counter, j);
+ fprintf(stderr, "counter %d j %d reduc = false\n", counter, j);
reduc = false;
break;
}
}
counter += 1;
}
-
+
if (reduc) {
- fprintf(stderr, "canReduce.push_back()\n");
+ fprintf(stderr, "canReduce.push_back()\n");
canReduce.push_back(*it);
stmt[*it].reduction = 2; // First, assume that reduction is possible with some processing
}
}
-
-
+
+
// If reduction is possible without processing, update the value of the reduction variable to 1
- fprintf(stderr, "loop.cc canReduce.size() %d\n", canReduce.size());
+ fprintf(stderr, "loop.cc canReduce.size() %d\n", canReduce.size());
for (int i = 0; i < canReduce.size(); i++) {
// Here, assuming that stmtType returns 1 when there is a single statement within stmt[i]
if (stmtType(ir, stmt[canReduce[i]].code) == 1) {
@@ -993,9 +1000,9 @@ Loop::Loop(const IR_Control *control) {
stmt[canReduce[i]].reductionOp = opType;
}
}
-
+
// printing out stuff for debugging
-
+
if (DEP_DEBUG) {
std::cout << "STATEMENTS THAT CAN BE REDUCED: \n";
for (int i = 0; i < canReduce.size(); i++) {
@@ -1015,21 +1022,21 @@ Loop::Loop(const IR_Control *control) {
}
}
// cleanup the IR tree
-
- fprintf(stderr, "init dumb transformation relations\n");
+
+ fprintf(stderr, "init dumb transformation relations\n");
// init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0]
for (int i = 0; i < stmt.size(); i++) {
int n = stmt[i].IS.n_set();
stmt[i].xform = Relation(n, 2 * n + 1);
F_And *f_root = stmt[i].xform.add_and();
-
+
for (int j = 1; j <= n; j++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(stmt[i].xform.output_var(2 * j), 1);
h.update_coef(stmt[i].xform.input_var(j), -1);
}
-
+
for (int j = 1; j <= 2 * n + 1; j += 2) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(stmt[i].xform.output_var(j), 1);
@@ -1037,7 +1044,7 @@ Loop::Loop(const IR_Control *control) {
stmt[i].xform.simplify();
}
//fprintf(stderr, "done with dumb\n");
-
+
if (stmt.size() != 0)
num_dep_dim = stmt[0].IS.n_set();
else
@@ -1056,19 +1063,19 @@ Loop::Loop(const IR_Control *control) {
}
Loop::~Loop() {
-
+
delete last_compute_cgr_;
delete last_compute_cg_;
-
+
for (int i = 0; i < stmt.size(); i++)
if (stmt[i].code != NULL) {
stmt[i].code->clear();
delete stmt[i].code;
}
-
+
for (int i = 0; i < ir_tree.size(); i++)
delete ir_tree[i];
-
+
if (init_code != NULL) {
init_code->clear();
delete init_code;
@@ -1080,54 +1087,52 @@ Loop::~Loop() {
}
-
-
int Loop::get_dep_dim_of(int stmt_num, int level) const {
if (stmt_num < 0 || stmt_num >= stmt.size())
throw std::invalid_argument("invaid statement " + to_string(stmt_num));
-
+
if (level < 1 || level > stmt[stmt_num].loop_level.size())
return -1;
-
+
int trip_count = 0;
while (true) {
switch (stmt[stmt_num].loop_level[level - 1].type) {
- case LoopLevelOriginal:
- return stmt[stmt_num].loop_level[level - 1].payload;
- case LoopLevelTile:
- level = stmt[stmt_num].loop_level[level - 1].payload;
- if (level < 1)
- return -1;
- if (level > stmt[stmt_num].loop_level.size())
- throw loop_error("incorrect loop level information for statement "
- + to_string(stmt_num));
- break;
- default:
- throw loop_error(
- "unknown loop level information for statement "
- + to_string(stmt_num));
+ case LoopLevelOriginal:
+ return stmt[stmt_num].loop_level[level - 1].payload;
+ case LoopLevelTile:
+ level = stmt[stmt_num].loop_level[level - 1].payload;
+ if (level < 1)
+ return -1;
+ if (level > stmt[stmt_num].loop_level.size())
+ throw loop_error("incorrect loop level information for statement "
+ + to_string(stmt_num));
+ break;
+ default:
+ throw loop_error(
+ "unknown loop level information for statement "
+ + to_string(stmt_num));
}
trip_count++;
if (trip_count >= stmt[stmt_num].loop_level.size())
throw loop_error(
- "incorrect loop level information for statement "
- + to_string(stmt_num));
+ "incorrect loop level information for statement "
+ + to_string(stmt_num));
}
}
int Loop::get_last_dep_dim_before(int stmt_num, int level) const {
if (stmt_num < 0 || stmt_num >= stmt.size())
throw std::invalid_argument("invaid statement " + to_string(stmt_num));
-
+
if (level < 1)
return -1;
if (level > stmt[stmt_num].loop_level.size())
level = stmt[stmt_num].loop_level.size() + 1;
-
+
for (int i = level - 1; i >= 1; i--)
if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal)
return stmt[stmt_num].loop_level[i - 1].payload;
-
+
return -1;
}
@@ -1139,14 +1144,14 @@ void Loop::print_internal_loop_structure() const {
if (2 * j < lex.size())
std::cout << lex[2 * j];
switch (stmt[i].loop_level[j].type) {
- case LoopLevelOriginal:
- std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")";
- break;
- case LoopLevelTile:
- std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")";
- break;
- default:
- std::cout << "(unknown)";
+ case LoopLevelOriginal:
+ std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ case LoopLevelTile:
+ std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ default:
+ std::cout << "(unknown)";
}
std::cout << ' ';
}
@@ -1159,96 +1164,102 @@ void Loop::print_internal_loop_structure() const {
}
}
-void Loop::debugRelations() const {
- const int m = stmt.size();
+void Loop::debugRelations() const {
+ const int m = stmt.size();
{
std::vector<Relation> IS(m);
std::vector<Relation> xforms(m);
-
+
for (int i = 0; i < m; i++) {
IS[i] = stmt[i].IS;
xforms[i] = stmt[i].xform; // const stucks
}
-
- printf("\nxforms:\n");
- for (int i = 0; i < m; i++) { xforms[i].print(); printf("\n"); }
- printf("\nIS:\n");
- for (int i = 0; i < m; i++) { IS[i].print(); printf("\n"); }
- fflush(stdout);
+
+ printf("\nxforms:\n");
+ for (int i = 0; i < m; i++) {
+ xforms[i].print();
+ printf("\n");
+ }
+ printf("\nIS:\n");
+ for (int i = 0; i < m; i++) {
+ IS[i].print();
+ printf("\n");
+ }
+ fflush(stdout);
}
}
CG_outputRepr *Loop::getCode(int effort) const {
- fprintf(stderr,"\nloop.cc Loop::getCode( effort %d )\n", effort );
-
+ fprintf(stderr, "\nloop.cc Loop::getCode( effort %d )\n", effort);
+
const int m = stmt.size();
if (m == 0)
return NULL;
const int n = stmt[0].xform.n_out();
-
+
if (last_compute_cg_ == NULL) {
- fprintf(stderr, "Loop::getCode() last_compute_cg_ == NULL\n");
-
+ fprintf(stderr, "Loop::getCode() last_compute_cg_ == NULL\n");
+
std::vector<Relation> IS(m);
std::vector<Relation> xforms(m);
for (int i = 0; i < m; i++) {
IS[i] = stmt[i].IS;
xforms[i] = stmt[i].xform;
}
-
- debugRelations();
-
-
+
+ debugRelations();
+
+
Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
- printf("\nknown:\n"); known.print(); printf("\n\n"); fflush(stdout);
-
+ printf("\nknown:\n");
+ known.print();
+ printf("\n\n");
+ fflush(stdout);
+
last_compute_cg_ = new CodeGen(xforms, IS, known);
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
- }
- else {
- fprintf(stderr, "Loop::getCode() last_compute_cg_ NOT NULL\n");
+ } else {
+ fprintf(stderr, "Loop::getCode() last_compute_cg_ NOT NULL\n");
}
-
+
if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
delete last_compute_cgr_;
last_compute_cgr_ = last_compute_cg_->buildAST(effort);
last_compute_effort_ = effort;
}
-
+
std::vector<CG_outputRepr *> stmts(m);
- fprintf(stderr, "%d stmts\n", m);
+ fprintf(stderr, "%d stmts\n", m);
for (int i = 0; i < m; i++)
stmts[i] = stmt[i].code;
CG_outputBuilder *ocg = ir->builder();
- fprintf(stderr, "calling last_compute_cgr_->printRepr()\n");
- CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts,
+ fprintf(stderr, "calling last_compute_cgr_->printRepr()\n");
+ CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts,
uninterpreted_symbols);
-
+
if (init_code != NULL)
repr = ocg->StmtListAppend(init_code->clone(), repr);
if (cleanup_code != NULL)
repr = ocg->StmtListAppend(repr, cleanup_code->clone());
-
- fprintf(stderr,"\nloop.cc Loop::getCode( effort %d ) DONE\n", effort );
+
+ fprintf(stderr, "\nloop.cc Loop::getCode( effort %d ) DONE\n", effort);
return repr;
}
-
-
void Loop::printCode(int effort) const {
- fprintf(stderr,"\nloop.cc Loop::printCode( effort %d )\n", effort );
+ fprintf(stderr, "\nloop.cc Loop::printCode( effort %d )\n", effort);
const int m = stmt.size();
if (m == 0)
return;
const int n = stmt[0].xform.n_out();
-
+
if (last_compute_cg_ == NULL) {
- fprintf(stderr, "Loop::printCode(), last_compute_cg_ == NULL\n");
+ fprintf(stderr, "Loop::printCode(), last_compute_cg_ == NULL\n");
std::vector<Relation> IS(m);
std::vector<Relation> xforms(m);
for (int i = 0; i < m; i++) {
@@ -1256,22 +1267,21 @@ void Loop::printCode(int effort) const {
xforms[i] = stmt[i].xform;
}
Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
-
+
last_compute_cg_ = new CodeGen(xforms, IS, known);
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
- }
- else fprintf(stderr, "Loop::printCode(), last_compute_cg_ NOT NULL\n");
-
+ } else fprintf(stderr, "Loop::printCode(), last_compute_cg_ NOT NULL\n");
+
if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
delete last_compute_cgr_;
last_compute_cgr_ = last_compute_cg_->buildAST(effort);
last_compute_effort_ = effort;
}
-
+
std::string repr = last_compute_cgr_->printString(
- uninterpreted_symbols_stringrepr);
- fprintf(stderr, "leaving Loop::printCode()\n");
+ uninterpreted_symbols_stringrepr);
+ fprintf(stderr, "leaving Loop::printCode()\n");
std::cout << repr << std::endl;
}
@@ -1297,20 +1307,20 @@ void Loop::printDependenceGraph() const {
std::vector<Relation> Loop::getNewIS() const {
const int m = stmt.size();
-
+
std::vector<Relation> new_IS(m);
for (int i = 0; i < m; i++)
new_IS[i] = getNewIS(i);
-
+
return new_IS;
}
// pragmas are tied to loops only ???
void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) {
// check sanity of parameters
- if(stmt_num < 0)
+ if (stmt_num < 0)
throw std::invalid_argument("invalid statement " + to_string(stmt_num));
-
+
CG_outputBuilder *ocg = ir->builder();
CG_outputRepr *code = stmt[stmt_num].code;
ocg->CreatePragmaAttribute(code, level, pragmaText);
@@ -1331,9 +1341,9 @@ void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) {
void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) {
// check sanity of parameters
- if(stmt_num < 0)
+ if (stmt_num < 0)
throw std::invalid_argument("invalid statement " + to_string(stmt_num));
-
+
CG_outputBuilder *ocg = ir->builder();
CG_outputRepr *code = stmt[stmt_num].code;
ocg->CreatePrefetchAttribute(code, level, arrName, hint);
@@ -1341,13 +1351,13 @@ void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hin
std::vector<int> Loop::getLexicalOrder(int stmt_num) const {
assert(stmt_num < stmt.size());
-
+
const int n = stmt[stmt_num].xform.n_out();
std::vector<int> lex(n, 0);
-
+
for (int i = 0; i < n; i += 2)
lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var);
-
+
return lex;
}
@@ -1356,13 +1366,13 @@ std::vector<int> Loop::getLexicalOrder(int stmt_num) const {
std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const {
assert(stmt_num >= 0 && stmt_num < stmt.size());
assert(level > 0 && level <= stmt[stmt_num].loop_level.size());
-
+
std::set<int> working;
for (int i = 0; i < stmt.size(); i++)
if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable()
&& stmt[i].loop_level.size() >= level)
working.insert(i);
-
+
for (int i = 1; i <= level; i++) {
int a = getLexicalOrder(stmt_num, i);
for (std::set<int>::iterator j = working.begin(); j != working.end();) {
@@ -1373,14 +1383,14 @@ std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const {
++j;
}
}
-
+
return working;
}
int Loop::getLexicalOrder(int stmt_num, int level) const {
assert(stmt_num >= 0 && stmt_num < stmt.size());
- assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1);
-
+ assert(level > 0 && level <= stmt[stmt_num].loop_level.size() + 1);
+
Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform;
for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++)
if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) {
@@ -1395,15 +1405,15 @@ int Loop::getLexicalOrder(int stmt_num, int level) const {
return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t;
}
}
-
+
throw loop_error(
- "can't find lexical order for statement " + to_string(stmt_num)
- + "'s loop level " + to_string(level));
+ "can't find lexical order for statement " + to_string(stmt_num)
+ + "'s loop level " + to_string(level));
}
std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const {
const int m = stmt.size();
-
+
std::set<int> same_loops;
for (int i = 0; i < m; i++) {
if (dim < 0)
@@ -1417,32 +1427,32 @@ std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const {
if (j > dim)
same_loops.insert(i);
}
-
+
}
-
+
return same_loops;
}
void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) {
const int m = stmt.size();
-
+
if (amount == 0)
return;
-
+
for (int i = 0; i < m; i++) {
std::vector<int> lex2 = getLexicalOrder(i);
-
+
bool need_shift = true;
-
+
for (int j = 0; j < dim; j++)
if (lex2[j] != lex[j]) {
need_shift = false;
break;
}
-
+
if (!need_shift)
continue;
-
+
if (amount > 0) {
if (lex2[dim] < lex[dim])
continue;
@@ -1450,94 +1460,94 @@ void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) {
if (lex2[dim] > lex[dim])
continue;
}
-
+
assign_const(stmt[i].xform, dim, lex2[dim] + amount);
}
}
std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,
int level) {
-
+
std::set<int> not_nested_at_this_level;
- std::map<ir_tree_node*, std::set<int> > sorted_by_loop;
+ std::map<ir_tree_node *, std::set<int> > sorted_by_loop;
std::map<int, std::set<int> > sorted_by_lex_order;
std::vector<std::set<int> > to_return;
bool lex_order_already_set = false;
for (std::set<int>::iterator it = active.begin(); it != active.end();
it++) {
-
+
if (stmt[*it].ir_stmt_node == NULL)
lex_order_already_set = true;
}
-
+
if (lex_order_already_set) {
-
+
for (std::set<int>::iterator it = active.begin(); it != active.end();
it++) {
std::map<int, std::set<int> >::iterator it2 =
- sorted_by_lex_order.find(
- get_const(stmt[*it].xform, 2 * (level - 1),
- Output_Var));
-
+ sorted_by_lex_order.find(
+ get_const(stmt[*it].xform, 2 * (level - 1),
+ Output_Var));
+
if (it2 != sorted_by_lex_order.end())
it2->second.insert(*it);
else {
-
+
std::set<int> to_insert;
-
+
to_insert.insert(*it);
-
+
sorted_by_lex_order.insert(
- std::pair<int, std::set<int> >(
- get_const(stmt[*it].xform, 2 * (level - 1),
- Output_Var), to_insert));
-
+ std::pair<int, std::set<int> >(
+ get_const(stmt[*it].xform, 2 * (level - 1),
+ Output_Var), to_insert));
+
}
-
+
}
-
+
for (std::map<int, std::set<int> >::iterator it2 =
- sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end();
+ sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end();
it2++)
to_return.push_back(it2->second);
-
+
} else {
-
+
for (std::set<int>::iterator it = active.begin(); it != active.end();
it++) {
-
- ir_tree_node* itn = stmt[*it].ir_stmt_node;
+
+ ir_tree_node *itn = stmt[*it].ir_stmt_node;
itn = itn->parent;
//while (itn->content->type() != IR_CONTROL_LOOP && itn != NULL)
// itn = itn->parent;
-
+
while ((itn != NULL) && (itn->payload != level - 1)) {
itn = itn->parent;
- while (itn != NULL && itn->content->type() != IR_CONTROL_LOOP )
+ while (itn != NULL && itn->content->type() != IR_CONTROL_LOOP)
itn = itn->parent;
}
-
+
if (itn == NULL)
not_nested_at_this_level.insert(*it);
else {
- std::map<ir_tree_node*, std::set<int> >::iterator it2 =
- sorted_by_loop.find(itn);
-
+ std::map<ir_tree_node *, std::set<int> >::iterator it2 =
+ sorted_by_loop.find(itn);
+
if (it2 != sorted_by_loop.end())
it2->second.insert(*it);
else {
std::set<int> to_insert;
-
+
to_insert.insert(*it);
-
+
sorted_by_loop.insert(
- std::pair<ir_tree_node*, std::set<int> >(itn,
- to_insert));
-
+ std::pair<ir_tree_node *, std::set<int> >(itn,
+ to_insert));
+
}
-
+
}
-
+
}
if (not_nested_at_this_level.size() > 0) {
for (std::set<int>::iterator it = not_nested_at_this_level.begin();
@@ -1545,34 +1555,34 @@ std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,
std::set<int> temp;
temp.insert(*it);
to_return.push_back(temp);
-
+
}
}
- for (std::map<ir_tree_node*, std::set<int> >::iterator it2 =
- sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++)
+ for (std::map<ir_tree_node *, std::set<int> >::iterator it2 =
+ sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++)
to_return.push_back(it2->second);
}
return to_return;
}
-void update_successors(int n,
- int node_num[],
+void update_successors(int n,
+ int node_num[],
int cant_fuse_with[],
- Graph<std::set<int>, bool> &g,
+ Graph<std::set<int>, bool> &g,
std::list<int> &work_list,
- std::list<bool> &type_list,
+ std::list<bool> &type_list,
std::vector<bool> types) {
-
+
std::set<int> disconnect;
for (Graph<std::set<int>, bool>::EdgeList::iterator i =
- g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) {
+ g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) {
int m = i->first;
-
+
if (node_num[m] != -1)
throw loop_error("Graph input for fusion has cycles not a DAG!!");
-
+
std::vector<bool> check_ = g.getEdge(n, m);
-
+
bool has_bad_edge_path = false;
for (int i = 0; i < check_.size(); i++)
if (!check_[i]) {
@@ -1589,12 +1599,12 @@ void update_successors(int n,
}
disconnect.insert(m);
}
-
-
+
+
for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end();
i++) {
g.disconnect(n, *i);
-
+
bool no_incoming_edges = true;
for (int j = 0; j < g.vertex.size(); j++)
if (j != *i)
@@ -1602,7 +1612,7 @@ void update_successors(int n,
no_incoming_edges = false;
break;
}
-
+
if (no_incoming_edges) {
work_list.push_back(*i);
type_list.push_back(types[*i]);
@@ -1611,35 +1621,32 @@ void update_successors(int n,
}
-
int Loop::getMinLexValue(std::set<int> stmts, int level) {
-
+
int min;
-
+
std::set<int>::iterator it = stmts.begin();
min = getLexicalOrder(*it, level);
-
+
for (; it != stmts.end(); it++) {
int curr = getLexicalOrder(*it, level);
if (curr < min)
min = curr;
}
-
+
return min;
}
-
-
Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(
- std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) {
+ std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) {
Graph<std::set<int>, bool> g;
-
+
for (int i = 0; i < s.size(); i++)
g.insert(s[i]);
-
+
for (int i = 0; i < s.size(); i++) {
-
+
for (int j = i + 1; j < s.size(); j++) {
bool has_true_edge_i_to_j = false;
bool has_true_edge_j_to_i = false;
@@ -1647,32 +1654,32 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(
bool is_connected_j_to_i = false;
for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end();
ii++) {
-
+
for (std::set<int>::iterator jj = s[j].begin();
jj != s[j].end(); jj++) {
-
+
std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj);
for (int k = 0; k < dvs.size(); k++)
if (dvs[k].is_control_dependence()
|| (dvs[k].is_data_dependence()
&& dvs[k].has_been_carried_at(dep_dim))) {
-
+
if (dvs[k].is_data_dependence()
&& dvs[k].has_negative_been_carried_at(
- dep_dim)) {
+ dep_dim)) {
//g.connect(i, j, false);
is_connected_i_to_j = true;
break;
} else {
//g.connect(i, j, true);
-
+
has_true_edge_i_to_j = true;
//break
}
}
-
+
//if (is_connected)
-
+
// break;
// if (has_true_edge_i_to_j && !is_connected_i_to_j)
// g.connect(i, j, true);
@@ -1681,72 +1688,71 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(
if (dvs[k].is_control_dependence()
|| (dvs[k].is_data_dependence()
&& dvs[k].has_been_carried_at(dep_dim))) {
-
+
if (is_connected_i_to_j || has_true_edge_i_to_j)
throw loop_error(
- "Graph input for fusion has cycles not a DAG!!");
-
+ "Graph input for fusion has cycles not a DAG!!");
+
if (dvs[k].is_data_dependence()
&& dvs[k].has_negative_been_carried_at(
- dep_dim)) {
+ dep_dim)) {
//g.connect(i, j, false);
is_connected_j_to_i = true;
break;
} else {
//g.connect(i, j, true);
-
+
has_true_edge_j_to_i = true;
//break;
}
}
-
+
// if (is_connected)
//break;
// if (is_connected)
//break;
}
-
+
//if (is_connected)
// break;
}
-
-
+
+
if (is_connected_i_to_j)
g.connect(i, j, false);
else if (has_true_edge_i_to_j)
g.connect(i, j, true);
-
+
if (is_connected_j_to_i)
g.connect(j, i, false);
else if (has_true_edge_j_to_i)
g.connect(j, i, true);
-
+
}
}
return g;
}
-
std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g,
std::vector<bool> &types) {
-
+
bool roots[g.vertex.size()];
-
+
for (int i = 0; i < g.vertex.size(); i++)
roots[i] = true;
-
+
for (int i = 0; i < g.vertex.size(); i++)
for (int j = i + 1; j < g.vertex.size(); j++) {
-
+
if (g.hasEdge(i, j))
roots[j] = false;
-
+
if (g.hasEdge(j, i))
roots[i] = false;
-
+
}
-
+
std::list<int> work_list;
std::list<bool> type_list;
int cant_fuse_with[g.vertex.size()];
@@ -1755,11 +1761,11 @@ std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g,
int lastnum = 0;
std::vector<std::set<int> > s;
//Each Fused set's representative node
-
+
int node_to_fused_nodes[g.vertex.size()];
int node_num[g.vertex.size()];
int next[g.vertex.size()];
-
+
for (int i = 0; i < g.vertex.size(); i++) {
if (roots[i] == true) {
work_list.push_back(i);
@@ -1770,17 +1776,17 @@ std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g,
node_num[i] = -1;
next[i] = 0;
}
-
-
+
+
// topological sort according to chun's permute algorithm
// std::vector<std::set<int> > s = g.topoSort();
std::vector<std::set<int> > s2 = g.topoSort();
if (work_list.empty() || (s2.size() != g.vertex.size())) {
-
+
std::cout << s2.size() << "\t" << g.vertex.size() << std::endl;
throw loop_error("Input for fusion not a DAG!!");
-
-
+
+
}
int fused_nodes_counter = 0;
while (!work_list.empty()) {
@@ -1802,19 +1808,19 @@ std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g,
p = fused;
else
p = next[cant_fuse_with[n]];
-
+
if (p != 0) {
int rep_node = node_to_fused_nodes[p];
node_num[n] = node_num[rep_node];
-
+
try {
update_successors(n, node_num, cant_fuse_with, g, work_list,
type_list, types);
} catch (const loop_error &e) {
-
+
throw loop_error(
- "statements cannot be fused together due to negative dependence");
-
+ "statements cannot be fused together due to negative dependence");
+
}
for (std::set<int>::iterator it = g.vertex[n].first.begin();
it != g.vertex[n].first.end(); it++)
@@ -1826,81 +1832,80 @@ std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g,
lastnum = lastnum + 1;
node_num[n] = lastnum;
node_to_fused_nodes[node_num[n]] = n;
-
+
if (lastfused == 0) {
fused = lastnum;
lastfused = fused;
} else {
next[lastfused] = lastnum;
lastfused = lastnum;
-
+
}
-
+
try {
update_successors(n, node_num, cant_fuse_with, g, work_list,
type_list, types);
} catch (const loop_error &e) {
-
+
throw loop_error(
- "statements cannot be fused together due to negative dependence");
-
+ "statements cannot be fused together due to negative dependence");
+
}
fused_nodes_counter++;
}
-
+
} else {
s.push_back(g.vertex[n].first);
lastnum = lastnum + 1;
node_num[n] = lastnum;
node_to_fused_nodes[node_num[n]] = n;
-
+
try {
update_successors(n, node_num, cant_fuse_with, g, work_list,
type_list, types);
} catch (const loop_error &e) {
-
+
throw loop_error(
- "statements cannot be fused together due to negative dependence");
-
+ "statements cannot be fused together due to negative dependence");
+
}
//fused_nodes_counter++;
-
+
}
-
+
}
-
+
return s;
}
-
-
void Loop::setLexicalOrder(int dim, const std::set<int> &active,
int starting_order, std::vector<std::vector<std::string> > idxNames) {
- fprintf(stderr, "Loop::setLexicalOrder() %d idxNames active size %d starting_order %d\n", idxNames.size(), active.size(), starting_order);
+ fprintf(stderr, "Loop::setLexicalOrder() %d idxNames active size %d starting_order %d\n", idxNames.size(),
+ active.size(), starting_order);
if (active.size() == 0)
return;
- for (int i=0; i< idxNames.size(); i++) {
+ for (int i = 0; i < idxNames.size(); i++) {
std::vector<std::string> what = idxNames[i];
- for (int j=0; j<what.size(); j++) {
- fprintf(stderr, "%2d %2d %s\n", i,j, what[j].c_str());
+ for (int j = 0; j < what.size(); j++) {
+ fprintf(stderr, "%2d %2d %s\n", i, j, what[j].c_str());
}
}
// check for sanity of parameters
if (dim < 0 || dim % 2 != 0)
throw std::invalid_argument(
- "invalid constant loop level to set lexicographical order");
+ "invalid constant loop level to set lexicographical order");
std::vector<int> lex;
int ref_stmt_num;
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
if ((*i) < 0 || (*i) >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(*i));
+ "invalid statement number " + to_string(*i));
if (dim >= stmt[*i].xform.n_out())
throw std::invalid_argument(
- "invalid constant loop level to set lexicographical order");
+ "invalid constant loop level to set lexicographical order");
if (i == active.begin()) {
lex = getLexicalOrder(*i);
ref_stmt_num = *i;
@@ -1909,10 +1914,10 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
for (int j = 0; j < dim; j += 2)
if (lex[j] != lex2[j])
throw std::invalid_argument(
- "statements are not in the same sub loop nest");
+ "statements are not in the same sub loop nest");
}
}
-
+
// separate statements by current loop level types
int level = (dim + 2) / 2;
std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type;
@@ -1922,21 +1927,21 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
active_by_no_level.insert(*i);
else
active_by_level_type[std::make_pair(
- stmt[*i].loop_level[level - 1].type,
- stmt[*i].loop_level[level - 1].payload)].insert(*i);
+ stmt[*i].loop_level[level - 1].type,
+ stmt[*i].loop_level[level - 1].payload)].insert(*i);
}
-
+
// further separate statements due to control dependences
std::vector<std::set<int> > active_by_level_type_splitted;
for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i =
- active_by_level_type.begin(); i != active_by_level_type.end(); i++)
+ active_by_level_type.begin(); i != active_by_level_type.end(); i++)
active_by_level_type_splitted.push_back(i->second);
for (std::set<int>::iterator i = active_by_no_level.begin();
i != active_by_no_level.end(); i++)
for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) {
std::set<int> controlled, not_controlled;
for (std::set<int>::iterator k =
- active_by_level_type_splitted[j].begin();
+ active_by_level_type_splitted[j].begin();
k != active_by_level_type_splitted[j].end(); k++) {
std::vector<DependenceVector> dvs = dep.getEdge(*i, *k);
bool is_controlled = false;
@@ -1952,19 +1957,19 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
}
if (controlled.size() != 0 && not_controlled.size() != 0) {
active_by_level_type_splitted.erase(
- active_by_level_type_splitted.begin() + j);
+ active_by_level_type_splitted.begin() + j);
active_by_level_type_splitted.push_back(controlled);
active_by_level_type_splitted.push_back(not_controlled);
}
}
-
+
// set lexical order separating loops with different loop types first
if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) {
int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1;
-
+
Graph<std::set<int>, Empty> g;
for (std::vector<std::set<int> >::iterator i =
- active_by_level_type_splitted.begin();
+ active_by_level_type_splitted.begin();
i != active_by_level_type_splitted.end(); i++)
g.insert(*i);
for (std::set<int>::iterator i = active_by_no_level.begin();
@@ -1986,7 +1991,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
if (dvs[k].is_control_dependence()
|| (dvs[k].is_data_dependence()
&& !dvs[k].has_been_carried_before(
- dep_dim))) {
+ dep_dim))) {
g.connect(i, j);
connected = true;
break;
@@ -2010,7 +2015,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
if (dvs[k].is_control_dependence()
|| (dvs[k].is_data_dependence()
&& !dvs[k].has_been_carried_before(
- dep_dim))) {
+ dep_dim))) {
g.connect(j, i);
connected = true;
break;
@@ -2022,13 +2027,13 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
break;
}
}
-
+
std::vector<std::set<int> > s = g.topoSort();
if (s.size() != g.vertex.size())
throw loop_error(
- "cannot separate statements with different loop types at loop level "
- + to_string(level));
-
+ "cannot separate statements with different loop types at loop level "
+ + to_string(level));
+
// assign lexical order
int order = starting_order;
for (int i = 0; i < s.size(); i++) {
@@ -2041,19 +2046,18 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
assign_const(stmt[cur_stmt].xform, j, 0);
order++;
} else { // recurse !
- fprintf(stderr, "Loop:setLexicalOrder() recursing\n");
+ fprintf(stderr, "Loop:setLexicalOrder() recursing\n");
setLexicalOrder(dim, cur_scc, order, idxNames);
order += sz;
}
}
- }
- else { // set lexical order separating single iteration statements and loops
+ } else { // set lexical order separating single iteration statements and loops
std::set<int> true_singles;
std::set<int> nonsingles;
std::map<coef_t, std::set<int> > fake_singles;
std::set<int> fake_singles_;
-
+
// sort out statements that do not require loops
for (std::set<int>::iterator i = active.begin(); i != active.end();
i++) {
@@ -2071,7 +2075,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
fake_singles_.insert(*i);
try {
fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert(
- *i);
+ *i);
} catch (const std::exception &e) {
fake_singles[posInfinity].insert(*i);
}
@@ -2079,60 +2083,60 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
} else
nonsingles.insert(*i);
}
-
-
+
+
// split nonsingles forcibly according to negative dependences present (loop unfusible)
int dep_dim = get_dep_dim_of(ref_stmt_num, level);
-
+
if (dim < stmt[ref_stmt_num].xform.n_out() - 1) {
-
+
bool dummy_level_found = false;
-
+
std::vector<std::set<int> > s;
-
+
s = sort_by_same_loops(active, level);
bool further_levels_exist = false;
-
+
if (!idxNames.empty())
if (level <= idxNames[ref_stmt_num].size())
if (idxNames[ref_stmt_num][level - 1].length() == 0) {
// && s.size() == 1) {
int order1 = 0;
dummy_level_found = true;
-
+
for (int i = level; i < idxNames[ref_stmt_num].size();
i++)
if (idxNames[ref_stmt_num][i].length() > 0)
further_levels_exist = true;
-
+
}
-
+
//if (!dummy_level_found) {
-
+
if (s.size() > 1) {
-
+
std::vector<bool> types;
for (int i = 0; i < s.size(); i++)
types.push_back(true);
-
+
Graph<std::set<int>, bool> g = construct_induced_graph_at_level(
- s, dep, dep_dim);
+ s, dep, dep_dim);
s = typed_fusion(g, types);
}
int order = starting_order;
for (int i = 0; i < s.size(); i++) {
-
+
for (std::set<int>::iterator it = s[i].begin();
it != s[i].end(); it++) {
assign_const(stmt[*it].xform, dim, order);
stmt[*it].xform.simplify();
}
-
+
if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) { // recurse !
- fprintf(stderr, "Loop:setLexicalOrder() recursing\n");
+ fprintf(stderr, "Loop:setLexicalOrder() recursing\n");
setLexicalOrder(dim + 2, s[i], order, idxNames);
}
-
+
order++;
}
//}
@@ -2231,8 +2235,8 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
break;
}
*/
-
-
+
+
// assign lexical order
/*int order = starting_order;
for (int i = 0; i < s.size(); i++) {
@@ -2261,17 +2265,16 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,
*/
}
- fprintf(stderr, "LEAVING Loop::setLexicalOrder() %d idxNames\n", idxNames.size());
- for (int i=0; i< idxNames.size(); i++) {
+ fprintf(stderr, "LEAVING Loop::setLexicalOrder() %d idxNames\n", idxNames.size());
+ for (int i = 0; i < idxNames.size(); i++) {
std::vector<std::string> what = idxNames[i];
- for (int j=0; j<what.size(); j++) {
- fprintf(stderr, "%2d %2d %s\n", i,j, what[j].c_str());
+ for (int j = 0; j < what.size(); j++) {
+ fprintf(stderr, "%2d %2d %s\n", i, j, what[j].c_str());
}
}
}
-
void Loop::apply_xform() {
std::set<int> active;
for (int i = 0; i < stmt.size(); i++)
@@ -2280,26 +2283,26 @@ void Loop::apply_xform() {
}
void Loop::apply_xform(int stmt_num) {
- fprintf(stderr, "apply_xform( %d )\n", stmt_num);
+ fprintf(stderr, "apply_xform( %d )\n", stmt_num);
std::set<int> active;
active.insert(stmt_num);
apply_xform(active);
}
void Loop::apply_xform(std::set<int> &active) {
- fflush(stdout);
+ fflush(stdout);
fprintf(stderr, "loop.cc apply_xform( set )\n");
-
+
int max_n = 0;
-
+
omega::CG_outputBuilder *ocg = ir->builder();
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
int n = stmt[*i].loop_level.size();
if (n > max_n)
max_n = n;
-
+
std::vector<int> lex = getLexicalOrder(*i);
-
+
omega::Relation mapping(2 * n + 1, n);
omega::F_And *f_root = mapping.add_and();
for (int j = 1; j <= n; j++) {
@@ -2309,7 +2312,7 @@ void Loop::apply_xform(std::set<int> &active) {
}
mapping = omega::Composition(mapping, stmt[*i].xform);
mapping.simplify();
-
+
// match omega input/output variables to variable names in the code
for (int j = 1; j <= stmt[*i].IS.n_set(); j++)
mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name());
@@ -2317,28 +2320,28 @@ void Loop::apply_xform(std::set<int> &active) {
mapping.name_output_var(j,
tmp_loop_var_name_prefix
+ omega::to_string(
- tmp_loop_var_name_counter + j - 1));
+ tmp_loop_var_name_counter + j - 1));
mapping.setup_names();
mapping.print(); // "{[I] -> [_t1] : I = _t1 }
- fflush(stdout);
-
+ fflush(stdout);
+
omega::Relation known = Extend_Set(copy(this->known),
mapping.n_out() - this->known.n_set());
//stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL));
-
- omega::CG_outputBuilder *ocgr = ir->builder();
-
-
+
+ omega::CG_outputBuilder *ocgr = ir->builder();
+
+
//this is probably CG_chillBuilder;
-
+
omega::CG_stringBuilder *ocgs = new omega::CG_stringBuilder;
if (uninterpreted_symbols[*i].size() == 0) {
-
-
+
+
std::set<std::string> globals;
-
+
for (omega::DNF_Iterator di(stmt[*i].IS.query_DNF()); di; di++) {
-
+
for (omega::Constraint_Iterator e(*di); e; e++) {
for (omega::Constr_Vars_Iter cvi(*e); cvi; cvi++) {
omega::Variable_ID v = cvi.curr_var();
@@ -2349,105 +2352,106 @@ void Loop::apply_xform(std::set<int> &active) {
globals.insert(v->name());
std::vector<omega::CG_outputRepr *> reprs;
std::vector<omega::CG_outputRepr *> reprs2;
-
+
for (int l = 1; l <= g->arity(); l++) {
omega::CG_outputRepr *temp = ocgr->CreateIdent(
- stmt[*i].IS.set_var(l)->name());
+ stmt[*i].IS.set_var(l)->name());
omega::CG_outputRepr *temp2 = ocgs->CreateIdent(
- stmt[*i].IS.set_var(l)->name());
-
+ stmt[*i].IS.set_var(l)->name());
+
reprs.push_back(temp);
reprs2.push_back(temp2);
}
uninterpreted_symbols[*i].insert(
- std::pair<std::string,
- std::vector<omega::CG_outputRepr *> >(
- v->get_global_var()->base_name(),
- reprs));
+ std::pair<std::string,
+ std::vector<omega::CG_outputRepr *> >(
+ v->get_global_var()->base_name(),
+ reprs));
uninterpreted_symbols_stringrepr[*i].insert(
- std::pair<std::string,
- std::vector<omega::CG_outputRepr *> >(
- v->get_global_var()->base_name(),
- reprs2));
+ std::pair<std::string,
+ std::vector<omega::CG_outputRepr *> >(
+ v->get_global_var()->base_name(),
+ reprs2));
}
}
}
}
}
-
+
std::vector<std::string> loop_vars;
for (int j = 1; j <= stmt[*i].IS.n_set(); j++) {
loop_vars.push_back(stmt[*i].IS.set_var(j)->name());
}
- for (int j = 0; j<loop_vars.size(); j++) {
- fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str());
+ for (int j = 0; j < loop_vars.size(); j++) {
+ fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str());
}
std::vector<CG_outputRepr *> subs = output_substitutions(ocg,
Inverse(copy(mapping)),
std::vector<std::pair<CG_outputRepr *, int> >(
- mapping.n_out(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL), 0)),
+ mapping.n_out(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL), 0)),
uninterpreted_symbols[*i]);
-
+
std::vector<CG_outputRepr *> subs2;
for (int l = 0; l < subs.size(); l++)
subs2.push_back(subs[l]->clone());
-
- fprintf(stderr, "%d uninterpreted symbols\n", (int)uninterpreted_symbols.size());
- for (int j = 0; j<loop_vars.size(); j++) {
- fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str());
- }
-
-
- int count = 0;
+
+ fprintf(stderr, "%d uninterpreted symbols\n", (int) uninterpreted_symbols.size());
+ for (int j = 0; j < loop_vars.size(); j++) {
+ fprintf(stderr, "loop vars %d %s\n", j, loop_vars[j].c_str());
+ }
+
+
+ int count = 0;
for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it =
- uninterpreted_symbols[*i].begin();
+ uninterpreted_symbols[*i].begin();
it != uninterpreted_symbols[*i].end(); it++) {
- fprintf(stderr, "\ncount %d\n", count);
-
+ fprintf(stderr, "\ncount %d\n", count);
+
std::vector<CG_outputRepr *> reprs_ = it->second;
- fprintf(stderr, "%d reprs_\n", (int)reprs_.size());
-
+ fprintf(stderr, "%d reprs_\n", (int) reprs_.size());
+
std::vector<CG_outputRepr *> reprs_2;
for (int k = 0; k < reprs_.size(); k++) {
- fprintf(stderr, "k %d\n", k);
+ fprintf(stderr, "k %d\n", k);
std::vector<CG_outputRepr *> subs;
for (int l = 0; l < subs2.size(); l++) {
- fprintf(stderr, "l %d\n", l);
+ fprintf(stderr, "l %d\n", l);
subs.push_back(subs2[l]->clone());
}
-
+
fprintf(stderr, "clone\n");
- CG_outputRepr *c = reprs_[k]->clone();
- c->dump(); fflush(stdout);
-
- fprintf(stderr, "createsub\n");
+ CG_outputRepr *c = reprs_[k]->clone();
+ c->dump();
+ fflush(stdout);
+
+ fprintf(stderr, "createsub\n");
CG_outputRepr *s = ocgr->CreateSubstitutedStmt(0, c,
loop_vars, subs, true);
-
- fprintf(stderr, "push back\n");
- reprs_2.push_back( s );
-
+
+ fprintf(stderr, "push back\n");
+ reprs_2.push_back(s);
+
}
-
+
it->second = reprs_2;
count++;
- fprintf(stderr, "bottom\n");
+ fprintf(stderr, "bottom\n");
}
-
+
std::vector<CG_outputRepr *> subs3 = output_substitutions(
- ocgs, Inverse(copy(mapping)),
- std::vector<std::pair<CG_outputRepr *, int> >(
- mapping.n_out(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL), 0)),
- uninterpreted_symbols_stringrepr[*i]);
-
+ ocgs, Inverse(copy(mapping)),
+ std::vector<std::pair<CG_outputRepr *, int> >(
+ mapping.n_out(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL), 0)),
+ uninterpreted_symbols_stringrepr[*i]);
+
for (std::map<std::string, std::vector<CG_outputRepr *> >::iterator it =
- uninterpreted_symbols_stringrepr[*i].begin();
+ uninterpreted_symbols_stringrepr[*i].begin();
it != uninterpreted_symbols_stringrepr[*i].end(); it++) {
-
+
std::vector<CG_outputRepr *> reprs_ = it->second;
std::vector<CG_outputRepr *> reprs_2;
for (int k = 0; k < reprs_.size(); k++) {
@@ -2460,13 +2464,13 @@ void Loop::apply_xform(std::set<int> &active) {
*/
reprs_2.push_back(subs3[k]->clone());
}
-
+
it->second = reprs_2;
-
+
}
-
-
- fprintf(stderr, "loop.cc stmt[*i].code =\n");
+
+
+ fprintf(stderr, "loop.cc stmt[*i].code =\n");
//stmt[*i].code->dump();
//fprintf(stderr, "\n");
stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars,
@@ -2474,10 +2478,10 @@ void Loop::apply_xform(std::set<int> &active) {
//fprintf(stderr, "loop.cc substituted code =\n");
//stmt[*i].code->dump();
//fprintf(stderr, "\n");
-
+
stmt[*i].IS = omega::Range(Restrict_Domain(mapping, stmt[*i].IS));
stmt[*i].IS.simplify();
-
+
// replace original transformation relation with straight 1-1 mapping
//fprintf(stderr, "replace original transformation relation with straight 1-1 mapping\n");
mapping = Relation(n, 2 * n + 1);
@@ -2493,46 +2497,44 @@ void Loop::apply_xform(std::set<int> &active) {
h.update_const(-lex[j - 1]);
}
stmt[*i].xform = mapping;
-
+
//fprintf(stderr, "\ncode is: \n");
//stmt[*i].code->dump();
//fprintf(stderr, "\n\n");
-
+
}
-
+
tmp_loop_var_name_counter += max_n;
- fflush(stdout);
- fprintf(stderr, "loop.cc LEAVING apply_xform( set )\n\n");
+ fflush(stdout);
+ fprintf(stderr, "loop.cc LEAVING apply_xform( set )\n\n");
//for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
// fprintf(stderr, "\nloop.cc stmt[i].code =\n");
// stmt[*i].code->dump();
// fprintf(stderr, "\n\n");
//}
-
-}
-
+}
void Loop::addKnown(const Relation &cond) {
-
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
fprintf(stderr, "Loop::addKnown(), SETTING last_compute_cg_ = NULL\n");
-
+
int n1 = this->known.n_set();
-
+
Relation r = copy(cond);
int n2 = r.n_set();
-
+
if (n1 < n2)
this->known = Extend_Set(this->known, n2 - n1);
else if (n1 > n2)
r = Extend_Set(r, n1 - n2);
-
+
this->known = Intersection(this->known, r);
}
@@ -2540,11 +2542,11 @@ void Loop::removeDependence(int stmt_num_from, int stmt_num_to) {
// check for sanity of parameters
if (stmt_num_from >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(stmt_num_from));
+ "invalid statement number " + to_string(stmt_num_from));
if (stmt_num_to >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(stmt_num_to));
-
+ "invalid statement number " + to_string(stmt_num_to));
+
dep.disconnect(stmt_num_from, stmt_num_to);
}
@@ -2556,14 +2558,14 @@ void Loop::dump() const {
if (2 * j < lex.size())
std::cout << lex[2 * j];
switch (stmt[i].loop_level[j].type) {
- case LoopLevelOriginal:
- std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")";
- break;
- case LoopLevelTile:
- std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")";
- break;
- default:
- std::cout << "(unknown)";
+ case LoopLevelOriginal:
+ std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ case LoopLevelTile:
+ std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")";
+ break;
+ default:
+ std::cout << "(unknown)";
}
std::cout << ' ';
}
@@ -2579,16 +2581,16 @@ void Loop::dump() const {
bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {
if (stmt.size() == 0)
return true;
-
+
// check for sanity of parameters
for (int i = 0; i < stmt.size(); i++) {
if (stmt[i].loop_level.size() != num_dep_dim)
throw std::invalid_argument(
- "nonsingular loop transformations must be applied to original perfect loop nest");
+ "nonsingular loop transformations must be applied to original perfect loop nest");
for (int j = 0; j < stmt[i].loop_level.size(); j++)
if (stmt[i].loop_level[j].type != LoopLevelOriginal)
throw std::invalid_argument(
- "nonsingular loop transformations must be applied to original perfect loop nest");
+ "nonsingular loop transformations must be applied to original perfect loop nest");
}
if (T.size() != num_dep_dim)
throw std::invalid_argument("invalid transformation matrix");
@@ -2619,81 +2621,80 @@ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {
h.update_coef(mapping.output_var(i), -1);
h.update_coef(mapping.input_var(i), 1);
}
-
+
// update transformation relations
for (int i = 0; i < stmt.size(); i++)
stmt[i].xform = Composition(copy(mapping), stmt[i].xform);
-
+
// update dependence graph
for (int i = 0; i < dep.vertex.size(); i++)
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+ dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
j++) {
std::vector<DependenceVector> dvs = j->second;
for (int k = 0; k < dvs.size(); k++) {
DependenceVector &dv = dvs[k];
switch (dv.type) {
- case DEP_W2R:
- case DEP_R2W:
- case DEP_W2W:
- case DEP_R2R: {
- std::vector<coef_t> lbounds(num_dep_dim), ubounds(
- num_dep_dim);
- for (int p = 0; p < num_dep_dim; p++) {
- coef_t lb = 0;
- coef_t ub = 0;
- for (int q = 0; q < num_dep_dim; q++) {
- if (T[p][q] > 0) {
- if (lb == -posInfinity
- || dv.lbounds[q] == -posInfinity)
- lb = -posInfinity;
- else
- lb += T[p][q] * dv.lbounds[q];
- if (ub == posInfinity
- || dv.ubounds[q] == posInfinity)
- ub = posInfinity;
- else
- ub += T[p][q] * dv.ubounds[q];
- } else if (T[p][q] < 0) {
- if (lb == -posInfinity
- || dv.ubounds[q] == posInfinity)
- lb = -posInfinity;
- else
- lb += T[p][q] * dv.ubounds[q];
- if (ub == posInfinity
- || dv.lbounds[q] == -posInfinity)
- ub = posInfinity;
- else
- ub += T[p][q] * dv.lbounds[q];
+ case DEP_W2R:
+ case DEP_R2W:
+ case DEP_W2W:
+ case DEP_R2R: {
+ std::vector<coef_t> lbounds(num_dep_dim), ubounds(
+ num_dep_dim);
+ for (int p = 0; p < num_dep_dim; p++) {
+ coef_t lb = 0;
+ coef_t ub = 0;
+ for (int q = 0; q < num_dep_dim; q++) {
+ if (T[p][q] > 0) {
+ if (lb == -posInfinity
+ || dv.lbounds[q] == -posInfinity)
+ lb = -posInfinity;
+ else
+ lb += T[p][q] * dv.lbounds[q];
+ if (ub == posInfinity
+ || dv.ubounds[q] == posInfinity)
+ ub = posInfinity;
+ else
+ ub += T[p][q] * dv.ubounds[q];
+ } else if (T[p][q] < 0) {
+ if (lb == -posInfinity
+ || dv.ubounds[q] == posInfinity)
+ lb = -posInfinity;
+ else
+ lb += T[p][q] * dv.ubounds[q];
+ if (ub == posInfinity
+ || dv.lbounds[q] == -posInfinity)
+ ub = posInfinity;
+ else
+ ub += T[p][q] * dv.lbounds[q];
+ }
}
+ if (T[p].size() == num_dep_dim + 1) {
+ if (lb != -posInfinity)
+ lb += T[p][num_dep_dim];
+ if (ub != posInfinity)
+ ub += T[p][num_dep_dim];
+ }
+ lbounds[p] = lb;
+ ubounds[p] = ub;
}
- if (T[p].size() == num_dep_dim + 1) {
- if (lb != -posInfinity)
- lb += T[p][num_dep_dim];
- if (ub != posInfinity)
- ub += T[p][num_dep_dim];
- }
- lbounds[p] = lb;
- ubounds[p] = ub;
+ dv.lbounds = lbounds;
+ dv.ubounds = ubounds;
+
+ break;
}
- dv.lbounds = lbounds;
- dv.ubounds = ubounds;
-
- break;
- }
- default:
- ;
+ default:;
}
}
j->second = dvs;
}
-
+
// set constant loop values
std::set<int> active;
for (int i = 0; i < stmt.size(); i++)
active.insert(i);
setLexicalOrder(0, active);
-
+
return true;
}
@@ -2706,12 +2707,11 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j,
if (!dv.is_scalar_dependence) {
for (last_dim = 0;
last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]);
- last_dim++)
- ;
+ last_dim++);
last_dim = last_dim / 2;
if (last_dim == 0)
return true;
-
+
for (int i = 0; i < last_dim; i++) {
if (dv.lbounds[i] > 0)
return true;
@@ -2721,9 +2721,9 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j,
}
if (before)
return true;
-
+
return false;
-
+
}
// Manu:: reduction operation
@@ -2731,7 +2731,7 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j,
void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
std::string arrName, int memory_type, int padding_alignment,
int assign_then_accumulate, int padding_stride) {
-
+
//std::cout << "In scalar_expand function: " << stmt_num << ", " << arrName << "\n";
//std::cout.flush();
@@ -2744,10 +2744,10 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
// check for sanity of parameters
bool found_non_constant_size_dimension = false;
-
+
if (stmt_num < 0 || stmt_num >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(stmt_num));
+ "invalid statement number " + to_string(stmt_num));
//Anand: adding check for privatized levels
//if (arrName != "RHS")
// throw std::invalid_argument(
@@ -2755,34 +2755,33 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
for (int i = 0; i < levels.size(); i++) {
if (levels[i] <= 0 || levels[i] > stmt[stmt_num].loop_level.size())
throw std::invalid_argument(
- "1invalid loop level " + to_string(levels[i]));
-
+ "1invalid loop level " + to_string(levels[i]));
+
if (i > 0) {
if (levels[i] < levels[i - 1])
throw std::invalid_argument(
- "loop levels must be in ascending order");
+ "loop levels must be in ascending order");
}
}
//end --adding check for privatized levels
-
+
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
fprintf(stderr, "Loop::scalar_expand(), SETTING last_compute_cg_ = NULL\n");
- fprintf(stderr, "\nloop.cc finding array accesses in stmt %d of the code\n",stmt_num );
+ fprintf(stderr, "\nloop.cc finding array accesses in stmt %d of the code\n", stmt_num);
std::vector<IR_ArrayRef *> access = ir->FindArrayRef(stmt[stmt_num].code);
- fprintf(stderr, "loop.cc L2726 %d access\n", access.size());
+ fprintf(stderr, "loop.cc L2726 %d access\n", access.size());
IR_ArraySymbol *sym = NULL;
- fprintf(stderr, "arrName %s\n", arrName.c_str());
- if (arrName == "RHS") {
- fprintf(stderr, "sym RHS\n");
+ fprintf(stderr, "arrName %s\n", arrName.c_str());
+ if (arrName == "RHS") {
+ fprintf(stderr, "sym RHS\n");
sym = access[0]->symbol();
- }
- else {
- fprintf(stderr, "looking for array %s in access\n", arrName.c_str());
+ } else {
+ fprintf(stderr, "looking for array %s in access\n", arrName.c_str());
for (int k = 0; k < access.size(); k++) { // BUH
//fprintf(stderr, "access[%d] = %s ", k, access[k]->getTypeString()); access[k]->print(0,stderr); fprintf(stderr, "\n");
@@ -2791,34 +2790,34 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//fprintf(stderr, "comparing %s to %s\n", name.c_str(), arrName.c_str());
if (access[k]->symbol()->name() == arrName) {
- fprintf(stderr, "found it sym access[ k=%d ]\n", k);
+ fprintf(stderr, "found it sym access[ k=%d ]\n", k);
sym = access[k]->symbol();
- }
+ }
}
}
- if (!sym) fprintf(stderr, "DIDN'T FIND IT\n");
- fprintf(stderr, "sym %p\n", sym);
+ if (!sym) fprintf(stderr, "DIDN'T FIND IT\n");
+ fprintf(stderr, "sym %p\n", sym);
// collect array references by name
std::vector<int> lex = getLexicalOrder(stmt_num);
int dim = 2 * levels[levels.size() - 1] - 1;
std::set<int> same_loop = getStatements(lex, dim - 1);
-
+
//Anand: shifting this down
// assign_const(stmt[newStmt_num].xform, 2*level+1, 1);
-
+
// std::cout << " before temp array name \n ";
// create a temporary variable
IR_Symbol *tmp_sym;
-
+
// get the loop upperbound, that would be the size of the temp array.
omega::coef_t lb[levels.size()], ub[levels.size()], size[levels.size()];
-
+
//Anand Adding apply xform so that tiled loop bounds are reflected
fprintf(stderr, "Adding apply xform so that tiled loop bounds are reflected\n");
apply_xform(same_loop);
- fprintf(stderr, "loop.cc, back from apply_xform()\n");
-
+ fprintf(stderr, "loop.cc, back from apply_xform()\n");
+
//Anand commenting out the folowing 4 lines
/* copy(stmt[stmt_num].IS).query_variable_bounds(
copy(stmt[stmt_num].IS).set_var(level), lb, ub);
@@ -2890,32 +2889,32 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
std::vector<int> size_int;
Relation xform = copy(stmt[stmt_num].xform);
for (int i = 0; i < n_dim; i++) {
-
+
dim = 2 * levels[i] - 1;
//Anand: Commenting out the lines below: not required
// if (i != 0)
// reduced_copy_is = Project(reduced_copy_is, level - 1 + i, Set_Var);
Relation bound = get_loop_bound(copy(reduced_copy_is), levels[i] - 1);
-
+
// extract stride
std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
bound.set_var(levels[i]));
if (result.second != NULL)
index_stride[i] = abs(result.first.get_coef(result.second))
- / gcd(abs(result.first.get_coef(result.second)),
- abs(
- result.first.get_coef(
- bound.set_var(levels[i]))));
+ / gcd(abs(result.first.get_coef(result.second)),
+ abs(
+ result.first.get_coef(
+ bound.set_var(levels[i]))));
else
index_stride[i] = 1;
// std::cout << "simplest_stride 11:: " << index_stride[i] << "\n";
-
+
// check if this array index requires loop
Conjunct *c = bound.query_DNF()->single_conjunct();
for (EQ_Iterator ei(c->EQs()); ei; ei++) {
if ((*ei).has_wildcards())
continue;
-
+
int coef = (*ei).get_coef(bound.set_var(levels[i]));
if (coef != 0) {
int sign = 1;
@@ -2923,59 +2922,58 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
coef = -coef;
sign = -1;
}
-
+
CG_outputRepr *op = NULL;
for (Constr_Vars_Iter ci(*ei); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- if ((*ci).var != bound.set_var(levels[i]))
+ case Input_Var: {
+ if ((*ci).var != bound.set_var(levels[i]))
+ if ((*ci).coef * sign == 1)
+ op = ocg1->CreateMinus(op,
+ ocg1->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef * sign == -1)
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef * sign > 1) {
+ op = ocg1->CreateMinus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(
+ abs((*ci).coef)),
+ ocg1->CreateIdent(
+ (*ci).var->name())));
+ } else
+ // (*ci).coef*sign < -1
+ op = ocg1->CreatePlus(op,
+ ocg1->CreateTimes(
+ ocg1->CreateInt(
+ abs((*ci).coef)),
+ ocg1->CreateIdent(
+ (*ci).var->name())));
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
if ((*ci).coef * sign == 1)
op = ocg1->CreateMinus(op,
- ocg1->CreateIdent((*ci).var->name()));
+ ocg1->CreateIdent(g->base_name()));
else if ((*ci).coef * sign == -1)
op = ocg1->CreatePlus(op,
- ocg1->CreateIdent((*ci).var->name()));
- else if ((*ci).coef * sign > 1) {
+ ocg1->CreateIdent(g->base_name()));
+ else if ((*ci).coef * sign > 1)
op = ocg1->CreateMinus(op,
ocg1->CreateTimes(
- ocg1->CreateInt(
- abs((*ci).coef)),
- ocg1->CreateIdent(
- (*ci).var->name())));
- }
+ ocg1->CreateInt(abs((*ci).coef)),
+ ocg1->CreateIdent(g->base_name())));
else
// (*ci).coef*sign < -1
op = ocg1->CreatePlus(op,
ocg1->CreateTimes(
- ocg1->CreateInt(
- abs((*ci).coef)),
- ocg1->CreateIdent(
- (*ci).var->name())));
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- if ((*ci).coef * sign == 1)
- op = ocg1->CreateMinus(op,
- ocg1->CreateIdent(g->base_name()));
- else if ((*ci).coef * sign == -1)
- op = ocg1->CreatePlus(op,
- ocg1->CreateIdent(g->base_name()));
- else if ((*ci).coef * sign > 1)
- op = ocg1->CreateMinus(op,
- ocg1->CreateTimes(
- ocg1->CreateInt(abs((*ci).coef)),
- ocg1->CreateIdent(g->base_name())));
- else
- // (*ci).coef*sign < -1
- op = ocg1->CreatePlus(op,
- ocg1->CreateTimes(
- ocg1->CreateInt(abs((*ci).coef)),
- ocg1->CreateIdent(g->base_name())));
- break;
- }
- default:
- throw loop_error("unsupported array index expression");
+ ocg1->CreateInt(abs((*ci).coef)),
+ ocg1->CreateIdent(g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error("unsupported array index expression");
}
}
if ((*ei).get_const() != 0)
@@ -2983,7 +2981,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
ocg1->CreateInt(-sign * ((*ei).get_const())));
if (coef != 1)
op = ocg1->CreateIntegerFloor(op, ocg1->CreateInt(coef));
-
+
index_lb[i] = op;
is_index_eq[i] = true;
break;
@@ -2991,7 +2989,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
}
if (is_index_eq[i])
continue;
-
+
// separate lower and upper bounds
std::vector<GEQ_Handle> lb_list, ub_list;
std::set<Variable_ID> excluded_floor_vars;
@@ -3006,23 +3004,22 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
excluded_floor_vars).first) {
clean_bound = false;
break;
- }
- else
- h= find_floor_definition(bound, (*cvi).var,
- excluded_floor_vars).second;
-
+ } else
+ h = find_floor_definition(bound, (*cvi).var,
+ excluded_floor_vars).second;
+
if (!clean_bound)
continue;
- else{
+ else {
if (coef > 0)
lb_list.push_back(h);
else if (coef < 0)
ub_list.push_back(h);
- continue;
- }
-
+ continue;
+ }
+
}
-
+
if (coef > 0)
lb_list.push_back(*gi);
else if (coef < 0)
@@ -3030,7 +3027,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
}
if (lb_list.size() == 0 || ub_list.size() == 0)
throw loop_error("failed to calcuate array footprint size");
-
+
// build lower bound representation
std::vector<CG_outputRepr *> lb_repr_list;
/* for (int j = 0; j < lb_list.size(); j++){
@@ -3045,7 +3042,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
index_lb[i] = ocg1->CreateInvoke("max", lb_repr_list);
else if (lb_repr_list.size() == 1)
index_lb[i] = lb_repr_list[0];
-
+
// build temporary array size representation
{
Relation cal(copy_is.n_set(), 1);
@@ -3053,65 +3050,65 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
for (int j = 0; j < ub_list.size(); j++)
for (int k = 0; k < lb_list.size(); k++) {
GEQ_Handle h = f_root->add_GEQ();
-
+
for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- int pos = (*ci).var->get_position();
- h.update_coef(cal.input_var(pos), (*ci).coef);
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = cal.get_local(g);
- else
- v = cal.get_local(g, (*ci).var->function_of());
- h.update_coef(v, (*ci).coef);
- break;
- }
- default:
- throw loop_error(
- "cannot calculate temporay array size statically");
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error(
+ "cannot calculate temporay array size statically");
}
}
h.update_const(ub_list[j].get_const());
-
+
for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- int pos = (*ci).var->get_position();
- h.update_coef(cal.input_var(pos), (*ci).coef);
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = cal.get_local(g);
- else
- v = cal.get_local(g, (*ci).var->function_of());
- h.update_coef(v, (*ci).coef);
- break;
- }
- default:
- throw loop_error(
- "cannot calculate temporay array size statically");
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error(
+ "cannot calculate temporay array size statically");
}
}
h.update_const(lb_list[k].get_const());
-
+
h.update_const(1);
h.update_coef(cal.output_var(1), -1);
}
-
+
cal = Restrict_Domain(cal, copy(copy_is));
for (int j = 1; j <= cal.n_inp(); j++) {
cal = Project(cal, j, Input_Var);
}
cal.simplify();
-
+
// pad temporary array size
// TODO: for variable array size, create padding formula
//int padding_stride = 0;
@@ -3125,50 +3122,50 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
gi++)
if ((*gi).is_const(cal.output_var(1))) {
coef_t size = (*gi).get_const()
- / (-(*gi).get_coef(cal.output_var(1)));
-
+ / (-(*gi).get_coef(cal.output_var(1)));
+
if (padding_alignment > 1 && i == n_dim - 1) { // align to boundary for data packing
int residue = size % padding_alignment;
if (residue)
size = size + padding_alignment - residue;
}
-
+
index_sz.push_back(
- std::make_pair(i, ocg1->CreateInt(size)));
+ std::make_pair(i, ocg1->CreateInt(size)));
is_index_bound_const = true;
size_int.push_back(size);
size_repr.push_back(ocg1->CreateInt(size));
-
+
// std::cout << "============================== size :: "
// << size << "\n";
-
+
}
-
+
if (!is_index_bound_const) {
-
+
found_non_constant_size_dimension = true;
Conjunct *c = bound.query_DNF()->single_conjunct();
for (GEQ_Iterator gi(c->GEQs());
gi && !is_index_bound_const; gi++) {
int coef = (*gi).get_coef(bound.set_var(levels[i]));
if (coef < 0) {
-
+
size_repr.push_back(
- ocg1->CreatePlus(
- output_upper_bound_repr(ocg1, *gi,
- bound.set_var(levels[i]),
- bound,
- std::vector<
- std::pair<
- CG_outputRepr *,
- int> >(
- bound.n_set(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL),
- 0)),
- uninterpreted_symbols[stmt_num]),
- ocg1->CreateInt(1)));
-
+ ocg1->CreatePlus(
+ output_upper_bound_repr(ocg1, *gi,
+ bound.set_var(levels[i]),
+ bound,
+ std::vector<
+ std::pair<
+ CG_outputRepr *,
+ int> >(
+ bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
+ uninterpreted_symbols[stmt_num]),
+ ocg1->CreateInt(1)));
+
/*CG_outputRepr *op = NULL;
for (Constr_Vars_Iter ci(*gi); ci; ci++) {
if ((*ci).var != cal.output_var(1)) {
@@ -3238,13 +3235,13 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
}
}
//size[i] = ub[i];
-
+
}
/////////////////////////////////////////////////////////////////////////////////////////////////////
//
-
+
//Anand: Creating IS of new statement
-
+
//for(int l = dim; l < stmt[stmt_num].xform.n_out(); l+=2)
//std::cout << "In scalar_expand function 3: " << stmt_num << ", " << arrName << "\n";
//std::cout.flush();
@@ -3256,16 +3253,16 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//}
//fprintf(stderr, "\n");
-
+
shiftLexicalOrder(lex, dim + 1, 1);
Statement s = stmt[stmt_num];
s.ir_stmt_node = NULL;
int newStmt_num = stmt.size();
- fprintf(stderr, "loop.cc L3249 adding stmt %d\n", stmt.size());
+ fprintf(stderr, "loop.cc L3249 adding stmt %d\n", stmt.size());
stmt.push_back(s);
-
- fprintf(stderr, "uninterpreted_symbols.push_back() newStmt_num %d\n", newStmt_num);
+
+ fprintf(stderr, "uninterpreted_symbols.push_back() newStmt_num %d\n", newStmt_num);
uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]);
stmt[newStmt_num].code = stmt[stmt_num].code->clone();
@@ -3286,16 +3283,16 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//assign_const(stmt[newStmt_num].xform, stmt[stmt_num].xform.n_out(), 1);//Anand: change from 2*level + 1 to stmt[stmt_num].xform.size()
//Anand-End creating IS of new statement
-
- CG_outputRepr * tmpArrSz;
+
+ CG_outputRepr *tmpArrSz;
CG_outputBuilder *ocg = ir->builder();
-
+
//for(int k =0; k < levels.size(); k++ )
// size_repr.push_back(ocg->CreateInt(size[k]));//Anand: copying apply_xform functionality to prevent IS modification
//due to side effects with uninterpreted function symbols and failures in omega
-
+
//int n = stmt[stmt_num].loop_level.size();
-
+
/*Relation mapping(2 * n + 1, n);
F_And *f_root = mapping.add_and();
for (int j = 1; j <= n; j++) {
@@ -3318,64 +3315,68 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
Relation size_ = omega::Range(Restrict_Domain(mapping, copy(stmt[stmt_num].IS)));
size_.simplify();
*/
-
+
//Anand -commenting out tmp sym creation as symbol may have more than one dimension
//tmp_sym = ir->CreateArraySymbol(tmpArrSz, sym);
std::vector<CG_outputRepr *> lhs_index;
CG_outputRepr *arr_ref_repr;
arr_ref_repr = ocg->CreateIdent(
- stmt[stmt_num].IS.set_var(levels[levels.size() - 1])->name());
-
+ stmt[stmt_num].IS.set_var(levels[levels.size() - 1])->name());
+
CG_outputRepr *total_size = size_repr[0];
- fprintf(stderr, "total_size = "); total_size->dump(); fflush(stdout);
+ fprintf(stderr, "total_size = ");
+ total_size->dump();
+ fflush(stdout);
for (int i = 1; i < size_repr.size(); i++) {
- fprintf(stderr, "total_size now "); total_size->dump(); fflush(stdout); fprintf(stderr, " times something\n\n");
+ fprintf(stderr, "total_size now ");
+ total_size->dump();
+ fflush(stdout);
+ fprintf(stderr, " times something\n\n");
total_size = ocg->CreateTimes(total_size->clone(),
size_repr[i]->clone());
-
+
}
-
+
// COMMENT NEEDED
//fprintf(stderr, "\nloop.cc COMMENT NEEDED\n");
for (int k = levels.size() - 2; k >= 0; k--) {
- CG_outputRepr *temp_repr =ocg->CreateIdent(stmt[stmt_num].IS.set_var(levels[k])->name());
- for (int l = k + 1; l < levels.size(); l++) {
+ CG_outputRepr *temp_repr = ocg->CreateIdent(stmt[stmt_num].IS.set_var(levels[k])->name());
+ for (int l = k + 1; l < levels.size(); l++) {
//fprintf(stderr, "\nloop.cc CREATETIMES\n");
temp_repr = ocg->CreateTimes(temp_repr->clone(),
size_repr[l]->clone());
}
-
+
//fprintf(stderr, "\nloop.cc CREATEPLUS\n");
arr_ref_repr = ocg->CreatePlus(arr_ref_repr->clone(),
temp_repr->clone());
}
-
+
//fprintf(stderr, "loop.cc, about to die\n");
std::vector<CG_outputRepr *> to_push;
to_push.push_back(total_size);
- if (!found_non_constant_size_dimension) {
- fprintf(stderr, "constant size dimension\n");
+ if (!found_non_constant_size_dimension) {
+ fprintf(stderr, "constant size dimension\n");
tmp_sym = ir->CreateArraySymbol(sym, to_push, memory_type);
- }
- else {
- fprintf(stderr, "NON constant size dimension?\n");
+ } else {
+ fprintf(stderr, "NON constant size dimension?\n");
//tmp_sym = ir->CreatePointerSymbol(sym, to_push);
tmp_sym = ir->CreatePointerSymbol(sym, to_push);
static_cast<IR_PointerSymbol *>(tmp_sym)->set_size(0, total_size); // ??
ptr_variables.push_back(static_cast<IR_PointerSymbol *>(tmp_sym));
- fprintf(stderr, "ptr_variables now has %d entries\n", ptr_variables.size());
+ fprintf(stderr, "ptr_variables now has %d entries\n", ptr_variables.size());
}
-
+
// add tmp_sym to Loop symtables ??
-
+
// std::cout << " temp array name == " << tmp_sym->name().c_str() << "\n";
-
+
// get loop index variable at the given "level"
// Relation R = omega::Range(Restrict_Domain(copy(stmt[stmt_num].xform), copy(stmt[stmt_num].IS)));
// stmt[stmt_num].IS.print();
@@ -3383,9 +3384,9 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
// std::cout << stmt[stmt_num].IS.n_set() << std::endl;
// std::string v = stmt[stmt_num].IS.set_var(level)->name();
// std::cout << "loop index variable is '" << v.c_str() << "'\n";
-
+
// create a reference for the temporary array
- fprintf(stderr, "create a reference for the temporary array\n");
+ fprintf(stderr, "create a reference for the temporary array\n");
//std::cout << "In scalar_expand function 4: " << stmt_num << ", " << arrName << "\n";
//std::cout.flush();
@@ -3396,7 +3397,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//}
//fprintf(stderr, "\n");
-
+
std::vector<CG_outputRepr *> to_push2;
to_push2.push_back(arr_ref_repr); // can have only one entry
@@ -3405,21 +3406,20 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
IR_ArrayRef *tmp_array_ref;
- IR_PointerArrayRef * tmp_ptr_array_ref; // was IR_PointerArrayref
+ IR_PointerArrayRef *tmp_ptr_array_ref; // was IR_PointerArrayref
if (!found_non_constant_size_dimension) {
fprintf(stderr, "constant size\n");
tmp_array_ref = ir->CreateArrayRef(
- static_cast<IR_ArraySymbol *>(tmp_sym), to_push2);
- }
- else {
- fprintf(stderr, "NON constant size\n");
+ static_cast<IR_ArraySymbol *>(tmp_sym), to_push2);
+ } else {
+ fprintf(stderr, "NON constant size\n");
tmp_ptr_array_ref = ir->CreatePointerArrayRef(
- static_cast<IR_PointerSymbol *>(tmp_sym), to_push2);
+ static_cast<IR_PointerSymbol *>(tmp_sym), to_push2);
// TODO static_cast<IR_PointerSymbol *>(tmp_sym), to_push2);
}
- fflush(stdout);
+ fflush(stdout);
//fprintf(stderr, "\n%d statements\n", stmt.size());
//for (int i=0; i<stmt.size(); i++) {
@@ -3432,22 +3432,22 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//std::string stemp;
//stemp = tmp_array_ref->name();
//std::cout << "Created array reference --> " << stemp.c_str() << "\n";
-
+
// get the RHS expression
- fprintf(stderr, "get the RHS expression arrName %s\n", arrName.c_str());
+ fprintf(stderr, "get the RHS expression arrName %s\n", arrName.c_str());
CG_outputRepr *rhs;
if (arrName == "RHS") {
rhs = ir->GetRHSExpression(stmt[stmt_num].code);
-
+
std::vector<IR_ArrayRef *> symbols = ir->FindArrayRef(rhs);
}
std::set<std::string> sym_names;
-
+
//for (int i = 0; i < symbols.size(); i++)
// sym_names.insert(symbols[i]->symbol()->name());
-
- fflush(stdout);
+
+ fflush(stdout);
//fprintf(stderr, "\nbefore if (arrName == RHS)\n%d statements\n", stmt.size()); // problem is after here
//for (int i=0; i<stmt.size(); i++) {
@@ -3457,15 +3457,14 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//fprintf(stderr, "\n");
if (arrName == "RHS") {
-
+
std::vector<IR_ArrayRef *> symbols = ir->FindArrayRef(rhs);
-
+
for (int i = 0; i < symbols.size(); i++)
sym_names.insert(symbols[i]->symbol()->name());
- }
- else {
+ } else {
- fprintf(stderr, "finding array refs in stmt_num %d\n", stmt_num);
+ fprintf(stderr, "finding array refs in stmt_num %d\n", stmt_num);
//fprintf(stderr, "\n%d statements\n", stmt.size());
//for (int i=0; i<stmt.size(); i++) {
// fprintf(stderr, "%2d ", i);
@@ -3474,15 +3473,15 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//fprintf(stderr, "\n");
std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
- fprintf(stderr, "\n%d refs\n", refs.size());
+ fprintf(stderr, "\n%d refs\n", refs.size());
+
-
bool found = false;
for (int j = 0; j < refs.size(); j++) {
- CG_outputRepr* to_replace;
+ CG_outputRepr *to_replace;
- fprintf(stderr, "j %d build new assignment statement with temporary array\n",j);
+ fprintf(stderr, "j %d build new assignment statement with temporary array\n", j);
// build new assignment statement with temporary array
if (!found_non_constant_size_dimension) {
to_replace = tmp_array_ref->convert();
@@ -3494,7 +3493,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//CR->Dump();
if (refs[j]->name() == arrName) {
- fflush(stdout);
+ fflush(stdout);
fprintf(stderr, "loop.cc L353\n"); // problem is after here
//fprintf(stderr, "\n%d statements\n", stmt.size());
//for (int i=0; i<stmt.size(); i++) {
@@ -3502,15 +3501,15 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
// ((CG_chillRepr *)stmt[i].code)->Dump();
//}
//fprintf(stderr, "\n");
-
+
sym_names.insert(refs[j]->symbol()->name());
-
+
if (!found) {
- if (!found_non_constant_size_dimension) {
- fprintf(stderr, "constant size2\n");
- omega::CG_outputRepr * t = tmp_array_ref->convert();
- omega::CG_outputRepr * r = refs[j]->convert()->clone();
+ if (!found_non_constant_size_dimension) {
+ fprintf(stderr, "constant size2\n");
+ omega::CG_outputRepr *t = tmp_array_ref->convert();
+ omega::CG_outputRepr *r = refs[j]->convert()->clone();
//CR = (CG_chillRepr *) t;
//CR->Dump();
//CR = (CG_chillRepr *) r;
@@ -3518,14 +3517,13 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//fprintf(stderr, "lhs t %p lhs r %p\n", t, r);
stmt[newStmt_num].code =
- ir->builder()->CreateAssignment(0,
- t, // tmp_array_ref->convert(),
- r); // refs[j]->convert()->clone()
- }
- else {
- fprintf(stderr, "NON constant size2\n");
- omega::CG_outputRepr * t = tmp_ptr_array_ref->convert(); // this fails
- omega::CG_outputRepr * r = refs[j]->convert()->clone();
+ ir->builder()->CreateAssignment(0,
+ t, // tmp_array_ref->convert(),
+ r); // refs[j]->convert()->clone()
+ } else {
+ fprintf(stderr, "NON constant size2\n");
+ omega::CG_outputRepr *t = tmp_ptr_array_ref->convert(); // this fails
+ omega::CG_outputRepr *r = refs[j]->convert()->clone();
//omega::CG_chillRepr *CR = (omega::CG_chillRepr *) t;
//CR->Dump();
@@ -3534,21 +3532,21 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//fprintf(stderr, "lhs t %p lhs r %p\n", t, r);
stmt[newStmt_num].code =
- ir->builder()->CreateAssignment(0,
- t, // tmp_ptr_array_ref->convert(),
- r ); // refs[j]->convert()->clone());
+ ir->builder()->CreateAssignment(0,
+ t, // tmp_ptr_array_ref->convert(),
+ r); // refs[j]->convert()->clone());
}
found = true;
-
+
}
-
+
// refs[j] has no parent?
- fprintf(stderr, "replacing refs[%d]\n", j );
+ fprintf(stderr, "replacing refs[%d]\n", j);
ir->ReplaceExpression(refs[j], to_replace);
}
-
+
}
-
+
}
//ToDo need to update the dependence graph
//Anand adding dependence graph update
@@ -3561,10 +3559,10 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
//fprintf(stderr, "\n");
dep.insert();
-
+
//Anand:Copying Dependence checks from datacopy code, might need to be a separate function/module
// in the future
-
+
/*for (int i = 0; i < newStmt_num; i++) {
std::vector<std::vector<DependenceVector> > D;
@@ -3615,41 +3613,41 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
*/
//Anand--end dependence check
if (arrName == "RHS") {
-
+
// build new assignment statement with temporary array
if (!found_non_constant_size_dimension) {
if (assign_then_accumulate) {
stmt[newStmt_num].code = ir->builder()->CreateAssignment(0,
tmp_array_ref->convert(), rhs);
- fprintf(stderr, "ir->ReplaceRHSExpression( stmt_ num %d )\n", stmt_num);
+ fprintf(stderr, "ir->ReplaceRHSExpression( stmt_ num %d )\n", stmt_num);
ir->ReplaceRHSExpression(stmt[stmt_num].code, tmp_array_ref);
} else {
CG_outputRepr *temp = tmp_array_ref->convert()->clone();
if (ir->QueryExpOperation(stmt[stmt_num].code)
!= IR_OP_PLUS_ASSIGNMENT)
throw ir_error(
- "Statement is not a += accumulation statement");
+ "Statement is not a += accumulation statement");
- fprintf(stderr, "replacing in a +=\n");
+ fprintf(stderr, "replacing in a +=\n");
stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0,
temp->clone(), rhs);
-
- CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code);
-
+
+ CG_outputRepr *lhs = ir->GetLHSExpression(stmt[stmt_num].code);
+
CG_outputRepr *assignment = ir->builder()->CreateAssignment(0,
lhs, temp->clone());
Statement init_ = stmt[newStmt_num]; // copy ??
init_.ir_stmt_node = NULL;
-
+
init_.code = stmt[newStmt_num].code->clone();
init_.IS = copy(stmt[newStmt_num].IS);
init_.xform = copy(stmt[newStmt_num].xform);
init_.has_inspector = false; // ??
Relation mapping(init_.IS.n_set(), init_.IS.n_set());
-
+
F_And *f_root = mapping.add_and();
-
+
for (int i = 1; i <= mapping.n_inp(); i++) {
EQ_Handle h = f_root->add_EQ();
//if (i < levels[0]) {
@@ -3660,7 +3658,7 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
h.update_const(-1);
h.update_coef(mapping.output_var(i), 1);
}
-
+
/*else {
int j;
for (j = 0; j < levels.size(); j++)
@@ -3683,18 +3681,18 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
*/
//}
}
-
+
mapping.simplify();
// match omega input/output variables to variable names in the code
for (int j = 1; j <= init_.IS.n_set(); j++)
mapping.name_output_var(j, init_.IS.set_var(j)->name());
for (int j = 1; j <= init_.IS.n_set(); j++)
mapping.name_input_var(j, init_.IS.set_var(j)->name());
-
+
mapping.setup_names();
-
+
init_.IS = omega::Range(
- omega::Restrict_Domain(mapping, init_.IS));
+ omega::Restrict_Domain(mapping, init_.IS));
std::vector<int> lex = getLexicalOrder(newStmt_num);
int dim = 2 * levels[0] - 1;
//init_.IS.print();
@@ -3704,11 +3702,11 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
shiftLexicalOrder(lex, dim + 1, 1);
init_.reduction = stmt[newStmt_num].reduction;
init_.reductionOp = stmt[newStmt_num].reductionOp;
-
+
init_.code = ir->builder()->CreateAssignment(0, temp->clone(),
ir->builder()->CreateInt(0));
- fprintf(stderr, "loop.cc L3693 adding stmt %d\n", stmt.size());
+ fprintf(stderr, "loop.cc L3693 adding stmt %d\n", stmt.size());
stmt.push_back(init_);
uninterpreted_symbols.push_back(uninterpreted_symbols[newStmt_num]);
@@ -3726,47 +3724,45 @@ void Loop::scalar_expand(int stmt_num, const std::vector<int> &levels,
if (ir->QueryExpOperation(stmt[stmt_num].code)
!= IR_OP_PLUS_ASSIGNMENT)
throw ir_error(
- "Statement is not a += accumulation statement");
+ "Statement is not a += accumulation statement");
stmt[newStmt_num].code = ir->builder()->CreatePlusAssignment(0,
temp->clone(), rhs);
-
- CG_outputRepr * lhs = ir->GetLHSExpression(stmt[stmt_num].code);
-
+
+ CG_outputRepr *lhs = ir->GetLHSExpression(stmt[stmt_num].code);
+
CG_outputRepr *assignment = ir->builder()->CreateAssignment(0,
lhs, temp->clone());
-
+
stmt[stmt_num].code = assignment;
}
// call function to replace rhs with temporary array
}
}
-
+
//std::cout << "End of scalar_expand function!! \n";
-
+
// if(arrName == "RHS"){
DependenceVector dv;
std::vector<DependenceVector> E;
dv.lbounds = std::vector<omega::coef_t>(4);
dv.ubounds = std::vector<omega::coef_t>(4);
dv.type = DEP_W2R;
-
+
for (int k = 0; k < 4; k++) {
dv.lbounds[k] = 0;
dv.ubounds[k] = 0;
-
+
}
-
+
//std::vector<IR_ArrayRef*> array_refs = ir->FindArrayRef(stmt[newStmt_num].code);
dv.sym = tmp_sym->clone();
-
+
E.push_back(dv);
-
+
dep.connect(newStmt_num, stmt_num, E);
// }
-
-}
-
+}
std::pair<Relation, Relation> createCSRstyleISandXFORM(CG_outputBuilder *ocg,
@@ -3775,98 +3771,98 @@ std::pair<Relation, Relation> createCSRstyleISandXFORM(CG_outputBuilder *ocg,
std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols,
std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols_string,
Loop *this_loop) {
-
+
Relation IS(outer_loop_bounds.size() + 1 + zero_loop_bounds.size());
Relation XFORM(outer_loop_bounds.size() + 1 + zero_loop_bounds.size(),
2 * (outer_loop_bounds.size() + 1 + zero_loop_bounds.size()) + 1);
-
- F_And * f_r_ = IS.add_and();
- F_And * f_root = XFORM.add_and();
-
+
+ F_And *f_r_ = IS.add_and();
+ F_And *f_root = XFORM.add_and();
+
if (outer_loop_bounds.size() > 0) {
for (int it = 0; it < IS.n_set(); it++) {
IS.name_set_var(it + 1,
const_cast<Relation &>(outer_loop_bounds[0]).set_var(it + 1)->name());
XFORM.name_input_var(it + 1,
const_cast<Relation &>(outer_loop_bounds[0]).set_var(it + 1)->name());
-
+
}
} else if (zero_loop_bounds.size() > 0) {
for (int it = 0; it < IS.n_set(); it++) {
IS.name_set_var(it + 1,
const_cast<Relation &>(zero_loop_bounds.begin()->second).set_var(
- it + 1)->name());
+ it + 1)->name());
XFORM.name_input_var(it + 1,
const_cast<Relation &>(zero_loop_bounds.begin()->second).set_var(
- it + 1)->name());
-
+ it + 1)->name());
+
}
-
+
}
-
+
for (int i = 0; i < outer_loop_bounds.size(); i++)
IS = replace_set_var_as_another_set_var(IS, outer_loop_bounds[i], i + 1,
i + 1);
-
+
int count = 1;
for (std::map<int, Relation>::iterator i = zero_loop_bounds.begin();
i != zero_loop_bounds.end(); i++, count++)
IS = replace_set_var_as_another_set_var(IS, i->second,
outer_loop_bounds.size() + 1 + count, i->first);
-
+
if (outer_loop_bounds.size() > 0) {
Free_Var_Decl *lb = new Free_Var_Decl(index_name + "_", 1); // index_
Variable_ID csr_lb = IS.get_local(lb, Input_Tuple);
-
+
Free_Var_Decl *ub = new Free_Var_Decl(index_name + "__", 1); // index__
Variable_ID csr_ub = IS.get_local(ub, Input_Tuple);
-
+
//lower bound
-
- F_And * f_r = IS.and_with_and();
+
+ F_And *f_r = IS.and_with_and();
GEQ_Handle lower_bound = f_r->add_GEQ();
lower_bound.update_coef(csr_lb, -1);
lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1);
-
+
//upper bound
-
+
GEQ_Handle upper_bound = f_r->add_GEQ();
upper_bound.update_coef(csr_ub, 1);
upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1);
upper_bound.update_const(-1);
-
+
omega::CG_stringBuilder *ocgs = new CG_stringBuilder;
-
+
std::vector<omega::CG_outputRepr *> reprs;
std::vector<omega::CG_outputRepr *> reprs2;
-
+
std::vector<omega::CG_outputRepr *> reprs3;
std::vector<omega::CG_outputRepr *> reprs4;
-
+
reprs.push_back(
- ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
+ ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
reprs2.push_back(
- ocgs->CreateIdent(
- IS.set_var(outer_loop_bounds.size())->name()));
+ ocgs->CreateIdent(
+ IS.set_var(outer_loop_bounds.size())->name()));
uninterpreted_symbols.insert(
- std::pair<std::string, std::vector<CG_outputRepr *> >(
- index_name + "_", reprs));
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "_", reprs));
uninterpreted_symbols_string.insert(
- std::pair<std::string, std::vector<CG_outputRepr *> >(
- index_name + "_", reprs2));
-
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "_", reprs2));
+
std::string arg = "(" + IS.set_var(outer_loop_bounds.size())->name()
- + ")";
- std::vector< std::string > argvec;
- argvec.push_back( arg );
-
+ + ")";
+ std::vector<std::string> argvec;
+ argvec.push_back(arg);
+
CG_outputRepr *repr = ocg->CreateArrayRefExpression(index_name,
ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
-
+
//fprintf(stderr, "( VECTOR _)\n");
//fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "_").c_str());
this_loop->ir->CreateDefineMacro(index_name + "_", argvec, repr);
-
+
Relation known_(copy(IS).n_set());
known_.copy_names(copy(IS));
known_.setup_names();
@@ -3878,80 +3874,81 @@ std::pair<Relation, Relation> createCSRstyleISandXFORM(CG_outputBuilder *ocg,
g.update_coef(index_lb, -1);
g.update_const(-1);
this_loop->addKnown(known_);
-
+
reprs3.push_back(
-
- ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
+
+ ocg->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
reprs4.push_back(
-
- ocgs->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
-
+
+ ocgs->CreateIdent(IS.set_var(outer_loop_bounds.size())->name()));
+
CG_outputRepr *repr2 = ocg->CreateArrayRefExpression(index_name,
ocg->CreatePlus(
- ocg->CreateIdent(
- IS.set_var(outer_loop_bounds.size())->name()),
- ocg->CreateInt(1)));
-
+ ocg->CreateIdent(
+ IS.set_var(outer_loop_bounds.size())->name()),
+ ocg->CreateInt(1)));
+
//fprintf(stderr, "( VECTOR __)\n");
//fprintf(stderr, "loop.cc calling CreateDefineMacro( %s, argvec, repr)\n", (index_name + "__").c_str());
-
+
this_loop->ir->CreateDefineMacro(index_name + "__", argvec, repr2);
-
+
uninterpreted_symbols.insert(
- std::pair<std::string, std::vector<CG_outputRepr *> >(
- index_name + "__", reprs3));
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "__", reprs3));
uninterpreted_symbols_string.insert(
- std::pair<std::string, std::vector<CG_outputRepr *> >(
- index_name + "__", reprs4));
+ std::pair<std::string, std::vector<CG_outputRepr *> >(
+ index_name + "__", reprs4));
} else {
Free_Var_Decl *ub = new Free_Var_Decl(index_name);
Variable_ID csr_ub = IS.get_local(ub);
- F_And * f_r = IS.and_with_and();
+ F_And *f_r = IS.and_with_and();
GEQ_Handle upper_bound = f_r->add_GEQ();
upper_bound.update_coef(csr_ub, 1);
upper_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), -1);
upper_bound.update_const(-1);
-
+
GEQ_Handle lower_bound = f_r->add_GEQ();
lower_bound.update_coef(IS.set_var(outer_loop_bounds.size() + 1), 1);
-
+
}
-
+
for (int j = 1; j <= XFORM.n_inp(); j++) {
omega::EQ_Handle h = f_root->add_EQ();
h.update_coef(XFORM.output_var(2 * j), 1);
h.update_coef(XFORM.input_var(j), -1);
}
-
+
for (int j = 1; j <= XFORM.n_out(); j += 2) {
omega::EQ_Handle h = f_root->add_EQ();
h.update_coef(XFORM.output_var(j), 1);
}
-
+
if (_DEBUG_) {
IS.print();
XFORM.print();
-
+
}
-
+
return std::pair<Relation, Relation>(IS, XFORM);
-
+
}
std::pair<Relation, Relation> construct_reduced_IS_And_XFORM(IR_Code *ir,
- const Relation &is, const Relation &xform, const std::vector<int> loops,
+ const Relation &is, const Relation &xform,
+ const std::vector<int> loops,
std::vector<int> &lex_order, Relation &known,
std::map<std::string, std::vector<CG_outputRepr *> > &uninterpreted_symbols) {
-
+
Relation IS(loops.size());
Relation XFORM(loops.size(), 2 * loops.size() + 1);
int count_ = 1;
std::map<int, int> pos_mapping;
-
+
int n = is.n_set();
Relation is_and_known = Intersection(copy(is),
Extend_Set(copy(known), n - known.n_set()));
-
+
for (int it = 0; it < loops.size(); it++, count_++) {
IS.name_set_var(count_,
const_cast<Relation &>(is).set_var(loops[it])->name());
@@ -3963,11 +3960,11 @@ std::pair<Relation, Relation> construct_reduced_IS_And_XFORM(IR_Code *ir,
const_cast<Relation &>(xform).output_var((loops[it]) * 2 - 1)->name());
pos_mapping.insert(std::pair<int, int>(count_, loops[it]));
}
-
+
XFORM.name_output_var(2 * loops.size() + 1,
const_cast<Relation &>(xform).output_var(is.n_set() * 2 + 1)->name());
-
- F_And * f_r = IS.add_and();
+
+ F_And *f_r = IS.add_and();
for (std::map<int, int>::iterator it = pos_mapping.begin();
it != pos_mapping.end(); it++)
IS = replace_set_var_as_another_set_var(IS, is_and_known, it->first,
@@ -4012,9 +4009,9 @@ std::pair<Relation, Relation> construct_reduced_IS_And_XFORM(IR_Code *ir,
CHILL_DEBUG_END
F_And *f_root = XFORM.add_and();
-
+
count_ = 1;
-
+
for (int j = 1; j <= loops.size(); j++) {
omega::EQ_Handle h = f_root->add_EQ();
h.update_coef(XFORM.output_var(2 * j), 1);
@@ -4025,7 +4022,7 @@ std::pair<Relation, Relation> construct_reduced_IS_And_XFORM(IR_Code *ir,
h.update_coef(XFORM.output_var(count_ * 2 - 1), 1);
h.update_const(-lex_order[count_ * 2 - 2]);
}
-
+
omega::EQ_Handle h = f_root->add_EQ();
h.update_coef(XFORM.output_var((loops.size()) * 2 + 1), 1);
h.update_const(-lex_order[xform.n_out() - 1]);
@@ -4035,34 +4032,34 @@ std::pair<Relation, Relation> construct_reduced_IS_And_XFORM(IR_Code *ir,
IS.print();
XFORM.print();
CHILL_DEBUG_END
-
+
return std::pair<Relation, Relation>(IS, XFORM);
-
+
}
std::set<std::string> inspect_repr_for_scalars(IR_Code *ir,
- CG_outputRepr * repr, std::set<std::string> ignore) {
-
+ CG_outputRepr *repr, std::set<std::string> ignore) {
+
std::vector<IR_ScalarRef *> refs = ir->FindScalarRef(repr);
std::set<std::string> loop_vars;
-
+
for (int i = 0; i < refs.size(); i++)
if (ignore.find(refs[i]->name()) == ignore.end())
loop_vars.insert(refs[i]->name());
-
+
return loop_vars;
-
+
}
std::set<std::string> inspect_loop_bounds(IR_Code *ir, const Relation &R,
int pos,
std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols) {
-
+
if (!R.is_set())
throw loop_error("Input R has to be a set not a relation!");
-
+
std::set<std::string> vars;
-
+
std::vector<CG_outputRepr *> refs;
Variable_ID v = const_cast<Relation &>(R).set_var(pos);
for (DNF_Iterator di(const_cast<Relation &>(R).query_DNF()); di; di++) {
@@ -4071,48 +4068,48 @@ std::set<std::string> inspect_loop_bounds(IR_Code *ir, const Relation &R,
for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) {
Variable_ID v = cvi.curr_var();
switch (v->kind()) {
-
- case Global_Var: {
- Global_Var_ID g = v->get_global_var();
- Variable_ID v2;
- if (g->arity() > 0) {
-
- std::string s = g->base_name();
- std::copy(
- uninterpreted_symbols.find(s)->second.begin(),
- uninterpreted_symbols.find(s)->second.end(),
- back_inserter(refs));
-
+
+ case Global_Var: {
+ Global_Var_ID g = v->get_global_var();
+ Variable_ID v2;
+ if (g->arity() > 0) {
+
+ std::string s = g->base_name();
+ std::copy(
+ uninterpreted_symbols.find(s)->second.begin(),
+ uninterpreted_symbols.find(s)->second.end(),
+ back_inserter(refs));
+
+ }
+
+ break;
}
-
- break;
- }
- default:
- break;
+ default:
+ break;
}
}
-
+
}
}
}
-
+
for (int i = 0; i < refs.size(); i++) {
std::vector<IR_ScalarRef *> refs_ = ir->FindScalarRef(refs[i]);
-
+
for (int j = 0; j < refs_.size(); j++)
vars.insert(refs_[j]->name());
-
+
}
return vars;
}
-CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R,
- int pos, CG_outputRepr * count,
- std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols) {
-
+CG_outputRepr *create_counting_loop_body(IR_Code *ir, const Relation &R,
+ int pos, CG_outputRepr *count,
+ std::map<std::string, std::vector<omega::CG_outputRepr *> > &uninterpreted_symbols) {
+
if (!R.is_set())
throw loop_error("Input R has to be a set not a relation!");
-
+
CG_outputRepr *ub, *lb;
ub = NULL;
lb = NULL;
@@ -4126,42 +4123,42 @@ CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R,
for (Constr_Vars_Iter cvi(*gi); cvi; cvi++) {
Variable_ID v = cvi.curr_var();
switch (v->kind()) {
-
- case Global_Var: {
- Global_Var_ID g = v->get_global_var();
- Variable_ID v2;
- if (g->arity() > 0) {
-
- std::string s = g->base_name();
-
- if ((*gi).get_coef(v) > 0) {
- if (ub != NULL)
- throw ir_error(
- "bound expression too complex!");
-
- ub = ir->builder()->CreateInvoke(s,
- uninterpreted_symbols.find(s)->second);
- //ub = ir->builder()->CreateMinus(ub->clone(), ir->builder()->CreateInt(-(*gi).get_const()));
- same_ge_1 = true;
-
- } else {
- if (lb != NULL)
- throw ir_error(
- "bound expression too complex!");
- lb = ir->builder()->CreateInvoke(s,
- uninterpreted_symbols.find(s)->second);
- same_ge_2 = true;
-
+
+ case Global_Var: {
+ Global_Var_ID g = v->get_global_var();
+ Variable_ID v2;
+ if (g->arity() > 0) {
+
+ std::string s = g->base_name();
+
+ if ((*gi).get_coef(v) > 0) {
+ if (ub != NULL)
+ throw ir_error(
+ "bound expression too complex!");
+
+ ub = ir->builder()->CreateInvoke(s,
+ uninterpreted_symbols.find(s)->second);
+ //ub = ir->builder()->CreateMinus(ub->clone(), ir->builder()->CreateInt(-(*gi).get_const()));
+ same_ge_1 = true;
+
+ } else {
+ if (lb != NULL)
+ throw ir_error(
+ "bound expression too complex!");
+ lb = ir->builder()->CreateInvoke(s,
+ uninterpreted_symbols.find(s)->second);
+ same_ge_2 = true;
+
+ }
}
+
+ break;
}
-
- break;
- }
- default:
- break;
+ default:
+ break;
}
}
-
+
if (same_ge_1 && same_ge_2)
lb = ir->builder()->CreatePlus(lb->clone(),
ir->builder()->CreateInt(-(*gi).get_const()));
@@ -4173,181 +4170,179 @@ CG_outputRepr * create_counting_loop_body(IR_Code *ir, const Relation &R,
ir->builder()->CreateInt(-(*gi).get_const()));
}
}
-
+
}
-
+
return ir->builder()->CreatePlusAssignment(0, count,
ir->builder()->CreatePlus(
- ir->builder()->CreateMinus(ub->clone(), lb->clone()),
- ir->builder()->CreateInt(1)));
+ ir->builder()->CreateMinus(ub->clone(), lb->clone()),
+ ir->builder()->CreateInt(1)));
}
-
std::map<std::string, std::vector<std::string> > recurse_on_exp_for_arrays(
- IR_Code * ir, CG_outputRepr * exp) {
-
+ IR_Code *ir, CG_outputRepr *exp) {
+
std::map<std::string, std::vector<std::string> > arr_index_to_ref;
switch (ir->QueryExpOperation(exp)) {
-
- case IR_OP_ARRAY_VARIABLE: {
- IR_ArrayRef *ref = dynamic_cast<IR_ArrayRef *>(ir->Repr2Ref(exp));
- IR_PointerArrayRef *ref_ =
- dynamic_cast<IR_PointerArrayRef *>(ir->Repr2Ref(exp));
- if (ref == NULL && ref_ == NULL)
- throw loop_error("Array symbol unidentifiable!");
-
- if (ref != NULL) {
- std::vector<std::string> s0;
-
- for (int i = 0; i < ref->n_dim(); i++) {
- CG_outputRepr * index = ref->index(i);
- std::map<std::string, std::vector<std::string> > a0 =
- recurse_on_exp_for_arrays(ir, index);
- std::vector<std::string> s;
- for (std::map<std::string, std::vector<std::string> >::iterator j =
- a0.begin(); j != a0.end(); j++) {
- if (j->second.size() != 1 && (j->second)[0] != "")
- throw loop_error(
- "indirect array references not allowed in guard!");
- s.push_back(j->first);
+
+ case IR_OP_ARRAY_VARIABLE: {
+ IR_ArrayRef *ref = dynamic_cast<IR_ArrayRef *>(ir->Repr2Ref(exp));
+ IR_PointerArrayRef *ref_ =
+ dynamic_cast<IR_PointerArrayRef *>(ir->Repr2Ref(exp));
+ if (ref == NULL && ref_ == NULL)
+ throw loop_error("Array symbol unidentifiable!");
+
+ if (ref != NULL) {
+ std::vector<std::string> s0;
+
+ for (int i = 0; i < ref->n_dim(); i++) {
+ CG_outputRepr *index = ref->index(i);
+ std::map<std::string, std::vector<std::string> > a0 =
+ recurse_on_exp_for_arrays(ir, index);
+ std::vector<std::string> s;
+ for (std::map<std::string, std::vector<std::string> >::iterator j =
+ a0.begin(); j != a0.end(); j++) {
+ if (j->second.size() != 1 && (j->second)[0] != "")
+ throw loop_error(
+ "indirect array references not allowed in guard!");
+ s.push_back(j->first);
+ }
+ std::copy(s.begin(), s.end(), back_inserter(s0));
}
- std::copy(s.begin(), s.end(), back_inserter(s0));
- }
- arr_index_to_ref.insert(
- std::pair<std::string, std::vector<std::string> >(
- ref->name(), s0));
- } else {
- std::vector<std::string> s0;
- for (int i = 0; i < ref_->n_dim(); i++) {
- CG_outputRepr * index = ref_->index(i);
- std::map<std::string, std::vector<std::string> > a0 =
- recurse_on_exp_for_arrays(ir, index);
- std::vector<std::string> s;
- for (std::map<std::string, std::vector<std::string> >::iterator j =
- a0.begin(); j != a0.end(); j++) {
- if (j->second.size() != 1 && (j->second)[0] != "")
- throw loop_error(
- "indirect array references not allowed in guard!");
- s.push_back(j->first);
+ arr_index_to_ref.insert(
+ std::pair<std::string, std::vector<std::string> >(
+ ref->name(), s0));
+ } else {
+ std::vector<std::string> s0;
+ for (int i = 0; i < ref_->n_dim(); i++) {
+ CG_outputRepr *index = ref_->index(i);
+ std::map<std::string, std::vector<std::string> > a0 =
+ recurse_on_exp_for_arrays(ir, index);
+ std::vector<std::string> s;
+ for (std::map<std::string, std::vector<std::string> >::iterator j =
+ a0.begin(); j != a0.end(); j++) {
+ if (j->second.size() != 1 && (j->second)[0] != "")
+ throw loop_error(
+ "indirect array references not allowed in guard!");
+ s.push_back(j->first);
+ }
+ std::copy(s.begin(), s.end(), back_inserter(s0));
}
- std::copy(s.begin(), s.end(), back_inserter(s0));
+ arr_index_to_ref.insert(
+ std::pair<std::string, std::vector<std::string> >(
+ ref_->name(), s0));
}
- arr_index_to_ref.insert(
- std::pair<std::string, std::vector<std::string> >(
- ref_->name(), s0));
+ break;
}
- break;
- }
- case IR_OP_PLUS:
- case IR_OP_MINUS:
- case IR_OP_MULTIPLY:
- case IR_OP_DIVIDE: {
- std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
- std::map<std::string, std::vector<std::string> > a0 =
- recurse_on_exp_for_arrays(ir, v[0]);
- std::map<std::string, std::vector<std::string> > a1 =
- recurse_on_exp_for_arrays(ir, v[1]);
- arr_index_to_ref.insert(a0.begin(), a0.end());
- arr_index_to_ref.insert(a1.begin(), a1.end());
- break;
-
- }
- case IR_OP_POSITIVE:
- case IR_OP_NEGATIVE: {
- std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
- std::map<std::string, std::vector<std::string> > a0 =
- recurse_on_exp_for_arrays(ir, v[0]);
-
- arr_index_to_ref.insert(a0.begin(), a0.end());
- break;
-
- }
- case IR_OP_VARIABLE: {
- std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
- IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0]));
-
- std::string s = ref->name();
- std::vector<std::string> to_insert;
- to_insert.push_back("");
- arr_index_to_ref.insert(
- std::pair<std::string, std::vector<std::string> >(s,
- to_insert));
- break;
- }
- case IR_OP_CONSTANT:
- break;
-
- default: {
- std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
-
- for (int i = 0; i < v.size(); i++) {
+ case IR_OP_PLUS:
+ case IR_OP_MINUS:
+ case IR_OP_MULTIPLY:
+ case IR_OP_DIVIDE: {
+ std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
std::map<std::string, std::vector<std::string> > a0 =
- recurse_on_exp_for_arrays(ir, v[i]);
-
+ recurse_on_exp_for_arrays(ir, v[0]);
+ std::map<std::string, std::vector<std::string> > a1 =
+ recurse_on_exp_for_arrays(ir, v[1]);
arr_index_to_ref.insert(a0.begin(), a0.end());
+ arr_index_to_ref.insert(a1.begin(), a1.end());
+ break;
+
+ }
+ case IR_OP_POSITIVE:
+ case IR_OP_NEGATIVE: {
+ std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
+ std::map<std::string, std::vector<std::string> > a0 =
+ recurse_on_exp_for_arrays(ir, v[0]);
+
+ arr_index_to_ref.insert(a0.begin(), a0.end());
+ break;
+
+ }
+ case IR_OP_VARIABLE: {
+ std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
+ IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0]));
+
+ std::string s = ref->name();
+ std::vector<std::string> to_insert;
+ to_insert.push_back("");
+ arr_index_to_ref.insert(
+ std::pair<std::string, std::vector<std::string> >(s,
+ to_insert));
+ break;
+ }
+ case IR_OP_CONSTANT:
+ break;
+
+ default: {
+ std::vector<CG_outputRepr *> v = ir->QueryExpOperand(exp);
+
+ for (int i = 0; i < v.size(); i++) {
+ std::map<std::string, std::vector<std::string> > a0 =
+ recurse_on_exp_for_arrays(ir, v[i]);
+
+ arr_index_to_ref.insert(a0.begin(), a0.end());
+ }
+
+ break;
}
-
- break;
- }
}
return arr_index_to_ref;
}
-
std::vector<CG_outputRepr *> find_guards(IR_Code *ir, IR_Control *code) {
CHILL_DEBUG_PRINT("find_guards()\n");
std::vector<CG_outputRepr *> guards;
switch (code->type()) {
- case IR_CONTROL_IF: {
- CHILL_DEBUG_PRINT("find_guards() it's an if\n");
- CG_outputRepr *cond = dynamic_cast<IR_If*>(code)->condition();
-
- std::vector<CG_outputRepr *> then_body;
- std::vector<CG_outputRepr *> else_body;
- IR_Block *ORTB = dynamic_cast<IR_If*>(code)->then_body();
- if (ORTB != NULL) {
- CHILL_DEBUG_PRINT("recursing on then\n");
- then_body = find_guards(ir, ORTB);
- //dynamic_cast<IR_If*>(code)->then_body());
- }
- if (dynamic_cast<IR_If*>(code)->else_body() != NULL) {
- CHILL_DEBUG_PRINT("recursing on then\n");
- else_body = find_guards(ir,
- dynamic_cast<IR_If*>(code)->else_body());
+ case IR_CONTROL_IF: {
+ CHILL_DEBUG_PRINT("find_guards() it's an if\n");
+ CG_outputRepr *cond = dynamic_cast<IR_If *>(code)->condition();
+
+ std::vector<CG_outputRepr *> then_body;
+ std::vector<CG_outputRepr *> else_body;
+ IR_Block *ORTB = dynamic_cast<IR_If *>(code)->then_body();
+ if (ORTB != NULL) {
+ CHILL_DEBUG_PRINT("recursing on then\n");
+ then_body = find_guards(ir, ORTB);
+ //dynamic_cast<IR_If*>(code)->then_body());
+ }
+ if (dynamic_cast<IR_If *>(code)->else_body() != NULL) {
+ CHILL_DEBUG_PRINT("recursing on then\n");
+ else_body = find_guards(ir,
+ dynamic_cast<IR_If *>(code)->else_body());
+ }
+
+ guards.push_back(cond);
+ if (then_body.size() > 0)
+ std::copy(then_body.begin(), then_body.end(),
+ back_inserter(guards));
+ if (else_body.size() > 0)
+ std::copy(else_body.begin(), else_body.end(),
+ back_inserter(guards));
+ break;
}
-
- guards.push_back(cond);
- if (then_body.size() > 0)
- std::copy(then_body.begin(), then_body.end(),
- back_inserter(guards));
- if (else_body.size() > 0)
- std::copy(else_body.begin(), else_body.end(),
- back_inserter(guards));
- break;
- }
- case IR_CONTROL_BLOCK: {
- CHILL_DEBUG_PRINT("it's a control block\n");
- IR_Block* IRCB = dynamic_cast<IR_Block*>(code);
- CHILL_DEBUG_PRINT("calling ir->FindOneLevelControlStructure(IRCB);\n");
- std::vector<IR_Control *> stmts = ir->FindOneLevelControlStructure(IRCB);
-
- for (int i = 0; i < stmts.size(); i++) {
- std::vector<CG_outputRepr *> stmt_repr = find_guards(ir, stmts[i]);
- std::copy(stmt_repr.begin(), stmt_repr.end(),
- back_inserter(guards));
+ case IR_CONTROL_BLOCK: {
+ CHILL_DEBUG_PRINT("it's a control block\n");
+ IR_Block *IRCB = dynamic_cast<IR_Block *>(code);
+ CHILL_DEBUG_PRINT("calling ir->FindOneLevelControlStructure(IRCB);\n");
+ std::vector<IR_Control *> stmts = ir->FindOneLevelControlStructure(IRCB);
+
+ for (int i = 0; i < stmts.size(); i++) {
+ std::vector<CG_outputRepr *> stmt_repr = find_guards(ir, stmts[i]);
+ std::copy(stmt_repr.begin(), stmt_repr.end(),
+ back_inserter(guards));
+ }
+ break;
}
- break;
- }
- case IR_CONTROL_LOOP: {
- CHILL_DEBUG_PRINT("it's a control loop\n");
- std::vector<CG_outputRepr *> body = find_guards(ir,
- dynamic_cast<IR_Loop*>(code)->body());
- if (body.size() > 0)
- std::copy(body.begin(), body.end(), back_inserter(guards));
- break;
- } // loop
+ case IR_CONTROL_LOOP: {
+ CHILL_DEBUG_PRINT("it's a control loop\n");
+ std::vector<CG_outputRepr *> body = find_guards(ir,
+ dynamic_cast<IR_Loop *>(code)->body());
+ if (body.size() > 0)
+ std::copy(body.begin(), body.end(), back_inserter(guards));
+ break;
+ } // loop
} // switch
return guards;
}
@@ -4359,43 +4354,43 @@ bool sort_helper(std::pair<std::string, std::vector<std::string> > i,
for (int k = 0; k < i.second.size(); k++)
if (i.second[k] != "")
c1++;
-
+
for (int k = 0; k < j.second.size(); k++)
if (j.second[k] != "")
c2++;
return (c1 < c2);
-
+
}
bool sort_helper_2(std::pair<int, int> i, std::pair<int, int> j) {
-
+
return (i.second < j.second);
-
+
}
std::vector<std::string> construct_iteration_order(
- std::map<std::string, std::vector<std::string> > & input) {
+ std::map<std::string, std::vector<std::string> > &input) {
std::vector<std::string> arrays;
std::vector<std::string> scalars;
std::vector<std::pair<std::string, std::vector<std::string> > > input_aid;
-
+
for (std::map<std::string, std::vector<std::string> >::iterator j =
- input.begin(); j != input.end(); j++)
+ input.begin(); j != input.end(); j++)
input_aid.push_back(
- std::pair<std::string, std::vector<std::string> >(j->first,
- j->second));
-
+ std::pair<std::string, std::vector<std::string> >(j->first,
+ j->second));
+
std::sort(input_aid.begin(), input_aid.end(), sort_helper);
-
+
for (int j = 0; j < input_aid[input_aid.size() - 1].second.size(); j++)
if (input_aid[input_aid.size() - 1].second[j] != "") {
arrays.push_back(input_aid[input_aid.size() - 1].second[j]);
-
+
}
-
+
if (arrays.size() > 0) {
for (int i = input_aid.size() - 2; i >= 0; i--) {
-
+
int max_count = 0;
for (int j = 0; j < input_aid[i].second.size(); j++)
if (input_aid[i].second[j] != "") {
@@ -4421,7 +4416,7 @@ std::vector<std::string> construct_iteration_order(
}
}
} else {
-
+
for (int i = input_aid.size() - 1; i >= 0; i--) {
arrays.push_back(input_aid[i].first);
}
diff --git a/src/transformations/loop_basic.cc b/src/transformations/loop_basic.cc
index a058598..1be0981 100644
--- a/src/transformations/loop_basic.cc
+++ b/src/transformations/loop_basic.cc
@@ -19,7 +19,7 @@ void Loop::permute(const std::vector<int> &pi) {
std::set<int> active;
for (int i = 0; i < stmt.size(); i++)
active.insert(i);
-
+
permute(active, pi);
}
@@ -30,12 +30,13 @@ void Loop::original() {
setLexicalOrder(0, active);
//apply_xform();
}
+
void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) {
// check for sanity of parameters
int starting_order;
if (stmt_num < 0 || stmt_num >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(stmt_num));
+ "invalid statement number " + to_string(stmt_num));
std::set<int> active;
if (level < 0 || level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument("3invalid loop level " + to_string(level));
@@ -61,14 +62,14 @@ void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) {
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
if (level + pi.size() - 1 > stmt[*i].loop_level.size())
throw std::invalid_argument(
- "invalid permutation for statement " + to_string(*i));
-
+ "invalid permutation for statement " + to_string(*i));
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
// Update transformation relations
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
int n = stmt[*i].xform.n_out();
@@ -97,7 +98,7 @@ void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) {
stmt[*i].xform = Composition(mapping, stmt[*i].xform);
stmt[*i].xform.simplify();
}
-
+
// get the permuation for dependence vectors
std::vector<int> t;
for (int i = 0; i < pi.size(); i++)
@@ -122,41 +123,41 @@ void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) {
dep_pi[i] = t[i - min_dep_dim];
for (int i = max_dep_dim + 1; i < dep.num_dim(); i++)
dep_pi[i] = i;
-
+
dep.permute(dep_pi, active);
-
+
// update the dependence graph
DependenceGraph g(dep.num_dim());
for (int i = 0; i < dep.vertex.size(); i++)
g.insert();
for (int i = 0; i < dep.vertex.size(); i++)
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+ dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
j++) {
if ((active.find(i) != active.end()
&& active.find(j->first) != active.end())) {
std::vector<DependenceVector> dv = j->second;
for (int k = 0; k < dv.size(); k++) {
switch (dv[k].type) {
- case DEP_W2R:
- case DEP_R2W:
- case DEP_W2W:
- case DEP_R2R: {
- std::vector<coef_t> lbounds(dep.num_dim());
- std::vector<coef_t> ubounds(dep.num_dim());
- for (int d = 0; d < dep.num_dim(); d++) {
- lbounds[d] = dv[k].lbounds[dep_pi[d]];
- ubounds[d] = dv[k].ubounds[dep_pi[d]];
+ case DEP_W2R:
+ case DEP_R2W:
+ case DEP_W2W:
+ case DEP_R2R: {
+ std::vector<coef_t> lbounds(dep.num_dim());
+ std::vector<coef_t> ubounds(dep.num_dim());
+ for (int d = 0; d < dep.num_dim(); d++) {
+ lbounds[d] = dv[k].lbounds[dep_pi[d]];
+ ubounds[d] = dv[k].ubounds[dep_pi[d]];
+ }
+ dv[k].lbounds = lbounds;
+ dv[k].ubounds = ubounds;
+ break;
}
- dv[k].lbounds = lbounds;
- dv[k].ubounds = ubounds;
- break;
- }
- case DEP_CONTROL: {
- break;
- }
- default:
- throw loop_error("unknown dependence type");
+ case DEP_CONTROL: {
+ break;
+ }
+ default:
+ throw loop_error("unknown dependence type");
}
}
g.connect(i, j->first, dv);
@@ -168,27 +169,27 @@ void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) {
std::vector<DependenceVector> dv = j->second;
for (int k = 0; k < dv.size(); k++)
switch (dv[k].type) {
- case DEP_W2R:
- case DEP_R2W:
- case DEP_W2W:
- case DEP_R2R: {
- for (int d = 0; d < dep.num_dim(); d++)
- if (dep_pi[d] != d) {
- dv[k].lbounds[d] = -posInfinity;
- dv[k].ubounds[d] = posInfinity;
- }
- break;
- }
- case DEP_CONTROL:
- break;
- default:
- throw loop_error("unknown dependence type");
+ case DEP_W2R:
+ case DEP_R2W:
+ case DEP_W2W:
+ case DEP_R2R: {
+ for (int d = 0; d < dep.num_dim(); d++)
+ if (dep_pi[d] != d) {
+ dv[k].lbounds[d] = -posInfinity;
+ dv[k].ubounds[d] = posInfinity;
+ }
+ break;
+ }
+ case DEP_CONTROL:
+ break;
+ default:
+ throw loop_error("unknown dependence type");
}
g.connect(i, j->first, dv);
}
}
dep = g;
-
+
// update loop level information
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
int cur_dep_dim = min_dep_dim;
@@ -196,66 +197,67 @@ void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) {
for (int j = 1; j <= stmt[*i].loop_level.size(); j++)
if (j >= level && j < level + pi.size()) {
switch (stmt[*i].loop_level[pi_inverse[j - level] - 1].type) {
- case LoopLevelOriginal:
- new_loop_level[j - 1].type = LoopLevelOriginal;
- new_loop_level[j - 1].payload = cur_dep_dim++;
- new_loop_level[j - 1].parallel_level =
- stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level;
- break;
- case LoopLevelTile: {
- new_loop_level[j - 1].type = LoopLevelTile;
- int ref_level = stmt[*i].loop_level[pi_inverse[j - level]
- - 1].payload;
- if (ref_level >= level && ref_level < level + pi.size())
- new_loop_level[j - 1].payload = pi_inverse[ref_level
- - level];
- else
- new_loop_level[j - 1].payload = ref_level;
- new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
- - 1].parallel_level;
- break;
- }
- default:
- throw loop_error(
- "unknown loop level information for statement "
- + to_string(*i));
+ case LoopLevelOriginal:
+ new_loop_level[j - 1].type = LoopLevelOriginal;
+ new_loop_level[j - 1].payload = cur_dep_dim++;
+ new_loop_level[j - 1].parallel_level =
+ stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level;
+ break;
+ case LoopLevelTile: {
+ new_loop_level[j - 1].type = LoopLevelTile;
+ int ref_level = stmt[*i].loop_level[pi_inverse[j - level]
+ - 1].payload;
+ if (ref_level >= level && ref_level < level + pi.size())
+ new_loop_level[j - 1].payload = pi_inverse[ref_level
+ - level];
+ else
+ new_loop_level[j - 1].payload = ref_level;
+ new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+ - 1].parallel_level;
+ break;
+ }
+ default:
+ throw loop_error(
+ "unknown loop level information for statement "
+ + to_string(*i));
}
} else {
switch (stmt[*i].loop_level[j - 1].type) {
- case LoopLevelOriginal:
- new_loop_level[j - 1].type = LoopLevelOriginal;
- new_loop_level[j - 1].payload =
- stmt[*i].loop_level[j - 1].payload;
- new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
- - 1].parallel_level;
- break;
- case LoopLevelTile: {
- new_loop_level[j - 1].type = LoopLevelTile;
- int ref_level = stmt[*i].loop_level[j - 1].payload;
- if (ref_level >= level && ref_level < level + pi.size())
- new_loop_level[j - 1].payload = pi_inverse[ref_level
- - level];
- else
- new_loop_level[j - 1].payload = ref_level;
- new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
- - 1].parallel_level;
- break;
- }
- default:
- throw loop_error(
- "unknown loop level information for statement "
- + to_string(*i));
+ case LoopLevelOriginal:
+ new_loop_level[j - 1].type = LoopLevelOriginal;
+ new_loop_level[j - 1].payload =
+ stmt[*i].loop_level[j - 1].payload;
+ new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+ - 1].parallel_level;
+ break;
+ case LoopLevelTile: {
+ new_loop_level[j - 1].type = LoopLevelTile;
+ int ref_level = stmt[*i].loop_level[j - 1].payload;
+ if (ref_level >= level && ref_level < level + pi.size())
+ new_loop_level[j - 1].payload = pi_inverse[ref_level
+ - level];
+ else
+ new_loop_level[j - 1].payload = ref_level;
+ new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+ - 1].parallel_level;
+ break;
+ }
+ default:
+ throw loop_error(
+ "unknown loop level information for statement "
+ + to_string(*i));
}
}
stmt[*i].loop_level = new_loop_level;
}
-
+
setLexicalOrder(2 * level - 2, active, starting_order);
}
+
void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
if (active.size() == 0 || pi.size() == 0)
return;
-
+
// check for sanity of parameters
int level = pi[0];
for (int i = 1; i < pi.size(); i++)
@@ -287,14 +289,14 @@ void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
for (int j = 0; j < 2 * level - 3; j += 2)
if (lex[j] != lex2[j])
throw std::invalid_argument(
- "statements to permute must be in the same subloop");
+ "statements to permute must be in the same subloop");
for (int j = 0; j < pi.size(); j++)
if (!(stmt[*i].loop_level[level + j - 1].type
== stmt[ref_stmt_num].loop_level[level + j - 1].type
&& stmt[*i].loop_level[level + j - 1].payload
- == stmt[ref_stmt_num].loop_level[level + j - 1].payload))
+ == stmt[ref_stmt_num].loop_level[level + j - 1].payload))
throw std::invalid_argument(
- "permuted loops must have the same loop level types");
+ "permuted loops must have the same loop level types");
}
}
// invalidate saved codegen computation
@@ -302,7 +304,7 @@ void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
// Update transformation relations
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
int n = stmt[*i].xform.n_out();
@@ -328,11 +330,11 @@ void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
h.update_coef(mapping.output_var(2 * j), 1);
h.update_coef(mapping.input_var(2 * j), -1);
}
-
+
stmt[*i].xform = Composition(mapping, stmt[*i].xform);
stmt[*i].xform.simplify();
}
-
+
// get the permuation for dependence vectors
std::vector<int> t;
for (int i = 0; i < pi.size(); i++)
@@ -357,41 +359,41 @@ void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
dep_pi[i] = t[i - min_dep_dim];
for (int i = max_dep_dim + 1; i < num_dep_dim; i++)
dep_pi[i] = i;
-
+
dep.permute(dep_pi, active);
-
+
// update the dependence graph
DependenceGraph g(dep.num_dim());
for (int i = 0; i < dep.vertex.size(); i++)
g.insert();
for (int i = 0; i < dep.vertex.size(); i++)
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
+ dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();
j++) { //
if ((active.find(i) != active.end()
&& active.find(j->first) != active.end())) {
std::vector<DependenceVector> dv = j->second;
for (int k = 0; k < dv.size(); k++) {
switch (dv[k].type) {
- case DEP_W2R:
- case DEP_R2W:
- case DEP_W2W:
- case DEP_R2R: {
- std::vector<coef_t> lbounds(num_dep_dim);
- std::vector<coef_t> ubounds(num_dep_dim);
- for (int d = 0; d < num_dep_dim; d++) {
- lbounds[d] = dv[k].lbounds[dep_pi[d]];
- ubounds[d] = dv[k].ubounds[dep_pi[d]];
+ case DEP_W2R:
+ case DEP_R2W:
+ case DEP_W2W:
+ case DEP_R2R: {
+ std::vector<coef_t> lbounds(num_dep_dim);
+ std::vector<coef_t> ubounds(num_dep_dim);
+ for (int d = 0; d < num_dep_dim; d++) {
+ lbounds[d] = dv[k].lbounds[dep_pi[d]];
+ ubounds[d] = dv[k].ubounds[dep_pi[d]];
+ }
+ dv[k].lbounds = lbounds;
+ dv[k].ubounds = ubounds;
+ break;
}
- dv[k].lbounds = lbounds;
- dv[k].ubounds = ubounds;
- break;
- }
- case DEP_CONTROL: {
- break;
- }
- default:
- throw loop_error("unknown dependence type");
+ case DEP_CONTROL: {
+ break;
+ }
+ default:
+ throw loop_error("unknown dependence type");
}
}
g.connect(i, j->first, dv);
@@ -403,27 +405,27 @@ void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
std::vector<DependenceVector> dv = j->second;
for (int k = 0; k < dv.size(); k++)
switch (dv[k].type) {
- case DEP_W2R:
- case DEP_R2W:
- case DEP_W2W:
- case DEP_R2R: {
- for (int d = 0; d < num_dep_dim; d++)
- if (dep_pi[d] != d) {
- dv[k].lbounds[d] = -posInfinity;
- dv[k].ubounds[d] = posInfinity;
- }
- break;
- }
- case DEP_CONTROL:
- break;
- default:
- throw loop_error("unknown dependence type");
+ case DEP_W2R:
+ case DEP_R2W:
+ case DEP_W2W:
+ case DEP_R2R: {
+ for (int d = 0; d < num_dep_dim; d++)
+ if (dep_pi[d] != d) {
+ dv[k].lbounds[d] = -posInfinity;
+ dv[k].ubounds[d] = posInfinity;
+ }
+ break;
+ }
+ case DEP_CONTROL:
+ break;
+ default:
+ throw loop_error("unknown dependence type");
}
g.connect(i, j->first, dv);
}
}
dep = g;
-
+
// update loop level information
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
int cur_dep_dim = min_dep_dim;
@@ -431,65 +433,65 @@ void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) {
for (int j = 1; j <= stmt[*i].loop_level.size(); j++)
if (j >= level && j < level + pi.size()) {
switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) {
- case LoopLevelOriginal:
- new_loop_level[j - 1].type = LoopLevelOriginal;
- new_loop_level[j - 1].payload = cur_dep_dim++;
- new_loop_level[j - 1].parallel_level =
- stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level;
- break;
- case LoopLevelTile: {
- new_loop_level[j - 1].type = LoopLevelTile;
- int ref_level = stmt[*i].loop_level[reverse_pi[j - level]-1].payload;
- if (ref_level >= level && ref_level < level + pi.size())
- new_loop_level[j - 1].payload = reverse_pi[ref_level
- - level];
- else
- new_loop_level[j - 1].payload = ref_level;
- new_loop_level[j - 1].parallel_level =
- stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level;
- break;
- }
- default:
- throw loop_error(
- "unknown loop level information for statement "
- + to_string(*i));
+ case LoopLevelOriginal:
+ new_loop_level[j - 1].type = LoopLevelOriginal;
+ new_loop_level[j - 1].payload = cur_dep_dim++;
+ new_loop_level[j - 1].parallel_level =
+ stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level;
+ break;
+ case LoopLevelTile: {
+ new_loop_level[j - 1].type = LoopLevelTile;
+ int ref_level = stmt[*i].loop_level[reverse_pi[j - level] - 1].payload;
+ if (ref_level >= level && ref_level < level + pi.size())
+ new_loop_level[j - 1].payload = reverse_pi[ref_level
+ - level];
+ else
+ new_loop_level[j - 1].payload = ref_level;
+ new_loop_level[j - 1].parallel_level =
+ stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level;
+ break;
+ }
+ default:
+ throw loop_error(
+ "unknown loop level information for statement "
+ + to_string(*i));
}
} else {
switch (stmt[*i].loop_level[j - 1].type) {
- case LoopLevelOriginal:
- new_loop_level[j - 1].type = LoopLevelOriginal;
- new_loop_level[j - 1].payload =
- stmt[*i].loop_level[j - 1].payload;
- new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
- - 1].parallel_level;
- break;
- case LoopLevelTile: {
- new_loop_level[j - 1].type = LoopLevelTile;
- int ref_level = stmt[*i].loop_level[j - 1].payload;
- if (ref_level >= level && ref_level < level + pi.size())
- new_loop_level[j - 1].payload = reverse_pi[ref_level
- - level];
- else
- new_loop_level[j - 1].payload = ref_level;
- new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
- - 1].parallel_level;
- break;
- }
- default:
- throw loop_error(
- "unknown loop level information for statement "
- + to_string(*i));
+ case LoopLevelOriginal:
+ new_loop_level[j - 1].type = LoopLevelOriginal;
+ new_loop_level[j - 1].payload =
+ stmt[*i].loop_level[j - 1].payload;
+ new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+ - 1].parallel_level;
+ break;
+ case LoopLevelTile: {
+ new_loop_level[j - 1].type = LoopLevelTile;
+ int ref_level = stmt[*i].loop_level[j - 1].payload;
+ if (ref_level >= level && ref_level < level + pi.size())
+ new_loop_level[j - 1].payload = reverse_pi[ref_level
+ - level];
+ else
+ new_loop_level[j - 1].payload = ref_level;
+ new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j
+ - 1].parallel_level;
+ break;
+ }
+ default:
+ throw loop_error(
+ "unknown loop level information for statement "
+ + to_string(*i));
}
}
stmt[*i].loop_level = new_loop_level;
}
-
+
setLexicalOrder(2 * level - 2, active);
}
-void Loop::set_array_size(std::string name, int size ){
- array_dims.insert(std::pair<std::string, int >(name, size));
+void Loop::set_array_size(std::string name, int size) {
+ array_dims.insert(std::pair<std::string, int>(name, size));
}
@@ -499,23 +501,23 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
throw std::invalid_argument("invalid statement " + to_string(stmt_num));
if (level <= 0 || level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument("4invalid loop level " + to_string(level));
-
+
std::set<int> result;
int dim = 2 * level - 1;
std::vector<int> lex = getLexicalOrder(stmt_num);
std::set<int> same_loop = getStatements(lex, dim - 1);
-
+
Relation cond2 = copy(cond);
cond2.simplify();
cond2 = EQs_to_GEQs(cond2);
Conjunct *c = cond2.single_conjunct();
int cur_lex = lex[dim - 1];
-
+
for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
int max_level = (*gi).max_tuple_pos();
Relation single_cond(max_level);
single_cond.and_with_GEQ(*gi);
-
+
// TODO: should decide where to place newly created statements with
// complementary split condition from dependence graph.
bool place_after;
@@ -525,7 +527,7 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
place_after = true;
else
place_after = false;
-
+
bool temp_place_after; // = place_after;
bool assigned = false;
int part1_to_part2;
@@ -549,11 +551,11 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
Extend_Set(Complement(copy(single_cond)),
n - max_level));
}
-
+
//split dependence check
-
+
if (max_level > level) {
-
+
DNF_Iterator di1(stmt[*i].IS.query_DNF());
DNF_Iterator di2(part1.query_DNF());
for (; di1 && di2; di1++, di2++) {
@@ -569,34 +571,34 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
bool identical = false;
if (identical = !strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name())) {
-
+
for (; cvi1 && cvi2; cvi1++, cvi2++) {
-
+
if (((*cvi1).coef != (*cvi2).coef
|| (*ei1).get_const()
- != (*ei2).get_const())
+ != (*ei2).get_const())
|| (strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name()))) {
-
+
same++;
}
}
}
if ((same != 0) || !identical) {
-
+
dimension = dimension - 1;
-
+
while (stmt[*i].loop_level[dimension].type
== LoopLevelTile)
dimension =
- stmt[*i].loop_level[dimension].payload;
-
+ stmt[*i].loop_level[dimension].payload;
+
dimension = stmt[*i].loop_level[dimension].payload;
-
+
for (int i = 0; i < stmt.size(); i++) {
std::vector<std::pair<int, DependenceVector> > D;
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end(); j++) {
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
@@ -604,19 +606,19 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
if (dv.hasNegative(dimension)
&& !dv.quasi)
throw loop_error(
- "loop error: Split is illegal, dependence violation!");
-
+ "loop error: Split is illegal, dependence violation!");
+
}
}
}
-
+
}
-
+
GEQ_Iterator gi1 = (*di1)->GEQs();
GEQ_Iterator gi2 = (*di2)->GEQs();
-
+
for (; gi1 && gi2; gi++, gi2++) {
-
+
Constr_Vars_Iter cvi1(*gi1);
Constr_Vars_Iter cvi2(*gi2);
int dimension = (*cvi1).var->get_position();
@@ -624,33 +626,33 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
bool identical = false;
if (identical = !strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name())) {
-
+
for (; cvi1 && cvi2; cvi1++, cvi2++) {
-
+
if (((*cvi1).coef != (*cvi2).coef
|| (*gi1).get_const()
- != (*gi2).get_const())
+ != (*gi2).get_const())
|| (strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name()))) {
-
+
same++;
}
}
}
if ((same != 0) || !identical) {
dimension = dimension - 1;
-
+
while (stmt[*i].loop_level[dimension].type
== LoopLevelTile)
stmt[*i].loop_level[dimension].payload;
-
+
dimension =
- stmt[*i].loop_level[dimension].payload;
-
+ stmt[*i].loop_level[dimension].payload;
+
for (int i = 0; i < stmt.size(); i++) {
std::vector<std::pair<int, DependenceVector> > D;
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end();
j++) {
for (int k = 0; k < j->second.size();
@@ -659,22 +661,22 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
if (dv.type != DEP_CONTROL)
if (dv.hasNegative(dimension)
&& !dv.quasi)
-
+
throw loop_error(
- "loop error: Split is illegal, dependence violation!");
-
+ "loop error: Split is illegal, dependence violation!");
+
}
}
}
-
+
}
-
+
}
-
+
}
-
+
}
-
+
DNF_Iterator di3(stmt[*i].IS.query_DNF());
DNF_Iterator di4(part2.query_DNF()); //
for (; di3 && di4; di3++, di4++) {
@@ -688,52 +690,52 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
bool identical = false;
if (identical = !strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name())) {
-
+
for (; cvi1 && cvi2; cvi1++, cvi2++) {
-
+
if (((*cvi1).coef != (*cvi2).coef
|| (*ei1).get_const()
- != (*ei2).get_const())
+ != (*ei2).get_const())
|| (strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name()))) {
-
+
same++;
}
}
}
if ((same != 0) || !identical) {
dimension = dimension - 1;
-
+
while (stmt[*i].loop_level[dimension].type
== LoopLevelTile)
stmt[*i].loop_level[dimension].payload;
-
+
dimension = stmt[*i].loop_level[dimension].payload;
-
+
for (int i = 0; i < stmt.size(); i++) {
std::vector<std::pair<int, DependenceVector> > D;
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end(); j++) {
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
if (dv.type != DEP_CONTROL)
if (dv.hasNegative(dimension)
&& !dv.quasi)
-
+
throw loop_error(
- "loop error: Split is illegal, dependence violation!");
-
+ "loop error: Split is illegal, dependence violation!");
+
}
}
}
-
+
}
-
+
}
GEQ_Iterator gi1 = (*di3)->GEQs();
GEQ_Iterator gi2 = (*di4)->GEQs();
-
+
for (; gi1 && gi2; gi++, gi2++) {
Constr_Vars_Iter cvi1(*gi1);
Constr_Vars_Iter cvi2(*gi2);
@@ -742,66 +744,66 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
bool identical = false;
if (identical = !strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name())) {
-
+
for (; cvi1 && cvi2; cvi1++, cvi2++) {
-
+
if (((*cvi1).coef != (*cvi2).coef
|| (*gi1).get_const()
- != (*gi2).get_const())
+ != (*gi2).get_const())
|| (strcmp((*cvi1).var->char_name(),
(*cvi2).var->char_name()))) {
-
+
same++;
}
}
}
if ((same != 0) || !identical) {
dimension = dimension - 1;
-
+
while (stmt[*i].loop_level[dimension].type //
== LoopLevelTile)
stmt[*i].loop_level[dimension].payload;
-
+
dimension = stmt[*i].loop_level[dimension].payload;
-
+
for (int i = 0; i < stmt.size(); i++) {
std::vector<std::pair<int, DependenceVector> > D;
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end(); j++) {
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
if (dv.type != DEP_CONTROL)
if (dv.hasNegative(dimension)
&& !dv.quasi)
-
+
throw loop_error(
- "loop error: Split is illegal, dependence violation!");
-
+ "loop error: Split is illegal, dependence violation!");
+
}
}
}
-
+
}
-
+
}
-
+
}
-
+
}
-
+
stmt[*i].IS = part1;
-
+
int n1 = part2.n_set();
int m = this->known.n_set();
Relation test;
- if(m > n1)
+ if (m > n1)
test = Intersection(copy(this->known),
Extend_Set(copy(part2), m - part2.n_set()));
else
test = Intersection(copy(part2),
Extend_Set(copy(this->known), n1 - this->known.n_set()));
-
+
if (test.is_upper_bound_satisfiable()) {
Statement new_stmt;
new_stmt.code = stmt[*i].code->clone();
@@ -809,20 +811,20 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
new_stmt.xform = copy(stmt[*i].xform);
new_stmt.ir_stmt_node = NULL;
new_stmt.loop_level = stmt[*i].loop_level;
-
+
new_stmt.has_inspector = stmt[*i].has_inspector;
new_stmt.reduction = stmt[*i].reduction;
new_stmt.reductionOp = stmt[*i].reductionOp;
-
+
stmt_nesting_level_.push_back(stmt_nesting_level_[*i]);
-
-
+
+
if (place_after)
assign_const(new_stmt.xform, dim - 1, cur_lex + 1);
else
assign_const(new_stmt.xform, dim - 1, cur_lex - 1);
-
- fprintf(stderr, "loop_basic.cc L828 adding stmt %d\n", stmt.size());
+
+ fprintf(stderr, "loop_basic.cc L828 adding stmt %d\n", stmt.size());
stmt.push_back(new_stmt);
uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
@@ -832,7 +834,7 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
if (*i == stmt_num)
result.insert(stmt.size() - 1);
}
-
+
}
// make adjacent lexical number available for new statements
if (place_after) {
@@ -846,20 +848,20 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
int dep_dim = get_dep_dim_of(stmt_num, level);
for (int i = 0; i < old_num_stmt; i++) {
std::vector<std::pair<int, std::vector<DependenceVector> > > D;
-
+
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end(); j++) {
if (same_loop.find(i) != same_loop.end()) {
if (same_loop.find(j->first) != same_loop.end()) {
if (what_stmt_num.find(i) != what_stmt_num.end()
&& what_stmt_num.find(j->first)
- != what_stmt_num.end())
+ != what_stmt_num.end())
dep.connect(what_stmt_num[i],
what_stmt_num[j->first], j->second);
if (place_after
&& what_stmt_num.find(j->first)
- != what_stmt_num.end()) {
+ != what_stmt_num.end()) {
std::vector<DependenceVector> dvs;
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
@@ -871,11 +873,11 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
}
if (dvs.size() > 0)
D.push_back(
- std::make_pair(what_stmt_num[j->first],
- dvs));
+ std::make_pair(what_stmt_num[j->first],
+ dvs));
} else if (!place_after
&& what_stmt_num.find(i)
- != what_stmt_num.end()) {
+ != what_stmt_num.end()) {
std::vector<DependenceVector> dvs;
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
@@ -887,7 +889,7 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
}
if (dvs.size() > 0)
dep.connect(what_stmt_num[i], j->first, dvs);
-
+
}
} else {
if (what_stmt_num.find(i) != what_stmt_num.end())
@@ -896,17 +898,17 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {
} else if (same_loop.find(j->first) != same_loop.end()) {
if (what_stmt_num.find(j->first) != what_stmt_num.end())
D.push_back(
- std::make_pair(what_stmt_num[j->first],
- j->second));
+ std::make_pair(what_stmt_num[j->first],
+ j->second));
}
}
-
+
for (int j = 0; j < D.size(); j++)
dep.connect(i, D[j].first, D[j].second);
}
-
+
}
-
+
return result;
}
@@ -914,28 +916,28 @@ void Loop::skew(const std::set<int> &stmt_nums, int level,
const std::vector<int> &skew_amount) {
if (stmt_nums.size() == 0)
return;
-
+
// check for sanity of parameters
int ref_stmt_num = *(stmt_nums.begin());
for (std::set<int>::const_iterator i = stmt_nums.begin();
i != stmt_nums.end(); i++) {
if (*i < 0 || *i >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(*i));
+ "invalid statement number " + to_string(*i));
if (level < 1 || level > stmt[*i].loop_level.size())
throw std::invalid_argument(
- "5invalid loop level " + to_string(level));
+ "5invalid loop level " + to_string(level));
for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++)
if (skew_amount[j] != 0)
throw std::invalid_argument("invalid skewing formula");
}
-
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
// set trasformation relations
for (std::set<int>::const_iterator i = stmt_nums.begin();
i != stmt_nums.end(); i++) {
@@ -953,18 +955,18 @@ void Loop::skew(const std::set<int> &stmt_nums, int level,
for (int j = 0; j < skew_amount.size(); j++)
if (skew_amount[j] != 0)
h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]);
-
+
stmt[*i].xform = Composition(r, stmt[*i].xform);
stmt[*i].xform.simplify();
}
-
+
// update dependence graph
if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload;
for (std::set<int>::const_iterator i = stmt_nums.begin();
i != stmt_nums.end(); i++)
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[*i].second.begin();
+ dep.vertex[*i].second.begin();
j != dep.vertex[*i].second.end(); j++)
if (stmt_nums.find(j->first) != stmt_nums.end()) {
// dependence between skewed statements
@@ -984,7 +986,7 @@ void Loop::skew(const std::set<int> &stmt_nums, int level,
else {
if (cur_dep_dim != -1
&& !(dv.lbounds[cur_dep_dim] == 0
- && dv.ubounds[cur_dep_dim]== 0))
+ && dv.ubounds[cur_dep_dim] == 0))
lb = -posInfinity;
}
if (ub != posInfinity
@@ -1022,24 +1024,24 @@ void Loop::skew(const std::set<int> &stmt_nums, int level,
}
dv.lbounds[dep_dim] = lb;
dv.ubounds[dep_dim] = ub;
- if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim))
+ if ((dv.isCarried(dep_dim) && dv.hasPositive(dep_dim))
&& dv.quasi)
dv.quasi = false;
-
- if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim))
+
+ if ((dv.isCarried(dep_dim) && dv.hasNegative(dep_dim))
&& !dv.quasi)
throw loop_error(
- "loop error: Skewing is illegal, dependence violation!");
+ "loop error: Skewing is illegal, dependence violation!");
dv.lbounds[dep_dim] = lb;
dv.ubounds[dep_dim] = ub;
if ((dv.isCarried(dep_dim)
&& dv.hasPositive(dep_dim)) && dv.quasi)
dv.quasi = false;
-
+
if ((dv.isCarried(dep_dim)
&& dv.hasNegative(dep_dim)) && !dv.quasi)
throw loop_error(
- "loop error: Skewing is illegal, dependence violation!");
+ "loop error: Skewing is illegal, dependence violation!");
}
}
j->second = dvs;
@@ -1059,7 +1061,7 @@ void Loop::skew(const std::set<int> &stmt_nums, int level,
for (int i = 0; i < dep.vertex.size(); i++)
if (stmt_nums.find(i) == stmt_nums.end())
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end(); j++)
if (stmt_nums.find(j->first) != stmt_nums.end()) {
// dependence from unskewed statement to skewed statement becomes jumbled,
@@ -1081,34 +1083,34 @@ void Loop::skew(const std::set<int> &stmt_nums, int level,
void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) {
if (stmt_nums.size() == 0)
return;
-
+
// check for sanity of parameters
int ref_stmt_num = *(stmt_nums.begin());
for (std::set<int>::const_iterator i = stmt_nums.begin();
i != stmt_nums.end(); i++) {
if (*i < 0 || *i >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(*i));
+ "invalid statement number " + to_string(*i));
if (level < 1 || level > stmt[*i].loop_level.size())
throw std::invalid_argument(
- "6invalid loop level " + to_string(level));
+ "6invalid loop level " + to_string(level));
}
-
+
// do nothing
if (shift_amount == 0)
return;
-
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
// set trasformation relations
for (std::set<int>::const_iterator i = stmt_nums.begin();
i != stmt_nums.end(); i++) {
int n = stmt[*i].xform.n_out();
-
+
Relation r(n, n);
F_And *f_root = r.add_and();
for (int j = 1; j <= n; j++) {
@@ -1118,18 +1120,18 @@ void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) {
if (j == 2 * level)
h.update_const(shift_amount);
}
-
+
stmt[*i].xform = Composition(r, stmt[*i].xform);
stmt[*i].xform.simplify();
}
-
+
// update dependence graph
if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload;
for (std::set<int>::const_iterator i = stmt_nums.begin();
i != stmt_nums.end(); i++)
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[*i].second.begin();
+ dep.vertex[*i].second.begin();
j != dep.vertex[*i].second.end(); j++)
if (stmt_nums.find(j->first) == stmt_nums.end()) {
// dependence from shifted statement to unshifted statement
@@ -1148,7 +1150,7 @@ void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) {
for (int i = 0; i < dep.vertex.size(); i++)
if (stmt_nums.find(i) == stmt_nums.end())
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end(); j++)
if (stmt_nums.find(j->first) != stmt_nums.end()) {
// dependence from unshifted statement to shifted statement
@@ -1180,35 +1182,36 @@ void Loop::reverse(const std::set<int> &stmt_nums, int level) {
void Loop::fuse(const std::set<int> &stmt_nums, int level) {
if (stmt_nums.size() == 0 || stmt_nums.size() == 1)
return;
-
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
int dim = 2 * level - 1;
// check for sanity of parameters
std::vector<int> ref_lex;
int ref_stmt_num;
- apply_xform();
+ apply_xform();
for (std::set<int>::const_iterator i = stmt_nums.begin();
i != stmt_nums.end(); i++) {
if (*i < 0 || *i >= stmt.size()) {
- fprintf(stderr, "statement number %d should be in [0, %d)\n", *i, stmt.size());
+ fprintf(stderr, "statement number %d should be in [0, %d)\n", *i, stmt.size());
throw std::invalid_argument(
- "FUSE invalid statement number " + to_string(*i));
+ "FUSE invalid statement number " + to_string(*i));
}
if (level <= 0
- // || (level > (stmt[*i].xform.n_out() - 1) / 2
- // || level > stmt[*i].loop_level.size())
- ) {
+ // || (level > (stmt[*i].xform.n_out() - 1) / 2
+ // || level > stmt[*i].loop_level.size())
+ ) {
fprintf(stderr, "FUSE level %d ", level);
fprintf(stderr, "must be greater than zero and \n");
- fprintf(stderr, "must NOT be greater than (%d - 1)/2 == %d and\n", stmt[*i].xform.n_out(), (stmt[*i].xform.n_out() - 1) / 2);
+ fprintf(stderr, "must NOT be greater than (%d - 1)/2 == %d and\n", stmt[*i].xform.n_out(),
+ (stmt[*i].xform.n_out() - 1) / 2);
fprintf(stderr, "must NOT be greater than %d\n", stmt[*i].loop_level.size());
throw std::invalid_argument(
- "FUSE invalid loop level " + to_string(level));
+ "FUSE invalid loop level " + to_string(level));
}
if (ref_lex.size() == 0) {
ref_lex = getLexicalOrder(*i);
@@ -1218,11 +1221,11 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) {
for (int j = 0; j < dim - 1; j += 2)
if (lex[j] != ref_lex[j])
throw std::invalid_argument(
- "statements for fusion must be in the same level-"
- + to_string(level - 1) + " subloop");
+ "statements for fusion must be in the same level-"
+ + to_string(level - 1) + " subloop");
}
}
-
+
// collect lexicographical order values from to-be-fused statements
std::set<int> lex_values;
for (std::set<int>::const_iterator i = stmt_nums.begin();
@@ -1233,9 +1236,9 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) {
if (lex_values.size() == 1)
return;
// negative dependence would prevent fusion
-
+
int dep_dim = get_dep_dim_of(ref_stmt_num, level);
-
+
for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end();
i++) {
ref_lex[dim - 1] = *i;
@@ -1254,25 +1257,25 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) {
if (dvs[k].isCarried(dep_dim)
&& dvs[k].hasNegative(dep_dim))
throw loop_error(
- "loop error: statements " + to_string(*ii)
- + " and " + to_string(*jj)
- + " cannot be fused together due to negative dependence");
+ "loop error: statements " + to_string(*ii)
+ + " and " + to_string(*jj)
+ + " cannot be fused together due to negative dependence");
dvs = dep.getEdge(*jj, *ii);
for (int k = 0; k < dvs.size(); k++)
if (dvs[k].isCarried(dep_dim)
&& dvs[k].hasNegative(dep_dim))
throw loop_error(
- "loop error: statements " + to_string(*jj)
- + " and " + to_string(*ii)
- + " cannot be fused together due to negative dependence");
+ "loop error: statements " + to_string(*jj)
+ + " and " + to_string(*ii)
+ + " cannot be fused together due to negative dependence");
}
}
}
-
+
std::set<int> same_loop = getStatements(ref_lex, dim - 3);
-
+
std::vector<std::set<int> > s = sort_by_same_loops(same_loop, level);
-
+
std::vector<bool> s2;
for (int i = 0; i < s.size(); i++) {
@@ -1283,27 +1286,27 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) {
kk++)
for (int i = 0; i < s.size(); i++)
if (s[i].find(*kk) != s[i].end()) {
-
+
s2[i] = true;
}
-
+
try {
-
+
//Dependence Check for Ordering Constraint
//Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5,
// dep, dep_dim);
-
+
Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s, dep,
dep_dim);
std::cout << g;
s = typed_fusion(g, s2);
} catch (const loop_error &e) {
-
+
throw loop_error(
- "statements cannot be fused together due to negative dependence");
-
+ "statements cannot be fused together due to negative dependence");
+
}
-
+
int order = 0;
for (int i = 0; i < s.size(); i++) {
for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); it++) {
@@ -1314,7 +1317,7 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) {
//plan for selective typed fusion
-
+
/*
1. sort the lex values of the statements
2. construct induced graph on sorted statements
@@ -1419,7 +1422,6 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) {
}
-
void Loop::distribute(const std::set<int> &stmt_nums, int level) {
if (stmt_nums.size() == 0 || stmt_nums.size() == 1)
return;
@@ -1439,13 +1441,13 @@ void Loop::distribute(const std::set<int> &stmt_nums, int level) {
i != stmt_nums.end(); i++) {
if (*i < 0 || *i >= stmt.size())
throw std::invalid_argument(
- "invalid statement number " + to_string(*i));
-
+ "invalid statement number " + to_string(*i));
+
if (level < 1
|| (level > (stmt[*i].xform.n_out() - 1) / 2
|| level > stmt[*i].loop_level.size()))
throw std::invalid_argument(
- "8invalid loop level " + to_string(level));
+ "8invalid loop level " + to_string(level));
if (ref_lex.size() == 0) {
ref_lex = getLexicalOrder(*i);
ref_stmt_num = *i;
@@ -1454,8 +1456,8 @@ void Loop::distribute(const std::set<int> &stmt_nums, int level) {
for (int j = 0; j <= dim - 1; j += 2)
if (lex[j] != ref_lex[j])
throw std::invalid_argument(
- "statements for distribution must be in the same level-"
- + to_string(level) + " subloop");
+ "statements for distribution must be in the same level-"
+ + to_string(level) + " subloop");
}
}
@@ -1517,7 +1519,7 @@ void Loop::distribute(const std::set<int> &stmt_nums, int level) {
// nothing to distribute
if (s2.size() == 1)
throw loop_error(
- "loop error: no statement can be distributed due to dependence cycle");
+ "loop error: no statement can be distributed due to dependence cycle");
std::vector<std::set<int> > s3;
for (int i = 0; i < s2.size(); i++) {
std::set<int> t;
@@ -1564,16 +1566,14 @@ void Loop::distribute(const std::set<int> &stmt_nums, int level) {
order++;
}
// no need to update dependence graph
-
+
return;
}
-
-
std::vector<IR_ArrayRef *> FindOuterArrayRefs(IR_Code *ir,
std::vector<IR_ArrayRef *> &arr_refs) {
- std::vector<IR_ArrayRef *> to_return;
+ std::vector<IR_ArrayRef *> to_return;
for (int i = 0; i < arr_refs.size(); i++)
if (!ir->parent_is_array(arr_refs[i])) {
int j;
@@ -1587,38 +1587,36 @@ std::vector<IR_ArrayRef *> FindOuterArrayRefs(IR_Code *ir,
}
-
-
-
std::vector<std::vector<std::string> > constructInspectorVariables(IR_Code *ir,
- std::set<IR_ArrayRef *> &arr, std::vector<std::string> &index) {
-
- fprintf(stderr, "constructInspectorVariables()\n");
+ std::set<IR_ArrayRef *> &arr,
+ std::vector<std::string> &index) {
+
+ fprintf(stderr, "constructInspectorVariables()\n");
std::vector<std::vector<std::string> > to_return;
-
+
for (std::set<IR_ArrayRef *>::iterator i = arr.begin(); i != arr.end();
i++) {
-
+
std::vector<std::string> per_index;
-
+
CG_outputRepr *subscript = (*i)->index(0);
-
+
if ((*i)->n_dim() > 1)
throw ir_error(
- "multi-dimensional array support non-existent for flattening currently");
-
+ "multi-dimensional array support non-existent for flattening currently");
+
while (ir->QueryExpOperation(subscript) == IR_OP_ARRAY_VARIABLE) {
-
+
std::vector<CG_outputRepr *> v = ir->QueryExpOperand(subscript);
-
+
IR_ArrayRef *ref = static_cast<IR_ArrayRef *>(ir->Repr2Ref(v[0]));
//per_index.push_back(ref->name());
-
+
subscript = ref->index(0);
-
+
}
-
+
if (ir->QueryExpOperation(subscript) == IR_OP_VARIABLE) {
std::vector<CG_outputRepr *> v = ir->QueryExpOperand(subscript);
IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0]));
@@ -1627,25 +1625,25 @@ std::vector<std::vector<std::string> > constructInspectorVariables(IR_Code *ir,
for (j = 0; j < index.size(); j++)
if (index[j] == ref->name())
break;
-
+
if (j == index.size())
throw ir_error("Non index variable in array expression");
-
+
int k;
for (k = 0; k < to_return.size(); k++)
if (to_return[k][0] == ref->name())
break;
- if (k == to_return.size()) {
+ if (k == to_return.size()) {
to_return.push_back(per_index);
- fprintf(stderr, "adding index %s\n", ref->name().c_str());
+ fprintf(stderr, "adding index %s\n", ref->name().c_str());
}
-
+
}
-
+
}
-
+
return to_return;
-
+
}
/*std::vector<CG_outputRepr *> constructInspectorData(IR_Code *ir, std::vector<std::vector<std::string> > &indices){
@@ -1669,99 +1667,99 @@ std::vector<std::vector<std::string> > constructInspectorVariables(IR_Code *ir,
*/
-CG_outputRepr * checkAndGenerateIndirectMappings(CG_outputBuilder * ocg,
- std::vector<std::vector<std::string> > &indices,
- CG_outputRepr * instance, CG_outputRepr * class_def,
- CG_outputRepr * count_var) {
-
+CG_outputRepr *checkAndGenerateIndirectMappings(CG_outputBuilder *ocg,
+ std::vector<std::vector<std::string> > &indices,
+ CG_outputRepr *instance, CG_outputRepr *class_def,
+ CG_outputRepr *count_var) {
+
CG_outputRepr *to_return = NULL;
-
+
for (int i = 0; i < indices.size(); i++)
if (indices[i].size() > 1) {
std::string index = indices[i][indices[i].size() - 1];
CG_outputRepr *rep = ocg->CreateArrayRefExpression(
- ocg->CreateDotExpression(instance,
- ocg->lookup_member_data(class_def, index, instance)),
- count_var);
+ ocg->CreateDotExpression(instance,
+ ocg->lookup_member_data(class_def, index, instance)),
+ count_var);
for (int j = indices[i].size() - 2; j >= 0; j--)
rep = ocg->CreateArrayRefExpression(indices[i][j], rep);
-
+
CG_outputRepr *lhs = ocg->CreateArrayRefExpression(
- ocg->CreateDotExpression(instance,
- ocg->lookup_member_data(class_def, indices[i][0], instance)),
- count_var);
-
+ ocg->CreateDotExpression(instance,
+ ocg->lookup_member_data(class_def, indices[i][0], instance)),
+ count_var);
+
to_return = ocg->StmtListAppend(to_return,
ocg->CreateAssignment(0, lhs, rep));
-
+
}
-
+
return to_return;
-
+
}
CG_outputRepr *generatePointerAssignments(CG_outputBuilder *ocg,
std::string prefix_name,
std::vector<std::vector<std::string> > &indices,
- CG_outputRepr *instance,
+ CG_outputRepr *instance,
CG_outputRepr *class_def) {
-
+
fprintf(stderr, "generatePointerAssignments()\n");
CG_outputRepr *list = NULL;
- fprintf(stderr, "prefix '%s', %d indices\n", prefix_name.c_str(), indices.size());
+ fprintf(stderr, "prefix '%s', %d indices\n", prefix_name.c_str(), indices.size());
for (int i = 0; i < indices.size(); i++) {
-
+
std::string s = prefix_name + "_" + indices[i][0];
- fprintf(stderr, "s %s\n", s.c_str());
-
+ fprintf(stderr, "s %s\n", s.c_str());
+
// create a variable definition for a pointer to int with this name
// that seems to be the only actual result of this routine ...
//chillAST_VarDecl *vd = new chillAST_VarDecl( "int", prefix_name.c_str(), "*", NULL);
//vd->print(); printf("\n"); fflush(stdout);
//vd->dump(); printf("\n"); fflush(stdout);
-
+
CG_outputRepr *ptr_exp = ocg->CreatePointer(s); // but dropped on the floor. unused
//fprintf(stderr, "ptr_exp created\n");
-
+
//CG_outputRepr *rhs = ocg->CreateDotExpression(instance,
// ocg->lookup_member_data(class_def, indices[i][0], instance));
-
+
//CG_outputRepr *ptr_assignment = ocg->CreateAssignment(0, ptr_exp, rhs);
-
+
//list = ocg->StmtListAppend(list, ptr_assignment);
-
+
}
-
+
fprintf(stderr, "generatePointerAssignments() DONE\n\n");
return list;
}
void Loop::normalize(int stmt_num, int loop_level) {
-
+
if (stmt_num < 0 || stmt_num >= stmt.size())
throw std::invalid_argument(
-
- "invalid statement number " + to_string(stmt_num));
-
+
+ "invalid statement number " + to_string(stmt_num));
+
if (loop_level <= 0)
throw std::invalid_argument(
- "12invalid loop level " + to_string(loop_level));
+ "12invalid loop level " + to_string(loop_level));
if (loop_level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument(
- "there is no loop level " + to_string(loop_level)
- + " for statement " + to_string(stmt_num));
-
+ "there is no loop level " + to_string(loop_level)
+ + " for statement " + to_string(stmt_num));
+
apply_xform(stmt_num);
-
+
Relation r = copy(stmt[stmt_num].IS);
-
+
Relation bound = get_loop_bound(r, loop_level, this->known);
if (!bound.has_single_conjunct() || !bound.is_satisfiable()
|| bound.is_tautology())
throw loop_error("unable to extract loop bound for normalize");
-
+
// extract the loop stride
coef_t stride;
std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
@@ -1770,31 +1768,31 @@ void Loop::normalize(int stmt_num, int loop_level) {
stride = 1;
else
stride = abs(result.first.get_coef(result.second))
- / gcd(abs(result.first.get_coef(result.second)),
- abs(result.first.get_coef(bound.set_var(loop_level))));
-
+ / gcd(abs(result.first.get_coef(result.second)),
+ abs(result.first.get_coef(bound.set_var(loop_level))));
+
if (stride != 1)
throw loop_error(
- "normalize currently only handles unit stride, non unit stride present in loop bounds");
-
+ "normalize currently only handles unit stride, non unit stride present in loop bounds");
+
GEQ_Handle lb;
-
+
Conjunct *c = bound.query_DNF()->single_conjunct();
for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
int coef = (*gi).get_coef(bound.set_var(loop_level));
if (coef > 0)
lb = *gi;
}
-
+
//Loop bound already zero
//Nothing to do.
if (lb.is_const(bound.set_var(loop_level)) && lb.get_const() == 0)
return;
-
+
if (lb.is_const_except_for_global(bound.set_var(loop_level))) {
-
+
int n = stmt[stmt_num].xform.n_out();
-
+
Relation r(n, n);
F_And *f_root = r.add_and();
for (int j = 1; j <= n; j++)
@@ -1803,10 +1801,10 @@ void Loop::normalize(int stmt_num, int loop_level) {
h.update_coef(r.input_var(j), 1);
h.update_coef(r.output_var(j), -1);
}
-
+
stmt[stmt_num].xform = Composition(r, stmt[stmt_num].xform);
stmt[stmt_num].xform.simplify();
-
+
for (Constr_Vars_Iter ci(lb); ci; ci++) {
if ((*ci).var->kind() == Global_Var) {
Global_Var_ID g = (*ci).var->get_global_var();
@@ -1816,24 +1814,24 @@ void Loop::normalize(int stmt_num, int loop_level) {
else
v = stmt[stmt_num].xform.get_local(g,
(*ci).var->function_of());
-
+
F_And *f_super_root = stmt[stmt_num].xform.and_with_and();
F_Exists *f_exists = f_super_root->add_exists();
F_And *f_root = f_exists->add_and();
-
+
EQ_Handle h = f_root->add_EQ();
h.update_coef(stmt[stmt_num].xform.output_var(2 * loop_level),
1);
h.update_coef(stmt[stmt_num].xform.input_var(loop_level), -1);
h.update_coef(v, 1);
-
+
stmt[stmt_num].xform.simplify();
}
-
+
}
-
+
} else
throw loop_error("loop bounds too complex for normalize!");
-
+
}
diff --git a/src/transformations/loop_datacopy.cc b/src/transformations/loop_datacopy.cc
index 12d74fd..69fbd5b 100644
--- a/src/transformations/loop_datacopy.cc
+++ b/src/transformations/loop_datacopy.cc
@@ -27,7 +27,8 @@ using namespace omega;
// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i]
//
bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level,
- bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+ bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment,
+ int memory_type) {
//fprintf(stderr, "Loop::datacopy()\n");
// check for sanity of parameters
@@ -40,18 +41,17 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array
throw std::invalid_argument("invalid loop level " + to_string(level));
if (i == 0) {
std::vector<int> lex = getLexicalOrder(stmt_num);
- same_loop = getStatements(lex, 2*level-2);
- }
- else if (same_loop.find(stmt_num) == same_loop.end())
+ same_loop = getStatements(lex, 2 * level - 2);
+ } else if (same_loop.find(stmt_num) == same_loop.end())
throw std::invalid_argument("array references for data copy must be located in the same subloop");
}
-
+
// convert array reference numbering scheme to actual array references
std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
for (int i = 0; i < array_ref_nums.size(); i++) {
if (array_ref_nums[i].second.size() == 0)
continue;
-
+
int stmt_num = array_ref_nums[i].first;
selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>()));
std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
@@ -61,9 +61,10 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array
if (ref_num < 0 || ref_num >= refs.size()) {
for (int k = 0; k < refs.size(); k++)
delete refs[k];
- throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
+ throw std::invalid_argument(
+ "invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
}
- selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]);
+ selected_refs[selected_refs.size() - 1].second.push_back(refs[ref_num]);
selected[ref_num] = true;
}
for (int j = 0; j < refs.size(); j++)
@@ -72,9 +73,10 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array
}
if (selected_refs.size() == 0)
throw std::invalid_argument("found no array references to copy");
-
+
// do the copy
- bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+ bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read,
+ fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
return whatever;
}
@@ -84,9 +86,10 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array
// parameter array_name=A means to copy data touched by A[i-1] and A[i]
//
bool Loop::datacopy(int stmt_num, int level, const std::string &array_name,
- bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+ bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment,
+ int memory_type) {
- fflush(stdout);
+ fflush(stdout);
//fprintf(stderr, "Loop::datacopy2()\n");
//fprintf(stderr, "array name %s stmt num %d\n", array_name.c_str(), stmt_num);
@@ -95,23 +98,23 @@ bool Loop::datacopy(int stmt_num, int level, const std::string &array_name,
throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
if (level <= 0 || level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument("invalid loop level " + to_string(level));
-
+
// collect array references by name
std::vector<int> lex = getLexicalOrder(stmt_num);
- int dim = 2*level - 1;
- std::set<int> same_loop = getStatements(lex, dim-1);
-
+ int dim = 2 * level - 1;
+ std::set<int> same_loop = getStatements(lex, dim - 1);
+
std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
std::vector<IR_ArrayRef *> t;
- std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
for (int j = 0; j < refs.size(); j++)
if (refs[j]->name() == array_name)
t.push_back(refs[j]);
else
delete refs[j];
if (t.size() != 0)
- selected_refs.push_back(std::make_pair(*i, t));
+ selected_refs.push_back(std::make_pair(*i, t));
}
//fprintf(stderr, "selected refs:\n");
@@ -122,27 +125,30 @@ bool Loop::datacopy(int stmt_num, int level, const std::string &array_name,
if (selected_refs.size() == 0)
throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
-
+
IR_ArrayRef *AR = selected_refs[0].second[0];
//IR_roseArrayRef *RAR = (IR_roseArrayRef *)AR;
//fprintf(stderr, "before datacopy_privatized, ");
//AR->Dump();
-
+
// do the copy
//fprintf(stderr, "\nLoop::datacopy2 calling privatized\n");
- bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+ bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read,
+ fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
//AR = selected_refs[0].second[0];
//fprintf(stderr, "after datacopy_privatized, ");
//AR->Dump();
-
+
return whatever;
}
-bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels,
- bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name,
+ const std::vector<int> &privatized_levels,
+ bool allow_extra_read, int fastest_changing_dimension, int padding_stride,
+ int padding_alignment, int memory_type) {
//fprintf(stderr, "Loop::datacopy_privatized()\n");
// check for sanity of parameters
@@ -150,33 +156,37 @@ bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array
throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
if (level <= 0 || level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument("invalid loop level " + to_string(level));
-
+
// collect array references by name
std::vector<int> lex = getLexicalOrder(stmt_num);
- int dim = 2*level - 1;
- std::set<int> same_loop = getStatements(lex, dim-1);
-
+ int dim = 2 * level - 1;
+ std::set<int> same_loop = getStatements(lex, dim - 1);
+
std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>()));
-
- std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
for (int j = 0; j < refs.size(); j++)
if (refs[j]->name() == array_name)
- selected_refs[selected_refs.size()-1].second.push_back(refs[j]);
+ selected_refs[selected_refs.size() - 1].second.push_back(refs[j]);
else
delete refs[j];
}
if (selected_refs.size() == 0)
throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
-
+
// do the copy
- bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+ bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read,
+ fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
return whatever;
}
-bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level,
+ const std::vector<int> &privatized_levels, bool allow_extra_read,
+ int fastest_changing_dimension, int padding_stride, int padding_alignment,
+ int memory_type) {
//fprintf(stderr, "Loop::datacopy_privatized2()\n");
// check for sanity of parameters
@@ -189,18 +199,17 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int>
throw std::invalid_argument("invalid loop level " + to_string(level));
if (i == 0) {
std::vector<int> lex = getLexicalOrder(stmt_num);
- same_loop = getStatements(lex, 2*level-2);
- }
- else if (same_loop.find(stmt_num) == same_loop.end())
+ same_loop = getStatements(lex, 2 * level - 2);
+ } else if (same_loop.find(stmt_num) == same_loop.end())
throw std::invalid_argument("array references for data copy must be located in the same subloop");
}
-
+
// convert array reference numbering scheme to actual array references
std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
for (int i = 0; i < array_ref_nums.size(); i++) {
if (array_ref_nums[i].second.size() == 0)
continue;
-
+
int stmt_num = array_ref_nums[i].first;
selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>()));
std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
@@ -210,9 +219,10 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int>
if (ref_num < 0 || ref_num >= refs.size()) {
for (int k = 0; k < refs.size(); k++)
delete refs[k];
- throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
+ throw std::invalid_argument(
+ "invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
}
- selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]);
+ selected_refs[selected_refs.size() - 1].second.push_back(refs[ref_num]);
selected[ref_num] = true;
}
for (int j = 0; j < refs.size(); j++)
@@ -221,10 +231,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int>
}
if (selected_refs.size() == 0)
throw std::invalid_argument("found no array references to copy");
-
+
// do the copy
- bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
- return whatever;
+ bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read,
+ fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+ return whatever;
}
@@ -232,13 +243,13 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int>
// Implement low level datacopy function with lots of options.
//
-bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs,
+bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs,
int level,
const std::vector<int> &privatized_levels,
- bool allow_extra_read,
+ bool allow_extra_read,
int fastest_changing_dimension,
- int padding_stride,
- int padding_alignment,
+ int padding_stride,
+ int padding_alignment,
int memory_type) {
//fprintf(stderr, "\nLoop::datacopy_privatized3() *****\n");
@@ -247,7 +258,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (stmt_refs.size() == 0)
return true;
-
+
// check for sanity of parameters
IR_ArraySymbol *sym = NULL;
std::vector<int> lex;
@@ -258,8 +269,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (i == 0) {
if (privatized_levels[i] < level)
throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level));
- }
- else if (privatized_levels[i] <= privatized_levels[i-1])
+ } else if (privatized_levels[i] <= privatized_levels[i - 1])
throw std::invalid_argument("privatized loop levels must be in ascending order");
}
for (int i = 0; i < stmt_refs.size(); i++) {
@@ -268,10 +278,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (stmt_num < 0 || stmt_num >= stmt.size())
throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
if (privatized_levels.size() != 0) {
- if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size())
- throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num));
- }
- else {
+ if (privatized_levels[privatized_levels.size() - 1] > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument(
+ "invalid loop level " + to_string(privatized_levels[privatized_levels.size() - 1]) + " for statement " +
+ to_string(stmt_num));
+ } else {
if (level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num));
}
@@ -279,8 +290,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (sym == NULL) {
sym = stmt_refs[i].second[j]->symbol();
lex = getLexicalOrder(stmt_num);
- }
- else {
+ } else {
IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol();
if (t->name() != sym->name()) {
delete t;
@@ -293,8 +303,10 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
}
//fprintf(stderr, "sym %p\n", sym);
- if (!sym) {
- fprintf(stderr, "sym NULL, gonna die\n"); int *i=0; int j=i[0];
+ if (!sym) {
+ fprintf(stderr, "sym NULL, gonna die\n");
+ int *i = 0;
+ int j = i[0];
}
if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim()))
@@ -303,31 +315,31 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
throw std::invalid_argument("invalid temporary array stride requirement");
if (padding_alignment == -1 || padding_alignment == 0)
throw std::invalid_argument("invalid temporary array alignment requirement");
-
- int dim = 2*level - 1;
+
+ int dim = 2 * level - 1;
int n_dim = sym->n_dim();
-
+
if (fastest_changing_dimension == -1)
switch (sym->layout_type()) {
- case IR_ARRAY_LAYOUT_ROW_MAJOR:
- fastest_changing_dimension = n_dim - 1;
- break;
- case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
- fastest_changing_dimension = 0;
- break;
- default:
- throw loop_error("unsupported array layout");
+ case IR_ARRAY_LAYOUT_ROW_MAJOR:
+ fastest_changing_dimension = n_dim - 1;
+ break;
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
+ fastest_changing_dimension = 0;
+ break;
+ default:
+ throw loop_error("unsupported array layout");
}
// OK, parameter sanity checked
-
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
// build iteration spaces for all reads and for all writes separately
//fprintf(stderr, "dp3: before apply_xform() ARRAY REFS\n");
//for (int i = 0; i < stmt_refs.size(); i++) {
@@ -360,29 +372,30 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
// fprintf(stderr, "\n");
//}
-
+
bool has_write_refs = false;
bool has_read_refs = false;
- Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim);
- Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim);
+ Relation wo_copy_is = Relation::False(level - 1 + privatized_levels.size() + n_dim);
+ Relation ro_copy_is = Relation::False(level - 1 + privatized_levels.size() + n_dim);
//fprintf(stderr, "\n\ni range: 0-%d\n", -1 + stmt_refs.size());
int stmt_num = stmt_refs[0].first;
for (int i = 0; i < stmt_refs.size(); i++) {
int stmt_num = stmt_refs[i].first;
-
+
//fprintf(stderr, "j range: 0-%d\n", -1 + stmt_refs[i].second.size());
for (int j = 0; j < stmt_refs[i].second.size(); j++) {
//fprintf(stderr, "ij %d %d\n", i, j);
- Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim);
+ Relation mapping(stmt[stmt_num].IS.n_set(), level - 1 + privatized_levels.size() + n_dim);
for (int k = 1; k <= mapping.n_inp(); k++)
mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name());
mapping.setup_names();
- mapping.print(); fflush(stdout); // "{[I] -> [_t1] : I = _t1 }
+ mapping.print();
+ fflush(stdout); // "{[I] -> [_t1] : I = _t1 }
F_And *f_root = mapping.add_and();
- for (int k = 1; k <= level-1; k++) {
+ for (int k = 1; k <= level - 1; k++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(mapping.input_var(k), 1);
h.update_coef(mapping.output_var(k), -1);
@@ -390,7 +403,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
for (int k = 0; k < privatized_levels.size(); k++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(mapping.input_var(privatized_levels[k]), 1);
- h.update_coef(mapping.output_var(level+k), -1);
+ h.update_coef(mapping.output_var(level + k), -1);
}
for (int k = 0; k < n_dim; k++) {
IR_ArrayRef *AR = stmt_refs[i].second[j];
@@ -400,37 +413,39 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
CG_outputRepr *repr = stmt_refs[i].second[j]->index(k);
//fprintf(stderr, "k %d j %d repr ", k, j); repr->dump(); fflush(stdout);
- exp2formula(ir,
- mapping,
- f_root,
- freevar,
- repr,
- mapping.output_var(level-1+privatized_levels.size()+k+1),
- 'w',
- IR_COND_EQ,
+ exp2formula(ir,
+ mapping,
+ f_root,
+ freevar,
+ repr,
+ mapping.output_var(level - 1 + privatized_levels.size() + k + 1),
+ 'w',
+ IR_COND_EQ,
false,
uninterpreted_symbols[stmt_num],
uninterpreted_symbols_stringrepr[stmt_num]);
repr->clear();
delete repr;
}
- Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set()))));
+ Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS),
+ Extend_Set(copy(this->known),
+ stmt[stmt_num].IS.n_set() -
+ this->known.n_set()))));
if (stmt_refs[i].second[j]->is_write()) {
has_write_refs = true;
wo_copy_is = Union(wo_copy_is, r);
wo_copy_is.simplify(2, 4);
-
-
- }
- else {
+
+
+ } else {
has_read_refs = true;
ro_copy_is = Union(ro_copy_is, r);
ro_copy_is.simplify(2, 4);
-
+
}
}
}
-
+
//fprintf(stderr, "dp3: simplify\n");
// simplify read and write footprint iteration space
{
@@ -438,7 +453,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
ro_copy_is = SimpleHull(ro_copy_is, true, true);
else
ro_copy_is = ConvexRepresentation(ro_copy_is);
-
+
wo_copy_is = ConvexRepresentation(wo_copy_is);
if (wo_copy_is.number_of_conjuncts() > 1) {
Relation t = SimpleHull(wo_copy_is, true, true);
@@ -448,7 +463,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
wo_copy_is = ro_copy_is;
}
}
-
+
// make copy statement variable names match the ones in the original statements which
// already have the same names due to apply_xform
{
@@ -463,11 +478,12 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
}
for (int i = 0; i < privatized_levels.size(); i++) {
std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name();
- wo_copy_is.name_set_var(level+i, s);
- ro_copy_is.name_set_var(level+i, s);
+ wo_copy_is.name_set_var(level + i, s);
+ ro_copy_is.name_set_var(level + i, s);
}
- for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) {
- std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size());
+ for (int i = level + privatized_levels.size(); i < level + privatized_levels.size() + n_dim; i++) {
+ std::string s =
+ tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter + i - level - privatized_levels.size());
wo_copy_is.name_set_var(i, s);
ro_copy_is.name_set_var(i, s);
}
@@ -475,11 +491,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
wo_copy_is.setup_names();
ro_copy_is.setup_names();
}
-
+
//fprintf(stderr, "\ndp3: build merged\n");
// build merged footprint iteration space for calculating temporary array size
Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true);
-
+
// extract temporary array information
CG_outputBuilder *ocg = ir->builder();
std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL
@@ -487,31 +503,35 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
std::vector<bool> is_index_eq(n_dim, false);
std::vector<std::pair<int, CG_outputRepr *> > index_sz(0);
Relation reduced_copy_is = copy(copy_is);
-
+
for (int i = 0; i < n_dim; i++) {
//fprintf(stderr, "i %d/%d\n", i, n_dim);
if (i != 0)
- reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var);
- Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i);
-
+ reduced_copy_is = Project(reduced_copy_is, level - 1 + privatized_levels.size() + i, Set_Var);
+ Relation bound = get_loop_bound(reduced_copy_is, level - 1 + privatized_levels.size() + i);
+
//fprintf(stderr, "dp3: extract stride\n");
// extract stride
- std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1));
+ std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(
+ level - 1 + privatized_levels.size() + i + 1));
if (result.second != NULL)
- index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1))));
+ index_stride[i] = abs(result.first.get_coef(result.second)) / gcd(abs(result.first.get_coef(result.second)),
+ abs(result.first.get_coef(bound.set_var(
+ level - 1 + privatized_levels.size() + i +
+ 1))));
else
index_stride[i] = 1;
//fprintf(stderr, "dp3: index_stride[%d] = %d\n", i, index_stride[i]);
-
+
// check if this array index requires loop
Conjunct *c = bound.query_DNF()->single_conjunct();
for (EQ_Iterator ei(c->EQs()); ei; ei++) {
//fprintf(stderr, "dp3: for\n");
if ((*ei).has_wildcards())
continue;
-
+
//fprintf(stderr, "dp3: no wildcards\n");
- int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1));
+ int coef = (*ei).get_coef(bound.set_var(level - 1 + privatized_levels.size() + i + 1));
if (coef != 0) {
//fprintf(stderr, "coef != 0\n");
int sign = 1;
@@ -520,51 +540,53 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
coef = -coef;
sign = -1;
}
-
+
CG_outputRepr *op = NULL;
for (Constr_Vars_Iter ci(*ei); ci; ci++) {
//fprintf(stderr, "dp3: ci\n");
switch ((*ci).var->kind()) {
- case Input_Var:
- {
- //fprintf(stderr, "dp3: Input_Var\n");
- if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) {
- //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign);
-
- if ((*ci).coef*sign == 1)
- op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name()));
- else if ((*ci).coef*sign == -1)
- op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name()));
- else if ((*ci).coef*sign > 1)
- op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name())));
+ case Input_Var: {
+ //fprintf(stderr, "dp3: Input_Var\n");
+ if ((*ci).var != bound.set_var(level - 1 + privatized_levels.size() + i + 1)) {
+ //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign);
+
+ if ((*ci).coef * sign == 1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef * sign == -1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef * sign > 1)
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)),
+ ocg->CreateIdent((*ci).var->name())));
+ else // (*ci).coef*sign < -1
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)),
+ ocg->CreateIdent((*ci).var->name())));
+ }
+ break;
+ }
+ case Global_Var: {
+ //fprintf(stderr, "dp3: Global_Var\n");
+ Global_Var_ID g = (*ci).var->get_global_var();
+ if ((*ci).coef * sign == 1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef * sign == -1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef * sign > 1)
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)),
+ ocg->CreateIdent(g->base_name())));
else // (*ci).coef*sign < -1
- op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name())));
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)),
+ ocg->CreateIdent(g->base_name())));
+ break;
}
- break;
- }
- case Global_Var:
- {
- //fprintf(stderr, "dp3: Global_Var\n");
- Global_Var_ID g = (*ci).var->get_global_var();
- if ((*ci).coef*sign == 1)
- op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
- else if ((*ci).coef*sign == -1)
- op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
- else if ((*ci).coef*sign > 1)
- op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name())));
- else // (*ci).coef*sign < -1
- op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name())));
- break;
- }
- default:
- throw loop_error("unsupported array index expression");
+ default:
+ throw loop_error("unsupported array index expression");
}
}
if ((*ei).get_const() != 0)
- op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const())));
+ op = ocg->CreatePlus(op, ocg->CreateInt(-sign * ((*ei).get_const())));
if (coef != 1)
op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef));
-
+
index_lb[i] = op;
is_index_eq[i] = true;
break;
@@ -572,14 +594,14 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
}
if (is_index_eq[i])
continue;
-
+
//fprintf(stderr, "dp3: separate lower and upper bounds\n");
// separate lower and upper bounds
std::vector<GEQ_Handle> lb_list, ub_list;
std::set<Variable_ID> excluded_floor_vars;
- excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1));
+ excluded_floor_vars.insert(bound.set_var(level - 1 + privatized_levels.size() + i + 1));
for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
- int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1));
+ int coef = (*gi).get_coef(bound.set_var(level - 1 + privatized_levels.size() + i + 1));
if (coef != 0 && (*gi).has_wildcards()) {
bool clean_bound = true;
GEQ_Handle h;
@@ -591,7 +613,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (!clean_bound)
continue;
}
-
+
if (coef > 0)
lb_list.push_back(*gi);
else if (coef < 0)
@@ -599,41 +621,45 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
}
if (lb_list.size() == 0 || ub_list.size() == 0)
throw loop_error("failed to calcuate array footprint size");
-
+
//fprintf(stderr, "dp3: build lower bound representation\n");
// build lower bound representation
std::vector<CG_outputRepr *> lb_repr_list;
- for (int j = 0; j < lb_list.size(); j++){
- if(this->known.n_set() == 0) {
- lb_repr_list.push_back(output_lower_bound_repr(ocg,
- lb_list[j],
- bound.set_var(level-1+privatized_levels.size()+i+1),
- result.first,
- result.second,
- bound,
- Relation::True(bound.n_set()),
- std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)),
+ for (int j = 0; j < lb_list.size(); j++) {
+ if (this->known.n_set() == 0) {
+ lb_repr_list.push_back(output_lower_bound_repr(ocg,
+ lb_list[j],
+ bound.set_var(level - 1 + privatized_levels.size() + i + 1),
+ result.first,
+ result.second,
+ bound,
+ Relation::True(bound.n_set()),
+ std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
uninterpreted_symbols[stmt_num]));
- }
- else {
- lb_repr_list.push_back(output_lower_bound_repr(ocg,
- lb_list[j],
- bound.set_var(level-1+privatized_levels.size()+i+1),
- result.first,
- result.second,
- bound,
- this->known,
- std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)),
+ } else {
+ lb_repr_list.push_back(output_lower_bound_repr(ocg,
+ lb_list[j],
+ bound.set_var(level - 1 + privatized_levels.size() + i + 1),
+ result.first,
+ result.second,
+ bound,
+ this->known,
+ std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
uninterpreted_symbols[stmt_num]));
}
}
- if (lb_repr_list.size() > 1) {
+ if (lb_repr_list.size() > 1) {
//fprintf(stderr, "loop_datacopy.cc dp3 createInvoke( max )\n");
index_lb[i] = ocg->CreateInvoke("max", lb_repr_list);
- }
- else if (lb_repr_list.size() == 1)
+ } else if (lb_repr_list.size() == 1)
index_lb[i] = lb_repr_list[0];
-
+
//fprintf(stderr, "dp3: build temporary array size representation\n");
// build temporary array size representation
{
@@ -642,66 +668,62 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
for (int j = 0; j < ub_list.size(); j++)
for (int k = 0; k < lb_list.size(); k++) {
GEQ_Handle h = f_root->add_GEQ();
-
+
for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var:
- {
- int pos = (*ci).var->get_position();
- h.update_coef(cal.input_var(pos), (*ci).coef);
- break;
- }
- case Global_Var:
- {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = cal.get_local(g);
- else
- v = cal.get_local(g, (*ci).var->function_of());
- h.update_coef(v, (*ci).coef);
- break;
- }
- default:
- throw loop_error("cannot calculate temporay array size statically");
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot calculate temporay array size statically");
}
}
h.update_const(ub_list[j].get_const());
-
+
for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var:
- {
- int pos = (*ci).var->get_position();
- h.update_coef(cal.input_var(pos), (*ci).coef);
- break;
- }
- case Global_Var:
- {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = cal.get_local(g);
- else
- v = cal.get_local(g, (*ci).var->function_of());
- h.update_coef(v, (*ci).coef);
- break;
- }
- default:
- throw loop_error("cannot calculate temporay array size statically");
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot calculate temporay array size statically");
}
}
h.update_const(lb_list[k].get_const());
-
+
h.update_const(1);
h.update_coef(cal.output_var(1), -1);
}
-
+
cal = Restrict_Domain(cal, copy(copy_is));
for (int j = 1; j <= cal.n_inp(); j++)
cal = Project(cal, j, Input_Var);
cal.simplify();
-
+
//fprintf(stderr, "dp3: pad temporary array size\n");
// pad temporary array size
// TODO: for variable array size, create padding formula
@@ -719,9 +741,8 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (padding_alignment > 1) { // align to boundary for data packing
int residue = size % padding_alignment;
if (residue)
- size = size+padding_alignment-residue;
- }
- else if (padding_alignment < -1) { // un-alignment for memory bank conflicts
+ size = size + padding_alignment - residue;
+ } else if (padding_alignment < -1) { // un-alignment for memory bank conflicts
while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1)
size++;
}
@@ -729,7 +750,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
index_sz.push_back(std::make_pair(i, ocg->CreateInt(size)));
is_index_bound_const = true;
}
-
+
if (!is_index_bound_const) {
for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) {
int coef = (*gi).get_coef(cal.output_var(1));
@@ -737,22 +758,23 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
CG_outputRepr *op = NULL;
for (Constr_Vars_Iter ci(*gi); ci; ci++) {
if ((*ci).var != cal.output_var(1)) {
- switch((*ci).var->kind()) {
- case Global_Var:
- {
- Global_Var_ID g = (*ci).var->get_global_var();
- if ((*ci).coef == 1)
- op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
- else if ((*ci).coef == -1)
- op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
- else if ((*ci).coef > 1)
- op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name())));
- else // (*ci).coef < -1
- op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name())));
- break;
- }
- default:
- throw loop_error("failed to generate array index bound code");
+ switch ((*ci).var->kind()) {
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ if ((*ci).coef == 1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef == -1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef > 1)
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef),
+ ocg->CreateIdent(g->base_name())));
+ else // (*ci).coef < -1
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef),
+ ocg->CreateIdent(g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error("failed to generate array index bound code");
}
}
}
@@ -766,16 +788,16 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride));
coef_t t1 = index_stride[i] / g;
if (t1 != 1)
- op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1));
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1 - 1)), ocg->CreateInt(t1));
coef_t t2 = padding_stride / g;
if (t2 != 1)
op = ocg->CreateTimes(op, ocg->CreateInt(t2));
- }
- else if (index_stride[i] != 1) {
- op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i]));
+ } else if (index_stride[i] != 1) {
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i] - 1)),
+ ocg->CreateInt(index_stride[i]));
}
}
-
+
index_sz.push_back(std::make_pair(i, op));
break;
}
@@ -783,20 +805,20 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
}
}
}
-
+
//fprintf(stderr, "dp3: change the temporary array index order\n");
// change the temporary array index order
for (int i = 0; i < index_sz.size(); i++) {
if (index_sz[i].first == fastest_changing_dimension)
switch (sym->layout_type()) {
- case IR_ARRAY_LAYOUT_ROW_MAJOR:
- std::swap(index_sz[index_sz.size()-1], index_sz[i]);
- break;
- case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
- std::swap(index_sz[0], index_sz[i]);
- break;
- default:
- throw loop_error("unsupported array layout");
+ case IR_ARRAY_LAYOUT_ROW_MAJOR:
+ std::swap(index_sz[index_sz.size() - 1], index_sz[i]);
+ break;
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
+ std::swap(index_sz[0], index_sz[i]);
+ break;
+ default:
+ throw loop_error("unsupported array layout");
}
}
@@ -806,51 +828,53 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (index_sz.size() == 0) {
//fprintf(stderr, "tmp_sym is a scalar\n");
tmp_sym = ir->CreateScalarSymbol(sym, memory_type);
- }
- else {
+ } else {
//fprintf(stderr, "tmp_sym is an array\n");
std::vector<CG_outputRepr *> tmp_array_size(index_sz.size());
- for (int i = 0; i < index_sz.size(); i++) {
+ for (int i = 0; i < index_sz.size(); i++) {
tmp_array_size[i] = index_sz[i].second->clone();
index_sz[i].second->dump(); // THIS PRINTF
}
tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type);
}
-
+
//fprintf(stderr, "dp3: create temporary array read initialization code\n");
// create temporary array read initialization code
CG_outputRepr *copy_code_read;
- if (has_read_refs) {
+ if (has_read_refs) {
//fprintf(stderr, "has read refs\n");
if (index_sz.size() == 0) {
- //fprintf(stderr, "if\n");
-
+ //fprintf(stderr, "if\n");
+
//fprintf(stderr, "tmp sym %s\n", tmp_sym->name().c_str());
- IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); // create ref from symbol
+ IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(
+ static_cast<IR_ScalarSymbol *>(tmp_sym)); // create ref from symbol
// tmp_scalar_ref is incomplete
std::vector<CG_outputRepr *> rhs_index(n_dim);
- for (int i = 0; i < index_lb.size(); i++) {
+ for (int i = 0; i < index_lb.size(); i++) {
//fprintf(stderr, "i %d\n", i);
if (is_index_eq[i])
rhs_index[i] = index_lb[i]->clone();
else
- rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ rhs_index[i] = ir->builder()->CreateIdent(
+ copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name());
}
IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
-
+
// IR_ScalarRef tmp_scalar_ref has no actual reference yet. It only has the variable definition.
copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert());
//fprintf(stderr, "if ends\n");
- }
- else {
+ } else {
//fprintf(stderr, "else\n");
std::vector<CG_outputRepr *> lhs_index(index_sz.size());
for (int i = 0; i < index_sz.size(); i++) {
int cur_index_num = index_sz[i].first;
- CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone());
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(
+ ocg->CreateIdent(copy_is.set_var(level - 1 + privatized_levels.size() + cur_index_num + 1)->name()),
+ index_lb[cur_index_num]->clone());
if (padding_stride != 0) {
- if (i == n_dim-1) {
+ if (i == n_dim - 1) {
coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
coef_t t1 = index_stride[cur_index_num] / g;
if (t1 != 1)
@@ -858,74 +882,78 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
coef_t t2 = padding_stride / g;
if (t2 != 1)
cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
- }
- else if (index_stride[cur_index_num] != 1) {
+ } else if (index_stride[cur_index_num] != 1) {
cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
}
}
-
+
if (ir->ArrayIndexStartAt() != 0)
cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
lhs_index[i] = cur_index_repr;
}
-
+
//fprintf(stderr, "dp3: making tmp_array_ref\n");
IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index);
//fprintf(stderr, "dp3: DONE making tmp_array_ref\n");
-
+
std::vector<CG_outputRepr *> rhs_index(n_dim);
for (int i = 0; i < index_lb.size(); i++)
if (is_index_eq[i])
rhs_index[i] = index_lb[i]->clone();
else
- rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ rhs_index[i] = ir->builder()->CreateIdent(
+ copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name());
IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
-
+
//fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment\n");
//copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert());
CG_outputRepr *lhs = tmp_array_ref->convert();
CG_outputRepr *rhs = copied_array_ref->convert();
- copy_code_read = ir->builder()->CreateAssignment(0, lhs, rhs); //tmp_array_ref->convert(), copied_array_ref->convert());
+ copy_code_read = ir->builder()->CreateAssignment(0, lhs,
+ rhs); //tmp_array_ref->convert(), copied_array_ref->convert());
//fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment DONE\n\n");
}
} // has read refs
-
+
//fprintf(stderr, "dp3: create temporary array write back code\n");
// create temporary array write back code
CG_outputRepr *copy_code_write;
- if (has_write_refs) {
+ if (has_write_refs) {
//fprintf(stderr, "has_write_refs\n");
if (index_sz.size() == 0) {
//fprintf(stderr, "index_sz.size() == 0\n");
IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym));
-
+
std::vector<CG_outputRepr *> rhs_index(n_dim);
for (int i = 0; i < index_lb.size(); i++)
if (is_index_eq[i])
rhs_index[i] = index_lb[i]->clone();
else
- rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ rhs_index[i] = ir->builder()->CreateIdent(
+ copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name());
IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
-
+
copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert());
- }
- else {
+ } else {
//fprintf(stderr, "index_sz.size() NOT = 0\n");
-
+
std::vector<CG_outputRepr *> lhs_index(n_dim);
for (int i = 0; i < index_lb.size(); i++)
if (is_index_eq[i])
lhs_index[i] = index_lb[i]->clone();
else
- lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ lhs_index[i] = ir->builder()->CreateIdent(
+ copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name());
IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index);
-
+
std::vector<CG_outputRepr *> rhs_index(index_sz.size());
for (int i = 0; i < index_sz.size(); i++) {
int cur_index_num = index_sz[i].first;
- CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone());
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(
+ ocg->CreateIdent(copy_is.set_var(level - 1 + privatized_levels.size() + cur_index_num + 1)->name()),
+ index_lb[cur_index_num]->clone());
if (padding_stride != 0) {
- if (i == n_dim-1) {
+ if (i == n_dim - 1) {
coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
coef_t t1 = index_stride[cur_index_num] / g;
if (t1 != 1)
@@ -933,96 +961,98 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
coef_t t2 = padding_stride / g;
if (t2 != 1)
cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
- }
- else if (index_stride[cur_index_num] != 1) {
+ } else if (index_stride[cur_index_num] != 1) {
cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
}
}
-
+
if (ir->ArrayIndexStartAt() != 0)
cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
rhs_index[i] = cur_index_repr;
}
IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index);
-
+
copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert());
}
} // has write refs
-
+
// now we can remove those loops for array indexes that are
// dependent on others
//fprintf(stderr, "dp3: now we can remove those loops\n");
if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) {
- Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size());
+ Relation mapping(level - 1 + privatized_levels.size() + n_dim,
+ level - 1 + privatized_levels.size() + index_sz.size());
F_And *f_root = mapping.add_and();
- for (int i = 1; i <= level-1+privatized_levels.size(); i++) {
+ for (int i = 1; i <= level - 1 + privatized_levels.size(); i++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(mapping.input_var(i), 1);
h.update_coef(mapping.output_var(i), -1);
}
-
+
int cur_index = 0;
std::vector<int> mapped_index(index_sz.size());
for (int i = 0; i < n_dim; i++)
if (!is_index_eq[i]) {
EQ_Handle h = f_root->add_EQ();
- h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1);
+ h.update_coef(mapping.input_var(level - 1 + privatized_levels.size() + i + 1), 1);
switch (sym->layout_type()) {
- case IR_ARRAY_LAYOUT_COLUMN_MAJOR: {
- h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1);
- mapped_index[index_sz.size()-cur_index-1] = i;
- break;
- }
- case IR_ARRAY_LAYOUT_ROW_MAJOR: {
- h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1);
- mapped_index[cur_index] = i;
- break;
- }
- default:
- throw loop_error("unsupported array layout");
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR: {
+ h.update_coef(mapping.output_var(level - 1 + privatized_levels.size() + index_sz.size() - cur_index), -1);
+ mapped_index[index_sz.size() - cur_index - 1] = i;
+ break;
+ }
+ case IR_ARRAY_LAYOUT_ROW_MAJOR: {
+ h.update_coef(mapping.output_var(level - 1 + privatized_levels.size() + cur_index + 1), -1);
+ mapped_index[cur_index] = i;
+ break;
+ }
+ default:
+ throw loop_error("unsupported array layout");
}
cur_index++;
}
-
+
wo_copy_is = omega::Range(Restrict_Domain(copy(mapping), wo_copy_is));
ro_copy_is = omega::Range(Restrict_Domain(copy(mapping), ro_copy_is));
- for (int i = 1; i <= level-1+privatized_levels.size(); i++) {
+ for (int i = 1; i <= level - 1 + privatized_levels.size(); i++) {
wo_copy_is.name_set_var(i, copy_is.set_var(i)->name());
ro_copy_is.name_set_var(i, copy_is.set_var(i)->name());
}
for (int i = 0; i < index_sz.size(); i++) {
- wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name());
- ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name());
+ wo_copy_is.name_set_var(level - 1 + privatized_levels.size() + i + 1,
+ copy_is.set_var(level - 1 + privatized_levels.size() + mapped_index[i] + 1)->name());
+ ro_copy_is.name_set_var(level - 1 + privatized_levels.size() + i + 1,
+ copy_is.set_var(level - 1 + privatized_levels.size() + mapped_index[i] + 1)->name());
}
wo_copy_is.setup_names();
ro_copy_is.setup_names();
}
-
+
// insert read copy statement
//fprintf(stderr, "dp3: insert read copy statement\n");
-
+
int old_num_stmt = stmt.size();
int ro_copy_stmt_num = -1;
if (has_read_refs) {
- Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1);
+ Relation copy_xform(ro_copy_is.n_set(), 2 * ro_copy_is.n_set() + 1);
{
F_And *f_root = copy_xform.add_and();
for (int i = 1; i <= ro_copy_is.n_set(); i++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(copy_xform.input_var(i), 1);
- h.update_coef(copy_xform.output_var(2*i), -1);
+ h.update_coef(copy_xform.output_var(2 * i), -1);
}
- for (int i = 1; i <= dim; i+=2) {
+ for (int i = 1; i <= dim; i += 2) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(copy_xform.output_var(i), -1);
- h.update_const(lex[i-1]);
+ h.update_const(lex[i - 1]);
}
- for (int i = dim+2; i <= copy_xform.n_out(); i+=2) {
+ for (int i = dim + 2; i <= copy_xform.n_out(); i += 2) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(copy_xform.output_var(i), 1);
}
}
-
+
Statement copy_stmt_read;
copy_stmt_read.IS = ro_copy_is;
copy_stmt_read.xform = copy_xform;
@@ -1031,7 +1061,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set());
copy_stmt_read.ir_stmt_node = NULL;
copy_stmt_read.has_inspector = false;
- for (int i = 0; i < level-1; i++) {
+ for (int i = 0; i < level - 1; i++) {
copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type;
if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile &&
stmt[*(active.begin())].loop_level[i].payload >= level) {
@@ -1043,32 +1073,33 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
copy_stmt_read.loop_level[i].payload = -1;
else
copy_stmt_read.loop_level[i].payload = level + j;
- }
- else
+ } else
copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload;
copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level;
}
for (int i = 0; i < privatized_levels.size(); i++) {
- copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
- copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
- copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
+ copy_stmt_read.loop_level[level - 1 + i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
+ copy_stmt_read.loop_level[level - 1 +
+ i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
+ copy_stmt_read.loop_level[level - 1 +
+ i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
}
int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1);
for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) {
- copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal;
- copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i;
- copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelOriginal;
+ copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].payload = num_dep_dim - left_num_dim + i;
+ copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0;
}
for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) {
- copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown;
- copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1;
- copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelUnknown;
+ copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].payload = -1;
+ copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0;
}
-
-
- shiftLexicalOrder(lex, dim-1, 1);
- fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size());
+
+ shiftLexicalOrder(lex, dim - 1, 1);
+
+ fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size());
stmt.push_back(copy_stmt_read);
uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]);
@@ -1076,30 +1107,30 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
ro_copy_stmt_num = stmt.size() - 1;
dep.insert();
}
-
+
//fprintf(stderr, "dp3: insert write copy statement\n");
// insert write copy statement
int wo_copy_stmt_num = -1;
if (has_write_refs) {
- Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1);
+ Relation copy_xform(wo_copy_is.n_set(), 2 * wo_copy_is.n_set() + 1);
{
F_And *f_root = copy_xform.add_and();
for (int i = 1; i <= wo_copy_is.n_set(); i++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(copy_xform.input_var(i), 1);
- h.update_coef(copy_xform.output_var(2*i), -1);
+ h.update_coef(copy_xform.output_var(2 * i), -1);
}
- for (int i = 1; i <= dim; i+=2) {
+ for (int i = 1; i <= dim; i += 2) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(copy_xform.output_var(i), -1);
- h.update_const(lex[i-1]);
+ h.update_const(lex[i - 1]);
}
- for (int i = dim+2; i <= copy_xform.n_out(); i+=2) {
+ for (int i = dim + 2; i <= copy_xform.n_out(); i += 2) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(copy_xform.output_var(i), 1);
}
}
-
+
Statement copy_stmt_write;
copy_stmt_write.IS = wo_copy_is;
copy_stmt_write.xform = copy_xform;
@@ -1107,8 +1138,8 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set());
copy_stmt_write.ir_stmt_node = NULL;
copy_stmt_write.has_inspector = false;
-
- for (int i = 0; i < level-1; i++) {
+
+ for (int i = 0; i < level - 1; i++) {
copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type;
if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile &&
stmt[*(active.begin())].loop_level[i].payload >= level) {
@@ -1120,31 +1151,32 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
copy_stmt_write.loop_level[i].payload = -1;
else
copy_stmt_write.loop_level[i].payload = level + j;
- }
- else
+ } else
copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload;
copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level;
}
for (int i = 0; i < privatized_levels.size(); i++) {
- copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
- copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
- copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
+ copy_stmt_write.loop_level[level - 1 + i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
+ copy_stmt_write.loop_level[level - 1 +
+ i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
+ copy_stmt_write.loop_level[level - 1 +
+ i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
}
int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1);
for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) {
- copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal;
- copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i;
- copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelOriginal;
+ copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].payload = num_dep_dim - left_num_dim + i;
+ copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0;
}
for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) {
- copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown;
- copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1;
- copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelUnknown;
+ copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].payload = -1;
+ copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0;
}
- lex[dim-1]++;
- shiftLexicalOrder(lex, dim-1, -2);
+ lex[dim - 1]++;
+ shiftLexicalOrder(lex, dim - 1, -2);
- fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size());
+ fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size());
stmt.push_back(copy_stmt_write);
uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]);
@@ -1152,24 +1184,24 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
wo_copy_stmt_num = stmt.size() - 1;
dep.insert();
}
-
+
//fprintf(stderr, "replace original array accesses with temporary array accesses\n");
// replace original array accesses with temporary array accesses
- for (int i =0; i < stmt_refs.size(); i++)
+ for (int i = 0; i < stmt_refs.size(); i++)
for (int j = 0; j < stmt_refs[i].second.size(); j++) {
if (index_sz.size() == 0) {
IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym));
//fprintf(stderr, "dp3: loop_datacopy.cc calling ReplaceExpression i%d j%d\n", i, j);
ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert());
- }
- else {
+ } else {
std::vector<CG_outputRepr *> index_repr(index_sz.size());
for (int k = 0; k < index_sz.size(); k++) {
int cur_index_num = index_sz[k].first;
-
- CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone());
+
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num),
+ index_lb[cur_index_num]->clone());
if (padding_stride != 0) {
- if (k == n_dim-1) {
+ if (k == n_dim - 1) {
coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
coef_t t1 = index_stride[cur_index_num] / g;
if (t1 != 1)
@@ -1177,23 +1209,22 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
coef_t t2 = padding_stride / g;
if (t2 != 1)
cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
- }
- else if (index_stride[cur_index_num] != 1) {
+ } else if (index_stride[cur_index_num] != 1) {
cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
}
}
-
+
if (ir->ArrayIndexStartAt() != 0)
cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
index_repr[k] = cur_index_repr;
}
-
+
IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr);
//fprintf(stderr, "loop_datacopy.cc ir->ReplaceExpression( ... )\n");
ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert());
}
}
-
+
// update dependence graph
//fprintf(stderr, "update dependence graph\n");
@@ -1201,7 +1232,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (ro_copy_stmt_num != -1) {
for (int i = 0; i < old_num_stmt; i++) {
std::vector<std::vector<DependenceVector> > D;
-
+
for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) {
if (active.find(i) != active.end() && active.find(j->first) == active.end()) {
std::vector<DependenceVector> dvs1, dvs2;
@@ -1215,8 +1246,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
j->second = dvs2;
if (dvs1.size() > 0)
dep.connect(ro_copy_stmt_num, j->first, dvs1);
- }
- else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
+ } else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
std::vector<DependenceVector> dvs1, dvs2;
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
@@ -1229,17 +1259,17 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (dvs1.size() > 0)
D.push_back(dvs1);
}
-
+
if (j->second.size() == 0)
dep.vertex[i].second.erase(j++);
else
j++;
}
-
+
for (int j = 0; j < D.size(); j++)
dep.connect(i, ro_copy_stmt_num, D[j]);
}
-
+
// insert dependences from copy statement loop to copied statements
//fprintf(stderr, "insert dependences from copy statement loop to copied statements\n");
@@ -1255,11 +1285,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
dep.connect(ro_copy_stmt_num, *i, dv);
}
-
+
if (wo_copy_stmt_num != -1) {
for (int i = 0; i < old_num_stmt; i++) {
std::vector<std::vector<DependenceVector> > D;
-
+
for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) {
if (active.find(i) != active.end() && active.find(j->first) == active.end()) {
std::vector<DependenceVector> dvs1, dvs2;
@@ -1273,8 +1303,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
j->second = dvs2;
if (dvs1.size() > 0)
dep.connect(wo_copy_stmt_num, j->first, dvs1);
- }
- else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
+ } else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
std::vector<DependenceVector> dvs1, dvs2;
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
@@ -1287,17 +1316,17 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
if (dvs1.size() > 0)
D.push_back(dvs1);
}
-
+
if (j->second.size() == 0)
dep.vertex[i].second.erase(j++);
else
j++;
}
-
+
for (int j = 0; j < D.size(); j++)
dep.connect(i, wo_copy_stmt_num, D[j]);
}
-
+
// insert dependences from copied statements to write statements
//fprintf(stderr, "dp3: insert dependences from copied statements to write statements\n");
@@ -1312,9 +1341,9 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
}
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
dep.connect(*i, wo_copy_stmt_num, dv);
-
+
}
-
+
// update variable name for dependences among copied statements
for (int i = 0; i < old_num_stmt; i++) {
if (active.find(i) != active.end())
@@ -1325,7 +1354,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
j->second[k].sym = s;
}
}
-
+
// insert anti-dependence from write statement to read statement
if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1)
if (dep_dim >= 0) {
@@ -1340,15 +1369,15 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
}
for (int k = 0; k < dep_dim; k++) {
if (k != 0) {
- dv.lbounds[k-1] = 0;
- dv.ubounds[k-1] = 0;
+ dv.lbounds[k - 1] = 0;
+ dv.ubounds[k - 1] = 0;
}
dv.lbounds[k] = 1;
dv.ubounds[k] = posInfinity;
dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv);
}
}
-
+
//fprintf(stderr, "Loop::datacopy_privatized3() cleanup\n");
// cleanup
delete sym;
@@ -1361,7 +1390,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A
index_sz[i].second->clear();
delete index_sz[i].second;
}
-
+
return true;
}
diff --git a/src/transformations/loop_extra.cc b/src/transformations/loop_extra.cc
index dac05bf..ee54815 100644
--- a/src/transformations/loop_extra.cc
+++ b/src/transformations/loop_extra.cc
@@ -25,43 +25,44 @@ void Loop::shift_to(int stmt_num, int level, int absolute_position) {
// combo
tile(stmt_num, level, 1, level, CountedTile);
std::vector<int> lex = getLexicalOrder(stmt_num);
- std::set<int> active = getStatements(lex, 2*level-2);
+ std::set<int> active = getStatements(lex, 2 * level - 2);
shift(active, level, absolute_position);
-
+
// remove unnecessary tiled loop since tile size is one
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {
int n = stmt[*i].xform.n_out();
- Relation mapping(n, n-2);
+ Relation mapping(n, n - 2);
F_And *f_root = mapping.add_and();
- for (int j = 1; j <= 2*level; j++) {
+ for (int j = 1; j <= 2 * level; j++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(mapping.output_var(j), 1);
h.update_coef(mapping.input_var(j), -1);
}
- for (int j = 2*level+3; j <= n; j++) {
+ for (int j = 2 * level + 3; j <= n; j++) {
EQ_Handle h = f_root->add_EQ();
- h.update_coef(mapping.output_var(j-2), 1);
+ h.update_coef(mapping.output_var(j - 2), 1);
h.update_coef(mapping.input_var(j), -1);
}
stmt[*i].xform = Composition(mapping, stmt[*i].xform);
stmt[*i].xform.simplify();
-
+
for (int j = 0; j < stmt[*i].loop_level.size(); j++)
- if (j != level-1 &&
+ if (j != level - 1 &&
stmt[*i].loop_level[j].type == LoopLevelTile &&
stmt[*i].loop_level[j].payload >= level)
stmt[*i].loop_level[j].payload--;
-
- stmt[*i].loop_level.erase(stmt[*i].loop_level.begin()+level-1);
+
+ stmt[*i].loop_level.erase(stmt[*i].loop_level.begin() + level - 1);
}
}
std::set<int> Loop::unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level) {
- std::set<int> cleanup_stmts = unroll(stmt_num, level, unroll_amount,std::vector< std::vector<std::string> >(), cleanup_split_level);
+ std::set<int> cleanup_stmts = unroll(stmt_num, level, unroll_amount, std::vector<std::vector<std::string> >(),
+ cleanup_split_level);
for (std::set<int>::iterator i = cleanup_stmts.begin(); i != cleanup_stmts.end(); i++)
unroll(*i, level, 0);
-
+
return cleanup_stmts;
}
@@ -71,10 +72,10 @@ void Loop::peel(int stmt_num, int level, int peel_amount) {
throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
if (level <= 0 || level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument("invalid loop level " + to_string(level));
-
+
if (peel_amount == 0)
return;
-
+
std::set<int> subloop = getSubLoopNest(stmt_num, level);
std::vector<Relation> Rs;
for (std::set<int>::iterator i = subloop.begin(); i != subloop.end(); i++) {
@@ -83,7 +84,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) {
F_And *f_root = f.add_and();
for (int j = 1; j <= level; j++) {
EQ_Handle h = f_root->add_EQ();
- h.update_coef(f.input_var(2*j), 1);
+ h.update_coef(f.input_var(2 * j), 1);
h.update_coef(f.output_var(j), -1);
}
r = Composition(f, r);
@@ -91,7 +92,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) {
Rs.push_back(r);
}
Relation hull = SimpleHull(Rs);
-
+
if (peel_amount > 0) {
GEQ_Handle bound_eq;
bool found_bound = false;
@@ -120,7 +121,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) {
}
if (!found_bound)
throw loop_error("can't find lower bound for peeling at loop level " + to_string(level));
-
+
for (int i = 1; i <= peel_amount; i++) {
Relation r(level);
F_Exists *f_exists = r.add_and()->add_exists();
@@ -129,34 +130,33 @@ void Loop::peel(int stmt_num, int level, int peel_amount) {
std::map<Variable_ID, Variable_ID> exists_mapping;
for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++)
switch (cvi.curr_var()->kind()) {
- case Input_Var:
- h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef());
- break;
- case Wildcard_Var: {
- Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping);
- h.update_coef(v, cvi.curr_coef());
- break;
- }
- case Global_Var: {
- Global_Var_ID g = cvi.curr_var()->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = r.get_local(g);
- else
- v = r.get_local(g, cvi.curr_var()->function_of());
- h.update_coef(v, cvi.curr_coef());
- break;
- }
- default:
- assert(false);
+ case Input_Var:
+ h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef());
+ break;
+ case Wildcard_Var: {
+ Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping);
+ h.update_coef(v, cvi.curr_coef());
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = cvi.curr_var()->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = r.get_local(g);
+ else
+ v = r.get_local(g, cvi.curr_var()->function_of());
+ h.update_coef(v, cvi.curr_coef());
+ break;
+ }
+ default:
+ assert(false);
}
h.update_const(bound_eq.get_const() - i);
r.simplify();
-
+
split(stmt_num, level, r);
}
- }
- else { // peel_amount < 0
+ } else { // peel_amount < 0
GEQ_Handle bound_eq;
bool found_bound = false;
for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++)
@@ -184,7 +184,7 @@ void Loop::peel(int stmt_num, int level, int peel_amount) {
}
if (!found_bound)
throw loop_error("can't find upper bound for peeling at loop level " + to_string(level));
-
+
for (int i = 1; i <= -peel_amount; i++) {
Relation r(level);
F_Exists *f_exists = r.add_and()->add_exists();
@@ -193,30 +193,30 @@ void Loop::peel(int stmt_num, int level, int peel_amount) {
std::map<Variable_ID, Variable_ID> exists_mapping;
for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++)
switch (cvi.curr_var()->kind()) {
- case Input_Var:
- h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef());
- break;
- case Wildcard_Var: {
- Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping);
- h.update_coef(v, cvi.curr_coef());
- break;
- }
- case Global_Var: {
- Global_Var_ID g = cvi.curr_var()->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = r.get_local(g);
- else
- v = r.get_local(g, cvi.curr_var()->function_of());
- h.update_coef(v, cvi.curr_coef());
- break;
- }
- default:
- assert(false);
+ case Input_Var:
+ h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef());
+ break;
+ case Wildcard_Var: {
+ Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping);
+ h.update_coef(v, cvi.curr_coef());
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = cvi.curr_var()->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = r.get_local(g);
+ else
+ v = r.get_local(g, cvi.curr_var()->function_of());
+ h.update_coef(v, cvi.curr_coef());
+ break;
+ }
+ default:
+ assert(false);
}
h.update_const(bound_eq.get_const() - i);
r.simplify();
-
+
split(stmt_num, level, r);
}
}
diff --git a/src/transformations/loop_tile.cc b/src/transformations/loop_tile.cc
index 41c3e7f..0a1808b 100644
--- a/src/transformations/loop_tile.cc
+++ b/src/transformations/loop_tile.cc
@@ -14,8 +14,6 @@
using namespace omega;
-
-
void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
TilingMethodType method, int alignment_offset, int alignment_multiple) {
// check for sanity of parameters
@@ -29,30 +27,30 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
throw std::invalid_argument("invalid loop level " + to_string(level));
if (level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument(
- "there is no loop level " + to_string(level) + " for statement "
- + to_string(stmt_num));
+ "there is no loop level " + to_string(level) + " for statement "
+ + to_string(stmt_num));
if (outer_level <= 0 || outer_level > level)
throw std::invalid_argument(
- "invalid tile controlling loop level "
- + to_string(outer_level));
-
+ "invalid tile controlling loop level "
+ + to_string(outer_level));
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
int dim = 2 * level - 1;
int outer_dim = 2 * outer_level - 1;
std::vector<int> lex = getLexicalOrder(stmt_num);
std::set<int> same_tiled_loop = getStatements(lex, dim - 1);
std::set<int> same_tile_controlling_loop = getStatements(lex,
outer_dim - 1);
-
+
for (std::set<int>::iterator i = same_tiled_loop.begin();
i != same_tiled_loop.end(); i++) {
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end();
+ dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end();
j++) {
if (same_tiled_loop.find(j->first) != same_tiled_loop.end())
for (int k = 0; k < j->second.size(); k++) {
@@ -63,34 +61,34 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
dim2 = stmt[*i].loop_level[dim2].payload - 1;
}
dim2 = stmt[*i].loop_level[dim2].payload;
-
+
if (dv.hasNegative(dim2) && (!dv.quasi)) {
for (int l = outer_level; l < level; l++)
if (stmt[*i].loop_level[l - 1].type
!= LoopLevelTile) {
if (dv.isCarried(
- stmt[*i].loop_level[l - 1].payload)
+ stmt[*i].loop_level[l - 1].payload)
&& dv.hasPositive(
- stmt[*i].loop_level[l - 1].payload))
+ stmt[*i].loop_level[l - 1].payload))
throw loop_error(
- "loop error: Tiling is illegal, dependence violation!");
+ "loop error: Tiling is illegal, dependence violation!");
} else {
-
+
int dim3 = l - 1;
while (stmt[*i].loop_level[l - 1].type
!= LoopLevelTile) {
dim3 =
- stmt[*i].loop_level[l - 1].payload
- - 1;
-
+ stmt[*i].loop_level[l - 1].payload
+ - 1;
+
}
-
+
dim3 = stmt[*i].loop_level[l - 1].payload;
if (dim3 < level - 1)
if (dv.isCarried(dim3)
&& dv.hasPositive(dim3))
throw loop_error(
- "loop error: Tiling is illegal, dependence violation!");
+ "loop error: Tiling is illegal, dependence violation!");
}
}
}
@@ -117,11 +115,11 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
h.update_coef(r.input_var(j), 1);
h.update_coef(r.output_var(j + 2), -1);
}
-
+
stmt[*i].xform = Composition(copy(r), stmt[*i].xform);
}
}
- // normal tiling
+ // normal tiling
else {
std::set<int> private_stmt;
for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
@@ -131,12 +129,12 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
&& overflow.find(*i) != overflow.end())
private_stmt.insert(*i);
}
-
+
// extract the union of the iteration space to be considered
Relation hull;
{
std::vector<Relation> r_list;
-
+
for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
i != same_tile_controlling_loop.end(); i++)
if (private_stmt.find(*i) == private_stmt.end()) {
@@ -150,28 +148,28 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
r.simplify(2, 4);
r_list.push_back(r);
}
-
+
hull = SimpleHull(r_list);
}
-
+
// extract the bound of the dimension to be tiled
Relation bound = get_loop_bound(hull, dim);
if (!bound.has_single_conjunct()) {
// further simplify the bound
hull = Approximate(hull);
bound = get_loop_bound(hull, dim);
-
+
int i = outer_dim - 2;
while (!bound.has_single_conjunct() && i >= 0) {
hull = Project(hull, i + 1, Set_Var);
bound = get_loop_bound(hull, dim);
i -= 2;
}
-
+
if (!bound.has_single_conjunct())
throw loop_error("cannot handle tile bounds");
}
-
+
// separate lower and upper bounds
std::vector<GEQ_Handle> lb_list, ub_list;
{
@@ -186,11 +184,11 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
}
if (lb_list.size() == 0)
throw loop_error(
- "unable to calculate tile controlling loop lower bound");
+ "unable to calculate tile controlling loop lower bound");
if (ub_list.size() == 0)
throw loop_error(
- "unable to calculate tile controlling loop upper bound");
-
+ "unable to calculate tile controlling loop upper bound");
+
// find the simplest lower bound for StridedTile or simplest iteration count for CountedTile
int simplest_lb = 0, simplest_ub = 0;
if (method == StridedTile) {
@@ -199,20 +197,20 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
int cost = 0;
for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- cost += 5;
- break;
- }
- case Global_Var: {
- cost += 2;
- break;
- }
- default:
- cost += 15;
- break;
+ case Input_Var: {
+ cost += 5;
+ break;
+ }
+ case Global_Var: {
+ cost += 2;
+ break;
+ }
+ default:
+ cost += 15;
+ break;
}
}
-
+
if (cost < best_cost) {
best_cost = cost;
simplest_lb = i;
@@ -224,67 +222,67 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
for (int i = 0; i < lb_list.size(); i++)
for (int j = 0; j < ub_list.size(); j++) {
int cost = 0;
-
+
for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- s1[(*ci).var] += (*ci).coef;
- break;
- }
- case Global_Var: {
- s2[(*ci).var] += (*ci).coef;
- break;
- }
- case Exists_Var:
- case Wildcard_Var: {
- s3[(*ci).var] += (*ci).coef;
- break;
- }
- default:
- cost = INT_MAX - 2;
- break;
+ case Input_Var: {
+ s1[(*ci).var] += (*ci).coef;
+ break;
+ }
+ case Global_Var: {
+ s2[(*ci).var] += (*ci).coef;
+ break;
+ }
+ case Exists_Var:
+ case Wildcard_Var: {
+ s3[(*ci).var] += (*ci).coef;
+ break;
+ }
+ default:
+ cost = INT_MAX - 2;
+ break;
}
}
-
+
for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- s1[(*ci).var] += (*ci).coef;
- break;
- }
- case Global_Var: {
- s2[(*ci).var] += (*ci).coef;
- break;
- }
- case Exists_Var:
- case Wildcard_Var: {
- s3[(*ci).var] += (*ci).coef;
- break;
- }
- default:
- if (cost == INT_MAX - 2)
- cost = INT_MAX - 1;
- else
- cost = INT_MAX - 3;
- break;
+ case Input_Var: {
+ s1[(*ci).var] += (*ci).coef;
+ break;
+ }
+ case Global_Var: {
+ s2[(*ci).var] += (*ci).coef;
+ break;
+ }
+ case Exists_Var:
+ case Wildcard_Var: {
+ s3[(*ci).var] += (*ci).coef;
+ break;
+ }
+ default:
+ if (cost == INT_MAX - 2)
+ cost = INT_MAX - 1;
+ else
+ cost = INT_MAX - 3;
+ break;
}
}
-
+
if (cost == 0) {
for (std::map<Variable_ID, coef_t>::iterator k =
- s1.begin(); k != s1.end(); k++)
+ s1.begin(); k != s1.end(); k++)
if ((*k).second != 0)
cost += 5;
for (std::map<Variable_ID, coef_t>::iterator k =
- s2.begin(); k != s2.end(); k++)
+ s2.begin(); k != s2.end(); k++)
if ((*k).second != 0)
cost += 2;
for (std::map<Variable_ID, coef_t>::iterator k =
- s3.begin(); k != s3.end(); k++)
+ s3.begin(); k != s3.end(); k++)
if ((*k).second != 0)
cost += 15;
}
-
+
if (cost < best_cost) {
best_cost = cost;
simplest_lb = i;
@@ -292,7 +290,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
}
}
}
-
+
// prepare the new transformation relations
for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
i != same_tile_controlling_loop.end(); i++) {
@@ -303,58 +301,58 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
h.update_coef(r.output_var(j + 1), 1);
h.update_coef(r.input_var(j + 1), -1);
}
-
+
for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) {
EQ_Handle h = f_root->add_EQ();
h.update_coef(r.output_var(j + 3), 1);
h.update_coef(r.input_var(j + 1), -1);
}
-
+
EQ_Handle h = f_root->add_EQ();
h.update_coef(r.output_var(outer_dim), 1);
h.update_const(-lex[outer_dim - 1]);
-
+
stmt[*i].xform = Composition(r, stmt[*i].xform);
}
-
+
// add tiling constraints.
for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
i != same_tile_controlling_loop.end(); i++) {
F_And *f_super_root = stmt[*i].xform.and_with_and();
F_Exists *f_exists = f_super_root->add_exists();
F_And *f_root = f_exists->add_and();
-
+
// create a lower bound variable for easy formula creation later
Variable_ID aligned_lb;
{
Variable_ID lb = f_exists->declare();
coef_t coef = lb_list[simplest_lb].get_coef(
- bound.set_var(dim + 1));
+ bound.set_var(dim + 1));
if (coef == 1) { // e.g. if i >= m+5, then LB = m+5
EQ_Handle h = f_root->add_EQ();
h.update_coef(lb, 1);
for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- int pos = (*ci).var->get_position();
- if (pos != dim + 1)
- h.update_coef(stmt[*i].xform.output_var(pos),
- (*ci).coef);
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = stmt[*i].xform.get_local(g);
- else
- v = stmt[*i].xform.get_local(g,
- (*ci).var->function_of());
- h.update_coef(v, (*ci).coef);
- break;
- }
- default:
- throw loop_error("cannot handle tile bounds");
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ if (pos != dim + 1)
+ h.update_coef(stmt[*i].xform.output_var(pos),
+ (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = stmt[*i].xform.get_local(g);
+ else
+ v = stmt[*i].xform.get_local(g,
+ (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot handle tile bounds");
}
}
h.update_const(lb_list[simplest_lb].get_const());
@@ -363,40 +361,40 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
GEQ_Handle h2 = f_root->add_GEQ();
for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- int pos = (*ci).var->get_position();
- if (pos == dim + 1) {
- h1.update_coef(lb, (*ci).coef);
- h2.update_coef(lb, -(*ci).coef);
- } else {
- h1.update_coef(stmt[*i].xform.output_var(pos),
- (*ci).coef);
- h2.update_coef(stmt[*i].xform.output_var(pos),
- -(*ci).coef);
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ if (pos == dim + 1) {
+ h1.update_coef(lb, (*ci).coef);
+ h2.update_coef(lb, -(*ci).coef);
+ } else {
+ h1.update_coef(stmt[*i].xform.output_var(pos),
+ (*ci).coef);
+ h2.update_coef(stmt[*i].xform.output_var(pos),
+ -(*ci).coef);
+ }
+ break;
}
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = stmt[*i].xform.get_local(g);
- else
- v = stmt[*i].xform.get_local(g,
- (*ci).var->function_of());
- h1.update_coef(v, (*ci).coef);
- h2.update_coef(v, -(*ci).coef);
- break;
- }
- default:
- throw loop_error("cannot handle tile bounds");
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = stmt[*i].xform.get_local(g);
+ else
+ v = stmt[*i].xform.get_local(g,
+ (*ci).var->function_of());
+ h1.update_coef(v, (*ci).coef);
+ h2.update_coef(v, -(*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot handle tile bounds");
}
}
h1.update_const(lb_list[simplest_lb].get_const());
h2.update_const(-lb_list[simplest_lb].get_const());
h2.update_const(coef - 1);
}
-
+
Variable_ID offset_lb;
if (alignment_offset == 0)
offset_lb = lb;
@@ -407,17 +405,17 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
h.update_coef(lb, -1);
h.update_const(alignment_offset);
}
-
+
if (alignment_multiple == 1) { // trivial
aligned_lb = offset_lb;
} else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB
aligned_lb = f_exists->declare();
Variable_ID e = f_exists->declare();
-
+
EQ_Handle h = f_root->add_EQ();
h.update_coef(aligned_lb, 1);
h.update_coef(e, -alignment_multiple);
-
+
GEQ_Handle h1 = f_root->add_GEQ();
GEQ_Handle h2 = f_root->add_GEQ();
h1.update_coef(e, alignment_multiple);
@@ -427,37 +425,37 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
h1.update_const(alignment_multiple - 1);
}
}
-
+
// create an upper bound variable for easy formula creation later
Variable_ID ub = f_exists->declare();
{
coef_t coef = -ub_list[simplest_ub].get_coef(
- bound.set_var(dim + 1));
+ bound.set_var(dim + 1));
if (coef == 1) { // e.g. if i <= m+5, then UB = m+5
EQ_Handle h = f_root->add_EQ();
h.update_coef(ub, -1);
for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- int pos = (*ci).var->get_position();
- if (pos != dim + 1)
- h.update_coef(stmt[*i].xform.output_var(pos),
- (*ci).coef);
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = stmt[*i].xform.get_local(g);
- else
- v = stmt[*i].xform.get_local(g,
- (*ci).var->function_of());
- h.update_coef(v, (*ci).coef);
- break;
- }
- default:
- throw loop_error("cannot handle tile bounds");
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ if (pos != dim + 1)
+ h.update_coef(stmt[*i].xform.output_var(pos),
+ (*ci).coef);
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = stmt[*i].xform.get_local(g);
+ else
+ v = stmt[*i].xform.get_local(g,
+ (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot handle tile bounds");
}
}
h.update_const(ub_list[simplest_ub].get_const());
@@ -466,33 +464,33 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
GEQ_Handle h2 = f_root->add_GEQ();
for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- int pos = (*ci).var->get_position();
- if (pos == dim + 1) {
- h1.update_coef(ub, -(*ci).coef);
- h2.update_coef(ub, (*ci).coef);
- } else {
- h1.update_coef(stmt[*i].xform.output_var(pos),
- -(*ci).coef);
- h2.update_coef(stmt[*i].xform.output_var(pos),
- (*ci).coef);
+ case Input_Var: {
+ int pos = (*ci).var->get_position();
+ if (pos == dim + 1) {
+ h1.update_coef(ub, -(*ci).coef);
+ h2.update_coef(ub, (*ci).coef);
+ } else {
+ h1.update_coef(stmt[*i].xform.output_var(pos),
+ -(*ci).coef);
+ h2.update_coef(stmt[*i].xform.output_var(pos),
+ (*ci).coef);
+ }
+ break;
}
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = stmt[*i].xform.get_local(g);
- else
- v = stmt[*i].xform.get_local(g,
- (*ci).var->function_of());
- h1.update_coef(v, -(*ci).coef);
- h2.update_coef(v, (*ci).coef);
- break;
- }
- default:
- throw loop_error("cannot handle tile bounds");
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = stmt[*i].xform.get_local(g);
+ else
+ v = stmt[*i].xform.get_local(g,
+ (*ci).var->function_of());
+ h1.update_coef(v, -(*ci).coef);
+ h2.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot handle tile bounds");
}
}
h1.update_const(-ub_list[simplest_ub].get_const());
@@ -500,13 +498,13 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
h1.update_const(coef - 1);
}
}
-
+
// insert tile controlling loop constraints
if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0
Variable_ID e = f_exists->declare();
GEQ_Handle h1 = f_root->add_GEQ();
h1.update_coef(e, 1);
-
+
EQ_Handle h2 = f_root->add_EQ();
h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1);
h2.update_coef(e, -tile_size);
@@ -514,14 +512,14 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
} else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32)
GEQ_Handle h1 = f_root->add_GEQ();
h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1);
-
+
GEQ_Handle h2 = f_root->add_GEQ();
h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1),
-tile_size);
h2.update_coef(aligned_lb, -1);
h2.update_coef(ub, 1);
}
-
+
// special care for private statements like overflow assignment
if (private_stmt.find(*i) != private_stmt.end()) { // e.g. ii <= UB
GEQ_Handle h = f_root->add_GEQ();
@@ -529,14 +527,14 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
h.update_coef(ub, 1);
}
- // restrict original loop index inside the tile
+ // restrict original loop index inside the tile
else {
if (method == StridedTile) { // e.g. ii <= i < ii + tile_size
GEQ_Handle h1 = f_root->add_GEQ();
h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1);
h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1),
-1);
-
+
GEQ_Handle h2 = f_root->add_GEQ();
h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1);
h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1);
@@ -547,7 +545,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
-tile_size);
h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1);
h1.update_coef(aligned_lb, -1);
-
+
GEQ_Handle h2 = f_root->add_GEQ();
h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1),
tile_size);
@@ -558,30 +556,30 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,
}
}
}
-
+
// update loop level information
for (std::set<int>::iterator i = same_tile_controlling_loop.begin();
i != same_tile_controlling_loop.end(); i++) {
for (int j = 1; j <= stmt[*i].loop_level.size(); j++)
switch (stmt[*i].loop_level[j - 1].type) {
- case LoopLevelOriginal:
- break;
- case LoopLevelTile:
- if (stmt[*i].loop_level[j - 1].payload >= outer_level)
- stmt[*i].loop_level[j - 1].payload++;
- break;
- default:
- throw loop_error(
- "unknown loop level type for statement "
- + to_string(*i));
+ case LoopLevelOriginal:
+ break;
+ case LoopLevelTile:
+ if (stmt[*i].loop_level[j - 1].payload >= outer_level)
+ stmt[*i].loop_level[j - 1].payload++;
+ break;
+ default:
+ throw loop_error(
+ "unknown loop level type for statement "
+ + to_string(*i));
}
-
+
LoopLevel ll;
ll.type = LoopLevelTile;
ll.payload = level + 1;
ll.parallel_level = 0;
stmt[*i].loop_level.insert(
- stmt[*i].loop_level.begin() + (outer_level - 1), ll);
+ stmt[*i].loop_level.begin() + (outer_level - 1), ll);
}
}
diff --git a/src/transformations/loop_unroll.cc b/src/transformations/loop_unroll.cc
index 86ffd84..3238d50 100644
--- a/src/transformations/loop_unroll.cc
+++ b/src/transformations/loop_unroll.cc
@@ -23,54 +23,54 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
// check for sanity of parameters
if (unroll_amount < 0)
throw std::invalid_argument(
- "invalid unroll amount " + to_string(unroll_amount));
+ "invalid unroll amount " + to_string(unroll_amount));
if (stmt_num < 0 || stmt_num >= stmt.size())
throw std::invalid_argument("invalid statement " + to_string(stmt_num));
if (level <= 0 || level > stmt[stmt_num].loop_level.size())
throw std::invalid_argument("invalid loop level " + to_string(level));
-
+
if (cleanup_split_level == 0)
cleanup_split_level = level;
if (cleanup_split_level > level)
throw std::invalid_argument(
- "cleanup code must be split at or outside the unrolled loop level "
- + to_string(level));
+ "cleanup code must be split at or outside the unrolled loop level "
+ + to_string(level));
if (cleanup_split_level <= 0)
throw std::invalid_argument(
- "invalid split loop level " + to_string(cleanup_split_level));
-
+ "invalid split loop level " + to_string(cleanup_split_level));
+
// invalidate saved codegen computation
delete last_compute_cgr_;
last_compute_cgr_ = NULL;
delete last_compute_cg_;
last_compute_cg_ = NULL;
-
+
int dim = 2 * level - 1;
std::vector<int> lex = getLexicalOrder(stmt_num);
std::set<int> same_loop = getStatements(lex, dim - 1);
-
+
// nothing to do
if (unroll_amount == 1)
return std::set<int>();
-
+
for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end();
i++) {
std::vector<std::pair<int, DependenceVector> > D;
int n = stmt[*i].xform.n_out();
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end();
+ dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end();
j++) {
if (same_loop.find(j->first) != same_loop.end())
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
int dim2 = level - 1;
if (dv.type != DEP_CONTROL) {
-
+
while (stmt[*i].loop_level[dim2].type == LoopLevelTile) {
dim2 = stmt[*i].loop_level[dim2].payload - 1;
}
dim2 = stmt[*i].loop_level[dim2].payload;
-
+
/*if (dv.isCarried(dim2)
&& (dv.hasNegative(dim2) && !dv.quasi))
throw loop_error(
@@ -82,11 +82,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
"loop error: Unrolling is illegal, dependence violation!");
*/
bool safe = false;
-
+
if (dv.isCarried(dim2) && dv.hasPositive(dim2)) {
if (dv.quasi)
throw loop_error(
- "loop error: a quasi dependence with a positive carried distance");
+ "loop error: a quasi dependence with a positive carried distance");
if (!dv.quasi) {
if (dv.lbounds[dim2] != posInfinity) {
//if (dv.lbounds[dim2] != negInfinity)
@@ -102,33 +102,33 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
} else
safe = true;
}*/
-
+
if (!safe) {
for (int l = level + 1; l <= (n - 1) / 2; l++) {
int dim3 = l - 1;
-
+
if (stmt[*i].loop_level[dim3].type
!= LoopLevelTile)
dim3 =
- stmt[*i].loop_level[dim3].payload;
+ stmt[*i].loop_level[dim3].payload;
else {
while (stmt[*i].loop_level[dim3].type
== LoopLevelTile) {
dim3 =
- stmt[*i].loop_level[dim3].payload
- - 1;
+ stmt[*i].loop_level[dim3].payload
+ - 1;
}
dim3 =
- stmt[*i].loop_level[dim3].payload;
+ stmt[*i].loop_level[dim3].payload;
}
-
+
if (dim3 > dim2) {
-
+
if (dv.hasPositive(dim3))
break;
else if (dv.hasNegative(dim3))
throw loop_error(
- "loop error: Unrolling is illegal, dependence violation!");
+ "loop error: Unrolling is illegal, dependence violation!");
}
}
}
@@ -153,7 +153,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
hull = Intersection(hull,
omega::Range(Restrict_Domain(mapping, copy(stmt[*i].IS))));
hull.simplify(2, 4);
-
+
}
}
for (int i = 1; i <= level; i++) {
@@ -161,7 +161,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
hull.name_set_var(i, name);
}
hull.setup_names();
-
+
// extract the exact loop bound of the dimension to be unrolled
if (is_single_loop_iteration(hull, level, this->known))
return std::set<int>();
@@ -169,7 +169,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
if (!bound.has_single_conjunct() || !bound.is_satisfiable()
|| bound.is_tautology())
throw loop_error("unable to extract loop bound for unrolling");
-
+
// extract the loop stride
coef_t stride;
std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
@@ -178,9 +178,9 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
stride = 1;
else
stride = abs(result.first.get_coef(result.second))
- / gcd(abs(result.first.get_coef(result.second)),
- abs(result.first.get_coef(bound.set_var(level))));
-
+ / gcd(abs(result.first.get_coef(result.second)),
+ abs(result.first.get_coef(bound.set_var(level))));
+
// separate lower and upper bounds
std::vector<GEQ_Handle> lb_list, ub_list;
{
@@ -193,17 +193,17 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
lb_list.push_back(*gi);
}
}
-
+
// simplify overflow expression for each pair of upper and lower bounds
std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table(
- lb_list.size(),
- std::vector<std::map<Variable_ID, int> >(ub_list.size(),
- std::map<Variable_ID, int>()));
+ lb_list.size(),
+ std::vector<std::map<Variable_ID, int> >(ub_list.size(),
+ std::map<Variable_ID, int>()));
bool is_overflow_simplifiable = true;
for (int i = 0; i < lb_list.size(); i++) {
if (!is_overflow_simplifiable)
break;
-
+
for (int j = 0; j < ub_list.size(); j++) {
// lower bound or upper bound has non-unit coefficient, can't simplify
if (ub_list[j].get_coef(bound.set_var(level)) != -1
@@ -211,81 +211,81 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
is_overflow_simplifiable = false;
break;
}
-
+
for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- if ((*ci).var != bound.set_var(level))
+ case Input_Var: {
+ if ((*ci).var != bound.set_var(level))
+ overflow_table[i][j][(*ci).var] += (*ci).coef;
+
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = bound.get_local(g);
+ else
+ v = bound.get_local(g, (*ci).var->function_of());
overflow_table[i][j][(*ci).var] += (*ci).coef;
-
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = bound.get_local(g);
- else
- v = bound.get_local(g, (*ci).var->function_of());
- overflow_table[i][j][(*ci).var] += (*ci).coef;
- break;
- }
- default:
- throw loop_error("failed to calculate overflow amount");
+ break;
+ }
+ default:
+ throw loop_error("failed to calculate overflow amount");
}
}
overflow_table[i][j][NULL] += ub_list[j].get_const();
-
+
for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) {
switch ((*ci).var->kind()) {
- case Input_Var: {
- if ((*ci).var != bound.set_var(level)) {
+ case Input_Var: {
+ if ((*ci).var != bound.set_var(level)) {
+ overflow_table[i][j][(*ci).var] += (*ci).coef;
+ if (overflow_table[i][j][(*ci).var] == 0)
+ overflow_table[i][j].erase(
+ overflow_table[i][j].find((*ci).var));
+ }
+ break;
+ }
+ case Global_Var: {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = bound.get_local(g);
+ else
+ v = bound.get_local(g, (*ci).var->function_of());
overflow_table[i][j][(*ci).var] += (*ci).coef;
if (overflow_table[i][j][(*ci).var] == 0)
overflow_table[i][j].erase(
- overflow_table[i][j].find((*ci).var));
+ overflow_table[i][j].find((*ci).var));
+ break;
}
- break;
- }
- case Global_Var: {
- Global_Var_ID g = (*ci).var->get_global_var();
- Variable_ID v;
- if (g->arity() == 0)
- v = bound.get_local(g);
- else
- v = bound.get_local(g, (*ci).var->function_of());
- overflow_table[i][j][(*ci).var] += (*ci).coef;
- if (overflow_table[i][j][(*ci).var] == 0)
- overflow_table[i][j].erase(
- overflow_table[i][j].find((*ci).var));
- break;
- }
- default:
- throw loop_error("failed to calculate overflow amount");
+ default:
+ throw loop_error("failed to calculate overflow amount");
}
}
overflow_table[i][j][NULL] += lb_list[i].get_const();
-
+
overflow_table[i][j][NULL] += stride;
if (unroll_amount == 0
|| (overflow_table[i][j].size() == 1
&& overflow_table[i][j][NULL] / stride
- < unroll_amount))
+ < unroll_amount))
unroll_amount = overflow_table[i][j][NULL] / stride;
}
}
-
+
// loop iteration count can't be determined, bail out gracefully
if (unroll_amount == 0)
return std::set<int>();
-
+
// further simply overflow calculation using coefficients' modular
if (is_overflow_simplifiable) {
for (int i = 0; i < lb_list.size(); i++)
for (int j = 0; j < ub_list.size(); j++)
if (stride == 1) {
for (std::map<Variable_ID, int>::iterator k =
- overflow_table[i][j].begin();
+ overflow_table[i][j].begin();
k != overflow_table[i][j].end();)
if ((*k).first != NULL) {
int t = int_mod_hat((*k).second, unroll_amount);
@@ -304,20 +304,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
}
} else
k++;
-
+
overflow_table[i][j][NULL] = int_mod_hat(
- overflow_table[i][j][NULL], unroll_amount);
-
+ overflow_table[i][j][NULL], unroll_amount);
+
// Since we don't have MODULO instruction in SUIF yet (only MOD),
// make all coef positive in the final formula
for (std::map<Variable_ID, int>::iterator k =
- overflow_table[i][j].begin();
+ overflow_table[i][j].begin();
k != overflow_table[i][j].end(); k++)
if ((*k).second < 0)
(*k).second += unroll_amount;
}
}
-
+
// build overflow statement
CG_outputBuilder *ocg = ir->builder();
CG_outputRepr *overflow_code = NULL;
@@ -331,17 +331,17 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
// upper splitting condition
GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
h.update_const(
- ((overflow_table[0][i][NULL] / stride) % unroll_amount)
- * -stride);
+ ((overflow_table[0][i][NULL] / stride) % unroll_amount)
+ * -stride);
} else {
// upper splitting condition
std::string over_name = overflow_var_name_prefix
- + to_string(overflow_var_name_counter++);
+ + to_string(overflow_var_name_counter++);
Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
over_var_list.push_back(over_free_var);
GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
h.update_coef(cond_upper.get_local(over_free_var), -stride);
-
+
// insert constraint 0 <= overflow < unroll_amount
Variable_ID v = overflow_constraint.get_local(over_free_var);
GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
@@ -349,20 +349,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
h2.update_coef(v, -1);
h2.update_const(unroll_amount - 1);
-
+
// create overflow assignment
bound.setup_names(); // hack to fix omega relation variable names issue
CG_outputRepr *rhs = NULL;
bool is_split_illegal = false;
for (std::map<Variable_ID, int>::iterator j =
- overflow_table[0][i].begin();
+ overflow_table[0][i].begin();
j != overflow_table[0][i].end(); j++)
if ((*j).first != NULL) {
if ((*j).first->kind() == Input_Var
&& (*j).first->get_position()
- >= cleanup_split_level)
+ >= cleanup_split_level)
is_split_illegal = true;
-
+
CG_outputRepr *t = ocg->CreateIdent((*j).first->name());
if ((*j).second != 1)
t = ocg->CreateTimes(ocg->CreateInt((*j).second),
@@ -370,20 +370,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
rhs = ocg->CreatePlus(rhs, t);
} else if ((*j).second != 0)
rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second));
-
+
if (is_split_illegal) {
rhs->clear();
delete rhs;
throw loop_error(
- "cannot split cleanup code at loop level "
- + to_string(cleanup_split_level)
- + " due to overflow variable data dependence");
+ "cannot split cleanup code at loop level "
+ + to_string(cleanup_split_level)
+ + " due to overflow variable data dependence");
}
-
+
if (stride != 1)
rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride));
rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
-
+
CG_outputRepr *lhs = ocg->CreateIdent(over_name);
init_code = ocg->StmtListAppend(init_code,
ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
@@ -392,12 +392,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
ocg->CreateAssignment(0, lhs, rhs));
}
}
-
+
// lower splitting condition
GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]);
} else if (is_overflow_simplifiable && ub_list.size() == 1) {
for (int i = 0; i < lb_list.size(); i++) {
-
+
if (overflow_table[i][0].size() == 1) {
// lower splitting condition
GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
@@ -405,12 +405,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
} else {
// lower splitting condition
std::string over_name = overflow_var_name_prefix
- + to_string(overflow_var_name_counter++);
+ + to_string(overflow_var_name_counter++);
Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
over_var_list.push_back(over_free_var);
GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
h.update_coef(cond_lower.get_local(over_free_var), -stride);
-
+
// insert constraint 0 <= overflow < unroll_amount
Variable_ID v = overflow_constraint.get_local(over_free_var);
GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
@@ -418,12 +418,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
GEQ_Handle h2 = overflow_constraint_root->add_GEQ();
h2.update_coef(v, -1);
h2.update_const(unroll_amount - 1);
-
+
// create overflow assignment
bound.setup_names(); // hack to fix omega relation variable names issue
CG_outputRepr *rhs = NULL;
for (std::map<Variable_ID, int>::iterator j =
- overflow_table[0][i].begin();
+ overflow_table[0][i].begin();
j != overflow_table[0][i].end(); j++)
if ((*j).first != NULL) {
CG_outputRepr *t = ocg->CreateIdent((*j).first->name());
@@ -433,11 +433,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
rhs = ocg->CreatePlus(rhs, t);
} else if ((*j).second != 0)
rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second));
-
+
if (stride != 1)
rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride));
rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount));
-
+
CG_outputRepr *lhs = ocg->CreateIdent(over_name);
init_code = ocg->StmtListAppend(init_code,
ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
@@ -446,61 +446,59 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
ocg->CreateAssignment(0, lhs, rhs));
}
}
-
+
// upper splitting condition
GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]);
} else {
std::string over_name = overflow_var_name_prefix
- + to_string(overflow_var_name_counter++);
+ + to_string(overflow_var_name_counter++);
Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name);
over_var_list.push_back(over_free_var);
-
+
std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list;
for (int i = 0; i < lb_list.size(); i++) {
lb_repr_list.push_back(
- output_lower_bound_repr(ocg,
- lb_list[i],
- bound.set_var(dim + 1), result.first, result.second,
- bound, Relation::True(bound.n_set()),
- std::vector<std::pair<CG_outputRepr *, int> >(
- bound.n_set(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL),
- 0)),
- uninterpreted_symbols[stmt_num]));
+ output_lower_bound_repr(ocg,
+ lb_list[i],
+ bound.set_var(dim + 1), result.first, result.second,
+ bound, Relation::True(bound.n_set()),
+ std::vector<std::pair<CG_outputRepr *, int> >(
+ bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
+ uninterpreted_symbols[stmt_num]));
GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]);
}
for (int i = 0; i < ub_list.size(); i++) {
ub_repr_list.push_back(
- output_upper_bound_repr(ocg, ub_list[i],
- bound.set_var(dim + 1), bound,
- std::vector<std::pair<CG_outputRepr *, int> >(
- bound.n_set(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL),
- 0)),
- uninterpreted_symbols[stmt_num]));
+ output_upper_bound_repr(ocg, ub_list[i],
+ bound.set_var(dim + 1), bound,
+ std::vector<std::pair<CG_outputRepr *, int> >(
+ bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)),
+ uninterpreted_symbols[stmt_num]));
GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]);
h.update_coef(cond_upper.get_local(over_free_var), -stride);
}
-
- CG_outputRepr *lbRepr, *ubRepr;
+
+ CG_outputRepr *lbRepr, *ubRepr;
if (lb_repr_list.size() > 1) {
//fprintf(stderr, "loop_unroll.cc createInvoke( max )\n");
lbRepr = ocg->CreateInvoke("max", lb_repr_list);
- }
- else if (lb_repr_list.size() == 1) {
+ } else if (lb_repr_list.size() == 1) {
lbRepr = lb_repr_list[0];
}
-
+
if (ub_repr_list.size() > 1) {
//fprintf(stderr, "loop_unroll.cc createInvoke( min )\n");
ubRepr = ocg->CreateInvoke("min", ub_repr_list);
- }
- else if (ub_repr_list.size() == 1) {
+ } else if (ub_repr_list.size() == 1) {
ubRepr = ub_repr_list[0];
}
-
+
// create overflow assignment
CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr),
ocg->CreateInt(1));
@@ -512,7 +510,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
ocg->CreateAssignment(0, lhs, ocg->CreateInt(0)));
lhs = ocg->CreateIdent(over_name);
overflow_code = ocg->CreateAssignment(0, lhs, rhs);
-
+
// insert constraint 0 <= overflow < unroll_amount
Variable_ID v = overflow_constraint.get_local(over_free_var);
GEQ_Handle h1 = overflow_constraint_root->add_GEQ();
@@ -521,7 +519,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
h2.update_coef(v, -1);
h2.update_const(unroll_amount - 1);
}
-
+
// insert overflow statement
int overflow_stmt_num = -1;
if (overflow_code != NULL) {
@@ -537,7 +535,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
for (int i = 1; i < cleanup_split_level; i++)
overflow_IS.name_set_var(i, hull.set_var(i)->name());
overflow_IS.setup_names();
-
+
// build dumb transformation relation for overflow statement
Relation overflow_xform(cleanup_split_level - 1,
2 * (cleanup_split_level - 1) + 1);
@@ -546,20 +544,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
EQ_Handle h = f_root->add_EQ();
h.update_coef(overflow_xform.output_var(2 * i), 1);
h.update_coef(overflow_xform.input_var(i), -1);
-
+
h = f_root->add_EQ();
h.update_coef(overflow_xform.output_var(2 * i - 1), 1);
h.update_const(-lex[2 * i - 2]);
}
EQ_Handle h = f_root->add_EQ();
h.update_coef(
- overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1),
- 1);
+ overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1),
+ 1);
h.update_const(-lex[2 * (cleanup_split_level - 1)]);
-
+
shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1);
Statement overflow_stmt;
-
+
overflow_stmt.code = overflow_code;
overflow_stmt.IS = overflow_IS;
overflow_stmt.xform = overflow_xform;
@@ -567,18 +565,18 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
overflow_stmt.ir_stmt_node = NULL;
for (int i = 0; i < level - 1; i++) {
overflow_stmt.loop_level[i].type =
- stmt[stmt_num].loop_level[i].type;
+ stmt[stmt_num].loop_level[i].type;
if (stmt[stmt_num].loop_level[i].type == LoopLevelTile
&& stmt[stmt_num].loop_level[i].payload >= level)
overflow_stmt.loop_level[i].payload = -1;
else
overflow_stmt.loop_level[i].payload =
- stmt[stmt_num].loop_level[i].payload;
+ stmt[stmt_num].loop_level[i].payload;
overflow_stmt.loop_level[i].parallel_level =
- stmt[stmt_num].loop_level[i].parallel_level;
+ stmt[stmt_num].loop_level[i].parallel_level;
}
-
- fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size());
+
+ fprintf(stderr, "loop_unroll.cc L581 adding stmt %d\n", stmt.size());
stmt.push_back(overflow_stmt);
uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
@@ -586,12 +584,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
dep.insert();
overflow_stmt_num = stmt.size() - 1;
overflow[overflow_stmt_num] = over_var_list;
-
+
// update the global known information on overflow variable
this->known = Intersection(this->known,
Extend_Set(copy(overflow_constraint),
this->known.n_set() - overflow_constraint.n_set()));
-
+
// update dependence graph
DependenceVector dv;
dv.type = DEP_CONTROL;
@@ -628,12 +626,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
dep.connect(overflow_stmt_num, overflow_stmt_num, dv);
}
}
-
+
// split the loop so it can be fully unrolled
std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper);
std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower);
new_stmts.insert(new_stmts2.begin(), new_stmts2.end());
-
+
// check if unrolled statements can be trivially lumped together as one statement
bool can_be_lumped = true;
if (can_be_lumped) {
@@ -649,7 +647,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
if (!(stmt[*i].loop_level[j].type
== stmt[stmt_num].loop_level[j].type
&& stmt[*i].loop_level[j].payload
- == stmt[stmt_num].loop_level[j].payload)) {
+ == stmt[stmt_num].loop_level[j].payload)) {
can_be_lumped = false;
break;
}
@@ -690,7 +688,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
for (std::set<int>::iterator i = same_loop.begin();
i != same_loop.end(); i++) {
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[*i].second.begin();
+ dep.vertex[*i].second.begin();
j != dep.vertex[*i].second.end(); j++)
if (same_loop.find(j->first) != same_loop.end()) {
for (int k = 0; k < j->second.size(); k++)
@@ -706,38 +704,38 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
break;
}
}
-
+
// insert unrolled statements
int old_num_stmt = stmt.size();
if (!can_be_lumped) {
std::map<int, std::vector<int> > what_stmt_num;
-
+
for (int j = 1; j < unroll_amount; j++) {
for (std::set<int>::iterator i = same_loop.begin();
i != same_loop.end(); i++) {
Statement new_stmt;
-
+
std::vector<std::string> loop_vars;
std::vector<CG_outputRepr *> subs;
loop_vars.push_back(stmt[*i].IS.set_var(level)->name());
subs.push_back(
- ocg->CreatePlus(
- ocg->CreateIdent(
- stmt[*i].IS.set_var(level)->name()),
- ocg->CreateInt(j * stride)));
+ ocg->CreatePlus(
+ ocg->CreateIdent(
+ stmt[*i].IS.set_var(level)->name()),
+ ocg->CreateInt(j * stride)));
new_stmt.code = ocg->CreateSubstitutedStmt(0,
stmt[*i].code->clone(), loop_vars, subs);
-
+
new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride);
add_loop_stride(new_stmt.IS, bound, level - 1,
unroll_amount * stride);
-
+
new_stmt.xform = copy(stmt[*i].xform);
-
+
new_stmt.loop_level = stmt[*i].loop_level;
new_stmt.ir_stmt_node = NULL;
- fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size());
+ fprintf(stderr, "loop_unroll.cc L740 adding stmt %d\n", stmt.size());
stmt.push_back(new_stmt);
uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
@@ -750,16 +748,16 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
i != same_loop.end(); i++)
add_loop_stride(stmt[*i].IS, bound, level - 1,
unroll_amount * stride);
-
+
// update dependence graph
if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
int dep_dim = stmt[stmt_num].loop_level[level - 1].payload;
int new_stride = unroll_amount * stride;
for (int i = 0; i < old_num_stmt; i++) {
std::vector<std::pair<int, DependenceVector> > D;
-
+
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end();) {
if (same_loop.find(i) != same_loop.end()) {
if (same_loop.find(j->first) != same_loop.end()) {
@@ -772,7 +770,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
kk++)
if (what_stmt_num[i][kk] != -1
&& what_stmt_num[j->first][kk]
- != -1)
+ != -1)
dep.connect(what_stmt_num[i][kk],
what_stmt_num[j->first][kk],
dv);
@@ -782,35 +780,35 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
if (ub == lb
&& int_mod(lb,
static_cast<coef_t>(new_stride))
- == 0) {
+ == 0) {
D.push_back(
- std::make_pair(j->first, dv));
+ std::make_pair(j->first, dv));
for (int kk = 0; kk < unroll_amount - 1;
kk++)
if (what_stmt_num[i][kk] != -1
&& what_stmt_num[j->first][kk]
- != -1)
+ != -1)
dep.connect(
- what_stmt_num[i][kk],
- what_stmt_num[j->first][kk],
- dv);
+ what_stmt_num[i][kk],
+ what_stmt_num[j->first][kk],
+ dv);
} else if (lb == -posInfinity
&& ub == posInfinity) {
D.push_back(
- std::make_pair(j->first, dv));
+ std::make_pair(j->first, dv));
for (int kk = 0; kk < unroll_amount;
kk++)
if (kk == 0)
D.push_back(
- std::make_pair(j->first,
- dv));
+ std::make_pair(j->first,
+ dv));
else if (what_stmt_num[j->first][kk
- 1] != -1)
D.push_back(
- std::make_pair(
- what_stmt_num[j->first][kk
- - 1],
- dv));
+ std::make_pair(
+ what_stmt_num[j->first][kk
+ - 1],
+ dv));
for (int t = 0; t < unroll_amount - 1;
t++)
if (what_stmt_num[i][t] != -1)
@@ -819,15 +817,15 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
kk++)
if (kk == 0)
dep.connect(
- what_stmt_num[i][t],
- j->first, dv);
+ what_stmt_num[i][t],
+ j->first, dv);
else if (what_stmt_num[j->first][kk
- 1] != -1)
dep.connect(
- what_stmt_num[i][t],
- what_stmt_num[j->first][kk
- - 1],
- dv);
+ what_stmt_num[i][t],
+ what_stmt_num[j->first][kk
+ - 1],
+ dv);
} else {
for (int kk = 0; kk < unroll_amount;
kk++) {
@@ -836,49 +834,49 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
< int_mod(lb,
static_cast<coef_t>(new_stride)))
dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride
- + new_stride;
+ floor(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride
+ + new_stride;
else
dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
+ floor(
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
}
if (ub != posInfinity) {
if (kk * stride
> int_mod(ub,
static_cast<coef_t>(new_stride)))
dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub)
- / new_stride)
- * new_stride
- - new_stride;
+ floor(
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride
+ - new_stride;
else
dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub)
- / new_stride)
- * new_stride;
+ floor(
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride;
}
if (dv.ubounds[dep_dim]
>= dv.lbounds[dep_dim]) {
if (kk == 0)
D.push_back(
- std::make_pair(
- j->first,
- dv));
+ std::make_pair(
+ j->first,
+ dv));
else if (what_stmt_num[j->first][kk
- 1] != -1)
D.push_back(
- std::make_pair(
- what_stmt_num[j->first][kk
- - 1],
- dv));
+ std::make_pair(
+ what_stmt_num[j->first][kk
+ - 1],
+ dv));
}
}
for (int t = 0; t < unroll_amount - 1;
@@ -890,80 +888,80 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
if (lb != -posInfinity) {
if (kk * stride
< int_mod(
- lb + t
- + 1,
- static_cast<coef_t>(new_stride)))
+ lb + t
+ + 1,
+ static_cast<coef_t>(new_stride)))
dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride
- + new_stride;
+ floor(
+ static_cast<double>(lb
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride
+ + new_stride;
else
dv.lbounds[dep_dim] =
- floor(
- static_cast<double>(lb
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride;
+ floor(
+ static_cast<double>(lb
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride;
}
if (ub != posInfinity) {
if (kk * stride
> int_mod(
- ub + t
- + 1,
- static_cast<coef_t>(new_stride)))
+ ub + t
+ + 1,
+ static_cast<coef_t>(new_stride)))
dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride
- - new_stride;
+ floor(
+ static_cast<double>(ub
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride
+ - new_stride;
else
dv.ubounds[dep_dim] =
- floor(
- static_cast<double>(ub
- + (t
- + 1)
- * stride)
- / new_stride)
- * new_stride;
+ floor(
+ static_cast<double>(ub
+ + (t
+ + 1)
+ * stride)
+ / new_stride)
+ * new_stride;
}
if (dv.ubounds[dep_dim]
>= dv.lbounds[dep_dim]) {
if (kk == 0)
dep.connect(
- what_stmt_num[i][t],
- j->first,
- dv);
+ what_stmt_num[i][t],
+ j->first,
+ dv);
else if (what_stmt_num[j->first][kk
- 1] != -1)
dep.connect(
- what_stmt_num[i][t],
- what_stmt_num[j->first][kk
- - 1],
- dv);
+ what_stmt_num[i][t],
+ what_stmt_num[j->first][kk
+ - 1],
+ dv);
}
}
}
}
}
-
+
dep.vertex[i].second.erase(j++);
} else {
for (int kk = 0; kk < unroll_amount - 1; kk++)
if (what_stmt_num[i][kk] != -1)
dep.connect(what_stmt_num[i][kk], j->first,
j->second);
-
+
j++;
}
} else {
@@ -972,26 +970,26 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
for (int kk = 0; kk < unroll_amount - 1; kk++)
if (what_stmt_num[j->first][kk] != -1)
D.push_back(
- std::make_pair(
- what_stmt_num[j->first][kk],
- j->second[k]));
+ std::make_pair(
+ what_stmt_num[j->first][kk],
+ j->second[k]));
j++;
}
}
-
+
for (int j = 0; j < D.size(); j++)
dep.connect(i, D[j].first, D[j].second);
}
}
-
+
// reset lexical order for the unrolled loop body
std::set<int> new_same_loop;
-
+
int count = 0;
-
+
for (std::map<int, std::vector<int> >::iterator i =
- what_stmt_num.begin(); i != what_stmt_num.end(); i++) {
-
+ what_stmt_num.begin(); i != what_stmt_num.end(); i++) {
+
new_same_loop.insert(i->first);
for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2)
assign_const(stmt[i->first].xform, k,
@@ -1001,11 +999,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
for (int j = 0; j < i->second.size(); j++) {
new_same_loop.insert(i->second[j]);
for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k +=
- 2)
+ 2)
assign_const(stmt[i->second[j]].xform, k,
get_const(
- stmt[(what_stmt_num.begin())->first].xform,
- k, Output_Var) + count);
+ stmt[(what_stmt_num.begin())->first].xform,
+ k, Output_Var) + count);
count++;
}
}
@@ -1015,20 +1013,20 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
i != same_loop.end(); i++)
add_loop_stride(stmt[*i].IS, bound, level - 1,
unroll_amount * stride);
-
+
int max_level = stmt[stmt_num].loop_level.size();
std::vector<std::pair<int, int> > stmt_order;
for (std::set<int>::iterator i = same_loop.begin();
i != same_loop.end(); i++)
stmt_order.push_back(
- std::make_pair(
- get_const(stmt[*i].xform, 2 * max_level,
- Output_Var), *i));
+ std::make_pair(
+ get_const(stmt[*i].xform, 2 * max_level,
+ Output_Var), *i));
sort(stmt_order.begin(), stmt_order.end());
-
+
Statement new_stmt;
new_stmt.code = NULL;
- for (int j = 1; j < unroll_amount; j++) {
+ for (int j = 1; j < unroll_amount; j++) {
for (int i = 0; i < stmt_order.size(); i++) {
std::vector<std::string> loop_vars;
std::vector<CG_outputRepr *> subs;
@@ -1036,12 +1034,12 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
//fprintf(stderr, "loop_unroll.cc, will replace '%s with '%s+%d' ??\n",
// stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(),
// stmt[stmt_order[i].second].IS.set_var(level)->name().c_str(), j * stride);
-
+
loop_vars.push_back(
- stmt[stmt_order[i].second].IS.set_var(level)->name());
+ stmt[stmt_order[i].second].IS.set_var(level)->name());
subs.push_back(
- ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()),
- ocg->CreateInt(j * stride))); // BUG HERE
+ ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()),
+ ocg->CreateInt(j * stride))); // BUG HERE
//fprintf(stderr, "loop_unroll.cc subs now has %d parts\n", subs.size());
//for (int k=0; k< subs.size(); k++) //fprintf(stderr, "subs[%d] = 0x%x\n", k, subs[k]);
@@ -1052,7 +1050,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
CG_outputRepr *code = ocg->CreateSubstitutedStmt(0,
- stmt[stmt_order[i].second].code->clone(),
+ stmt[stmt_order[i].second].code->clone(),
loop_vars,
subs);
@@ -1069,7 +1067,7 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
}
}
-
+
//fprintf(stderr, "new_stmt.IS = \n");
@@ -1084,13 +1082,13 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
if (stmt[stmt_num].has_inspector) fprintf(stderr, "OLD STMT HAS INSPECTOR\n");
else fprintf(stderr, "OLD STMT DOES NOT HAVE INSPECTOR\n");
- fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size());
+ fprintf(stderr, "loop_unroll.cc L1083 adding stmt %d\n", stmt.size());
stmt.push_back(new_stmt);
uninterpreted_symbols.push_back(uninterpreted_symbols[stmt_num]);
uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[stmt_num]);
dep.insert();
-
+
//fprintf(stderr, "update dependence graph\n");
// update dependence graph
if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) {
@@ -1098,14 +1096,14 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
int new_stride = unroll_amount * stride;
for (int i = 0; i < old_num_stmt; i++) {
std::vector<std::pair<int, std::vector<DependenceVector> > > D;
-
+
for (DependenceGraph::EdgeList::iterator j =
- dep.vertex[i].second.begin();
+ dep.vertex[i].second.begin();
j != dep.vertex[i].second.end();) {
if (same_loop.find(i) != same_loop.end()) {
if (same_loop.find(j->first) != same_loop.end()) {
std::vector<DependenceVector> dvs11, dvs12, dvs22,
- dvs21;
+ dvs21;
for (int k = 0; k < j->second.size(); k++) {
DependenceVector dv = j->second[k];
if (dv.type == DEP_CONTROL
@@ -1115,71 +1113,71 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
dvs22.push_back(dv);
} else
throw loop_error(
- "unrolled statements lumped together illegally");
+ "unrolled statements lumped together illegally");
} else {
coef_t lb = dv.lbounds[dep_dim];
coef_t ub = dv.ubounds[dep_dim];
if (ub == lb
&& int_mod(lb,
static_cast<coef_t>(new_stride))
- == 0) {
+ == 0) {
dvs11.push_back(dv);
dvs22.push_back(dv);
} else {
if (lb != -posInfinity)
dv.lbounds[dep_dim] = ceil(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
if (ub != posInfinity)
dv.ubounds[dep_dim] = floor(
- static_cast<double>(ub)
- / new_stride)
- * new_stride;
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride;
if (dv.ubounds[dep_dim]
>= dv.lbounds[dep_dim])
dvs11.push_back(dv);
-
+
if (lb != -posInfinity)
dv.lbounds[dep_dim] = ceil(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
if (ub != posInfinity)
dv.ubounds[dep_dim] = ceil(
- static_cast<double>(ub)
- / new_stride)
- * new_stride;
+ static_cast<double>(ub)
+ / new_stride)
+ * new_stride;
if (dv.ubounds[dep_dim]
>= dv.lbounds[dep_dim])
dvs21.push_back(dv);
-
+
if (lb != -posInfinity)
dv.lbounds[dep_dim] = floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
if (ub != posInfinity)
dv.ubounds[dep_dim] = floor(
- static_cast<double>(ub
- - stride)
- / new_stride)
- * new_stride;
+ static_cast<double>(ub
+ - stride)
+ / new_stride)
+ * new_stride;
if (dv.ubounds[dep_dim]
>= dv.lbounds[dep_dim])
dvs12.push_back(dv);
-
+
if (lb != -posInfinity)
dv.lbounds[dep_dim] = floor(
- static_cast<double>(lb)
- / new_stride)
- * new_stride;
+ static_cast<double>(lb)
+ / new_stride)
+ * new_stride;
if (ub != posInfinity)
dv.ubounds[dep_dim] = ceil(
- static_cast<double>(ub
- - stride)
- / new_stride)
- * new_stride;
+ static_cast<double>(ub
+ - stride)
+ / new_stride)
+ * new_stride;
if (dv.ubounds[dep_dim]
>= dv.lbounds[dep_dim])
dvs22.push_back(dv);
@@ -1192,10 +1190,10 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
dep.connect(old_num_stmt, old_num_stmt, dvs22);
if (dvs12.size() > 0)
D.push_back(
- std::make_pair(old_num_stmt, dvs12));
+ std::make_pair(old_num_stmt, dvs12));
if (dvs21.size() > 0)
dep.connect(old_num_stmt, i, dvs21);
-
+
dep.vertex[i].second.erase(j++);
} else {
dep.connect(old_num_stmt, j->first, j->second);
@@ -1204,17 +1202,17 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,
} else {
if (same_loop.find(j->first) != same_loop.end())
D.push_back(
- std::make_pair(old_num_stmt, j->second));
+ std::make_pair(old_num_stmt, j->second));
j++;
}
}
-
+
for (int j = 0; j < D.size(); j++)
dep.connect(i, D[j].first, D[j].second);
}
}
}
-
+
//fprintf(stderr, " loop_unroll.cc returning new_stmts\n");
return new_stmts;
}