summaryrefslogtreecommitdiff
path: root/src/transformations/loop_datacopy.cc
diff options
context:
space:
mode:
authorTuowen Zhao <ztuowen@gmail.com>2016-09-22 15:12:54 -0600
committerTuowen Zhao <ztuowen@gmail.com>2016-09-22 15:12:54 -0600
commit1929ac1a60615ee86779790c46e04e53de75462f (patch)
tree35566b4f04184a9aed98fdc9dda74507075a7890 /src/transformations/loop_datacopy.cc
parentf27e01a039195c379fd6716c4870858789941365 (diff)
downloadchill-1929ac1a60615ee86779790c46e04e53de75462f.tar.gz
chill-1929ac1a60615ee86779790c46e04e53de75462f.tar.bz2
chill-1929ac1a60615ee86779790c46e04e53de75462f.zip
add CHILL_DEBUG_PRINT & CHILL_DEBUG_BEGIN & CHILL_DEBUG_END
Diffstat (limited to 'src/transformations/loop_datacopy.cc')
-rw-r--r--src/transformations/loop_datacopy.cc1369
1 files changed, 1369 insertions, 0 deletions
diff --git a/src/transformations/loop_datacopy.cc b/src/transformations/loop_datacopy.cc
new file mode 100644
index 0000000..12d74fd
--- /dev/null
+++ b/src/transformations/loop_datacopy.cc
@@ -0,0 +1,1369 @@
+/*****************************************************************************
+ Copyright (C) 2008 University of Southern California
+ Copyright (C) 2009-2010 University of Utah
+ All Rights Reserved.
+
+ Purpose:
+ Various data copy schemes.
+
+ Notes:
+
+ History:
+ 02/20/09 Created by Chun Chen by splitting original datacopy from loop.cc
+*****************************************************************************/
+
+#include <code_gen/codegen.h>
+#include <code_gen/CG_utils.h>
+#include "loop.hh"
+#include "omegatools.hh"
+#include "ir_code.hh"
+#include "chill_error.hh"
+
+using namespace omega;
+
+//
+// data copy function by referring arrays by numbers.
+// e.g. A[i] = A[i-1] + B[i]
+// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i]
+//
+bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level,
+ bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+ //fprintf(stderr, "Loop::datacopy()\n");
+
+ // check for sanity of parameters
+ std::set<int> same_loop;
+ for (int i = 0; i < array_ref_nums.size(); i++) {
+ int stmt_num = array_ref_nums[i].first;
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+ if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+ if (i == 0) {
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ same_loop = getStatements(lex, 2*level-2);
+ }
+ else if (same_loop.find(stmt_num) == same_loop.end())
+ throw std::invalid_argument("array references for data copy must be located in the same subloop");
+ }
+
+ // convert array reference numbering scheme to actual array references
+ std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+ for (int i = 0; i < array_ref_nums.size(); i++) {
+ if (array_ref_nums[i].second.size() == 0)
+ continue;
+
+ int stmt_num = array_ref_nums[i].first;
+ selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>()));
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
+ std::vector<bool> selected(refs.size(), false);
+ for (int j = 0; j < array_ref_nums[i].second.size(); j++) {
+ int ref_num = array_ref_nums[i].second[j];
+ if (ref_num < 0 || ref_num >= refs.size()) {
+ for (int k = 0; k < refs.size(); k++)
+ delete refs[k];
+ throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
+ }
+ selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]);
+ selected[ref_num] = true;
+ }
+ for (int j = 0; j < refs.size(); j++)
+ if (!selected[j])
+ delete refs[j];
+ }
+ if (selected_refs.size() == 0)
+ throw std::invalid_argument("found no array references to copy");
+
+ // do the copy
+ bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+ return whatever;
+}
+
+//
+// data copy function by referring arrays by name.
+// e.g. A[i] = A[i-1] + B[i]
+// parameter array_name=A means to copy data touched by A[i-1] and A[i]
+//
+bool Loop::datacopy(int stmt_num, int level, const std::string &array_name,
+ bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+
+ fflush(stdout);
+ //fprintf(stderr, "Loop::datacopy2()\n");
+ //fprintf(stderr, "array name %s stmt num %d\n", array_name.c_str(), stmt_num);
+
+ // check for sanity of parameters
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+ if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+
+ // collect array references by name
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ int dim = 2*level - 1;
+ std::set<int> same_loop = getStatements(lex, dim-1);
+
+ std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+ for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
+ std::vector<IR_ArrayRef *> t;
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+ for (int j = 0; j < refs.size(); j++)
+ if (refs[j]->name() == array_name)
+ t.push_back(refs[j]);
+ else
+ delete refs[j];
+ if (t.size() != 0)
+ selected_refs.push_back(std::make_pair(*i, t));
+ }
+
+ //fprintf(stderr, "selected refs:\n");
+ //for (int i=0; i<selected_refs.size(); i++) {
+ // //fprintf(stderr, "%d 0x%x ", selected_refs[i].first, selected_refs[i].second[0]);
+ // selected_refs[i].second[0]->Dump(); printf("\n"); fflush(stdout);
+ //}
+
+ if (selected_refs.size() == 0)
+ throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
+
+ IR_ArrayRef *AR = selected_refs[0].second[0];
+ //IR_roseArrayRef *RAR = (IR_roseArrayRef *)AR;
+ //fprintf(stderr, "before datacopy_privatized, ");
+ //AR->Dump();
+
+ // do the copy
+ //fprintf(stderr, "\nLoop::datacopy2 calling privatized\n");
+
+ bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+
+ //AR = selected_refs[0].second[0];
+ //fprintf(stderr, "after datacopy_privatized, ");
+ //AR->Dump();
+
+ return whatever;
+}
+
+
+bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels,
+ bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+ //fprintf(stderr, "Loop::datacopy_privatized()\n");
+
+ // check for sanity of parameters
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+ if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+
+ // collect array references by name
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ int dim = 2*level - 1;
+ std::set<int> same_loop = getStatements(lex, dim-1);
+
+ std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+ for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
+ selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>()));
+
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+ for (int j = 0; j < refs.size(); j++)
+ if (refs[j]->name() == array_name)
+ selected_refs[selected_refs.size()-1].second.push_back(refs[j]);
+ else
+ delete refs[j];
+ }
+ if (selected_refs.size() == 0)
+ throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
+
+ // do the copy
+ bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+ return whatever;
+}
+
+
+bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+ //fprintf(stderr, "Loop::datacopy_privatized2()\n");
+
+ // check for sanity of parameters
+ std::set<int> same_loop;
+ for (int i = 0; i < array_ref_nums.size(); i++) {
+ int stmt_num = array_ref_nums[i].first;
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+ if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+ if (i == 0) {
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ same_loop = getStatements(lex, 2*level-2);
+ }
+ else if (same_loop.find(stmt_num) == same_loop.end())
+ throw std::invalid_argument("array references for data copy must be located in the same subloop");
+ }
+
+ // convert array reference numbering scheme to actual array references
+ std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+ for (int i = 0; i < array_ref_nums.size(); i++) {
+ if (array_ref_nums[i].second.size() == 0)
+ continue;
+
+ int stmt_num = array_ref_nums[i].first;
+ selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>()));
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
+ std::vector<bool> selected(refs.size(), false);
+ for (int j = 0; j < array_ref_nums[i].second.size(); j++) {
+ int ref_num = array_ref_nums[i].second[j];
+ if (ref_num < 0 || ref_num >= refs.size()) {
+ for (int k = 0; k < refs.size(); k++)
+ delete refs[k];
+ throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
+ }
+ selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]);
+ selected[ref_num] = true;
+ }
+ for (int j = 0; j < refs.size(); j++)
+ if (!selected[j])
+ delete refs[j];
+ }
+ if (selected_refs.size() == 0)
+ throw std::invalid_argument("found no array references to copy");
+
+ // do the copy
+ bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+ return whatever;
+}
+
+
+//
+// Implement low level datacopy function with lots of options.
+//
+
+bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs,
+ int level,
+ const std::vector<int> &privatized_levels,
+ bool allow_extra_read,
+ int fastest_changing_dimension,
+ int padding_stride,
+ int padding_alignment,
+ int memory_type) {
+
+ //fprintf(stderr, "\nLoop::datacopy_privatized3() *****\n");
+ //fprintf(stderr, "privatized_levels.size() %d\n", privatized_levels.size());
+ //fprintf(stderr, "level %d\n", level);
+
+ if (stmt_refs.size() == 0)
+ return true;
+
+ // check for sanity of parameters
+ IR_ArraySymbol *sym = NULL;
+ std::vector<int> lex;
+ std::set<int> active;
+ if (level <= 0)
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ if (i == 0) {
+ if (privatized_levels[i] < level)
+ throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level));
+ }
+ else if (privatized_levels[i] <= privatized_levels[i-1])
+ throw std::invalid_argument("privatized loop levels must be in ascending order");
+ }
+ for (int i = 0; i < stmt_refs.size(); i++) {
+ int stmt_num = stmt_refs[i].first;
+ active.insert(stmt_num);
+ if (stmt_num < 0 || stmt_num >= stmt.size())
+ throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+ if (privatized_levels.size() != 0) {
+ if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num));
+ }
+ else {
+ if (level > stmt[stmt_num].loop_level.size())
+ throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num));
+ }
+ for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ if (sym == NULL) {
+ sym = stmt_refs[i].second[j]->symbol();
+ lex = getLexicalOrder(stmt_num);
+ }
+ else {
+ IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol();
+ if (t->name() != sym->name()) {
+ delete t;
+ delete sym;
+ throw std::invalid_argument("try to copy data from different arrays");
+ }
+ delete t;
+ }
+ }
+ }
+
+ //fprintf(stderr, "sym %p\n", sym);
+ if (!sym) {
+ fprintf(stderr, "sym NULL, gonna die\n"); int *i=0; int j=i[0];
+ }
+
+ if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim()))
+ throw std::invalid_argument("invalid fastest changing dimension for the array to be copied");
+ if (padding_stride < 0)
+ throw std::invalid_argument("invalid temporary array stride requirement");
+ if (padding_alignment == -1 || padding_alignment == 0)
+ throw std::invalid_argument("invalid temporary array alignment requirement");
+
+ int dim = 2*level - 1;
+ int n_dim = sym->n_dim();
+
+
+ if (fastest_changing_dimension == -1)
+ switch (sym->layout_type()) {
+ case IR_ARRAY_LAYOUT_ROW_MAJOR:
+ fastest_changing_dimension = n_dim - 1;
+ break;
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
+ fastest_changing_dimension = 0;
+ break;
+ default:
+ throw loop_error("unsupported array layout");
+ }
+ // OK, parameter sanity checked
+
+
+ // invalidate saved codegen computation
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+
+ // build iteration spaces for all reads and for all writes separately
+ //fprintf(stderr, "dp3: before apply_xform() ARRAY REFS\n");
+ //for (int i = 0; i < stmt_refs.size(); i++) {
+ // for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ // IR_ArrayRef *AR = stmt_refs[i].second[j];
+ // fprintf(stderr, "array ref ij %d %d ", i, j); AR->Dump(); fprintf(stderr, "\n");
+ // }
+ //}
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "stmt %d = ", i);
+ // stmt[i].code->dump();
+ // fprintf(stderr, "\n");
+ //}
+
+ apply_xform(active);
+ //fprintf(stderr, "dp3: back from apply_xform() ARRAY REFS\n");
+
+ //for (int i = 0; i < stmt_refs.size(); i++) {
+ // for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ // IR_ArrayRef *AR = stmt_refs[i].second[j];
+ // fprintf(stderr, "array ref ij %d %d ", i, j);
+ // AR->Dump();
+ // fprintf(stderr, "\n");
+ // }
+ //}
+
+ //for (int i=0; i<stmt.size(); i++) {
+ // fprintf(stderr, "stmt %d = ", i);
+ // stmt[i].code->dump();
+ // fprintf(stderr, "\n");
+ //}
+
+
+ bool has_write_refs = false;
+ bool has_read_refs = false;
+ Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim);
+ Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim);
+ //fprintf(stderr, "\n\ni range: 0-%d\n", -1 + stmt_refs.size());
+ int stmt_num = stmt_refs[0].first;
+ for (int i = 0; i < stmt_refs.size(); i++) {
+ int stmt_num = stmt_refs[i].first;
+
+ //fprintf(stderr, "j range: 0-%d\n", -1 + stmt_refs[i].second.size());
+
+ for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ //fprintf(stderr, "ij %d %d\n", i, j);
+
+ Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim);
+ for (int k = 1; k <= mapping.n_inp(); k++)
+ mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name());
+ mapping.setup_names();
+ mapping.print(); fflush(stdout); // "{[I] -> [_t1] : I = _t1 }
+
+ F_And *f_root = mapping.add_and();
+ for (int k = 1; k <= level-1; k++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(k), 1);
+ h.update_coef(mapping.output_var(k), -1);
+ }
+ for (int k = 0; k < privatized_levels.size(); k++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(privatized_levels[k]), 1);
+ h.update_coef(mapping.output_var(level+k), -1);
+ }
+ for (int k = 0; k < n_dim; k++) {
+ IR_ArrayRef *AR = stmt_refs[i].second[j];
+ //fprintf(stderr, "array ref ");
+ AR->Dump();
+
+ CG_outputRepr *repr = stmt_refs[i].second[j]->index(k);
+ //fprintf(stderr, "k %d j %d repr ", k, j); repr->dump(); fflush(stdout);
+
+ exp2formula(ir,
+ mapping,
+ f_root,
+ freevar,
+ repr,
+ mapping.output_var(level-1+privatized_levels.size()+k+1),
+ 'w',
+ IR_COND_EQ,
+ false,
+ uninterpreted_symbols[stmt_num],
+ uninterpreted_symbols_stringrepr[stmt_num]);
+ repr->clear();
+ delete repr;
+ }
+ Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set()))));
+ if (stmt_refs[i].second[j]->is_write()) {
+ has_write_refs = true;
+ wo_copy_is = Union(wo_copy_is, r);
+ wo_copy_is.simplify(2, 4);
+
+
+ }
+ else {
+ has_read_refs = true;
+ ro_copy_is = Union(ro_copy_is, r);
+ ro_copy_is.simplify(2, 4);
+
+ }
+ }
+ }
+
+ //fprintf(stderr, "dp3: simplify\n");
+ // simplify read and write footprint iteration space
+ {
+ if (allow_extra_read)
+ ro_copy_is = SimpleHull(ro_copy_is, true, true);
+ else
+ ro_copy_is = ConvexRepresentation(ro_copy_is);
+
+ wo_copy_is = ConvexRepresentation(wo_copy_is);
+ if (wo_copy_is.number_of_conjuncts() > 1) {
+ Relation t = SimpleHull(wo_copy_is, true, true);
+ if (Must_Be_Subset(copy(t), copy(ro_copy_is)))
+ wo_copy_is = t;
+ else if (Must_Be_Subset(copy(wo_copy_is), copy(ro_copy_is)))
+ wo_copy_is = ro_copy_is;
+ }
+ }
+
+ // make copy statement variable names match the ones in the original statements which
+ // already have the same names due to apply_xform
+ {
+ int ref_stmt = *active.begin();
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
+ if (stmt[*i].IS.n_set() > stmt[ref_stmt].IS.n_set())
+ ref_stmt = *i;
+ for (int i = 1; i < level; i++) {
+ std::string s = stmt[ref_stmt].IS.input_var(i)->name();
+ wo_copy_is.name_set_var(i, s);
+ ro_copy_is.name_set_var(i, s);
+ }
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name();
+ wo_copy_is.name_set_var(level+i, s);
+ ro_copy_is.name_set_var(level+i, s);
+ }
+ for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) {
+ std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size());
+ wo_copy_is.name_set_var(i, s);
+ ro_copy_is.name_set_var(i, s);
+ }
+ tmp_loop_var_name_counter += n_dim;
+ wo_copy_is.setup_names();
+ ro_copy_is.setup_names();
+ }
+
+ //fprintf(stderr, "\ndp3: build merged\n");
+ // build merged footprint iteration space for calculating temporary array size
+ Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true);
+
+ // extract temporary array information
+ CG_outputBuilder *ocg = ir->builder();
+ std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL
+ std::vector<coef_t> index_stride(n_dim);
+ std::vector<bool> is_index_eq(n_dim, false);
+ std::vector<std::pair<int, CG_outputRepr *> > index_sz(0);
+ Relation reduced_copy_is = copy(copy_is);
+
+ for (int i = 0; i < n_dim; i++) {
+ //fprintf(stderr, "i %d/%d\n", i, n_dim);
+ if (i != 0)
+ reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var);
+ Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i);
+
+ //fprintf(stderr, "dp3: extract stride\n");
+ // extract stride
+ std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1));
+ if (result.second != NULL)
+ index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1))));
+ else
+ index_stride[i] = 1;
+ //fprintf(stderr, "dp3: index_stride[%d] = %d\n", i, index_stride[i]);
+
+ // check if this array index requires loop
+ Conjunct *c = bound.query_DNF()->single_conjunct();
+ for (EQ_Iterator ei(c->EQs()); ei; ei++) {
+ //fprintf(stderr, "dp3: for\n");
+ if ((*ei).has_wildcards())
+ continue;
+
+ //fprintf(stderr, "dp3: no wildcards\n");
+ int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1));
+ if (coef != 0) {
+ //fprintf(stderr, "coef != 0\n");
+ int sign = 1;
+ if (coef < 0) {
+ //fprintf(stderr, "coef < 0\n");
+ coef = -coef;
+ sign = -1;
+ }
+
+ CG_outputRepr *op = NULL;
+ for (Constr_Vars_Iter ci(*ei); ci; ci++) {
+ //fprintf(stderr, "dp3: ci\n");
+ switch ((*ci).var->kind()) {
+ case Input_Var:
+ {
+ //fprintf(stderr, "dp3: Input_Var\n");
+ if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) {
+ //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign);
+
+ if ((*ci).coef*sign == 1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef*sign == -1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name()));
+ else if ((*ci).coef*sign > 1)
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name())));
+ else // (*ci).coef*sign < -1
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name())));
+ }
+ break;
+ }
+ case Global_Var:
+ {
+ //fprintf(stderr, "dp3: Global_Var\n");
+ Global_Var_ID g = (*ci).var->get_global_var();
+ if ((*ci).coef*sign == 1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef*sign == -1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef*sign > 1)
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name())));
+ else // (*ci).coef*sign < -1
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error("unsupported array index expression");
+ }
+ }
+ if ((*ei).get_const() != 0)
+ op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const())));
+ if (coef != 1)
+ op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef));
+
+ index_lb[i] = op;
+ is_index_eq[i] = true;
+ break;
+ }
+ }
+ if (is_index_eq[i])
+ continue;
+
+ //fprintf(stderr, "dp3: separate lower and upper bounds\n");
+ // separate lower and upper bounds
+ std::vector<GEQ_Handle> lb_list, ub_list;
+ std::set<Variable_ID> excluded_floor_vars;
+ excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1));
+ for (GEQ_Iterator gi(c->GEQs()); gi; gi++) {
+ int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1));
+ if (coef != 0 && (*gi).has_wildcards()) {
+ bool clean_bound = true;
+ GEQ_Handle h;
+ for (Constr_Vars_Iter cvi(*gi, true); gi; gi++)
+ if (!find_floor_definition(bound, (*cvi).var, excluded_floor_vars).first) {
+ clean_bound = false;
+ break;
+ }
+ if (!clean_bound)
+ continue;
+ }
+
+ if (coef > 0)
+ lb_list.push_back(*gi);
+ else if (coef < 0)
+ ub_list.push_back(*gi);
+ }
+ if (lb_list.size() == 0 || ub_list.size() == 0)
+ throw loop_error("failed to calcuate array footprint size");
+
+ //fprintf(stderr, "dp3: build lower bound representation\n");
+ // build lower bound representation
+ std::vector<CG_outputRepr *> lb_repr_list;
+ for (int j = 0; j < lb_list.size(); j++){
+ if(this->known.n_set() == 0) {
+ lb_repr_list.push_back(output_lower_bound_repr(ocg,
+ lb_list[j],
+ bound.set_var(level-1+privatized_levels.size()+i+1),
+ result.first,
+ result.second,
+ bound,
+ Relation::True(bound.n_set()),
+ std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)),
+ uninterpreted_symbols[stmt_num]));
+ }
+ else {
+ lb_repr_list.push_back(output_lower_bound_repr(ocg,
+ lb_list[j],
+ bound.set_var(level-1+privatized_levels.size()+i+1),
+ result.first,
+ result.second,
+ bound,
+ this->known,
+ std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)),
+ uninterpreted_symbols[stmt_num]));
+ }
+ }
+ if (lb_repr_list.size() > 1) {
+ //fprintf(stderr, "loop_datacopy.cc dp3 createInvoke( max )\n");
+ index_lb[i] = ocg->CreateInvoke("max", lb_repr_list);
+ }
+ else if (lb_repr_list.size() == 1)
+ index_lb[i] = lb_repr_list[0];
+
+ //fprintf(stderr, "dp3: build temporary array size representation\n");
+ // build temporary array size representation
+ {
+ Relation cal(copy_is.n_set(), 1);
+ F_And *f_root = cal.add_and();
+ for (int j = 0; j < ub_list.size(); j++)
+ for (int k = 0; k < lb_list.size(); k++) {
+ GEQ_Handle h = f_root->add_GEQ();
+
+ for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var:
+ {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var:
+ {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot calculate temporay array size statically");
+ }
+ }
+ h.update_const(ub_list[j].get_const());
+
+ for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) {
+ switch ((*ci).var->kind()) {
+ case Input_Var:
+ {
+ int pos = (*ci).var->get_position();
+ h.update_coef(cal.input_var(pos), (*ci).coef);
+ break;
+ }
+ case Global_Var:
+ {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ Variable_ID v;
+ if (g->arity() == 0)
+ v = cal.get_local(g);
+ else
+ v = cal.get_local(g, (*ci).var->function_of());
+ h.update_coef(v, (*ci).coef);
+ break;
+ }
+ default:
+ throw loop_error("cannot calculate temporay array size statically");
+ }
+ }
+ h.update_const(lb_list[k].get_const());
+
+ h.update_const(1);
+ h.update_coef(cal.output_var(1), -1);
+ }
+
+ cal = Restrict_Domain(cal, copy(copy_is));
+ for (int j = 1; j <= cal.n_inp(); j++)
+ cal = Project(cal, j, Input_Var);
+ cal.simplify();
+
+ //fprintf(stderr, "dp3: pad temporary array size\n");
+ // pad temporary array size
+ // TODO: for variable array size, create padding formula
+ Conjunct *c = cal.query_DNF()->single_conjunct();
+ bool is_index_bound_const = false;
+ for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++)
+ if ((*gi).is_const(cal.output_var(1))) {
+ coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1)));
+ if (padding_stride != 0) {
+ size = (size + index_stride[i] - 1) / index_stride[i];
+ if (i == fastest_changing_dimension)
+ size = size * padding_stride;
+ }
+ if (i == fastest_changing_dimension) {
+ if (padding_alignment > 1) { // align to boundary for data packing
+ int residue = size % padding_alignment;
+ if (residue)
+ size = size+padding_alignment-residue;
+ }
+ else if (padding_alignment < -1) { // un-alignment for memory bank conflicts
+ while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1)
+ size++;
+ }
+ }
+ index_sz.push_back(std::make_pair(i, ocg->CreateInt(size)));
+ is_index_bound_const = true;
+ }
+
+ if (!is_index_bound_const) {
+ for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) {
+ int coef = (*gi).get_coef(cal.output_var(1));
+ if (coef < 0) {
+ CG_outputRepr *op = NULL;
+ for (Constr_Vars_Iter ci(*gi); ci; ci++) {
+ if ((*ci).var != cal.output_var(1)) {
+ switch((*ci).var->kind()) {
+ case Global_Var:
+ {
+ Global_Var_ID g = (*ci).var->get_global_var();
+ if ((*ci).coef == 1)
+ op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef == -1)
+ op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name()));
+ else if ((*ci).coef > 1)
+ op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name())));
+ else // (*ci).coef < -1
+ op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name())));
+ break;
+ }
+ default:
+ throw loop_error("failed to generate array index bound code");
+ }
+ }
+ }
+ int c = (*gi).get_const();
+ if (c > 0)
+ op = ocg->CreatePlus(op, ocg->CreateInt(c));
+ else if (c < 0)
+ op = ocg->CreateMinus(op, ocg->CreateInt(-c));
+ if (padding_stride != 0) {
+ if (i == fastest_changing_dimension) {
+ coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[i] / g;
+ if (t1 != 1)
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ op = ocg->CreateTimes(op, ocg->CreateInt(t2));
+ }
+ else if (index_stride[i] != 1) {
+ op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i]));
+ }
+ }
+
+ index_sz.push_back(std::make_pair(i, op));
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ //fprintf(stderr, "dp3: change the temporary array index order\n");
+ // change the temporary array index order
+ for (int i = 0; i < index_sz.size(); i++) {
+ if (index_sz[i].first == fastest_changing_dimension)
+ switch (sym->layout_type()) {
+ case IR_ARRAY_LAYOUT_ROW_MAJOR:
+ std::swap(index_sz[index_sz.size()-1], index_sz[i]);
+ break;
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR:
+ std::swap(index_sz[0], index_sz[i]);
+ break;
+ default:
+ throw loop_error("unsupported array layout");
+ }
+ }
+
+ //fprintf(stderr, "dp3: declare temporary array or scalar\n");
+ // declare temporary array or scalar
+ IR_Symbol *tmp_sym;
+ if (index_sz.size() == 0) {
+ //fprintf(stderr, "tmp_sym is a scalar\n");
+ tmp_sym = ir->CreateScalarSymbol(sym, memory_type);
+ }
+ else {
+ //fprintf(stderr, "tmp_sym is an array\n");
+ std::vector<CG_outputRepr *> tmp_array_size(index_sz.size());
+ for (int i = 0; i < index_sz.size(); i++) {
+ tmp_array_size[i] = index_sz[i].second->clone();
+ index_sz[i].second->dump(); // THIS PRINTF
+ }
+ tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type);
+ }
+
+ //fprintf(stderr, "dp3: create temporary array read initialization code\n");
+ // create temporary array read initialization code
+ CG_outputRepr *copy_code_read;
+ if (has_read_refs) {
+ //fprintf(stderr, "has read refs\n");
+ if (index_sz.size() == 0) {
+ //fprintf(stderr, "if\n");
+
+ //fprintf(stderr, "tmp sym %s\n", tmp_sym->name().c_str());
+ IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); // create ref from symbol
+ // tmp_scalar_ref is incomplete
+
+ std::vector<CG_outputRepr *> rhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++) {
+ //fprintf(stderr, "i %d\n", i);
+ if (is_index_eq[i])
+ rhs_index[i] = index_lb[i]->clone();
+ else
+ rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ }
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
+
+ // IR_ScalarRef tmp_scalar_ref has no actual reference yet. It only has the variable definition.
+ copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert());
+ //fprintf(stderr, "if ends\n");
+ }
+ else {
+ //fprintf(stderr, "else\n");
+ std::vector<CG_outputRepr *> lhs_index(index_sz.size());
+ for (int i = 0; i < index_sz.size(); i++) {
+ int cur_index_num = index_sz[i].first;
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone());
+ if (padding_stride != 0) {
+ if (i == n_dim-1) {
+ coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[cur_index_num] / g;
+ if (t1 != 1)
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
+ }
+ else if (index_stride[cur_index_num] != 1) {
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
+ }
+ }
+
+ if (ir->ArrayIndexStartAt() != 0)
+ cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
+ lhs_index[i] = cur_index_repr;
+ }
+
+ //fprintf(stderr, "dp3: making tmp_array_ref\n");
+ IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index);
+ //fprintf(stderr, "dp3: DONE making tmp_array_ref\n");
+
+ std::vector<CG_outputRepr *> rhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++)
+ if (is_index_eq[i])
+ rhs_index[i] = index_lb[i]->clone();
+ else
+ rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
+
+ //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment\n");
+ //copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert());
+ CG_outputRepr *lhs = tmp_array_ref->convert();
+ CG_outputRepr *rhs = copied_array_ref->convert();
+ copy_code_read = ir->builder()->CreateAssignment(0, lhs, rhs); //tmp_array_ref->convert(), copied_array_ref->convert());
+ //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment DONE\n\n");
+ }
+ } // has read refs
+
+ //fprintf(stderr, "dp3: create temporary array write back code\n");
+ // create temporary array write back code
+ CG_outputRepr *copy_code_write;
+ if (has_write_refs) {
+ //fprintf(stderr, "has_write_refs\n");
+ if (index_sz.size() == 0) {
+ //fprintf(stderr, "index_sz.size() == 0\n");
+ IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym));
+
+ std::vector<CG_outputRepr *> rhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++)
+ if (is_index_eq[i])
+ rhs_index[i] = index_lb[i]->clone();
+ else
+ rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index);
+
+ copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert());
+ }
+ else {
+ //fprintf(stderr, "index_sz.size() NOT = 0\n");
+
+ std::vector<CG_outputRepr *> lhs_index(n_dim);
+ for (int i = 0; i < index_lb.size(); i++)
+ if (is_index_eq[i])
+ lhs_index[i] = index_lb[i]->clone();
+ else
+ lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name());
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index);
+
+ std::vector<CG_outputRepr *> rhs_index(index_sz.size());
+ for (int i = 0; i < index_sz.size(); i++) {
+ int cur_index_num = index_sz[i].first;
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone());
+ if (padding_stride != 0) {
+ if (i == n_dim-1) {
+ coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[cur_index_num] / g;
+ if (t1 != 1)
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
+ }
+ else if (index_stride[cur_index_num] != 1) {
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
+ }
+ }
+
+ if (ir->ArrayIndexStartAt() != 0)
+ cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
+ rhs_index[i] = cur_index_repr;
+ }
+ IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index);
+
+ copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert());
+ }
+ } // has write refs
+
+ // now we can remove those loops for array indexes that are
+ // dependent on others
+ //fprintf(stderr, "dp3: now we can remove those loops\n");
+ if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) {
+ Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size());
+ F_And *f_root = mapping.add_and();
+ for (int i = 1; i <= level-1+privatized_levels.size(); i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(i), 1);
+ h.update_coef(mapping.output_var(i), -1);
+ }
+
+ int cur_index = 0;
+ std::vector<int> mapped_index(index_sz.size());
+ for (int i = 0; i < n_dim; i++)
+ if (!is_index_eq[i]) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1);
+ switch (sym->layout_type()) {
+ case IR_ARRAY_LAYOUT_COLUMN_MAJOR: {
+ h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1);
+ mapped_index[index_sz.size()-cur_index-1] = i;
+ break;
+ }
+ case IR_ARRAY_LAYOUT_ROW_MAJOR: {
+ h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1);
+ mapped_index[cur_index] = i;
+ break;
+ }
+ default:
+ throw loop_error("unsupported array layout");
+ }
+ cur_index++;
+ }
+
+ wo_copy_is = omega::Range(Restrict_Domain(copy(mapping), wo_copy_is));
+ ro_copy_is = omega::Range(Restrict_Domain(copy(mapping), ro_copy_is));
+ for (int i = 1; i <= level-1+privatized_levels.size(); i++) {
+ wo_copy_is.name_set_var(i, copy_is.set_var(i)->name());
+ ro_copy_is.name_set_var(i, copy_is.set_var(i)->name());
+ }
+ for (int i = 0; i < index_sz.size(); i++) {
+ wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name());
+ ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name());
+ }
+ wo_copy_is.setup_names();
+ ro_copy_is.setup_names();
+ }
+
+ // insert read copy statement
+ //fprintf(stderr, "dp3: insert read copy statement\n");
+
+ int old_num_stmt = stmt.size();
+ int ro_copy_stmt_num = -1;
+ if (has_read_refs) {
+ Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1);
+ {
+ F_And *f_root = copy_xform.add_and();
+ for (int i = 1; i <= ro_copy_is.n_set(); i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.input_var(i), 1);
+ h.update_coef(copy_xform.output_var(2*i), -1);
+ }
+ for (int i = 1; i <= dim; i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), -1);
+ h.update_const(lex[i-1]);
+ }
+ for (int i = dim+2; i <= copy_xform.n_out(); i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), 1);
+ }
+ }
+
+ Statement copy_stmt_read;
+ copy_stmt_read.IS = ro_copy_is;
+ copy_stmt_read.xform = copy_xform;
+ copy_stmt_read.code = copy_code_read;
+ //fprintf(stderr, "dp3: copy_stmt_read.code = \n");
+ copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set());
+ copy_stmt_read.ir_stmt_node = NULL;
+ copy_stmt_read.has_inspector = false;
+ for (int i = 0; i < level-1; i++) {
+ copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type;
+ if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile &&
+ stmt[*(active.begin())].loop_level[i].payload >= level) {
+ int j;
+ for (j = 0; j < privatized_levels.size(); j++)
+ if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload)
+ break;
+ if (j == privatized_levels.size())
+ copy_stmt_read.loop_level[i].payload = -1;
+ else
+ copy_stmt_read.loop_level[i].payload = level + j;
+ }
+ else
+ copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload;
+ copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level;
+ }
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
+ copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
+ copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
+ }
+ int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1);
+ for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) {
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+ for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) {
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1;
+ copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+
+
+ shiftLexicalOrder(lex, dim-1, 1);
+
+ fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size());
+ stmt.push_back(copy_stmt_read);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[*(active.begin())]);
+ ro_copy_stmt_num = stmt.size() - 1;
+ dep.insert();
+ }
+
+ //fprintf(stderr, "dp3: insert write copy statement\n");
+ // insert write copy statement
+ int wo_copy_stmt_num = -1;
+ if (has_write_refs) {
+ Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1);
+ {
+ F_And *f_root = copy_xform.add_and();
+ for (int i = 1; i <= wo_copy_is.n_set(); i++) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.input_var(i), 1);
+ h.update_coef(copy_xform.output_var(2*i), -1);
+ }
+ for (int i = 1; i <= dim; i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), -1);
+ h.update_const(lex[i-1]);
+ }
+ for (int i = dim+2; i <= copy_xform.n_out(); i+=2) {
+ EQ_Handle h = f_root->add_EQ();
+ h.update_coef(copy_xform.output_var(i), 1);
+ }
+ }
+
+ Statement copy_stmt_write;
+ copy_stmt_write.IS = wo_copy_is;
+ copy_stmt_write.xform = copy_xform;
+ copy_stmt_write.code = copy_code_write;
+ copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set());
+ copy_stmt_write.ir_stmt_node = NULL;
+ copy_stmt_write.has_inspector = false;
+
+ for (int i = 0; i < level-1; i++) {
+ copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type;
+ if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile &&
+ stmt[*(active.begin())].loop_level[i].payload >= level) {
+ int j;
+ for (j = 0; j < privatized_levels.size(); j++)
+ if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload)
+ break;
+ if (j == privatized_levels.size())
+ copy_stmt_write.loop_level[i].payload = -1;
+ else
+ copy_stmt_write.loop_level[i].payload = level + j;
+ }
+ else
+ copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload;
+ copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level;
+ }
+ for (int i = 0; i < privatized_levels.size(); i++) {
+ copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type;
+ copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload;
+ copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level;
+ }
+ int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1);
+ for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) {
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+ for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) {
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1;
+ copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0;
+ }
+ lex[dim-1]++;
+ shiftLexicalOrder(lex, dim-1, -2);
+
+ fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size());
+ stmt.push_back(copy_stmt_write);
+
+ uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]);
+ uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[*(active.begin())]);
+ wo_copy_stmt_num = stmt.size() - 1;
+ dep.insert();
+ }
+
+ //fprintf(stderr, "replace original array accesses with temporary array accesses\n");
+ // replace original array accesses with temporary array accesses
+ for (int i =0; i < stmt_refs.size(); i++)
+ for (int j = 0; j < stmt_refs[i].second.size(); j++) {
+ if (index_sz.size() == 0) {
+ IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym));
+ //fprintf(stderr, "dp3: loop_datacopy.cc calling ReplaceExpression i%d j%d\n", i, j);
+ ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert());
+ }
+ else {
+ std::vector<CG_outputRepr *> index_repr(index_sz.size());
+ for (int k = 0; k < index_sz.size(); k++) {
+ int cur_index_num = index_sz[k].first;
+
+ CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone());
+ if (padding_stride != 0) {
+ if (k == n_dim-1) {
+ coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride));
+ coef_t t1 = index_stride[cur_index_num] / g;
+ if (t1 != 1)
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1));
+ coef_t t2 = padding_stride / g;
+ if (t2 != 1)
+ cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2));
+ }
+ else if (index_stride[cur_index_num] != 1) {
+ cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num]));
+ }
+ }
+
+ if (ir->ArrayIndexStartAt() != 0)
+ cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt()));
+ index_repr[k] = cur_index_repr;
+ }
+
+ IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr);
+ //fprintf(stderr, "loop_datacopy.cc ir->ReplaceExpression( ... )\n");
+ ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert());
+ }
+ }
+
+ // update dependence graph
+ //fprintf(stderr, "update dependence graph\n");
+
+ int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1;
+ if (ro_copy_stmt_num != -1) {
+ for (int i = 0; i < old_num_stmt; i++) {
+ std::vector<std::vector<DependenceVector> > D;
+
+ for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) {
+ if (active.find(i) != active.end() && active.find(j->first) == active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ dep.connect(ro_copy_stmt_num, j->first, dvs1);
+ }
+ else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ D.push_back(dvs1);
+ }
+
+ if (j->second.size() == 0)
+ dep.vertex[i].second.erase(j++);
+ else
+ j++;
+ }
+
+ for (int j = 0; j < D.size(); j++)
+ dep.connect(i, ro_copy_stmt_num, D[j]);
+ }
+
+ // insert dependences from copy statement loop to copied statements
+ //fprintf(stderr, "insert dependences from copy statement loop to copied statements\n");
+
+ DependenceVector dv;
+ dv.type = DEP_W2R;
+ dv.sym = tmp_sym->clone();
+ dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
+ dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
+ for (int i = dep_dim; i < dep.num_dim(); i++) {
+ dv.lbounds[i] = -posInfinity;
+ dv.ubounds[i] = posInfinity;
+ }
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
+ dep.connect(ro_copy_stmt_num, *i, dv);
+ }
+
+ if (wo_copy_stmt_num != -1) {
+ for (int i = 0; i < old_num_stmt; i++) {
+ std::vector<std::vector<DependenceVector> > D;
+
+ for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) {
+ if (active.find(i) != active.end() && active.find(j->first) == active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ dep.connect(wo_copy_stmt_num, j->first, dvs1);
+ }
+ else if (active.find(i) == active.end() && active.find(j->first) != active.end()) {
+ std::vector<DependenceVector> dvs1, dvs2;
+ for (int k = 0; k < j->second.size(); k++) {
+ DependenceVector dv = j->second[k];
+ if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W))
+ dvs1.push_back(dv);
+ else
+ dvs2.push_back(dv);
+ }
+ j->second = dvs2;
+ if (dvs1.size() > 0)
+ D.push_back(dvs1);
+ }
+
+ if (j->second.size() == 0)
+ dep.vertex[i].second.erase(j++);
+ else
+ j++;
+ }
+
+ for (int j = 0; j < D.size(); j++)
+ dep.connect(i, wo_copy_stmt_num, D[j]);
+ }
+
+ // insert dependences from copied statements to write statements
+ //fprintf(stderr, "dp3: insert dependences from copied statements to write statements\n");
+
+ DependenceVector dv;
+ dv.type = DEP_W2R;
+ dv.sym = tmp_sym->clone();
+ dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
+ dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
+ for (int i = dep_dim; i < dep.num_dim(); i++) {
+ dv.lbounds[i] = -posInfinity;
+ dv.ubounds[i] = posInfinity;
+ }
+ for (std::set<int>::iterator i = active.begin(); i != active.end(); i++)
+ dep.connect(*i, wo_copy_stmt_num, dv);
+
+ }
+
+ // update variable name for dependences among copied statements
+ for (int i = 0; i < old_num_stmt; i++) {
+ if (active.find(i) != active.end())
+ for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++)
+ if (active.find(j->first) != active.end())
+ for (int k = 0; k < j->second.size(); k++) {
+ IR_Symbol *s = tmp_sym->clone();
+ j->second[k].sym = s;
+ }
+ }
+
+ // insert anti-dependence from write statement to read statement
+ if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1)
+ if (dep_dim >= 0) {
+ DependenceVector dv;
+ dv.type = DEP_R2W;
+ dv.sym = tmp_sym->clone();
+ dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0);
+ dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0);
+ for (int k = dep_dim; k < dep.num_dim(); k++) {
+ dv.lbounds[k] = -posInfinity;
+ dv.ubounds[k] = posInfinity;
+ }
+ for (int k = 0; k < dep_dim; k++) {
+ if (k != 0) {
+ dv.lbounds[k-1] = 0;
+ dv.ubounds[k-1] = 0;
+ }
+ dv.lbounds[k] = 1;
+ dv.ubounds[k] = posInfinity;
+ dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv);
+ }
+ }
+
+ //fprintf(stderr, "Loop::datacopy_privatized3() cleanup\n");
+ // cleanup
+ delete sym;
+ delete tmp_sym;
+ for (int i = 0; i < index_lb.size(); i++) {
+ index_lb[i]->clear();
+ delete index_lb[i];
+ }
+ for (int i = 0; i < index_sz.size(); i++) {
+ index_sz[i].second->clear();
+ delete index_sz[i].second;
+ }
+
+ return true;
+}
+
+
+