diff options
Diffstat (limited to 'src/transformations/loop_datacopy.cc')
-rw-r--r-- | src/transformations/loop_datacopy.cc | 811 |
1 files changed, 420 insertions, 391 deletions
diff --git a/src/transformations/loop_datacopy.cc b/src/transformations/loop_datacopy.cc index 12d74fd..69fbd5b 100644 --- a/src/transformations/loop_datacopy.cc +++ b/src/transformations/loop_datacopy.cc @@ -27,7 +27,8 @@ using namespace omega; // parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] // bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, - bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, + int memory_type) { //fprintf(stderr, "Loop::datacopy()\n"); // check for sanity of parameters @@ -40,18 +41,17 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array throw std::invalid_argument("invalid loop level " + to_string(level)); if (i == 0) { std::vector<int> lex = getLexicalOrder(stmt_num); - same_loop = getStatements(lex, 2*level-2); - } - else if (same_loop.find(stmt_num) == same_loop.end()) + same_loop = getStatements(lex, 2 * level - 2); + } else if (same_loop.find(stmt_num) == same_loop.end()) throw std::invalid_argument("array references for data copy must be located in the same subloop"); } - + // convert array reference numbering scheme to actual array references std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; for (int i = 0; i < array_ref_nums.size(); i++) { if (array_ref_nums[i].second.size() == 0) continue; - + int stmt_num = array_ref_nums[i].first; selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); @@ -61,9 +61,10 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array if (ref_num < 0 || ref_num >= refs.size()) { for (int k = 0; k < refs.size(); k++) delete refs[k]; - throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + throw std::invalid_argument( + "invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); } - selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected_refs[selected_refs.size() - 1].second.push_back(refs[ref_num]); selected[ref_num] = true; } for (int j = 0; j < refs.size(); j++) @@ -72,9 +73,10 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array } if (selected_refs.size() == 0) throw std::invalid_argument("found no array references to copy"); - + // do the copy - bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); return whatever; } @@ -84,9 +86,10 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array // parameter array_name=A means to copy data touched by A[i-1] and A[i] // bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, - bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, + int memory_type) { - fflush(stdout); + fflush(stdout); //fprintf(stderr, "Loop::datacopy2()\n"); //fprintf(stderr, "array name %s stmt num %d\n", array_name.c_str(), stmt_num); @@ -95,23 +98,23 @@ bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); - + // collect array references by name std::vector<int> lex = getLexicalOrder(stmt_num); - int dim = 2*level - 1; - std::set<int> same_loop = getStatements(lex, dim-1); - + int dim = 2 * level - 1; + std::set<int> same_loop = getStatements(lex, dim - 1); + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { std::vector<IR_ArrayRef *> t; - std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); for (int j = 0; j < refs.size(); j++) if (refs[j]->name() == array_name) t.push_back(refs[j]); else delete refs[j]; if (t.size() != 0) - selected_refs.push_back(std::make_pair(*i, t)); + selected_refs.push_back(std::make_pair(*i, t)); } //fprintf(stderr, "selected refs:\n"); @@ -122,27 +125,30 @@ bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, if (selected_refs.size() == 0) throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); - + IR_ArrayRef *AR = selected_refs[0].second[0]; //IR_roseArrayRef *RAR = (IR_roseArrayRef *)AR; //fprintf(stderr, "before datacopy_privatized, "); //AR->Dump(); - + // do the copy //fprintf(stderr, "\nLoop::datacopy2 calling privatized\n"); - bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); //AR = selected_refs[0].second[0]; //fprintf(stderr, "after datacopy_privatized, "); //AR->Dump(); - + return whatever; } -bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, - bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, + const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, + int padding_alignment, int memory_type) { //fprintf(stderr, "Loop::datacopy_privatized()\n"); // check for sanity of parameters @@ -150,33 +156,37 @@ bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); if (level <= 0 || level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level)); - + // collect array references by name std::vector<int> lex = getLexicalOrder(stmt_num); - int dim = 2*level - 1; - std::set<int> same_loop = getStatements(lex, dim-1); - + int dim = 2 * level - 1; + std::set<int> same_loop = getStatements(lex, dim - 1); + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>())); - - std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); for (int j = 0; j < refs.size(); j++) if (refs[j]->name() == array_name) - selected_refs[selected_refs.size()-1].second.push_back(refs[j]); + selected_refs[selected_refs.size() - 1].second.push_back(refs[j]); else delete refs[j]; } if (selected_refs.size() == 0) throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); - + // do the copy - bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); return whatever; } -bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, + const std::vector<int> &privatized_levels, bool allow_extra_read, + int fastest_changing_dimension, int padding_stride, int padding_alignment, + int memory_type) { //fprintf(stderr, "Loop::datacopy_privatized2()\n"); // check for sanity of parameters @@ -189,18 +199,17 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> throw std::invalid_argument("invalid loop level " + to_string(level)); if (i == 0) { std::vector<int> lex = getLexicalOrder(stmt_num); - same_loop = getStatements(lex, 2*level-2); - } - else if (same_loop.find(stmt_num) == same_loop.end()) + same_loop = getStatements(lex, 2 * level - 2); + } else if (same_loop.find(stmt_num) == same_loop.end()) throw std::invalid_argument("array references for data copy must be located in the same subloop"); } - + // convert array reference numbering scheme to actual array references std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; for (int i = 0; i < array_ref_nums.size(); i++) { if (array_ref_nums[i].second.size() == 0) continue; - + int stmt_num = array_ref_nums[i].first; selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); @@ -210,9 +219,10 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> if (ref_num < 0 || ref_num >= refs.size()) { for (int k = 0; k < refs.size(); k++) delete refs[k]; - throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + throw std::invalid_argument( + "invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); } - selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected_refs[selected_refs.size() - 1].second.push_back(refs[ref_num]); selected[ref_num] = true; } for (int j = 0; j < refs.size(); j++) @@ -221,10 +231,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> } if (selected_refs.size() == 0) throw std::invalid_argument("found no array references to copy"); - + // do the copy - bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); - return whatever; + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; } @@ -232,13 +243,13 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> // Implement low level datacopy function with lots of options. // -bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, const std::vector<int> &privatized_levels, - bool allow_extra_read, + bool allow_extra_read, int fastest_changing_dimension, - int padding_stride, - int padding_alignment, + int padding_stride, + int padding_alignment, int memory_type) { //fprintf(stderr, "\nLoop::datacopy_privatized3() *****\n"); @@ -247,7 +258,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (stmt_refs.size() == 0) return true; - + // check for sanity of parameters IR_ArraySymbol *sym = NULL; std::vector<int> lex; @@ -258,8 +269,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (i == 0) { if (privatized_levels[i] < level) throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); - } - else if (privatized_levels[i] <= privatized_levels[i-1]) + } else if (privatized_levels[i] <= privatized_levels[i - 1]) throw std::invalid_argument("privatized loop levels must be in ascending order"); } for (int i = 0; i < stmt_refs.size(); i++) { @@ -268,10 +278,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); if (privatized_levels.size() != 0) { - if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) - throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); - } - else { + if (privatized_levels[privatized_levels.size() - 1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(privatized_levels[privatized_levels.size() - 1]) + " for statement " + + to_string(stmt_num)); + } else { if (level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); } @@ -279,8 +290,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (sym == NULL) { sym = stmt_refs[i].second[j]->symbol(); lex = getLexicalOrder(stmt_num); - } - else { + } else { IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); if (t->name() != sym->name()) { delete t; @@ -293,8 +303,10 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A } //fprintf(stderr, "sym %p\n", sym); - if (!sym) { - fprintf(stderr, "sym NULL, gonna die\n"); int *i=0; int j=i[0]; + if (!sym) { + fprintf(stderr, "sym NULL, gonna die\n"); + int *i = 0; + int j = i[0]; } if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) @@ -303,31 +315,31 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A throw std::invalid_argument("invalid temporary array stride requirement"); if (padding_alignment == -1 || padding_alignment == 0) throw std::invalid_argument("invalid temporary array alignment requirement"); - - int dim = 2*level - 1; + + int dim = 2 * level - 1; int n_dim = sym->n_dim(); - + if (fastest_changing_dimension == -1) switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_ROW_MAJOR: - fastest_changing_dimension = n_dim - 1; - break; - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: - fastest_changing_dimension = 0; - break; - default: - throw loop_error("unsupported array layout"); + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported array layout"); } // OK, parameter sanity checked - + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + // build iteration spaces for all reads and for all writes separately //fprintf(stderr, "dp3: before apply_xform() ARRAY REFS\n"); //for (int i = 0; i < stmt_refs.size(); i++) { @@ -360,29 +372,30 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A // fprintf(stderr, "\n"); //} - + bool has_write_refs = false; bool has_read_refs = false; - Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); - Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + Relation wo_copy_is = Relation::False(level - 1 + privatized_levels.size() + n_dim); + Relation ro_copy_is = Relation::False(level - 1 + privatized_levels.size() + n_dim); //fprintf(stderr, "\n\ni range: 0-%d\n", -1 + stmt_refs.size()); int stmt_num = stmt_refs[0].first; for (int i = 0; i < stmt_refs.size(); i++) { int stmt_num = stmt_refs[i].first; - + //fprintf(stderr, "j range: 0-%d\n", -1 + stmt_refs[i].second.size()); for (int j = 0; j < stmt_refs[i].second.size(); j++) { //fprintf(stderr, "ij %d %d\n", i, j); - Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); + Relation mapping(stmt[stmt_num].IS.n_set(), level - 1 + privatized_levels.size() + n_dim); for (int k = 1; k <= mapping.n_inp(); k++) mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); mapping.setup_names(); - mapping.print(); fflush(stdout); // "{[I] -> [_t1] : I = _t1 } + mapping.print(); + fflush(stdout); // "{[I] -> [_t1] : I = _t1 } F_And *f_root = mapping.add_and(); - for (int k = 1; k <= level-1; k++) { + for (int k = 1; k <= level - 1; k++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.input_var(k), 1); h.update_coef(mapping.output_var(k), -1); @@ -390,7 +403,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A for (int k = 0; k < privatized_levels.size(); k++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.input_var(privatized_levels[k]), 1); - h.update_coef(mapping.output_var(level+k), -1); + h.update_coef(mapping.output_var(level + k), -1); } for (int k = 0; k < n_dim; k++) { IR_ArrayRef *AR = stmt_refs[i].second[j]; @@ -400,37 +413,39 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); //fprintf(stderr, "k %d j %d repr ", k, j); repr->dump(); fflush(stdout); - exp2formula(ir, - mapping, - f_root, - freevar, - repr, - mapping.output_var(level-1+privatized_levels.size()+k+1), - 'w', - IR_COND_EQ, + exp2formula(ir, + mapping, + f_root, + freevar, + repr, + mapping.output_var(level - 1 + privatized_levels.size() + k + 1), + 'w', + IR_COND_EQ, false, uninterpreted_symbols[stmt_num], uninterpreted_symbols_stringrepr[stmt_num]); repr->clear(); delete repr; } - Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), + Extend_Set(copy(this->known), + stmt[stmt_num].IS.n_set() - + this->known.n_set())))); if (stmt_refs[i].second[j]->is_write()) { has_write_refs = true; wo_copy_is = Union(wo_copy_is, r); wo_copy_is.simplify(2, 4); - - - } - else { + + + } else { has_read_refs = true; ro_copy_is = Union(ro_copy_is, r); ro_copy_is.simplify(2, 4); - + } } } - + //fprintf(stderr, "dp3: simplify\n"); // simplify read and write footprint iteration space { @@ -438,7 +453,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A ro_copy_is = SimpleHull(ro_copy_is, true, true); else ro_copy_is = ConvexRepresentation(ro_copy_is); - + wo_copy_is = ConvexRepresentation(wo_copy_is); if (wo_copy_is.number_of_conjuncts() > 1) { Relation t = SimpleHull(wo_copy_is, true, true); @@ -448,7 +463,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A wo_copy_is = ro_copy_is; } } - + // make copy statement variable names match the ones in the original statements which // already have the same names due to apply_xform { @@ -463,11 +478,12 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A } for (int i = 0; i < privatized_levels.size(); i++) { std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name(); - wo_copy_is.name_set_var(level+i, s); - ro_copy_is.name_set_var(level+i, s); + wo_copy_is.name_set_var(level + i, s); + ro_copy_is.name_set_var(level + i, s); } - for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { - std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); + for (int i = level + privatized_levels.size(); i < level + privatized_levels.size() + n_dim; i++) { + std::string s = + tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter + i - level - privatized_levels.size()); wo_copy_is.name_set_var(i, s); ro_copy_is.name_set_var(i, s); } @@ -475,11 +491,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A wo_copy_is.setup_names(); ro_copy_is.setup_names(); } - + //fprintf(stderr, "\ndp3: build merged\n"); // build merged footprint iteration space for calculating temporary array size Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true); - + // extract temporary array information CG_outputBuilder *ocg = ir->builder(); std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL @@ -487,31 +503,35 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A std::vector<bool> is_index_eq(n_dim, false); std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); Relation reduced_copy_is = copy(copy_is); - + for (int i = 0; i < n_dim; i++) { //fprintf(stderr, "i %d/%d\n", i, n_dim); if (i != 0) - reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); - Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); - + reduced_copy_is = Project(reduced_copy_is, level - 1 + privatized_levels.size() + i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level - 1 + privatized_levels.size() + i); + //fprintf(stderr, "dp3: extract stride\n"); // extract stride - std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1)); + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var( + level - 1 + privatized_levels.size() + i + 1)); if (result.second != NULL) - index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)))); + index_stride[i] = abs(result.first.get_coef(result.second)) / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var( + level - 1 + privatized_levels.size() + i + + 1)))); else index_stride[i] = 1; //fprintf(stderr, "dp3: index_stride[%d] = %d\n", i, index_stride[i]); - + // check if this array index requires loop Conjunct *c = bound.query_DNF()->single_conjunct(); for (EQ_Iterator ei(c->EQs()); ei; ei++) { //fprintf(stderr, "dp3: for\n"); if ((*ei).has_wildcards()) continue; - + //fprintf(stderr, "dp3: no wildcards\n"); - int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + int coef = (*ei).get_coef(bound.set_var(level - 1 + privatized_levels.size() + i + 1)); if (coef != 0) { //fprintf(stderr, "coef != 0\n"); int sign = 1; @@ -520,51 +540,53 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A coef = -coef; sign = -1; } - + CG_outputRepr *op = NULL; for (Constr_Vars_Iter ci(*ei); ci; ci++) { //fprintf(stderr, "dp3: ci\n"); switch ((*ci).var->kind()) { - case Input_Var: - { - //fprintf(stderr, "dp3: Input_Var\n"); - if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) { - //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign); - - if ((*ci).coef*sign == 1) - op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); - else if ((*ci).coef*sign == -1) - op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); - else if ((*ci).coef*sign > 1) - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + case Input_Var: { + //fprintf(stderr, "dp3: Input_Var\n"); + if ((*ci).var != bound.set_var(level - 1 + privatized_levels.size() + i + 1)) { + //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign); + + if ((*ci).coef * sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef * sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent((*ci).var->name()))); + } + break; + } + case Global_Var: { + //fprintf(stderr, "dp3: Global_Var\n"); + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef * sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef * sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef * sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent(g->base_name()))); else // (*ci).coef*sign < -1 - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), + ocg->CreateIdent(g->base_name()))); + break; } - break; - } - case Global_Var: - { - //fprintf(stderr, "dp3: Global_Var\n"); - Global_Var_ID g = (*ci).var->get_global_var(); - if ((*ci).coef*sign == 1) - op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef*sign == -1) - op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef*sign > 1) - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); - else // (*ci).coef*sign < -1 - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); - break; - } - default: - throw loop_error("unsupported array index expression"); + default: + throw loop_error("unsupported array index expression"); } } if ((*ei).get_const() != 0) - op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + op = ocg->CreatePlus(op, ocg->CreateInt(-sign * ((*ei).get_const()))); if (coef != 1) op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef)); - + index_lb[i] = op; is_index_eq[i] = true; break; @@ -572,14 +594,14 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A } if (is_index_eq[i]) continue; - + //fprintf(stderr, "dp3: separate lower and upper bounds\n"); // separate lower and upper bounds std::vector<GEQ_Handle> lb_list, ub_list; std::set<Variable_ID> excluded_floor_vars; - excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1)); + excluded_floor_vars.insert(bound.set_var(level - 1 + privatized_levels.size() + i + 1)); for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { - int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + int coef = (*gi).get_coef(bound.set_var(level - 1 + privatized_levels.size() + i + 1)); if (coef != 0 && (*gi).has_wildcards()) { bool clean_bound = true; GEQ_Handle h; @@ -591,7 +613,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (!clean_bound) continue; } - + if (coef > 0) lb_list.push_back(*gi); else if (coef < 0) @@ -599,41 +621,45 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A } if (lb_list.size() == 0 || ub_list.size() == 0) throw loop_error("failed to calcuate array footprint size"); - + //fprintf(stderr, "dp3: build lower bound representation\n"); // build lower bound representation std::vector<CG_outputRepr *> lb_repr_list; - for (int j = 0; j < lb_list.size(); j++){ - if(this->known.n_set() == 0) { - lb_repr_list.push_back(output_lower_bound_repr(ocg, - lb_list[j], - bound.set_var(level-1+privatized_levels.size()+i+1), - result.first, - result.second, - bound, - Relation::True(bound.n_set()), - std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)), + for (int j = 0; j < lb_list.size(); j++) { + if (this->known.n_set() == 0) { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + lb_list[j], + bound.set_var(level - 1 + privatized_levels.size() + i + 1), + result.first, + result.second, + bound, + Relation::True(bound.n_set()), + std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)), uninterpreted_symbols[stmt_num])); - } - else { - lb_repr_list.push_back(output_lower_bound_repr(ocg, - lb_list[j], - bound.set_var(level-1+privatized_levels.size()+i+1), - result.first, - result.second, - bound, - this->known, - std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)), + } else { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + lb_list[j], + bound.set_var(level - 1 + privatized_levels.size() + i + 1), + result.first, + result.second, + bound, + this->known, + std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)), uninterpreted_symbols[stmt_num])); } } - if (lb_repr_list.size() > 1) { + if (lb_repr_list.size() > 1) { //fprintf(stderr, "loop_datacopy.cc dp3 createInvoke( max )\n"); index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); - } - else if (lb_repr_list.size() == 1) + } else if (lb_repr_list.size() == 1) index_lb[i] = lb_repr_list[0]; - + //fprintf(stderr, "dp3: build temporary array size representation\n"); // build temporary array size representation { @@ -642,66 +668,62 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A for (int j = 0; j < ub_list.size(); j++) for (int k = 0; k < lb_list.size(); k++) { GEQ_Handle h = f_root->add_GEQ(); - + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: - { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot calculate temporay array size statically"); + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); } } h.update_const(ub_list[j].get_const()); - + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: - { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot calculate temporay array size statically"); + case Input_Var: { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); } } h.update_const(lb_list[k].get_const()); - + h.update_const(1); h.update_coef(cal.output_var(1), -1); } - + cal = Restrict_Domain(cal, copy(copy_is)); for (int j = 1; j <= cal.n_inp(); j++) cal = Project(cal, j, Input_Var); cal.simplify(); - + //fprintf(stderr, "dp3: pad temporary array size\n"); // pad temporary array size // TODO: for variable array size, create padding formula @@ -719,9 +741,8 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (padding_alignment > 1) { // align to boundary for data packing int residue = size % padding_alignment; if (residue) - size = size+padding_alignment-residue; - } - else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + size = size + padding_alignment - residue; + } else if (padding_alignment < -1) { // un-alignment for memory bank conflicts while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1) size++; } @@ -729,7 +750,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); is_index_bound_const = true; } - + if (!is_index_bound_const) { for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { int coef = (*gi).get_coef(cal.output_var(1)); @@ -737,22 +758,23 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A CG_outputRepr *op = NULL; for (Constr_Vars_Iter ci(*gi); ci; ci++) { if ((*ci).var != cal.output_var(1)) { - switch((*ci).var->kind()) { - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - if ((*ci).coef == 1) - op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef == -1) - op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef > 1) - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); - else // (*ci).coef < -1 - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); - break; - } - default: - throw loop_error("failed to generate array index bound code"); + switch ((*ci).var->kind()) { + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), + ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), + ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); } } } @@ -766,16 +788,16 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); coef_t t1 = index_stride[i] / g; if (t1 != 1) - op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1 - 1)), ocg->CreateInt(t1)); coef_t t2 = padding_stride / g; if (t2 != 1) op = ocg->CreateTimes(op, ocg->CreateInt(t2)); - } - else if (index_stride[i] != 1) { - op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } else if (index_stride[i] != 1) { + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i] - 1)), + ocg->CreateInt(index_stride[i])); } } - + index_sz.push_back(std::make_pair(i, op)); break; } @@ -783,20 +805,20 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A } } } - + //fprintf(stderr, "dp3: change the temporary array index order\n"); // change the temporary array index order for (int i = 0; i < index_sz.size(); i++) { if (index_sz[i].first == fastest_changing_dimension) switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_ROW_MAJOR: - std::swap(index_sz[index_sz.size()-1], index_sz[i]); - break; - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: - std::swap(index_sz[0], index_sz[i]); - break; - default: - throw loop_error("unsupported array layout"); + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size() - 1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); } } @@ -806,51 +828,53 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (index_sz.size() == 0) { //fprintf(stderr, "tmp_sym is a scalar\n"); tmp_sym = ir->CreateScalarSymbol(sym, memory_type); - } - else { + } else { //fprintf(stderr, "tmp_sym is an array\n"); std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); - for (int i = 0; i < index_sz.size(); i++) { + for (int i = 0; i < index_sz.size(); i++) { tmp_array_size[i] = index_sz[i].second->clone(); index_sz[i].second->dump(); // THIS PRINTF } tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); } - + //fprintf(stderr, "dp3: create temporary array read initialization code\n"); // create temporary array read initialization code CG_outputRepr *copy_code_read; - if (has_read_refs) { + if (has_read_refs) { //fprintf(stderr, "has read refs\n"); if (index_sz.size() == 0) { - //fprintf(stderr, "if\n"); - + //fprintf(stderr, "if\n"); + //fprintf(stderr, "tmp sym %s\n", tmp_sym->name().c_str()); - IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); // create ref from symbol + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef( + static_cast<IR_ScalarSymbol *>(tmp_sym)); // create ref from symbol // tmp_scalar_ref is incomplete std::vector<CG_outputRepr *> rhs_index(n_dim); - for (int i = 0; i < index_lb.size(); i++) { + for (int i = 0; i < index_lb.size(); i++) { //fprintf(stderr, "i %d\n", i); if (is_index_eq[i]) rhs_index[i] = index_lb[i]->clone(); else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + rhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); } IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - + // IR_ScalarRef tmp_scalar_ref has no actual reference yet. It only has the variable definition. copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); //fprintf(stderr, "if ends\n"); - } - else { + } else { //fprintf(stderr, "else\n"); std::vector<CG_outputRepr *> lhs_index(index_sz.size()); for (int i = 0; i < index_sz.size(); i++) { int cur_index_num = index_sz[i].first; - CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + CG_outputRepr *cur_index_repr = ocg->CreateMinus( + ocg->CreateIdent(copy_is.set_var(level - 1 + privatized_levels.size() + cur_index_num + 1)->name()), + index_lb[cur_index_num]->clone()); if (padding_stride != 0) { - if (i == n_dim-1) { + if (i == n_dim - 1) { coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); coef_t t1 = index_stride[cur_index_num] / g; if (t1 != 1) @@ -858,74 +882,78 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A coef_t t2 = padding_stride / g; if (t2 != 1) cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { + } else if (index_stride[cur_index_num] != 1) { cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); } } - + if (ir->ArrayIndexStartAt() != 0) cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); lhs_index[i] = cur_index_repr; } - + //fprintf(stderr, "dp3: making tmp_array_ref\n"); IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); //fprintf(stderr, "dp3: DONE making tmp_array_ref\n"); - + std::vector<CG_outputRepr *> rhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) if (is_index_eq[i]) rhs_index[i] = index_lb[i]->clone(); else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + rhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - + //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment\n"); //copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); CG_outputRepr *lhs = tmp_array_ref->convert(); CG_outputRepr *rhs = copied_array_ref->convert(); - copy_code_read = ir->builder()->CreateAssignment(0, lhs, rhs); //tmp_array_ref->convert(), copied_array_ref->convert()); + copy_code_read = ir->builder()->CreateAssignment(0, lhs, + rhs); //tmp_array_ref->convert(), copied_array_ref->convert()); //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment DONE\n\n"); } } // has read refs - + //fprintf(stderr, "dp3: create temporary array write back code\n"); // create temporary array write back code CG_outputRepr *copy_code_write; - if (has_write_refs) { + if (has_write_refs) { //fprintf(stderr, "has_write_refs\n"); if (index_sz.size() == 0) { //fprintf(stderr, "index_sz.size() == 0\n"); IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); - + std::vector<CG_outputRepr *> rhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) if (is_index_eq[i]) rhs_index[i] = index_lb[i]->clone(); else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + rhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); - } - else { + } else { //fprintf(stderr, "index_sz.size() NOT = 0\n"); - + std::vector<CG_outputRepr *> lhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) if (is_index_eq[i]) lhs_index[i] = index_lb[i]->clone(); else - lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + lhs_index[i] = ir->builder()->CreateIdent( + copy_is.set_var(level - 1 + privatized_levels.size() + i + 1)->name()); IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); - + std::vector<CG_outputRepr *> rhs_index(index_sz.size()); for (int i = 0; i < index_sz.size(); i++) { int cur_index_num = index_sz[i].first; - CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + CG_outputRepr *cur_index_repr = ocg->CreateMinus( + ocg->CreateIdent(copy_is.set_var(level - 1 + privatized_levels.size() + cur_index_num + 1)->name()), + index_lb[cur_index_num]->clone()); if (padding_stride != 0) { - if (i == n_dim-1) { + if (i == n_dim - 1) { coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); coef_t t1 = index_stride[cur_index_num] / g; if (t1 != 1) @@ -933,96 +961,98 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A coef_t t2 = padding_stride / g; if (t2 != 1) cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { + } else if (index_stride[cur_index_num] != 1) { cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); } } - + if (ir->ArrayIndexStartAt() != 0) cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); rhs_index[i] = cur_index_repr; } IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); - + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); } } // has write refs - + // now we can remove those loops for array indexes that are // dependent on others //fprintf(stderr, "dp3: now we can remove those loops\n"); if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { - Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + Relation mapping(level - 1 + privatized_levels.size() + n_dim, + level - 1 + privatized_levels.size() + index_sz.size()); F_And *f_root = mapping.add_and(); - for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + for (int i = 1; i <= level - 1 + privatized_levels.size(); i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(mapping.input_var(i), 1); h.update_coef(mapping.output_var(i), -1); } - + int cur_index = 0; std::vector<int> mapped_index(index_sz.size()); for (int i = 0; i < n_dim; i++) if (!is_index_eq[i]) { EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + h.update_coef(mapping.input_var(level - 1 + privatized_levels.size() + i + 1), 1); switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { - h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); - mapped_index[index_sz.size()-cur_index-1] = i; - break; - } - case IR_ARRAY_LAYOUT_ROW_MAJOR: { - h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); - mapped_index[cur_index] = i; - break; - } - default: - throw loop_error("unsupported array layout"); + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level - 1 + privatized_levels.size() + index_sz.size() - cur_index), -1); + mapped_index[index_sz.size() - cur_index - 1] = i; + break; + } + case IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level - 1 + privatized_levels.size() + cur_index + 1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); } cur_index++; } - + wo_copy_is = omega::Range(Restrict_Domain(copy(mapping), wo_copy_is)); ro_copy_is = omega::Range(Restrict_Domain(copy(mapping), ro_copy_is)); - for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + for (int i = 1; i <= level - 1 + privatized_levels.size(); i++) { wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); } for (int i = 0; i < index_sz.size(); i++) { - wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); - ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + wo_copy_is.name_set_var(level - 1 + privatized_levels.size() + i + 1, + copy_is.set_var(level - 1 + privatized_levels.size() + mapped_index[i] + 1)->name()); + ro_copy_is.name_set_var(level - 1 + privatized_levels.size() + i + 1, + copy_is.set_var(level - 1 + privatized_levels.size() + mapped_index[i] + 1)->name()); } wo_copy_is.setup_names(); ro_copy_is.setup_names(); } - + // insert read copy statement //fprintf(stderr, "dp3: insert read copy statement\n"); - + int old_num_stmt = stmt.size(); int ro_copy_stmt_num = -1; if (has_read_refs) { - Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + Relation copy_xform(ro_copy_is.n_set(), 2 * ro_copy_is.n_set() + 1); { F_And *f_root = copy_xform.add_and(); for (int i = 1; i <= ro_copy_is.n_set(); i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.input_var(i), 1); - h.update_coef(copy_xform.output_var(2*i), -1); + h.update_coef(copy_xform.output_var(2 * i), -1); } - for (int i = 1; i <= dim; i+=2) { + for (int i = 1; i <= dim; i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), -1); - h.update_const(lex[i-1]); + h.update_const(lex[i - 1]); } - for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + for (int i = dim + 2; i <= copy_xform.n_out(); i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), 1); } } - + Statement copy_stmt_read; copy_stmt_read.IS = ro_copy_is; copy_stmt_read.xform = copy_xform; @@ -1031,7 +1061,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); copy_stmt_read.ir_stmt_node = NULL; copy_stmt_read.has_inspector = false; - for (int i = 0; i < level-1; i++) { + for (int i = 0; i < level - 1; i++) { copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && stmt[*(active.begin())].loop_level[i].payload >= level) { @@ -1043,32 +1073,33 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A copy_stmt_read.loop_level[i].payload = -1; else copy_stmt_read.loop_level[i].payload = level + j; - } - else + } else copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; } for (int i = 0; i < privatized_levels.size(); i++) { - copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; - copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; - copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + copy_stmt_read.loop_level[level - 1 + i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_read.loop_level[level - 1 + + i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_read.loop_level[level - 1 + + i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; } int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) { - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].payload = num_dep_dim - left_num_dim + i; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].payload = -1; + copy_stmt_read.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } - - - shiftLexicalOrder(lex, dim-1, 1); - fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size()); + + shiftLexicalOrder(lex, dim - 1, 1); + + fprintf(stderr, "loop_datacopy.cc L1071 adding stmt %d\n", stmt.size()); stmt.push_back(copy_stmt_read); uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]); @@ -1076,30 +1107,30 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A ro_copy_stmt_num = stmt.size() - 1; dep.insert(); } - + //fprintf(stderr, "dp3: insert write copy statement\n"); // insert write copy statement int wo_copy_stmt_num = -1; if (has_write_refs) { - Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); + Relation copy_xform(wo_copy_is.n_set(), 2 * wo_copy_is.n_set() + 1); { F_And *f_root = copy_xform.add_and(); for (int i = 1; i <= wo_copy_is.n_set(); i++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.input_var(i), 1); - h.update_coef(copy_xform.output_var(2*i), -1); + h.update_coef(copy_xform.output_var(2 * i), -1); } - for (int i = 1; i <= dim; i+=2) { + for (int i = 1; i <= dim; i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), -1); - h.update_const(lex[i-1]); + h.update_const(lex[i - 1]); } - for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + for (int i = dim + 2; i <= copy_xform.n_out(); i += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(copy_xform.output_var(i), 1); } } - + Statement copy_stmt_write; copy_stmt_write.IS = wo_copy_is; copy_stmt_write.xform = copy_xform; @@ -1107,8 +1138,8 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); copy_stmt_write.ir_stmt_node = NULL; copy_stmt_write.has_inspector = false; - - for (int i = 0; i < level-1; i++) { + + for (int i = 0; i < level - 1; i++) { copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && stmt[*(active.begin())].loop_level[i].payload >= level) { @@ -1120,31 +1151,32 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A copy_stmt_write.loop_level[i].payload = -1; else copy_stmt_write.loop_level[i].payload = level + j; - } - else + } else copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; } for (int i = 0; i < privatized_levels.size(); i++) { - copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; - copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; - copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + copy_stmt_write.loop_level[level - 1 + i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_write.loop_level[level - 1 + + i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_write.loop_level[level - 1 + + i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; } int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); for (int i = 0; i < std::min(left_num_dim, static_cast<int>(index_sz.size())); i++) { - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].payload = num_dep_dim - left_num_dim + i; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } for (int i = std::min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].payload = -1; + copy_stmt_write.loop_level[level - 1 + privatized_levels.size() + i].parallel_level = 0; } - lex[dim-1]++; - shiftLexicalOrder(lex, dim-1, -2); + lex[dim - 1]++; + shiftLexicalOrder(lex, dim - 1, -2); - fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size()); + fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size()); stmt.push_back(copy_stmt_write); uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]); @@ -1152,24 +1184,24 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A wo_copy_stmt_num = stmt.size() - 1; dep.insert(); } - + //fprintf(stderr, "replace original array accesses with temporary array accesses\n"); // replace original array accesses with temporary array accesses - for (int i =0; i < stmt_refs.size(); i++) + for (int i = 0; i < stmt_refs.size(); i++) for (int j = 0; j < stmt_refs[i].second.size(); j++) { if (index_sz.size() == 0) { IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); //fprintf(stderr, "dp3: loop_datacopy.cc calling ReplaceExpression i%d j%d\n", i, j); ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); - } - else { + } else { std::vector<CG_outputRepr *> index_repr(index_sz.size()); for (int k = 0; k < index_sz.size(); k++) { int cur_index_num = index_sz[k].first; - - CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), + index_lb[cur_index_num]->clone()); if (padding_stride != 0) { - if (k == n_dim-1) { + if (k == n_dim - 1) { coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); coef_t t1 = index_stride[cur_index_num] / g; if (t1 != 1) @@ -1177,23 +1209,22 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A coef_t t2 = padding_stride / g; if (t2 != 1) cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { + } else if (index_stride[cur_index_num] != 1) { cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); } } - + if (ir->ArrayIndexStartAt() != 0) cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); index_repr[k] = cur_index_repr; } - + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); //fprintf(stderr, "loop_datacopy.cc ir->ReplaceExpression( ... )\n"); ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); } } - + // update dependence graph //fprintf(stderr, "update dependence graph\n"); @@ -1201,7 +1232,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (ro_copy_stmt_num != -1) { for (int i = 0; i < old_num_stmt; i++) { std::vector<std::vector<DependenceVector> > D; - + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (active.find(i) != active.end() && active.find(j->first) == active.end()) { std::vector<DependenceVector> dvs1, dvs2; @@ -1215,8 +1246,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A j->second = dvs2; if (dvs1.size() > 0) dep.connect(ro_copy_stmt_num, j->first, dvs1); - } - else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + } else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { std::vector<DependenceVector> dvs1, dvs2; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; @@ -1229,17 +1259,17 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (dvs1.size() > 0) D.push_back(dvs1); } - + if (j->second.size() == 0) dep.vertex[i].second.erase(j++); else j++; } - + for (int j = 0; j < D.size(); j++) dep.connect(i, ro_copy_stmt_num, D[j]); } - + // insert dependences from copy statement loop to copied statements //fprintf(stderr, "insert dependences from copy statement loop to copied statements\n"); @@ -1255,11 +1285,11 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) dep.connect(ro_copy_stmt_num, *i, dv); } - + if (wo_copy_stmt_num != -1) { for (int i = 0; i < old_num_stmt; i++) { std::vector<std::vector<DependenceVector> > D; - + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { if (active.find(i) != active.end() && active.find(j->first) == active.end()) { std::vector<DependenceVector> dvs1, dvs2; @@ -1273,8 +1303,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A j->second = dvs2; if (dvs1.size() > 0) dep.connect(wo_copy_stmt_num, j->first, dvs1); - } - else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + } else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { std::vector<DependenceVector> dvs1, dvs2; for (int k = 0; k < j->second.size(); k++) { DependenceVector dv = j->second[k]; @@ -1287,17 +1316,17 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A if (dvs1.size() > 0) D.push_back(dvs1); } - + if (j->second.size() == 0) dep.vertex[i].second.erase(j++); else j++; } - + for (int j = 0; j < D.size(); j++) dep.connect(i, wo_copy_stmt_num, D[j]); } - + // insert dependences from copied statements to write statements //fprintf(stderr, "dp3: insert dependences from copied statements to write statements\n"); @@ -1312,9 +1341,9 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A } for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) dep.connect(*i, wo_copy_stmt_num, dv); - + } - + // update variable name for dependences among copied statements for (int i = 0; i < old_num_stmt; i++) { if (active.find(i) != active.end()) @@ -1325,7 +1354,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A j->second[k].sym = s; } } - + // insert anti-dependence from write statement to read statement if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) if (dep_dim >= 0) { @@ -1340,15 +1369,15 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A } for (int k = 0; k < dep_dim; k++) { if (k != 0) { - dv.lbounds[k-1] = 0; - dv.ubounds[k-1] = 0; + dv.lbounds[k - 1] = 0; + dv.ubounds[k - 1] = 0; } dv.lbounds[k] = 1; dv.ubounds[k] = posInfinity; dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); } } - + //fprintf(stderr, "Loop::datacopy_privatized3() cleanup\n"); // cleanup delete sym; @@ -1361,7 +1390,7 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_A index_sz[i].second->clear(); delete index_sz[i].second; } - + return true; } |