From ab016596602a4c6bdc27adf01c308b325af221f0 Mon Sep 17 00:00:00 2001 From: Tuowen Zhao Date: Wed, 21 Sep 2016 22:35:47 -0600 Subject: something that only builds ... --- src/loop_datacopy.cc | 282 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 251 insertions(+), 31 deletions(-) (limited to 'src/loop_datacopy.cc') diff --git a/src/loop_datacopy.cc b/src/loop_datacopy.cc index 1ccd444..12d74fd 100644 --- a/src/loop_datacopy.cc +++ b/src/loop_datacopy.cc @@ -21,8 +21,15 @@ using namespace omega; +// +// data copy function by referring arrays by numbers. +// e.g. A[i] = A[i-1] + B[i] +// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] +// bool Loop::datacopy(const std::vector > > &array_ref_nums, int level, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + //fprintf(stderr, "Loop::datacopy()\n"); + // check for sanity of parameters std::set same_loop; for (int i = 0; i < array_ref_nums.size(); i++) { @@ -67,11 +74,22 @@ bool Loop::datacopy(const std::vector > > &array throw std::invalid_argument("found no array references to copy"); // do the copy - return datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; } +// +// data copy function by referring arrays by name. +// e.g. A[i] = A[i-1] + B[i] +// parameter array_name=A means to copy data touched by A[i-1] and A[i] +// bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + + fflush(stdout); + //fprintf(stderr, "Loop::datacopy2()\n"); + //fprintf(stderr, "array name %s stmt num %d\n", array_name.c_str(), stmt_num); + // check for sanity of parameters if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); @@ -95,16 +113,38 @@ bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, if (t.size() != 0) selected_refs.push_back(std::make_pair(*i, t)); } + + //fprintf(stderr, "selected refs:\n"); + //for (int i=0; iDump(); printf("\n"); fflush(stdout); + //} + if (selected_refs.size() == 0) throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); + IR_ArrayRef *AR = selected_refs[0].second[0]; + //IR_roseArrayRef *RAR = (IR_roseArrayRef *)AR; + //fprintf(stderr, "before datacopy_privatized, "); + //AR->Dump(); + // do the copy - return datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + //fprintf(stderr, "\nLoop::datacopy2 calling privatized\n"); + + bool whatever = datacopy_privatized(selected_refs, level, std::vector(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + + //AR = selected_refs[0].second[0]; + //fprintf(stderr, "after datacopy_privatized, "); + //AR->Dump(); + + return whatever; } bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + //fprintf(stderr, "Loop::datacopy_privatized()\n"); + // check for sanity of parameters if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); @@ -131,11 +171,14 @@ bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); // do the copy - return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; } bool Loop::datacopy_privatized(const std::vector > > &array_ref_nums, int level, const std::vector &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + //fprintf(stderr, "Loop::datacopy_privatized2()\n"); + // check for sanity of parameters std::set same_loop; for (int i = 0; i < array_ref_nums.size(); i++) { @@ -180,13 +223,28 @@ bool Loop::datacopy_privatized(const std::vector throw std::invalid_argument("found no array references to copy"); // do the copy - return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + bool whatever = datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + return whatever; } -bool Loop::datacopy_privatized(const std::vector > > &stmt_refs, int level, + +// +// Implement low level datacopy function with lots of options. +// + +bool Loop::datacopy_privatized(const std::vector > > &stmt_refs, + int level, const std::vector &privatized_levels, - bool allow_extra_read, int fastest_changing_dimension, - int padding_stride, int padding_alignment, int memory_type) { + bool allow_extra_read, + int fastest_changing_dimension, + int padding_stride, + int padding_alignment, + int memory_type) { + + //fprintf(stderr, "\nLoop::datacopy_privatized3() *****\n"); + //fprintf(stderr, "privatized_levels.size() %d\n", privatized_levels.size()); + //fprintf(stderr, "level %d\n", level); + if (stmt_refs.size() == 0) return true; @@ -233,6 +291,12 @@ bool Loop::datacopy_privatized(const std::vector= -1 && fastest_changing_dimension < sym->n_dim())) throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); if (padding_stride < 0) @@ -255,6 +319,7 @@ bool Loop::datacopy_privatized(const std::vectorDump(); fprintf(stderr, "\n"); + // } + //} + //for (int i=0; iDump(); + // fprintf(stderr, "\n"); + // } + //} + + //for (int i=0; iname()); mapping.setup_names(); + mapping.print(); fflush(stdout); // "{[I] -> [_t1] : I = _t1 } + F_And *f_root = mapping.add_and(); for (int k = 1; k <= level-1; k++) { EQ_Handle h = f_root->add_EQ(); @@ -290,12 +393,28 @@ bool Loop::datacopy_privatized(const std::vectorDump(); + CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); - exp2formula(ir, mapping, f_root, freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); + //fprintf(stderr, "k %d j %d repr ", k, j); repr->dump(); fflush(stdout); + + exp2formula(ir, + mapping, + f_root, + freevar, + repr, + mapping.output_var(level-1+privatized_levels.size()+k+1), + 'w', + IR_COND_EQ, + false, + uninterpreted_symbols[stmt_num], + uninterpreted_symbols_stringrepr[stmt_num]); repr->clear(); delete repr; } - Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + Relation r = omega::Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); if (stmt_refs[i].second[j]->is_write()) { has_write_refs = true; wo_copy_is = Union(wo_copy_is, r); @@ -312,6 +431,7 @@ bool Loop::datacopy_privatized(const std::vector result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1)); if (result.second != NULL) index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)))); else index_stride[i] = 1; + //fprintf(stderr, "dp3: index_stride[%d] = %d\n", i, index_stride[i]); - // check if this arary index requires loop + // check if this array index requires loop Conjunct *c = bound.query_DNF()->single_conjunct(); for (EQ_Iterator ei(c->EQs()); ei; ei++) { + //fprintf(stderr, "dp3: for\n"); if ((*ei).has_wildcards()) continue; + //fprintf(stderr, "dp3: no wildcards\n"); int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); if (coef != 0) { + //fprintf(stderr, "coef != 0\n"); int sign = 1; if (coef < 0) { + //fprintf(stderr, "coef < 0\n"); coef = -coef; sign = -1; } CG_outputRepr *op = NULL; for (Constr_Vars_Iter ci(*ei); ci; ci++) { + //fprintf(stderr, "dp3: ci\n"); switch ((*ci).var->kind()) { case Input_Var: { - if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) + //fprintf(stderr, "dp3: Input_Var\n"); + if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) { + //fprintf(stderr, "dp3: IF sign %d\n",(*ci).coef*sign); + if ((*ci).coef*sign == 1) op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); else if ((*ci).coef*sign == -1) @@ -407,10 +539,12 @@ bool Loop::datacopy_privatized(const std::vectorCreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); else // (*ci).coef*sign < -1 op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + } break; } case Global_Var: { + //fprintf(stderr, "dp3: Global_Var\n"); Global_Var_ID g = (*ci).var->get_global_var(); if ((*ci).coef*sign == 1) op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); @@ -439,7 +573,8 @@ bool Loop::datacopy_privatized(const std::vector lb_list, ub_list; std::set excluded_floor_vars; excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1)); @@ -465,19 +600,41 @@ bool Loop::datacopy_privatized(const std::vector lb_repr_list; for (int j = 0; j < lb_list.size(); j++){ - if(this->known.n_set() == 0) - lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, Relation::True(bound.n_set()), std::vector >(bound.n_set(), std::make_pair(static_cast(NULL), 0)))); - else - lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, this->known, std::vector >(bound.n_set(), std::make_pair(static_cast(NULL), 0)))); + if(this->known.n_set() == 0) { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + lb_list[j], + bound.set_var(level-1+privatized_levels.size()+i+1), + result.first, + result.second, + bound, + Relation::True(bound.n_set()), + std::vector >(bound.n_set(), std::make_pair(static_cast(NULL), 0)), + uninterpreted_symbols[stmt_num])); + } + else { + lb_repr_list.push_back(output_lower_bound_repr(ocg, + lb_list[j], + bound.set_var(level-1+privatized_levels.size()+i+1), + result.first, + result.second, + bound, + this->known, + std::vector >(bound.n_set(), std::make_pair(static_cast(NULL), 0)), + uninterpreted_symbols[stmt_num])); + } } - if (lb_repr_list.size() > 1) + if (lb_repr_list.size() > 1) { + //fprintf(stderr, "loop_datacopy.cc dp3 createInvoke( max )\n"); index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); + } else if (lb_repr_list.size() == 1) index_lb[i] = lb_repr_list[0]; + //fprintf(stderr, "dp3: build temporary array size representation\n"); // build temporary array size representation { Relation cal(copy_is.n_set(), 1); @@ -545,6 +702,7 @@ bool Loop::datacopy_privatized(const std::vectorsingle_conjunct(); @@ -626,8 +784,9 @@ bool Loop::datacopy_privatized(const std::vectorlayout_type()) { case IR_ARRAY_LAYOUT_ROW_MAJOR: @@ -639,36 +798,53 @@ bool Loop::datacopy_privatized(const std::vectorCreateScalarSymbol(sym, memory_type); } else { + //fprintf(stderr, "tmp_sym is an array\n"); std::vector tmp_array_size(index_sz.size()); - for (int i = 0; i < index_sz.size(); i++) + for (int i = 0; i < index_sz.size(); i++) { tmp_array_size[i] = index_sz[i].second->clone(); + index_sz[i].second->dump(); // THIS PRINTF + } tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); } + //fprintf(stderr, "dp3: create temporary array read initialization code\n"); // create temporary array read initialization code CG_outputRepr *copy_code_read; - if (has_read_refs) + if (has_read_refs) { + //fprintf(stderr, "has read refs\n"); if (index_sz.size() == 0) { - IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast(tmp_sym)); + //fprintf(stderr, "if\n"); + //fprintf(stderr, "tmp sym %s\n", tmp_sym->name().c_str()); + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast(tmp_sym)); // create ref from symbol + // tmp_scalar_ref is incomplete + std::vector rhs_index(n_dim); - for (int i = 0; i < index_lb.size(); i++) + for (int i = 0; i < index_lb.size(); i++) { + //fprintf(stderr, "i %d\n", i); if (is_index_eq[i]) rhs_index[i] = index_lb[i]->clone(); else rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + } IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + // IR_ScalarRef tmp_scalar_ref has no actual reference yet. It only has the variable definition. copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); + //fprintf(stderr, "if ends\n"); } else { + //fprintf(stderr, "else\n"); std::vector lhs_index(index_sz.size()); for (int i = 0; i < index_sz.size(); i++) { int cur_index_num = index_sz[i].first; @@ -693,7 +869,9 @@ bool Loop::datacopy_privatized(const std::vectorCreateArrayRef(static_cast(tmp_sym), lhs_index); + //fprintf(stderr, "dp3: DONE making tmp_array_ref\n"); std::vector rhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) @@ -703,13 +881,22 @@ bool Loop::datacopy_privatized(const std::vectorbuilder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment\n"); + //copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + CG_outputRepr *lhs = tmp_array_ref->convert(); + CG_outputRepr *rhs = copied_array_ref->convert(); + copy_code_read = ir->builder()->CreateAssignment(0, lhs, rhs); //tmp_array_ref->convert(), copied_array_ref->convert()); + //fprintf(stderr, "dp3: loop_datacopy.cc copy_code_read = CreateAssignment DONE\n\n"); } + } // has read refs + //fprintf(stderr, "dp3: create temporary array write back code\n"); // create temporary array write back code CG_outputRepr *copy_code_write; - if (has_write_refs) + if (has_write_refs) { + //fprintf(stderr, "has_write_refs\n"); if (index_sz.size() == 0) { + //fprintf(stderr, "index_sz.size() == 0\n"); IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast(tmp_sym)); std::vector rhs_index(n_dim); @@ -723,6 +910,8 @@ bool Loop::datacopy_privatized(const std::vectorbuilder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); } else { + //fprintf(stderr, "index_sz.size() NOT = 0\n"); + std::vector lhs_index(n_dim); for (int i = 0; i < index_lb.size(); i++) if (is_index_eq[i]) @@ -758,9 +947,11 @@ bool Loop::datacopy_privatized(const std::vectorbuilder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); } + } // has write refs // now we can remove those loops for array indexes that are // dependent on others + //fprintf(stderr, "dp3: now we can remove those loops\n"); if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); F_And *f_root = mapping.add_and(); @@ -793,8 +984,8 @@ bool Loop::datacopy_privatized(const std::vectorname()); ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); @@ -808,6 +999,8 @@ bool Loop::datacopy_privatized(const std::vector(ro_copy_is.n_set()); copy_stmt_read.ir_stmt_node = NULL; + copy_stmt_read.has_inspector = false; for (int i = 0; i < level-1; i++) { copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && @@ -859,12 +1054,12 @@ bool Loop::datacopy_privatized(const std::vector(index_sz.size())); i++) { + for (int i = 0; i < std::min(left_num_dim, static_cast(index_sz.size())); i++) { copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; } - for (int i = min(left_num_dim, static_cast(index_sz.size())); i < index_sz.size(); i++) { + for (int i = std::min(left_num_dim, static_cast(index_sz.size())); i < index_sz.size(); i++) { copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; @@ -872,11 +1067,17 @@ bool Loop::datacopy_privatized(const std::vector(wo_copy_is.n_set()); copy_stmt_write.ir_stmt_node = NULL; + copy_stmt_write.has_inspector = false; for (int i = 0; i < level-1; i++) { copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; @@ -929,28 +1131,35 @@ bool Loop::datacopy_privatized(const std::vector(index_sz.size())); i++) { + for (int i = 0; i < std::min(left_num_dim, static_cast(index_sz.size())); i++) { copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; } - for (int i = min(left_num_dim, static_cast(index_sz.size())); i < index_sz.size(); i++) { + for (int i = std::min(left_num_dim, static_cast(index_sz.size())); i < index_sz.size(); i++) { copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; } lex[dim-1]++; shiftLexicalOrder(lex, dim-1, -2); + + fprintf(stderr, "loop_datacopy.cc L1147 adding stmt %d\n", stmt.size()); stmt.push_back(copy_stmt_write); + + uninterpreted_symbols.push_back(uninterpreted_symbols[*(active.begin())]); + uninterpreted_symbols_stringrepr.push_back(uninterpreted_symbols_stringrepr[*(active.begin())]); wo_copy_stmt_num = stmt.size() - 1; dep.insert(); } + //fprintf(stderr, "replace original array accesses with temporary array accesses\n"); // replace original array accesses with temporary array accesses for (int i =0; i < stmt_refs.size(); i++) for (int j = 0; j < stmt_refs[i].second.size(); j++) { if (index_sz.size() == 0) { IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast(tmp_sym)); + //fprintf(stderr, "dp3: loop_datacopy.cc calling ReplaceExpression i%d j%d\n", i, j); ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); } else { @@ -980,11 +1189,14 @@ bool Loop::datacopy_privatized(const std::vectorCreateArrayRef(static_cast(tmp_sym), index_repr); + //fprintf(stderr, "loop_datacopy.cc ir->ReplaceExpression( ... )\n"); ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); } } // update dependence graph + //fprintf(stderr, "update dependence graph\n"); + int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; if (ro_copy_stmt_num != -1) { for (int i = 0; i < old_num_stmt; i++) { @@ -1029,6 +1241,8 @@ bool Loop::datacopy_privatized(const std::vectorclone(); @@ -1085,6 +1299,8 @@ bool Loop::datacopy_privatized(const std::vectorclone(); @@ -1133,6 +1349,7 @@ bool Loop::datacopy_privatized(const std::vector