diff options
Diffstat (limited to 'src/transformations/loop_tile.cc')
-rw-r--r-- | src/transformations/loop_tile.cc | 420 |
1 files changed, 209 insertions, 211 deletions
diff --git a/src/transformations/loop_tile.cc b/src/transformations/loop_tile.cc index 41c3e7f..0a1808b 100644 --- a/src/transformations/loop_tile.cc +++ b/src/transformations/loop_tile.cc @@ -14,8 +14,6 @@ using namespace omega; - - void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, TilingMethodType method, int alignment_offset, int alignment_multiple) { // check for sanity of parameters @@ -29,30 +27,30 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, throw std::invalid_argument("invalid loop level " + to_string(level)); if (level > stmt[stmt_num].loop_level.size()) throw std::invalid_argument( - "there is no loop level " + to_string(level) + " for statement " - + to_string(stmt_num)); + "there is no loop level " + to_string(level) + " for statement " + + to_string(stmt_num)); if (outer_level <= 0 || outer_level > level) throw std::invalid_argument( - "invalid tile controlling loop level " - + to_string(outer_level)); - + "invalid tile controlling loop level " + + to_string(outer_level)); + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + int dim = 2 * level - 1; int outer_dim = 2 * outer_level - 1; std::vector<int> lex = getLexicalOrder(stmt_num); std::set<int> same_tiled_loop = getStatements(lex, dim - 1); std::set<int> same_tile_controlling_loop = getStatements(lex, outer_dim - 1); - + for (std::set<int>::iterator i = same_tiled_loop.begin(); i != same_tiled_loop.end(); i++) { for (DependenceGraph::EdgeList::iterator j = - dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) { if (same_tiled_loop.find(j->first) != same_tiled_loop.end()) for (int k = 0; k < j->second.size(); k++) { @@ -63,34 +61,34 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, dim2 = stmt[*i].loop_level[dim2].payload - 1; } dim2 = stmt[*i].loop_level[dim2].payload; - + if (dv.hasNegative(dim2) && (!dv.quasi)) { for (int l = outer_level; l < level; l++) if (stmt[*i].loop_level[l - 1].type != LoopLevelTile) { if (dv.isCarried( - stmt[*i].loop_level[l - 1].payload) + stmt[*i].loop_level[l - 1].payload) && dv.hasPositive( - stmt[*i].loop_level[l - 1].payload)) + stmt[*i].loop_level[l - 1].payload)) throw loop_error( - "loop error: Tiling is illegal, dependence violation!"); + "loop error: Tiling is illegal, dependence violation!"); } else { - + int dim3 = l - 1; while (stmt[*i].loop_level[l - 1].type != LoopLevelTile) { dim3 = - stmt[*i].loop_level[l - 1].payload - - 1; - + stmt[*i].loop_level[l - 1].payload + - 1; + } - + dim3 = stmt[*i].loop_level[l - 1].payload; if (dim3 < level - 1) if (dv.isCarried(dim3) && dv.hasPositive(dim3)) throw loop_error( - "loop error: Tiling is illegal, dependence violation!"); + "loop error: Tiling is illegal, dependence violation!"); } } } @@ -117,11 +115,11 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(r.input_var(j), 1); h.update_coef(r.output_var(j + 2), -1); } - + stmt[*i].xform = Composition(copy(r), stmt[*i].xform); } } - // normal tiling + // normal tiling else { std::set<int> private_stmt; for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); @@ -131,12 +129,12 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, && overflow.find(*i) != overflow.end()) private_stmt.insert(*i); } - + // extract the union of the iteration space to be considered Relation hull; { std::vector<Relation> r_list; - + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) if (private_stmt.find(*i) == private_stmt.end()) { @@ -150,28 +148,28 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, r.simplify(2, 4); r_list.push_back(r); } - + hull = SimpleHull(r_list); } - + // extract the bound of the dimension to be tiled Relation bound = get_loop_bound(hull, dim); if (!bound.has_single_conjunct()) { // further simplify the bound hull = Approximate(hull); bound = get_loop_bound(hull, dim); - + int i = outer_dim - 2; while (!bound.has_single_conjunct() && i >= 0) { hull = Project(hull, i + 1, Set_Var); bound = get_loop_bound(hull, dim); i -= 2; } - + if (!bound.has_single_conjunct()) throw loop_error("cannot handle tile bounds"); } - + // separate lower and upper bounds std::vector<GEQ_Handle> lb_list, ub_list; { @@ -186,11 +184,11 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } if (lb_list.size() == 0) throw loop_error( - "unable to calculate tile controlling loop lower bound"); + "unable to calculate tile controlling loop lower bound"); if (ub_list.size() == 0) throw loop_error( - "unable to calculate tile controlling loop upper bound"); - + "unable to calculate tile controlling loop upper bound"); + // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile int simplest_lb = 0, simplest_ub = 0; if (method == StridedTile) { @@ -199,20 +197,20 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, int cost = 0; for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - cost += 5; - break; - } - case Global_Var: { - cost += 2; - break; - } - default: - cost += 15; - break; + case Input_Var: { + cost += 5; + break; + } + case Global_Var: { + cost += 2; + break; + } + default: + cost += 15; + break; } } - + if (cost < best_cost) { best_cost = cost; simplest_lb = i; @@ -224,67 +222,67 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, for (int i = 0; i < lb_list.size(); i++) for (int j = 0; j < ub_list.size(); j++) { int cost = 0; - + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - s1[(*ci).var] += (*ci).coef; - break; - } - case Global_Var: { - s2[(*ci).var] += (*ci).coef; - break; - } - case Exists_Var: - case Wildcard_Var: { - s3[(*ci).var] += (*ci).coef; - break; - } - default: - cost = INT_MAX - 2; - break; + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + cost = INT_MAX - 2; + break; } } - + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - s1[(*ci).var] += (*ci).coef; - break; - } - case Global_Var: { - s2[(*ci).var] += (*ci).coef; - break; - } - case Exists_Var: - case Wildcard_Var: { - s3[(*ci).var] += (*ci).coef; - break; - } - default: - if (cost == INT_MAX - 2) - cost = INT_MAX - 1; - else - cost = INT_MAX - 3; - break; + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + if (cost == INT_MAX - 2) + cost = INT_MAX - 1; + else + cost = INT_MAX - 3; + break; } } - + if (cost == 0) { for (std::map<Variable_ID, coef_t>::iterator k = - s1.begin(); k != s1.end(); k++) + s1.begin(); k != s1.end(); k++) if ((*k).second != 0) cost += 5; for (std::map<Variable_ID, coef_t>::iterator k = - s2.begin(); k != s2.end(); k++) + s2.begin(); k != s2.end(); k++) if ((*k).second != 0) cost += 2; for (std::map<Variable_ID, coef_t>::iterator k = - s3.begin(); k != s3.end(); k++) + s3.begin(); k != s3.end(); k++) if ((*k).second != 0) cost += 15; } - + if (cost < best_cost) { best_cost = cost; simplest_lb = i; @@ -292,7 +290,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } } } - + // prepare the new transformation relations for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { @@ -303,58 +301,58 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(r.output_var(j + 1), 1); h.update_coef(r.input_var(j + 1), -1); } - + for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(j + 3), 1); h.update_coef(r.input_var(j + 1), -1); } - + EQ_Handle h = f_root->add_EQ(); h.update_coef(r.output_var(outer_dim), 1); h.update_const(-lex[outer_dim - 1]); - + stmt[*i].xform = Composition(r, stmt[*i].xform); } - + // add tiling constraints. for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { F_And *f_super_root = stmt[*i].xform.and_with_and(); F_Exists *f_exists = f_super_root->add_exists(); F_And *f_root = f_exists->add_and(); - + // create a lower bound variable for easy formula creation later Variable_ID aligned_lb; { Variable_ID lb = f_exists->declare(); coef_t coef = lb_list[simplest_lb].get_coef( - bound.set_var(dim + 1)); + bound.set_var(dim + 1)); if (coef == 1) { // e.g. if i >= m+5, then LB = m+5 EQ_Handle h = f_root->add_EQ(); h.update_coef(lb, 1); for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos != dim + 1) - h.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h.update_const(lb_list[simplest_lb].get_const()); @@ -363,40 +361,40 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, GEQ_Handle h2 = f_root->add_GEQ(); for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos == dim + 1) { - h1.update_coef(lb, (*ci).coef); - h2.update_coef(lb, -(*ci).coef); - } else { - h1.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - h2.update_coef(stmt[*i].xform.output_var(pos), - -(*ci).coef); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(lb, (*ci).coef); + h2.update_coef(lb, -(*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + } + break; } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h1.update_coef(v, (*ci).coef); - h2.update_coef(v, -(*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, (*ci).coef); + h2.update_coef(v, -(*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h1.update_const(lb_list[simplest_lb].get_const()); h2.update_const(-lb_list[simplest_lb].get_const()); h2.update_const(coef - 1); } - + Variable_ID offset_lb; if (alignment_offset == 0) offset_lb = lb; @@ -407,17 +405,17 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(lb, -1); h.update_const(alignment_offset); } - + if (alignment_multiple == 1) { // trivial aligned_lb = offset_lb; } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB aligned_lb = f_exists->declare(); Variable_ID e = f_exists->declare(); - + EQ_Handle h = f_root->add_EQ(); h.update_coef(aligned_lb, 1); h.update_coef(e, -alignment_multiple); - + GEQ_Handle h1 = f_root->add_GEQ(); GEQ_Handle h2 = f_root->add_GEQ(); h1.update_coef(e, alignment_multiple); @@ -427,37 +425,37 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h1.update_const(alignment_multiple - 1); } } - + // create an upper bound variable for easy formula creation later Variable_ID ub = f_exists->declare(); { coef_t coef = -ub_list[simplest_ub].get_coef( - bound.set_var(dim + 1)); + bound.set_var(dim + 1)); if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 EQ_Handle h = f_root->add_EQ(); h.update_coef(ub, -1); for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos != dim + 1) - h.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h.update_const(ub_list[simplest_ub].get_const()); @@ -466,33 +464,33 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, GEQ_Handle h2 = f_root->add_GEQ(); for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { switch ((*ci).var->kind()) { - case Input_Var: { - int pos = (*ci).var->get_position(); - if (pos == dim + 1) { - h1.update_coef(ub, -(*ci).coef); - h2.update_coef(ub, (*ci).coef); - } else { - h1.update_coef(stmt[*i].xform.output_var(pos), - -(*ci).coef); - h2.update_coef(stmt[*i].xform.output_var(pos), - (*ci).coef); + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(ub, -(*ci).coef); + h2.update_coef(ub, (*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + } + break; } - break; - } - case Global_Var: { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = stmt[*i].xform.get_local(g); - else - v = stmt[*i].xform.get_local(g, - (*ci).var->function_of()); - h1.update_coef(v, -(*ci).coef); - h2.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot handle tile bounds"); + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, -(*ci).coef); + h2.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); } } h1.update_const(-ub_list[simplest_ub].get_const()); @@ -500,13 +498,13 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h1.update_const(coef - 1); } } - + // insert tile controlling loop constraints if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 Variable_ID e = f_exists->declare(); GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(e, 1); - + EQ_Handle h2 = f_root->add_EQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); h2.update_coef(e, -tile_size); @@ -514,14 +512,14 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); - + GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -tile_size); h2.update_coef(aligned_lb, -1); h2.update_coef(ub, 1); } - + // special care for private statements like overflow assignment if (private_stmt.find(*i) != private_stmt.end()) { // e.g. ii <= UB GEQ_Handle h = f_root->add_GEQ(); @@ -529,14 +527,14 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(ub, 1); } - // restrict original loop index inside the tile + // restrict original loop index inside the tile else { if (method == StridedTile) { // e.g. ii <= i < ii + tile_size GEQ_Handle h1 = f_root->add_GEQ(); h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); - + GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); @@ -547,7 +545,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, -tile_size); h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); h1.update_coef(aligned_lb, -1); - + GEQ_Handle h2 = f_root->add_GEQ(); h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), tile_size); @@ -558,30 +556,30 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } } } - + // update loop level information for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { for (int j = 1; j <= stmt[*i].loop_level.size(); j++) switch (stmt[*i].loop_level[j - 1].type) { - case LoopLevelOriginal: - break; - case LoopLevelTile: - if (stmt[*i].loop_level[j - 1].payload >= outer_level) - stmt[*i].loop_level[j - 1].payload++; - break; - default: - throw loop_error( - "unknown loop level type for statement " - + to_string(*i)); + case LoopLevelOriginal: + break; + case LoopLevelTile: + if (stmt[*i].loop_level[j - 1].payload >= outer_level) + stmt[*i].loop_level[j - 1].payload++; + break; + default: + throw loop_error( + "unknown loop level type for statement " + + to_string(*i)); } - + LoopLevel ll; ll.type = LoopLevelTile; ll.payload = level + 1; ll.parallel_level = 0; stmt[*i].loop_level.insert( - stmt[*i].loop_level.begin() + (outer_level - 1), ll); + stmt[*i].loop_level.begin() + (outer_level - 1), ll); } } |