diff options
author | Tuowen Zhao <ztuowen@gmail.com> | 2016-09-20 15:56:14 -0600 |
---|---|---|
committer | Tuowen Zhao <ztuowen@gmail.com> | 2016-09-20 15:56:14 -0600 |
commit | 6983c09937baac3ffb7d3a45c3c5009c0eba7e6c (patch) | |
tree | b42f0f9383b40fbeb540bf51b9f11eaf6f80c990 | |
parent | b829868dfd6cbe9da07227220856b975f33e2037 (diff) | |
download | chill-6983c09937baac3ffb7d3a45c3c5009c0eba7e6c.tar.gz chill-6983c09937baac3ffb7d3a45c3c5009c0eba7e6c.tar.bz2 chill-6983c09937baac3ffb7d3a45c3c5009c0eba7e6c.zip |
python loop & more doc
-rw-r--r-- | CMakeLists.txt | 12 | ||||
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | include/chilldebug.h | 2 | ||||
-rw-r--r-- | include/irtools.hh | 29 | ||||
-rw-r--r-- | include/loop.hh | 32 | ||||
-rw-r--r-- | src/chill.cc (renamed from src/chill_run.cc) | 22 | ||||
-rw-r--r-- | src/chillmodule.cc | 13 | ||||
-rw-r--r-- | src/dep.cc | 116 | ||||
-rw-r--r-- | src/ir_rose.cc | 59 | ||||
-rw-r--r-- | src/ir_rose_utils.cc | 5 | ||||
-rw-r--r-- | src/irtools.cc | 28 | ||||
-rw-r--r-- | src/loop.cc | 640 | ||||
-rw-r--r-- | src/loop_basic.cc | 46 | ||||
-rw-r--r-- | src/loop_datacopy.cc | 1017 | ||||
-rw-r--r-- | src/loop_tile.cc | 45 | ||||
-rw-r--r-- | src/loop_unroll.cc | 22 |
16 files changed, 300 insertions, 1792 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index f2f7378..ff2bd43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,7 +36,6 @@ set(IR_CHILL_SRC ) set(PYTHON_SRC - src/chill_run.cc src/chillmodule.cc ) @@ -66,8 +65,15 @@ include_directories( ${BOOSTHOME}/include ${PYTHON_INCLUDE_DIRS}) -add_executable(chill ${CORE_SRC} ${PYTHON_SRC} ${IR_CHILL_SRC}) -target_link_libraries(chill ${CORE_LIBS} ${PYTHON_LIBRARY}) +add_executable(chill + src/chill.cc + ${CORE_SRC} + ${PYTHON_SRC} + ${IR_CHILL_SRC}) + +target_link_libraries(chill + ${CORE_LIBS} + ${PYTHON_LIBRARY}) add_dependencies(chill omega codegen rosecg parseRel) install(TARGETS chill @@ -33,3 +33,7 @@ * `parserel` - parse Relation vectors, for adding knowns * `example` - CHiLL example scripts * `doc` - manual & doxygen + +## Debug + +* pass `-DDEBUGCHILL` will enable debug output
\ No newline at end of file diff --git a/include/chilldebug.h b/include/chilldebug.h index 865f1f6..8678749 100644 --- a/include/chilldebug.h +++ b/include/chilldebug.h @@ -1,8 +1,6 @@ // a central place to turn on debugging messages -// enable the next line to get lots of output -//#define DEBUGCHILL #ifndef DEBUGCHILL_H #define DEBUGCHILL_H diff --git a/include/irtools.hh b/include/irtools.hh index a3b552a..6648847 100644 --- a/include/irtools.hh +++ b/include/irtools.hh @@ -33,12 +33,41 @@ struct ir_tree_node { } }; +//! Build IR tree from the source code +/*! + * Block type node can only be leaf, i.e., there is no further stuctures inside a block allowed + * @param control + * @param parent + * @return + */ std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *parent = NULL); +//! Extract statements from IR tree +/*! + * Statements returned are ordered in lexical order in the source code + * @param ir_tree + * @return + */ std::vector<ir_tree_node *> extract_ir_stmts( const std::vector<ir_tree_node *> &ir_tree); bool is_dependence_valid(ir_tree_node *src_node, ir_tree_node *dst_node, const DependenceVector &dv, bool before); +//! test data dependeces between two statements +/*! + * The first statement in parameter must be lexically before the second statement in parameter. + * Returned dependences are all lexicographically positive + * @param ir + * @param repr1 + * @param IS1 + * @param repr2 + * @param IS2 + * @param freevar + * @param index + * @param i + * @param j + * @return Dependecies between the two statements. First vector is dependencies from first to second, + * second vector is the reverse + */ std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_data_dependences( IR_Code *ir, const omega::CG_outputRepr *repr1, const omega::Relation &IS1, const omega::CG_outputRepr *repr2, diff --git a/include/loop.hh b/include/loop.hh index 9620489..e02b7e9 100644 --- a/include/loop.hh +++ b/include/loop.hh @@ -148,8 +148,38 @@ public: void tile(int stmt_num, int level, int tile_size, int outer_level = 1, TilingMethodType method = StridedTile, int alignment_offset = 0, int alignment_multiple = 1); std::set<int> split(int stmt_num, int level, const omega::Relation &cond); std::set<int> unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >(), int cleanup_split_level = 0); - + + //! Datacopy function by reffering arrays by numbers + /*! + * for example + * ~~~ + * A[i] = A[i-1] + B[i]; + * ~~~ + * parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] + * + * @param array_ref_nums + * @param level + * @param allow_extra_read + * @param fastest_changing_dimension + * @param padding_stride + * @param padding_alignment + * @param memory_type + * @return + */ bool datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + //! Datacopy function by reffering arrays by name + /*! + * parameter array_name=A means to copy data touched by A[i-1] and A[i] + * @param stmt_num + * @param level + * @param array_name + * @param allow_extra_read + * @param fastest_changing_dimension + * @param padding_stride + * @param padding_alignment + * @param memory_type + * @return + */ bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); bool datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); diff --git a/src/chill_run.cc b/src/chill.cc index 4eafe65..6ca0c4c 100644 --- a/src/chill_run.cc +++ b/src/chill.cc @@ -1,9 +1,9 @@ #include "chilldebug.h" -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> #include "loop.hh" @@ -18,7 +18,6 @@ //--- Loop *myloop = NULL; IR_Code *ir_code = NULL; -bool repl_stop = false; bool is_interactive = false; std::vector<IR_Control *> ir_controls; @@ -36,8 +35,6 @@ int main( int argc, char* argv[] ) exit(-1); } - int fail = 0; - // Create PYTHON interpreter /* Pass argv[0] to the Python interpreter */ Py_SetProgramName(argv[0]); @@ -64,17 +61,16 @@ int main( int argc, char* argv[] ) printf("CHiLL v" CHILL_BUILD_VERSION " (built on " CHILL_BUILD_DATE ")\n"); printf("Copyright (C) 2008 University of Southern California\n"); printf("Copyright (C) 2009-2012 University of Utah\n"); - //is_interactive = true; // let the lua interpreter know. fflush(stdout); - // TODO: read lines of python code. - //Not sure if we should set fail from interactive mode + is_interactive=true; + PyRun_InteractiveLoop(stdin,"-"); printf("CHiLL ending...\n"); fflush(stdout); } - //printf("DONE with PyRun_SimpleString()\n"); - - if (!fail && ir_code != NULL && myloop != NULL && myloop->stmt.size() != 0 && !myloop->stmt[0].xform.is_null()) { + // TODO Codegen is the major part that prevent CHiLL to be a pure Python library + // Other than exporting a class of course + if (ir_code != NULL && myloop != NULL && myloop->stmt.size() != 0 && !myloop->stmt[0].xform.is_null()) { int lnum_start; int lnum_end; lnum_start = get_loop_num_start(); diff --git a/src/chillmodule.cc b/src/chillmodule.cc index 72f32d4..b347570 100644 --- a/src/chillmodule.cc +++ b/src/chillmodule.cc @@ -19,7 +19,6 @@ using namespace omega; extern Loop *myloop; extern IR_Code *ir_code; extern bool is_interactive; -extern bool repl_stop; std::string procedure_name; std::string source_filename; @@ -51,9 +50,6 @@ static void set_loop_num_end(int end_num) { loop_end_num = end_num; } -// TODO: finalize_loop(int,int) and init_loop(int,int) are identical to thier Lua counterparts. -// consider integrating them - void finalize_loop(int loop_num_start, int loop_num_end) { if (loop_num_start == loop_num_end) { ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); @@ -126,7 +122,6 @@ static void init_loop(int loop_num_start, int loop_num_end) { IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); myloop = new Loop(block); delete block; - //if (is_interactive) printf("%s ", PROMPT_STRING); } // ----------------------- // @@ -332,16 +327,9 @@ static PyObject* chill_print_space(PyObject* self, PyObject* args) { Py_RETURN_NONE; } -static PyObject* chill_exit(PyObject* self, PyObject* args) { - strict_arg_num(args, 0, "exit"); - repl_stop = true; - Py_RETURN_NONE; -} - static void add_known(std::string cond_expr) { int num_dim = myloop->known.n_set(); std::vector<std::map<std::string, int> >* cond; - // TODO since we are using python, change this! cond = parse_relation_vector(cond_expr.c_str()); Relation rel(num_dim); @@ -744,7 +732,6 @@ static PyMethodDef ChillMethods[] = { {"print_code", chill_print_code, METH_VARARGS, "print generated code"}, {"print_dep", chill_print_dep, METH_VARARGS, "print the dependencies graph"}, {"print_space", chill_print_space, METH_VARARGS, "print space"}, - {"exit", chill_exit, METH_VARARGS, "exit the interactive consule"}, {"known", chill_known, METH_VARARGS, "knwon"}, {"remove_dep", chill_remove_dep, METH_VARARGS, "remove dependency i suppose"}, {"original", chill_original, METH_VARARGS, "original"}, @@ -94,13 +94,6 @@ std::ostream& operator<<(std::ostream &os, const DependenceVector &d) { return os; } -// DependenceVector::DependenceVector(int size): -// lbounds(std::vector<coef_t>(size, 0)), -// ubounds(std::vector<coef_t>(size, 0)) { -// src = NULL; -// dst = NULL; -// } - DependenceVector::DependenceVector(const DependenceVector &that) { if (that.sym != NULL) this->sym = that.sym->clone(); @@ -135,18 +128,12 @@ DependenceType DependenceVector::getType() const { } bool DependenceVector::is_data_dependence() const { - if (type == DEP_W2R || type == DEP_R2W || type == DEP_W2W - || type == DEP_R2R) - return true; - else - return false; + return (type == DEP_W2R || type == DEP_R2W || type == DEP_W2W + || type == DEP_R2R); } bool DependenceVector::is_control_dependence() const { - if (type == DEP_CONTROL) - return true; - else - return false; + return type == DEP_CONTROL; } bool DependenceVector::has_negative_been_carried_at(int dim) const { @@ -160,10 +147,7 @@ bool DependenceVector::has_negative_been_carried_at(int dim) const { if (lbounds[i] > 0 || ubounds[i] < 0) return false; - if (lbounds[dim] < 0) - return true; - else - return false; + return lbounds[dim] < 0; } @@ -178,10 +162,7 @@ bool DependenceVector::has_been_carried_at(int dim) const { if (lbounds[i] > 0 || ubounds[i] < 0) return false; - if ((lbounds[dim] != 0) || (ubounds[dim] !=0)) - return true; - - return false; + return (lbounds[dim] != 0) || (ubounds[dim] !=0); } bool DependenceVector::has_been_carried_before(int dim) const { @@ -262,22 +243,14 @@ bool DependenceVector::hasPositive(int dim) const { if (dim >= lbounds.size()) throw std::invalid_argument("invalid dependence dimension"); - if (lbounds[dim] > 0) - //av: changed from ubounds to lbounds may have side effects - return true; - else - return false; + return lbounds[dim] > 0; } bool DependenceVector::hasNegative(int dim) const { if (dim >= lbounds.size()) throw std::invalid_argument("invalid dependence dimension"); - if (ubounds[dim] < 0) - //av: changed from lbounds to ubounds may have side effects - return true; - else - return false; + return ubounds[dim] < 0; } bool DependenceVector::isCarried(int dim, omega::coef_t distance) const { @@ -296,12 +269,7 @@ bool DependenceVector::isCarried(int dim, omega::coef_t distance) const { if (dim >= lbounds.size()) return true; - if (lbounds[dim] > distance) - return false; - else if (ubounds[dim] < -distance) - return false; - - return true; + return lbounds[dim] >= -distance && ubounds[dim] <= distance; } bool DependenceVector::canPermute(const std::vector<int> &pi) const { @@ -402,60 +370,6 @@ DependenceVector DependenceVector::reverse() const { return dv; } -// std::vector<DependenceVector> DependenceVector::matrix(const std::vector<std::vector<int> > &M) const { -// if (M.size() != lbounds.size()) -// throw std::invalid_argument("(non)unimodular transformation dimensionality does not match dependence space"); - -// const int n = lbounds.size(); -// DependenceVector dv; -// if (sym != NULL) -// dv.sym = sym->clone(); -// else -// dv.sym = NULL; -// dv.type = type; - -// for (int i = 0; i < n; i++) { -// assert(M[i].size() == n+1 || M[i].size() == n); - -// omega::coef_t lb, ub; -// if (M[i].size() == n+1) -// lb = ub = M[i][n]; -// else -// lb = ub = 0; - -// for (int j = 0; j < n; j++) { -// int c = M[i][j]; -// if (c == 0) -// continue; - -// if (c > 0) { -// if (lbounds[j] == -posInfinity) -// lb = -posInfinity; -// else if (lb != -posInfinity) -// lb += c * lbounds[j]; -// if (ubounds[j] == posInfinity) -// ub = posInfinity; -// else if (ub != posInfinity) -// ub += c * ubounds[j]; -// } -// else { -// if (ubounds[j] == posInfinity) -// lb = -posInfinity; -// else if (lb != -posInfinity) -// lb += c * ubounds[j]; -// if (lbounds[j] == -posInfinity) -// ub = posInfinity; -// else if (ub != posInfinity) -// ub += c * lbounds[j]; -// } -// } -// dv.lbounds.push_back(lb); -// dv.ubounds.push_back(ub); -// } -// dv.is_reduction = is_reduction; - -// return dv.normalize(); -// } //----------------------------------------------------------------------------- // Class: DependenceGraph @@ -497,20 +411,6 @@ DependenceGraph DependenceGraph::permute(const std::vector<int> &pi, return g; } -// DependenceGraph DependenceGraph::matrix(const std::vector<std::vector<int> > &M) const { -// DependenceGraph g; - -// for (int i = 0; i < vertex.size(); i++) -// g.insert(vertex[i].first); - -// for (int i = 0; i < vertex.size(); i++) -// for (EdgeList::const_iterator j = vertex[i].second.begin(); j != vertex[i].second.end(); j++) -// for (int k = 0; k < j->second.size(); k++) -// g.connect(i, j->first, j->second[k].matrix(M)); - -// return g; -// } - DependenceGraph DependenceGraph::subspace(int dim) const { DependenceGraph g; diff --git a/src/ir_rose.cc b/src/ir_rose.cc index 223e7a4..f4039ab 100644 --- a/src/ir_rose.cc +++ b/src/ir_rose.cc @@ -102,7 +102,6 @@ int IR_roseArraySymbol::n_dim() const { dim++; } } else if (ptrType != NULL) { - //std::cout << "pntrType \n"; ; // not sure if this case will happen } } @@ -113,7 +112,6 @@ int IR_roseArraySymbol::n_dim() const { omega::CG_outputRepr *IR_roseArraySymbol::size(int dim) const { SgArrayType* arrType = isSgArrayType(vs_->get_type()); - // SgExprListExp* dimList = arrType->get_dim_info(); int count = 0; SgExpression* expr; SgType* pntrType = isSgPointerType(vs_->get_type()); @@ -1446,16 +1444,12 @@ IR_Block *IR_roseCode::GetCode() const { } void IR_roseCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) { - /* SgStatementPtrList *tnl = - static_cast<omega::CG_roseRepr *>(repr)->GetList(); - SgNode *tf_old; - */ SgStatementPtrList *tnl = static_cast<omega::CG_roseRepr *>(repr)->GetList(); SgNode* node_ = static_cast<omega::CG_roseRepr *>(repr)->GetCode(); SgNode * tf_old; - /* May need future revision it tnl has more than one statement */ + /* May need future revision if tnl has more than one statement */ switch (old->type()) { @@ -1507,44 +1501,6 @@ void IR_roseCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) { delete old; delete repr; - /* May need future revision it tnl has more than one statement */ - /* - switch (old->type()) { - - case IR_CONTROL_LOOP: - tf_old = static_cast<IR_roseLoop *>(old)->tf_; - break; - case IR_CONTROL_BLOCK: - tf_old = static_cast<IR_roseBlock *>(old)->start_; - break; - - default: - throw ir_error("control structure to be replaced not supported"); - break; - } - - // std::string y = tf_old->unparseToString(); - SgStatement *s = isSgStatement(tf_old); - if (s != 0) { - SgStatement *p = isSgStatement(tf_old->get_parent()); - - if (p != 0) { - // SgStatement* it2 = isSgStatement(tnl); - - // if(it2 != NULL){ - p->replace_statement(s, *tnl); - // } - // else { - // throw ir_error("Replacing Code not a statement!"); - // } - } else - throw ir_error("Replacing Code not a statement!"); - } else - throw ir_error("Replacing Code not a statement!"); - // y = tnl->unparseToString(); - delete old; - delete repr; - */ } void IR_roseCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) { @@ -1560,19 +1516,6 @@ void IR_roseCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) { std::string z = isSgNode(parent)->unparseToString(); parent->replace_expression(ia_orig, op); isSgNode(op)->set_parent(isSgNode(parent)); - - /* if(isSgBinaryOp(parent)) - { - if(isSgBinaryOp(parent)->get_lhs_operand() == ia_orig){ - isSgBinaryOp(parent)->set_lhs_operand(op); - }else if(isSgBinaryOp(parent)->get_rhs_operand() == ia_orig){ - isSgBinaryOp(parent)->set_rhs_operand(op); - - - } - else - parent->replace_expression(ia_orig, op); - */ } else { SgStatement* parent_stmt = isSgStatement( isSgNode(ia_orig)->get_parent()); diff --git a/src/ir_rose_utils.cc b/src/ir_rose_utils.cc index 64b0891..1329031 100644 --- a/src/ir_rose_utils.cc +++ b/src/ir_rose_utils.cc @@ -31,8 +31,6 @@ std::vector<SgForStatement *> find_deepest_loops(SgStatementPtrList& tnl) { std::vector<SgForStatement *> loops; - - for(SgStatementPtrList::const_iterator j = tnl.begin(); j != tnl.end(); j++) { std::vector<SgForStatement *> t = find_deepest_loops(isSgNode(*j)); @@ -40,10 +38,7 @@ std::vector<SgForStatement *> find_deepest_loops(SgStatementPtrList& tnl) { loops = t; } - - return loops; - } std::vector<SgForStatement *> find_deepest_loops(SgNode *tn) { diff --git a/src/irtools.cc b/src/irtools.cc index 4ab6c85..e7e5029 100644 --- a/src/irtools.cc +++ b/src/irtools.cc @@ -19,8 +19,6 @@ using namespace omega; -// Build IR tree from the source code. Block type node can only be -// leaf, i.e., there is no further structures inside a block allowed. std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *parent) { std::vector<ir_tree_node *> result; @@ -111,9 +109,6 @@ std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *par return result; } - -// Extract statements from IR tree. Statements returned are ordered in -// lexical order in the source code. std::vector<ir_tree_node *> extract_ir_stmts(const std::vector<ir_tree_node *> &ir_tree) { std::vector<ir_tree_node *> result; for (int i = 0; i < ir_tree.size(); i++) @@ -184,14 +179,6 @@ bool is_dependence_valid(ir_tree_node *src_node, ir_tree_node *dst_node, } - - -// Test data dependences between two statements. The first statement -// in parameter must be lexically before the second statement in -// parameter. Returned dependences are all lexicographically -// positive. The first vector in returned pair is dependences from the -// first statement to the second statement and the second vector in -// returned pair is in reverse order. std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_data_dependences( IR_Code *ir, const CG_outputRepr *repr1, const Relation &IS1, const CG_outputRepr *repr2, const Relation &IS2, @@ -259,21 +246,6 @@ std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_da for (int i = 0; i < access2.size(); i++) delete access2[i]; } - /*std::pair<std::vector<DependenceVector>, - std::vector<DependenceVector> > dv = - ir->FindScalarDeps(repr1, repr2, index, i, j); - - - result.first.insert(result.first.end(), dv.first.begin(), - dv.first.end()); - result.second.insert(result.second.end(), dv.second.begin(), - dv.second.end());*/ - /*result.first.insert(result.first.end(), dv.first.begin(), - dv.first.end()); - result.second.insert(result.second.end(), dv.second.begin(), - dv.second.end()); - */ - return result; } diff --git a/src/loop.cc b/src/loop.cc index f187a50..19378a4 100644 --- a/src/loop.cc +++ b/src/loop.cc @@ -69,7 +69,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, stmt_nesting_level_[i] = t; stmt_nesting_level[i] = t; } - + stmt = std::vector<Statement>(ir_stmt.size()); int n_dim = -1; int max_loc; @@ -82,14 +82,14 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, max_nesting_level = stmt_nesting_level[j]; loc = j; } - + // most deeply nested statement acting as a reference point if (n_dim == -1) { n_dim = max_nesting_level; max_loc = loc; - + index = std::vector<std::string>(n_dim); - + ir_tree_node *itn = ir_stmt[loc]; int cur_dim = n_dim - 1; while (itn->parent != NULL) { @@ -101,42 +101,28 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, } } } - + // align loops by names, temporary solution ir_tree_node *itn = ir_stmt[loc]; int depth = stmt_nesting_level_[loc] - 1; - /* while (itn->parent != NULL) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { - std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); - for (int j = 0; j < n_dim; j++) - if (index[j] == name) { - itn->payload = j; - break; - } - if (itn->payload == -1) - throw loop_error("no complex alignment yet"); - } - } - */ for (int t = depth; t >= 0; t--) { int y = t; ir_tree_node *itn = ir_stmt[loc]; - + while ((itn->parent != NULL) && (y >= 0)) { itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) y--; } - + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { CG_outputBuilder *ocg = ir->builder(); - + itn->payload = depth - t; - + CG_outputRepr *code = static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); - + std::vector<CG_outputRepr *> index_expr; std::vector<std::string> old_index; CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); @@ -145,48 +131,39 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, static_cast<IR_Loop *>(itn->content)->index()->name()); code = ocg->CreateSubstitutedStmt(0, code, old_index, index_expr); - + replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); - //stmt[loc].code = code; - } } - + // set relation variable names Relation r(n_dim); F_And *f_root = r.add_and(); itn = ir_stmt[loc]; int temp_depth = depth; while (itn->parent != NULL) { - + itn = itn->parent; if (itn->content->type() == IR_CONTROL_LOOP) { r.name_set_var(itn->payload + 1, index[temp_depth]); - + temp_depth--; } - //static_cast<IR_Loop *>(itn->content)->index()->name()); } - - /*while (itn->parent != NULL) { - itn = itn->parent; - if (itn->content->type() == IR_CONTROL_LOOP) - r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); - }*/ - + // extract information from loop/if structures std::vector<bool> processed(n_dim, false); std::vector<std::string> vars_to_be_reversed; itn = ir_stmt[loc]; while (itn->parent != NULL) { itn = itn->parent; - + switch (itn->content->type()) { case IR_CONTROL_LOOP: { IR_Loop *lp = static_cast<IR_Loop *>(itn->content); Variable_ID v = r.set_var(itn->payload + 1); int c; - + try { c = lp->step_size(); if (c > 0) { @@ -200,7 +177,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, cond, true); else throw ir_error("loop condition not supported"); - + } else if (c < 0) { CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *lb = lp->lower_bound(); @@ -218,7 +195,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, IR_COND_LT, true); else throw ir_error("loop condition not supported"); - + vars_to_be_reversed.push_back(lp->index()->name()); } else throw ir_error("loop step size zero"); @@ -229,7 +206,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, itn->content = itn->content->convert(); return false; } - + if (abs(c) != 1) { F_Exists *f_exists = f_root->add_exists(); Variable_ID e = f_exists->declare(); @@ -244,7 +221,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, true); } - + processed[itn->payload] = true; break; } @@ -281,7 +258,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, } return false; } - + break; } default: @@ -292,7 +269,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, return false; } } - + // add information for missing loops for (int j = 0; j < n_dim; j++) if (!processed[j]) { @@ -303,89 +280,32 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, && itn->payload == j) break; } - + Variable_ID v = r.set_var(j + 1); if (loc < max_loc) { - + CG_outputBuilder *ocg = ir->builder(); - + CG_outputRepr *lb = static_cast<IR_Loop *>(itn->content)->lower_bound(); - + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, false); - - /* if (ir->QueryExpOperation( - static_cast<IR_Loop *>(itn->content)->lower_bound()) - == IR_OP_VARIABLE) { - IR_ScalarRef *ref = - static_cast<IR_ScalarRef *>(ir->Repr2Ref( - static_cast<IR_Loop *>(itn->content)->lower_bound())); - std::string name_ = ref->name(); - - for (int i = 0; i < index.size(); i++) - if (index[i] == name_) { - exp2formula(ir, r, f_root, freevar, lb, v, 's', - IR_COND_GE, false); - - CG_outputRepr *ub = - static_cast<IR_Loop *>(itn->content)->upper_bound(); - IR_CONDITION_TYPE cond = - static_cast<IR_Loop *>(itn->content)->stop_cond(); - if (cond == IR_COND_LT || cond == IR_COND_LE) - exp2formula(ir, r, f_root, freevar, ub, v, - 's', cond, false); - - - - } - - } - */ - + } else { // loc > max_loc - + CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *ub = static_cast<IR_Loop *>(itn->content)->upper_bound(); - + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, false); - /*if (ir->QueryExpOperation( - static_cast<IR_Loop *>(itn->content)->upper_bound()) - == IR_OP_VARIABLE) { - IR_ScalarRef *ref = - static_cast<IR_ScalarRef *>(ir->Repr2Ref( - static_cast<IR_Loop *>(itn->content)->upper_bound())); - std::string name_ = ref->name(); - - for (int i = 0; i < index.size(); i++) - if (index[i] == name_) { - - CG_outputRepr *lb = - static_cast<IR_Loop *>(itn->content)->lower_bound(); - - exp2formula(ir, r, f_root, freevar, lb, v, 's', - IR_COND_GE, false); - - CG_outputRepr *ub = - static_cast<IR_Loop *>(itn->content)->upper_bound(); - IR_CONDITION_TYPE cond = - static_cast<IR_Loop *>(itn->content)->stop_cond(); - if (cond == IR_COND_LT || cond == IR_COND_LE) - exp2formula(ir, r, f_root, freevar, ub, v, - 's', cond, false); - - - } - } - */ } } - + r.setup_names(); r.simplify(); - + // insert the statement CG_outputBuilder *ocg = ir->builder(); std::vector<CG_outputRepr *> reverse_expr; @@ -407,10 +327,10 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, stmt[loc].loop_level[i].payload = i; stmt[loc].loop_level[i].parallel_level = 0; } - + stmt_nesting_level[loc] = -1; } - + return true; } @@ -418,31 +338,27 @@ Loop::Loop(const IR_Control *control) { last_compute_cgr_ = NULL; last_compute_cg_ = NULL; - + ir = const_cast<IR_Code *>(control->ir_); init_code = NULL; cleanup_code = NULL; tmp_loop_var_name_counter = 1; overflow_var_name_counter = 1; known = Relation::True(0); - + ir_tree = build_ir_tree(control->clone(), NULL); - // std::vector<ir_tree_node *> ir_stmt; - while (!init_loop(ir_tree, ir_stmt)) { } - - for (int i = 0; i < stmt.size(); i++) { std::map<int, CG_outputRepr*>::iterator it = replace.find(i); - + if (it != replace.end()) stmt[i].code = it->second; else stmt[i].code = stmt[i].code; } - + if (stmt.size() != 0) dep = DependenceGraph(stmt[0].IS.n_set()); else @@ -450,7 +366,7 @@ Loop::Loop(const IR_Control *control) { // init the dependence graph for (int i = 0; i < stmt.size(); i++) dep.insert(); - + for (int i = 0; i < stmt.size(); i++) for (int j = i; j < stmt.size(); j++) { std::pair<std::vector<DependenceVector>, @@ -458,7 +374,7 @@ Loop::Loop(const IR_Control *control) { ir, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, freevar, index, stmt_nesting_level_[i], stmt_nesting_level_[j]); - + for (int k = 0; k < dv.first.size(); k++) { if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], true)) @@ -466,7 +382,7 @@ Loop::Loop(const IR_Control *control) { else { dep.connect(j, i, dv.first[k].reverse()); } - + } for (int k = 0; k < dv.second.size(); k++) if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], @@ -475,11 +391,8 @@ Loop::Loop(const IR_Control *control) { else { dep.connect(i, j, dv.second[k].reverse()); } - // std::pair<std::vector<DependenceVector>, - // std::vector<DependenceVector> > dv_ = test_data_dependences( - } - + // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] @@ -487,49 +400,48 @@ Loop::Loop(const IR_Control *control) { int n = stmt[i].IS.n_set(); stmt[i].xform = Relation(n, 2 * n + 1); F_And *f_root = stmt[i].xform.add_and(); - + for (int j = 1; j <= n; j++) { EQ_Handle h = f_root->add_EQ(); h.update_coef(stmt[i].xform.output_var(2 * j), 1); h.update_coef(stmt[i].xform.input_var(j), -1); } - + for (int j = 1; j <= 2 * n + 1; j += 2) { EQ_Handle h = f_root->add_EQ(); h.update_coef(stmt[i].xform.output_var(j), 1); } stmt[i].xform.simplify(); } - + if (stmt.size() != 0) num_dep_dim = stmt[0].IS.n_set(); else num_dep_dim = 0; - // debug - /*for (int i = 0; i < stmt.size(); i++) { - std::cout << i << ": "; - //stmt[i].xform.print(); - stmt[i].IS.print(); - std::cout << std::endl; - - }*/ - //end debug +// Debug output +// for (int i = 0; i < stmt.size(); i++) { +// std::cout << i << ": "; +// stmt[i].xform.print(); +// stmt[i].IS.print(); +// std::cout << std::endl; +// +// } } Loop::~Loop() { - + delete last_compute_cgr_; delete last_compute_cg_; - + for (int i = 0; i < stmt.size(); i++) if (stmt[i].code != NULL) { stmt[i].code->clear(); delete stmt[i].code; } - + for (int i = 0; i < ir_tree.size(); i++) delete ir_tree[i]; - + if (init_code != NULL) { init_code->clear(); delete init_code; @@ -543,10 +455,10 @@ Loop::~Loop() { int Loop::get_dep_dim_of(int stmt_num, int level) const { if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invaid statement " + to_string(stmt_num)); - + if (level < 1 || level > stmt[stmt_num].loop_level.size()) return -1; - + int trip_count = 0; while (true) { switch (stmt[stmt_num].loop_level[level - 1].type) { @@ -577,16 +489,16 @@ int Loop::get_dep_dim_of(int stmt_num, int level) const { int Loop::get_last_dep_dim_before(int stmt_num, int level) const { if (stmt_num < 0 || stmt_num >= stmt.size()) throw std::invalid_argument("invaid statement " + to_string(stmt_num)); - + if (level < 1) return -1; if (level > stmt[stmt_num].loop_level.size()) level = stmt[stmt_num].loop_level.size() + 1; - + for (int i = level - 1; i >= 1; i--) if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) return stmt[stmt_num].loop_level[i - 1].payload; - + return -1; } @@ -623,7 +535,7 @@ CG_outputRepr *Loop::getCode(int effort) const { if (m == 0) return NULL; const int n = stmt[0].xform.n_out(); - + if (last_compute_cg_ == NULL) { std::vector<Relation> IS(m); std::vector<Relation> xforms(m); @@ -632,29 +544,29 @@ CG_outputRepr *Loop::getCode(int effort) const { xforms[i] = stmt[i].xform; } Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - + last_compute_cg_ = new CodeGen(xforms, IS, known); delete last_compute_cgr_; last_compute_cgr_ = NULL; } - + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { delete last_compute_cgr_; last_compute_cgr_ = last_compute_cg_->buildAST(effort); last_compute_effort_ = effort; } - + std::vector<CG_outputRepr *> stmts(m); for (int i = 0; i < m; i++) stmts[i] = stmt[i].code; CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts); - + if (init_code != NULL) repr = ocg->StmtListAppend(init_code->clone(), repr); if (cleanup_code != NULL) repr = ocg->StmtListAppend(repr, cleanup_code->clone()); - + return repr; } @@ -663,7 +575,7 @@ void Loop::printCode(int effort) const { if (m == 0) return; const int n = stmt[0].xform.n_out(); - + if (last_compute_cg_ == NULL) { std::vector<Relation> IS(m); std::vector<Relation> xforms(m); @@ -672,18 +584,18 @@ void Loop::printCode(int effort) const { xforms[i] = stmt[i].xform; } Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - + last_compute_cg_ = new CodeGen(xforms, IS, known); delete last_compute_cgr_; last_compute_cgr_ = NULL; } - + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { delete last_compute_cgr_; last_compute_cgr_ = last_compute_cg_->buildAST(effort); last_compute_effort_ = effort; } - + std::string repr = last_compute_cgr_->printString(); std::cout << repr << std::endl; } @@ -710,7 +622,7 @@ void Loop::printDependenceGraph() const { Relation Loop::getNewIS(int stmt_num) const { Relation result; - + if (stmt[stmt_num].xform.is_null()) { Relation known = Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set()); @@ -723,19 +635,19 @@ Relation Loop::getNewIS(int stmt_num) const { Restrict_Domain(copy(stmt[stmt_num].xform), copy(stmt[stmt_num].IS))), known); } - + result.simplify(2, 4); - + return result; } std::vector<Relation> Loop::getNewIS() const { const int m = stmt.size(); - + std::vector<Relation> new_IS(m); for (int i = 0; i < m; i++) new_IS[i] = getNewIS(i); - + return new_IS; } @@ -743,7 +655,7 @@ void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { // check sanity of parameters if(stmt_num < 0) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - + CG_outputBuilder *ocg = ir->builder(); CG_outputRepr *code = stmt[stmt_num].code; ocg->CreatePragmaAttribute(code, level, pragmaText); @@ -761,13 +673,13 @@ void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hin std::vector<int> Loop::getLexicalOrder(int stmt_num) const { assert(stmt_num < stmt.size()); - + const int n = stmt[stmt_num].xform.n_out(); std::vector<int> lex(n, 0); - + for (int i = 0; i < n; i += 2) lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); - + return lex; } @@ -776,13 +688,13 @@ std::vector<int> Loop::getLexicalOrder(int stmt_num) const { std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const { assert(stmt_num >= 0 && stmt_num < stmt.size()); assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); - + std::set<int> working; for (int i = 0; i < stmt.size(); i++) if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable() && stmt[i].loop_level.size() >= level) working.insert(i); - + for (int i = 1; i <= level; i++) { int a = getLexicalOrder(stmt_num, i); for (std::set<int>::iterator j = working.begin(); j != working.end();) { @@ -793,14 +705,14 @@ std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const { ++j; } } - + return working; } int Loop::getLexicalOrder(int stmt_num, int level) const { assert(stmt_num >= 0 && stmt_num < stmt.size()); assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); - + Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform; for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++) if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { @@ -815,7 +727,7 @@ int Loop::getLexicalOrder(int stmt_num, int level) const { return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t; } } - + throw loop_error( "can't find lexical order for statement " + to_string(stmt_num) + "'s loop level " + to_string(level)); @@ -823,7 +735,7 @@ int Loop::getLexicalOrder(int stmt_num, int level) const { std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { const int m = stmt.size(); - + std::set<int> same_loops; for (int i = 0; i < m; i++) { if (dim < 0) @@ -837,32 +749,32 @@ std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { if (j > dim) same_loops.insert(i); } - + } - + return same_loops; } void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { const int m = stmt.size(); - + if (amount == 0) return; - + for (int i = 0; i < m; i++) { std::vector<int> lex2 = getLexicalOrder(i); - + bool need_shift = true; - + for (int j = 0; j < dim; j++) if (lex2[j] != lex[j]) { need_shift = false; break; } - + if (!need_shift) continue; - + if (amount > 0) { if (lex2[dim] < lex[dim]) continue; @@ -870,14 +782,14 @@ void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { if (lex2[dim] > lex[dim]) continue; } - + assign_const(stmt[i].xform, dim, lex2[dim] + amount); } } std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, int level) { - + std::set<int> not_nested_at_this_level; std::map<ir_tree_node*, std::set<int> > sorted_by_loop; std::map<int, std::set<int> > sorted_by_lex_order; @@ -885,73 +797,73 @@ std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, bool lex_order_already_set = false; for (std::set<int>::iterator it = active.begin(); it != active.end(); it++) { - + if (stmt[*it].ir_stmt_node == NULL) lex_order_already_set = true; } - + if (lex_order_already_set) { - + for (std::set<int>::iterator it = active.begin(); it != active.end(); it++) { std::map<int, std::set<int> >::iterator it2 = sorted_by_lex_order.find( get_const(stmt[*it].xform, 2 * (level - 1), Output_Var)); - + if (it2 != sorted_by_lex_order.end()) it2->second.insert(*it); else { - + std::set<int> to_insert; - + to_insert.insert(*it); - + sorted_by_lex_order.insert( std::pair<int, std::set<int> >( get_const(stmt[*it].xform, 2 * (level - 1), Output_Var), to_insert)); - + } - + } - + for (std::map<int, std::set<int> >::iterator it2 = sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); it2++) to_return.push_back(it2->second); - + } else { - + for (std::set<int>::iterator it = active.begin(); it != active.end(); it++) { - + ir_tree_node* itn = stmt[*it].ir_stmt_node; itn = itn->parent; while ((itn != NULL) && (itn->payload != level - 1)) itn = itn->parent; - + if (itn == NULL) not_nested_at_this_level.insert(*it); else { std::map<ir_tree_node*, std::set<int> >::iterator it2 = sorted_by_loop.find(itn); - + if (it2 != sorted_by_loop.end()) it2->second.insert(*it); else { std::set<int> to_insert; - + to_insert.insert(*it); - + sorted_by_loop.insert( std::pair<ir_tree_node*, std::set<int> >(itn, to_insert)); - + } - + } - + } if (not_nested_at_this_level.size() > 0) { for (std::set<int>::iterator it = not_nested_at_this_level.begin(); @@ -959,7 +871,7 @@ std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, std::set<int> temp; temp.insert(*it); to_return.push_back(temp); - + } } for (std::map<ir_tree_node*, std::set<int> >::iterator it2 = @@ -971,17 +883,17 @@ std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, void update_successors(int n, int node_num[], int cant_fuse_with[], Graph<std::set<int>, bool> &g, std::list<int> &work_list) { - + std::set<int> disconnect; for (Graph<std::set<int>, bool>::EdgeList::iterator i = g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { int m = i->first; - + if (node_num[m] != -1) throw loop_error("Graph input for fusion has cycles not a DAG!!"); - + std::vector<bool> check_ = g.getEdge(n, m); - + bool has_bad_edge_path = false; for (int i = 0; i < check_.size(); i++) if (!check_[i]) { @@ -994,12 +906,12 @@ void update_successors(int n, int node_num[], int cant_fuse_with[], cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]); disconnect.insert(m); } - - + + for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end(); i++) { g.disconnect(n, *i); - + bool no_incoming_edges = true; for (int j = 0; j < g.vertex.size(); j++) if (j != *i) @@ -1007,23 +919,23 @@ void update_successors(int n, int node_num[], int cant_fuse_with[], no_incoming_edges = false; break; } - - + + if (no_incoming_edges) work_list.push_back(*i); } - + } Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) { Graph<std::set<int>, bool> g; - + for (int i = 0; i < s.size(); i++) g.insert(s[i]); - + for (int i = 0; i < s.size(); i++) { - + for (int j = i + 1; j < s.size(); j++) { bool has_true_edge_i_to_j = false; bool has_true_edge_j_to_i = false; @@ -1031,16 +943,16 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( bool is_connected_j_to_i = false; for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end(); ii++) { - + for (std::set<int>::iterator jj = s[j].begin(); jj != s[j].end(); jj++) { - + std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); for (int k = 0; k < dvs.size(); k++) if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && dvs[k].has_been_carried_at(dep_dim))) { - + if (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at( dep_dim)) { @@ -1049,14 +961,14 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( break; } else { //g.connect(i, j, true); - + has_true_edge_i_to_j = true; //break } } - + //if (is_connected) - + // break; // if (has_true_edge_i_to_j && !is_connected_i_to_j) // g.connect(i, j, true); @@ -1065,76 +977,63 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( if (dvs[k].is_control_dependence() || (dvs[k].is_data_dependence() && dvs[k].has_been_carried_at(dep_dim))) { - + if (is_connected_i_to_j || has_true_edge_i_to_j) throw loop_error( "Graph input for fusion has cycles not a DAG!!"); - + if (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at( dep_dim)) { - //g.connect(i, j, false); is_connected_j_to_i = true; break; } else { - //g.connect(i, j, true); - has_true_edge_j_to_i = true; - //break; } } - - // if (is_connected) - //break; - // if (is_connected) - //break; } - - - //if (is_connected) - // break; } - - + + if (is_connected_i_to_j) g.connect(i, j, false); else if (has_true_edge_i_to_j) g.connect(i, j, true); - + if (is_connected_j_to_i) g.connect(j, i, false); else if (has_true_edge_j_to_i) g.connect(j, i, true); - - + + } } return g; } std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) { - + bool roots[g.vertex.size()]; - + for (int i = 0; i < g.vertex.size(); i++) roots[i] = true; - + for (int i = 0; i < g.vertex.size(); i++) for (int j = i + 1; j < g.vertex.size(); j++) { - + if (g.hasEdge(i, j)) roots[j] = false; - + if (g.hasEdge(j, i)) roots[i] = false; - + } - + std::list<int> work_list; int cant_fuse_with[g.vertex.size()]; std::vector<std::set<int> > s; //Each Fused set's representative node - + int node_to_fused_nodes[g.vertex.size()]; int node_num[g.vertex.size()]; for (int i = 0; i < g.vertex.size(); i++) { @@ -1145,61 +1044,54 @@ std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) { node_num[i] = -1; } // topological sort according to chun's permute algorithm - // std::vector<std::set<int> > s = g.topoSort(); std::vector<std::set<int> > s2 = g.topoSort(); if (work_list.empty() || (s2.size() != g.vertex.size())) { - std::cout << s2.size() << "\t" << g.vertex.size() << std::endl; throw loop_error("Input for fusion not a DAG!!"); - - } int fused_nodes_counter = 0; while (!work_list.empty()) { int n = work_list.front(); - //int n_ = g.vertex[n].first; work_list.pop_front(); int node; if (cant_fuse_with[n] == 0) node = 0; else node = cant_fuse_with[n]; - + if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) { int rep_node = node_to_fused_nodes[node]; node_num[n] = node_num[rep_node]; - + try { update_successors(n, node_num, cant_fuse_with, g, work_list); } catch (const loop_error &e) { - + throw loop_error( "statements cannot be fused together due to negative dependence"); - - + + } for (std::set<int>::iterator it = g.vertex[n].first.begin(); it != g.vertex[n].first.end(); it++) s[node].insert(*it); } else { - //std::set<int> new_node; - //new_node.insert(n_); s.push_back(g.vertex[n].first); node_to_fused_nodes[node] = n; node_num[n] = ++node; try { update_successors(n, node_num, cant_fuse_with, g, work_list); } catch (const loop_error &e) { - + throw loop_error( "statements cannot be fused together due to negative dependence"); - - + + } fused_nodes_counter++; } } - + return s; } @@ -1207,7 +1099,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active, int starting_order, std::vector<std::vector<std::string> > idxNames) { if (active.size() == 0) return; - + // check for sanity of parameters if (dim < 0 || dim % 2 != 0) throw std::invalid_argument( @@ -1232,7 +1124,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active, "statements are not in the same sub loop nest"); } } - + // sepearate statements by current loop level types int level = (dim + 2) / 2; std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; @@ -1245,7 +1137,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active, stmt[*i].loop_level[level - 1].type, stmt[*i].loop_level[level - 1].payload)].insert(*i); } - + // further separate statements due to control dependences std::vector<std::set<int> > active_by_level_type_splitted; for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = @@ -1277,11 +1169,11 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active, active_by_level_type_splitted.push_back(not_controlled); } } - + // set lexical order separating loops with different loop types first if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; - + Graph<std::set<int>, Empty> g; for (std::vector<std::set<int> >::iterator i = active_by_level_type_splitted.begin(); @@ -1342,13 +1234,13 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active, break; } } - + std::vector<std::set<int> > s = g.topoSort(); if (s.size() != g.vertex.size()) throw loop_error( "cannot separate statements with different loop types at loop level " + to_string(level)); - + // assign lexical order int order = starting_order; for (int i = 0; i < s.size(); i++) { @@ -1372,7 +1264,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active, std::set<int> nonsingles; std::map<coef_t, std::set<int> > fake_singles; std::set<int> fake_singles_; - + // sort out statements that do not require loops for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { @@ -1398,176 +1290,55 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active, } else nonsingles.insert(*i); } - - + + // split nonsingles forcibly according to negative dependences present (loop unfusible) int dep_dim = get_dep_dim_of(ref_stmt_num, level); - + if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { - - bool dummy_level_found = false; - + std::vector<std::set<int> > s; - + s = sort_by_same_loops(active, level); bool further_levels_exist = false; - + if (!idxNames.empty()) if (level <= idxNames[ref_stmt_num].size()) if (idxNames[ref_stmt_num][level - 1].length() == 0) { - // && s.size() == 1) { + // Dummy level found int order1 = 0; - dummy_level_found = true; - + for (int i = level; i < idxNames[ref_stmt_num].size(); i++) if (idxNames[ref_stmt_num][i].length() > 0) further_levels_exist = true; - + } - - //if (!dummy_level_found) { - + if (s.size() > 1) { - + Graph<std::set<int>, bool> g = construct_induced_graph_at_level( s, dep, dep_dim); s = typed_fusion(g); } int order = 0; for (int i = 0; i < s.size(); i++) { - + for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); it++) assign_const(stmt[*it].xform, dim, order); - + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) setLexicalOrder(dim + 2, s[i], order, idxNames); - + order++; } - //} - /* else { - - int order1 = 0; - int order = 0; - for (std::set<int>::iterator i = active.begin(); - i != active.end(); i++) { - if (!further_levels_exist) - assign_const(stmt[*i].xform, dim, order1++); - else - assign_const(stmt[*i].xform, dim, order1); - - } - - if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist) - setLexicalOrder(dim + 2, active, order, idxNames); - } - */ } else { int dummy_order = 0; for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) assign_const(stmt[*i].xform, dim, dummy_order++); } - /*for (int i = 0; i < g2.vertex.size(); i++) - for (int j = i+1; j < g2.vertex.size(); j++) { - std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() || - (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { - g2.connect(i, j); - break; - } - dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); - for (int k = 0; k < dvs.size(); k++) - if (dvs[k].is_control_dependence() || - (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { - g2.connect(j, i); - break; - } - } - - std::vector<std::set<int> > s2 = g2.packed_topoSort(); - - std::vector<std::set<int> > splitted_nonsingles; - for (int i = 0; i < s2.size(); i++) { - std::set<int> cur_scc; - for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) - cur_scc.insert(g2.vertex[*j].first); - splitted_nonsingles.push_back(cur_scc); - } - */ - //convert to dependence graph for grouped statements - //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; - /*int order = 0; - for (std::set<int>::iterator j = active.begin(); j != active.end(); - j++) { - std::set<int> continuous; - std::cout<< active.size()<<std::endl; - while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) { - continuous.insert(*j); - j++; - } - - printf("continuous size is %d\n", continuous.size()); - - - - if (continuous.size() > 0) { - std::vector<std::set<int> > s = typed_fusion(continuous, dep, - dep_dim); - - for (int i = 0; i < s.size(); i++) { - for (std::set<int>::iterator l = s[i].begin(); - l != s[i].end(); l++) { - assign_const(stmt[*l].xform, dim + 2, order); - setLexicalOrder(dim + 2, s[i]); - } - order++; - } - } - - if (j != active.end()) { - assign_const(stmt[*j].xform, dim + 2, order); - - for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2) - assign_const(stmt[*j].xform, k, 0); - order++; - } - - if( j == active.end()) - break; - } - */ - - - // assign lexical order - /*int order = starting_order; - for (int i = 0; i < s.size(); i++) { - // translate each SCC into original statements - std::set<int> cur_scc; - for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) - copy(s[i].begin(), s[i].end(), - inserter(cur_scc, cur_scc.begin())); - - // now assign the constant - for (std::set<int>::iterator j = cur_scc.begin(); - j != cur_scc.end(); j++) - assign_const(stmt[*j].xform, dim, order); - - if (cur_scc.size() > 1) - setLexicalOrder(dim + 2, cur_scc); - else if (cur_scc.size() == 1) { - int cur_stmt = *(cur_scc.begin()); - for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) - assign_const(stmt[cur_stmt].xform, j, 0); - } - - if (cur_scc.size() > 0) - order++; - } - */ } } @@ -1586,15 +1357,15 @@ void Loop::apply_xform(int stmt_num) { void Loop::apply_xform(std::set<int> &active) { int max_n = 0; - + CG_outputBuilder *ocg = ir->builder(); for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { int n = stmt[*i].loop_level.size(); if (n > max_n) max_n = n; - + std::vector<int> lex = getLexicalOrder(*i); - + Relation mapping(2 * n + 1, n); F_And *f_root = mapping.add_and(); for (int j = 1; j <= n; j++) { @@ -1604,7 +1375,7 @@ void Loop::apply_xform(std::set<int> &active) { } mapping = Composition(mapping, stmt[*i].xform); mapping.simplify(); - + // match omega input/output variables to variable names in the code for (int j = 1; j <= stmt[*i].IS.n_set(); j++) mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); @@ -1613,10 +1384,9 @@ void Loop::apply_xform(std::set<int> &active) { tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter + j - 1)); mapping.setup_names(); - + Relation known = Extend_Set(copy(this->known), mapping.n_out() - this->known.n_set()); - //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); std::vector<std::string> loop_vars; for (int j = 1; j <= stmt[*i].IS.n_set(); j++) loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); @@ -1628,7 +1398,7 @@ void Loop::apply_xform(std::set<int> &active) { subs); stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); stmt[*i].IS.simplify(); - + // replace original transformation relation with straight 1-1 mapping mapping = Relation(n, 2 * n + 1); f_root = mapping.add_and(); @@ -1644,28 +1414,28 @@ void Loop::apply_xform(std::set<int> &active) { } stmt[*i].xform = mapping; } - + tmp_loop_var_name_counter += max_n; } void Loop::addKnown(const Relation &cond) { - + // invalidate saved codegen computation delete last_compute_cgr_; last_compute_cgr_ = NULL; delete last_compute_cg_; last_compute_cg_ = NULL; - + int n1 = this->known.n_set(); - + Relation r = copy(cond); int n2 = r.n_set(); - + if (n1 < n2) this->known = Extend_Set(this->known, n2 - n1); else if (n1 > n2) r = Extend_Set(r, n1 - n2); - + this->known = Intersection(this->known, r); } @@ -1677,7 +1447,7 @@ void Loop::removeDependence(int stmt_num_from, int stmt_num_to) { if (stmt_num_to >= stmt.size()) throw std::invalid_argument( "invalid statement number " + to_string(stmt_num_to)); - + dep.disconnect(stmt_num_from, stmt_num_to); } @@ -1712,7 +1482,7 @@ void Loop::dump() const { bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { if (stmt.size() == 0) return true; - + // check for sanity of parameters for (int i = 0; i < stmt.size(); i++) { if (stmt[i].loop_level.size() != num_dep_dim) @@ -1750,11 +1520,11 @@ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { h.update_coef(mapping.output_var(i), -1); h.update_coef(mapping.input_var(i), 1); } - + // update transformation relations for (int i = 0; i < stmt.size(); i++) stmt[i].xform = Composition(copy(mapping), stmt[i].xform); - + // update dependence graph for (int i = 0; i < dep.vertex.size(); i++) for (DependenceGraph::EdgeList::iterator j = @@ -1809,7 +1579,7 @@ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { } dv.lbounds = lbounds; dv.ubounds = ubounds; - + break; } default: @@ -1818,13 +1588,13 @@ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { } j->second = dvs; } - + // set constant loop values std::set<int> active; for (int i = 0; i < stmt.size(); i++) active.insert(i); setLexicalOrder(0, active); - + return true; } @@ -1842,7 +1612,7 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, last_dim = last_dim / 2; if (last_dim == 0) return true; - + for (int i = 0; i < last_dim; i++) { if (dv.lbounds[i] > 0) return true; @@ -1852,8 +1622,8 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, } if (before) return true; - + return false; - + } diff --git a/src/loop_basic.cc b/src/loop_basic.cc index f5234b9..cf72c97 100644 --- a/src/loop_basic.cc +++ b/src/loop_basic.cc @@ -794,46 +794,7 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { new_stmt.loop_level = stmt[*i].loop_level; stmt_nesting_level_.push_back(stmt_nesting_level_[*i]); - - /*std::pair<std::vector<DependenceVector>, - std::vector<DependenceVector> > dv = - test_data_dependences(ir, stmt[*i].code, part1, - stmt[*i].code, part2, freevar, index, - stmt_nesting_level_[*i], - stmt_nesting_level_[stmt.size() - 1]); - - - - - for (int k = 0; k < dv.first.size(); k++) - part1_to_part2++; - if (part1_to_part2 > 0 && part2_to_part1 > 0) - throw loop_error( - "loop error: Aborting, split resulted in impossible dependence cycle!"); - - for (int k = 0; k < dv.second.size(); k++) - part2_to_part1++; - - - - if (part1_to_part2 > 0 && part2_to_part1 > 0) - throw loop_error( - "loop error: Aborting, split resulted in impossible dependence cycle!"); - - - - if (part2_to_part1 > 0){ - temp_place_after = false; - assigned = true; - - }else if (part1_to_part2 > 0){ - temp_place_after = true; - - assigned = true; - } - - */ - + if (place_after) assign_const(new_stmt.xform, dim - 1, cur_lex + 1); else @@ -1321,9 +1282,7 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) { s5.push_back(s4); //Dependence Check for Ordering Constraint - //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5, - // dep, dep_dim); - + Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s3, dep, dep_dim); @@ -1532,7 +1491,6 @@ void Loop::distribute(const std::set<int> &stmt_nums, int level) { order++; } // no need to update dependence graph - ; return; } diff --git a/src/loop_datacopy.cc b/src/loop_datacopy.cc index 8d11b0a..1ccd444 100644 --- a/src/loop_datacopy.cc +++ b/src/loop_datacopy.cc @@ -21,11 +21,6 @@ using namespace omega; -// -// data copy function by referring arrays by numbers. -// e.g. A[i] = A[i-1] + B[i] -// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] -// bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { // check for sanity of parameters @@ -75,11 +70,6 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); } -// -// data copy function by referring arrays by name. -// e.g. A[i] = A[i-1] + B[i] -// parameter array_name=A means to copy data touched by A[i-1] and A[i] -// bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { // check for sanity of parameters @@ -193,1013 +183,6 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); } - -// -// Implement low level datacopy function with lots of options. -// -/*bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, - const std::vector<int> &privatized_levels, - bool allow_extra_read, int fastest_changing_dimension, - int padding_stride, int padding_alignment, int memory_type) { - if (stmt_refs.size() == 0) - return true; - - // check for sanity of parameters - IR_ArraySymbol *sym = NULL; - std::vector<int> lex; - std::set<int> active; - if (level <= 0) - throw std::invalid_argument("invalid loop level " + to_string(level)); - for (int i = 0; i < privatized_levels.size(); i++) { - if (i == 0) { - if (privatized_levels[i] < level) - throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); - } - else if (privatized_levels[i] <= privatized_levels[i-1]) - throw std::invalid_argument("privatized loop levels must be in ascending order"); - } - for (int i = 0; i < stmt_refs.size(); i++) { - int stmt_num = stmt_refs[i].first; - active.insert(stmt_num); - if (stmt_num < 0 || stmt_num >= stmt.size()) - throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); - if (privatized_levels.size() != 0) { - if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) - throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); - } - else { - if (level > stmt[stmt_num].loop_level.size()) - throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); - } - for (int j = 0; j < stmt_refs[i].second.size(); j++) { - if (sym == NULL) { - sym = stmt_refs[i].second[j]->symbol(); - lex = getLexicalOrder(stmt_num); - } - else { - IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); - if (t->name() != sym->name()) { - delete t; - delete sym; - throw std::invalid_argument("try to copy data from different arrays"); - } - delete t; - } - } - } - if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) - throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); - if (padding_stride < 0) - throw std::invalid_argument("invalid temporary array stride requirement"); - if (padding_alignment == -1 || padding_alignment == 0) - throw std::invalid_argument("invalid temporary array alignment requirement"); - - int dim = 2*level - 1; - int n_dim = sym->n_dim(); - - if (fastest_changing_dimension == -1) - switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_ROW_MAJOR: - fastest_changing_dimension = n_dim - 1; - break; - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: - fastest_changing_dimension = 0; - break; - default: - throw loop_error("unsupported array layout"); - } - - - // build iteration spaces for all reads and for all writes separately - apply_xform(active); - bool has_write_refs = false; - bool has_read_refs = false; - Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); - Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); - for (int i = 0; i < stmt_refs.size(); i++) { - int stmt_num = stmt_refs[i].first; - - for (int j = 0; j < stmt_refs[i].second.size(); j++) { - Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); - for (int k = 1; k <= mapping.n_inp(); k++) - mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); - mapping.setup_names(); - F_And *f_root = mapping.add_and(); - for (int k = 1; k <= level-1; k++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.input_var(k), 1); - h.update_coef(mapping.output_var(k), -1); - } - for (int k = 0; k < privatized_levels.size(); k++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.input_var(privatized_levels[k]), 1); - h.update_coef(mapping.output_var(level+k), -1); - } - for (int k = 0; k < n_dim; k++) { - CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); - exp2formula(ir, mapping, f_root, freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); - repr->clear(); - delete repr; - } - Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); - if (stmt_refs[i].second[j]->is_write()) { - has_write_refs = true; - wo_copy_is = Union(wo_copy_is, r); - wo_copy_is.simplify(2, 4); - } - else { - has_read_refs = true; - //protonu--removing the next line for now - ro_copy_is = Union(ro_copy_is, r); - ro_copy_is.simplify(2, 4); - //ro_copy_is = ConvexRepresentation(Union(ro_copy_is, r)); - - } - } - } - - if (allow_extra_read) { - Relation t = DecoupledConvexHull(copy(ro_copy_is)); - if (t.number_of_conjuncts() > 1) - ro_copy_is = RectHull(ro_copy_is); - else - ro_copy_is = t; - } - else { - Relation t = ConvexRepresentation(copy(ro_copy_is)); - if (t.number_of_conjuncts() > 1) - ro_copy_is = RectHull(ro_copy_is); - else - ro_copy_is = t; - } - wo_copy_is = ConvexRepresentation(wo_copy_is); - - if (allow_extra_read) { - Tuple<Relation> Rs; - Tuple<int> active; - for (DNF_Iterator di(ro_copy_is.query_DNF()); di; di++) { - Rs.append(Relation(ro_copy_is, di.curr())); - active.append(1); - } - Relation the_gcs = Relation::True(ro_copy_is.n_set()); - for (int i = level-1+privatized_levels.size()+1; i <= level-1+privatized_levels.size()+n_dim; i++) { - Relation r = greatest_common_step(Rs, active, i, Relation::Null()); - the_gcs = Intersection(the_gcs, r); - } - - ro_copy_is = Approximate(ro_copy_is); - ro_copy_is = ConvexRepresentation(ro_copy_is); - ro_copy_is = Intersection(ro_copy_is, the_gcs); - ro_copy_is.simplify(); - } - - - - for (int i = 1; i < level; i++) { - std::string s = stmt[*active.begin()].IS.input_var(i)->name(); - wo_copy_is.name_set_var(i, s); - ro_copy_is.name_set_var(i, s); - } - for (int i = 0; i < privatized_levels.size(); i++) { - std::string s = stmt[*active.begin()].IS.input_var(privatized_levels[i])->name(); - wo_copy_is.name_set_var(level+i, s); - ro_copy_is.name_set_var(level+i, s); - } - for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { - std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); - wo_copy_is.name_set_var(i, s); - ro_copy_is.name_set_var(i, s); - } - tmp_loop_var_name_counter += n_dim; - - //protonu--end change - - wo_copy_is.setup_names(); - ro_copy_is.setup_names(); - - // build merged iteration space for calculating temporary array size - bool already_use_recthull = false; - Relation untampered_copy_is = ConvexRepresentation(Union(copy(wo_copy_is), copy(ro_copy_is))); - Relation copy_is = untampered_copy_is; - if (copy_is.number_of_conjuncts() > 1) { - try { - copy_is = ConvexHull(copy(untampered_copy_is)); - } - catch (const std::overflow_error &e) { - copy_is = RectHull(copy(untampered_copy_is)); - already_use_recthull = true; - } - } - - - Retry_copy_is: - // extract temporary array information - CG_outputBuilder *ocg = ir->builder(); - std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL - std::vector<coef_t> index_stride(n_dim, 1); - std::vector<bool> is_index_eq(n_dim, false); - std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); - Relation reduced_copy_is = copy(copy_is); - - for (int i = 0; i < n_dim; i++) { - if (i != 0) - reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); - Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); - - // extract stride - EQ_Handle stride_eq; - { - bool simple_stride = true; - int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level-1+privatized_levels.size()+i+1), stride_eq, simple_stride); - if (strides > 1) { - throw loop_error("too many strides"); - } - else if (strides == 1) { - int sign = stride_eq.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); - Constr_Vars_Iter it(stride_eq, true); - index_stride[i] = abs((*it).coef/sign); - } - } - - // check if this arary index requires loop - Conjunct *c = bound.query_DNF()->single_conjunct(); - for (EQ_Iterator ei(c->EQs()); ei; ei++) { - if ((*ei).has_wildcards()) - continue; - - int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); - if (coef != 0) { - int sign = 1; - if (coef < 0) { - coef = -coef; - sign = -1; - } - - CG_outputRepr *op = NULL; - for (Constr_Vars_Iter ci(*ei); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: - { - if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) - if ((*ci).coef*sign == 1) - op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); - else if ((*ci).coef*sign == -1) - op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); - else if ((*ci).coef*sign > 1) - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); - else // (*ci).coef*sign < -1 - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); - break; - } - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - if ((*ci).coef*sign == 1) - op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef*sign == -1) - op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef*sign > 1) - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); - else // (*ci).coef*sign < -1 - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); - break; - } - default: - throw loop_error("unsupported array index expression"); - } - } - if ((*ei).get_const() != 0) - op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); - if (coef != 1) - op = ocg->CreateIntegerDivide(op, ocg->CreateInt(coef)); - - index_lb[i] = op; - is_index_eq[i] = true; - break; - } - } - if (is_index_eq[i]) - continue; - - // seperate lower and upper bounds - std::vector<GEQ_Handle> lb_list, ub_list; - for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { - int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); - if (coef != 0 && (*gi).has_wildcards()) { - bool clean_bound = true; - GEQ_Handle h; - for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) - if (!findFloorInequality(bound, (*cvi).var, h, bound.set_var(level-1+privatized_levels.size()+i+1))) { - clean_bound = false; - break; - } - if (!clean_bound) - continue; - } - - if (coef > 0) - lb_list.push_back(*gi); - else if (coef < 0) - ub_list.push_back(*gi); - } - if (lb_list.size() == 0 || ub_list.size() == 0) - if (already_use_recthull) - throw loop_error("failed to calcuate array footprint size"); - else { - copy_is = RectHull(copy(untampered_copy_is)); - already_use_recthull = true; - goto Retry_copy_is; - } - - // build lower bound representation - Tuple<CG_outputRepr *> lb_repr_list; - for (int j = 0; j < lb_list.size(); j++) - lb_repr_list.append(outputLBasRepr(ocg, lb_list[j], bound, - bound.set_var(level-1+privatized_levels.size()+i+1), - index_stride[i], stride_eq, Relation::True(bound.n_set()), - std::vector<CG_outputRepr *>(bound.n_set()))); - - if (lb_repr_list.size() > 1) - index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); - else if (lb_repr_list.size() == 1) - index_lb[i] = lb_repr_list[1]; - - // build temporary array size representation - { - Relation cal(copy_is.n_set(), 1); - F_And *f_root = cal.add_and(); - for (int j = 0; j < ub_list.size(); j++) - for (int k = 0; k < lb_list.size(); k++) { - GEQ_Handle h = f_root->add_GEQ(); - - for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: - { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot calculate temporay array size statically"); - } - } - h.update_const(ub_list[j].get_const()); - - for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { - switch ((*ci).var->kind()) { - case Input_Var: - { - int pos = (*ci).var->get_position(); - h.update_coef(cal.input_var(pos), (*ci).coef); - break; - } - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - Variable_ID v; - if (g->arity() == 0) - v = cal.get_local(g); - else - v = cal.get_local(g, (*ci).var->function_of()); - h.update_coef(v, (*ci).coef); - break; - } - default: - throw loop_error("cannot calculate temporay array size statically"); - } - } - h.update_const(lb_list[k].get_const()); - - h.update_const(1); - h.update_coef(cal.output_var(1), -1); - } - - cal = Restrict_Domain(cal, copy(copy_is)); - for (int j = 1; j <= cal.n_inp(); j++) - cal = Project(cal, j, Input_Var); - cal.simplify(); - - // pad temporary array size - // TODO: for variable array size, create padding formula - Conjunct *c = cal.query_DNF()->single_conjunct(); - bool is_index_bound_const = false; - for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) - if ((*gi).is_const(cal.output_var(1))) { - coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); - if (padding_stride != 0) { - size = (size + index_stride[i] - 1) / index_stride[i]; - if (i == fastest_changing_dimension) - size = size * padding_stride; - } - if (i == fastest_changing_dimension) { - if (padding_alignment > 1) { // align to boundary for data packing - int residue = size % padding_alignment; - if (residue) - size = size+padding_alignment-residue; - } - else if (padding_alignment < -1) { // un-alignment for memory bank conflicts - while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1) - size++; - } - } - index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); - is_index_bound_const = true; - } - - if (!is_index_bound_const) { - for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { - int coef = (*gi).get_coef(cal.output_var(1)); - if (coef < 0) { - CG_outputRepr *op = NULL; - for (Constr_Vars_Iter ci(*gi); ci; ci++) { - if ((*ci).var != cal.output_var(1)) { - switch((*ci).var->kind()) { - case Global_Var: - { - Global_Var_ID g = (*ci).var->get_global_var(); - if ((*ci).coef == 1) - op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef == -1) - op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); - else if ((*ci).coef > 1) - op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); - else // (*ci).coef < -1 - op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); - break; - } - default: - throw loop_error("failed to generate array index bound code"); - } - } - } - int c = (*gi).get_const(); - if (c > 0) - op = ocg->CreatePlus(op, ocg->CreateInt(c)); - else if (c < 0) - op = ocg->CreateMinus(op, ocg->CreateInt(-c)); - if (padding_stride != 0) { - if (i == fastest_changing_dimension) { - coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); - coef_t t1 = index_stride[i] / g; - if (t1 != 1) - op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); - coef_t t2 = padding_stride / g; - if (t2 != 1) - op = ocg->CreateTimes(op, ocg->CreateInt(t2)); - } - else if (index_stride[i] != 1) { - op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); - } - } - - index_sz.push_back(std::make_pair(i, op)); - break; - } - } - } - } - } - - // change the temporary array index order - for (int i = 0; i < index_sz.size(); i++) - if (index_sz[i].first == fastest_changing_dimension) - switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_ROW_MAJOR: - std::swap(index_sz[index_sz.size()-1], index_sz[i]); - break; - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: - std::swap(index_sz[0], index_sz[i]); - break; - default: - throw loop_error("unsupported array layout"); - } - - // declare temporary array or scalar - IR_Symbol *tmp_sym; - if (index_sz.size() == 0) { - tmp_sym = ir->CreateScalarSymbol(sym, memory_type); - } - else { - std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); - for (int i = 0; i < index_sz.size(); i++) - tmp_array_size[i] = index_sz[i].second->clone(); - tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); - } - - // create temporary array read initialization code - CG_outputRepr *copy_code_read; - if (has_read_refs) - if (index_sz.size() == 0) { - IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); - - std::vector<CG_outputRepr *> rhs_index(n_dim); - for (int i = 0; i < index_lb.size(); i++) - if (is_index_eq[i]) - rhs_index[i] = index_lb[i]->clone(); - else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); - IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - - copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); - } - else { - std::vector<CG_outputRepr *> lhs_index(index_sz.size()); - for (int i = 0; i < index_sz.size(); i++) { - int cur_index_num = index_sz[i].first; - CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); - if (padding_stride != 0) { - if (i == n_dim-1) { - coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); - coef_t t1 = index_stride[cur_index_num] / g; - if (t1 != 1) - cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); - coef_t t2 = padding_stride / g; - if (t2 != 1) - cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { - cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); - } - } - - if (ir->ArrayIndexStartAt() != 0) - cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); - lhs_index[i] = cur_index_repr; - } - - IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); - - std::vector<CG_outputRepr *> rhs_index(n_dim); - for (int i = 0; i < index_lb.size(); i++) - if (is_index_eq[i]) - rhs_index[i] = index_lb[i]->clone(); - else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); - IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - - copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); - } - - // create temporary array write back code - CG_outputRepr *copy_code_write; - if (has_write_refs) - if (index_sz.size() == 0) { - IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); - - std::vector<CG_outputRepr *> rhs_index(n_dim); - for (int i = 0; i < index_lb.size(); i++) - if (is_index_eq[i]) - rhs_index[i] = index_lb[i]->clone(); - else - rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); - IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); - - copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); - } - else { - std::vector<CG_outputRepr *> lhs_index(n_dim); - for (int i = 0; i < index_lb.size(); i++) - if (is_index_eq[i]) - lhs_index[i] = index_lb[i]->clone(); - else - lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); - IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); - - std::vector<CG_outputRepr *> rhs_index(index_sz.size()); - for (int i = 0; i < index_sz.size(); i++) { - int cur_index_num = index_sz[i].first; - CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); - if (padding_stride != 0) { - if (i == n_dim-1) { - coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); - coef_t t1 = index_stride[cur_index_num] / g; - if (t1 != 1) - cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); - coef_t t2 = padding_stride / g; - if (t2 != 1) - cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { - cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); - } - } - - if (ir->ArrayIndexStartAt() != 0) - cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); - rhs_index[i] = cur_index_repr; - } - IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); - - copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); - } - - // now we can remove those loops for array indexes that are - // dependent on others - if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { - Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); - F_And *f_root = mapping.add_and(); - for (int i = 1; i <= level-1+privatized_levels.size(); i++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.input_var(i), 1); - h.update_coef(mapping.output_var(i), -1); - } - - int cur_index = 0; - std::vector<int> mapped_index(index_sz.size()); - for (int i = 0; i < n_dim; i++) - if (!is_index_eq[i]) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); - switch (sym->layout_type()) { - case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { - h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); - mapped_index[index_sz.size()-cur_index-1] = i; - break; - } - case IR_ARRAY_LAYOUT_ROW_MAJOR: { - h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); - mapped_index[cur_index] = i; - break; - } - default: - throw loop_error("unsupported array layout"); - } - cur_index++; - } - - wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); - ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); - - // protonu--replacing Chun's old code - for (int i = 1; i <= level-1+privatized_levels.size(); i++) { - wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); - ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); - } - - - - for (int i = 0; i < index_sz.size(); i++) { - wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); - ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); - } - wo_copy_is.setup_names(); - ro_copy_is.setup_names(); - } - - // insert read copy statement - int old_num_stmt = stmt.size(); - int ro_copy_stmt_num = -1; - if (has_read_refs) { - Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); - { - F_And *f_root = copy_xform.add_and(); - for (int i = 1; i <= ro_copy_is.n_set(); i++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(copy_xform.input_var(i), 1); - h.update_coef(copy_xform.output_var(2*i), -1); - } - for (int i = 1; i <= dim; i+=2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(copy_xform.output_var(i), -1); - h.update_const(lex[i-1]); - } - for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(copy_xform.output_var(i), 1); - } - } - - Statement copy_stmt_read; - copy_stmt_read.IS = ro_copy_is; - copy_stmt_read.xform = copy_xform; - copy_stmt_read.code = copy_code_read; - copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); - copy_stmt_read.ir_stmt_node = NULL; - for (int i = 0; i < level-1; i++) { - copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; - if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && - stmt[*(active.begin())].loop_level[i].payload >= level) { - int j; - for (j = 0; j < privatized_levels.size(); j++) - if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) - break; - if (j == privatized_levels.size()) - copy_stmt_read.loop_level[i].payload = -1; - else - copy_stmt_read.loop_level[i].payload = level + j; - } - else - copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; - copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; - } - for (int i = 0; i < privatized_levels.size(); i++) { - copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; - copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; - copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; - } - int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); - for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; - } - for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; - copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; - } - - shiftLexicalOrder(lex, dim-1, 1); - stmt.push_back(copy_stmt_read); - ro_copy_stmt_num = stmt.size() - 1; - dep.insert(); - } - - // insert write copy statement - int wo_copy_stmt_num = -1; - if (has_write_refs) { - Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); - { - F_And *f_root = copy_xform.add_and(); - for (int i = 1; i <= wo_copy_is.n_set(); i++) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(copy_xform.input_var(i), 1); - h.update_coef(copy_xform.output_var(2*i), -1); - } - for (int i = 1; i <= dim; i+=2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(copy_xform.output_var(i), -1); - h.update_const(lex[i-1]); - } - for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { - EQ_Handle h = f_root->add_EQ(); - h.update_coef(copy_xform.output_var(i), 1); - } - } - - Statement copy_stmt_write; - copy_stmt_write.IS = wo_copy_is; - copy_stmt_write.xform = copy_xform; - copy_stmt_write.code = copy_code_write; - copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); - copy_stmt_write.ir_stmt_node = NULL; - - for (int i = 0; i < level-1; i++) { - copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; - if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && - stmt[*(active.begin())].loop_level[i].payload >= level) { - int j; - for (j = 0; j < privatized_levels.size(); j++) - if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) - break; - if (j == privatized_levels.size()) - copy_stmt_write.loop_level[i].payload = -1; - else - copy_stmt_write.loop_level[i].payload = level + j; - } - else - copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; - copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; - } - for (int i = 0; i < privatized_levels.size(); i++) { - copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; - copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; - copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; - } - int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); - for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; - } - for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; - copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; - } - - lex[dim-1]++; - shiftLexicalOrder(lex, dim-1, -2); - stmt.push_back(copy_stmt_write); - wo_copy_stmt_num = stmt.size() - 1; - dep.insert(); - } - - // replace original array accesses with temporary array accesses - for (int i =0; i < stmt_refs.size(); i++) - for (int j = 0; j < stmt_refs[i].second.size(); j++) { - if (index_sz.size() == 0) { - IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); - ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); - } - else { - std::vector<CG_outputRepr *> index_repr(index_sz.size()); - for (int k = 0; k < index_sz.size(); k++) { - int cur_index_num = index_sz[k].first; - - CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); - if (padding_stride != 0) { - if (k == n_dim-1) { - coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); - coef_t t1 = index_stride[cur_index_num] / g; - if (t1 != 1) - cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); - coef_t t2 = padding_stride / g; - if (t2 != 1) - cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); - } - else if (index_stride[cur_index_num] != 1) { - cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); - } - } - - if (ir->ArrayIndexStartAt() != 0) - cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); - index_repr[k] = cur_index_repr; - } - - IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); - ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); - } - } - - // update dependence graph - int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; - if (ro_copy_stmt_num != -1) { - for (int i = 0; i < old_num_stmt; i++) { - std::vector<std::vector<DependenceVector> > D; - - for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { - if (active.find(i) != active.end() && active.find(j->first) == active.end()) { - std::vector<DependenceVector> dvs1, dvs2; - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) - dvs1.push_back(dv); - else - dvs2.push_back(dv); - } - j->second = dvs2; - if (dvs1.size() > 0) - dep.connect(ro_copy_stmt_num, j->first, dvs1); - } - else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { - std::vector<DependenceVector> dvs1, dvs2; - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) - dvs1.push_back(dv); - else - dvs2.push_back(dv); - } - j->second = dvs2; - if (dvs1.size() > 0) - D.push_back(dvs1); - } - - if (j->second.size() == 0) - dep.vertex[i].second.erase(j++); - else - j++; - } - - for (int j = 0; j < D.size(); j++) - dep.connect(i, ro_copy_stmt_num, D[j]); - } - - // insert dependences from copy statement loop to copied statements - DependenceVector dv; - dv.type = DEP_W2R; - dv.sym = tmp_sym->clone(); - dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); - dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); - for (int i = dep_dim; i < num_dep_dim; i++) { - dv.lbounds[i] = -posInfinity; - dv.ubounds[i] = posInfinity; - } - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) - dep.connect(ro_copy_stmt_num, *i, dv); - } - - if (wo_copy_stmt_num != -1) { - for (int i = 0; i < old_num_stmt; i++) { - std::vector<std::vector<DependenceVector> > D; - - for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { - if (active.find(i) != active.end() && active.find(j->first) == active.end()) { - std::vector<DependenceVector> dvs1, dvs2; - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) - dvs1.push_back(dv); - else - dvs2.push_back(dv); - } - j->second = dvs2; - if (dvs1.size() > 0) - dep.connect(wo_copy_stmt_num, j->first, dvs1); - } - else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { - std::vector<DependenceVector> dvs1, dvs2; - for (int k = 0; k < j->second.size(); k++) { - DependenceVector dv = j->second[k]; - if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) - dvs1.push_back(dv); - else - dvs2.push_back(dv); - } - j->second = dvs2; - if (dvs1.size() > 0) - D.push_back(dvs1); - } - - if (j->second.size() == 0) - dep.vertex[i].second.erase(j++); - else - j++; - } - - for (int j = 0; j < D.size(); j++) - dep.connect(i, wo_copy_stmt_num, D[j]); - } - - // insert dependences from copied statements to write statements - DependenceVector dv; - dv.type = DEP_W2R; - dv.sym = tmp_sym->clone(); - dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); - dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); - for (int i = dep_dim; i < num_dep_dim; i++) { - dv.lbounds[i] = -posInfinity; - dv.ubounds[i] = posInfinity; - } - for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) - dep.connect(*i, wo_copy_stmt_num, dv); - - } - - // update variable name for dependences among copied statements - for (int i = 0; i < old_num_stmt; i++) { - if (active.find(i) != active.end()) - for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) - if (active.find(j->first) != active.end()) - for (int k = 0; k < j->second.size(); k++) { - IR_Symbol *s = tmp_sym->clone(); - j->second[k].sym = s; - } - } - - // insert anti-dependence from write statement to read statement - if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) - if (dep_dim >= 0) { - DependenceVector dv; - dv.type = DEP_R2W; - dv.sym = tmp_sym->clone(); - dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); - dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); - for (int k = dep_dim; k < num_dep_dim; k++) { - dv.lbounds[k] = -posInfinity; - dv.ubounds[k] = posInfinity; - } - for (int k = 0; k < dep_dim; k++) { - if (k != 0) { - dv.lbounds[k-1] = 0; - dv.ubounds[k-1] = 0; - } - dv.lbounds[k] = 1; - dv.ubounds[k] = posInfinity; - dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); - } - } - - - // cleanup - delete sym; - delete tmp_sym; - for (int i = 0; i < index_lb.size(); i++) { - index_lb[i]->clear(); - delete index_lb[i]; - } - for (int i = 0; i < index_sz.size(); i++) { - index_sz[i].second->clear(); - delete index_sz[i].second; - } - - return true; - } -*/ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, diff --git a/src/loop_tile.cc b/src/loop_tile.cc index aae8dd8..41c3e7f 100644 --- a/src/loop_tile.cc +++ b/src/loop_tile.cc @@ -126,11 +126,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, std::set<int> private_stmt; for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { -// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) -// same_tiled_loop.insert(*i); - // should test dim's value directly but it is ok for now -// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) if (same_tiled_loop.find(*i) == same_tiled_loop.end() && overflow.find(*i) != overflow.end()) private_stmt.insert(*i); @@ -138,26 +134,6 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, // extract the union of the iteration space to be considered Relation hull; - /*{ - Tuple < Relation > r_list; - Tuple<int> r_mask; - - for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); - i != same_tile_controlling_loop.end(); i++) - if (private_stmt.find(*i) == private_stmt.end()) { - Relation r = project_onto_levels(getNewIS(*i), dim + 1, - true); - for (int j = outer_dim; j < dim; j++) - r = Project(r, j + 1, Set_Var); - for (int j = 0; j < outer_dim; j += 2) - r = Project(r, j + 1, Set_Var); - r_list.append(r); - r_mask.append(1); - } - - hull = Hull(r_list, r_mask, 1, true); - }*/ - { std::vector<Relation> r_list; @@ -176,7 +152,6 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, } hull = SimpleHull(r_list); - // hull = Hull(r_list, std::vector<bool>(r_list.size(), true), 1, true); } // extract the bound of the dimension to be tiled @@ -553,25 +528,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); h.update_coef(ub, 1); } - // if (private_stmt.find(*i) != private_stmt.end()) { - // if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii - // GEQ_Handle h = f_root->add_GEQ(); - // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); - // h.update_coef(ub, 1); - - // stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); - // f_root = stmt[*i].xform.and_with_and(); - // EQ_Handle h1 = f_root->add_EQ(); - // h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); - // h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); - // } - // else if (method == StridedTile) { // e.g. ii <= UB since i does not exist - // GEQ_Handle h = f_root->add_GEQ(); - // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); - // h.update_coef(ub, 1); - // } - // } - + // restrict original loop index inside the tile else { if (method == StridedTile) { // e.g. ii <= i < ii + tile_size diff --git a/src/loop_unroll.cc b/src/loop_unroll.cc index 9bc6acf..911d900 100644 --- a/src/loop_unroll.cc +++ b/src/loop_unroll.cc @@ -20,7 +20,6 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, std::vector<std::vector<std::string> > idxNames, int cleanup_split_level) { // check for sanity of parameters - // check for sanity of parameters if (unroll_amount < 0) throw std::invalid_argument( "invalid unroll amount " + to_string(unroll_amount)); @@ -71,16 +70,6 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, } dim2 = stmt[*i].loop_level[dim2].payload; - /*if (dv.isCarried(dim2) - && (dv.hasNegative(dim2) && !dv.quasi)) - throw loop_error( - "loop error: Unrolling is illegal, dependence violation!"); - - if (dv.isCarried(dim2) - && (dv.hasPositive(dim2) && dv.quasi)) - throw loop_error( - "loop error: Unrolling is illegal, dependence violation!"); - */ bool safe = false; if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { @@ -89,20 +78,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, "loop error: a quasi dependence with a positive carried distance"); if (!dv.quasi) { if (dv.lbounds[dim2] != posInfinity) { - //if (dv.lbounds[dim2] != negInfinity) if (dv.lbounds[dim2] > unroll_amount) safe = true; } else safe = true; - }/* else { - if (dv.ubounds[dim2] != negInfinity) { - if (dv.ubounds[dim2] != posInfinity) - if ((-(dv.ubounds[dim2])) > unroll_amount) - safe = true; - } else - safe = true; - }*/ - + } if (!safe) { for (int l = level + 1; l <= (n - 1) / 2; l++) { int dim3 = l - 1; |