diff options
| author | Tuowen Zhao <ztuowen@gmail.com> | 2016-09-20 15:56:14 -0600 | 
|---|---|---|
| committer | Tuowen Zhao <ztuowen@gmail.com> | 2016-09-20 15:56:14 -0600 | 
| commit | 6983c09937baac3ffb7d3a45c3c5009c0eba7e6c (patch) | |
| tree | b42f0f9383b40fbeb540bf51b9f11eaf6f80c990 /src | |
| parent | b829868dfd6cbe9da07227220856b975f33e2037 (diff) | |
| download | chill-6983c09937baac3ffb7d3a45c3c5009c0eba7e6c.tar.gz chill-6983c09937baac3ffb7d3a45c3c5009c0eba7e6c.tar.bz2 chill-6983c09937baac3ffb7d3a45c3c5009c0eba7e6c.zip  | |
python loop & more doc
Diffstat (limited to 'src')
| -rw-r--r-- | src/chill.cc (renamed from src/chill_run.cc) | 22 | ||||
| -rw-r--r-- | src/chillmodule.cc | 13 | ||||
| -rw-r--r-- | src/dep.cc | 116 | ||||
| -rw-r--r-- | src/ir_rose.cc | 59 | ||||
| -rw-r--r-- | src/ir_rose_utils.cc | 5 | ||||
| -rw-r--r-- | src/irtools.cc | 28 | ||||
| -rw-r--r-- | src/loop.cc | 640 | ||||
| -rw-r--r-- | src/loop_basic.cc | 46 | ||||
| -rw-r--r-- | src/loop_datacopy.cc | 1017 | ||||
| -rw-r--r-- | src/loop_tile.cc | 45 | ||||
| -rw-r--r-- | src/loop_unroll.cc | 22 | 
11 files changed, 227 insertions, 1786 deletions
diff --git a/src/chill_run.cc b/src/chill.cc index 4eafe65..6ca0c4c 100644 --- a/src/chill_run.cc +++ b/src/chill.cc @@ -1,9 +1,9 @@  #include "chilldebug.h" -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring>  #include "loop.hh" @@ -18,7 +18,6 @@  //---  Loop *myloop = NULL;  IR_Code *ir_code = NULL; -bool repl_stop = false;  bool is_interactive = false;  std::vector<IR_Control *> ir_controls; @@ -36,8 +35,6 @@ int main( int argc, char* argv[] )      exit(-1);    } -  int fail = 0; -      // Create PYTHON interpreter    /* Pass argv[0] to the Python interpreter */    Py_SetProgramName(argv[0]); @@ -64,17 +61,16 @@ int main( int argc, char* argv[] )      printf("CHiLL v" CHILL_BUILD_VERSION " (built on " CHILL_BUILD_DATE ")\n");      printf("Copyright (C) 2008 University of Southern California\n");      printf("Copyright (C) 2009-2012 University of Utah\n"); -    //is_interactive = true; // let the lua interpreter know.      fflush(stdout); -    // TODO: read lines of python code. -    //Not sure if we should set fail from interactive mode +    is_interactive=true; +    PyRun_InteractiveLoop(stdin,"-");      printf("CHiLL ending...\n");      fflush(stdout);    } -  //printf("DONE with PyRun_SimpleString()\n"); -   -  if (!fail && ir_code != NULL && myloop != NULL && myloop->stmt.size() != 0 && !myloop->stmt[0].xform.is_null()) { +  // TODO Codegen is the major part that prevent CHiLL to be a pure Python library +  //    Other than exporting a class of course +  if (ir_code != NULL && myloop != NULL && myloop->stmt.size() != 0 && !myloop->stmt[0].xform.is_null()) {      int lnum_start;      int lnum_end;      lnum_start = get_loop_num_start(); diff --git a/src/chillmodule.cc b/src/chillmodule.cc index 72f32d4..b347570 100644 --- a/src/chillmodule.cc +++ b/src/chillmodule.cc @@ -19,7 +19,6 @@ using namespace omega;  extern Loop *myloop;  extern IR_Code *ir_code;  extern bool is_interactive; -extern bool repl_stop;  std::string procedure_name;  std::string source_filename; @@ -51,9 +50,6 @@ static void set_loop_num_end(int end_num) {    loop_end_num = end_num;  } -// TODO: finalize_loop(int,int) and init_loop(int,int) are identical to thier Lua counterparts. -// consider integrating them -  void finalize_loop(int loop_num_start, int loop_num_end) {    if (loop_num_start == loop_num_end) {      ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); @@ -126,7 +122,6 @@ static void init_loop(int loop_num_start, int loop_num_end) {    IR_Block *block = ir_code->MergeNeighboringControlStructures(parm);    myloop = new Loop(block);    delete block;   -  //if (is_interactive) printf("%s ", PROMPT_STRING);  }  // ----------------------- // @@ -332,16 +327,9 @@ static PyObject* chill_print_space(PyObject* self, PyObject* args) {    Py_RETURN_NONE;  } -static PyObject* chill_exit(PyObject* self, PyObject* args) { -  strict_arg_num(args, 0, "exit"); -  repl_stop = true; -  Py_RETURN_NONE; -} -  static void add_known(std::string cond_expr) {    int num_dim = myloop->known.n_set();    std::vector<std::map<std::string, int> >* cond; -  // TODO since we are using python, change this!    cond = parse_relation_vector(cond_expr.c_str());    Relation rel(num_dim); @@ -744,7 +732,6 @@ static PyMethodDef ChillMethods[] = {    {"print_code",          chill_print_code,                METH_VARARGS,     "print generated code"},    {"print_dep",           chill_print_dep,                 METH_VARARGS,     "print the dependencies graph"},    {"print_space",         chill_print_space,               METH_VARARGS,     "print space"}, -  {"exit",                chill_exit,                      METH_VARARGS,     "exit the interactive consule"},    {"known",               chill_known,                     METH_VARARGS,     "knwon"},    {"remove_dep",          chill_remove_dep,                METH_VARARGS,     "remove dependency i suppose"},    {"original",            chill_original,                  METH_VARARGS,     "original"}, @@ -94,13 +94,6 @@ std::ostream& operator<<(std::ostream &os, const DependenceVector &d) {    return os;  } -// DependenceVector::DependenceVector(int size): -//   lbounds(std::vector<coef_t>(size, 0)), -//   ubounds(std::vector<coef_t>(size, 0)) { -//   src = NULL; -//   dst = NULL; -// } -  DependenceVector::DependenceVector(const DependenceVector &that) {    if (that.sym != NULL)      this->sym = that.sym->clone(); @@ -135,18 +128,12 @@ DependenceType DependenceVector::getType() const {  }  bool DependenceVector::is_data_dependence() const { -  if (type == DEP_W2R || type == DEP_R2W || type == DEP_W2W -      || type == DEP_R2R) -    return true; -  else -    return false; +  return (type == DEP_W2R || type == DEP_R2W || type == DEP_W2W +      || type == DEP_R2R);  }  bool DependenceVector::is_control_dependence() const { -  if (type == DEP_CONTROL) -    return true; -  else -    return false; +  return type == DEP_CONTROL;  }  bool DependenceVector::has_negative_been_carried_at(int dim) const { @@ -160,10 +147,7 @@ bool DependenceVector::has_negative_been_carried_at(int dim) const {      if (lbounds[i] > 0 || ubounds[i] < 0)        return false; -  if (lbounds[dim] < 0) -    return true; -  else -    return false; +  return lbounds[dim] < 0;  } @@ -178,10 +162,7 @@ bool DependenceVector::has_been_carried_at(int dim) const {      if (lbounds[i] > 0 || ubounds[i] < 0)        return false; -  if ((lbounds[dim] != 0)  || (ubounds[dim] !=0)) -    return true; -   -  return false; +  return (lbounds[dim] != 0)  || (ubounds[dim] !=0);  }  bool DependenceVector::has_been_carried_before(int dim) const { @@ -262,22 +243,14 @@ bool DependenceVector::hasPositive(int dim) const {    if (dim >= lbounds.size())      throw std::invalid_argument("invalid dependence dimension"); -  if (lbounds[dim] > 0) -    //av: changed from ubounds to lbounds may have side effects -    return true; -  else -    return false; +  return lbounds[dim] > 0;  }  bool DependenceVector::hasNegative(int dim) const {    if (dim >= lbounds.size())      throw std::invalid_argument("invalid dependence dimension"); -  if (ubounds[dim] < 0) -    //av: changed from lbounds to ubounds may have side effects -    return true; -  else -    return false; +  return ubounds[dim] < 0;  }  bool DependenceVector::isCarried(int dim, omega::coef_t distance) const { @@ -296,12 +269,7 @@ bool DependenceVector::isCarried(int dim, omega::coef_t distance) const {    if (dim >= lbounds.size())      return true; -  if (lbounds[dim] > distance) -    return false; -  else if (ubounds[dim] < -distance) -    return false; -   -  return true; +  return lbounds[dim] >= -distance && ubounds[dim] <= distance;  }  bool DependenceVector::canPermute(const std::vector<int> &pi) const { @@ -402,60 +370,6 @@ DependenceVector DependenceVector::reverse() const {    return dv;  } -// std::vector<DependenceVector> DependenceVector::matrix(const std::vector<std::vector<int> > &M) const { -//   if (M.size() != lbounds.size()) -//     throw std::invalid_argument("(non)unimodular transformation dimensionality does not match dependence space"); - -//   const int n = lbounds.size(); -//   DependenceVector dv; -//   if (sym != NULL) -//     dv.sym = sym->clone(); -//   else -//     dv.sym = NULL; -//   dv.type = type; - -//   for (int i = 0; i < n; i++) { -//     assert(M[i].size() == n+1 || M[i].size() == n); - -//     omega::coef_t lb, ub; -//     if (M[i].size() == n+1) -//       lb = ub = M[i][n]; -//     else -//       lb = ub = 0; - -//     for (int j = 0; j < n; j++) { -//       int c = M[i][j]; -//       if (c == 0) -//         continue; - -//       if (c > 0) { -//         if (lbounds[j] == -posInfinity) -//           lb = -posInfinity; -//         else if (lb != -posInfinity) -//           lb += c * lbounds[j]; -//         if (ubounds[j] == posInfinity) -//           ub = posInfinity; -//         else if (ub != posInfinity) -//           ub += c * ubounds[j]; -//       } -//       else { -//         if (ubounds[j] == posInfinity) -//           lb = -posInfinity; -//         else if (lb != -posInfinity) -//           lb += c * ubounds[j]; -//         if (lbounds[j] == -posInfinity) -//           ub = posInfinity; -//         else if (ub != posInfinity) -//           ub += c * lbounds[j]; -//       } -//     } -//     dv.lbounds.push_back(lb); -//     dv.ubounds.push_back(ub); -//   } -//   dv.is_reduction = is_reduction; - -//   return dv.normalize(); -// }  //-----------------------------------------------------------------------------  // Class: DependenceGraph @@ -497,20 +411,6 @@ DependenceGraph DependenceGraph::permute(const std::vector<int> &pi,    return g;  } -// DependenceGraph DependenceGraph::matrix(const std::vector<std::vector<int> > &M) const { -//   DependenceGraph g; - -//   for (int i = 0; i < vertex.size(); i++) -//     g.insert(vertex[i].first); - -//   for (int i = 0; i < vertex.size(); i++) -//     for (EdgeList::const_iterator j = vertex[i].second.begin(); j != vertex[i].second.end(); j++) -//       for (int k = 0; k < j->second.size(); k++) -//         g.connect(i, j->first, j->second[k].matrix(M)); - -//   return g; -// } -  DependenceGraph DependenceGraph::subspace(int dim) const {    DependenceGraph g; diff --git a/src/ir_rose.cc b/src/ir_rose.cc index 223e7a4..f4039ab 100644 --- a/src/ir_rose.cc +++ b/src/ir_rose.cc @@ -102,7 +102,6 @@ int IR_roseArraySymbol::n_dim() const {  		    dim++;  		  }  	  } else if (ptrType != NULL) { -		  //std::cout << "pntrType \n";  		  ; // not sure if this case will happen  	  }    } @@ -113,7 +112,6 @@ int IR_roseArraySymbol::n_dim() const {  omega::CG_outputRepr *IR_roseArraySymbol::size(int dim) const {    SgArrayType* arrType = isSgArrayType(vs_->get_type()); -  // SgExprListExp* dimList = arrType->get_dim_info();    int count = 0;    SgExpression* expr;    SgType* pntrType = isSgPointerType(vs_->get_type()); @@ -1446,16 +1444,12 @@ IR_Block *IR_roseCode::GetCode() const {  }  void IR_roseCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) { -  /*    SgStatementPtrList *tnl = -        static_cast<omega::CG_roseRepr *>(repr)->GetList(); -        SgNode *tf_old; -  */    SgStatementPtrList *tnl =      static_cast<omega::CG_roseRepr *>(repr)->GetList();    SgNode* node_ = static_cast<omega::CG_roseRepr *>(repr)->GetCode();    SgNode * tf_old; -  /* May need future revision it tnl has more than one statement */ +  /* May need future revision if tnl has more than one statement */    switch (old->type()) { @@ -1507,44 +1501,6 @@ void IR_roseCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) {    delete old;    delete repr; -  /* May need future revision it tnl has more than one statement */ -  /* -    switch (old->type()) { -     -    case IR_CONTROL_LOOP: -    tf_old = static_cast<IR_roseLoop *>(old)->tf_; -    break; -    case IR_CONTROL_BLOCK: -    tf_old = static_cast<IR_roseBlock *>(old)->start_; -    break; -     -    default: -    throw ir_error("control structure to be replaced not supported"); -    break; -    } -     -    // std::string y = tf_old->unparseToString(); -    SgStatement *s = isSgStatement(tf_old); -    if (s != 0) { -    SgStatement *p = isSgStatement(tf_old->get_parent()); -     -    if (p != 0) { -    //      SgStatement* it2 = isSgStatement(tnl); -     -    //   if(it2 != NULL){ -    p->replace_statement(s, *tnl); -    //   } -    //   else { -    //          throw ir_error("Replacing Code not a statement!"); -    //      } -    } else -    throw ir_error("Replacing Code not a statement!"); -    } else -    throw ir_error("Replacing Code not a statement!"); -    //  y = tnl->unparseToString(); -    delete old; -    delete repr; -  */  }  void IR_roseCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) { @@ -1560,19 +1516,6 @@ void IR_roseCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) {        std::string z = isSgNode(parent)->unparseToString();        parent->replace_expression(ia_orig, op);        isSgNode(op)->set_parent(isSgNode(parent)); -       -      /* if(isSgBinaryOp(parent)) -         { -         if(isSgBinaryOp(parent)->get_lhs_operand() == ia_orig){ -         isSgBinaryOp(parent)->set_lhs_operand(op);    -         }else if(isSgBinaryOp(parent)->get_rhs_operand() == ia_orig){ -         isSgBinaryOp(parent)->set_rhs_operand(op);  -          -          -         }  -         else -         parent->replace_expression(ia_orig, op); -      */      } else {        SgStatement* parent_stmt = isSgStatement(          isSgNode(ia_orig)->get_parent()); diff --git a/src/ir_rose_utils.cc b/src/ir_rose_utils.cc index 64b0891..1329031 100644 --- a/src/ir_rose_utils.cc +++ b/src/ir_rose_utils.cc @@ -31,8 +31,6 @@ std::vector<SgForStatement *> find_deepest_loops(SgStatementPtrList& tnl) {    std::vector<SgForStatement *> loops; -   -      for(SgStatementPtrList::const_iterator j = tnl.begin(); j != tnl.end(); j++)    {      std::vector<SgForStatement *> t = find_deepest_loops(isSgNode(*j)); @@ -40,10 +38,7 @@ std::vector<SgForStatement *> find_deepest_loops(SgStatementPtrList& tnl) {        loops = t;    }        -   -      return loops; -    }  std::vector<SgForStatement *> find_deepest_loops(SgNode *tn) { diff --git a/src/irtools.cc b/src/irtools.cc index 4ab6c85..e7e5029 100644 --- a/src/irtools.cc +++ b/src/irtools.cc @@ -19,8 +19,6 @@  using namespace omega; -// Build IR tree from the source code.  Block type node can only be -// leaf, i.e., there is no further structures inside a block allowed.  std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *parent) {    std::vector<ir_tree_node *> result; @@ -111,9 +109,6 @@ std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *par    return result;  } - -// Extract statements from IR tree. Statements returned are ordered in -// lexical order in the source code.  std::vector<ir_tree_node *> extract_ir_stmts(const std::vector<ir_tree_node *> &ir_tree) {    std::vector<ir_tree_node *> result;    for (int i = 0; i < ir_tree.size(); i++) @@ -184,14 +179,6 @@ bool is_dependence_valid(ir_tree_node *src_node, ir_tree_node *dst_node,  } - - -// Test data dependences between two statements. The first statement -// in parameter must be lexically before the second statement in -// parameter.  Returned dependences are all lexicographically -// positive. The first vector in returned pair is dependences from the -// first statement to the second statement and the second vector in -// returned pair is in reverse order.  std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_data_dependences(    IR_Code *ir, const CG_outputRepr *repr1, const Relation &IS1,    const CG_outputRepr *repr2, const Relation &IS2, @@ -259,21 +246,6 @@ std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_da      for (int i = 0; i < access2.size(); i++)        delete access2[i];    } -  /*std::pair<std::vector<DependenceVector>, -    std::vector<DependenceVector> > dv = -    ir->FindScalarDeps(repr1, repr2, index, i, j); -     -     -    result.first.insert(result.first.end(), dv.first.begin(), -    dv.first.end()); -    result.second.insert(result.second.end(), dv.second.begin(), -    dv.second.end());*/ -  /*result.first.insert(result.first.end(), dv.first.begin(), -    dv.first.end()); -    result.second.insert(result.second.end(), dv.second.begin(), -    dv.second.end()); -  */ -      return result;  } diff --git a/src/loop.cc b/src/loop.cc index f187a50..19378a4 100644 --- a/src/loop.cc +++ b/src/loop.cc @@ -69,7 +69,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,      stmt_nesting_level_[i] = t;      stmt_nesting_level[i] = t;    } -   +    stmt = std::vector<Statement>(ir_stmt.size());    int n_dim = -1;    int max_loc; @@ -82,14 +82,14 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,          max_nesting_level = stmt_nesting_level[j];          loc = j;        } -     +      // most deeply nested statement acting as a reference point      if (n_dim == -1) {        n_dim = max_nesting_level;        max_loc = loc; -       +        index = std::vector<std::string>(n_dim); -       +        ir_tree_node *itn = ir_stmt[loc];        int cur_dim = n_dim - 1;        while (itn->parent != NULL) { @@ -101,42 +101,28 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,          }        }      } -     +      // align loops by names, temporary solution      ir_tree_node *itn = ir_stmt[loc];      int depth = stmt_nesting_level_[loc] - 1; -    /*   while (itn->parent != NULL) { -         itn = itn->parent; -         if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { -         std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); -         for (int j = 0; j < n_dim; j++) -         if (index[j] == name) { -         itn->payload = j; -         break; -         } -         if (itn->payload == -1) -         throw loop_error("no complex alignment yet"); -         } -         } -    */      for (int t = depth; t >= 0; t--) {        int y = t;        ir_tree_node *itn = ir_stmt[loc]; -       +        while ((itn->parent != NULL) && (y >= 0)) {          itn = itn->parent;          if (itn->content->type() == IR_CONTROL_LOOP)            y--;        } -       +        if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) {          CG_outputBuilder *ocg = ir->builder(); -         +          itn->payload = depth - t; -         +          CG_outputRepr *code =            static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); -         +          std::vector<CG_outputRepr *> index_expr;          std::vector<std::string> old_index;          CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); @@ -145,48 +131,39 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,            static_cast<IR_Loop *>(itn->content)->index()->name());          code = ocg->CreateSubstitutedStmt(0, code, old_index,                                            index_expr); -         +          replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); -        //stmt[loc].code = code; -                }      } -     +      // set relation variable names      Relation r(n_dim);      F_And *f_root = r.add_and();      itn = ir_stmt[loc];      int temp_depth = depth;      while (itn->parent != NULL) { -       +        itn = itn->parent;        if (itn->content->type() == IR_CONTROL_LOOP) {          r.name_set_var(itn->payload + 1, index[temp_depth]); -         +          temp_depth--;        } -      //static_cast<IR_Loop *>(itn->content)->index()->name());      } -     -    /*while (itn->parent != NULL) { -      itn = itn->parent; -      if (itn->content->type() == IR_CONTROL_LOOP) -      r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); -      }*/ -     +      // extract information from loop/if structures      std::vector<bool> processed(n_dim, false);      std::vector<std::string> vars_to_be_reversed;      itn = ir_stmt[loc];      while (itn->parent != NULL) {        itn = itn->parent; -       +        switch (itn->content->type()) {        case IR_CONTROL_LOOP: {          IR_Loop *lp = static_cast<IR_Loop *>(itn->content);          Variable_ID v = r.set_var(itn->payload + 1);          int c; -         +          try {            c = lp->step_size();            if (c > 0) { @@ -200,7 +177,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,                            cond, true);              else                throw ir_error("loop condition not supported"); -             +            } else if (c < 0) {              CG_outputBuilder *ocg = ir->builder();              CG_outputRepr *lb = lp->lower_bound(); @@ -218,7 +195,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,                            IR_COND_LT, true);              else                throw ir_error("loop condition not supported"); -             +              vars_to_be_reversed.push_back(lp->index()->name());            } else              throw ir_error("loop step size zero"); @@ -229,7 +206,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,            itn->content = itn->content->convert();            return false;          } -         +          if (abs(c) != 1) {            F_Exists *f_exists = f_root->add_exists();            Variable_ID e = f_exists->declare(); @@ -244,7 +221,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,            exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ,                        true);          } -         +          processed[itn->payload] = true;          break;        } @@ -281,7 +258,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,            }            return false;          } -         +          break;        }        default: @@ -292,7 +269,7 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,          return false;        }      } -     +      // add information for missing loops      for (int j = 0; j < n_dim; j++)        if (!processed[j]) { @@ -303,89 +280,32 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,                && itn->payload == j)              break;          } -         +          Variable_ID v = r.set_var(j + 1);          if (loc < max_loc) { -           +            CG_outputBuilder *ocg = ir->builder(); -           +            CG_outputRepr *lb =              static_cast<IR_Loop *>(itn->content)->lower_bound(); -           +            exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ,                        false); -           -          /*    if (ir->QueryExpOperation( -                static_cast<IR_Loop *>(itn->content)->lower_bound()) -                == IR_OP_VARIABLE) { -                IR_ScalarRef *ref = -                static_cast<IR_ScalarRef *>(ir->Repr2Ref( -                static_cast<IR_Loop *>(itn->content)->lower_bound())); -                std::string name_ = ref->name(); -                 -                for (int i = 0; i < index.size(); i++) -                if (index[i] == name_) { -                exp2formula(ir, r, f_root, freevar, lb, v, 's', -                IR_COND_GE, false); -                 -                CG_outputRepr *ub = -                static_cast<IR_Loop *>(itn->content)->upper_bound(); -                IR_CONDITION_TYPE cond = -                static_cast<IR_Loop *>(itn->content)->stop_cond(); -                if (cond == IR_COND_LT || cond == IR_COND_LE) -                exp2formula(ir, r, f_root, freevar, ub, v, -                's', cond, false); -                 -                 -                 -                } -                 -                } -          */ -           +          } else { // loc > max_loc -           +            CG_outputBuilder *ocg = ir->builder();            CG_outputRepr *ub =              static_cast<IR_Loop *>(itn->content)->upper_bound(); -           +            exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ,                        false); -          /*if (ir->QueryExpOperation( -            static_cast<IR_Loop *>(itn->content)->upper_bound()) -            == IR_OP_VARIABLE) { -            IR_ScalarRef *ref = -            static_cast<IR_ScalarRef *>(ir->Repr2Ref( -            static_cast<IR_Loop *>(itn->content)->upper_bound())); -            std::string name_ = ref->name(); -             -            for (int i = 0; i < index.size(); i++) -            if (index[i] == name_) { -             -            CG_outputRepr *lb = -            static_cast<IR_Loop *>(itn->content)->lower_bound(); -             -            exp2formula(ir, r, f_root, freevar, lb, v, 's', -            IR_COND_GE, false); -             -            CG_outputRepr *ub = -            static_cast<IR_Loop *>(itn->content)->upper_bound(); -            IR_CONDITION_TYPE cond = -            static_cast<IR_Loop *>(itn->content)->stop_cond(); -            if (cond == IR_COND_LT || cond == IR_COND_LE) -            exp2formula(ir, r, f_root, freevar, ub, v, -            's', cond, false); -             -             -            } -            } -          */          }        } -     +      r.setup_names();      r.simplify(); -     +      // insert the statement      CG_outputBuilder *ocg = ir->builder();      std::vector<CG_outputRepr *> reverse_expr; @@ -407,10 +327,10 @@ bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree,        stmt[loc].loop_level[i].payload = i;        stmt[loc].loop_level[i].parallel_level = 0;      } -     +      stmt_nesting_level[loc] = -1;    } -   +    return true;  } @@ -418,31 +338,27 @@ Loop::Loop(const IR_Control *control) {    last_compute_cgr_ = NULL;    last_compute_cg_ = NULL; -   +    ir = const_cast<IR_Code *>(control->ir_);    init_code = NULL;    cleanup_code = NULL;    tmp_loop_var_name_counter = 1;    overflow_var_name_counter = 1;    known = Relation::True(0); -   +    ir_tree = build_ir_tree(control->clone(), NULL); -  //    std::vector<ir_tree_node *> ir_stmt; -      while (!init_loop(ir_tree, ir_stmt)) {    } -   -      for (int i = 0; i < stmt.size(); i++) {      std::map<int, CG_outputRepr*>::iterator it = replace.find(i); -     +      if (it != replace.end())        stmt[i].code = it->second;      else        stmt[i].code = stmt[i].code;    } -   +    if (stmt.size() != 0)      dep = DependenceGraph(stmt[0].IS.n_set());    else @@ -450,7 +366,7 @@ Loop::Loop(const IR_Control *control) {    // init the dependence graph    for (int i = 0; i < stmt.size(); i++)      dep.insert(); -   +    for (int i = 0; i < stmt.size(); i++)      for (int j = i; j < stmt.size(); j++) {        std::pair<std::vector<DependenceVector>, @@ -458,7 +374,7 @@ Loop::Loop(const IR_Control *control) {            ir, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS,            freevar, index, stmt_nesting_level_[i],            stmt_nesting_level_[j]); -       +        for (int k = 0; k < dv.first.size(); k++) {          if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k],                                  true)) @@ -466,7 +382,7 @@ Loop::Loop(const IR_Control *control) {          else {            dep.connect(j, i, dv.first[k].reverse());          } -         +        }        for (int k = 0; k < dv.second.size(); k++)          if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], @@ -475,11 +391,8 @@ Loop::Loop(const IR_Control *control) {          else {            dep.connect(i, j, dv.second[k].reverse());          } -      // std::pair<std::vector<DependenceVector>, -      //                std::vector<DependenceVector> > dv_ = test_data_dependences( -            } -   +    // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] @@ -487,49 +400,48 @@ Loop::Loop(const IR_Control *control) {      int n = stmt[i].IS.n_set();      stmt[i].xform = Relation(n, 2 * n + 1);      F_And *f_root = stmt[i].xform.add_and(); -     +      for (int j = 1; j <= n; j++) {        EQ_Handle h = f_root->add_EQ();        h.update_coef(stmt[i].xform.output_var(2 * j), 1);        h.update_coef(stmt[i].xform.input_var(j), -1);      } -     +      for (int j = 1; j <= 2 * n + 1; j += 2) {        EQ_Handle h = f_root->add_EQ();        h.update_coef(stmt[i].xform.output_var(j), 1);      }      stmt[i].xform.simplify();    } -   +    if (stmt.size() != 0)      num_dep_dim = stmt[0].IS.n_set();    else      num_dep_dim = 0; -  // debug -  /*for (int i = 0; i < stmt.size(); i++) { -    std::cout << i << ": "; -    //stmt[i].xform.print(); -    stmt[i].IS.print(); -    std::cout << std::endl; -     -    }*/ -  //end debug +// Debug output +//  for (int i = 0; i < stmt.size(); i++) { +//    std::cout << i << ": "; +//    stmt[i].xform.print(); +//    stmt[i].IS.print(); +//    std::cout << std::endl; +// +//    }  }  Loop::~Loop() { -   +    delete last_compute_cgr_;    delete last_compute_cg_; -   +    for (int i = 0; i < stmt.size(); i++)      if (stmt[i].code != NULL) {        stmt[i].code->clear();        delete stmt[i].code;      } -   +    for (int i = 0; i < ir_tree.size(); i++)      delete ir_tree[i]; -   +    if (init_code != NULL) {      init_code->clear();      delete init_code; @@ -543,10 +455,10 @@ Loop::~Loop() {  int Loop::get_dep_dim_of(int stmt_num, int level) const {    if (stmt_num < 0 || stmt_num >= stmt.size())      throw std::invalid_argument("invaid statement " + to_string(stmt_num)); -   +    if (level < 1 || level > stmt[stmt_num].loop_level.size())      return -1; -   +    int trip_count = 0;    while (true) {      switch (stmt[stmt_num].loop_level[level - 1].type) { @@ -577,16 +489,16 @@ int Loop::get_dep_dim_of(int stmt_num, int level) const {  int Loop::get_last_dep_dim_before(int stmt_num, int level) const {    if (stmt_num < 0 || stmt_num >= stmt.size())      throw std::invalid_argument("invaid statement " + to_string(stmt_num)); -   +    if (level < 1)      return -1;    if (level > stmt[stmt_num].loop_level.size())      level = stmt[stmt_num].loop_level.size() + 1; -   +    for (int i = level - 1; i >= 1; i--)      if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal)        return stmt[stmt_num].loop_level[i - 1].payload; -   +    return -1;  } @@ -623,7 +535,7 @@ CG_outputRepr *Loop::getCode(int effort) const {    if (m == 0)      return NULL;    const int n = stmt[0].xform.n_out(); -   +    if (last_compute_cg_ == NULL) {      std::vector<Relation> IS(m);      std::vector<Relation> xforms(m); @@ -632,29 +544,29 @@ CG_outputRepr *Loop::getCode(int effort) const {        xforms[i] = stmt[i].xform;      }      Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); -     +      last_compute_cg_ = new CodeGen(xforms, IS, known);      delete last_compute_cgr_;      last_compute_cgr_ = NULL;    } -   +    if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {      delete last_compute_cgr_;      last_compute_cgr_ = last_compute_cg_->buildAST(effort);      last_compute_effort_ = effort;    } -   +    std::vector<CG_outputRepr *> stmts(m);    for (int i = 0; i < m; i++)      stmts[i] = stmt[i].code;    CG_outputBuilder *ocg = ir->builder();    CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts); -   +    if (init_code != NULL)      repr = ocg->StmtListAppend(init_code->clone(), repr);    if (cleanup_code != NULL)      repr = ocg->StmtListAppend(repr, cleanup_code->clone()); -   +    return repr;  } @@ -663,7 +575,7 @@ void Loop::printCode(int effort) const {    if (m == 0)      return;    const int n = stmt[0].xform.n_out(); -   +    if (last_compute_cg_ == NULL) {      std::vector<Relation> IS(m);      std::vector<Relation> xforms(m); @@ -672,18 +584,18 @@ void Loop::printCode(int effort) const {        xforms[i] = stmt[i].xform;      }      Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); -     +      last_compute_cg_ = new CodeGen(xforms, IS, known);      delete last_compute_cgr_;      last_compute_cgr_ = NULL;    } -   +    if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {      delete last_compute_cgr_;      last_compute_cgr_ = last_compute_cg_->buildAST(effort);      last_compute_effort_ = effort;    } -   +    std::string repr = last_compute_cgr_->printString();    std::cout << repr << std::endl;  } @@ -710,7 +622,7 @@ void Loop::printDependenceGraph() const {  Relation Loop::getNewIS(int stmt_num) const {    Relation result; -   +    if (stmt[stmt_num].xform.is_null()) {      Relation known = Extend_Set(copy(this->known),                                  stmt[stmt_num].IS.n_set() - this->known.n_set()); @@ -723,19 +635,19 @@ Relation Loop::getNewIS(int stmt_num) const {          Restrict_Domain(copy(stmt[stmt_num].xform),                          copy(stmt[stmt_num].IS))), known);    } -   +    result.simplify(2, 4); -   +    return result;  }  std::vector<Relation> Loop::getNewIS() const {    const int m = stmt.size(); -   +    std::vector<Relation> new_IS(m);    for (int i = 0; i < m; i++)      new_IS[i] = getNewIS(i); -   +    return new_IS;  } @@ -743,7 +655,7 @@ void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) {  	// check sanity of parameters  	if(stmt_num < 0)  		throw std::invalid_argument("invalid statement " + to_string(stmt_num)); -	 +  	CG_outputBuilder *ocg = ir->builder();  	CG_outputRepr *code = stmt[stmt_num].code;  	ocg->CreatePragmaAttribute(code, level, pragmaText); @@ -761,13 +673,13 @@ void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hin  std::vector<int> Loop::getLexicalOrder(int stmt_num) const {    assert(stmt_num < stmt.size()); -   +    const int n = stmt[stmt_num].xform.n_out();    std::vector<int> lex(n, 0); -   +    for (int i = 0; i < n; i += 2)      lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); -   +    return lex;  } @@ -776,13 +688,13 @@ std::vector<int> Loop::getLexicalOrder(int stmt_num) const {  std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const {    assert(stmt_num >= 0 && stmt_num < stmt.size());    assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); -   +    std::set<int> working;    for (int i = 0; i < stmt.size(); i++)      if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable()          && stmt[i].loop_level.size() >= level)        working.insert(i); -   +    for (int i = 1; i <= level; i++) {      int a = getLexicalOrder(stmt_num, i);      for (std::set<int>::iterator j = working.begin(); j != working.end();) { @@ -793,14 +705,14 @@ std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const {          ++j;      }    } -   +    return working;  }  int Loop::getLexicalOrder(int stmt_num, int level) const {    assert(stmt_num >= 0 && stmt_num < stmt.size());    assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); -   +    Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform;    for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++)      if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { @@ -815,7 +727,7 @@ int Loop::getLexicalOrder(int stmt_num, int level) const {          return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t;        }      } -   +    throw loop_error(      "can't find lexical order for statement " + to_string(stmt_num)      + "'s loop level " + to_string(level)); @@ -823,7 +735,7 @@ int Loop::getLexicalOrder(int stmt_num, int level) const {  std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const {    const int m = stmt.size(); -   +    std::set<int> same_loops;    for (int i = 0; i < m; i++) {      if (dim < 0) @@ -837,32 +749,32 @@ std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const {        if (j > dim)          same_loops.insert(i);      } -     +    } -   +    return same_loops;  }  void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) {    const int m = stmt.size(); -   +    if (amount == 0)      return; -   +    for (int i = 0; i < m; i++) {      std::vector<int> lex2 = getLexicalOrder(i); -     +      bool need_shift = true; -     +      for (int j = 0; j < dim; j++)        if (lex2[j] != lex[j]) {          need_shift = false;          break;        } -     +      if (!need_shift)        continue; -     +      if (amount > 0) {        if (lex2[dim] < lex[dim])          continue; @@ -870,14 +782,14 @@ void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) {        if (lex2[dim] > lex[dim])          continue;      } -     +      assign_const(stmt[i].xform, dim, lex2[dim] + amount);    }  }  std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,                                                       int level) { -   +    std::set<int> not_nested_at_this_level;    std::map<ir_tree_node*, std::set<int> > sorted_by_loop;    std::map<int, std::set<int> > sorted_by_lex_order; @@ -885,73 +797,73 @@ std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,    bool lex_order_already_set = false;    for (std::set<int>::iterator it = active.begin(); it != active.end();         it++) { -     +      if (stmt[*it].ir_stmt_node == NULL)        lex_order_already_set = true;    } -   +    if (lex_order_already_set) { -     +      for (std::set<int>::iterator it = active.begin(); it != active.end();           it++) {        std::map<int, std::set<int> >::iterator it2 =          sorted_by_lex_order.find(            get_const(stmt[*it].xform, 2 * (level - 1),                      Output_Var)); -       +        if (it2 != sorted_by_lex_order.end())          it2->second.insert(*it);        else { -         +          std::set<int> to_insert; -         +          to_insert.insert(*it); -         +          sorted_by_lex_order.insert(            std::pair<int, std::set<int> >(              get_const(stmt[*it].xform, 2 * (level - 1),                        Output_Var), to_insert)); -         +        } -       +      } -     +      for (std::map<int, std::set<int> >::iterator it2 =             sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end();           it2++)        to_return.push_back(it2->second); -     +    } else { -     +      for (std::set<int>::iterator it = active.begin(); it != active.end();           it++) { -       +        ir_tree_node* itn = stmt[*it].ir_stmt_node;        itn = itn->parent;        while ((itn != NULL) && (itn->payload != level - 1))          itn = itn->parent; -       +        if (itn == NULL)          not_nested_at_this_level.insert(*it);        else {          std::map<ir_tree_node*, std::set<int> >::iterator it2 =            sorted_by_loop.find(itn); -         +          if (it2 != sorted_by_loop.end())            it2->second.insert(*it);          else {            std::set<int> to_insert; -           +            to_insert.insert(*it); -           +            sorted_by_loop.insert(              std::pair<ir_tree_node*, std::set<int> >(itn,                                                       to_insert)); -           +          } -         +        } -       +      }      if (not_nested_at_this_level.size() > 0) {        for (std::set<int>::iterator it = not_nested_at_this_level.begin(); @@ -959,7 +871,7 @@ std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,          std::set<int> temp;          temp.insert(*it);          to_return.push_back(temp); -         +        }      }      for (std::map<ir_tree_node*, std::set<int> >::iterator it2 = @@ -971,17 +883,17 @@ std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active,  void update_successors(int n, int node_num[], int cant_fuse_with[],                         Graph<std::set<int>, bool> &g, std::list<int> &work_list) { -   +    std::set<int> disconnect;    for (Graph<std::set<int>, bool>::EdgeList::iterator i =           g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) {      int m = i->first; -     +      if (node_num[m] != -1)        throw loop_error("Graph input for fusion has cycles not a DAG!!"); -     +      std::vector<bool> check_ = g.getEdge(n, m); -     +      bool has_bad_edge_path = false;      for (int i = 0; i < check_.size(); i++)        if (!check_[i]) { @@ -994,12 +906,12 @@ void update_successors(int n, int node_num[], int cant_fuse_with[],        cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]);      disconnect.insert(m);    } -   -   + +    for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end();         i++) {      g.disconnect(n, *i); -     +      bool no_incoming_edges = true;      for (int j = 0; j < g.vertex.size(); j++)        if (j != *i) @@ -1007,23 +919,23 @@ void update_successors(int n, int node_num[], int cant_fuse_with[],            no_incoming_edges = false;            break;          } -     -     + +      if (no_incoming_edges)        work_list.push_back(*i);    } -   +  }  Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(    std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) {    Graph<std::set<int>, bool> g; -   +    for (int i = 0; i < s.size(); i++)      g.insert(s[i]); -   +    for (int i = 0; i < s.size(); i++) { -     +      for (int j = i + 1; j < s.size(); j++) {        bool has_true_edge_i_to_j = false;        bool has_true_edge_j_to_i = false; @@ -1031,16 +943,16 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(        bool is_connected_j_to_i = false;        for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end();             ii++) { -         +          for (std::set<int>::iterator jj = s[j].begin();               jj != s[j].end(); jj++) { -           +            std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj);            for (int k = 0; k < dvs.size(); k++)              if (dvs[k].is_control_dependence()                  || (dvs[k].is_data_dependence()                      && dvs[k].has_been_carried_at(dep_dim))) { -               +                if (dvs[k].is_data_dependence()                    && dvs[k].has_negative_been_carried_at(                      dep_dim)) { @@ -1049,14 +961,14 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(                  break;                } else {                  //g.connect(i, j, true); -                 +                  has_true_edge_i_to_j = true;                  //break                }              } -           +            //if (is_connected) -           +            //    break;            //        if (has_true_edge_i_to_j && !is_connected_i_to_j)            //                g.connect(i, j, true); @@ -1065,76 +977,63 @@ Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level(              if (dvs[k].is_control_dependence()                  || (dvs[k].is_data_dependence()                      && dvs[k].has_been_carried_at(dep_dim))) { -               +                if (is_connected_i_to_j || has_true_edge_i_to_j)                  throw loop_error(                    "Graph input for fusion has cycles not a DAG!!"); -               +                if (dvs[k].is_data_dependence()                    && dvs[k].has_negative_been_carried_at(                      dep_dim)) { -                //g.connect(i, j, false);                  is_connected_j_to_i = true;                  break;                } else { -                //g.connect(i, j, true); -                                  has_true_edge_j_to_i = true; -                //break;                }              } -           -          //    if (is_connected) -          //break; -          //    if (is_connected) -          //break;          } -         -         -        //if (is_connected) -        //  break;        } -       -       + +        if (is_connected_i_to_j)          g.connect(i, j, false);        else if (has_true_edge_i_to_j)          g.connect(i, j, true); -       +        if (is_connected_j_to_i)          g.connect(j, i, false);        else if (has_true_edge_j_to_i)          g.connect(j, i, true); -       -       + +      }    }    return g;  }  std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) { -   +    bool roots[g.vertex.size()]; -   +    for (int i = 0; i < g.vertex.size(); i++)      roots[i] = true; -   +    for (int i = 0; i < g.vertex.size(); i++)      for (int j = i + 1; j < g.vertex.size(); j++) { -       +        if (g.hasEdge(i, j))          roots[j] = false; -       +        if (g.hasEdge(j, i))          roots[i] = false; -       +      } -   +    std::list<int> work_list;    int cant_fuse_with[g.vertex.size()];    std::vector<std::set<int> > s;    //Each Fused set's representative node -   +    int node_to_fused_nodes[g.vertex.size()];    int node_num[g.vertex.size()];    for (int i = 0; i < g.vertex.size(); i++) { @@ -1145,61 +1044,54 @@ std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) {      node_num[i] = -1;    }    // topological sort according to chun's permute algorithm -  //   std::vector<std::set<int> > s = g.topoSort();    std::vector<std::set<int> > s2 = g.topoSort();    if (work_list.empty() || (s2.size() != g.vertex.size())) { -          std::cout << s2.size() << "\t" << g.vertex.size() << std::endl;      throw loop_error("Input for fusion not a DAG!!"); -     -        }    int fused_nodes_counter = 0;    while (!work_list.empty()) {      int n = work_list.front(); -    //int n_ = g.vertex[n].first;      work_list.pop_front();      int node;      if (cant_fuse_with[n] == 0)        node = 0;      else        node = cant_fuse_with[n]; -     +      if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) {        int rep_node = node_to_fused_nodes[node];        node_num[n] = node_num[rep_node]; -       +        try {          update_successors(n, node_num, cant_fuse_with, g, work_list);        } catch (const loop_error &e) { -         +          throw loop_error(            "statements cannot be fused together due to negative dependence"); -         -         + +        }        for (std::set<int>::iterator it = g.vertex[n].first.begin();             it != g.vertex[n].first.end(); it++)          s[node].insert(*it);      } else { -      //std::set<int> new_node; -      //new_node.insert(n_);        s.push_back(g.vertex[n].first);        node_to_fused_nodes[node] = n;        node_num[n] = ++node;        try {          update_successors(n, node_num, cant_fuse_with, g, work_list);        } catch (const loop_error &e) { -         +          throw loop_error(            "statements cannot be fused together due to negative dependence"); -         -         + +        }        fused_nodes_counter++;      }    } -   +    return s;  } @@ -1207,7 +1099,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,                             int starting_order, std::vector<std::vector<std::string> > idxNames) {    if (active.size() == 0)      return; -   +    // check for sanity of parameters    if (dim < 0 || dim % 2 != 0)      throw std::invalid_argument( @@ -1232,7 +1124,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,              "statements are not in the same sub loop nest");      }    } -   +    // sepearate statements by current loop level types    int level = (dim + 2) / 2;    std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; @@ -1245,7 +1137,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,            stmt[*i].loop_level[level - 1].type,            stmt[*i].loop_level[level - 1].payload)].insert(*i);    } -   +    // further separate statements due to control dependences    std::vector<std::set<int> > active_by_level_type_splitted;    for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = @@ -1277,11 +1169,11 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,          active_by_level_type_splitted.push_back(not_controlled);        }      } -   +    // set lexical order separating loops with different loop types first    if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) {      int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; -     +      Graph<std::set<int>, Empty> g;      for (std::vector<std::set<int> >::iterator i =             active_by_level_type_splitted.begin(); @@ -1342,13 +1234,13 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,              break;          }        } -     +      std::vector<std::set<int> > s = g.topoSort();      if (s.size() != g.vertex.size())        throw loop_error(          "cannot separate statements with different loop types at loop level "          + to_string(level)); -     +      // assign lexical order      int order = starting_order;      for (int i = 0; i < s.size(); i++) { @@ -1372,7 +1264,7 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,      std::set<int> nonsingles;      std::map<coef_t, std::set<int> > fake_singles;      std::set<int> fake_singles_; -     +      // sort out statements that do not require loops      for (std::set<int>::iterator i = active.begin(); i != active.end();           i++) { @@ -1398,176 +1290,55 @@ void Loop::setLexicalOrder(int dim, const std::set<int> &active,        } else          nonsingles.insert(*i);      } -     -     + +      // split nonsingles forcibly according to negative dependences present (loop unfusible)      int dep_dim = get_dep_dim_of(ref_stmt_num, level); -     +      if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { -       -      bool dummy_level_found = false; -       +        std::vector<std::set<int> > s; -       +        s = sort_by_same_loops(active, level);        bool further_levels_exist = false; -       +        if (!idxNames.empty())          if (level <= idxNames[ref_stmt_num].size())            if (idxNames[ref_stmt_num][level - 1].length() == 0) { -            //  && s.size() == 1) { +            // Dummy level found              int order1 = 0; -            dummy_level_found = true; -             +              for (int i = level; i < idxNames[ref_stmt_num].size();                   i++)                if (idxNames[ref_stmt_num][i].length() > 0)                  further_levels_exist = true; -             +            } -       -      //if (!dummy_level_found) { -       +        if (s.size() > 1) { -         +          Graph<std::set<int>, bool> g = construct_induced_graph_at_level(            s, dep, dep_dim);          s = typed_fusion(g);        }        int order = 0;        for (int i = 0; i < s.size(); i++) { -         +          for (std::set<int>::iterator it = s[i].begin();               it != s[i].end(); it++)            assign_const(stmt[*it].xform, dim, order); -         +          if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1))            setLexicalOrder(dim + 2, s[i], order, idxNames); -         +          order++;        } -      //} -      /*    else { -             -            int order1 = 0; -            int order = 0; -            for (std::set<int>::iterator i = active.begin(); -            i != active.end(); i++) { -            if (!further_levels_exist) -            assign_const(stmt[*i].xform, dim, order1++); -            else -            assign_const(stmt[*i].xform, dim, order1); -             -            } -             -            if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist) -            setLexicalOrder(dim + 2, active, order, idxNames); -            } -      */      } else {        int dummy_order = 0;        for (std::set<int>::iterator i = active.begin(); i != active.end();             i++)          assign_const(stmt[*i].xform, dim, dummy_order++);      } -    /*for (int i = 0; i < g2.vertex.size(); i++) -      for (int j = i+1; j < g2.vertex.size(); j++) { -      std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); -      for (int k = 0; k < dvs.size(); k++) -      if (dvs[k].is_control_dependence() || -      (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { -      g2.connect(i, j); -      break; -      } -      dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); -      for (int k = 0; k < dvs.size(); k++) -      if (dvs[k].is_control_dependence() || -      (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { -      g2.connect(j, i); -      break; -      } -      } -       -      std::vector<std::set<int> > s2 = g2.packed_topoSort(); -       -      std::vector<std::set<int> > splitted_nonsingles; -      for (int i = 0; i < s2.size(); i++) { -      std::set<int> cur_scc; -      for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) -      cur_scc.insert(g2.vertex[*j].first); -      splitted_nonsingles.push_back(cur_scc); -      } -    */ -    //convert to dependence graph for grouped statements -    //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; -    /*int order = 0; -      for (std::set<int>::iterator j = active.begin(); j != active.end(); -      j++) { -      std::set<int> continuous; -      std::cout<< active.size()<<std::endl; -      while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) { -      continuous.insert(*j); -      j++; -      } -       -      printf("continuous size is %d\n", continuous.size()); -       -       -       -      if (continuous.size() > 0) { -      std::vector<std::set<int> > s = typed_fusion(continuous, dep, -      dep_dim); -       -      for (int i = 0; i < s.size(); i++) { -      for (std::set<int>::iterator l = s[i].begin(); -      l != s[i].end(); l++) { -      assign_const(stmt[*l].xform, dim + 2, order); -      setLexicalOrder(dim + 2, s[i]); -      } -      order++; -      } -      } -       -      if (j != active.end()) { -      assign_const(stmt[*j].xform, dim + 2, order); -       -      for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2) -      assign_const(stmt[*j].xform, k, 0); -      order++; -      } -       -      if( j == active.end()) -      break; -      } -    */ -     -     -    // assign lexical order -    /*int order = starting_order; -      for (int i = 0; i < s.size(); i++) { -      // translate each SCC into original statements -      std::set<int> cur_scc; -      for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) -      copy(s[i].begin(), s[i].end(), -      inserter(cur_scc, cur_scc.begin())); -       -      // now assign the constant -      for (std::set<int>::iterator j = cur_scc.begin(); -      j != cur_scc.end(); j++) -      assign_const(stmt[*j].xform, dim, order); -       -      if (cur_scc.size() > 1) -      setLexicalOrder(dim + 2, cur_scc); -      else if (cur_scc.size() == 1) { -      int cur_stmt = *(cur_scc.begin()); -      for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) -      assign_const(stmt[cur_stmt].xform, j, 0); -      } -       -      if (cur_scc.size() > 0) -      order++; -      } -    */    }  } @@ -1586,15 +1357,15 @@ void Loop::apply_xform(int stmt_num) {  void Loop::apply_xform(std::set<int> &active) {    int max_n = 0; -   +    CG_outputBuilder *ocg = ir->builder();    for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) {      int n = stmt[*i].loop_level.size();      if (n > max_n)        max_n = n; -     +      std::vector<int> lex = getLexicalOrder(*i); -     +      Relation mapping(2 * n + 1, n);      F_And *f_root = mapping.add_and();      for (int j = 1; j <= n; j++) { @@ -1604,7 +1375,7 @@ void Loop::apply_xform(std::set<int> &active) {      }      mapping = Composition(mapping, stmt[*i].xform);      mapping.simplify(); -     +      // match omega input/output variables to variable names in the code      for (int j = 1; j <= stmt[*i].IS.n_set(); j++)        mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); @@ -1613,10 +1384,9 @@ void Loop::apply_xform(std::set<int> &active) {                                tmp_loop_var_name_prefix                                + to_string(tmp_loop_var_name_counter + j - 1));      mapping.setup_names(); -     +      Relation known = Extend_Set(copy(this->known),                                  mapping.n_out() - this->known.n_set()); -    //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL));      std::vector<std::string> loop_vars;      for (int j = 1; j <= stmt[*i].IS.n_set(); j++)        loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); @@ -1628,7 +1398,7 @@ void Loop::apply_xform(std::set<int> &active) {                                                 subs);      stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS));      stmt[*i].IS.simplify(); -     +      // replace original transformation relation with straight 1-1 mapping      mapping = Relation(n, 2 * n + 1);      f_root = mapping.add_and(); @@ -1644,28 +1414,28 @@ void Loop::apply_xform(std::set<int> &active) {      }      stmt[*i].xform = mapping;    } -   +    tmp_loop_var_name_counter += max_n;  }  void Loop::addKnown(const Relation &cond) { -   +    // invalidate saved codegen computation    delete last_compute_cgr_;    last_compute_cgr_ = NULL;    delete last_compute_cg_;    last_compute_cg_ = NULL; -   +    int n1 = this->known.n_set(); -   +    Relation r = copy(cond);    int n2 = r.n_set(); -   +    if (n1 < n2)      this->known = Extend_Set(this->known, n2 - n1);    else if (n1 > n2)      r = Extend_Set(r, n1 - n2); -   +    this->known = Intersection(this->known, r);  } @@ -1677,7 +1447,7 @@ void Loop::removeDependence(int stmt_num_from, int stmt_num_to) {    if (stmt_num_to >= stmt.size())      throw std::invalid_argument(        "invalid statement number " + to_string(stmt_num_to)); -   +    dep.disconnect(stmt_num_from, stmt_num_to);  } @@ -1712,7 +1482,7 @@ void Loop::dump() const {  bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {    if (stmt.size() == 0)      return true; -   +    // check for sanity of parameters    for (int i = 0; i < stmt.size(); i++) {      if (stmt[i].loop_level.size() != num_dep_dim) @@ -1750,11 +1520,11 @@ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {      h.update_coef(mapping.output_var(i), -1);      h.update_coef(mapping.input_var(i), 1);    } -   +    // update transformation relations    for (int i = 0; i < stmt.size(); i++)      stmt[i].xform = Composition(copy(mapping), stmt[i].xform); -   +    // update dependence graph    for (int i = 0; i < dep.vertex.size(); i++)      for (DependenceGraph::EdgeList::iterator j = @@ -1809,7 +1579,7 @@ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {            }            dv.lbounds = lbounds;            dv.ubounds = ubounds; -           +            break;          }          default: @@ -1818,13 +1588,13 @@ bool Loop::nonsingular(const std::vector<std::vector<int> > &T) {        }        j->second = dvs;      } -   +    // set constant loop values    std::set<int> active;    for (int i = 0; i < stmt.size(); i++)      active.insert(i);    setLexicalOrder(0, active); -   +    return true;  } @@ -1842,7 +1612,7 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j,      last_dim = last_dim / 2;      if (last_dim == 0)        return true; -     +      for (int i = 0; i < last_dim; i++) {        if (dv.lbounds[i] > 0)          return true; @@ -1852,8 +1622,8 @@ bool Loop::is_dependence_valid_based_on_lex_order(int i, int j,    }    if (before)      return true; -   +    return false; -   +  } diff --git a/src/loop_basic.cc b/src/loop_basic.cc index f5234b9..cf72c97 100644 --- a/src/loop_basic.cc +++ b/src/loop_basic.cc @@ -794,46 +794,7 @@ std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) {          new_stmt.loop_level = stmt[*i].loop_level;          stmt_nesting_level_.push_back(stmt_nesting_level_[*i]); -         -        /*std::pair<std::vector<DependenceVector>, -          std::vector<DependenceVector> > dv = -          test_data_dependences(ir, stmt[*i].code, part1, -          stmt[*i].code, part2, freevar, index, -          stmt_nesting_level_[*i], -          stmt_nesting_level_[stmt.size() - 1]); -           -           -           -           -          for (int k = 0; k < dv.first.size(); k++) -          part1_to_part2++; -          if (part1_to_part2 > 0 && part2_to_part1 > 0) -          throw loop_error( -          "loop error: Aborting, split resulted in impossible dependence cycle!"); -           -          for (int k = 0; k < dv.second.size(); k++) -          part2_to_part1++; -           -           -           -          if (part1_to_part2 > 0 && part2_to_part1 > 0) -          throw loop_error( -          "loop error: Aborting, split resulted in impossible dependence cycle!"); -           -           -           -          if (part2_to_part1 > 0){ -          temp_place_after = false; -          assigned = true; -           -          }else if (part1_to_part2 > 0){ -          temp_place_after = true; -           -          assigned = true; -          } -           -        */ -         +          if (place_after)            assign_const(new_stmt.xform, dim - 1, cur_lex + 1);          else @@ -1321,9 +1282,7 @@ void Loop::fuse(const std::set<int> &stmt_nums, int level) {      s5.push_back(s4);      //Dependence Check for Ordering Constraint -    //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5, -    //      dep, dep_dim); -     +      Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s3, dep,                                                                      dep_dim); @@ -1532,7 +1491,6 @@ void Loop::distribute(const std::set<int> &stmt_nums, int level) {      order++;    }    // no need to update dependence graph -  ;    return;  } diff --git a/src/loop_datacopy.cc b/src/loop_datacopy.cc index 8d11b0a..1ccd444 100644 --- a/src/loop_datacopy.cc +++ b/src/loop_datacopy.cc @@ -21,11 +21,6 @@  using namespace omega; -// -// data copy function by referring arrays by numbers. -// e.g. A[i] = A[i-1] + B[i] -//      parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] -//  bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level,                      bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {    // check for sanity of parameters @@ -75,11 +70,6 @@ bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array    return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);  } -// -// data copy function by referring arrays by name. -// e.g. A[i] = A[i-1] + B[i] -//      parameter array_name=A means to copy data touched by A[i-1] and A[i] -//  bool Loop::datacopy(int stmt_num, int level, const std::string &array_name,                      bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {    // check for sanity of parameters @@ -193,1013 +183,6 @@ bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int>    return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);  } - -// -// Implement low level datacopy function with lots of options. -// -/*bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, -  const std::vector<int> &privatized_levels, -  bool allow_extra_read, int fastest_changing_dimension, -  int padding_stride, int padding_alignment, int memory_type) { -  if (stmt_refs.size() == 0) -  return true; -   -  // check for sanity of parameters -  IR_ArraySymbol *sym = NULL; -  std::vector<int> lex; -  std::set<int> active; -  if (level <= 0) -  throw std::invalid_argument("invalid loop level " + to_string(level)); -  for (int i = 0; i < privatized_levels.size(); i++) { -  if (i == 0) { -  if (privatized_levels[i] < level) -  throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); -  } -  else if (privatized_levels[i] <= privatized_levels[i-1]) -  throw std::invalid_argument("privatized loop levels must be in ascending order"); -  } -  for (int i = 0; i < stmt_refs.size(); i++) { -  int stmt_num = stmt_refs[i].first; -  active.insert(stmt_num); -  if (stmt_num < 0 || stmt_num >= stmt.size()) -  throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); -  if (privatized_levels.size() != 0) { -  if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) -  throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); -  } -  else { -  if (level > stmt[stmt_num].loop_level.size()) -  throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); -  } -  for (int j = 0; j < stmt_refs[i].second.size(); j++) { -  if (sym == NULL) { -  sym = stmt_refs[i].second[j]->symbol(); -  lex = getLexicalOrder(stmt_num); -  } -  else { -  IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); -  if (t->name() != sym->name()) { -  delete t; -  delete sym; -  throw std::invalid_argument("try to copy data from different arrays"); -  } -  delete t; -  } -  } -  } -  if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) -  throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); -  if (padding_stride < 0) -  throw std::invalid_argument("invalid temporary array stride requirement"); -  if (padding_alignment == -1 || padding_alignment == 0) -  throw std::invalid_argument("invalid temporary array alignment requirement"); -   -  int dim = 2*level - 1; -  int n_dim = sym->n_dim(); -   -  if (fastest_changing_dimension == -1) -  switch (sym->layout_type()) { -  case IR_ARRAY_LAYOUT_ROW_MAJOR: -  fastest_changing_dimension = n_dim - 1; -  break; -  case IR_ARRAY_LAYOUT_COLUMN_MAJOR: -  fastest_changing_dimension = 0; -  break; -  default: -  throw loop_error("unsupported array layout"); -  } -   -   -  // build iteration spaces for all reads and for all writes separately -  apply_xform(active); -  bool has_write_refs = false; -  bool has_read_refs = false; -  Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); -  Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); -  for (int i = 0; i < stmt_refs.size(); i++) { -  int stmt_num = stmt_refs[i].first; -   -  for (int j = 0; j < stmt_refs[i].second.size(); j++) { -  Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); -  for (int k = 1; k <= mapping.n_inp(); k++) -  mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); -  mapping.setup_names(); -  F_And *f_root = mapping.add_and(); -  for (int k = 1; k <= level-1; k++) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(mapping.input_var(k), 1); -  h.update_coef(mapping.output_var(k), -1); -  } -  for (int k = 0; k < privatized_levels.size(); k++) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(mapping.input_var(privatized_levels[k]), 1); -  h.update_coef(mapping.output_var(level+k), -1); -  } -  for (int k = 0; k < n_dim; k++) { -  CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); -  exp2formula(ir, mapping, f_root, freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); -  repr->clear(); -  delete repr; -  } -  Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); -  if (stmt_refs[i].second[j]->is_write()) { -  has_write_refs = true; -  wo_copy_is = Union(wo_copy_is, r); -  wo_copy_is.simplify(2, 4); -  } -  else { -  has_read_refs = true; -  //protonu--removing the next line for now -  ro_copy_is = Union(ro_copy_is, r); -  ro_copy_is.simplify(2, 4); -  //ro_copy_is = ConvexRepresentation(Union(ro_copy_is, r)); -   -  } -  } -  } -   -  if (allow_extra_read) { -  Relation t = DecoupledConvexHull(copy(ro_copy_is)); -  if (t.number_of_conjuncts() > 1) -  ro_copy_is = RectHull(ro_copy_is); -  else -  ro_copy_is = t; -  } -  else { -  Relation t = ConvexRepresentation(copy(ro_copy_is)); -  if (t.number_of_conjuncts() > 1) -  ro_copy_is = RectHull(ro_copy_is); -  else -  ro_copy_is = t; -  } -  wo_copy_is = ConvexRepresentation(wo_copy_is); -   -  if (allow_extra_read) { -  Tuple<Relation> Rs; -  Tuple<int> active; -  for (DNF_Iterator di(ro_copy_is.query_DNF()); di; di++) { -  Rs.append(Relation(ro_copy_is, di.curr())); -  active.append(1); -  } -  Relation the_gcs = Relation::True(ro_copy_is.n_set()); -  for (int i = level-1+privatized_levels.size()+1; i <= level-1+privatized_levels.size()+n_dim; i++) { -  Relation r = greatest_common_step(Rs, active, i, Relation::Null()); -  the_gcs = Intersection(the_gcs, r); -  } -   -  ro_copy_is = Approximate(ro_copy_is); -  ro_copy_is = ConvexRepresentation(ro_copy_is); -  ro_copy_is = Intersection(ro_copy_is, the_gcs); -  ro_copy_is.simplify(); -  } -   -   -   -  for (int i = 1; i < level; i++) { -  std::string s = stmt[*active.begin()].IS.input_var(i)->name(); -  wo_copy_is.name_set_var(i, s); -  ro_copy_is.name_set_var(i, s); -  } -  for (int i = 0; i < privatized_levels.size(); i++) { -  std::string s = stmt[*active.begin()].IS.input_var(privatized_levels[i])->name(); -  wo_copy_is.name_set_var(level+i, s); -  ro_copy_is.name_set_var(level+i, s); -  } -  for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { -  std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); -  wo_copy_is.name_set_var(i, s); -  ro_copy_is.name_set_var(i, s); -  } -  tmp_loop_var_name_counter += n_dim; -   -  //protonu--end change -   -  wo_copy_is.setup_names(); -  ro_copy_is.setup_names(); -   -  // build merged iteration space for calculating temporary array size -  bool already_use_recthull = false; -  Relation untampered_copy_is = ConvexRepresentation(Union(copy(wo_copy_is), copy(ro_copy_is))); -  Relation copy_is = untampered_copy_is; -  if (copy_is.number_of_conjuncts() > 1) { -  try { -  copy_is = ConvexHull(copy(untampered_copy_is)); -  } -  catch (const std::overflow_error &e) { -  copy_is = RectHull(copy(untampered_copy_is)); -  already_use_recthull = true; -  } -  } -   -   -  Retry_copy_is: -  // extract temporary array information -  CG_outputBuilder *ocg = ir->builder(); -  std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL -  std::vector<coef_t> index_stride(n_dim, 1); -  std::vector<bool> is_index_eq(n_dim, false); -  std::vector<std::pair<int, CG_outputRepr *> > index_sz(0);   -  Relation reduced_copy_is = copy(copy_is); -   -  for (int i = 0; i < n_dim; i++) { -  if (i != 0) -  reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); -  Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); -   -  // extract stride -  EQ_Handle stride_eq; -  { -  bool simple_stride = true; -  int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level-1+privatized_levels.size()+i+1), stride_eq, simple_stride); -  if (strides > 1) { -  throw loop_error("too many strides"); -  } -  else if (strides == 1) { -  int sign = stride_eq.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); -  Constr_Vars_Iter it(stride_eq, true); -  index_stride[i] = abs((*it).coef/sign); -  } -  } -   -  // check if this arary index requires loop -  Conjunct *c = bound.query_DNF()->single_conjunct(); -  for (EQ_Iterator ei(c->EQs()); ei; ei++) { -  if ((*ei).has_wildcards()) -  continue; -   -  int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); -  if (coef != 0) { -  int sign = 1; -  if (coef < 0) { -  coef = -coef; -  sign = -1; -  } -   -  CG_outputRepr *op = NULL; -  for (Constr_Vars_Iter ci(*ei); ci; ci++) { -  switch ((*ci).var->kind()) { -  case Input_Var: -  { -  if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) -  if ((*ci).coef*sign == 1) -  op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); -  else if ((*ci).coef*sign == -1) -  op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); -  else if ((*ci).coef*sign > 1) -  op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); -  else // (*ci).coef*sign < -1 -  op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); -  break; -  } -  case Global_Var: -  { -  Global_Var_ID g = (*ci).var->get_global_var(); -  if ((*ci).coef*sign == 1) -  op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); -  else if ((*ci).coef*sign == -1) -  op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); -  else if ((*ci).coef*sign > 1) -  op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); -  else // (*ci).coef*sign < -1 -  op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); -  break; -  } -  default: -  throw loop_error("unsupported array index expression"); -  } -  } -  if ((*ei).get_const() != 0) -  op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); -  if (coef != 1) -  op = ocg->CreateIntegerDivide(op, ocg->CreateInt(coef)); -   -  index_lb[i] = op; -  is_index_eq[i] = true; -  break; -  } -  } -  if (is_index_eq[i]) -  continue; -   -  // seperate lower and upper bounds -  std::vector<GEQ_Handle> lb_list, ub_list; -  for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { -  int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); -  if (coef != 0 && (*gi).has_wildcards()) { -  bool clean_bound = true; -  GEQ_Handle h; -  for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) -  if (!findFloorInequality(bound, (*cvi).var, h, bound.set_var(level-1+privatized_levels.size()+i+1))) { -  clean_bound = false; -  break; -  } -  if (!clean_bound) -  continue; -  } -   -  if (coef > 0) -  lb_list.push_back(*gi); -  else if (coef < 0) -  ub_list.push_back(*gi); -  } -  if (lb_list.size() == 0 || ub_list.size() == 0) -  if (already_use_recthull) -  throw loop_error("failed to calcuate array footprint size"); -  else { -  copy_is = RectHull(copy(untampered_copy_is)); -  already_use_recthull = true; -  goto Retry_copy_is; -  } -   -  // build lower bound representation -  Tuple<CG_outputRepr *> lb_repr_list; -  for (int j = 0; j < lb_list.size(); j++) -  lb_repr_list.append(outputLBasRepr(ocg, lb_list[j], bound, -  bound.set_var(level-1+privatized_levels.size()+i+1),  -  index_stride[i], stride_eq, Relation::True(bound.n_set()), -  std::vector<CG_outputRepr *>(bound.n_set()))); -   -  if (lb_repr_list.size() > 1) -  index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); -  else if (lb_repr_list.size() == 1) -  index_lb[i] = lb_repr_list[1]; -   -  // build temporary array size representation -  { -  Relation cal(copy_is.n_set(), 1); -  F_And *f_root = cal.add_and(); -  for (int j = 0; j < ub_list.size(); j++) -  for (int k = 0; k < lb_list.size(); k++) { -  GEQ_Handle h = f_root->add_GEQ(); -   -  for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { -  switch ((*ci).var->kind()) { -  case Input_Var: -  { -  int pos = (*ci).var->get_position(); -  h.update_coef(cal.input_var(pos), (*ci).coef); -  break; -  } -  case Global_Var: -  { -  Global_Var_ID g = (*ci).var->get_global_var(); -  Variable_ID v; -  if (g->arity() == 0) -  v = cal.get_local(g); -  else -  v = cal.get_local(g, (*ci).var->function_of()); -  h.update_coef(v, (*ci).coef); -  break; -  } -  default: -  throw loop_error("cannot calculate temporay array size statically"); -  } -  } -  h.update_const(ub_list[j].get_const()); -   -  for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { -  switch ((*ci).var->kind()) { -  case Input_Var: -  { -  int pos = (*ci).var->get_position(); -  h.update_coef(cal.input_var(pos), (*ci).coef); -  break; -  } -  case Global_Var: -  { -  Global_Var_ID g = (*ci).var->get_global_var(); -  Variable_ID v; -  if (g->arity() == 0) -  v = cal.get_local(g); -  else -  v = cal.get_local(g, (*ci).var->function_of()); -  h.update_coef(v, (*ci).coef); -  break; -  } -  default: -  throw loop_error("cannot calculate temporay array size statically"); -  } -  } -  h.update_const(lb_list[k].get_const()); -   -  h.update_const(1); -  h.update_coef(cal.output_var(1), -1); -  } -   -  cal = Restrict_Domain(cal, copy(copy_is)); -  for (int j = 1; j <= cal.n_inp(); j++) -  cal = Project(cal, j, Input_Var); -  cal.simplify(); -   -  // pad temporary array size -  // TODO: for variable array size, create padding formula -  Conjunct *c = cal.query_DNF()->single_conjunct(); -  bool is_index_bound_const = false; -  for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) -  if ((*gi).is_const(cal.output_var(1))) { -  coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); -  if (padding_stride != 0) { -  size = (size + index_stride[i] - 1) / index_stride[i]; -  if (i == fastest_changing_dimension) -  size = size * padding_stride; -  } -  if (i == fastest_changing_dimension) { -  if (padding_alignment > 1) { // align to boundary for data packing -  int residue = size % padding_alignment; -  if (residue) -  size = size+padding_alignment-residue; -  } -  else if (padding_alignment < -1) {  // un-alignment for memory bank conflicts -  while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1) -  size++; -  } -  } -  index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); -  is_index_bound_const = true; -  } -   -  if (!is_index_bound_const) { -  for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { -  int coef = (*gi).get_coef(cal.output_var(1)); -  if (coef < 0) { -  CG_outputRepr *op = NULL; -  for (Constr_Vars_Iter ci(*gi); ci; ci++) { -  if ((*ci).var != cal.output_var(1)) { -  switch((*ci).var->kind()) { -  case Global_Var: -  { -  Global_Var_ID g = (*ci).var->get_global_var(); -  if ((*ci).coef == 1) -  op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); -  else if ((*ci).coef == -1) -  op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); -  else if ((*ci).coef > 1) -  op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); -  else // (*ci).coef < -1 -  op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); -  break; -  } -  default: -  throw loop_error("failed to generate array index bound code"); -  } -  } -  } -  int c = (*gi).get_const(); -  if (c > 0) -  op = ocg->CreatePlus(op, ocg->CreateInt(c)); -  else if (c < 0) -  op = ocg->CreateMinus(op, ocg->CreateInt(-c)); -  if (padding_stride != 0) { -  if (i == fastest_changing_dimension) { -  coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); -  coef_t t1 = index_stride[i] / g; -  if (t1 != 1) -  op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); -  coef_t t2 = padding_stride / g; -  if (t2 != 1) -  op = ocg->CreateTimes(op, ocg->CreateInt(t2)); -  } -  else if (index_stride[i] != 1) { -  op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); -  } -  } -   -  index_sz.push_back(std::make_pair(i, op)); -  break; -  } -  } -  } -  } -  } -   -  // change the temporary array index order -  for (int i = 0; i < index_sz.size(); i++) -  if (index_sz[i].first == fastest_changing_dimension) -  switch (sym->layout_type()) { -  case IR_ARRAY_LAYOUT_ROW_MAJOR: -  std::swap(index_sz[index_sz.size()-1], index_sz[i]); -  break; -  case IR_ARRAY_LAYOUT_COLUMN_MAJOR: -  std::swap(index_sz[0], index_sz[i]); -  break; -  default: -  throw loop_error("unsupported array layout"); -  } -   -  // declare temporary array or scalar -  IR_Symbol *tmp_sym; -  if (index_sz.size() == 0) { -  tmp_sym = ir->CreateScalarSymbol(sym, memory_type); -  } -  else { -  std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); -  for (int i = 0; i < index_sz.size(); i++) -  tmp_array_size[i] = index_sz[i].second->clone(); -  tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); -  } -   -  // create temporary array read initialization code -  CG_outputRepr *copy_code_read; -  if (has_read_refs) -  if (index_sz.size() == 0) { -  IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); -   -  std::vector<CG_outputRepr *> rhs_index(n_dim); -  for (int i = 0; i < index_lb.size(); i++) -  if (is_index_eq[i]) -  rhs_index[i] = index_lb[i]->clone(); -  else -  rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); -  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); -   -  copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); -  } -  else { -  std::vector<CG_outputRepr *> lhs_index(index_sz.size()); -  for (int i = 0; i < index_sz.size(); i++) { -  int cur_index_num = index_sz[i].first; -  CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); -  if (padding_stride != 0) { -  if (i == n_dim-1) { -  coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); -  coef_t t1 = index_stride[cur_index_num] / g; -  if (t1 != 1) -  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); -  coef_t t2 = padding_stride / g; -  if (t2 != 1) -  cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); -  } -  else if (index_stride[cur_index_num] != 1) { -  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); -  } -  } -   -  if (ir->ArrayIndexStartAt() != 0) -  cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); -  lhs_index[i] = cur_index_repr; -  } -   -  IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); -   -  std::vector<CG_outputRepr *> rhs_index(n_dim); -  for (int i = 0; i < index_lb.size(); i++) -  if (is_index_eq[i]) -  rhs_index[i] = index_lb[i]->clone(); -  else -  rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); -  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); -   -  copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); -  } -   -  // create temporary array write back code -  CG_outputRepr *copy_code_write; -  if (has_write_refs) -  if (index_sz.size() == 0) { -  IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); -   -  std::vector<CG_outputRepr *> rhs_index(n_dim); -  for (int i = 0; i < index_lb.size(); i++) -  if (is_index_eq[i]) -  rhs_index[i] = index_lb[i]->clone(); -  else -  rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); -  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); -   -  copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); -  } -  else { -  std::vector<CG_outputRepr *> lhs_index(n_dim); -  for (int i = 0; i < index_lb.size(); i++) -  if (is_index_eq[i]) -  lhs_index[i] = index_lb[i]->clone(); -  else -  lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); -  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); -   -  std::vector<CG_outputRepr *> rhs_index(index_sz.size()); -  for (int i = 0; i < index_sz.size(); i++) { -  int cur_index_num = index_sz[i].first; -  CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); -  if (padding_stride != 0) { -  if (i == n_dim-1) { -  coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); -  coef_t t1 = index_stride[cur_index_num] / g; -  if (t1 != 1) -  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); -  coef_t t2 = padding_stride / g; -  if (t2 != 1) -  cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); -  } -  else if (index_stride[cur_index_num] != 1) { -  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); -  } -  } -   -  if (ir->ArrayIndexStartAt() != 0) -  cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); -  rhs_index[i] = cur_index_repr; -  } -  IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); -   -  copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); -  } -   -  // now we can remove those loops for array indexes that are -  // dependent on others -  if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { -  Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); -  F_And *f_root = mapping.add_and(); -  for (int i = 1; i <= level-1+privatized_levels.size(); i++) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(mapping.input_var(i), 1); -  h.update_coef(mapping.output_var(i), -1); -  } -   -  int cur_index = 0; -  std::vector<int> mapped_index(index_sz.size()); -  for (int i = 0; i < n_dim; i++) -  if (!is_index_eq[i]) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); -  switch (sym->layout_type()) { -  case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { -  h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); -  mapped_index[index_sz.size()-cur_index-1] = i; -  break; -  } -  case IR_ARRAY_LAYOUT_ROW_MAJOR: { -  h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); -  mapped_index[cur_index] = i; -  break; -  } -  default: -  throw loop_error("unsupported array layout"); -  } -  cur_index++; -  } -   -  wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); -  ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); -   -  // protonu--replacing Chun's old code  -  for (int i = 1; i <= level-1+privatized_levels.size(); i++) { -  wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); -  ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); -  } -   -   -   -  for (int i = 0; i < index_sz.size(); i++) { -  wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); -  ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); -  }       -  wo_copy_is.setup_names(); -  ro_copy_is.setup_names(); -  } -   -  // insert read copy statement -  int old_num_stmt = stmt.size(); -  int ro_copy_stmt_num = -1; -  if (has_read_refs) { -  Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); -  { -  F_And *f_root = copy_xform.add_and(); -  for (int i = 1; i <= ro_copy_is.n_set(); i++) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(copy_xform.input_var(i), 1); -  h.update_coef(copy_xform.output_var(2*i), -1); -  } -  for (int i = 1; i <= dim; i+=2) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(copy_xform.output_var(i), -1); -  h.update_const(lex[i-1]); -  } -  for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(copy_xform.output_var(i), 1); -  } -  } -   -  Statement copy_stmt_read; -  copy_stmt_read.IS = ro_copy_is; -  copy_stmt_read.xform = copy_xform; -  copy_stmt_read.code = copy_code_read; -  copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); -  copy_stmt_read.ir_stmt_node = NULL; -  for (int i = 0; i < level-1; i++) { -  copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; -  if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && -  stmt[*(active.begin())].loop_level[i].payload >= level) { -  int j; -  for (j = 0; j < privatized_levels.size(); j++) -  if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) -  break; -  if (j == privatized_levels.size()) -  copy_stmt_read.loop_level[i].payload = -1; -  else -  copy_stmt_read.loop_level[i].payload = level + j; -  } -  else -  copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; -  copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; -  } -  for (int i = 0; i < privatized_levels.size(); i++) { -  copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; -  copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; -  copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; -  } -  int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); -  for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { -  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; -  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; -  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; -  } -  for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { -  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; -  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; -  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; -  } -   -  shiftLexicalOrder(lex, dim-1, 1); -  stmt.push_back(copy_stmt_read); -  ro_copy_stmt_num = stmt.size() - 1; -  dep.insert(); -  } -   -  // insert write copy statement -  int wo_copy_stmt_num = -1; -  if (has_write_refs) { -  Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); -  { -  F_And *f_root = copy_xform.add_and(); -  for (int i = 1; i <= wo_copy_is.n_set(); i++) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(copy_xform.input_var(i), 1); -  h.update_coef(copy_xform.output_var(2*i), -1); -  } -  for (int i = 1; i <= dim; i+=2) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(copy_xform.output_var(i), -1); -  h.update_const(lex[i-1]); -  } -  for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { -  EQ_Handle h = f_root->add_EQ(); -  h.update_coef(copy_xform.output_var(i), 1); -  } -  } -   -  Statement copy_stmt_write; -  copy_stmt_write.IS = wo_copy_is; -  copy_stmt_write.xform = copy_xform; -  copy_stmt_write.code = copy_code_write; -  copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); -  copy_stmt_write.ir_stmt_node = NULL; -   -  for (int i = 0; i < level-1; i++) { -  copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; -  if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && -  stmt[*(active.begin())].loop_level[i].payload >= level) { -  int j; -  for (j = 0; j < privatized_levels.size(); j++) -  if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) -  break; -  if (j == privatized_levels.size()) -  copy_stmt_write.loop_level[i].payload = -1; -  else -  copy_stmt_write.loop_level[i].payload = level + j; -  } -  else -  copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; -  copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; -  } -  for (int i = 0; i < privatized_levels.size(); i++) { -  copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; -  copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; -  copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; -  } -  int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); -  for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { -  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; -  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; -  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; -  } -  for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { -  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; -  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; -  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; -  } -   -  lex[dim-1]++; -  shiftLexicalOrder(lex, dim-1, -2); -  stmt.push_back(copy_stmt_write); -  wo_copy_stmt_num = stmt.size() - 1; -  dep.insert(); -  }  -   -  // replace original array accesses with temporary array accesses -  for (int i =0; i < stmt_refs.size(); i++) -  for (int j = 0; j < stmt_refs[i].second.size(); j++) { -  if (index_sz.size() == 0) { -  IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); -  ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); -  } -  else { -  std::vector<CG_outputRepr *> index_repr(index_sz.size()); -  for (int k = 0; k < index_sz.size(); k++) { -  int cur_index_num = index_sz[k].first; -   -  CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); -  if (padding_stride != 0) { -  if (k == n_dim-1) { -  coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); -  coef_t t1 = index_stride[cur_index_num] / g; -  if (t1 != 1) -  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); -  coef_t t2 = padding_stride / g; -  if (t2 != 1) -  cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); -  } -  else if (index_stride[cur_index_num] != 1) { -  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); -  } -  } -   -  if (ir->ArrayIndexStartAt() != 0) -  cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); -  index_repr[k] = cur_index_repr; -  } -   -  IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); -  ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); -  } -  } -   -  // update dependence graph -  int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; -  if (ro_copy_stmt_num != -1) { -  for (int i = 0; i < old_num_stmt; i++) { -  std::vector<std::vector<DependenceVector> > D; -   -  for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { -  if (active.find(i) != active.end() && active.find(j->first) == active.end()) { -  std::vector<DependenceVector> dvs1, dvs2; -  for (int k = 0; k < j->second.size(); k++) { -  DependenceVector dv = j->second[k]; -  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) -  dvs1.push_back(dv); -  else -  dvs2.push_back(dv); -  } -  j->second = dvs2; -  if (dvs1.size() > 0) -  dep.connect(ro_copy_stmt_num, j->first, dvs1); -  } -  else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { -  std::vector<DependenceVector> dvs1, dvs2; -  for (int k = 0; k < j->second.size(); k++) { -  DependenceVector dv = j->second[k]; -  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) -  dvs1.push_back(dv); -  else -  dvs2.push_back(dv); -  } -  j->second = dvs2; -  if (dvs1.size() > 0) -  D.push_back(dvs1); -  } -   -  if (j->second.size() == 0) -  dep.vertex[i].second.erase(j++); -  else -  j++; -  } -   -  for (int j = 0; j < D.size(); j++) -  dep.connect(i, ro_copy_stmt_num, D[j]); -  } -   -  // insert dependences from copy statement loop to copied statements -  DependenceVector dv; -  dv.type = DEP_W2R; -  dv.sym = tmp_sym->clone(); -  dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); -  dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); -  for (int i = dep_dim; i < num_dep_dim; i++) { -  dv.lbounds[i] = -posInfinity; -  dv.ubounds[i] = posInfinity; -  }  -  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) -  dep.connect(ro_copy_stmt_num, *i, dv); -  } -   -  if (wo_copy_stmt_num != -1) { -  for (int i = 0; i < old_num_stmt; i++) { -  std::vector<std::vector<DependenceVector> > D; -   -  for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { -  if (active.find(i) != active.end() && active.find(j->first) == active.end()) { -  std::vector<DependenceVector> dvs1, dvs2; -  for (int k = 0; k < j->second.size(); k++) { -  DependenceVector dv = j->second[k]; -  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) -  dvs1.push_back(dv); -  else -  dvs2.push_back(dv); -  } -  j->second = dvs2; -  if (dvs1.size() > 0) -  dep.connect(wo_copy_stmt_num, j->first, dvs1); -  } -  else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { -  std::vector<DependenceVector> dvs1, dvs2; -  for (int k = 0; k < j->second.size(); k++) { -  DependenceVector dv = j->second[k]; -  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) -  dvs1.push_back(dv); -  else -  dvs2.push_back(dv); -  } -  j->second = dvs2; -  if (dvs1.size() > 0) -  D.push_back(dvs1); -  } -   -  if (j->second.size() == 0) -  dep.vertex[i].second.erase(j++); -  else -  j++; -  } -   -  for (int j = 0; j < D.size(); j++) -  dep.connect(i, wo_copy_stmt_num, D[j]); -  } -   -  // insert dependences from copied statements to write statements -  DependenceVector dv; -  dv.type = DEP_W2R; -  dv.sym = tmp_sym->clone(); -  dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); -  dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); -  for (int i = dep_dim; i < num_dep_dim; i++) { -  dv.lbounds[i] = -posInfinity; -  dv.ubounds[i] = posInfinity; -  }  -  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) -  dep.connect(*i, wo_copy_stmt_num, dv); -   -  } -   -  // update variable name for dependences among copied statements -  for (int i = 0; i < old_num_stmt; i++) { -  if (active.find(i) != active.end()) -  for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) -  if (active.find(j->first) != active.end()) -  for (int k = 0; k < j->second.size(); k++) { -  IR_Symbol *s = tmp_sym->clone(); -  j->second[k].sym = s; -  } -  } -   -  // insert anti-dependence from write statement to read statement -  if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) -  if (dep_dim >= 0) { -  DependenceVector dv; -  dv.type = DEP_R2W; -  dv.sym = tmp_sym->clone(); -  dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); -  dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); -  for (int k = dep_dim; k < num_dep_dim; k++) { -  dv.lbounds[k] = -posInfinity; -  dv.ubounds[k] = posInfinity; -  } -  for (int k = 0; k < dep_dim; k++) { -  if (k != 0) { -  dv.lbounds[k-1] = 0; -  dv.ubounds[k-1] = 0; -  } -  dv.lbounds[k] = 1; -  dv.ubounds[k] = posInfinity; -  dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); -  } -  } -   -   -  // cleanup -  delete sym; -  delete tmp_sym; -  for (int i = 0; i < index_lb.size(); i++) { -  index_lb[i]->clear(); -  delete index_lb[i]; -  } -  for (int i = 0; i < index_sz.size(); i++) { -  index_sz[i].second->clear(); -  delete index_sz[i].second; -  } -   -  return true; -  } -*/  bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level,                                 const std::vector<int> &privatized_levels,                                 bool allow_extra_read, int fastest_changing_dimension, diff --git a/src/loop_tile.cc b/src/loop_tile.cc index aae8dd8..41c3e7f 100644 --- a/src/loop_tile.cc +++ b/src/loop_tile.cc @@ -126,11 +126,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,      std::set<int> private_stmt;      for (std::set<int>::iterator i = same_tile_controlling_loop.begin();           i != same_tile_controlling_loop.end(); i++) { -//     if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) -//       same_tiled_loop.insert(*i); -              // should test dim's value directly but it is ok for now -//    if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity)        if (same_tiled_loop.find(*i) == same_tiled_loop.end()            && overflow.find(*i) != overflow.end())          private_stmt.insert(*i); @@ -138,26 +134,6 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,      // extract the union of the iteration space to be considered      Relation hull; -    /*{ -      Tuple < Relation > r_list; -      Tuple<int> r_mask; -       -      for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); -      i != same_tile_controlling_loop.end(); i++) -      if (private_stmt.find(*i) == private_stmt.end()) { -      Relation r = project_onto_levels(getNewIS(*i), dim + 1, -      true); -      for (int j = outer_dim; j < dim; j++) -      r = Project(r, j + 1, Set_Var); -      for (int j = 0; j < outer_dim; j += 2) -      r = Project(r, j + 1, Set_Var); -      r_list.append(r); -      r_mask.append(1); -      } -       -      hull = Hull(r_list, r_mask, 1, true); -      }*/ -          {        std::vector<Relation> r_list; @@ -176,7 +152,6 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,          }        hull = SimpleHull(r_list); -      // hull = Hull(r_list, std::vector<bool>(r_list.size(), true), 1, true);      }      // extract the bound of the dimension to be tiled @@ -553,25 +528,7 @@ void Loop::tile(int stmt_num, int level, int tile_size, int outer_level,          h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1);          h.update_coef(ub, 1);        } -      // if (private_stmt.find(*i) != private_stmt.end()) { -      //   if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii -      //     GEQ_Handle h = f_root->add_GEQ(); -      //     h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); -      //     h.update_coef(ub, 1); -       -      //     stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); -      //     f_root = stmt[*i].xform.and_with_and(); -      //     EQ_Handle h1 = f_root->add_EQ(); -      //     h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); -      //     h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); -      //   } -      //   else if (method == StridedTile) { // e.g. ii <= UB since i does not exist -      //     GEQ_Handle h = f_root->add_GEQ(); -      //     h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); -      //     h.update_coef(ub, 1); -      //   } -      // } -       +        // restrict original loop index inside the tile        else {          if (method == StridedTile) { // e.g. ii <= i < ii + tile_size diff --git a/src/loop_unroll.cc b/src/loop_unroll.cc index 9bc6acf..911d900 100644 --- a/src/loop_unroll.cc +++ b/src/loop_unroll.cc @@ -20,7 +20,6 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,                             std::vector<std::vector<std::string> > idxNames,                             int cleanup_split_level) {    // check for sanity of parameters -  // check for sanity of parameters    if (unroll_amount < 0)      throw std::invalid_argument(        "invalid unroll amount " + to_string(unroll_amount)); @@ -71,16 +70,6 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,              }              dim2 = stmt[*i].loop_level[dim2].payload; -            /*if (dv.isCarried(dim2) -              && (dv.hasNegative(dim2) && !dv.quasi)) -              throw loop_error( -              "loop error: Unrolling is illegal, dependence violation!"); -               -              if (dv.isCarried(dim2) -              && (dv.hasPositive(dim2) && dv.quasi)) -              throw loop_error( -              "loop error: Unrolling is illegal, dependence violation!"); -            */              bool safe = false;              if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { @@ -89,20 +78,11 @@ std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount,                    "loop error: a quasi dependence with a positive carried distance");                if (!dv.quasi) {                  if (dv.lbounds[dim2] != posInfinity) { -                  //if (dv.lbounds[dim2] != negInfinity)                    if (dv.lbounds[dim2] > unroll_amount)                      safe = true;                  } else                    safe = true; -              }/* else { -                  if (dv.ubounds[dim2] != negInfinity) { -                  if (dv.ubounds[dim2] != posInfinity) -                  if ((-(dv.ubounds[dim2])) > unroll_amount) -                  safe = true; -                  } else -                  safe = true; -                  }*/ -               +              }                if (!safe) {                  for (int l = level + 1; l <= (n - 1) / 2; l++) {                    int dim3 = l - 1;  | 
