diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/chill_run.cc | 89 | ||||
| -rw-r--r-- | src/chill_run_util.cc | 120 | ||||
| -rw-r--r-- | src/chillmodule.cc | 795 | ||||
| -rw-r--r-- | src/dep.cc | 567 | ||||
| -rw-r--r-- | src/ir_rose.cc | 1756 | ||||
| -rw-r--r-- | src/ir_rose_utils.cc | 67 | ||||
| -rw-r--r-- | src/irtools.cc | 279 | ||||
| -rw-r--r-- | src/loop.cc | 1859 | ||||
| -rw-r--r-- | src/loop_basic.cc | 1538 | ||||
| -rw-r--r-- | src/loop_datacopy.cc | 2166 | ||||
| -rw-r--r-- | src/loop_extra.cc | 224 | ||||
| -rw-r--r-- | src/loop_tile.cc | 630 | ||||
| -rw-r--r-- | src/loop_unroll.cc | 1166 | ||||
| -rw-r--r-- | src/omegatools.cc | 1185 | ||||
| -rw-r--r-- | src/parse_expr.ll | 24 | ||||
| -rw-r--r-- | src/parse_expr.yy | 85 | 
16 files changed, 12550 insertions, 0 deletions
| diff --git a/src/chill_run.cc b/src/chill_run.cc new file mode 100644 index 0000000..4eafe65 --- /dev/null +++ b/src/chill_run.cc @@ -0,0 +1,89 @@ +#include "chilldebug.h" + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +#include "loop.hh" +#include <omega.h> +#include "ir_code.hh" +#include "ir_rose.hh" + +#include "chillmodule.hh" // Python wrapper functions for CHiLL + +//--- +// CHiLL globals +//--- +Loop *myloop = NULL; +IR_Code *ir_code = NULL; +bool repl_stop = false; +bool is_interactive = false; + +std::vector<IR_Control *> ir_controls; +std::vector<int> loops; + +//--- +// CHiLL program main +// Initialize state and run script or interactive mode +//--- +int main( int argc, char* argv[] ) +{ +  DEBUG_PRINT("%s  main()\n", argv[0]); +  if (argc > 2) { +    fprintf(stderr, "Usage: %s [script_file]\n", argv[0]); +    exit(-1); +  } +   +  int fail = 0; +   +  // Create PYTHON interpreter +  /* Pass argv[0] to the Python interpreter */ +  Py_SetProgramName(argv[0]); +   +  /* Initialize the Python interpreter.  Required. */ +  Py_Initialize(); +   +  /* Add a static module */ +  initchill(); +   +  if (argc == 2) { +    FILE* f = fopen(argv[1], "r"); +    if(!f){ +      printf("can't open script file \"%s\"\n", argv[1]); +      exit(-1); +    } +    PyRun_SimpleFile(f, argv[1]); +    fclose(f); +  } +  if (argc == 1) { +    //--- +    // Run a CHiLL interpreter +    //--- +    printf("CHiLL v" CHILL_BUILD_VERSION " (built on " CHILL_BUILD_DATE ")\n"); +    printf("Copyright (C) 2008 University of Southern California\n"); +    printf("Copyright (C) 2009-2012 University of Utah\n"); +    //is_interactive = true; // let the lua interpreter know. +    fflush(stdout); +    // TODO: read lines of python code. 
+    //Not sure if we should set fail from interactive mode +    printf("CHiLL ending...\n"); +    fflush(stdout); +  } + +  //printf("DONE with PyRun_SimpleString()\n"); +   +  if (!fail && ir_code != NULL && myloop != NULL && myloop->stmt.size() != 0 && !myloop->stmt[0].xform.is_null()) { +    int lnum_start; +    int lnum_end; +    lnum_start = get_loop_num_start(); +    lnum_end = get_loop_num_end(); +    DEBUG_PRINT("calling ROSE code gen?    loop num %d\n", lnum); +    finalize_loop(lnum_start, lnum_end); +    ((IR_roseCode*)(ir_code))->finalizeRose(); +    delete ir_code; +  } +  Py_Finalize(); +  return 0; +} diff --git a/src/chill_run_util.cc b/src/chill_run_util.cc new file mode 100644 index 0000000..29568e7 --- /dev/null +++ b/src/chill_run_util.cc @@ -0,0 +1,120 @@ +#include <stdio.h> +#include <string.h> +#include "chill_run_util.hh" + +static std::string to_string(int ival) { +  char buffer[4]; +  sprintf(buffer, "%d", ival); +  return std::string(buffer); +} + +simap_vec_t* make_prog(simap_vec_t* cond) { +  return cond; +} + +simap_vec_t* make_cond_gt(simap_t* lhs, simap_t* rhs) { +  simap_vec_t* nvec = new simap_vec_t(); +  for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) +    (*lhs)[it->first] -= it->second; +  (*lhs)[to_string(0)] -= 1; +  nvec->push_back(*lhs); +  delete rhs; +  delete lhs; +  return nvec; +} + +simap_vec_t* make_cond_lt(simap_t* lhs, simap_t* rhs) { +  return make_cond_gt(rhs, lhs); +} + +simap_vec_t* make_cond_ge(simap_t* lhs, simap_t* rhs) { +  simap_vec_t* nvec = new simap_vec_t(); +  for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) +    (*lhs)[it->first] -= it->second; +  nvec->push_back(*lhs); +  delete rhs; +  delete lhs; +  return nvec; +} + +simap_vec_t* make_cond_le(simap_t* lhs, simap_t* rhs) { +  return make_cond_ge(rhs, lhs); +} + +simap_vec_t* make_cond_eq(simap_t* lhs, simap_t* rhs) { +  simap_vec_t* nvec = new simap_vec_t(); +  for(simap_t::iterator it = lhs->begin(); it != 
lhs->end(); it++) +    (*rhs)[it->first] -= it->second; +  nvec->push_back(*rhs); +  for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) +    it->second = -it->second; +  nvec->push_back(*rhs); +  delete rhs; +  delete lhs; +  return nvec; +} + +simap_t* make_cond_item_add(simap_t* lhs, simap_t* rhs) { +  for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) +    (*rhs)[it->first] += it->second; +  delete lhs; +  return rhs; +} + +simap_t* make_cond_item_sub(simap_t* lhs, simap_t* rhs) { +  for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) +    (*rhs)[it->first] -= it->second; +  delete lhs; +  return rhs; +} + +simap_t* make_cond_item_mul(simap_t* lhs, simap_t* rhs) { +  (*lhs)[to_string(0)] += 0; +  (*rhs)[to_string(0)] += 0; +  if(rhs->size() == 1) { +    int t = (*rhs)[to_string(0)]; +    for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) +      it->second *= t; +    delete rhs; +    return lhs; +  } +  else if(rhs->size() == 1) { +    int t = (*lhs)[to_string(0)]; +    for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) +      it->second *= t; +    delete lhs; +    return rhs; +  } +  else { +    fprintf(stderr, "require Presburger formula"); +    delete lhs; +    delete rhs; +    // exit(2); <-- this may be a boost feature +  } +} + +simap_t* make_cond_item_neg(simap_t* expr) { +  for (simap_t::iterator it = expr->begin(); it != expr->end(); it++) { +    it->second = -(it->second); +  } +  return expr; +} + +simap_t* make_cond_item_number(int n) { +  simap_t* nmap = new simap_t(); +  (*nmap)[to_string(0)] = n; +  return nmap; +} + +simap_t* make_cond_item_variable(const char* var) { +  simap_t* nmap = new simap_t(); +  (*nmap)[std::string(var)] = 1; +  return nmap; +} + +simap_t* make_cond_item_level(int n) { +  simap_t* nmap = new simap_t(); +  (*nmap)[to_string(n)] = 1; +  return nmap; +} + diff --git a/src/chillmodule.cc b/src/chillmodule.cc new file mode 100644 index 0000000..0e41f88 --- 
/dev/null +++ b/src/chillmodule.cc @@ -0,0 +1,795 @@ +#include "chilldebug.h" + +#include "chill_run_util.hh" + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <omega.h> +#include "loop.hh" +#include "ir_code.hh" +#include "ir_rose.hh" + +#include "chillmodule.hh" + +using namespace omega; + +extern Loop *myloop; +extern IR_Code *ir_code; +extern bool is_interactive; +extern bool repl_stop; + +std::string procedure_name; +std::string source_filename; + +int loop_start_num; +int loop_end_num; + +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +// ----------------------- // +// CHiLL support functions // +// ----------------------- // +// not sure yet if this actually needs to be exposed to the python interface +// these four functions are here to maintain similarity to the Lua interface +int get_loop_num_start() { +  return loop_start_num; +} + +int get_loop_num_end() { +  return loop_end_num; +} + +static void set_loop_num_start(int start_num) { +  loop_start_num = start_num; +} + +static void set_loop_num_end(int end_num) { +  loop_end_num = end_num; +} + +// TODO: finalize_loop(int,int) and init_loop(int,int) are identical to thier Lua counterparts. 
+// consider integrating them + +void finalize_loop(int loop_num_start, int loop_num_end) { +  if (loop_num_start == loop_num_end) { +    ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); +    ir_controls[loops[loop_num_start]] = NULL; +  } +  else { +    std::vector<IR_Control *> parm; +    for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) +      parm.push_back(ir_controls[i]); +    IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); +    ir_code->ReplaceCode(block, myloop->getCode()); +    for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { +      delete ir_controls[i]; +      ir_controls[i] = NULL; +    } +  } +  delete myloop; +} +void finalize_loop() { +  int loop_num_start = get_loop_num_start(); +  int loop_num_end = get_loop_num_end(); +  finalize_loop(loop_num_start, loop_num_end); +} +static void init_loop(int loop_num_start, int loop_num_end) { +  if (source_filename.empty()) { +    fprintf(stderr, "source file not set when initializing the loop"); +    if (!is_interactive) +      exit(2); +  } +  else { +    if (ir_code == NULL) { +      if (procedure_name.empty()) +        procedure_name = "main"; +         +      ir_code = new IR_roseCode(source_filename.c_str(), procedure_name.c_str()); +           +      IR_Block *block = ir_code->GetCode(); +      ir_controls = ir_code->FindOneLevelControlStructure(block); +      for (int i = 0; i < ir_controls.size(); i++) { +        if (ir_controls[i]->type() == IR_CONTROL_LOOP) +          loops.push_back(i); +      } +      delete block; +    } +    if (myloop != NULL && myloop->isInitialized()) { +       finalize_loop(); +    } +  } +  set_loop_num_start(loop_num_start); +  set_loop_num_end(loop_num_end); +  if (loop_num_end < loop_num_start) { +    fprintf(stderr, "the last loop must be after the start loop"); +    if (!is_interactive) +      exit(2); +  }               +  if (loop_num_end >= loops.size()) { +    fprintf(stderr, "loop 
%d does not exist", loop_num_end); +    if (!is_interactive) +      exit(2); +  } +  std::vector<IR_Control *> parm; +  for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { +    if (ir_controls[i] == NULL) { +      fprintf(stderr, "loop has already been processed"); +      if (!is_interactive) +        exit(2); +    } +    parm.push_back(ir_controls[i]); +  } +  IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); +  myloop = new Loop(block); +  delete block;   +  //if (is_interactive) printf("%s ", PROMPT_STRING); +} + +// ----------------------- // +// Python support funcions // +// ----------------------- // + +// -- CHiLL support -- // +static void strict_arg_num(PyObject* args, int arg_num, const char* fname = NULL) { +  int arg_given = PyTuple_Size(args); +  char msg[128]; +  if(arg_num != arg_given) { +    if(fname) +      sprintf(msg, "%s: expected %i arguments, was given %i.", fname, arg_num, arg_given); +    else +      sprintf(msg, "Expected %i argumets, was given %i.", arg_num, arg_given); +    throw std::runtime_error(msg); +  } +} + +static int strict_arg_range(PyObject* args, int arg_min, int arg_max, const char* fname = NULL) { +  int arg_given = PyTuple_Size(args); +  char msg[128]; +  if(arg_given < arg_min || arg_given > arg_max) { +    if(fname) +      sprintf(msg, "%s: expected %i to %i arguments, was given %i.", fname, arg_min, arg_max, arg_given); +    else +      sprintf(msg, "Expected %i to %i, argumets, was given %i.", arg_min, arg_max, arg_given); +    throw std::runtime_error(msg); +  } +  return arg_given; +} + +static int intArg(PyObject* args, int index, int dval = 0) { +  if(PyTuple_Size(args) <= index) +    return dval;  +  int ival; +  PyObject *item = PyTuple_GetItem(args, index);  +  Py_INCREF(item); +  if (PyInt_Check(item)) ival = PyInt_AsLong(item); +  else { +    fprintf(stderr, "argument at index %i is not an int\n", index); +    exit(-1); +  } +  return ival; +} + +static std::string 
strArg(PyObject* args, int index, const char* dval = NULL) { +  if(PyTuple_Size(args) <= index) +    return dval; +  std::string strval; +  PyObject *item = PyTuple_GetItem(args, index);  +  Py_INCREF(item); +  if (PyString_Check(item)) strval = strdup(PyString_AsString(item)); +  else { +    fprintf(stderr, "argument at index %i is not an string\n", index); +    exit(-1); +  } +  return strval; +} + +static bool boolArg(PyObject* args, int index, bool dval = false) { +  if(PyTuple_Size(args) <= index) +    return dval; +  bool bval; +  PyObject* item = PyTuple_GetItem(args, index); +  Py_INCREF(item); +  return (bool)PyObject_IsTrue(item); +} + +static bool tostringintmapvector(PyObject* args, int index, std::vector<std::map<std::string,int> >& vec) { +  if(PyTuple_Size(args) <= index) +    return false; +  PyObject* seq = PyTuple_GetItem(args, index); +  //TODO: Typecheck +  int seq_len = PyList_Size(seq); +  for(int i = 0; i < seq_len; i++) { +    std::map<std::string,int> map; +    PyObject* dict = PyList_GetItem(seq, i); +    PyObject* keys = PyDict_Keys(dict); +    //TODO: Typecheck +    int dict_len = PyList_Size(keys); +    for(int j = 0; j < dict_len; j++) { +      PyObject* key = PyList_GetItem(keys, j); +      PyObject* value = PyDict_GetItem(dict, key); +      std::string str_key = strdup(PyString_AsString(key)); +      int int_value = PyInt_AsLong(value); +      map[str_key] = int_value; +    } +    vec.push_back(map); +  } +  return true; +} + +static bool tointvector(PyObject* seq, std::vector<int>& vec) { +  //TODO: Typecheck +  int seq_len = PyList_Size(seq); +  for(int i = 0; i < seq_len; i++) { +    PyObject* item = PyList_GetItem(seq, i); +    vec.push_back(PyInt_AsLong(item)); +  } +  return true; +} + +static bool tointvector(PyObject* args, int index, std::vector<int>& vec) { +  if(PyTuple_Size(args) <= index) +    return false; +  PyObject* seq = PyTuple_GetItem(args, index); +  return tointvector(seq, vec); +} + +static bool 
tointset(PyObject* args, int index, std::set<int>& set) { +  if(PyTuple_Size(args) <= index) +    return false; +  PyObject* seq = PyTuple_GetItem(args, index); +  //TODO: Typecheck +  int seq_len = PyList_Size(seq); +  for(int i = 0; i < seq_len; i++) { +    PyObject* item = PyList_GetItem(seq, i); +    set.insert(PyInt_AsLong(item)); +  } +  return true; +} +static bool tointmatrix(PyObject* args, int index, std::vector<std::vector<int> >& mat) { +  if(PyTuple_Size(args) <= index) +    return false; +  PyObject* seq_one = PyTuple_GetItem(args, index); +  int seq_one_len = PyList_Size(seq_one); +  for(int i = 0; i < seq_one_len; i++) { +    std::vector<int> vec; +    PyObject* seq_two = PyList_GetItem(seq_one, i); +    int seq_two_len = PyList_Size(seq_two); +    for(int j = 0; j < seq_two_len; j++) { +      PyObject* item = PyList_GetItem(seq_two, j); +      vec.push_back(PyInt_AsLong(item)); +    } +    mat.push_back(vec); +  } +  return true; +} + +// ------------------------- // +// CHiLL interface functions // +// ------------------------- // + +static PyObject* chill_source(PyObject* self, PyObject* args) { +  strict_arg_num(args, 1, "source"); +  source_filename = strArg(args, 0); +  Py_RETURN_NONE; +} + +static PyObject* chill_procedure(PyObject* self, PyObject* args) { +  if(!procedure_name.empty()) { +    fprintf(stderr, "only one procedure can be handled in a script"); +    if(!is_interactive) +      exit(2); +  } +  procedure_name = strArg(args, 0); +  Py_RETURN_NONE; +} + +static PyObject* chill_loop(PyObject* self, PyObject* args) { +  // loop (n) +  // loop (n:m) +   +  int nargs = PyTuple_Size(args); +  int start_num; +  int end_num; +  if(nargs == 1) { +    start_num = intArg(args, 0); +    end_num = start_num; +  } +  else if(nargs == 2) { +    start_num = intArg(args, 0); +    end_num = intArg(args, 1); +  } +  else { +    fprintf(stderr, "loop takes one or two arguments"); +    if(!is_interactive) +      exit(2); +  } +  
set_loop_num_start(start_num); +  set_loop_num_end(end_num); +  init_loop(start_num, end_num); +  Py_RETURN_NONE; +} + +static PyObject* chill_print_code(PyObject* self, PyObject* args) { +  strict_arg_num(args, 0, "print_code"); +  myloop->printCode(); +  printf("\n"); +  Py_RETURN_NONE; +} + +static PyObject* chill_print_dep(PyObject* self, PyObject* args) { +  strict_arg_num(args, 0, "print_dep"); +  myloop->printDependenceGraph(); +  Py_RETURN_NONE; +} + +static PyObject* chill_print_space(PyObject* self, PyObject* args) { +  strict_arg_num(args, 0, "print_space"); +  myloop->printIterationSpace(); +  Py_RETURN_NONE; +} + +static PyObject* chill_exit(PyObject* self, PyObject* args) { +  strict_arg_num(args, 0, "exit"); +  repl_stop = true; +  Py_RETURN_NONE; +} + +static void add_known(std::string cond_expr) { +  int num_dim = myloop->known.n_set(); +  std::vector<std::map<std::string, int> >* cond; +  // TODO since we are using python, change this! +  cond = parse_relation_vector(cond_expr.c_str()); +   +  Relation rel(num_dim); +  F_And *f_root = rel.add_and(); +  for (int j = 0; j < cond->size(); j++) { +    GEQ_Handle h = f_root->add_GEQ(); +    for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { +      try { +        int dim = from_string<int>(it->first); +        if (dim == 0) +          h.update_const(it->second); +        else +          throw std::invalid_argument("only symbolic variables are allowed in known condition"); +      } +      catch (std::ios::failure e) { +        Free_Var_Decl *g = NULL; +        for (unsigned i = 0; i < myloop->freevar.size(); i++) { +          std::string name = myloop->freevar[i]->base_name(); +          if (name == it->first) { +            g = myloop->freevar[i]; +            break; +          } +        } +        if (g == NULL) +          throw std::invalid_argument("symbolic variable " + it->first + " not found"); +        else +          
h.update_coef(rel.get_local(g), it->second); +      } +    } +  } +  myloop->addKnown(rel); +} + +static PyObject* chill_known(PyObject* self, PyObject* args) { +  strict_arg_num(args, 1, "known"); +  if (PyList_Check(PyTuple_GetItem(args, 0))) { +    PyObject* list = PyTuple_GetItem(args, 0); +    for (int i = 0; i < PyList_Size(list); i++) { +      add_known(std::string(PyString_AsString(PyList_GetItem(list, i)))); +    } +  } +  else { +    add_known(strArg(args, 0)); +  } +  Py_RETURN_NONE; +} + +static PyObject* chill_remove_dep(PyObject* self, PyObject* args) { +  strict_arg_num(args, 0, "remove_dep"); +  int from = intArg(args, 0); +  int to = intArg(args, 1); +  myloop->removeDependence(from, to); +  Py_RETURN_NONE; +} + +static PyObject* chill_original(PyObject* self, PyObject* args) { +  strict_arg_num(args, 0, "original"); +  myloop->original(); +  Py_RETURN_NONE; +} + +static PyObject* chill_permute(PyObject* self, PyObject* args) { +  int nargs = strict_arg_range(args, 1, 3, "permute"); +  if((nargs < 1) || (nargs > 3)) +    throw std::runtime_error("incorrect number of arguments in permute"); +  if(nargs == 1) { +    // premute ( vector ) +     std::vector<int> pi; +    if(!tointvector(args, 0, pi)) +      throw std::runtime_error("first arg in permute(pi) must be an int vector"); +    myloop->permute(pi); +  } +  else if (nargs == 2) { +    // permute ( set, vector ) +    std::set<int> active; +    std::vector<int> pi; +    if(!tointset(args, 0, active)) +      throw std::runtime_error("the first argument in permute(active, pi) must be an int set"); +    if(!tointvector(args, 1, pi)) +      throw std::runtime_error("the second argument in permute(active, pi) must be an int vector"); +     myloop->permute(active, pi); +  } +  else if (nargs == 3) { +    int stmt_num = intArg(args, 1); +    int level = intArg(args, 2); +    std::vector<int> pi; +    if(!tointvector(args, 3, pi)) +      throw std::runtime_error("the third argument in permute(stmt_num, 
level, pi) must be an int vector"); +    myloop->permute(stmt_num, level, pi); +  } +  Py_RETURN_NONE; +} + +static PyObject* chill_pragma(PyObject* self, PyObject* args) { +  strict_arg_num(args, 3, "pragma"); +  int stmt_num = intArg(args, 1); +  int level = intArg(args, 1); +  std::string pragmaText = strArg(args, 2); +  myloop->pragma(stmt_num, level, pragmaText); +  Py_RETURN_NONE; +} + +static PyObject* chill_prefetch(PyObject* self, PyObject* args) { +  strict_arg_num(args, 3, "prefetch"); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  std::string prefetchText = strArg(args, 2); +  int hint = intArg(args, 3); +  myloop->prefetch(stmt_num, level, prefetchText, hint); +  Py_RETURN_NONE; +} + +static PyObject* chill_tile(PyObject* self, PyObject* args) { +  int nargs = strict_arg_range(args, 3, 7, "tile"); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  int tile_size = intArg(args, 2); +  if(nargs == 3) { +    myloop->tile(stmt_num, level, tile_size); +  } +  else if(nargs >= 4) { +    int outer_level = intArg(args, 3); +    if(nargs >= 5) { +      TilingMethodType method = StridedTile; +      int imethod = intArg(args, 4, 2); //< don't know if a default value is needed +      // check method input against expected values +      if (imethod == 0) +        method = StridedTile; +      else if (imethod == 1) +        method = CountedTile; +      else +        throw std::runtime_error("5th argument must be either strided or counted"); +      if(nargs >= 6) { +        int alignment_offset = intArg(args, 5); +        if(nargs == 7) { +          int alignment_multiple = intArg(args, 6, 1); +          myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset, alignment_multiple); +        } +        if(nargs == 6) +          myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset); +      } +      if(nargs == 5) +        myloop->tile(stmt_num, level, tile_size, outer_level, 
method); +    } +  if(nargs == 4) +    myloop->tile(stmt_num, level, tile_size, outer_level); +  } +  Py_RETURN_NONE; +} + +static void chill_datacopy_vec(PyObject* args) { +  // Overload 1: bool datacopy( +  //    const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, +  //    int level, +  //    bool allow_extra_read = false, +  //    int fastest_changing_dimension = -1, +  //    int padding_stride = 1, +  //    int padding_alignment = 4, +  //    int memory_type = 0); +  std::vector<std::pair<int, std::vector<int> > > array_ref_nums; +  // expect list(tuple(int,list(int))) +  // or dict(int,list(int)) +  if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { +    PyObject* list = PyTuple_GetItem(args, 0); +    for(int i = 0; i < PyList_Size(list); i ++) { +      PyObject* tup = PyList_GetItem(list, i); +      int index = PyLong_AsLong(PyTuple_GetItem(tup, 0)); +      std::vector<int> vec; +      tointvector(PyTuple_GetItem(tup, 1), vec); +      array_ref_nums.push_back(std::pair<int, std::vector<int> >(index, vec)); +    } +  } +  else if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { +    PyObject* dict = PyTuple_GetItem(args, 0); +    PyObject* klist = PyDict_Keys(dict); +    for(int ki = 0; ki < PyList_Size(klist); ki++) { +      PyObject* index = PyList_GetItem(klist, ki); +      std::vector<int> vec; +      tointvector(PyDict_GetItem(dict,index), vec); +      array_ref_nums.push_back(std::pair<int, std::vector<int> >(PyLong_AsLong(index), vec)); +    } +    Py_DECREF(klist); +  } +  else { +    //TODO: this should never happen +  } +  int level = intArg(args, 1); +  bool allow_extra_read = boolArg(args, 2, false); +  int fastest_changing_dimension = intArg(args, 3, -1); +  int padding_stride = intArg(args, 4, 1); +  int padding_alignment = intArg(args, 5, 4); +  int memory_type = intArg(args, 6, 0); +  myloop->datacopy(array_ref_nums, level, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + 
+static void chill_datacopy_int(PyObject* args) { +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  std::string array_name = strArg(args,2,0); +  bool allow_extra_read = boolArg(args,3,false); +  int fastest_changing_dimension = intArg(args, 4, -1); +  int padding_stride = intArg(args, 5, 1); +  int padding_alignment = intArg(args, 6, 4); +  int memory_type = intArg(args, 7, 0); +  myloop->datacopy(stmt_num, level, array_name, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +static PyObject* chill_datacopy(PyObject* self, PyObject* args) { +  // Overload 2: bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); +  int nargs = strict_arg_range(args, 3, 7, "datacopy"); +  if(PyList_CheckExact(PyTuple_GetItem(args,0)) || PyDict_CheckExact(PyTuple_GetItem(args, 0))) { +    chill_datacopy_vec(args); +  } +  else { +    chill_datacopy_int(args); +  } +  Py_RETURN_NONE; +} + +static PyObject* chill_datacopy_privatized(PyObject* self, PyObject* args) { +  //  bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); +  int nargs = strict_arg_range(args, 4, 9, "datacopy_privatized"); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  std::string array_name = strArg(args, 2); +  std::vector<int> privatized_levels; +  tointvector(args, 3, privatized_levels); +  bool allow_extra_read = boolArg(args, 4, false); +  int fastest_changing_dimension = intArg(args, 5, -1); +  int padding_stride = intArg(args, 6, 1); +  int padding_alignment = intArg(args, 7, 1); +  int memory_type = intArg(args, 8); +  
myloop->datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +  Py_RETURN_NONE; +} + +static PyObject* chill_unroll(PyObject* self, PyObject* args) { +  int nargs = strict_arg_range(args, 3, 4, "unroll"); +  //std::set<int> unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >(), int cleanup_split_level = 0); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  int unroll_amount = intArg(args, 2); +  std::vector< std::vector<std::string> > idxNames = std::vector< std::vector<std::string> >(); +  int cleanup_split_level = intArg(args, 3); +  myloop->unroll(stmt_num, level, unroll_amount, idxNames, cleanup_split_level); +  Py_RETURN_NONE; +} +   +static PyObject* chill_unroll_extra(PyObject* self, PyObject* args) { +  int nargs = strict_arg_range(args, 3, 4, "unroll_extra"); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  int unroll_amount = intArg(args, 2); +  int cleanup_split_level = intArg(args, 3, 0); +  myloop->unroll_extra(stmt_num, level, unroll_amount, cleanup_split_level);  +  Py_RETURN_NONE; +} +   +static PyObject* chill_split(PyObject* self, PyObject* args) { +  strict_arg_num(args, 3, "split"); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  int num_dim = myloop->stmt[stmt_num].xform.n_out(); +   +  std::vector<std::map<std::string, int> >* cond; +  std::string cond_expr = strArg(args, 2); +  cond = parse_relation_vector(cond_expr.c_str()); +   +  Relation rel((num_dim-1)/2); +  F_And *f_root = rel.add_and(); +  for (int j = 0; j < cond->size(); j++) { +    GEQ_Handle h = f_root->add_GEQ(); +    for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { +      try { +        int dim = from_string<int>(it->first); +        if (dim == 0) +          
h.update_const(it->second); +        else { +          if (dim > (num_dim-1)/2) +            throw std::invalid_argument("invalid loop level " + to_string(dim) + " in split condition"); +          h.update_coef(rel.set_var(dim), it->second); +        } +      } +      catch (std::ios::failure e) { +        Free_Var_Decl *g = NULL; +        for (unsigned i = 0; i < myloop->freevar.size(); i++) { +          std::string name = myloop->freevar[i]->base_name(); +          if (name == it->first) { +            g = myloop->freevar[i]; +            break; +          } +        } +        if (g == NULL) +          throw std::invalid_argument("unrecognized variable " + to_string(it->first.c_str())); +        h.update_coef(rel.get_local(g), it->second); +      } +    } +  } +  myloop->split(stmt_num,level,rel); +  Py_RETURN_NONE; +} + +static PyObject* chill_nonsingular(PyObject* self, PyObject* args) { +  std::vector< std::vector<int> > mat; +  tointmatrix(args, 0, mat); +  myloop->nonsingular(mat); +  Py_RETURN_NONE; +} + +static PyObject* chill_skew(PyObject* self, PyObject* args) { +  std::set<int> stmt_nums; +  std::vector<int> skew_amounts; +  int level = intArg(args, 1); +  tointset(args, 0, stmt_nums); +  tointvector(args, 2, skew_amounts); +  myloop->skew(stmt_nums, level, skew_amounts); +  Py_RETURN_NONE; +} + +static PyObject* chill_scale(PyObject* self, PyObject* args) { +  strict_arg_num(args, 3); +  std::set<int> stmt_nums; +  int level = intArg(args, 1); +  int scale_amount = intArg(args, 2); +  tointset(args, 0, stmt_nums); +  myloop->scale(stmt_nums, level, scale_amount); +  Py_RETURN_NONE; +} + +static PyObject* chill_reverse(PyObject* self, PyObject* args) { +  strict_arg_num(args, 2); +  std::set<int> stmt_nums; +  int level = intArg(args, 1); +  tointset(args, 0, stmt_nums); +  myloop->reverse(stmt_nums, level); +  Py_RETURN_NONE; +} + +static PyObject* chill_shift(PyObject* self, PyObject* args) { +  strict_arg_num(args, 3); +  std::set<int> stmt_nums; + 
 int level = intArg(args, 1); +  int shift_amount = intArg(args, 2); +  tointset(args, 0, stmt_nums); +  myloop->shift(stmt_nums, level, shift_amount); +  Py_RETURN_NONE; +} + +static PyObject* chill_shift_to(PyObject* self, PyObject* args) { +  strict_arg_num(args, 3); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  int absolute_pos = intArg(args, 2); +  myloop->shift_to(stmt_num, level, absolute_pos); +  Py_RETURN_NONE; +} + +static PyObject* chill_peel(PyObject* self, PyObject* args) { +  strict_arg_range(args, 2, 3); +  int stmt_num = intArg(args, 0); +  int level = intArg(args, 1); +  int amount = intArg(args, 2); +  myloop->peel(stmt_num, level, amount); +  Py_RETURN_NONE; +} + +static PyObject* chill_fuse(PyObject* self, PyObject* args) { +  strict_arg_num(args, 2); +  std::set<int> stmt_nums; +  int level = intArg(args, 1); +  tointset(args, 0, stmt_nums); +  myloop->fuse(stmt_nums, level); +  Py_RETURN_NONE; +} + +static PyObject* chill_distribute(PyObject* self, PyObject* args) { +  strict_arg_num(args, 2); +  std::set<int> stmts; +  int level = intArg(args, 1); +  tointset(args, 0, stmts); +  myloop->distribute(stmts, level); +  Py_RETURN_NONE; +} + +static PyObject * +chill_num_statements(PyObject *self, PyObject *args)   +{ +  //DEBUG_PRINT("\nC chill_num_statements() called from python\n");  +  int num = myloop->stmt.size(); +  //DEBUG_PRINT("C num_statement() = %d\n", num);  +  return Py_BuildValue( "i", num ); // BEWARE "d" is DOUBLE, not int +} + +static PyMethodDef ChillMethods[] = {  +   +  //python name           C routine                  parameter passing comment +  {"source",              chill_source,                    METH_VARARGS,     "set source file for chill script"}, +  {"procedure",           chill_procedure,                 METH_VARARGS,     "set the name of the procedure"}, +  {"loop",                chill_loop,                      METH_VARARGS,     "indicate which loop to optimize"}, +  {"print_code",        
  chill_print_code,                METH_VARARGS,     "print generated code"}, +  {"print_dep",           chill_print_dep,                 METH_VARARGS,     "print the dependencies graph"}, +  {"print_space",         chill_print_space,               METH_VARARGS,     "print space"}, +  {"exit",                chill_exit,                      METH_VARARGS,     "exit the interactive consule"}, +  {"known",               chill_known,                     METH_VARARGS,     "knwon"}, +  {"remove_dep",          chill_remove_dep,                METH_VARARGS,     "remove dependency i suppose"}, +  {"original",            chill_original,                  METH_VARARGS,     "original"}, +  {"permute",             chill_permute,                   METH_VARARGS,     "permute"}, +  {"pragma",              chill_pragma,                    METH_VARARGS,     "pragma"}, +  {"prefetch",            chill_prefetch,                  METH_VARARGS,     "prefetch"}, +  {"tile",                chill_tile,                      METH_VARARGS,     "tile"}, +  {"datacopy",            chill_datacopy,                  METH_VARARGS,     "datacopy"}, +  {"datacopy_privitized", chill_datacopy_privatized,       METH_VARARGS,     "datacopy_privatized"}, +  {"unroll",              chill_unroll,                    METH_VARARGS,     "unroll"}, +  {"unroll_extra",        chill_unroll_extra,              METH_VARARGS,     "unroll_extra"}, +  {"split",               chill_split,                     METH_VARARGS,     "split"}, +  {"nonsingular",         chill_nonsingular,               METH_VARARGS,     "nonsingular"}, +  {"skew",                chill_skew,                      METH_VARARGS,     "skew"}, +  {"scale",               chill_scale,                     METH_VARARGS,     "scale"}, +  {"reverse",             chill_reverse,                   METH_VARARGS,     "reverse"}, +  {"shift",               chill_shift,                     METH_VARARGS,     "shift"}, +  {"shift_to",            chill_shift_to,      
            METH_VARARGS,     "shift_to"}, +  {"peel",                chill_peel,                      METH_VARARGS,     "peel"}, +  {"fuse",                chill_fuse,                      METH_VARARGS,     "fuse"}, +  {"distribute",          chill_distribute,                METH_VARARGS,     "distribute"}, +  {"num_statements",      chill_num_statements,            METH_VARARGS,     "number of statements in the current loop"}, +  {NULL, NULL, 0, NULL} +}; + +static void register_globals(PyObject* m) { +  // Preset globals +  PyModule_AddStringConstant(m, "VERSION", CHILL_BUILD_VERSION); +  PyModule_AddStringConstant(m, "dest", "C"); +  PyModule_AddStringConstant(m, "C", "C"); +  // Tile method +  PyModule_AddIntConstant(m, "strided", 0); +  PyModule_AddIntConstant(m, "counted", 1); +  // Memory mode +  PyModule_AddIntConstant(m, "global", 0); +  PyModule_AddIntConstant(m, "shared", 1); +  PyModule_AddIntConstant(m, "textured", 2); +  // Bool flags +  PyModule_AddIntConstant(m, "sync", 1); +}  + +PyMODINIT_FUNC +initchill(void)    // pass C methods to python  +{ +  DEBUG_PRINT("in C, initchill() to set up C methods to be called from python\n"); +  PyObject* m = Py_InitModule("chill", ChillMethods); +  register_globals(m); +} diff --git a/src/dep.cc b/src/dep.cc new file mode 100644 index 0000000..a675d03 --- /dev/null +++ b/src/dep.cc @@ -0,0 +1,567 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Data dependence vector and graph. + + Notes: + All dependence vectors are normalized, i.e., the first non-zero distance + must be positve. Thus the correct dependence meaning can be given based on + source/destination pair's read/write type. 
Suppose for a dependence vector + 1, 0~5, -3), we want to permute the first and the second dimension, + the result would be two dependence vectors (0, 1, -3) and (1~5, 1, -3). + All operations on dependence vectors are non-destructive, i.e., new + dependence vectors are returned. + + History: + 01/2006 Created by Chun Chen. + 03/2009 Use IR_Ref interface in source and destination arrays -chun +*****************************************************************************/ + +#include "dep.hh" + +//----------------------------------------------------------------------------- +// Class: DependeceVector +//----------------------------------------------------------------------------- + +std::ostream& operator<<(std::ostream &os, const DependenceVector &d) { +  if (d.sym != NULL) { +    os << d.sym->name(); +    os << ':'; +    if (d.quasi) +      os << "_quasi"; +     +  } +   +  switch (d.type) { +  case DEP_W2R: +    os << "true"; +    if (d.is_reduction) +      os << "_reduction"; +    break; +  case DEP_R2W: +    os << "anti"; +    break; +  case DEP_W2W: +    os << "output"; +    break; +  case DEP_R2R: +    os << "input"; +    break; +  case DEP_CONTROL: +    os << "control"; +    break; +  default: +    os << "unknown"; +    break; +  } +   +  os << '('; +   +  for (int i = 0; i < d.lbounds.size(); i++) { +    omega::coef_t lbound = d.lbounds[i]; +    omega::coef_t ubound = d.ubounds[i]; +     +    if (lbound == ubound) +      os << lbound; +    else { +      if (lbound == -posInfinity) +        if (ubound == posInfinity) +          os << '*'; +        else { +          if (ubound == -1) +            os << '-'; +          else +            os << ubound << '-'; +        } +      else if (ubound == posInfinity) { +        if (lbound == 1) +          os << '+'; +        else +          os << lbound << '+'; +      } else +        os << lbound << '~' << ubound; +    } +     +    if (i < d.lbounds.size() - 1) +      os << ", "; +  } +   +  os << ')'; +   +  return os; 
+} + +// DependenceVector::DependenceVector(int size): +//   lbounds(std::vector<coef_t>(size, 0)), +//   ubounds(std::vector<coef_t>(size, 0)) { +//   src = NULL; +//   dst = NULL; +// } + +DependenceVector::DependenceVector(const DependenceVector &that) { +  if (that.sym != NULL) +    this->sym = that.sym->clone(); +  else +    this->sym = NULL; +  this->type = that.type; +  this->lbounds = that.lbounds; +  this->ubounds = that.ubounds; +  quasi = that.quasi; +  is_scalar_dependence = that.is_scalar_dependence; +  is_reduction = that.is_reduction; +} + +DependenceVector &DependenceVector::operator=(const DependenceVector &that) { +  if (this != &that) { +    delete this->sym; +    if (that.sym != NULL) +      this->sym = that.sym->clone(); +    else +      this->sym = NULL; +    this->type = that.type; +    this->lbounds = that.lbounds; +    this->ubounds = that.ubounds; +    quasi = that.quasi; +    is_scalar_dependence = that.is_scalar_dependence; +    is_reduction = that.is_reduction; +  } +  return *this; +} +DependenceType DependenceVector::getType() const { +  return type; +} + +bool DependenceVector::is_data_dependence() const { +  if (type == DEP_W2R || type == DEP_R2W || type == DEP_W2W +      || type == DEP_R2R) +    return true; +  else +    return false; +} + +bool DependenceVector::is_control_dependence() const { +  if (type == DEP_CONTROL) +    return true; +  else +    return false; +} + +bool DependenceVector::has_negative_been_carried_at(int dim) const { +  if (!is_data_dependence()) +    throw std::invalid_argument("only works for data dependences"); +   +  if (dim < 0 || dim >= lbounds.size()) +    return false; +   +  for (int i = 0; i < dim; i++) +    if (lbounds[i] > 0 || ubounds[i] < 0) +      return false; +   +  if (lbounds[dim] < 0) +    return true; +  else +    return false; +} + + +bool DependenceVector::has_been_carried_at(int dim) const { +  if (!is_data_dependence()) +    throw std::invalid_argument("only works for data 
dependences"); +   +  if (dim < 0 || dim >= lbounds.size()) +    return false; +   +  for (int i = 0; i < dim; i++) +    if (lbounds[i] > 0 || ubounds[i] < 0) +      return false; +   +  if ((lbounds[dim] != 0)  || (ubounds[dim] !=0)) +    return true; +   +  return false; +} + +bool DependenceVector::has_been_carried_before(int dim) const { +  if (!is_data_dependence()) +    throw std::invalid_argument("only works for data dependences"); +   +  if (dim < 0) +    return false; +  if (dim > lbounds.size()) +    dim = lbounds.size(); +   +  for (int i = 0; i < dim; i++) { +    if (lbounds[i] > 0) +      return true; +    if (ubounds[i] < 0) +      return true; +  } +   +  return false; +} + +bool DependenceVector::isZero() const { +  return isZero(lbounds.size() - 1); +} + +bool DependenceVector::isZero(int dim) const { +  if (dim >= lbounds.size()) +    throw std::invalid_argument("invalid dependence dimension"); +   +  for (int i = 0; i <= dim; i++) +    if (lbounds[i] != 0 || ubounds[i] != 0) +      return false; +   +  return true; +} + +bool DependenceVector::isPositive() const { +  for (int i = 0; i < lbounds.size(); i++) +    if (lbounds[i] != 0 || ubounds[i] != 0) { +      if (lbounds[i] < 0) +        return false; +      else if (lbounds[i] > 0) +        return true; +    } +   +  return false; +} + +bool DependenceVector::isNegative() const { +  for (int i = 0; i < lbounds.size(); i++) +    if (lbounds[i] != 0 || ubounds[i] != 0) { +      if (ubounds[i] > 0) +        return false; +      else if (ubounds[i] < 0) +        return true; +    } +   +  return false; +} + +bool DependenceVector::isAllPositive() const { +  for (int i = 0; i < lbounds.size(); i++) +    if (lbounds[i] < 0) +      return false; +   +  return true; +} + +bool DependenceVector::isAllNegative() const { +  for (int i = 0; i < ubounds.size(); i++) +    if (ubounds[i] > 0) +      return false; +   +  return true; +} + +bool DependenceVector::hasPositive(int dim) const { +  if (dim >= 
lbounds.size()) +    throw std::invalid_argument("invalid dependence dimension"); +   +  if (lbounds[dim] > 0) +    //av: changed from ubounds to lbounds may have side effects +    return true; +  else +    return false; +} + +bool DependenceVector::hasNegative(int dim) const { +  if (dim >= lbounds.size()) +    throw std::invalid_argument("invalid dependence dimension"); +   +  if (ubounds[dim] < 0) +    //av: changed from lbounds to ubounds may have side effects +    return true; +  else +    return false; +} + +bool DependenceVector::isCarried(int dim, omega::coef_t distance) const { +  if (distance <= 0) +    throw std::invalid_argument("invalid dependence distance size"); +   +  if (dim > lbounds.size()) +    dim = lbounds.size(); +   +  for (int i = 0; i < dim; i++) +    if (lbounds[i] > 0) +      return false; +    else if (ubounds[i] < 0) +      return false; +   +  if (dim >= lbounds.size()) +    return true; +   +  if (lbounds[dim] > distance) +    return false; +  else if (ubounds[dim] < -distance) +    return false; +   +  return true; +} + +bool DependenceVector::canPermute(const std::vector<int> &pi) const { +  if (pi.size() != lbounds.size()) +    throw std::invalid_argument( +      "permute dimensionality do not match dependence space"); +   +  for (int i = 0; i < pi.size(); i++) { +    if (lbounds[pi[i]] > 0) +      return true; +    else if (lbounds[pi[i]] < 0) +      return false; +  } +   +  return true; +} + +std::vector<DependenceVector> DependenceVector::normalize() const { +  std::vector<DependenceVector> result; +   +  DependenceVector dv(*this); +  for (int i = 0; i < dv.lbounds.size(); i++) { +    if (dv.lbounds[i] < 0 && dv.ubounds[i] >= 0) { +      omega::coef_t t = dv.ubounds[i]; +      dv.ubounds[i] = -1; +      result.push_back(dv); +      dv.lbounds[i] = 0; +      dv.ubounds[i] = t; +    } +    if (dv.lbounds[i] == 0 && dv.ubounds[i] > 0) { +      dv.lbounds[i] = 1; +      result.push_back(dv); +      dv.lbounds[i] = 0; +      
dv.ubounds[i] = 0; +    } +    if (dv.lbounds[i] == 0 && dv.ubounds[i] == 0) +      continue; +    else +      break; +  } +   +  result.push_back(dv); +  return result; +} + +std::vector<DependenceVector> DependenceVector::permute( +  const std::vector<int> &pi) const { +  if (pi.size() != lbounds.size()) +    throw std::invalid_argument( +      "permute dimensionality do not match dependence space"); +   +  const int n = lbounds.size(); +   +  DependenceVector dv(*this); +  for (int i = 0; i < n; i++) { +    dv.lbounds[i] = lbounds[pi[i]]; +    dv.ubounds[i] = ubounds[pi[i]]; +  } +   +  int violated = 0; +   +  for (int i = 0; i < n; i++) { +    if (dv.lbounds[i] > 0) +      break; +    else if (dv.lbounds[i] < 0) +      violated = 1; +  } +   +  if (((violated == 1) && !quasi) && !is_scalar_dependence) { +    throw ir_error("dependence violation"); +     +  } +   +  return dv.normalize(); +} + +DependenceVector DependenceVector::reverse() const { +  const int n = lbounds.size(); +   +  DependenceVector dv(*this); +  switch (type) { +  case DEP_W2R: +    dv.type = DEP_R2W; +    break; +  case DEP_R2W: +    dv.type = DEP_W2R; +    break; +  default: +    dv.type = type; +  } +   +  for (int i = 0; i < n; i++) { +    dv.lbounds[i] = -ubounds[i]; +    dv.ubounds[i] = -lbounds[i]; +  } +  dv.quasi = true; +   +  return dv; +} + +// std::vector<DependenceVector> DependenceVector::matrix(const std::vector<std::vector<int> > &M) const { +//   if (M.size() != lbounds.size()) +//     throw std::invalid_argument("(non)unimodular transformation dimensionality does not match dependence space"); + +//   const int n = lbounds.size(); +//   DependenceVector dv; +//   if (sym != NULL) +//     dv.sym = sym->clone(); +//   else +//     dv.sym = NULL; +//   dv.type = type; + +//   for (int i = 0; i < n; i++) { +//     assert(M[i].size() == n+1 || M[i].size() == n); + +//     omega::coef_t lb, ub; +//     if (M[i].size() == n+1) +//       lb = ub = M[i][n]; +//     else +//       
lb = ub = 0; + +//     for (int j = 0; j < n; j++) { +//       int c = M[i][j]; +//       if (c == 0) +//         continue; + +//       if (c > 0) { +//         if (lbounds[j] == -posInfinity) +//           lb = -posInfinity; +//         else if (lb != -posInfinity) +//           lb += c * lbounds[j]; +//         if (ubounds[j] == posInfinity) +//           ub = posInfinity; +//         else if (ub != posInfinity) +//           ub += c * ubounds[j]; +//       } +//       else { +//         if (ubounds[j] == posInfinity) +//           lb = -posInfinity; +//         else if (lb != -posInfinity) +//           lb += c * ubounds[j]; +//         if (lbounds[j] == -posInfinity) +//           ub = posInfinity; +//         else if (ub != posInfinity) +//           ub += c * lbounds[j]; +//       } +//     } +//     dv.lbounds.push_back(lb); +//     dv.ubounds.push_back(ub); +//   } +//   dv.is_reduction = is_reduction; + +//   return dv.normalize(); +// } + +//----------------------------------------------------------------------------- +// Class: DependenceGraph +//----------------------------------------------------------------------------- + +DependenceGraph DependenceGraph::permute(const std::vector<int> &pi, +                                         const std::set<int> &active) const { +  DependenceGraph g; +   +  for (int i = 0; i < vertex.size(); i++) +    g.insert(vertex[i].first); +   +  for (int i = 0; i < vertex.size(); i++) +    for (EdgeList::const_iterator j = vertex[i].second.begin(); +         j != vertex[i].second.end(); j++) { +      if (active.empty() +          || (active.find(i) != active.end() +              && active.find(j->first) != active.end())) { +        for (int k = 0; k < j->second.size(); k++) { +          std::vector<DependenceVector> dv = j->second[k].permute(pi); +          g.connect(i, j->first, dv); +        } +      } else if (active.find(i) == active.end() +                 && active.find(j->first) == active.end()) { +        
std::vector<DependenceVector> dv = j->second; +        g.connect(i, j->first, dv); +      } else { +        std::vector<DependenceVector> dv = j->second; +        for (int k = 0; k < dv.size(); k++) +          for (int d = 0; d < pi.size(); d++) +            if (pi[d] != d) { +              dv[k].lbounds[d] = -posInfinity; +              dv[k].ubounds[d] = posInfinity; +            } +        g.connect(i, j->first, dv); +      } +    } +   +  return g; +} + +// DependenceGraph DependenceGraph::matrix(const std::vector<std::vector<int> > &M) const { +//   DependenceGraph g; + +//   for (int i = 0; i < vertex.size(); i++) +//     g.insert(vertex[i].first); + +//   for (int i = 0; i < vertex.size(); i++) +//     for (EdgeList::const_iterator j = vertex[i].second.begin(); j != vertex[i].second.end(); j++) +//       for (int k = 0; k < j->second.size(); k++) +//         g.connect(i, j->first, j->second[k].matrix(M)); + +//   return g; +// } + +DependenceGraph DependenceGraph::subspace(int dim) const { +  DependenceGraph g; +   +  for (int i = 0; i < vertex.size(); i++) +    g.insert(vertex[i].first); +   +  for (int i = 0; i < vertex.size(); i++) +    for (EdgeList::const_iterator j = vertex[i].second.begin(); +         j != vertex[i].second.end(); j++) +       +      for (int k = 0; k < j->second.size(); k++) { +        if(j->second[k].type != DEP_CONTROL){ +          if (j->second[k].isCarried(dim)) +            g.connect(i, j->first, j->second[k]); +        }else +          g.connect(i, j->first, j->second[k]); +         +      } +   +  return g; +} + +bool DependenceGraph::isPositive() const { +  for (int i = 0; i < vertex.size(); i++) +    for (EdgeList::const_iterator j = vertex[i].second.begin(); +         j != vertex[i].second.end(); j++) +      for (int k = 0; k < j->second.size(); k++) +        if (!j->second[k].isPositive()) +          return false; +   +  return true; +} + +bool DependenceGraph::hasPositive(int dim) const { +  for (int i = 0; i < 
vertex.size(); i++) +    for (EdgeList::const_iterator j = vertex[i].second.begin(); +         j != vertex[i].second.end(); j++) +      for (int k = 0; k < j->second.size(); k++) +        if (!j->second[k].hasPositive(dim)) +          return false; +   +  return true; +} + +bool DependenceGraph::hasNegative(int dim) const { +  for (int i = 0; i < vertex.size(); i++) +    for (EdgeList::const_iterator j = vertex[i].second.begin(); +         j != vertex[i].second.end(); j++) +      for (int k = 0; k < j->second.size(); k++) +        if (!j->second[k].hasNegative(dim)) +          return false; +   +  return true; +} diff --git a/src/ir_rose.cc b/src/ir_rose.cc new file mode 100644 index 0000000..dc3eed8 --- /dev/null +++ b/src/ir_rose.cc @@ -0,0 +1,1756 @@ +/***************************************************************************** + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + CHiLL's rose interface. + + Notes: + Array supports mixed pointer and array type in a single declaration. + + History: + 02/23/2009 Created by Chun Chen. 
+*****************************************************************************/ +#include <string> +#include "ir_rose.hh" +#include "ir_rose_utils.hh" +#include "rose_attributes.h" +#include "CG_roseRepr.h" +#include "CG_roseBuilder.h" + +using namespace SageBuilder; +using namespace SageInterface; +using namespace omega; +// ---------------------------------------------------------------------------- +// Class: IR_roseScalarSymbol +// ---------------------------------------------------------------------------- + +std::string IR_roseScalarSymbol::name() const { +  return vs_->get_name().getString(); +} + +int IR_roseScalarSymbol::size() const { +  return (vs_->get_type()->memoryUsage()) / (vs_->get_type()->numberOfNodes()); +} + +bool IR_roseScalarSymbol::operator==(const IR_Symbol &that) const { +  if (typeid(*this) != typeid(that)) +    return false; +   +  const IR_roseScalarSymbol *l_that = +    static_cast<const IR_roseScalarSymbol *>(&that); +  return this->vs_ == l_that->vs_; +} + +IR_Symbol *IR_roseScalarSymbol::clone() const { +  return NULL; +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseArraySymbol +// ---------------------------------------------------------------------------- + +std::string IR_roseArraySymbol::name() const { +  return (vs_->get_declaration()->get_name().getString()); +} + +int IR_roseArraySymbol::elem_size() const { +   +  SgType *tn = vs_->get_type(); +  SgType* arrType; +   +  int elemsize; +   +  if (arrType = isSgArrayType(tn)) { +    while (isSgArrayType(arrType)) { +      arrType = arrType->findBaseType(); +    } +  } else if (arrType = isSgPointerType(tn)) { +    while (isSgPointerType(arrType)) { +      arrType = arrType->findBaseType(); +    } +  } +   +  elemsize = (int) arrType->memoryUsage() / arrType->numberOfNodes(); +  return elemsize; +} + +int IR_roseArraySymbol::n_dim() const { +  int dim = 0; +  SgType* arrType = isSgArrayType(vs_->get_type()); +  SgType* 
ptrType = isSgPointerType(vs_->get_type()); +  if (arrType != NULL) { +    while (isSgArrayType(arrType)) { +      arrType = isSgArrayType(arrType)->get_base_type(); +      dim++; +    } +  } else if (ptrType != NULL) { +    while (isSgPointerType(ptrType)) { +      ptrType = isSgPointerType(ptrType)->get_base_type(); +      dim++; +    } +  } + +  // Manu:: fortran support +  if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) { + +	  if (arrType != NULL) { +		  dim = 0; +		  SgExprListExp * dimList = isSgArrayType(vs_->get_type())->get_dim_info(); +		  SgExpressionPtrList::iterator it = dimList->get_expressions().begin(); +		  for(;it != dimList->get_expressions().end(); it++) { +		    dim++; +		  } +	  } else if (ptrType != NULL) { +		  //std::cout << "pntrType \n"; +		  ; // not sure if this case will happen +	  } +  } + +  return dim; +} + +omega::CG_outputRepr *IR_roseArraySymbol::size(int dim) const { +   +  SgArrayType* arrType = isSgArrayType(vs_->get_type()); +  // SgExprListExp* dimList = arrType->get_dim_info(); +  int count = 0; +  SgExpression* expr; +  SgType* pntrType = isSgPointerType(vs_->get_type()); +   +  if (arrType != NULL) { +    SgExprListExp* dimList = arrType->get_dim_info(); +    if (!static_cast<const IR_roseCode *>(ir_)->is_fortran_) { +      SgExpressionPtrList::iterator it = +        dimList->get_expressions().begin(); +       +      while ((it != dimList->get_expressions().end()) && (count < dim)) { +        it++; +        count++; +      } +       +      expr = *it; +    } else { +      SgExpressionPtrList::reverse_iterator i = +        dimList->get_expressions().rbegin(); +      for (; (i != dimList->get_expressions().rend()) && (count < dim); +           i++) { +         +        count++; +      } +       +      expr = *i; +    } +  } else if (pntrType != NULL) { +     +    while (count < dim) { +      pntrType = (isSgPointerType(pntrType))->get_base_type(); +      count++; +    } +    if (isSgPointerType(pntrType)) +      
expr = new SgExpression; +  } +   +  if (!expr) +    throw ir_error("Index variable is NULL!!"); +   +  // Manu :: debug +  std::cout << "---------- size :: " << isSgNode(expr)->unparseToString().c_str() << "\n"; + +  return new omega::CG_roseRepr(expr); +   +} + +IR_ARRAY_LAYOUT_TYPE IR_roseArraySymbol::layout_type() const { +  if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) +    return IR_ARRAY_LAYOUT_COLUMN_MAJOR; +  else +    return IR_ARRAY_LAYOUT_ROW_MAJOR; +   +} + +bool IR_roseArraySymbol::operator==(const IR_Symbol &that) const { +   +  if (typeid(*this) != typeid(that)) +    return false; +   +  const IR_roseArraySymbol *l_that = +    static_cast<const IR_roseArraySymbol *>(&that); +  return this->vs_ == l_that->vs_; +   +} + +IR_Symbol *IR_roseArraySymbol::clone() const { +  return new IR_roseArraySymbol(ir_, vs_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseConstantRef +// ---------------------------------------------------------------------------- + +bool IR_roseConstantRef::operator==(const IR_Ref &that) const { +   +  if (typeid(*this) != typeid(that)) +    return false; +   +  const IR_roseConstantRef *l_that = +    static_cast<const IR_roseConstantRef *>(&that); +   +  if (this->type_ != l_that->type_) +    return false; +   +  if (this->type_ == IR_CONSTANT_INT) +    return this->i_ == l_that->i_; +  else +    return this->f_ == l_that->f_; +   +} + +omega::CG_outputRepr *IR_roseConstantRef::convert() { +  if (type_ == IR_CONSTANT_INT) { +    omega::CG_roseRepr *result = new omega::CG_roseRepr( +      isSgExpression(buildIntVal(static_cast<int>(i_)))); +    delete this; +    return result; +  } else +    throw ir_error("constant type not supported"); +   +} + +IR_Ref *IR_roseConstantRef::clone() const { +  if (type_ == IR_CONSTANT_INT) +    return new IR_roseConstantRef(ir_, i_); +  else if (type_ == IR_CONSTANT_FLOAT) +    return new IR_roseConstantRef(ir_, f_); +  else +    
throw ir_error("constant type not supported"); +   +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseScalarRef +// ---------------------------------------------------------------------------- + +bool IR_roseScalarRef::is_write() const { +  if (is_write_ == 1) +    return true; +   +  return false; +} + +IR_ScalarSymbol *IR_roseScalarRef::symbol() const { +  return new IR_roseScalarSymbol(ir_, vs_->get_symbol()); +} + +bool IR_roseScalarRef::operator==(const IR_Ref &that) const { +  if (typeid(*this) != typeid(that)) +    return false; +   +  const IR_roseScalarRef *l_that = +    static_cast<const IR_roseScalarRef *>(&that); +   +  if (this->ins_pos_ == NULL) +    return this->vs_ == l_that->vs_; +  else +    return this->ins_pos_ == l_that->ins_pos_ +      && this->op_pos_ == l_that->op_pos_; +} + +omega::CG_outputRepr *IR_roseScalarRef::convert() { +  omega::CG_roseRepr *result = new omega::CG_roseRepr(isSgExpression(vs_)); +  delete this; +  return result; +   +} + +IR_Ref * IR_roseScalarRef::clone() const { +  return new IR_roseScalarRef(ir_, vs_, this->is_write_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseArrayRef +// ---------------------------------------------------------------------------- + +bool IR_roseArrayRef::is_write() const { +  SgAssignOp* assignment; +   +  if (is_write_ == 1 || is_write_ == 0) +    return is_write_; +  if (assignment = isSgAssignOp(ia_->get_parent())) { +    if (assignment->get_lhs_operand() == ia_) +      return true; +  } else if (SgExprStatement* expr_stmt = isSgExprStatement( +               ia_->get_parent())) { +    SgExpression* exp = expr_stmt->get_expression(); +     +    if (exp) { +      if (assignment = isSgAssignOp(exp)) { +        if (assignment->get_lhs_operand() == ia_) +          return true; +         +      } +    } +     +  } +  return false; +} + +omega::CG_outputRepr *IR_roseArrayRef::index(int 
dim) const { +   +  SgExpression *current = isSgExpression(ia_); +  SgExpression* expr; +  int count = 0; +   +  while (isSgPntrArrRefExp(current)) { +    current = isSgPntrArrRefExp(current)->get_lhs_operand(); +    count++; +  } +   +  current = ia_; +   +  while (count > dim) { +    expr = isSgPntrArrRefExp(current)->get_rhs_operand(); +    current = isSgPntrArrRefExp(current)->get_lhs_operand(); +    count--; +  } + +  // Manu:: fortran support +  if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) { +	  expr = isSgPntrArrRefExp(ia_)->get_rhs_operand(); +	  count = 0; +	  if (isSgExprListExp(expr)) { +		  SgExpressionPtrList::iterator indexList = isSgExprListExp(expr)->get_expressions().begin(); +		  while (count < dim) { +			  indexList++; +			  count++; +		  } +		  expr = isSgExpression(*indexList); +	  } +  } + +  if (!expr) +    throw ir_error("Index variable is NULL!!"); + + +  omega::CG_roseRepr* ind = new omega::CG_roseRepr(expr); +   +  return ind->clone(); +   +} + +IR_ArraySymbol *IR_roseArrayRef::symbol() const { +   +  SgExpression *current = isSgExpression(ia_); +   +  SgVarRefExp* base; +  SgVariableSymbol *arrSymbol; +  while (isSgPntrArrRefExp(current) || isSgUnaryOp(current)) { +    if (isSgPntrArrRefExp(current)) +      current = isSgPntrArrRefExp(current)->get_lhs_operand(); +    else if (isSgUnaryOp(current)) +      /* To handle support for addressof operator and pointer dereference +       * both of which are unary ops +       */ +      current = isSgUnaryOp(current)->get_operand(); +  } +  if (base = isSgVarRefExp(current)) { +    arrSymbol = (SgVariableSymbol*) (base->get_symbol()); +    std::string x = arrSymbol->get_name().getString(); +  } else +    throw ir_error("Array Symbol is not a variable?!"); +   +  return new IR_roseArraySymbol(ir_, arrSymbol); +   +} + +bool IR_roseArrayRef::operator==(const IR_Ref &that) const { +  if (typeid(*this) != typeid(that)) +    return false; +   +  const IR_roseArrayRef *l_that = 
static_cast<const IR_roseArrayRef *>(&that); +   +  return this->ia_ == l_that->ia_; +} + +omega::CG_outputRepr *IR_roseArrayRef::convert() { +  omega::CG_roseRepr *temp = new omega::CG_roseRepr( +    isSgExpression(this->ia_)); +  omega::CG_outputRepr *result = temp->clone(); +//  delete this;   // Commented by Manu +  return result; +} + +IR_Ref *IR_roseArrayRef::clone() const { +  return new IR_roseArrayRef(ir_, ia_, is_write_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseLoop +// ---------------------------------------------------------------------------- + +IR_ScalarSymbol *IR_roseLoop::index() const { +  SgForStatement *tf = isSgForStatement(tf_); +  SgFortranDo *tfortran = isSgFortranDo(tf_); +  SgVariableSymbol* vs = NULL; +  if (tf) { +    SgForInitStatement* list = tf->get_for_init_stmt(); +    SgStatementPtrList& initStatements = list->get_init_stmt(); +    SgStatementPtrList::const_iterator j = initStatements.begin(); +     +    if (SgExprStatement *expr = isSgExprStatement(*j)) +      if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) +        if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) +          vs = var_ref->get_symbol(); +  } else if (tfortran) { +    SgExpression* init = tfortran->get_initialization(); +     +    if (SgAssignOp* op = isSgAssignOp(init)) +      if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) +        vs = var_ref->get_symbol(); +     +  } +   +  if (vs == NULL) +    throw ir_error("Index variable is NULL!!"); +   +  return new IR_roseScalarSymbol(ir_, vs); +} + +omega::CG_outputRepr *IR_roseLoop::lower_bound() const { +  SgForStatement *tf = isSgForStatement(tf_); +  SgFortranDo *tfortran = isSgFortranDo(tf_); +   +  SgExpression* lowerBound = NULL; +   +  if (tf) { +    SgForInitStatement* list = tf->get_for_init_stmt(); +    SgStatementPtrList& initStatements = list->get_init_stmt(); +    SgStatementPtrList::const_iterator j 
= initStatements.begin(); +     +    if (SgExprStatement *expr = isSgExprStatement(*j)) +      if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) { +        lowerBound = op->get_rhs_operand(); +        //Rose sometimes introduces an unnecessary cast which is a unary op +        if (isSgUnaryOp(lowerBound)) +          lowerBound = isSgUnaryOp(lowerBound)->get_operand(); +         +      } +  } else if (tfortran) { +    SgExpression* init = tfortran->get_initialization(); +     +    if (SgAssignOp* op = isSgAssignOp(init)) +      lowerBound = op->get_rhs_operand(); +  } +   +  if (lowerBound == NULL) +    throw ir_error("Lower Bound is NULL!!"); +   +  return new omega::CG_roseRepr(lowerBound); +} + +omega::CG_outputRepr *IR_roseLoop::upper_bound() const { +  SgForStatement *tf = isSgForStatement(tf_); +  SgFortranDo *tfortran = isSgFortranDo(tf_); +  SgExpression* upperBound = NULL; +  if (tf) { +    SgBinaryOp* test_expr = isSgBinaryOp(tf->get_test_expr()); +    if (test_expr == NULL) +      throw ir_error("Test Expression is NULL!!"); +     +    upperBound = test_expr->get_rhs_operand(); +    //Rose sometimes introduces an unnecessary cast which is a unary op +    if (isSgUnaryOp(upperBound)) +      upperBound = isSgUnaryOp(upperBound)->get_operand(); +    if (upperBound == NULL) +      throw ir_error("Upper Bound is NULL!!"); +  } else if (tfortran) { +     +    upperBound = tfortran->get_bound(); +     +  } +   +  return new omega::CG_roseRepr(upperBound); +   +} + +IR_CONDITION_TYPE IR_roseLoop::stop_cond() const { +  SgForStatement *tf = isSgForStatement(tf_); +  SgFortranDo *tfortran = isSgFortranDo(tf_); +   +  if (tf) { +    SgExpression* stopCond = NULL; +    SgExpression* test_expr = tf->get_test_expr(); +     +    if (isSgLessThanOp(test_expr)) +      return IR_COND_LT; +    else if (isSgLessOrEqualOp(test_expr)) +      return IR_COND_LE; +    else if (isSgGreaterThanOp(test_expr)) +      return IR_COND_GT; +    else if 
(isSgGreaterOrEqualOp(test_expr)) +      return IR_COND_GE; +     +    else +      throw ir_error("loop stop condition unsupported"); +  } else if (tfortran) { +    SgExpression* increment = tfortran->get_increment(); +    if (!isSgNullExpression(increment)) { +      if (isSgMinusOp(increment) +          && !isSgBinaryOp(isSgMinusOp(increment)->get_operand())) +        return IR_COND_GE; +      else +        return IR_COND_LE; +    } else { +    	return IR_COND_LE; // Manu:: if increment is not present, assume it to be 1. Just a workaround, not sure if it will be correct for all cases. +      SgExpression* lowerBound = NULL; +      SgExpression* upperBound = NULL; +      SgExpression* init = tfortran->get_initialization(); +      SgIntVal* ub; +      SgIntVal* lb; +      if (SgAssignOp* op = isSgAssignOp(init)) +        lowerBound = op->get_rhs_operand(); +       +      upperBound = tfortran->get_bound(); +       +      if ((upperBound != NULL) && (lowerBound != NULL)) { +         +        if ((ub = isSgIntVal(isSgValueExp(upperBound))) && (lb = +                                                            isSgIntVal(isSgValueExp(lowerBound)))) { +          if (ub->get_value() > lb->get_value()) +            return IR_COND_LE; +          else +            return IR_COND_GE; +        } else +          throw ir_error("loop stop condition unsupported"); +         +      } else +        throw ir_error("malformed fortran loop bounds!!"); +       +    } +  } +   +} + +IR_Block *IR_roseLoop::body() const { +  SgForStatement *tf = isSgForStatement(tf_); +  SgFortranDo *tfortran = isSgFortranDo(tf_); +  SgNode* loop_body = NULL; +  SgStatement* body_statements = NULL; +   +  if (tf) { +    body_statements = tf->get_loop_body(); +  } else if (tfortran) { +    body_statements = isSgStatement(tfortran->get_body()); +     +  } +   +  loop_body = isSgNode(body_statements); +   +  SgStatementPtrList list; +  if (isSgBasicBlock(loop_body)) { +    list = 
isSgBasicBlock(loop_body)->get_statements(); +     +    if (list.size() == 1) +      loop_body = isSgNode(*(list.begin())); +  } +   +  if (loop_body == NULL) +    throw ir_error("for loop body is NULL!!"); +   +  return new IR_roseBlock(ir_, loop_body); +} + +int IR_roseLoop::step_size() const { +   +  SgForStatement *tf = isSgForStatement(tf_); +  SgFortranDo *tfortran = isSgFortranDo(tf_); +   +  if (tf) { +    SgExpression *increment = tf->get_increment(); +     +    if (isSgPlusPlusOp(increment)) +      return 1; +    if (isSgMinusMinusOp(increment)) +      return -1; +    else if (SgAssignOp* assignment = isSgAssignOp(increment)) { +      SgBinaryOp* stepsize = isSgBinaryOp(assignment->get_lhs_operand()); +      if (stepsize == NULL) +        throw ir_error("Step size expression is NULL!!"); +      SgIntVal* step = isSgIntVal(stepsize->get_lhs_operand()); +      return step->get_value(); +    } else if (SgBinaryOp* inc = isSgPlusAssignOp(increment)) { +      SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); +      return (step->get_value()); +    } else if (SgBinaryOp * inc = isSgMinusAssignOp(increment)) { +      SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); +      return -(step->get_value()); +    } else if (SgBinaryOp * inc = isSgCompoundAssignOp(increment)) { +      SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); +      return (step->get_value()); +    } +     +  } else if (tfortran) { +     +    SgExpression* increment = tfortran->get_increment(); +     +    if (!isSgNullExpression(increment)) { +      if (isSgMinusOp(increment)) { +        if (SgValueExp *inc = isSgValueExp( +              isSgMinusOp(increment)->get_operand())) +          if (isSgIntVal(inc)) +            return -(isSgIntVal(inc)->get_value()); +      } else { +        if (SgValueExp* inc = isSgValueExp(increment)) +          if (isSgIntVal(inc)) +            return isSgIntVal(inc)->get_value(); +      } +    } else { +    	return 1; // Manu:: if increment is not 
present, assume it to be 1. Just a workaround, not sure if it will be correct for all cases. +      SgExpression* lowerBound = NULL; +      SgExpression* upperBound = NULL; +      SgExpression* init = tfortran->get_initialization(); +      SgIntVal* ub; +      SgIntVal* lb; +      if (SgAssignOp* op = isSgAssignOp(init)) +        lowerBound = op->get_rhs_operand(); +       +      upperBound = tfortran->get_bound(); +       +      if ((upperBound != NULL) && (lowerBound != NULL)) { +         +        if ((ub = isSgIntVal(isSgValueExp(upperBound))) && (lb = +                                                            isSgIntVal(isSgValueExp(lowerBound)))) { +          if (ub->get_value() > lb->get_value()) +            return 1; +          else +            return -1; +        } else +          throw ir_error("loop stop condition unsupported"); +         +      } else +        throw ir_error("loop stop condition unsupported"); +       +    } +     +  } +   +} + +IR_Block *IR_roseLoop::convert() { +  const IR_Code *ir = ir_; +  SgNode *tnl = isSgNode(tf_); +  delete this; +  return new IR_roseBlock(ir, tnl); +} + +IR_Control *IR_roseLoop::clone() const { +   +  return new IR_roseLoop(ir_, tf_); +   +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseBlock +// ---------------------------------------------------------------------------- + +omega::CG_outputRepr *IR_roseBlock::original() const { +   +  omega::CG_outputRepr * tnl; +   +  if (isSgBasicBlock(tnl_)) { +     +    SgStatementPtrList *bb = new SgStatementPtrList(); +    SgStatementPtrList::iterator it; +    for (it = (isSgBasicBlock(tnl_)->get_statements()).begin(); +         it != (isSgBasicBlock(tnl_)->get_statements()).end() +           && (*it != start_); it++) +      ; +     +    if (it != (isSgBasicBlock(tnl_)->get_statements()).end()) { +      for (; it != (isSgBasicBlock(tnl_)->get_statements()).end(); it++) { +        bb->push_back(*it); +        if 
((*it) == end_) +          break; +      } +    } +    tnl = new omega::CG_roseRepr(bb); + +  } else { + +    tnl = new omega::CG_roseRepr(tnl_); + +  } +   +  return tnl; +   +} +omega::CG_outputRepr *IR_roseBlock::extract() const { +   +  std::string x = tnl_->unparseToString(); +   +  omega::CG_roseRepr * tnl; +   +  omega::CG_outputRepr* block; +   +  if (isSgBasicBlock(tnl_)) { +     +    SgStatementPtrList *bb = new SgStatementPtrList(); +    SgStatementPtrList::iterator it; +    for (it = (isSgBasicBlock(tnl_)->get_statements()).begin(); +         it != (isSgBasicBlock(tnl_)->get_statements()).end() +           && (*it != start_); it++) +      ; +     +    if (it != (isSgBasicBlock(tnl_)->get_statements()).end()) { +      for (; it != (isSgBasicBlock(tnl_)->get_statements()).end(); it++) { +        bb->push_back(*it); +        if ((*it) == end_) +          break; +      } +    } +    tnl = new omega::CG_roseRepr(bb); +    block = tnl->clone(); +     +  } else { +    tnl = new omega::CG_roseRepr(tnl_); +     +    block = tnl->clone(); +  } +   +  delete tnl; +  return block; +} + +IR_Control *IR_roseBlock::clone() const { +  return new IR_roseBlock(ir_, tnl_, start_, end_); +   +} +// ---------------------------------------------------------------------------- +// Class: IR_roseIf +// ---------------------------------------------------------------------------- +omega::CG_outputRepr *IR_roseIf::condition() const { +  SgNode *tnl = isSgNode(isSgIfStmt(ti_)->get_conditional()); +  SgExpression* exp = NULL; +  if (SgExprStatement* stmt = isSgExprStatement(tnl)) +    exp = stmt->get_expression(); +  if (exp == NULL) +    return new omega::CG_roseRepr(tnl); +  else +    return new omega::CG_roseRepr(exp); +} + +IR_Block *IR_roseIf::then_body() const { +  SgNode *tnl = isSgNode(isSgIfStmt(ti_)->get_true_body()); +   +  if (tnl == NULL) +    return NULL; +   +  return new IR_roseBlock(ir_, tnl); +} + +IR_Block *IR_roseIf::else_body() const { +  SgNode *tnl = 
isSgNode(isSgIfStmt(ti_)->get_false_body()); +   +  if (tnl == NULL) +    return NULL; +   +  return new IR_roseBlock(ir_, tnl); +} + +IR_Block *IR_roseIf::convert() { +  const IR_Code *ir = ir_; +  delete this; +  return new IR_roseBlock(ir, ti_); +} + +IR_Control *IR_roseIf::clone() const { +  return new IR_roseIf(ir_, ti_); +} + +// -----------------------------------------------------------y----------------- +// Class: IR_roseCode_Global_Init +// ---------------------------------------------------------------------------- + +IR_roseCode_Global_Init *IR_roseCode_Global_Init::pinstance = 0; + +IR_roseCode_Global_Init * IR_roseCode_Global_Init::Instance(char** argv) { +  if (pinstance == 0) { +    pinstance = new IR_roseCode_Global_Init; +    pinstance->project = frontend(2, argv); +     +  } +  return pinstance; +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseCode +// ---------------------------------------------------------------------------- + +IR_roseCode::IR_roseCode(const char *filename, const char* proc_name) : +  IR_Code() { +   +  SgProject* project; +   +  char* argv[2]; +  int counter = 0; +  argv[0] = (char*) malloc(5 * sizeof(char)); +  argv[1] = (char*) malloc((strlen(filename) + 1) * sizeof(char)); +  strcpy(argv[0], "rose"); +  strcpy(argv[1], filename); +   +  project = (IR_roseCode_Global_Init::Instance(argv))->project; +  firstScope = getFirstGlobalScope(project); +  SgFilePtrList& file_list = project->get_fileList(); +   +  for (SgFilePtrList::iterator it = file_list.begin(); it != file_list.end(); +       it++) { +    file = isSgSourceFile(*it); +    if (file->get_outputLanguage() == SgFile::e_Fortran_output_language) +      is_fortran_ = true; +    else +      is_fortran_ = false; + +    root = file->get_globalScope(); + +    if (!is_fortran_) { // Manu:: this macro should not be created if the input code is in fortran +    	buildCpreprocessorDefineDeclaration(root, +                     
                   "#define __rose_lt(x,y) ((x)<(y)?(x):(y))", +                                        PreprocessingInfo::before); +    	buildCpreprocessorDefineDeclaration(root, +                                        "#define __rose_gt(x,y) ((x)>(y)?(x):(y))", +                                        PreprocessingInfo::before); +    } +     +    symtab_ = isSgScopeStatement(root)->get_symbol_table(); +    SgDeclarationStatementPtrList& declList = root->get_declarations(); +     +    p = declList.begin(); + +    while (p != declList.end()) { +      func = isSgFunctionDeclaration(*p); +      if (func) { +        if (!strcmp((func->get_name().getString()).c_str(), proc_name)) +          break; +         +      } +      p++; +      counter++; +    } +    if (p != declList.end()) +      break; +     +  } +   +  symtab2_ = func->get_definition()->get_symbol_table(); +  symtab3_ = func->get_definition()->get_body()->get_symbol_table(); +  // Manu:: added is_fortran_ parameter +  // TODO Substitute it with a better builder +  ocg_ = new omega::CG_roseBuilder(is_fortran_, root, firstScope, +                                   func->get_definition()->get_symbol_table(), +                                   func->get_definition()->get_body()->get_symbol_table(), +                                   isSgNode(func->get_definition()->get_body())); +   +  i_ = 0; /*i_ handling may need revision */ +   +  free(argv[1]); +  free(argv[0]); +   +} + +IR_roseCode::~IR_roseCode() { +} + +void IR_roseCode::finalizeRose() { +  SgProject* project = (IR_roseCode_Global_Init::Instance(NULL))->project; +  project->unparse(); +} + +IR_ScalarSymbol *IR_roseCode::CreateScalarSymbol(const IR_Symbol *sym, int) { +  char str1[14]; +  if (typeid(*sym) == typeid(IR_roseScalarSymbol)) { +    SgType *tn = +      static_cast<const IR_roseScalarSymbol *>(sym)->vs_->get_type(); +    sprintf(str1, "newVariable%i", i_); +    SgVariableDeclaration* defn = buildVariableDeclaration(str1, tn); +    i_++; +    
 +    SgInitializedNamePtrList& variables = defn->get_variables(); +    SgInitializedNamePtrList::const_iterator i = variables.begin(); +    SgInitializedName* initializedName = *i; +    SgVariableSymbol* vs = new SgVariableSymbol(initializedName); +     +    prependStatement(defn, +                     isSgScopeStatement(func->get_definition()->get_body())); +    vs->set_parent(symtab_); +    symtab_->insert(str1, vs); +     +    if (vs == NULL) +      throw ir_error("in CreateScalarSymbol: vs is NULL!!"); +     +    return new IR_roseScalarSymbol(this, vs); +  } else if (typeid(*sym) == typeid(IR_roseArraySymbol)) { +    SgType *tn1 = +      static_cast<const IR_roseArraySymbol *>(sym)->vs_->get_type(); +    while (isSgArrayType(tn1) || isSgPointerType(tn1)) { +      if (isSgArrayType(tn1)) +        tn1 = isSgArrayType(tn1)->get_base_type(); +      else if (isSgPointerType(tn1)) +        tn1 = isSgPointerType(tn1)->get_base_type(); +      else +        throw ir_error( +          "in CreateScalarSymbol: symbol not an array nor a pointer!"); +    } +     +    sprintf(str1, "newVariable%i", i_); +    i_++; +     +    SgVariableDeclaration* defn1 = buildVariableDeclaration(str1, tn1); +    SgInitializedNamePtrList& variables1 = defn1->get_variables(); +     +    SgInitializedNamePtrList::const_iterator i1 = variables1.begin(); +    SgInitializedName* initializedName1 = *i1; +     +    SgVariableSymbol *vs1 = new SgVariableSymbol(initializedName1); +    prependStatement(defn1, +                     isSgScopeStatement(func->get_definition()->get_body())); +     +    vs1->set_parent(symtab_); +    symtab_->insert(str1, vs1); +     +    if (vs1 == NULL) +      throw ir_error("in CreateScalarSymbol: vs1 is NULL!!"); +     +    return new IR_roseScalarSymbol(this, vs1); +  } else +    throw std::bad_typeid(); +   +} + +IR_ArraySymbol *IR_roseCode::CreateArraySymbol(const IR_Symbol *sym, +                                               std::vector<omega::CG_outputRepr *> 
&size, int) { +  SgType *tn; +  char str1[14]; +   +  if (typeid(*sym) == typeid(IR_roseScalarSymbol)) { +    tn = static_cast<const IR_roseScalarSymbol *>(sym)->vs_->get_type(); +  } else if (typeid(*sym) == typeid(IR_roseArraySymbol)) { +    tn = static_cast<const IR_roseArraySymbol *>(sym)->vs_->get_type(); +    while (isSgArrayType(tn) || isSgPointerType(tn)) { +      if (isSgArrayType(tn)) +        tn = isSgArrayType(tn)->get_base_type(); +      else if (isSgPointerType(tn)) +        tn = isSgPointerType(tn)->get_base_type(); +      else +        throw ir_error( +          "in CreateScalarSymbol: symbol not an array nor a pointer!"); +    } +  } else +    throw std::bad_typeid(); + +   +  // Manu:: Fortran support +  std::vector<SgExpression *>exprs; +  SgExprListExp *exprLstExp; +  SgExpression* sizeExpression = new SgNullExpression(); +  SgArrayType* arrayType = new SgArrayType(tn,sizeExpression); +  sizeExpression->set_parent(arrayType); + +  if (!is_fortran_) { +	  for (int i = size.size() - 1; i >= 0; i--) { +		tn = buildArrayType(tn,static_cast<omega::CG_roseRepr *>(size[i])->GetExpression()); +	  } +  } else { // Manu:: required for fortran support +	  for (int i = size.size() - 1; i >= 0; i--) { +		exprs.push_back(static_cast<omega::CG_roseRepr *>(size[i])->GetExpression()); +	  } +  } + +  if (is_fortran_) { +	  exprLstExp = buildExprListExp(exprs); +	  arrayType->set_dim_info(exprLstExp); + 	  exprLstExp->set_parent(arrayType); + 	  arrayType->set_rank(exprLstExp->get_expressions().size()); +  } + +  static int rose_array_counter = 1; +  SgVariableDeclaration* defn2; +  std::string s; +  if (!is_fortran_) { +	  s = std::string("_P") + omega::to_string(rose_array_counter++); +	  defn2 = buildVariableDeclaration(const_cast<char *>(s.c_str()), tn); +  } else {// Manu:: fortran support +	  s = std::string("f_P") + omega::to_string(rose_array_counter++); +	  defn2 = buildVariableDeclaration(const_cast<char *>(s.c_str()), arrayType); +  } + + +  
SgInitializedNamePtrList& variables2 = defn2->get_variables(); +   +  SgInitializedNamePtrList::const_iterator i2 = variables2.begin(); +  SgInitializedName* initializedName2 = *i2; +  SgVariableSymbol *vs = new SgVariableSymbol(initializedName2); +   +  prependStatement(defn2, +                   isSgScopeStatement(func->get_definition()->get_body())); +   +  vs->set_parent(symtab_); +  symtab_->insert(SgName(s.c_str()), vs); +   +  return new IR_roseArraySymbol(this, vs); +} + +IR_ScalarRef *IR_roseCode::CreateScalarRef(const IR_ScalarSymbol *sym) { +  return new IR_roseScalarRef(this, +                              buildVarRefExp(static_cast<const IR_roseScalarSymbol *>(sym)->vs_)); +   +} + +IR_ArrayRef *IR_roseCode::CreateArrayRef(const IR_ArraySymbol *sym, +                                         std::vector<omega::CG_outputRepr *> &index) { +   +  int t; +   +  if (sym->n_dim() != index.size()) +    throw std::invalid_argument("incorrect array symbol dimensionality"); +   +  const IR_roseArraySymbol *l_sym = +    static_cast<const IR_roseArraySymbol *>(sym); +   +  SgVariableSymbol *vs = l_sym->vs_; +  SgExpression* ia1 = buildVarRefExp(vs); +   + + +  if (is_fortran_) { // Manu:: fortran support +	  std::vector<SgExpression *>exprs; +	  for (int i = 0 ; i < index.size(); i++) { +		exprs.push_back(static_cast<omega::CG_roseRepr *>(index[i])->GetExpression()); +	  } +	  SgExprListExp *exprLstExp; +	  exprLstExp = buildExprListExp(exprs); +	  ia1 = buildPntrArrRefExp(ia1,exprLstExp); +  } else { +     for (int i = 0; i < index.size(); i++) { + +        ia1 = buildPntrArrRefExp(ia1, +                             static_cast<omega::CG_roseRepr *>(index[i])->GetExpression()); +     +     } +  } +   +  SgPntrArrRefExp *ia = isSgPntrArrRefExp(ia1); +   +  return new IR_roseArrayRef(this, ia, -1); +   +} + +std::vector<IR_ScalarRef *> IR_roseCode::FindScalarRef( +  const omega::CG_outputRepr *repr) const { +  std::vector<IR_ScalarRef *> scalars; +  SgNode *tnl = 
static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); +  SgStatementPtrList *list = +    static_cast<const omega::CG_roseRepr *>(repr)->GetList(); +  SgStatement* stmt; +  SgExpression * exp; +   +  if (list != NULL) { +    for (SgStatementPtrList::iterator it = (*list).begin(); +         it != (*list).end(); it++) { +      omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(*it)); +      std::vector<IR_ScalarRef *> a = FindScalarRef(r); +      delete r; +      std::copy(a.begin(), a.end(), back_inserter(scalars)); +    } +  } +   +  else if (tnl != NULL) { +    if (stmt = isSgStatement(tnl)) { +      if (isSgBasicBlock(stmt)) { +        SgStatementPtrList& stmts = +          isSgBasicBlock(stmt)->get_statements(); +        for (int i = 0; i < stmts.size(); i++) { +          omega::CG_roseRepr *r = new omega::CG_roseRepr( +            isSgNode(stmts[i])); +          std::vector<IR_ScalarRef *> a = FindScalarRef(r); +          delete r; +          std::copy(a.begin(), a.end(), back_inserter(scalars)); +        } +         +      } else if (isSgForStatement(stmt)) { +         +        SgForStatement *tnf = isSgForStatement(stmt); +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgStatement(tnf->get_loop_body())); +        std::vector<IR_ScalarRef *> a = FindScalarRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(scalars)); +      } else if (isSgFortranDo(stmt)) { +        SgFortranDo *tfortran = isSgFortranDo(stmt); +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgStatement(tfortran->get_body())); +        std::vector<IR_ScalarRef *> a = FindScalarRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(scalars)); +      } else if (isSgIfStmt(stmt)) { +        SgIfStmt* tni = isSgIfStmt(stmt); +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgNode(tni->get_conditional())); +        std::vector<IR_ScalarRef *> a = FindScalarRef(r); +        
delete r; +        std::copy(a.begin(), a.end(), back_inserter(scalars)); +        r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); +        a = FindScalarRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(scalars)); +        r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); +        a = FindScalarRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(scalars)); +      } else if (isSgExprStatement(stmt)) { +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgExpression( +            isSgExprStatement(stmt)->get_expression())); +        std::vector<IR_ScalarRef *> a = FindScalarRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(scalars)); +         +      } +    } +  } else { +    SgExpression* op = +      static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); +    if (isSgVarRefExp(op) +        && (!isSgArrayType(isSgVarRefExp(op)->get_type()))) { +      if (SgBinaryOp* op_ = isSgBinaryOp( +            isSgVarRefExp(op)->get_parent())) { +        if (SgCompoundAssignOp *op__ = isSgCompoundAssignOp(op_)) { +          if (isSgCompoundAssignOp(op_)->get_lhs_operand() +              == isSgVarRefExp(op)) { +            scalars.push_back( +              new IR_roseScalarRef(this, isSgVarRefExp(op), +                                   1)); +            scalars.push_back( +              new IR_roseScalarRef(this, isSgVarRefExp(op), +                                   0)); +          } +        } +      } else if (SgAssignOp* assmt = isSgAssignOp( +                   isSgVarRefExp(op)->get_parent())) { +         +        if (assmt->get_lhs_operand() == isSgVarRefExp(op)) +          scalars.push_back( +            new IR_roseScalarRef(this, isSgVarRefExp(op), 1)); +      } else if (SgAssignOp * assmt = isSgAssignOp( +                   isSgVarRefExp(op)->get_parent())) { +         +        if (assmt->get_rhs_operand() == isSgVarRefExp(op)) +   
       scalars.push_back( +            new IR_roseScalarRef(this, isSgVarRefExp(op), 0)); +      } else +        scalars.push_back( +          new IR_roseScalarRef(this, isSgVarRefExp(op), 0)); +    } else if (isSgAssignOp(op)) { +      omega::CG_roseRepr *r1 = new omega::CG_roseRepr( +        isSgAssignOp(op)->get_lhs_operand()); +      std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); +      delete r1; +      std::copy(a1.begin(), a1.end(), back_inserter(scalars)); +      omega::CG_roseRepr *r2 = new omega::CG_roseRepr( +        isSgAssignOp(op)->get_rhs_operand()); +      std::vector<IR_ScalarRef *> a2 = FindScalarRef(r2); +      delete r2; +      std::copy(a2.begin(), a2.end(), back_inserter(scalars)); +       +    } else if (isSgBinaryOp(op)) { +      omega::CG_roseRepr *r1 = new omega::CG_roseRepr( +        isSgBinaryOp(op)->get_lhs_operand()); +      std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); +      delete r1; +      std::copy(a1.begin(), a1.end(), back_inserter(scalars)); +      omega::CG_roseRepr *r2 = new omega::CG_roseRepr( +        isSgBinaryOp(op)->get_rhs_operand()); +      std::vector<IR_ScalarRef *> a2 = FindScalarRef(r2); +      delete r2; +      std::copy(a2.begin(), a2.end(), back_inserter(scalars)); +    } else if (isSgUnaryOp(op)) { +      omega::CG_roseRepr *r1 = new omega::CG_roseRepr( +        isSgUnaryOp(op)->get_operand()); +      std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); +      delete r1; +      std::copy(a1.begin(), a1.end(), back_inserter(scalars)); +    } +     +  } +  return scalars; +   +} + +std::vector<IR_ArrayRef *> IR_roseCode::FindArrayRef( +  const omega::CG_outputRepr *repr) const { +  std::vector<IR_ArrayRef *> arrays; +  SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); +  SgStatementPtrList* list = +    static_cast<const omega::CG_roseRepr *>(repr)->GetList(); +  SgStatement* stmt; +  SgExpression * exp; +   +  if (list != NULL) { +    for (SgStatementPtrList::iterator it = 
(*list).begin(); +         it != (*list).end(); it++) { +      omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(*it)); +      std::vector<IR_ArrayRef *> a = FindArrayRef(r); +      delete r; +      std::copy(a.begin(), a.end(), back_inserter(arrays)); +    } +  } else if (tnl != NULL) { +    if (stmt = isSgStatement(tnl)) { +      if (isSgBasicBlock(stmt)) { +        SgStatementPtrList& stmts = +          isSgBasicBlock(stmt)->get_statements(); +        for (int i = 0; i < stmts.size(); i++) { +          omega::CG_roseRepr *r = new omega::CG_roseRepr( +            isSgNode(stmts[i])); +          std::vector<IR_ArrayRef *> a = FindArrayRef(r); +          delete r; +          std::copy(a.begin(), a.end(), back_inserter(arrays)); +        } +         +      } else if (isSgForStatement(stmt)) { +         +        SgForStatement *tnf = isSgForStatement(stmt); +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgStatement(tnf->get_loop_body())); +        std::vector<IR_ArrayRef *> a = FindArrayRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(arrays)); +      } else if (isSgFortranDo(stmt)) { +        SgFortranDo *tfortran = isSgFortranDo(stmt); +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgStatement(tfortran->get_body())); +        std::vector<IR_ArrayRef *> a = FindArrayRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(arrays)); +      } else if (isSgIfStmt(stmt)) { +        SgIfStmt* tni = isSgIfStmt(stmt); +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgNode(tni->get_conditional())); +        std::vector<IR_ArrayRef *> a = FindArrayRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(arrays)); +        r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); +        a = FindArrayRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(arrays)); +        r = new 
omega::CG_roseRepr(isSgNode(tni->get_false_body())); +        a = FindArrayRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(arrays)); +      } else if (isSgExprStatement(stmt)) { +        omega::CG_roseRepr *r = new omega::CG_roseRepr( +          isSgExpression( +            isSgExprStatement(stmt)->get_expression())); +        std::vector<IR_ArrayRef *> a = FindArrayRef(r); +        delete r; +        std::copy(a.begin(), a.end(), back_inserter(arrays)); +         +      } +    } +  } else { +    SgExpression* op = +      static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); +    if (isSgPntrArrRefExp(op)) { +       +      SgVarRefExp* base; +      SgExpression* op2; +      if (isSgCompoundAssignOp(isSgPntrArrRefExp(op)->get_parent())) { +        IR_roseArrayRef *ref1 = new IR_roseArrayRef(this, +                                                    isSgPntrArrRefExp(op), 0); +        arrays.push_back(ref1); +        IR_roseArrayRef *ref2 = new IR_roseArrayRef(this, +                                                    isSgPntrArrRefExp(op), 1); +        arrays.push_back(ref2); +      } else { +        IR_roseArrayRef *ref3 = new IR_roseArrayRef(this, +                                                    isSgPntrArrRefExp(op), -1); +        arrays.push_back(ref3); +         +        while (isSgPntrArrRefExp(op)) { +          op2 = isSgPntrArrRefExp(op)->get_rhs_operand(); +          op = isSgPntrArrRefExp(op)->get_lhs_operand(); +          omega::CG_roseRepr *r = new omega::CG_roseRepr(op2); +          std::vector<IR_ArrayRef *> a = FindArrayRef(r); +          delete r; +          std::copy(a.begin(), a.end(), back_inserter(arrays)); +           +        } +      } +    } else if (isSgAssignOp(op)) { +      omega::CG_roseRepr *r1 = new omega::CG_roseRepr( +        isSgAssignOp(op)->get_lhs_operand()); +      std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); +      delete r1; +      std::copy(a1.begin(), a1.end(), 
back_inserter(arrays)); +      omega::CG_roseRepr *r2 = new omega::CG_roseRepr( +        isSgAssignOp(op)->get_rhs_operand()); +      std::vector<IR_ArrayRef *> a2 = FindArrayRef(r2); +      delete r2; +      std::copy(a2.begin(), a2.end(), back_inserter(arrays)); +       +    } else if (isSgBinaryOp(op)) { +      omega::CG_roseRepr *r1 = new omega::CG_roseRepr( +        isSgBinaryOp(op)->get_lhs_operand()); +      std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); +      delete r1; +      std::copy(a1.begin(), a1.end(), back_inserter(arrays)); +      omega::CG_roseRepr *r2 = new omega::CG_roseRepr( +        isSgBinaryOp(op)->get_rhs_operand()); +      std::vector<IR_ArrayRef *> a2 = FindArrayRef(r2); +      delete r2; +      std::copy(a2.begin(), a2.end(), back_inserter(arrays)); +    } else if (isSgUnaryOp(op)) { +      omega::CG_roseRepr *r1 = new omega::CG_roseRepr( +        isSgUnaryOp(op)->get_operand()); +      std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); +      delete r1; +      std::copy(a1.begin(), a1.end(), back_inserter(arrays)); +    } +     +  } +  return arrays; +} + +std::vector<IR_Control *> IR_roseCode::FindOneLevelControlStructure( +  const IR_Block *block) const { + +  std::vector<IR_Control *> controls; +  int i; +  int j; +  int begin; +  int end; +  SgNode* tnl_ = +    ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_); +   +  if (isSgForStatement(tnl_)) +    controls.push_back(new IR_roseLoop(this, tnl_)); +  else if (isSgFortranDo(tnl_)) +	  controls.push_back(new IR_roseLoop(this, tnl_)); +  else if (isSgIfStmt(tnl_)) +    controls.push_back(new IR_roseIf(this, tnl_)); +   +  else if (isSgBasicBlock(tnl_)) { +     +    SgStatementPtrList& stmts = isSgBasicBlock(tnl_)->get_statements(); +     +    for (i = 0; i < stmts.size(); i++) { +      if (isSgNode(stmts[i]) +          == ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->start_)) +        begin = i; +      if (isSgNode(stmts[i]) +          == 
((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->end_)) +        end = i; +    } +     +    SgNode* start = NULL; +    SgNode* prev = NULL; +    for (i = begin; i <= end; i++) { +      if (isSgForStatement(stmts[i]) || isSgFortranDo(stmts[i])) { +        if (start != NULL) { +          controls.push_back( +            new IR_roseBlock(this, +                             (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, +                             start, prev)); +          start = NULL; +        } +        controls.push_back(new IR_roseLoop(this, isSgNode(stmts[i]))); +      } else if (isSgIfStmt(stmts[i])) { +        if (start != NULL) { +          controls.push_back( +            new IR_roseBlock(this, +                             (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, +                             start, prev)); +          start = NULL; +        } +        controls.push_back(new IR_roseIf(this, isSgNode(stmts[i]))); +         +      } else if (start == NULL) +        start = isSgNode(stmts[i]); +       +      prev = isSgNode(stmts[i]); +    } +     +    if ((start != NULL) && (start != isSgNode(stmts[begin]))) +      controls.push_back( +        new IR_roseBlock(this, +                         (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, +                         start, prev)); +  } +   +  return controls; +   +} + +IR_Block *IR_roseCode::MergeNeighboringControlStructures( +  const std::vector<IR_Control *> &controls) const { +  if (controls.size() == 0) +    return NULL; +   +  SgNode *tnl = NULL; +  SgNode *start, *end; +  for (int i = 0; i < controls.size(); i++) { +    switch (controls[i]->type()) { +    case IR_CONTROL_LOOP: { +      SgNode *tf = static_cast<IR_roseLoop *>(controls[i])->tf_; +      if (tnl == NULL) { +        tnl = tf->get_parent(); +        start = end = tf; +      } else { +        if (tnl != tf->get_parent()) +          throw ir_error("controls to merge 
not at the same level"); +        end = tf; +      } +      break; +    } +    case IR_CONTROL_BLOCK: { +      if (tnl == NULL) { +        tnl = static_cast<IR_roseBlock *>(controls[0])->tnl_; +        start = static_cast<IR_roseBlock *>(controls[0])->start_; +        end = static_cast<IR_roseBlock *>(controls[0])->end_; +      } else { +        if (tnl != static_cast<IR_roseBlock *>(controls[0])->tnl_) +          throw ir_error("controls to merge not at the same level"); +        end = static_cast<IR_roseBlock *>(controls[0])->end_; +      } +      break; +    } +    default: +      throw ir_error("unrecognized control to merge"); +    } +  } +   +  return new IR_roseBlock(controls[0]->ir_, tnl, start, end); +} + +IR_Block *IR_roseCode::GetCode() const { +  SgFunctionDefinition* def = NULL; +  SgBasicBlock* block = NULL; +  if (func != 0) { +    if (def = func->get_definition()) { +      if (block = def->get_body()) +        return new IR_roseBlock(this, +                                func->get_definition()->get_body()); +    } +  } +   +  return NULL; +   +} + +void IR_roseCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) { +  /*    SgStatementPtrList *tnl = +        static_cast<omega::CG_roseRepr *>(repr)->GetList(); +        SgNode *tf_old; +  */ +  SgStatementPtrList *tnl = +    static_cast<omega::CG_roseRepr *>(repr)->GetList(); +  SgNode* node_ = static_cast<omega::CG_roseRepr *>(repr)->GetCode(); +  SgNode * tf_old; +   +  /* May need future revision it tnl has more than one statement */ +   +  switch (old->type()) { +     +  case IR_CONTROL_LOOP: +    tf_old = static_cast<IR_roseLoop *>(old)->tf_; +    break; +  case IR_CONTROL_BLOCK: +    tf_old = static_cast<IR_roseBlock *>(old)->start_; +    break; +     +  default: +    throw ir_error("control structure to be replaced not supported"); +    break; +  } +   +  std::string y = tf_old->unparseToString(); +  SgStatement *s = isSgStatement(tf_old); +  if (s != 0) { +    SgStatement *p = 
isSgStatement(tf_old->get_parent()); +     +    if (p != 0) { +      SgStatement* temp = s; +      if (tnl != NULL) { +        SgStatementPtrList::iterator it = (*tnl).begin(); +        p->insert_statement(temp, *it, true); +        temp = *it; +        p->remove_statement(s); +        it++; +        for (; it != (*tnl).end(); it++) { +          p->insert_statement(temp, *it, false); +          temp = *it; +        } +      } else if (node_ != NULL) { +        if (!isSgStatement(node_)) +          throw ir_error("Replacing Code not a statement!"); +        else { +          SgStatement* replace_ = isSgStatement(node_); +          p->insert_statement(s, replace_, true); +          p->remove_statement(s); +           +        } +      } else { +        throw ir_error("Replacing Code not a statement!"); +      } +    } else +      throw ir_error("Replacing Code not a statement!"); +  } else +    throw ir_error("Replacing Code not a statement!"); +   +  delete old; +  delete repr; +  /* May need future revision it tnl has more than one statement */ +  /* +    switch (old->type()) { +     +    case IR_CONTROL_LOOP: +    tf_old = static_cast<IR_roseLoop *>(old)->tf_; +    break; +    case IR_CONTROL_BLOCK: +    tf_old = static_cast<IR_roseBlock *>(old)->start_; +    break; +     +    default: +    throw ir_error("control structure to be replaced not supported"); +    break; +    } +     +    // std::string y = tf_old->unparseToString(); +    SgStatement *s = isSgStatement(tf_old); +    if (s != 0) { +    SgStatement *p = isSgStatement(tf_old->get_parent()); +     +    if (p != 0) { +    //      SgStatement* it2 = isSgStatement(tnl); +     +    //   if(it2 != NULL){ +    p->replace_statement(s, *tnl); +    //   } +    //   else { +    //          throw ir_error("Replacing Code not a statement!"); +    //      } +    } else +    throw ir_error("Replacing Code not a statement!"); +    } else +    throw ir_error("Replacing Code not a statement!"); +    //  y = 
tnl->unparseToString(); +    delete old; +    delete repr; +  */ +} + +void IR_roseCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) { +   +  SgExpression* op = static_cast<omega::CG_roseRepr *>(repr)->GetExpression(); +   +  if (typeid(*old) == typeid(IR_roseArrayRef)) { +    SgPntrArrRefExp* ia_orig = static_cast<IR_roseArrayRef *>(old)->ia_; +    SgExpression* parent = isSgExpression(isSgNode(ia_orig)->get_parent()); +    std::string x = isSgNode(op)->unparseToString(); +    std::string y = isSgNode(ia_orig)->unparseToString(); +    if (parent != NULL) { +      std::string z = isSgNode(parent)->unparseToString(); +      parent->replace_expression(ia_orig, op); +      isSgNode(op)->set_parent(isSgNode(parent)); +       +      /* if(isSgBinaryOp(parent)) +         { +         if(isSgBinaryOp(parent)->get_lhs_operand() == ia_orig){ +         isSgBinaryOp(parent)->set_lhs_operand(op);    +         }else if(isSgBinaryOp(parent)->get_rhs_operand() == ia_orig){ +         isSgBinaryOp(parent)->set_rhs_operand(op);  +          +          +         }  +         else +         parent->replace_expression(ia_orig, op); +      */ +    } else { +      SgStatement* parent_stmt = isSgStatement( +        isSgNode(ia_orig)->get_parent()); +      if (parent_stmt != NULL) +        parent_stmt->replace_expression(ia_orig, op); +      else +        throw ir_error( +          "ReplaceExpression: parent neither expression nor statement"); +    } +  } else +    throw ir_error("replacing a scalar variable not implemented"); +   +  delete old; +} + +IR_OPERATION_TYPE IR_roseCode::QueryExpOperation( +  const omega::CG_outputRepr *repr) const { +  SgExpression* op = +    static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); +   +  if (isSgValueExp(op)) +    return IR_OP_CONSTANT; +  else if (isSgVarRefExp(op) || isSgPntrArrRefExp(op)) +    return IR_OP_VARIABLE; +  else if (isSgAssignOp(op) || isSgCompoundAssignOp(op)) +    return IR_OP_ASSIGNMENT; +  else if 
(isSgAddOp(op)) +    return IR_OP_PLUS; +  else if (isSgSubtractOp(op)) +    return IR_OP_MINUS; +  else if (isSgMultiplyOp(op)) +    return IR_OP_MULTIPLY; +  else if (isSgDivideOp(op)) +    return IR_OP_DIVIDE; +  else if (isSgMinusOp(op)) +    return IR_OP_NEGATIVE; +  else if (isSgConditionalExp(op)) { +    /* A conditional whose test is '>' is reported as MAX and '<' as MIN; any other test is UNKNOWN. The final else is required: without it control reaches the end of this non-void function with no return value. */ +    SgExpression* cond = isSgConditionalExp(op)->get_conditional_exp(); +    if (isSgGreaterThanOp(cond)) +      return IR_OP_MAX; +    else if (isSgLessThanOp(cond)) +      return IR_OP_MIN; +    else +      return IR_OP_UNKNOWN; +  } else if (isSgUnaryAddOp(op)) +    return IR_OP_POSITIVE; +  else if (isSgNullExpression(op)) +    return IR_OP_NULL; +  else +    return IR_OP_UNKNOWN; +} + +IR_CONDITION_TYPE IR_roseCode::QueryBooleanExpOperation( +  const omega::CG_outputRepr *repr) const { +  SgExpression* op2 = +    static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); +  SgNode* op; +   +  if (op2 == NULL) { +    op = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); +     +    if (op != NULL) { +      if (isSgExprStatement(op)) +        op2 = isSgExprStatement(op)->get_expression(); +      else +        return IR_COND_UNKNOWN; +    } else +      return IR_COND_UNKNOWN; +  } +   +  if (isSgEqualityOp(op2)) +    return IR_COND_EQ; +  else if (isSgNotEqualOp(op2)) +    return IR_COND_NE; +  else if (isSgLessThanOp(op2)) +    return IR_COND_LT; +  else if (isSgLessOrEqualOp(op2)) +    return IR_COND_LE; +  else if (isSgGreaterThanOp(op2)) +    return IR_COND_GT; +  else if (isSgGreaterOrEqualOp(op2)) +    return IR_COND_GE; +   +  return IR_COND_UNKNOWN; +   +} + +std::vector<omega::CG_outputRepr *> IR_roseCode::QueryExpOperand( +  const omega::CG_outputRepr *repr) const { +  std::vector<omega::CG_outputRepr *> v; +  SgExpression* op1; +  SgExpression* op2; +  SgExpression* op = +    static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); +  omega::CG_roseRepr *repr1; +   +  if (isSgValueExp(op) || isSgVarRefExp(op)) { +    omega::CG_roseRepr *repr = new 
omega::CG_roseRepr(op); +    v.push_back(repr); +  } else if (isSgAssignOp(op)) { +    op1 = isSgAssignOp(op)->get_rhs_operand(); +    repr1 = new omega::CG_roseRepr(op1); +    v.push_back(repr1); +    /*may be a problem as assignOp is a binaryop destop might be needed */ +  } else if (isSgMinusOp(op)) { +    op1 = isSgMinusOp(op)->get_operand(); +    repr1 = new omega::CG_roseRepr(op1); +    v.push_back(repr1); +  } else if (isSgUnaryAddOp(op)) { +    op1 = isSgUnaryAddOp(op)->get_operand(); +    repr1 = new omega::CG_roseRepr(op1); +    v.push_back(repr1); +  } else if ((isSgAddOp(op) || isSgSubtractOp(op)) +             || (isSgMultiplyOp(op) || isSgDivideOp(op))) { +    op1 = isSgBinaryOp(op)->get_lhs_operand(); +    repr1 = new omega::CG_roseRepr(op1); +    v.push_back(repr1); +     +    op2 = isSgBinaryOp(op)->get_rhs_operand(); +    repr1 = new omega::CG_roseRepr(op2); +    v.push_back(repr1); +  } else if (isSgConditionalExp(op)) { +    SgExpression* cond = isSgConditionalExp(op)->get_conditional_exp(); +    op1 = isSgBinaryOp(cond)->get_lhs_operand(); +    repr1 = new omega::CG_roseRepr(op1); +    v.push_back(repr1); +     +    op2 = isSgBinaryOp(cond)->get_rhs_operand(); +    repr1 = new omega::CG_roseRepr(op2); +    v.push_back(repr1); +  } else if (isSgCompoundAssignOp(op)) { +    SgExpression* cond = isSgCompoundAssignOp(op); +    op1 = isSgBinaryOp(cond)->get_lhs_operand(); +    repr1 = new omega::CG_roseRepr(op1); +    v.push_back(repr1); +     +    op2 = isSgBinaryOp(cond)->get_rhs_operand(); +    repr1 = new omega::CG_roseRepr(op2); +    v.push_back(repr1); +     +  } else if (isSgBinaryOp(op)) { +     +    op1 = isSgBinaryOp(op)->get_lhs_operand(); +    repr1 = new omega::CG_roseRepr(op1); +    v.push_back(repr1); +     +    op2 = isSgBinaryOp(op)->get_rhs_operand(); +    repr1 = new omega::CG_roseRepr(op2); +    v.push_back(repr1); +  } +   +  else +    throw ir_error("operation not supported"); +   +  return v; +} + +IR_Ref 
*IR_roseCode::Repr2Ref(const omega::CG_outputRepr *repr) const { +  /* Wraps the expression held by repr in a new IR_roseConstantRef (int, unsigned, long or float literal) or IR_roseScalarRef (variable reference); throws ir_error for any other expression. */ +  SgExpression* op = +    static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); +   +  if (SgValueExp* im = isSgValueExp(op)) { +    if (isSgIntVal(im)) +      return new IR_roseConstantRef(this, +                                    static_cast<omega::coef_t>(isSgIntVal(im)->get_value())); +    else if (isSgUnsignedIntVal(im)) +      return new IR_roseConstantRef(this, +                                    static_cast<omega::coef_t>(isSgUnsignedIntVal(im)->get_value())); +    else if (isSgLongIntVal(im)) +      return new IR_roseConstantRef(this, +                                    static_cast<omega::coef_t>(isSgLongIntVal(im)->get_value())); +    else if (isSgFloatVal(im)) +      return new IR_roseConstantRef(this, isSgFloatVal(im)->get_value()); +    else +      /* was assert(0): compiled out under NDEBUG, which left this path without a return value */ +      throw ir_error("Repr2Ref: unsupported constant type"); +     +  } else if (isSgVarRefExp(op)) +    return new IR_roseScalarRef(this, isSgVarRefExp(op)); +  else +    /* was assert(0): same missing-return hazard as above */ +    throw ir_error("Repr2Ref: expression is neither a constant nor a scalar variable"); +   +} + diff --git a/src/ir_rose_utils.cc b/src/ir_rose_utils.cc new file mode 100644 index 0000000..64b0891 --- /dev/null +++ b/src/ir_rose_utils.cc @@ -0,0 +1,67 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009 University of Utah + All Rights Reserved. + + Purpose: +   ROSE interface utilities. 
+ + Notes: + + Update history: +   01/2006 created by Chun Chen +*****************************************************************************/ + +#include "ir_rose_utils.hh" + + + +/* Returns the for-statements that are direct children of the basic block tnl, in statement order; returns an empty vector when tnl is not a basic block. */ +std::vector<SgForStatement *> find_loops(SgNode *tnl) { +  std::vector<SgForStatement *> result; +  +  /* the isSg* casts yield NULL on a type mismatch, so check before dereferencing */ +  SgBasicBlock *block = isSgBasicBlock(tnl); +  if (block == NULL) +    return result; +  +  SgStatementPtrList& blockStatements = block->get_statements(); +  for(SgStatementPtrList::const_iterator j = blockStatements.begin(); j != blockStatements.end(); j++) +    if(isSgForStatement(*j)) +      result.push_back(isSgForStatement(*j)); +   +  return result; +} + +std::vector<SgForStatement *> find_deepest_loops(SgStatementPtrList& tnl) { +   +  std::vector<SgForStatement *> loops; +   +   +   +  for(SgStatementPtrList::const_iterator j = tnl.begin(); j != tnl.end(); j++) +  { +    std::vector<SgForStatement *> t = find_deepest_loops(isSgNode(*j)); +    if (t.size() > loops.size()) +      loops = t; +  }        +   +   +   +  return loops; +   +} + +std::vector<SgForStatement *> find_deepest_loops(SgNode *tn) { +  if (isSgForStatement(tn)) { +    std::vector<SgForStatement *> loops; +     +    SgForStatement *tnf = static_cast<SgForStatement*>(tn); +    loops.insert(loops.end(), tnf); +    std::vector<SgForStatement*> t = find_deepest_loops(isSgNode(tnf->get_loop_body())); +    std::copy(t.begin(), t.end(), std::back_inserter(loops)); +     +    return loops; +  } +  else if (isSgBasicBlock(tn)) { +    SgBasicBlock *tnb = static_cast<SgBasicBlock*>(tn); +    return find_deepest_loops(tnb->get_statements()); +  } +  else  +    return std::vector<SgForStatement *>();                +} + diff --git a/src/irtools.cc b/src/irtools.cc new file mode 100644 index 0000000..4ab6c85 --- /dev/null +++ b/src/irtools.cc @@ -0,0 +1,279 @@ +/***************************************************************************** + Copyright (C) 2010 University of Utah + All Rights Reserved. + + Purpose: +   Useful tools to analyze code in compiler IR format. 
+ + Notes: + + History: +   06/2010 Created by Chun Chen. +*****************************************************************************/ + +#include <iostream> +#include <code_gen/CG_outputBuilder.h> +#include "irtools.hh" +#include "omegatools.hh" +#include "chill_error.hh" + +using namespace omega; + +// Build IR tree from the source code.  Block type node can only be +// leaf, i.e., there is no further structures inside a block allowed. +std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *parent) { +  std::vector<ir_tree_node *> result; +   +  switch (control->type()) { +  case IR_CONTROL_BLOCK: { +    std::vector<IR_Control *> controls = control->ir_->FindOneLevelControlStructure(static_cast<IR_Block *>(control)); +    if (controls.size() == 0) { +      ir_tree_node *node = new ir_tree_node; +      node->content = control; +      node->parent = parent; +      node->payload = -1; +      result.push_back(node); +    } +    else { +      delete control; +      for (int i = 0; i < controls.size(); i++) +        switch (controls[i]->type()) { +        case IR_CONTROL_BLOCK: { +          std::vector<ir_tree_node *> t = build_ir_tree(controls[i], parent); +          result.insert(result.end(), t.begin(), t.end()); +          break; +        } +        case IR_CONTROL_LOOP: { +          ir_tree_node *node = new ir_tree_node; +          node->content = controls[i]; +          node->parent = parent; +          node->children = build_ir_tree(static_cast<IR_Loop *>(controls[i])->body(), node); +          node->payload = -1; +          result.push_back(node); +          break; +        } +        case IR_CONTROL_IF: { +          static int unique_if_identifier = 0; +           +          IR_Block *block = static_cast<IR_If *>(controls[i])->then_body(); +          if (block != NULL) { +            ir_tree_node *node = new ir_tree_node; +            node->content = controls[i]; +            node->parent = parent; +            node->children = 
build_ir_tree(block, node); +            node->payload = unique_if_identifier+1; +            result.push_back(node); +          } +           +           +          block = static_cast<IR_If *>(controls[i])->else_body(); +          if ( block != NULL) { +            ir_tree_node *node = new ir_tree_node; +            node->content = controls[i]->clone(); +            node->parent = parent; +            node->children = build_ir_tree(block, node); +            node->payload = unique_if_identifier; +            result.push_back(node); +          } +           +          unique_if_identifier += 2; +          break; +        } +        default: +          ir_tree_node *node = new ir_tree_node; +          node->content = controls[i]; +          node->parent = parent; +          node->payload = -1; +          result.push_back(node); +          break; +        } +    } +    break; +  } +  case IR_CONTROL_LOOP: { +    ir_tree_node *node = new ir_tree_node; +    node->content = control; +    node->parent = parent; +    node->children = build_ir_tree(static_cast<const IR_Loop *>(control)->body(), node); +    node->payload = -1; +    result.push_back(node); +    break; +  } +  default: +    ir_tree_node *node = new ir_tree_node; +    node->content = control; +    node->parent = parent; +    node->payload = -1; +    result.push_back(node); +    break; +  } +   +  return result; +} + + +// Extract statements from IR tree. Statements returned are ordered in +// lexical order in the source code. 
+std::vector<ir_tree_node *> extract_ir_stmts(const std::vector<ir_tree_node *> &ir_tree) { +  std::vector<ir_tree_node *> result; +  for (int i = 0; i < ir_tree.size(); i++) +    switch (ir_tree[i]->content->type()) { +    case IR_CONTROL_BLOCK: +      result.push_back(ir_tree[i]); +      break; +    case IR_CONTROL_LOOP: { +      // clear loop payload from previous unsuccessful initialization process +      ir_tree[i]->payload = -1; +       +      std::vector<ir_tree_node *> t = extract_ir_stmts(ir_tree[i]->children); +      result.insert(result.end(), t.begin(), t.end()); +      break; +    }       +    case IR_CONTROL_IF: { +      std::vector<ir_tree_node *> t = extract_ir_stmts(ir_tree[i]->children); +      result.insert(result.end(), t.begin(), t.end()); +      break; +    } +    default: +      throw std::invalid_argument("invalid ir tree"); +    } +   +  return result; +} + + +bool is_dependence_valid(ir_tree_node *src_node, ir_tree_node *dst_node, +                         const DependenceVector &dv, bool before) { +  std::set<ir_tree_node *> loop_nodes; +  ir_tree_node *itn = src_node; +   +  if (!dv.is_scalar_dependence) { +    while (itn->parent != NULL) { +      itn = itn->parent; +      if (itn->content->type() == IR_CONTROL_LOOP) +        loop_nodes.insert(itn); +    } +     +    int last_dim = -1; +    itn = dst_node; +    while (itn->parent != NULL) { +      itn = itn->parent; +      if (itn->content->type() == IR_CONTROL_LOOP +          && loop_nodes.find(itn) != loop_nodes.end() +          && itn->payload > last_dim) +        last_dim = itn->payload; +    } +     +    if (last_dim == -1) +      return true; +     +    for (int i = 0; i <= last_dim; i++) { +      if (dv.lbounds[i] > 0) +        return true; +      else if (dv.lbounds[i] < 0) +        return false; +    } +     +    if (before) +      return true; +    else +      return false; +  } +   +  return true; +   +} + + + +// Test data dependences between two statements. 
The first statement +// in parameter must be lexically before the second statement in +// parameter.  Returned dependences are all lexicographically +// positive. The first vector in returned pair is dependences from the +// first statement to the second statement and the second vector in +// returned pair is in reverse order. +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_data_dependences( +  IR_Code *ir, const CG_outputRepr *repr1, const Relation &IS1, +  const CG_outputRepr *repr2, const Relation &IS2, +  std::vector<Free_Var_Decl*> &freevar, std::vector<std::string> index, +  int i, int j) { +  std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > result; +   +  if (repr1 == repr2) { +    std::vector<IR_ArrayRef *> access = ir->FindArrayRef(repr1); +     +    for (int i = 0; i < access.size(); i++) { +      IR_ArrayRef *a = access[i]; +      IR_ArraySymbol *sym_a = a->symbol(); +      for (int j = i; j < access.size(); j++) { +        IR_ArrayRef *b = access[j]; +        IR_ArraySymbol *sym_b = b->symbol(); +         +        if (*sym_a == *sym_b && (a->is_write() || b->is_write())) { +          Relation r = arrays2relation(ir, freevar, a, IS1, b, IS2); +          std::pair<std::vector<DependenceVector>, +            std::vector<DependenceVector> > dv = +            relation2dependences(a, b, r); +          result.first.insert(result.first.end(), dv.first.begin(), +                              dv.first.end()); +          result.second.insert(result.second.end(), dv.second.begin(), +                               dv.second.end()); +        } +        delete sym_b; +      } +      delete sym_a; +       +    } +     +    for (int i = 0; i < access.size(); i++) +      delete access[i]; +  } else { +    std::vector<IR_ArrayRef *> access1 = ir->FindArrayRef(repr1); +    std::vector<IR_ArrayRef *> access2 = ir->FindArrayRef(repr2); +     +    for (int i = 0; i < access1.size(); i++) { +      IR_ArrayRef *a = 
access1[i]; +      IR_ArraySymbol *sym_a = a->symbol(); +       +      for (int j = 0; j < access2.size(); j++) { +        IR_ArrayRef *b = access2[j]; +        IR_ArraySymbol *sym_b = b->symbol(); +        if (*sym_a == *sym_b && (a->is_write() || b->is_write())) { +          Relation r = arrays2relation(ir, freevar, a, IS1, b, IS2); +          std::pair<std::vector<DependenceVector>, +            std::vector<DependenceVector> > dv = +            relation2dependences(a, b, r); +           +          result.first.insert(result.first.end(), dv.first.begin(), +                              dv.first.end()); +          result.second.insert(result.second.end(), dv.second.begin(), +                               dv.second.end()); +        } +        delete sym_b; +      } +      delete sym_a; +    } +     +    for (int i = 0; i < access1.size(); i++) +      delete access1[i]; +    for (int i = 0; i < access2.size(); i++) +      delete access2[i]; +  } +  /*std::pair<std::vector<DependenceVector>, +    std::vector<DependenceVector> > dv = +    ir->FindScalarDeps(repr1, repr2, index, i, j); +     +     +    result.first.insert(result.first.end(), dv.first.begin(), +    dv.first.end()); +    result.second.insert(result.second.end(), dv.second.begin(), +    dv.second.end());*/ +  /*result.first.insert(result.first.end(), dv.first.begin(), +    dv.first.end()); +    result.second.insert(result.second.end(), dv.second.begin(), +    dv.second.end()); +  */ +   +  return result; +} + diff --git a/src/loop.cc b/src/loop.cc new file mode 100644 index 0000000..f187a50 --- /dev/null +++ b/src/loop.cc @@ -0,0 +1,1859 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Core loop transformation functionality. 
+ + Notes: + "level" (starting from 1) means loop level and it corresponds to "dim" + (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., + c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 + in transformed iteration space, and variable 4 in Omega relation. + All c's are constant numbers only and they will not show up as actual loops. + Formula: + dim = 2*level - 1 + var = dim + 1 + + History: + 10/2005 Created by Chun Chen. + 09/2009 Expand tile functionality, -chun + 10/2009 Initialize unfusible loop nest without bailing out, -chun +*****************************************************************************/ + +#include <limits.h> +#include <math.h> +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include <iostream> +#include <algorithm> +#include <map> +#include "loop.hh" +#include "omegatools.hh" +#include "irtools.hh" +#include "chill_error.hh" +#include <string.h> +#include <list> +using namespace omega; + +const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change +const std::string Loop::overflow_var_name_prefix = std::string("over"); + +//----------------------------------------------------------------------------- +// Class Loop +//----------------------------------------------------------------------------- +// --begin Anand: Added from CHiLL 0.2 + +bool Loop::isInitialized() const { +  return stmt.size() != 0 && !stmt[0].xform.is_null(); +} + +//--end Anand: added from CHiLL 0.2 + +bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, +                     std::vector<ir_tree_node *> &ir_stmt) { + +  ir_stmt = extract_ir_stmts(ir_tree); +  stmt_nesting_level_.resize(ir_stmt.size()); +  std::vector<int> stmt_nesting_level(ir_stmt.size()); +  for (int i = 0; i < ir_stmt.size(); i++) { +    ir_stmt[i]->payload = i; +    int t = 0; +    ir_tree_node *itn = ir_stmt[i]; +    while (itn->parent 
!= NULL) { +      itn = itn->parent; +      if (itn->content->type() == IR_CONTROL_LOOP) +        t++; +    } +    stmt_nesting_level_[i] = t; +    stmt_nesting_level[i] = t; +  } +   +  stmt = std::vector<Statement>(ir_stmt.size()); +  int n_dim = -1; +  int max_loc; +  //std::vector<std::string> index; +  for (int i = 0; i < ir_stmt.size(); i++) { +    int max_nesting_level = -1; +    int loc; +    for (int j = 0; j < ir_stmt.size(); j++) +      if (stmt_nesting_level[j] > max_nesting_level) { +        max_nesting_level = stmt_nesting_level[j]; +        loc = j; +      } +     +    // most deeply nested statement acting as a reference point +    if (n_dim == -1) { +      n_dim = max_nesting_level; +      max_loc = loc; +       +      index = std::vector<std::string>(n_dim); +       +      ir_tree_node *itn = ir_stmt[loc]; +      int cur_dim = n_dim - 1; +      while (itn->parent != NULL) { +        itn = itn->parent; +        if (itn->content->type() == IR_CONTROL_LOOP) { +          index[cur_dim] = +            static_cast<IR_Loop *>(itn->content)->index()->name(); +          itn->payload = cur_dim--; +        } +      } +    } +     +    // align loops by names, temporary solution +    ir_tree_node *itn = ir_stmt[loc]; +    int depth = stmt_nesting_level_[loc] - 1; +    /*   while (itn->parent != NULL) { +         itn = itn->parent; +         if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { +         std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); +         for (int j = 0; j < n_dim; j++) +         if (index[j] == name) { +         itn->payload = j; +         break; +         } +         if (itn->payload == -1) +         throw loop_error("no complex alignment yet"); +         } +         } +    */ +    for (int t = depth; t >= 0; t--) { +      int y = t; +      ir_tree_node *itn = ir_stmt[loc]; +       +      while ((itn->parent != NULL) && (y >= 0)) { +        itn = itn->parent; +        if (itn->content->type() == 
IR_CONTROL_LOOP) +          y--; +      } +       +      if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { +        CG_outputBuilder *ocg = ir->builder(); +         +        itn->payload = depth - t; +         +        CG_outputRepr *code = +          static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); +         +        std::vector<CG_outputRepr *> index_expr; +        std::vector<std::string> old_index; +        CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); +        index_expr.push_back(repl); +        old_index.push_back( +          static_cast<IR_Loop *>(itn->content)->index()->name()); +        code = ocg->CreateSubstitutedStmt(0, code, old_index, +                                          index_expr); +         +        replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); +        //stmt[loc].code = code; +         +      } +    } +     +    // set relation variable names +    Relation r(n_dim); +    F_And *f_root = r.add_and(); +    itn = ir_stmt[loc]; +    int temp_depth = depth; +    while (itn->parent != NULL) { +       +      itn = itn->parent; +      if (itn->content->type() == IR_CONTROL_LOOP) { +        r.name_set_var(itn->payload + 1, index[temp_depth]); +         +        temp_depth--; +      } +      //static_cast<IR_Loop *>(itn->content)->index()->name()); +    } +     +    /*while (itn->parent != NULL) { +      itn = itn->parent; +      if (itn->content->type() == IR_CONTROL_LOOP) +      r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); +      }*/ +     +    // extract information from loop/if structures +    std::vector<bool> processed(n_dim, false); +    std::vector<std::string> vars_to_be_reversed; +    itn = ir_stmt[loc]; +    while (itn->parent != NULL) { +      itn = itn->parent; +       +      switch (itn->content->type()) { +      case IR_CONTROL_LOOP: { +        IR_Loop *lp = static_cast<IR_Loop *>(itn->content); +        Variable_ID v = 
r.set_var(itn->payload + 1); +        int c; +         +        try { +          c = lp->step_size(); +          if (c > 0) { +            CG_outputRepr *lb = lp->lower_bound(); +            exp2formula(ir, r, f_root, freevar, lb, v, 's', +                        IR_COND_GE, true); +            CG_outputRepr *ub = lp->upper_bound(); +            IR_CONDITION_TYPE cond = lp->stop_cond(); +            if (cond == IR_COND_LT || cond == IR_COND_LE) +              exp2formula(ir, r, f_root, freevar, ub, v, 's', +                          cond, true); +            else +              throw ir_error("loop condition not supported"); +             +          } else if (c < 0) { +            CG_outputBuilder *ocg = ir->builder(); +            CG_outputRepr *lb = lp->lower_bound(); +            lb = ocg->CreateMinus(NULL, lb); +            exp2formula(ir, r, f_root, freevar, lb, v, 's', +                        IR_COND_GE, true); +            CG_outputRepr *ub = lp->upper_bound(); +            ub = ocg->CreateMinus(NULL, ub); +            IR_CONDITION_TYPE cond = lp->stop_cond(); +            if (cond == IR_COND_GE) +              exp2formula(ir, r, f_root, freevar, ub, v, 's', +                          IR_COND_LE, true); +            else if (cond == IR_COND_GT) +              exp2formula(ir, r, f_root, freevar, ub, v, 's', +                          IR_COND_LT, true); +            else +              throw ir_error("loop condition not supported"); +             +            vars_to_be_reversed.push_back(lp->index()->name()); +          } else +            throw ir_error("loop step size zero"); +        } catch (const ir_error &e) { +          for (int i = 0; i < itn->children.size(); i++) +            delete itn->children[i]; +          itn->children = std::vector<ir_tree_node *>(); +          itn->content = itn->content->convert(); +          return false; +        } +         +        if (abs(c) != 1) { +          F_Exists *f_exists = f_root->add_exists(); +          
Variable_ID e = f_exists->declare(); +          F_And *f_and = f_exists->add_and(); +          Stride_Handle h = f_and->add_stride(abs(c)); +          if (c > 0) +            h.update_coef(e, 1); +          else +            h.update_coef(e, -1); +          h.update_coef(v, -1); +          CG_outputRepr *lb = lp->lower_bound(); +          exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, +                      true); +        } +         +        processed[itn->payload] = true; +        break; +      } +      case IR_CONTROL_IF: { +        CG_outputRepr *cond = +          static_cast<IR_If *>(itn->content)->condition(); +        try { +          if (itn->payload % 2 == 1) +            exp2constraint(ir, r, f_root, freevar, cond, true); +          else { +            F_Not *f_not = f_root->add_not(); +            F_And *f_and = f_not->add_and(); +            exp2constraint(ir, r, f_and, freevar, cond, true); +          } +        } catch (const ir_error &e) { +          std::vector<ir_tree_node *> *t; +          if (itn->parent == NULL) +            t = &ir_tree; +          else +            t = &(itn->parent->children); +          int id = itn->payload; +          int i = t->size() - 1; +          while (i >= 0) { +            if ((*t)[i] == itn) { +              for (int j = 0; j < itn->children.size(); j++) +                delete itn->children[j]; +              itn->children = std::vector<ir_tree_node *>(); +              itn->content = itn->content->convert(); +            } else if ((*t)[i]->payload >> 1 == id >> 1) { +              delete (*t)[i]; +              t->erase(t->begin() + i); +            } +            i--; +          } +          return false; +        } +         +        break; +      } +      default: +        for (int i = 0; i < itn->children.size(); i++) +          delete itn->children[i]; +        itn->children = std::vector<ir_tree_node *>(); +        itn->content = itn->content->convert(); +        return false; +      } +    } 
+     +    // add information for missing loops +    for (int j = 0; j < n_dim; j++) +      if (!processed[j]) { +        ir_tree_node *itn = ir_stmt[max_loc]; +        while (itn->parent != NULL) { +          itn = itn->parent; +          if (itn->content->type() == IR_CONTROL_LOOP +              && itn->payload == j) +            break; +        } +         +        Variable_ID v = r.set_var(j + 1); +        if (loc < max_loc) { +           +          CG_outputBuilder *ocg = ir->builder(); +           +          CG_outputRepr *lb = +            static_cast<IR_Loop *>(itn->content)->lower_bound(); +           +          exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, +                      false); +           +          /*    if (ir->QueryExpOperation( +                static_cast<IR_Loop *>(itn->content)->lower_bound()) +                == IR_OP_VARIABLE) { +                IR_ScalarRef *ref = +                static_cast<IR_ScalarRef *>(ir->Repr2Ref( +                static_cast<IR_Loop *>(itn->content)->lower_bound())); +                std::string name_ = ref->name(); +                 +                for (int i = 0; i < index.size(); i++) +                if (index[i] == name_) { +                exp2formula(ir, r, f_root, freevar, lb, v, 's', +                IR_COND_GE, false); +                 +                CG_outputRepr *ub = +                static_cast<IR_Loop *>(itn->content)->upper_bound(); +                IR_CONDITION_TYPE cond = +                static_cast<IR_Loop *>(itn->content)->stop_cond(); +                if (cond == IR_COND_LT || cond == IR_COND_LE) +                exp2formula(ir, r, f_root, freevar, ub, v, +                's', cond, false); +                 +                 +                 +                } +                 +                } +          */ +           +        } else { // loc > max_loc +           +          CG_outputBuilder *ocg = ir->builder(); +          CG_outputRepr *ub = +            
static_cast<IR_Loop *>(itn->content)->upper_bound(); +           +          exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, +                      false); +          /*if (ir->QueryExpOperation( +            static_cast<IR_Loop *>(itn->content)->upper_bound()) +            == IR_OP_VARIABLE) { +            IR_ScalarRef *ref = +            static_cast<IR_ScalarRef *>(ir->Repr2Ref( +            static_cast<IR_Loop *>(itn->content)->upper_bound())); +            std::string name_ = ref->name(); +             +            for (int i = 0; i < index.size(); i++) +            if (index[i] == name_) { +             +            CG_outputRepr *lb = +            static_cast<IR_Loop *>(itn->content)->lower_bound(); +             +            exp2formula(ir, r, f_root, freevar, lb, v, 's', +            IR_COND_GE, false); +             +            CG_outputRepr *ub = +            static_cast<IR_Loop *>(itn->content)->upper_bound(); +            IR_CONDITION_TYPE cond = +            static_cast<IR_Loop *>(itn->content)->stop_cond(); +            if (cond == IR_COND_LT || cond == IR_COND_LE) +            exp2formula(ir, r, f_root, freevar, ub, v, +            's', cond, false); +             +             +            } +            } +          */ +        } +      } +     +    r.setup_names(); +    r.simplify(); +     +    // insert the statement +    CG_outputBuilder *ocg = ir->builder(); +    std::vector<CG_outputRepr *> reverse_expr; +    /* Build one negated identifier per reversed index so reverse_expr[k] pairs with vars_to_be_reversed[k] in the substitution below. The vector is 0-based: the previous 1..size() range skipped element 0 and read one past the end. */ +    for (int j = 0; j < vars_to_be_reversed.size(); j++) { +      CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); +      repl = ocg->CreateMinus(NULL, repl); +      reverse_expr.push_back(repl); +    } +    CG_outputRepr *code = +      static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); +    code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed, +                                      reverse_expr); +    stmt[loc].code = code; +    stmt[loc].IS = r; +    stmt[loc].loop_level = 
std::vector<LoopLevel>(n_dim); +    stmt[loc].ir_stmt_node = ir_stmt[loc]; +    for (int i = 0; i < n_dim; i++) { +      stmt[loc].loop_level[i].type = LoopLevelOriginal; +      stmt[loc].loop_level[i].payload = i; +      stmt[loc].loop_level[i].parallel_level = 0; +    } +     +    stmt_nesting_level[loc] = -1; +  } +   +  return true; +} + +Loop::Loop(const IR_Control *control) { + +  last_compute_cgr_ = NULL; +  last_compute_cg_ = NULL; +   +  ir = const_cast<IR_Code *>(control->ir_); +  init_code = NULL; +  cleanup_code = NULL; +  tmp_loop_var_name_counter = 1; +  overflow_var_name_counter = 1; +  known = Relation::True(0); +   +  ir_tree = build_ir_tree(control->clone(), NULL); +  //    std::vector<ir_tree_node *> ir_stmt; +   +  while (!init_loop(ir_tree, ir_stmt)) { +  } + +   +   +  for (int i = 0; i < stmt.size(); i++) { +    std::map<int, CG_outputRepr*>::iterator it = replace.find(i); +     +    if (it != replace.end()) +      stmt[i].code = it->second; +    else +      stmt[i].code = stmt[i].code; +  } +   +  if (stmt.size() != 0) +    dep = DependenceGraph(stmt[0].IS.n_set()); +  else +    dep = DependenceGraph(0); +  // init the dependence graph +  for (int i = 0; i < stmt.size(); i++) +    dep.insert(); +   +  for (int i = 0; i < stmt.size(); i++) +    for (int j = i; j < stmt.size(); j++) { +      std::pair<std::vector<DependenceVector>, +        std::vector<DependenceVector> > dv = test_data_dependences( +          ir, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, +          freevar, index, stmt_nesting_level_[i], +          stmt_nesting_level_[j]); +       +      for (int k = 0; k < dv.first.size(); k++) { +        if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], +                                true)) +          dep.connect(i, j, dv.first[k]); +        else { +          dep.connect(j, i, dv.first[k].reverse()); +        } +         +      } +      for (int k = 0; k < dv.second.size(); k++) +        if 
(is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], +                                false)) +          dep.connect(j, i, dv.second[k]); +        else { +          dep.connect(i, j, dv.second[k].reverse()); +        } +      // std::pair<std::vector<DependenceVector>, +      //                std::vector<DependenceVector> > dv_ = test_data_dependences( +       +    } +   + + +  // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] +  for (int i = 0; i < stmt.size(); i++) { +    int n = stmt[i].IS.n_set(); +    stmt[i].xform = Relation(n, 2 * n + 1); +    F_And *f_root = stmt[i].xform.add_and(); +     +    for (int j = 1; j <= n; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(stmt[i].xform.output_var(2 * j), 1); +      h.update_coef(stmt[i].xform.input_var(j), -1); +    } +     +    for (int j = 1; j <= 2 * n + 1; j += 2) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(stmt[i].xform.output_var(j), 1); +    } +    stmt[i].xform.simplify(); +  } +   +  if (stmt.size() != 0) +    num_dep_dim = stmt[0].IS.n_set(); +  else +    num_dep_dim = 0; +  // debug +  /*for (int i = 0; i < stmt.size(); i++) { +    std::cout << i << ": "; +    //stmt[i].xform.print(); +    stmt[i].IS.print(); +    std::cout << std::endl; +     +    }*/ +  //end debug +} + +Loop::~Loop() { +   +  delete last_compute_cgr_; +  delete last_compute_cg_; +   +  for (int i = 0; i < stmt.size(); i++) +    if (stmt[i].code != NULL) { +      stmt[i].code->clear(); +      delete stmt[i].code; +    } +   +  for (int i = 0; i < ir_tree.size(); i++) +    delete ir_tree[i]; +   +  if (init_code != NULL) { +    init_code->clear(); +    delete init_code; +  } +  if (cleanup_code != NULL) { +    cleanup_code->clear(); +    delete cleanup_code; +  } +} + +int Loop::get_dep_dim_of(int stmt_num, int level) const { +  // reject statement numbers outside [0, stmt.size()) +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement " + to_string(stmt_num)); +   +  if (level < 1 || 
level > stmt[stmt_num].loop_level.size()) +    return -1; +   +  int trip_count = 0; +  while (true) { +    switch (stmt[stmt_num].loop_level[level - 1].type) { +    case LoopLevelOriginal: +      return stmt[stmt_num].loop_level[level - 1].payload; +    case LoopLevelTile: +      level = stmt[stmt_num].loop_level[level - 1].payload; +      if (level < 1) +        return -1; +      if (level > stmt[stmt_num].loop_level.size()) +        throw loop_error( +          "incorrect loop level information for statement " +          + to_string(stmt_num)); +      break; +    default: +      throw loop_error( +        "unknown loop level information for statement " +        + to_string(stmt_num)); +    } +    trip_count++; +    if (trip_count >= stmt[stmt_num].loop_level.size()) +      throw loop_error( +        "incorrect loop level information for statement " +        + to_string(stmt_num)); +  } +} + +// Scan the loop levels strictly before 'level' (clamped to one past the last +// level), innermost first, and return the payload of the first +// LoopLevelOriginal entry found; return -1 if level < 1 or none exists. +// Throws std::invalid_argument when stmt_num is out of range. +int Loop::get_last_dep_dim_before(int stmt_num, int level) const { +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement " + to_string(stmt_num)); +   +  if (level < 1) +    return -1; +  if (level > stmt[stmt_num].loop_level.size()) +    level = stmt[stmt_num].loop_level.size() + 1; +   +  for (int i = level - 1; i >= 1; i--) +    if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) +      return stmt[stmt_num].loop_level[i - 1].payload; +   +  return -1; +} + +void Loop::print_internal_loop_structure() const { +  for (int i = 0; i < stmt.size(); i++) { +    std::vector<int> lex = getLexicalOrder(i); +    std::cout << "s" << i + 1 << ": "; +    for (int j = 0; j < stmt[i].loop_level.size(); j++) { +      if (2 * j < lex.size()) +        std::cout << lex[2 * j]; +      switch (stmt[i].loop_level[j].type) { +      case LoopLevelOriginal: +        std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; +        break; +      case LoopLevelTile: +        std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; 
+        break; +      default: +        std::cout << "(unknown)"; +      } +      std::cout << ' '; +    } +    for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { +      std::cout << lex[j]; +      if (j != lex.size() - 1) +        std::cout << ' '; +    } +    std::cout << std::endl; +  } +} + +CG_outputRepr *Loop::getCode(int effort) const { +  const int m = stmt.size(); +  if (m == 0) +    return NULL; +  const int n = stmt[0].xform.n_out(); +   +  if (last_compute_cg_ == NULL) { +    std::vector<Relation> IS(m); +    std::vector<Relation> xforms(m); +    for (int i = 0; i < m; i++) { +      IS[i] = stmt[i].IS; +      xforms[i] = stmt[i].xform; +    } +    Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); +     +    last_compute_cg_ = new CodeGen(xforms, IS, known); +    delete last_compute_cgr_; +    last_compute_cgr_ = NULL; +  } +   +  if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { +    delete last_compute_cgr_; +    last_compute_cgr_ = last_compute_cg_->buildAST(effort); +    last_compute_effort_ = effort; +  } +   +  std::vector<CG_outputRepr *> stmts(m); +  for (int i = 0; i < m; i++) +    stmts[i] = stmt[i].code; +  CG_outputBuilder *ocg = ir->builder(); +  CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts); +   +  if (init_code != NULL) +    repr = ocg->StmtListAppend(init_code->clone(), repr); +  if (cleanup_code != NULL) +    repr = ocg->StmtListAppend(repr, cleanup_code->clone()); +   +  return repr; +} + +void Loop::printCode(int effort) const { +  const int m = stmt.size(); +  if (m == 0) +    return; +  const int n = stmt[0].xform.n_out(); +   +  if (last_compute_cg_ == NULL) { +    std::vector<Relation> IS(m); +    std::vector<Relation> xforms(m); +    for (int i = 0; i < m; i++) { +      IS[i] = stmt[i].IS; +      xforms[i] = stmt[i].xform; +    } +    Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); +     +    last_compute_cg_ = new CodeGen(xforms, 
IS, known); +    delete last_compute_cgr_; +    last_compute_cgr_ = NULL; +  } +   +  if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { +    delete last_compute_cgr_; +    last_compute_cgr_ = last_compute_cg_->buildAST(effort); +    last_compute_effort_ = effort; +  } +   +  std::string repr = last_compute_cgr_->printString(); +  std::cout << repr << std::endl; +} + +void Loop::printIterationSpace() const { +  for (int i = 0; i < stmt.size(); i++) { +    std::cout << "s" << i << ": "; +    Relation r = getNewIS(i); +    for (int j = 1; j <= r.n_inp(); j++) +      r.name_input_var(j, CodeGen::loop_var_name_prefix + to_string(j)); +    r.setup_names(); +    r.print(); +  } +} + +void Loop::printDependenceGraph() const { +  if (dep.edgeCount() == 0) +    std::cout << "no dependence exists" << std::endl; +  else { +    std::cout << "dependence graph:" << std::endl; +    std::cout << dep; +  } +} + +Relation Loop::getNewIS(int stmt_num) const { +  Relation result; +   +  if (stmt[stmt_num].xform.is_null()) { +    Relation known = Extend_Set(copy(this->known), +                                stmt[stmt_num].IS.n_set() - this->known.n_set()); +    result = Intersection(copy(stmt[stmt_num].IS), known); +  } else { +    Relation known = Extend_Set(copy(this->known), +                                stmt[stmt_num].xform.n_out() - this->known.n_set()); +    result = Intersection( +      Range( +        Restrict_Domain(copy(stmt[stmt_num].xform), +                        copy(stmt[stmt_num].IS))), known); +  } +   +  result.simplify(2, 4); +   +  return result; +} + +std::vector<Relation> Loop::getNewIS() const { +  const int m = stmt.size(); +   +  std::vector<Relation> new_IS(m); +  for (int i = 0; i < m; i++) +    new_IS[i] = getNewIS(i); +   +  return new_IS; +} + +void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { +	// check sanity of parameters: stmt_num indexes stmt[] below, so it must +	// lie in [0, stmt.size()) +	if(stmt_num < 0 || stmt_num >= stmt.size()) +		throw std::invalid_argument("invalid statement " + 
to_string(stmt_num)); +	 +	CG_outputBuilder *ocg = ir->builder(); +	CG_outputRepr *code = stmt[stmt_num].code; +	ocg->CreatePragmaAttribute(code, level, pragmaText); +} + +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) { +	// check sanity of parameters: stmt_num indexes stmt[] below, so it must +	// lie in [0, stmt.size()) +	if(stmt_num < 0 || stmt_num >= stmt.size()) +		throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + +	CG_outputBuilder *ocg = ir->builder(); +	CG_outputRepr *code = stmt[stmt_num].code; +	ocg->CreatePrefetchAttribute(code, level, arrName, hint); +} + +std::vector<int> Loop::getLexicalOrder(int stmt_num) const { +  assert(stmt_num < stmt.size()); +   +  const int n = stmt[stmt_num].xform.n_out(); +  std::vector<int> lex(n, 0); +   +  for (int i = 0; i < n; i += 2) +    lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); +   +  return lex; +} + +// find the sub loop nest specified by stmt_num and level, +// only iteration space satisfiable statements returned. +std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const { +  assert(stmt_num >= 0 && stmt_num < stmt.size()); +  assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); +   +  std::set<int> working; +  for (int i = 0; i < stmt.size(); i++) +    if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable() +        && stmt[i].loop_level.size() >= level) +      working.insert(i); +   +  for (int i = 1; i <= level; i++) { +    int a = getLexicalOrder(stmt_num, i); +    for (std::set<int>::iterator j = working.begin(); j != working.end();) { +      int b = getLexicalOrder(*j, i); +      if (b != a) +        working.erase(j++); +      else +        ++j; +    } +  } +   +  return working; +} + +int Loop::getLexicalOrder(int stmt_num, int level) const { +  assert(stmt_num >= 0 && stmt_num < stmt.size()); +  assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); +   +  Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform; +  for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++) +    if 
(abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { +      bool is_const = true; +      for (Constr_Vars_Iter cvi(*e); cvi; cvi++) +        if (cvi.curr_var() != r.output_var(2 * level - 1)) { +          is_const = false; +          break; +        } +      if (is_const) { +        int t = static_cast<int>((*e).get_const()); +        return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t; +      } +    } +   +  throw loop_error( +    "can't find lexical order for statement " + to_string(stmt_num) +    + "'s loop level " + to_string(level)); +} + +std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { +  const int m = stmt.size(); +   +  std::set<int> same_loops; +  for (int i = 0; i < m; i++) { +    if (dim < 0) +      same_loops.insert(i); +    else { +      std::vector<int> a_lex = getLexicalOrder(i); +      int j; +      for (j = 0; j <= dim; j += 2) +        if (lex[j] != a_lex[j]) +          break; +      if (j > dim) +        same_loops.insert(i); +    } +     +  } +   +  return same_loops; +} + +void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { +  const int m = stmt.size(); +   +  if (amount == 0) +    return; +   +  for (int i = 0; i < m; i++) { +    std::vector<int> lex2 = getLexicalOrder(i); +     +    bool need_shift = true; +     +    for (int j = 0; j < dim; j++) +      if (lex2[j] != lex[j]) { +        need_shift = false; +        break; +      } +     +    if (!need_shift) +      continue; +     +    if (amount > 0) { +      if (lex2[dim] < lex[dim]) +        continue; +    } else if (amount < 0) { +      if (lex2[dim] > lex[dim]) +        continue; +    } +     +    assign_const(stmt[i].xform, dim, lex2[dim] + amount); +  } +} + +std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, +                                                     int level) { +   +  std::set<int> not_nested_at_this_level; +  std::map<ir_tree_node*, std::set<int> > sorted_by_loop; +  
std::map<int, std::set<int> > sorted_by_lex_order; +  std::vector<std::set<int> > to_return; +  bool lex_order_already_set = false; +  for (std::set<int>::iterator it = active.begin(); it != active.end(); +       it++) { +     +    if (stmt[*it].ir_stmt_node == NULL) +      lex_order_already_set = true; +  } +   +  if (lex_order_already_set) { +     +    for (std::set<int>::iterator it = active.begin(); it != active.end(); +         it++) { +      std::map<int, std::set<int> >::iterator it2 = +        sorted_by_lex_order.find( +          get_const(stmt[*it].xform, 2 * (level - 1), +                    Output_Var)); +       +      if (it2 != sorted_by_lex_order.end()) +        it2->second.insert(*it); +      else { +         +        std::set<int> to_insert; +         +        to_insert.insert(*it); +         +        sorted_by_lex_order.insert( +          std::pair<int, std::set<int> >( +            get_const(stmt[*it].xform, 2 * (level - 1), +                      Output_Var), to_insert)); +         +      } +       +    } +     +    for (std::map<int, std::set<int> >::iterator it2 = +           sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); +         it2++) +      to_return.push_back(it2->second); +     +  } else { +     +    for (std::set<int>::iterator it = active.begin(); it != active.end(); +         it++) { +       +      ir_tree_node* itn = stmt[*it].ir_stmt_node; +      itn = itn->parent; +      while ((itn != NULL) && (itn->payload != level - 1)) +        itn = itn->parent; +       +      if (itn == NULL) +        not_nested_at_this_level.insert(*it); +      else { +        std::map<ir_tree_node*, std::set<int> >::iterator it2 = +          sorted_by_loop.find(itn); +         +        if (it2 != sorted_by_loop.end()) +          it2->second.insert(*it); +        else { +          std::set<int> to_insert; +           +          to_insert.insert(*it); +           +          sorted_by_loop.insert( +            std::pair<ir_tree_node*, 
std::set<int> >(itn, +                                                     to_insert)); +           +        } +         +      } +       +    } +    if (not_nested_at_this_level.size() > 0) { +      for (std::set<int>::iterator it = not_nested_at_this_level.begin(); +           it != not_nested_at_this_level.end(); it++) { +        std::set<int> temp; +        temp.insert(*it); +        to_return.push_back(temp); +         +      } +    } +    for (std::map<ir_tree_node*, std::set<int> >::iterator it2 = +           sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++) +      to_return.push_back(it2->second); +  } +  return to_return; +} + +void update_successors(int n, int node_num[], int cant_fuse_with[], +                       Graph<std::set<int>, bool> &g, std::list<int> &work_list) { +   +  std::set<int> disconnect; +  for (Graph<std::set<int>, bool>::EdgeList::iterator i = +         g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { +    int m = i->first; +     +    if (node_num[m] != -1) +      throw loop_error("Graph input for fusion has cycles not a DAG!!"); +     +    std::vector<bool> check_ = g.getEdge(n, m); +     +    bool has_bad_edge_path = false; +    for (int i = 0; i < check_.size(); i++) +      if (!check_[i]) { +        has_bad_edge_path = true; +        break; +      } +    if (has_bad_edge_path) +      cant_fuse_with[m] = std::max(cant_fuse_with[m], node_num[n]); +    else +      cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]); +    disconnect.insert(m); +  } +   +   +  for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end(); +       i++) { +    g.disconnect(n, *i); +     +    bool no_incoming_edges = true; +    for (int j = 0; j < g.vertex.size(); j++) +      if (j != *i) +        if (g.hasEdge(j, *i)) { +          no_incoming_edges = false; +          break; +        } +     +     +    if (no_incoming_edges) +      work_list.push_back(*i); +  } +   +} + +Graph<std::set<int>, bool> 
Loop::construct_induced_graph_at_level( +  std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) { +  Graph<std::set<int>, bool> g; +   +  for (int i = 0; i < s.size(); i++) +    g.insert(s[i]); +   +  for (int i = 0; i < s.size(); i++) { +     +    for (int j = i + 1; j < s.size(); j++) { +      bool has_true_edge_i_to_j = false; +      bool has_true_edge_j_to_i = false; +      bool is_connected_i_to_j = false; +      bool is_connected_j_to_i = false; +      for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end(); +           ii++) { +         +        for (std::set<int>::iterator jj = s[j].begin(); +             jj != s[j].end(); jj++) { +           +          std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); +          for (int k = 0; k < dvs.size(); k++) +            if (dvs[k].is_control_dependence() +                || (dvs[k].is_data_dependence() +                    && dvs[k].has_been_carried_at(dep_dim))) { +               +              if (dvs[k].is_data_dependence() +                  && dvs[k].has_negative_been_carried_at( +                    dep_dim)) { +                //g.connect(i, j, false); +                is_connected_i_to_j = true; +                break; +              } else { +                //g.connect(i, j, true); +                 +                has_true_edge_i_to_j = true; +                //break +              } +            } +           +          //if (is_connected) +           +          //    break; +          //        if (has_true_edge_i_to_j && !is_connected_i_to_j) +          //                g.connect(i, j, true); +          dvs = dep.getEdge(*jj, *ii); +          for (int k = 0; k < dvs.size(); k++) +            if (dvs[k].is_control_dependence() +                || (dvs[k].is_data_dependence() +                    && dvs[k].has_been_carried_at(dep_dim))) { +               +              if (is_connected_i_to_j || has_true_edge_i_to_j) +                throw loop_error( +                  
"Graph input for fusion has cycles not a DAG!!"); +               +              if (dvs[k].is_data_dependence() +                  && dvs[k].has_negative_been_carried_at( +                    dep_dim)) { +                //g.connect(i, j, false); +                is_connected_j_to_i = true; +                break; +              } else { +                //g.connect(i, j, true); +                 +                has_true_edge_j_to_i = true; +                //break; +              } +            } +           +          //    if (is_connected) +          //break; +          //    if (is_connected) +          //break; +        } +         +         +        //if (is_connected) +        //  break; +      } +       +       +      if (is_connected_i_to_j) +        g.connect(i, j, false); +      else if (has_true_edge_i_to_j) +        g.connect(i, j, true); +       +      if (is_connected_j_to_i) +        g.connect(j, i, false); +      else if (has_true_edge_j_to_i) +        g.connect(j, i, true); +       +       +    } +  } +  return g; +} + +std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) { +   +  bool roots[g.vertex.size()]; +   +  for (int i = 0; i < g.vertex.size(); i++) +    roots[i] = true; +   +  for (int i = 0; i < g.vertex.size(); i++) +    for (int j = i + 1; j < g.vertex.size(); j++) { +       +      if (g.hasEdge(i, j)) +        roots[j] = false; +       +      if (g.hasEdge(j, i)) +        roots[i] = false; +       +    } +   +  std::list<int> work_list; +  int cant_fuse_with[g.vertex.size()]; +  std::vector<std::set<int> > s; +  //Each Fused set's representative node +   +  int node_to_fused_nodes[g.vertex.size()]; +  int node_num[g.vertex.size()]; +  for (int i = 0; i < g.vertex.size(); i++) { +    if (roots[i] == true) +      work_list.push_back(i); +    cant_fuse_with[i] = 0; +    node_to_fused_nodes[i] = 0; +    node_num[i] = -1; +  } +  // topological sort according to chun's permute algorithm +  //   
std::vector<std::set<int> > s = g.topoSort(); +  std::vector<std::set<int> > s2 = g.topoSort(); +  if (work_list.empty() || (s2.size() != g.vertex.size())) { +     +    std::cout << s2.size() << "\t" << g.vertex.size() << std::endl; +    throw loop_error("Input for fusion not a DAG!!"); +     +     +  } +  int fused_nodes_counter = 0; +  while (!work_list.empty()) { +    int n = work_list.front(); +    //int n_ = g.vertex[n].first; +    work_list.pop_front(); +    int node; +    if (cant_fuse_with[n] == 0) +      node = 0; +    else +      node = cant_fuse_with[n]; +     +    if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) { +      int rep_node = node_to_fused_nodes[node]; +      node_num[n] = node_num[rep_node]; +       +      try { +        update_successors(n, node_num, cant_fuse_with, g, work_list); +      } catch (const loop_error &e) { +         +        throw loop_error( +          "statements cannot be fused together due to negative dependence"); +         +         +      } +      for (std::set<int>::iterator it = g.vertex[n].first.begin(); +           it != g.vertex[n].first.end(); it++) +        s[node].insert(*it); +    } else { +      //std::set<int> new_node; +      //new_node.insert(n_); +      s.push_back(g.vertex[n].first); +      node_to_fused_nodes[node] = n; +      node_num[n] = ++node; +      try { +        update_successors(n, node_num, cant_fuse_with, g, work_list); +      } catch (const loop_error &e) { +         +        throw loop_error( +          "statements cannot be fused together due to negative dependence"); +         +         +      } +      fused_nodes_counter++; +    } +  } +   +  return s; +} + +void Loop::setLexicalOrder(int dim, const std::set<int> &active, +                           int starting_order, std::vector<std::vector<std::string> > idxNames) { +  if (active.size() == 0) +    return; +   +  // check for sanity of parameters +  if (dim < 0 || dim % 2 != 0) +    throw std::invalid_argument( +      
"invalid constant loop level to set lexicographical order"); +  std::vector<int> lex; +  int ref_stmt_num; +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    if ((*i) < 0 || (*i) >= stmt.size()) +      throw std::invalid_argument( +        "invalid statement number " + to_string(*i)); +    if (dim >= stmt[*i].xform.n_out()) +      throw std::invalid_argument( +        "invalid constant loop level to set lexicographical order"); +    if (i == active.begin()) { +      lex = getLexicalOrder(*i); +      ref_stmt_num = *i; +    } else { +      std::vector<int> lex2 = getLexicalOrder(*i); +      for (int j = 0; j < dim; j += 2) +        if (lex[j] != lex2[j]) +          throw std::invalid_argument( +            "statements are not in the same sub loop nest"); +    } +  } +   +  // sepearate statements by current loop level types +  int level = (dim + 2) / 2; +  std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; +  std::set<int> active_by_no_level; +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    if (level > stmt[*i].loop_level.size()) +      active_by_no_level.insert(*i); +    else +      active_by_level_type[std::make_pair( +          stmt[*i].loop_level[level - 1].type, +          stmt[*i].loop_level[level - 1].payload)].insert(*i); +  } +   +  // further separate statements due to control dependences +  std::vector<std::set<int> > active_by_level_type_splitted; +  for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = +         active_by_level_type.begin(); i != active_by_level_type.end(); i++) +    active_by_level_type_splitted.push_back(i->second); +  for (std::set<int>::iterator i = active_by_no_level.begin(); +       i != active_by_no_level.end(); i++) +    for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { +      std::set<int> controlled, not_controlled; +      for (std::set<int>::iterator k = +             
active_by_level_type_splitted[j].begin(); +           k != active_by_level_type_splitted[j].end(); k++) { +        std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); +        bool is_controlled = false; +        // *k counts as controlled only when some dependence *i -> *k is a +        // control dependence (compare the type; do not assign to it) +        for (int kk = 0; kk < dvs.size(); kk++) +          if (dvs[kk].type == DEP_CONTROL) { +            is_controlled = true; +            break; +          } +        if (is_controlled) +          controlled.insert(*k); +        else +          not_controlled.insert(*k); +      } +      if (controlled.size() != 0 && not_controlled.size() != 0) { +        active_by_level_type_splitted.erase( +          active_by_level_type_splitted.begin() + j); +        active_by_level_type_splitted.push_back(controlled); +        active_by_level_type_splitted.push_back(not_controlled); +      } +    } +   +  // set lexical order separating loops with different loop types first +  if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { +    int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; +     +    Graph<std::set<int>, Empty> g; +    for (std::vector<std::set<int> >::iterator i = +           active_by_level_type_splitted.begin(); +         i != active_by_level_type_splitted.end(); i++) +      g.insert(*i); +    for (std::set<int>::iterator i = active_by_no_level.begin(); +         i != active_by_no_level.end(); i++) { +      std::set<int> t; +      t.insert(*i); +      g.insert(t); +    } +    for (int i = 0; i < g.vertex.size(); i++) +      for (int j = i + 1; j < g.vertex.size(); j++) { +        bool connected = false; +        for (std::set<int>::iterator ii = g.vertex[i].first.begin(); +             ii != g.vertex[i].first.end(); ii++) { +          for (std::set<int>::iterator jj = g.vertex[j].first.begin(); +               jj != g.vertex[j].first.end(); jj++) { +            std::vector<DependenceVector> dvs = dep.getEdge(*ii, +                                                            *jj); +            for (int k = 0; k < 
dvs.size(); k++) +              if (dvs[k].is_control_dependence() +                  || (dvs[k].is_data_dependence() +                      && !dvs[k].has_been_carried_before( +                        dep_dim))) { +                g.connect(i, j); +                connected = true; +                break; +              } +            if (connected) +              break; +          } +          if (connected) +            break; +        } +        connected = false; +        for (std::set<int>::iterator ii = g.vertex[i].first.begin(); +             ii != g.vertex[i].first.end(); ii++) { +          for (std::set<int>::iterator jj = g.vertex[j].first.begin(); +               jj != g.vertex[j].first.end(); jj++) { +            std::vector<DependenceVector> dvs = dep.getEdge(*jj, +                                                            *ii); +            // find the sub loop nest specified by stmt_num and level, +            // only iteration space satisfiable statements returned. +            for (int k = 0; k < dvs.size(); k++) +              if (dvs[k].is_control_dependence() +                  || (dvs[k].is_data_dependence() +                      && !dvs[k].has_been_carried_before( +                        dep_dim))) { +                g.connect(j, i); +                connected = true; +                break; +              } +            if (connected) +              break; +          } +          if (connected) +            break; +        } +      } +     +    std::vector<std::set<int> > s = g.topoSort(); +    if (s.size() != g.vertex.size()) +      throw loop_error( +        "cannot separate statements with different loop types at loop level " +        + to_string(level)); +     +    // assign lexical order +    int order = starting_order; +    for (int i = 0; i < s.size(); i++) { +      std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; +      int sz = cur_scc.size(); +      if (sz == 1) { +        int cur_stmt = *(cur_scc.begin()); +        
assign_const(stmt[cur_stmt].xform, dim, order); +        for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) +          assign_const(stmt[cur_stmt].xform, j, 0); +        order++; +      } else { +        setLexicalOrder(dim, cur_scc, order, idxNames); +        order += sz; +      } +    } +  } +  // set lexical order seperating single iteration statements and loops +  else { +    std::set<int> true_singles; +    std::set<int> nonsingles; +    std::map<coef_t, std::set<int> > fake_singles; +    std::set<int> fake_singles_; +     +    // sort out statements that do not require loops +    for (std::set<int>::iterator i = active.begin(); i != active.end(); +         i++) { +      Relation cur_IS = getNewIS(*i); +      if (is_single_iteration(cur_IS, dim + 1)) { +        bool is_all_single = true; +        for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) +          if (!is_single_iteration(cur_IS, j)) { +            is_all_single = false; +            break; +          } +        if (is_all_single) +          true_singles.insert(*i); +        else { +          fake_singles_.insert(*i); +          try { +            fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( +              *i); +          } catch (const std::exception &e) { +            fake_singles[posInfinity].insert(*i); +          } +        } +      } else +        nonsingles.insert(*i); +    } +     +     +    // split nonsingles forcibly according to negative dependences present (loop unfusible) +    int dep_dim = get_dep_dim_of(ref_stmt_num, level); +     +    if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { +       +      bool dummy_level_found = false; +       +      std::vector<std::set<int> > s; +       +      s = sort_by_same_loops(active, level); +      bool further_levels_exist = false; +       +      if (!idxNames.empty()) +        if (level <= idxNames[ref_stmt_num].size()) +          if (idxNames[ref_stmt_num][level - 1].length() == 0) { +            //  && s.size() 
== 1) { +            int order1 = 0; +            dummy_level_found = true; +             +            for (int i = level; i < idxNames[ref_stmt_num].size(); +                 i++) +              if (idxNames[ref_stmt_num][i].length() > 0) +                further_levels_exist = true; +             +          } +       +      //if (!dummy_level_found) { +       +      if (s.size() > 1) { +         +        Graph<std::set<int>, bool> g = construct_induced_graph_at_level( +          s, dep, dep_dim); +        s = typed_fusion(g); +      } +      int order = 0; +      for (int i = 0; i < s.size(); i++) { +         +        for (std::set<int>::iterator it = s[i].begin(); +             it != s[i].end(); it++) +          assign_const(stmt[*it].xform, dim, order); +         +        if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) +          setLexicalOrder(dim + 2, s[i], order, idxNames); +         +        order++; +      } +      //} +      /*    else { +             +            int order1 = 0; +            int order = 0; +            for (std::set<int>::iterator i = active.begin(); +            i != active.end(); i++) { +            if (!further_levels_exist) +            assign_const(stmt[*i].xform, dim, order1++); +            else +            assign_const(stmt[*i].xform, dim, order1); +             +            } +             +            if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist) +            setLexicalOrder(dim + 2, active, order, idxNames); +            } +      */ +    } else { +      int dummy_order = 0; +      for (std::set<int>::iterator i = active.begin(); i != active.end(); +           i++) +        assign_const(stmt[*i].xform, dim, dummy_order++); +    } +    /*for (int i = 0; i < g2.vertex.size(); i++) +      for (int j = i+1; j < g2.vertex.size(); j++) { +      std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); +      for (int k = 0; k < dvs.size(); k++) +      if 
(dvs[k].is_control_dependence() || +      (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { +      g2.connect(i, j); +      break; +      } +      dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); +      for (int k = 0; k < dvs.size(); k++) +      if (dvs[k].is_control_dependence() || +      (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { +      g2.connect(j, i); +      break; +      } +      } +       +      std::vector<std::set<int> > s2 = g2.packed_topoSort(); +       +      std::vector<std::set<int> > splitted_nonsingles; +      for (int i = 0; i < s2.size(); i++) { +      std::set<int> cur_scc; +      for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) +      cur_scc.insert(g2.vertex[*j].first); +      splitted_nonsingles.push_back(cur_scc); +      } +    */ +    //convert to dependence graph for grouped statements +    //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; +    /*int order = 0; +      for (std::set<int>::iterator j = active.begin(); j != active.end(); +      j++) { +      std::set<int> continuous; +      std::cout<< active.size()<<std::endl; +      while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) { +      continuous.insert(*j); +      j++; +      } +       +      printf("continuous size is %d\n", continuous.size()); +       +       +       +      if (continuous.size() > 0) { +      std::vector<std::set<int> > s = typed_fusion(continuous, dep, +      dep_dim); +       +      for (int i = 0; i < s.size(); i++) { +      for (std::set<int>::iterator l = s[i].begin(); +      l != s[i].end(); l++) { +      assign_const(stmt[*l].xform, dim + 2, order); +      setLexicalOrder(dim + 2, s[i]); +      } +      order++; +      } +      } +       +      if (j != active.end()) { +      assign_const(stmt[*j].xform, dim + 2, order); +       +      for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2) +      assign_const(stmt[*j].xform, 
k, 0); +      order++; +      } +       +      if( j == active.end()) +      break; +      } +    */ +     +     +    // assign lexical order +    /*int order = starting_order; +      for (int i = 0; i < s.size(); i++) { +      // translate each SCC into original statements +      std::set<int> cur_scc; +      for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) +      copy(s[i].begin(), s[i].end(), +      inserter(cur_scc, cur_scc.begin())); +       +      // now assign the constant +      for (std::set<int>::iterator j = cur_scc.begin(); +      j != cur_scc.end(); j++) +      assign_const(stmt[*j].xform, dim, order); +       +      if (cur_scc.size() > 1) +      setLexicalOrder(dim + 2, cur_scc); +      else if (cur_scc.size() == 1) { +      int cur_stmt = *(cur_scc.begin()); +      for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) +      assign_const(stmt[cur_stmt].xform, j, 0); +      } +       +      if (cur_scc.size() > 0) +      order++; +      } +    */ +  } +} + +void Loop::apply_xform() { +  std::set<int> active; +  for (int i = 0; i < stmt.size(); i++) +    active.insert(i); +  apply_xform(active); +} + +void Loop::apply_xform(int stmt_num) { +  std::set<int> active; +  active.insert(stmt_num); +  apply_xform(active); +} + +void Loop::apply_xform(std::set<int> &active) { +  int max_n = 0; +   +  CG_outputBuilder *ocg = ir->builder(); +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    int n = stmt[*i].loop_level.size(); +    if (n > max_n) +      max_n = n; +     +    std::vector<int> lex = getLexicalOrder(*i); +     +    Relation mapping(2 * n + 1, n); +    F_And *f_root = mapping.add_and(); +    for (int j = 1; j <= n; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(j), 1); +      h.update_coef(mapping.input_var(2 * j), -1); +    } +    mapping = Composition(mapping, stmt[*i].xform); +    mapping.simplify(); +     +    // match omega input/output 
variables to variable names in the code +    for (int j = 1; j <= stmt[*i].IS.n_set(); j++) +      mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); +    for (int j = 1; j <= n; j++) +      mapping.name_output_var(j, +                              tmp_loop_var_name_prefix +                              + to_string(tmp_loop_var_name_counter + j - 1)); +    mapping.setup_names(); +     +    Relation known = Extend_Set(copy(this->known), +                                mapping.n_out() - this->known.n_set()); +    //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); +    std::vector<std::string> loop_vars; +    for (int j = 1; j <= stmt[*i].IS.n_set(); j++) +      loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); +    std::vector<CG_outputRepr *> subs = output_substitutions(ocg, +                                                             Inverse(copy(mapping)), +                                                             std::vector<std::pair<CG_outputRepr *, int> >(mapping.n_out(), +                                                                                                           std::make_pair(static_cast<CG_outputRepr *>(NULL), 0))); +    stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars, +                                               subs); +    stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); +    stmt[*i].IS.simplify(); +     +    // replace original transformation relation with straight 1-1 mapping +    mapping = Relation(n, 2 * n + 1); +    f_root = mapping.add_and(); +    for (int j = 1; j <= n; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(2 * j), 1); +      h.update_coef(mapping.input_var(j), -1); +    } +    for (int j = 1; j <= 2 * n + 1; j += 2) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(j), 1); +      h.update_const(-lex[j - 1]); +    } +    
stmt[*i].xform = mapping; +  } +   +  tmp_loop_var_name_counter += max_n; +} + +void Loop::addKnown(const Relation &cond) { +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  int n1 = this->known.n_set(); +   +  Relation r = copy(cond); +  int n2 = r.n_set(); +   +  if (n1 < n2) +    this->known = Extend_Set(this->known, n2 - n1); +  else if (n1 > n2) +    r = Extend_Set(r, n1 - n2); +   +  this->known = Intersection(this->known, r); +} + +void Loop::removeDependence(int stmt_num_from, int stmt_num_to) { +  // check for sanity of parameters +  if (stmt_num_from >= stmt.size()) +    throw std::invalid_argument( +      "invalid statement number " + to_string(stmt_num_from)); +  if (stmt_num_to >= stmt.size()) +    throw std::invalid_argument( +      "invalid statement number " + to_string(stmt_num_to)); +   +  dep.disconnect(stmt_num_from, stmt_num_to); +} + +void Loop::dump() const { +  for (int i = 0; i < stmt.size(); i++) { +    std::vector<int> lex = getLexicalOrder(i); +    std::cout << "s" << i + 1 << ": "; +    for (int j = 0; j < stmt[i].loop_level.size(); j++) { +      if (2 * j < lex.size()) +        std::cout << lex[2 * j]; +      switch (stmt[i].loop_level[j].type) { +      case LoopLevelOriginal: +        std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; +        break; +      case LoopLevelTile: +        std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; +        break; +      default: +        std::cout << "(unknown)"; +      } +      std::cout << ' '; +    } +    for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { +      std::cout << lex[j]; +      if (j != lex.size() - 1) +        std::cout << ' '; +    } +    std::cout << std::endl; +  } +} + +bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { +  if (stmt.size() == 0) +    return true; +   +  // check for sanity of parameters +  
for (int i = 0; i < stmt.size(); i++) { +    if (stmt[i].loop_level.size() != num_dep_dim) +      throw std::invalid_argument( +        "nonsingular loop transformations must be applied to original perfect loop nest"); +    for (int j = 0; j < stmt[i].loop_level.size(); j++) +      if (stmt[i].loop_level[j].type != LoopLevelOriginal) +        throw std::invalid_argument( +          "nonsingular loop transformations must be applied to original perfect loop nest"); +  } +  if (T.size() != num_dep_dim) +    throw std::invalid_argument("invalid transformation matrix"); +  for (int i = 0; i < stmt.size(); i++) +    if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) +      throw std::invalid_argument("invalid transformation matrix"); +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +  // build relation from matrix +  Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); +  F_And *f_root = mapping.add_and(); +  for (int i = 0; i < num_dep_dim; i++) { +    EQ_Handle h = f_root->add_EQ(); +    h.update_coef(mapping.output_var(2 * (i + 1)), -1); +    for (int j = 0; j < num_dep_dim; j++) +      if (T[i][j] != 0) +        h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); +    if (T[i].size() == num_dep_dim + 1) +      h.update_const(T[i][num_dep_dim]); +  } +  for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) { +    EQ_Handle h = f_root->add_EQ(); +    h.update_coef(mapping.output_var(i), -1); +    h.update_coef(mapping.input_var(i), 1); +  } +   +  // update transformation relations +  for (int i = 0; i < stmt.size(); i++) +    stmt[i].xform = Composition(copy(mapping), stmt[i].xform); +   +  // update dependence graph +  for (int i = 0; i < dep.vertex.size(); i++) +    for (DependenceGraph::EdgeList::iterator j = +           dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); +         j++) { +      std::vector<DependenceVector> 
dvs = j->second; +      for (int k = 0; k < dvs.size(); k++) { +        DependenceVector &dv = dvs[k]; +        switch (dv.type) { +        case DEP_W2R: +        case DEP_R2W: +        case DEP_W2W: +        case DEP_R2R: { +          std::vector<coef_t> lbounds(num_dep_dim), ubounds( +            num_dep_dim); +          for (int p = 0; p < num_dep_dim; p++) { +            coef_t lb = 0; +            coef_t ub = 0; +            for (int q = 0; q < num_dep_dim; q++) { +              if (T[p][q] > 0) { +                if (lb == -posInfinity +                    || dv.lbounds[q] == -posInfinity) +                  lb = -posInfinity; +                else +                  lb += T[p][q] * dv.lbounds[q]; +                if (ub == posInfinity +                    || dv.ubounds[q] == posInfinity) +                  ub = posInfinity; +                else +                  ub += T[p][q] * dv.ubounds[q]; +              } else if (T[p][q] < 0) { +                if (lb == -posInfinity +                    || dv.ubounds[q] == posInfinity) +                  lb = -posInfinity; +                else +                  lb += T[p][q] * dv.ubounds[q]; +                if (ub == posInfinity +                    || dv.lbounds[q] == -posInfinity) +                  ub = posInfinity; +                else +                  ub += T[p][q] * dv.lbounds[q]; +              } +            } +            if (T[p].size() == num_dep_dim + 1) { +              if (lb != -posInfinity) +                lb += T[p][num_dep_dim]; +              if (ub != posInfinity) +                ub += T[p][num_dep_dim]; +            } +            lbounds[p] = lb; +            ubounds[p] = ub; +          } +          dv.lbounds = lbounds; +          dv.ubounds = ubounds; +           +          break; +        } +        default: +          ; +        } +      } +      j->second = dvs; +    } +   +  // set constant loop values +  std::set<int> active; +  for (int i = 0; i < stmt.size(); i++) +    
active.insert(i); +  setLexicalOrder(0, active); +   +  return true; +} + + +bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, +                                                  const DependenceVector &dv, bool before) { +  std::vector<int> lex_i = getLexicalOrder(i); +  std::vector<int> lex_j = getLexicalOrder(j); +  int last_dim; +  if (!dv.is_scalar_dependence) { +    for (last_dim = 0; +         last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]); +         last_dim++) +      ; +    last_dim = last_dim / 2; +    if (last_dim == 0) +      return true; +     +    for (int i = 0; i < last_dim; i++) { +      if (dv.lbounds[i] > 0) +        return true; +      else if (dv.lbounds[i] < 0) +        return false; +    } +  } +  if (before) +    return true; +   +  return false; +   +} + diff --git a/src/loop_basic.cc b/src/loop_basic.cc new file mode 100644 index 0000000..f5234b9 --- /dev/null +++ b/src/loop_basic.cc @@ -0,0 +1,1538 @@ +/* + * loop_basic.cc + * + *  Created on: Nov 12, 2012 + *      Author: anand + */ + +#include "loop.hh" +#include "chill_error.hh" +#include <omega.h> +#include "omegatools.hh" +#include <string.h> + +using namespace omega; + +void Loop::permute(const std::vector<int> &pi) { +  std::set<int> active; +  for (int i = 0; i < stmt.size(); i++) +    active.insert(i); +   +  permute(active, pi); +} + +void Loop::original() { +  std::set<int> active; +  for (int i = 0; i < stmt.size(); i++) +    active.insert(i); +  setLexicalOrder(0, active); +} +void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) { +  // check for sanity of parameters +  int starting_order; +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument( +      "invalid statement number " + to_string(stmt_num)); +  std::set<int> active; +  if (level < 0 || level > stmt[stmt_num].loop_level.size()) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +  else if (level == 0) { +    for (int 
i = 0; i < stmt.size(); i++) +      active.insert(i); +    level = 1; +    starting_order = 0; +  } else { +    std::vector<int> lex = getLexicalOrder(stmt_num); +    active = getStatements(lex, 2 * level - 2); +    starting_order = lex[2 * level - 2]; +    lex[2 * level - 2]++; +    shiftLexicalOrder(lex, 2 * level - 2, active.size() - 1); +  } +  std::vector<int> pi_inverse(pi.size(), 0); +  for (int i = 0; i < pi.size(); i++) { +    if (pi[i] >= level + pi.size() || pi[i] < level +        || pi_inverse[pi[i] - level] != 0) +      throw std::invalid_argument("invalid permuation"); +    pi_inverse[pi[i] - level] = level + i; +  } +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) +    if (level + pi.size() - 1 > stmt[*i].loop_level.size()) +      throw std::invalid_argument( +        "invalid permutation for statement " + to_string(*i)); +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  // Update transformation relations +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    int n = stmt[*i].xform.n_out(); +    Relation mapping(n, n); +    F_And *f_root = mapping.add_and(); +    for (int j = 1; j <= 2 * level - 2; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(j), 1); +      h.update_coef(mapping.input_var(j), -1); +    } +    for (int j = level; j <= level + pi.size() - 1; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(2 * j), 1); +      h.update_coef(mapping.input_var(2 * pi[j - level]), -1); +    } +    for (int j = level; j <= level + pi.size() - 1; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(2 * j - 1), 1); +      h.update_coef(mapping.input_var(2 * j - 1), -1); +    } +    for (int j = 2 * (level + pi.size() - 1) + 1; j <= n; j++) { +      EQ_Handle h = f_root->add_EQ(); + 
     h.update_coef(mapping.output_var(j), 1); +      h.update_coef(mapping.input_var(j), -1); +    } +    stmt[*i].xform = Composition(mapping, stmt[*i].xform); +    stmt[*i].xform.simplify(); +  } +   +  // get the permuation for dependence vectors +  std::vector<int> t; +  for (int i = 0; i < pi.size(); i++) +    if (stmt[stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) +      t.push_back(stmt[stmt_num].loop_level[pi[i] - 1].payload); +  int max_dep_dim = -1; +  int min_dep_dim = dep.num_dim(); +  for (int i = 0; i < t.size(); i++) { +    if (t[i] > max_dep_dim) +      max_dep_dim = t[i]; +    if (t[i] < min_dep_dim) +      min_dep_dim = t[i]; +  } +  if (min_dep_dim > max_dep_dim) +    return; +  if (max_dep_dim - min_dep_dim + 1 != t.size()) +    throw loop_error("cannot update the dependence graph after permuation"); +  std::vector<int> dep_pi(dep.num_dim()); +  for (int i = 0; i < min_dep_dim; i++) +    dep_pi[i] = i; +  for (int i = min_dep_dim; i <= max_dep_dim; i++) +    dep_pi[i] = t[i - min_dep_dim]; +  for (int i = max_dep_dim + 1; i < dep.num_dim(); i++) +    dep_pi[i] = i; +   +  dep.permute(dep_pi, active); +   +  // update the dependence graph +  DependenceGraph g(dep.num_dim()); +  for (int i = 0; i < dep.vertex.size(); i++) +    g.insert(); +  for (int i = 0; i < dep.vertex.size(); i++) +    for (DependenceGraph::EdgeList::iterator j = +           dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); +         j++) { +      if ((active.find(i) != active.end() +           && active.find(j->first) != active.end())) { +        std::vector<DependenceVector> dv = j->second; +        for (int k = 0; k < dv.size(); k++) { +          switch (dv[k].type) { +          case DEP_W2R: +          case DEP_R2W: +          case DEP_W2W: +          case DEP_R2R: { +            std::vector<coef_t> lbounds(dep.num_dim()); +            std::vector<coef_t> ubounds(dep.num_dim()); +            for (int d = 0; d < dep.num_dim(); d++) { +              
lbounds[d] = dv[k].lbounds[dep_pi[d]]; +              ubounds[d] = dv[k].ubounds[dep_pi[d]]; +            } +            dv[k].lbounds = lbounds; +            dv[k].ubounds = ubounds; +            break; +          } +          case DEP_CONTROL: { +            break; +          } +          default: +            throw loop_error("unknown dependence type"); +          } +        } +        g.connect(i, j->first, dv); +      } else if (active.find(i) == active.end() +                 && active.find(j->first) == active.end()) { +        std::vector<DependenceVector> dv = j->second; +        g.connect(i, j->first, dv); +      } else { +        std::vector<DependenceVector> dv = j->second; +        for (int k = 0; k < dv.size(); k++) +          switch (dv[k].type) { +          case DEP_W2R: +          case DEP_R2W: +          case DEP_W2W: +          case DEP_R2R: { +            for (int d = 0; d < dep.num_dim(); d++) +              if (dep_pi[d] != d) { +                dv[k].lbounds[d] = -posInfinity; +                dv[k].ubounds[d] = posInfinity; +              } +            break; +          } +          case DEP_CONTROL: +            break; +          default: +            throw loop_error("unknown dependence type"); +          } +        g.connect(i, j->first, dv); +      } +    } +  dep = g; +   +  // update loop level information +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    int cur_dep_dim = min_dep_dim; +    std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); +    for (int j = 1; j <= stmt[*i].loop_level.size(); j++) +      if (j >= level && j < level + pi.size()) { +        switch (stmt[*i].loop_level[pi_inverse[j - level] - 1].type) { +        case LoopLevelOriginal: +          new_loop_level[j - 1].type = LoopLevelOriginal; +          new_loop_level[j - 1].payload = cur_dep_dim++; +          new_loop_level[j - 1].parallel_level = +            stmt[*i].loop_level[pi_inverse[j - level] - 
1].parallel_level; +          break; +        case LoopLevelTile: { +          new_loop_level[j - 1].type = LoopLevelTile; +          int ref_level = stmt[*i].loop_level[pi_inverse[j - level] +                                              - 1].payload; +          if (ref_level >= level && ref_level < level + pi.size()) +            new_loop_level[j - 1].payload = pi_inverse[ref_level +                                                       - level]; +          else +            new_loop_level[j - 1].payload = ref_level; +          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j +                                                                     - 1].parallel_level; +          break; +        } +        default: +          throw loop_error( +            "unknown loop level information for statement " +            + to_string(*i)); +        } +      } else { +        switch (stmt[*i].loop_level[j - 1].type) { +        case LoopLevelOriginal: +          new_loop_level[j - 1].type = LoopLevelOriginal; +          new_loop_level[j - 1].payload = +            stmt[*i].loop_level[j - 1].payload; +          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j +                                                                     - 1].parallel_level; +          break; +        case LoopLevelTile: { +          new_loop_level[j - 1].type = LoopLevelTile; +          int ref_level = stmt[*i].loop_level[j - 1].payload; +          if (ref_level >= level && ref_level < level + pi.size()) +            new_loop_level[j - 1].payload = pi_inverse[ref_level +                                                       - level]; +          else +            new_loop_level[j - 1].payload = ref_level; +          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j +                                                                     - 1].parallel_level; +          break; +        } +        default: +          throw loop_error( +            "unknown loop level 
information for statement " +            + to_string(*i)); +        } +      } +    stmt[*i].loop_level = new_loop_level; +  } +   +  setLexicalOrder(2 * level - 2, active, starting_order); +} +void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) { +  if (active.size() == 0 || pi.size() == 0) +    return; +   +  // check for sanity of parameters +  int level = pi[0]; +  for (int i = 1; i < pi.size(); i++) +    if (pi[i] < level) +      level = pi[i]; +  if (level < 1) +    throw std::invalid_argument("invalid permuation"); +  std::vector<int> reverse_pi(pi.size(), 0); +  for (int i = 0; i < pi.size(); i++) +    if (pi[i] >= level + pi.size()) +      throw std::invalid_argument("invalid permutation"); +    else +      reverse_pi[pi[i] - level] = i + level; +  for (int i = 0; i < reverse_pi.size(); i++) +    if (reverse_pi[i] == 0) +      throw std::invalid_argument("invalid permuation"); +  int ref_stmt_num; +  std::vector<int> lex; +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    if (*i < 0 || *i >= stmt.size()) +      throw std::invalid_argument("invalid statement " + to_string(*i)); +    if (i == active.begin()) { +      ref_stmt_num = *i; +      lex = getLexicalOrder(*i); +    } else { +      if (level + pi.size() - 1 > stmt[*i].loop_level.size()) +        throw std::invalid_argument("invalid permuation"); +      std::vector<int> lex2 = getLexicalOrder(*i); +      for (int j = 0; j < 2 * level - 3; j += 2) +        if (lex[j] != lex2[j]) +          throw std::invalid_argument( +            "statements to permute must be in the same subloop"); +      for (int j = 0; j < pi.size(); j++) +        if (!(stmt[*i].loop_level[level + j - 1].type +              == stmt[ref_stmt_num].loop_level[level + j - 1].type +              && stmt[*i].loop_level[level + j - 1].payload +              == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) +          throw std::invalid_argument( +            "permuted loops 
must have the same loop level types"); +    } +  } +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  // Update transformation relations +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    int n = stmt[*i].xform.n_out(); +    Relation mapping(n, n); +    F_And *f_root = mapping.add_and(); +    for (int j = 1; j <= n; j += 2) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(j), 1); +      h.update_coef(mapping.input_var(j), -1); +    } +    for (int j = 0; j < pi.size(); j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(2 * (level + j)), 1); +      h.update_coef(mapping.input_var(2 * pi[j]), -1); +    } +    for (int j = 1; j < level; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(2 * j), 1); +      h.update_coef(mapping.input_var(2 * j), -1); +    } +    for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(2 * j), 1); +      h.update_coef(mapping.input_var(2 * j), -1); +    } +     +    stmt[*i].xform = Composition(mapping, stmt[*i].xform); +    stmt[*i].xform.simplify(); +  } +   +  // get the permuation for dependence vectors +  std::vector<int> t; +  for (int i = 0; i < pi.size(); i++) +    if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) +      t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload); +  int max_dep_dim = -1; +  int min_dep_dim = num_dep_dim; +  for (int i = 0; i < t.size(); i++) { +    if (t[i] > max_dep_dim) +      max_dep_dim = t[i]; +    if (t[i] < min_dep_dim) +      min_dep_dim = t[i]; +  } +  if (min_dep_dim > max_dep_dim) +    return; +  if (max_dep_dim - min_dep_dim + 1 != t.size()) +    throw loop_error("cannot update the dependence graph after permuation"); +  
std::vector<int> dep_pi(num_dep_dim); +  for (int i = 0; i < min_dep_dim; i++) +    dep_pi[i] = i; +  for (int i = min_dep_dim; i <= max_dep_dim; i++) +    dep_pi[i] = t[i - min_dep_dim]; +  for (int i = max_dep_dim + 1; i < num_dep_dim; i++) +    dep_pi[i] = i; +   +  dep.permute(dep_pi, active); +   +  // update the dependence graph +  DependenceGraph g(dep.num_dim()); +  for (int i = 0; i < dep.vertex.size(); i++) +    g.insert(); +  for (int i = 0; i < dep.vertex.size(); i++) +    for (DependenceGraph::EdgeList::iterator j = +           dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); +         j++) {         // +      if ((active.find(i) != active.end() +           && active.find(j->first) != active.end())) { +        std::vector<DependenceVector> dv = j->second; +        for (int k = 0; k < dv.size(); k++) { +          switch (dv[k].type) { +          case DEP_W2R: +          case DEP_R2W: +          case DEP_W2W: +          case DEP_R2R: { +            std::vector<coef_t> lbounds(num_dep_dim); +            std::vector<coef_t> ubounds(num_dep_dim); +            for (int d = 0; d < num_dep_dim; d++) { +              lbounds[d] = dv[k].lbounds[dep_pi[d]]; +              ubounds[d] = dv[k].ubounds[dep_pi[d]]; +            } +            dv[k].lbounds = lbounds; +            dv[k].ubounds = ubounds; +            break; +          } +          case DEP_CONTROL: { +            break; +          } +          default: +            throw loop_error("unknown dependence type"); +          } +        } +        g.connect(i, j->first, dv); +      } else if (active.find(i) == active.end() +                 && active.find(j->first) == active.end()) { +        std::vector<DependenceVector> dv = j->second; +        g.connect(i, j->first, dv); +      } else { +        std::vector<DependenceVector> dv = j->second; +        for (int k = 0; k < dv.size(); k++) +          switch (dv[k].type) { +          case DEP_W2R: +          case DEP_R2W: +          case DEP_W2W: 
+          case DEP_R2R: { +            for (int d = 0; d < num_dep_dim; d++) +              if (dep_pi[d] != d) { +                dv[k].lbounds[d] = -posInfinity; +                dv[k].ubounds[d] = posInfinity; +              } +            break; +          } +          case DEP_CONTROL: +            break; +          default: +            throw loop_error("unknown dependence type"); +          } +        g.connect(i, j->first, dv); +      } +    } +  dep = g; +   +  // update loop level information +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    int cur_dep_dim = min_dep_dim; +    std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); +    for (int j = 1; j <= stmt[*i].loop_level.size(); j++) +      if (j >= level && j < level + pi.size()) { +        switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { +        case LoopLevelOriginal: +          new_loop_level[j - 1].type = LoopLevelOriginal; +          new_loop_level[j - 1].payload = cur_dep_dim++; +          new_loop_level[j - 1].parallel_level = +            stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; +          break; +        case LoopLevelTile: { +          new_loop_level[j - 1].type = LoopLevelTile; +          int ref_level = stmt[*i].loop_level[reverse_pi[j - level] +                                              - 1].payload; +          if (ref_level >= level && ref_level < level + pi.size()) +            new_loop_level[j - 1].payload = reverse_pi[ref_level +                                                       - level]; +          else +            new_loop_level[j - 1].payload = ref_level; +          new_loop_level[j - 1].parallel_level = +            stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; +          break; +        } +        default: +          throw loop_error( +            "unknown loop level information for statement " +            + to_string(*i)); +        } +      } else { +        switch 
(stmt[*i].loop_level[j - 1].type) { +        case LoopLevelOriginal: +          new_loop_level[j - 1].type = LoopLevelOriginal; +          new_loop_level[j - 1].payload = +            stmt[*i].loop_level[j - 1].payload; +          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j +                                                                     - 1].parallel_level; +          break; +        case LoopLevelTile: { +          new_loop_level[j - 1].type = LoopLevelTile; +          int ref_level = stmt[*i].loop_level[j - 1].payload; +          if (ref_level >= level && ref_level < level + pi.size()) +            new_loop_level[j - 1].payload = reverse_pi[ref_level +                                                       - level]; +          else +            new_loop_level[j - 1].payload = ref_level; +          new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j +                                                                     - 1].parallel_level; +          break; +        } +        default: +          throw loop_error( +            "unknown loop level information for statement " +            + to_string(*i)); +        } +      } +    stmt[*i].loop_level = new_loop_level; +  } +   +  setLexicalOrder(2 * level - 2, active); +} + +std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { +  // check for sanity of parameters +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement " + to_string(stmt_num)); +  if (level <= 0 || level > stmt[stmt_num].loop_level.size()) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +   +  std::set<int> result; +  int dim = 2 * level - 1; +  std::vector<int> lex = getLexicalOrder(stmt_num); +  std::set<int> same_loop = getStatements(lex, dim - 1); +   +  Relation cond2 = copy(cond); +  cond2.simplify(); +  cond2 = EQs_to_GEQs(cond2); +  Conjunct *c = cond2.single_conjunct(); +  int cur_lex = lex[dim - 1]; +   +  for 
(GEQ_Iterator gi(c->GEQs()); gi; gi++) { +    int max_level = (*gi).max_tuple_pos(); +    Relation single_cond(max_level); +    single_cond.and_with_GEQ(*gi); +     +    // TODO: should decide where to place newly created statements with +    // complementary split condition from dependence graph. +    bool place_after; +    if (max_level == 0) +      place_after = true; +    else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) +      place_after = true; +    else +      place_after = false; +     +    bool temp_place_after;      // = place_after; +    bool assigned = false; +    int part1_to_part2; +    int part2_to_part1; +    // original statements with split condition, +    // new statements with complement of split condition +    int old_num_stmt = stmt.size(); +    std::map<int, int> what_stmt_num; +    apply_xform(same_loop); +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) { +      int n = stmt[*i].IS.n_set(); +      Relation part1, part2; +      if (max_level > n) { +        part1 = copy(stmt[*i].IS); +        part2 = Relation::False(0); +      } else { +        part1 = Intersection(copy(stmt[*i].IS), +                             Extend_Set(copy(single_cond), n - max_level)); +        part2 = Intersection(copy(stmt[*i].IS), +                             Extend_Set(Complement(copy(single_cond)), +                                        n - max_level)); +      } +       +      //split dependence check +       +      if (max_level > level) { +         +        DNF_Iterator di1(stmt[*i].IS.query_DNF()); +        DNF_Iterator di2(part1.query_DNF()); +        for (; di1 && di2; di1++, di2++) { +          //printf("In next conjunct,\n"); +          EQ_Iterator ei1 = (*di1)->EQs(); +          EQ_Iterator ei2 = (*di2)->EQs(); +          for (; ei1 && ei2; ei1++, ei2++) { +            //printf(" In next equality constraint,\n"); +            Constr_Vars_Iter cvi1(*ei1); +            Constr_Vars_Iter cvi2(*ei2); +       
     int dimension = (*cvi1).var->get_position(); +            int same = 0; +            bool identical = false; +            if (identical = !strcmp((*cvi1).var->char_name(), +                                    (*cvi2).var->char_name())) { +               +              for (; cvi1 && cvi2; cvi1++, cvi2++) { +                 +                if (((*cvi1).coef != (*cvi2).coef +                     || (*ei1).get_const() +                     != (*ei2).get_const()) +                    || (strcmp((*cvi1).var->char_name(), +                               (*cvi2).var->char_name()))) { +                   +                  same++; +                } +              } +            } +            if ((same != 0) || !identical) { +               +              dimension = dimension - 1; +               +              while (stmt[*i].loop_level[dimension].type +                     == LoopLevelTile) +                dimension = +                  stmt[*i].loop_level[dimension].payload; +               +              dimension = stmt[*i].loop_level[dimension].payload; +               +              for (int i = 0; i < stmt.size(); i++) { +                std::vector<std::pair<int, DependenceVector> > D; +                for (DependenceGraph::EdgeList::iterator j = +                       dep.vertex[i].second.begin(); +                     j != dep.vertex[i].second.end(); j++) { +                  for (int k = 0; k < j->second.size(); k++) { +                    DependenceVector dv = j->second[k]; +                    if (dv.type != DEP_CONTROL) +                      if (dv.hasNegative(dimension) +                          && !dv.quasi) +                        throw loop_error( +                          "loop error: Split is illegal, dependence violation!"); +                     +                  } +                } +              } +               +            } +             +            GEQ_Iterator gi1 = (*di1)->GEQs(); +            GEQ_Iterator gi2 = 
(*di2)->GEQs(); +             +            for (; gi1 && gi2; gi++, gi2++) { +               +              Constr_Vars_Iter cvi1(*gi1); +              Constr_Vars_Iter cvi2(*gi2); +              int dimension = (*cvi1).var->get_position(); +              int same = 0; +              bool identical = false; +              if (identical = !strcmp((*cvi1).var->char_name(), +                                      (*cvi2).var->char_name())) { +                 +                for (; cvi1 && cvi2; cvi1++, cvi2++) { +                   +                  if (((*cvi1).coef != (*cvi2).coef +                       || (*gi1).get_const() +                       != (*gi2).get_const()) +                      || (strcmp((*cvi1).var->char_name(), +                                 (*cvi2).var->char_name()))) { +                     +                    same++; +                  } +                } +              } +              if ((same != 0) || !identical) { +                dimension = dimension - 1; +                 +                while (stmt[*i].loop_level[dimension].type +                       == LoopLevelTile) +                  stmt[*i].loop_level[dimension].payload; +                 +                dimension = +                  stmt[*i].loop_level[dimension].payload; +                 +                for (int i = 0; i < stmt.size(); i++) { +                  std::vector<std::pair<int, DependenceVector> > D; +                  for (DependenceGraph::EdgeList::iterator j = +                         dep.vertex[i].second.begin(); +                       j != dep.vertex[i].second.end(); +                       j++) { +                    for (int k = 0; k < j->second.size(); +                         k++) { +                      DependenceVector dv = j->second[k]; +                      if (dv.type != DEP_CONTROL) +                        if (dv.hasNegative(dimension) +                            && !dv.quasi) +                           +                          
throw loop_error( +                            "loop error: Split is illegal, dependence violation!"); +                       +                    } +                  } +                } +                 +              } +               +            } +             +          } +           +        } +         +        DNF_Iterator di3(stmt[*i].IS.query_DNF()); +        DNF_Iterator di4(part2.query_DNF());        // +        for (; di3 && di4; di3++, di4++) { +          EQ_Iterator ei1 = (*di3)->EQs(); +          EQ_Iterator ei2 = (*di4)->EQs(); +          for (; ei1 && ei2; ei1++, ei2++) { +            Constr_Vars_Iter cvi1(*ei1); +            Constr_Vars_Iter cvi2(*ei2); +            int dimension = (*cvi1).var->get_position(); +            int same = 0; +            bool identical = false; +            if (identical = !strcmp((*cvi1).var->char_name(), +                                    (*cvi2).var->char_name())) { +               +              for (; cvi1 && cvi2; cvi1++, cvi2++) { +                 +                if (((*cvi1).coef != (*cvi2).coef +                     || (*ei1).get_const() +                     != (*ei2).get_const()) +                    || (strcmp((*cvi1).var->char_name(), +                               (*cvi2).var->char_name()))) { +                   +                  same++; +                } +              } +            } +            if ((same != 0) || !identical) { +              dimension = dimension - 1; +               +              while (stmt[*i].loop_level[dimension].type +                     == LoopLevelTile) +                stmt[*i].loop_level[dimension].payload; +               +              dimension = stmt[*i].loop_level[dimension].payload; +               +              for (int i = 0; i < stmt.size(); i++) { +                std::vector<std::pair<int, DependenceVector> > D; +                for (DependenceGraph::EdgeList::iterator j = +                       dep.vertex[i].second.begin(); +                    
 j != dep.vertex[i].second.end(); j++) { +                  for (int k = 0; k < j->second.size(); k++) { +                    DependenceVector dv = j->second[k]; +                    if (dv.type != DEP_CONTROL) +                      if (dv.hasNegative(dimension) +                          && !dv.quasi) +                         +                        throw loop_error( +                          "loop error: Split is illegal, dependence violation!"); +                     +                  } +                } +              } +               +            } +             +          } +          GEQ_Iterator gi1 = (*di3)->GEQs(); +          GEQ_Iterator gi2 = (*di4)->GEQs(); +           +          for (; gi1 && gi2; gi++, gi2++) { +            Constr_Vars_Iter cvi1(*gi1); +            Constr_Vars_Iter cvi2(*gi2); +            int dimension = (*cvi1).var->get_position(); +            int same = 0; +            bool identical = false; +            if (identical = !strcmp((*cvi1).var->char_name(), +                                    (*cvi2).var->char_name())) { +               +              for (; cvi1 && cvi2; cvi1++, cvi2++) { +                 +                if (((*cvi1).coef != (*cvi2).coef +                     || (*gi1).get_const() +                     != (*gi2).get_const()) +                    || (strcmp((*cvi1).var->char_name(), +                               (*cvi2).var->char_name()))) { +                   +                  same++; +                } +              } +            } +            if ((same != 0) || !identical) { +              dimension = dimension - 1; +               +              while (stmt[*i].loop_level[dimension].type        // +                     == LoopLevelTile) +                stmt[*i].loop_level[dimension].payload; +               +              dimension = stmt[*i].loop_level[dimension].payload; +               +              for (int i = 0; i < stmt.size(); i++) { +                std::vector<std::pair<int, 
DependenceVector> > D; +                for (DependenceGraph::EdgeList::iterator j = +                       dep.vertex[i].second.begin(); +                     j != dep.vertex[i].second.end(); j++) { +                  for (int k = 0; k < j->second.size(); k++) { +                    DependenceVector dv = j->second[k]; +                    if (dv.type != DEP_CONTROL) +                      if (dv.hasNegative(dimension) +                          && !dv.quasi) +                         +                        throw loop_error( +                          "loop error: Split is illegal, dependence violation!"); +                     +                  } +                } +              } +               +            } +             +          } +           +        } +         +      } +       +      stmt[*i].IS = part1; +       +      if (Intersection(copy(part2), +                       Extend_Set(copy(this->known), n - this->known.n_set())).is_upper_bound_satisfiable()) { +        Statement new_stmt; +        new_stmt.code = stmt[*i].code->clone(); +        new_stmt.IS = part2; +        new_stmt.xform = copy(stmt[*i].xform); +        new_stmt.ir_stmt_node = NULL; +        new_stmt.loop_level = stmt[*i].loop_level; +         +        stmt_nesting_level_.push_back(stmt_nesting_level_[*i]); +         +        /*std::pair<std::vector<DependenceVector>, +          std::vector<DependenceVector> > dv = +          test_data_dependences(ir, stmt[*i].code, part1, +          stmt[*i].code, part2, freevar, index, +          stmt_nesting_level_[*i], +          stmt_nesting_level_[stmt.size() - 1]); +           +           +           +           +          for (int k = 0; k < dv.first.size(); k++) +          part1_to_part2++; +          if (part1_to_part2 > 0 && part2_to_part1 > 0) +          throw loop_error( +          "loop error: Aborting, split resulted in impossible dependence cycle!"); +           +          for (int k = 0; k < dv.second.size(); k++) +          
part2_to_part1++; +           +           +           +          if (part1_to_part2 > 0 && part2_to_part1 > 0) +          throw loop_error( +          "loop error: Aborting, split resulted in impossible dependence cycle!"); +           +           +           +          if (part2_to_part1 > 0){ +          temp_place_after = false; +          assigned = true; +           +          }else if (part1_to_part2 > 0){ +          temp_place_after = true; +           +          assigned = true; +          } +           +        */ +         +        if (place_after) +          assign_const(new_stmt.xform, dim - 1, cur_lex + 1); +        else +          assign_const(new_stmt.xform, dim - 1, cur_lex - 1); +         +        stmt.push_back(new_stmt); +        dep.insert(); +        what_stmt_num[*i] = stmt.size() - 1; +        if (*i == stmt_num) +          result.insert(stmt.size() - 1); +      } +       +    } +    // make adjacent lexical number available for new statements +    if (place_after) { +      lex[dim - 1] = cur_lex + 1; +      shiftLexicalOrder(lex, dim - 1, 1); +    } else { +      lex[dim - 1] = cur_lex - 1; +      shiftLexicalOrder(lex, dim - 1, -1); +    } +    // update dependence graph +    int dep_dim = get_dep_dim_of(stmt_num, level); +    for (int i = 0; i < old_num_stmt; i++) { +      std::vector<std::pair<int, std::vector<DependenceVector> > > D; +       +      for (DependenceGraph::EdgeList::iterator j = +             dep.vertex[i].second.begin(); +           j != dep.vertex[i].second.end(); j++) { +        if (same_loop.find(i) != same_loop.end()) { +          if (same_loop.find(j->first) != same_loop.end()) { +            if (what_stmt_num.find(i) != what_stmt_num.end() +                && what_stmt_num.find(j->first) +                != what_stmt_num.end()) +              dep.connect(what_stmt_num[i], +                          what_stmt_num[j->first], j->second); +            if (place_after +                && what_stmt_num.find(j->first) +      
          != what_stmt_num.end()) { +              std::vector<DependenceVector> dvs; +              for (int k = 0; k < j->second.size(); k++) { +                DependenceVector dv = j->second[k]; +                if (dv.is_data_dependence() && dep_dim != -1) { +                  dv.lbounds[dep_dim] = -posInfinity; +                  dv.ubounds[dep_dim] = posInfinity; +                } +                dvs.push_back(dv); +              } +              if (dvs.size() > 0) +                D.push_back( +                  std::make_pair(what_stmt_num[j->first], +                                 dvs)); +            } else if (!place_after +                       && what_stmt_num.find(i) +                       != what_stmt_num.end()) { +              std::vector<DependenceVector> dvs; +              for (int k = 0; k < j->second.size(); k++) { +                DependenceVector dv = j->second[k]; +                if (dv.is_data_dependence() && dep_dim != -1) { +                  dv.lbounds[dep_dim] = -posInfinity; +                  dv.ubounds[dep_dim] = posInfinity; +                } +                dvs.push_back(dv); +              } +              if (dvs.size() > 0) +                dep.connect(what_stmt_num[i], j->first, dvs); +               +            } +          } else { +            if (what_stmt_num.find(i) != what_stmt_num.end()) +              dep.connect(what_stmt_num[i], j->first, j->second); +          } +        } else if (same_loop.find(j->first) != same_loop.end()) { +          if (what_stmt_num.find(j->first) != what_stmt_num.end()) +            D.push_back( +              std::make_pair(what_stmt_num[j->first], +                             j->second)); +        } +      } +       +      for (int j = 0; j < D.size(); j++) +        dep.connect(i, D[j].first, D[j].second); +    } +     +  } +   +  return result; +} + +void Loop::skew(const std::set<int> &stmt_nums, int level, +                const std::vector<int> &skew_amount) { +  if 
(stmt_nums.size() == 0) +    return; +   +  // check for sanity of parameters +  int ref_stmt_num = *(stmt_nums.begin()); +  for (std::set<int>::const_iterator i = stmt_nums.begin(); +       i != stmt_nums.end(); i++) { +    if (*i < 0 || *i >= stmt.size()) +      throw std::invalid_argument( +        "invalid statement number " + to_string(*i)); +    if (level < 1 || level > stmt[*i].loop_level.size()) +      throw std::invalid_argument( +        "invalid loop level " + to_string(level)); +    for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) +      if (skew_amount[j] != 0) +        throw std::invalid_argument("invalid skewing formula"); +  } +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  // set trasformation relations +  for (std::set<int>::const_iterator i = stmt_nums.begin(); +       i != stmt_nums.end(); i++) { +    int n = stmt[*i].xform.n_out(); +    Relation r(n, n); +    F_And *f_root = r.add_and(); +    for (int j = 1; j <= n; j++) +      if (j != 2 * level) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(r.input_var(j), 1); +        h.update_coef(r.output_var(j), -1); +      } +    EQ_Handle h = f_root->add_EQ(); +    h.update_coef(r.output_var(2 * level), -1); +    for (int j = 0; j < skew_amount.size(); j++) +      if (skew_amount[j] != 0) +        h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); +     +    stmt[*i].xform = Composition(r, stmt[*i].xform); +    stmt[*i].xform.simplify(); +  } +   +  // update dependence graph +  if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { +    int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; +    for (std::set<int>::const_iterator i = stmt_nums.begin(); +         i != stmt_nums.end(); i++) +      for (DependenceGraph::EdgeList::iterator j = +             dep.vertex[*i].second.begin(); +           j != 
dep.vertex[*i].second.end(); j++) +        if (stmt_nums.find(j->first) != stmt_nums.end()) { +          // dependence between skewed statements +          std::vector<DependenceVector> dvs = j->second; +          for (int k = 0; k < dvs.size(); k++) { +            DependenceVector &dv = dvs[k]; +            if (dv.is_data_dependence()) { +              coef_t lb = 0; +              coef_t ub = 0; +              for (int kk = 0; kk < skew_amount.size(); kk++) { +                int cur_dep_dim = get_dep_dim_of(*i, kk + 1); +                if (skew_amount[kk] > 0) { +                  if (lb != -posInfinity +                      && stmt[*i].loop_level[kk].type +                      == LoopLevelOriginal +                      && dv.lbounds[cur_dep_dim] +                      != -posInfinity) +                    lb += skew_amount[kk] +                      * dv.lbounds[cur_dep_dim]; +                  else { +                    if (cur_dep_dim != -1 +                        && !(dv.lbounds[cur_dep_dim] +                             == 0 +                             && dv.ubounds[cur_dep_dim] +                             == 0)) +                      lb = -posInfinity; +                  } +                  if (ub != posInfinity +                      && stmt[*i].loop_level[kk].type +                      == LoopLevelOriginal +                      && dv.ubounds[cur_dep_dim] +                      != posInfinity) +                    ub += skew_amount[kk] +                      * dv.ubounds[cur_dep_dim]; +                  else { +                    if (cur_dep_dim != -1 +                        && !(dv.lbounds[cur_dep_dim] +                             == 0 +                             && dv.ubounds[cur_dep_dim] +                             == 0)) +                      ub = posInfinity; +                  } +                } else if (skew_amount[kk] < 0) { +                  if (lb != -posInfinity +                      && stmt[*i].loop_level[kk].type +   
                   == LoopLevelOriginal +                      && dv.ubounds[cur_dep_dim] +                      != posInfinity) +                    lb += skew_amount[kk] +                      * dv.ubounds[cur_dep_dim]; +                  else { +                    if (cur_dep_dim != -1 +                        && !(dv.lbounds[cur_dep_dim] +                             == 0 +                             && dv.ubounds[cur_dep_dim] +                             == 0)) +                      lb = -posInfinity; +                  } +                  if (ub != posInfinity +                      && stmt[*i].loop_level[kk].type +                      == LoopLevelOriginal +                      && dv.lbounds[cur_dep_dim] +                      != -posInfinity) +                    ub += skew_amount[kk] +                      * dv.lbounds[cur_dep_dim]; +                  else { +                    if (cur_dep_dim != -1 +                        && !(dv.lbounds[cur_dep_dim] +                             == 0 +                             && dv.ubounds[cur_dep_dim] +                             == 0)) +                      ub = posInfinity; +                  } +                } +              } +              dv.lbounds[dep_dim] = lb; +              dv.ubounds[dep_dim] = ub; +              if ((dv.isCarried(dep_dim) +                   && dv.hasPositive(dep_dim)) && dv.quasi) +                dv.quasi = false; +               +              if ((dv.isCarried(dep_dim) +                   && dv.hasNegative(dep_dim)) && !dv.quasi) +                throw loop_error( +                  "loop error: Skewing is illegal, dependence violation!"); +              dv.lbounds[dep_dim] = lb; +              dv.ubounds[dep_dim] = ub; +              if ((dv.isCarried(dep_dim) +                   && dv.hasPositive(dep_dim)) && dv.quasi) +                dv.quasi = false; +               +              if ((dv.isCarried(dep_dim) +                   && dv.hasNegative(dep_dim)) && 
!dv.quasi) +                throw loop_error( +                  "loop error: Skewing is illegal, dependence violation!"); +            } +          } +          j->second = dvs; +        } else { +          // dependence from skewed statement to unskewed statement becomes jumbled, +          // put distance value at skewed dimension to unknown +          std::vector<DependenceVector> dvs = j->second; +          for (int k = 0; k < dvs.size(); k++) { +            DependenceVector &dv = dvs[k]; +            if (dv.is_data_dependence()) { +              dv.lbounds[dep_dim] = -posInfinity; +              dv.ubounds[dep_dim] = posInfinity; +            } +          } +          j->second = dvs; +        } +    for (int i = 0; i < dep.vertex.size(); i++) +      if (stmt_nums.find(i) == stmt_nums.end()) +        for (DependenceGraph::EdgeList::iterator j = +               dep.vertex[i].second.begin(); +             j != dep.vertex[i].second.end(); j++) +          if (stmt_nums.find(j->first) != stmt_nums.end()) { +            // dependence from unskewed statement to skewed statement becomes jumbled, +            // put distance value at skewed dimension to unknown +            std::vector<DependenceVector> dvs = j->second; +            for (int k = 0; k < dvs.size(); k++) { +              DependenceVector &dv = dvs[k]; +              if (dv.is_data_dependence()) { +                dv.lbounds[dep_dim] = -posInfinity; +                dv.ubounds[dep_dim] = posInfinity; +              } +            } +            j->second = dvs; +          } +  } +} + + +void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) { +  if (stmt_nums.size() == 0) +    return; +   +  // check for sanity of parameters +  int ref_stmt_num = *(stmt_nums.begin()); +  for (std::set<int>::const_iterator i = stmt_nums.begin(); +       i != stmt_nums.end(); i++) { +    if (*i < 0 || *i >= stmt.size()) +      throw std::invalid_argument( +        "invalid statement number " + 
to_string(*i)); +    if (level < 1 || level > stmt[*i].loop_level.size()) +      throw std::invalid_argument( +        "invalid loop level " + to_string(level)); +  } +   +  // do nothing +  if (shift_amount == 0) +    return; +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  // set trasformation relations +  for (std::set<int>::const_iterator i = stmt_nums.begin(); +       i != stmt_nums.end(); i++) { +    int n = stmt[*i].xform.n_out(); +     +    Relation r(n, n); +    F_And *f_root = r.add_and(); +    for (int j = 1; j <= n; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(r.input_var(j), 1); +      h.update_coef(r.output_var(j), -1); +      if (j == 2 * level) +        h.update_const(shift_amount); +    } +     +    stmt[*i].xform = Composition(r, stmt[*i].xform); +    stmt[*i].xform.simplify(); +  } +   +  // update dependence graph +  if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { +    int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; +    for (std::set<int>::const_iterator i = stmt_nums.begin(); +         i != stmt_nums.end(); i++) +      for (DependenceGraph::EdgeList::iterator j = +             dep.vertex[*i].second.begin(); +           j != dep.vertex[*i].second.end(); j++) +        if (stmt_nums.find(j->first) == stmt_nums.end()) { +          // dependence from shifted statement to unshifted statement +          std::vector<DependenceVector> dvs = j->second; +          for (int k = 0; k < dvs.size(); k++) { +            DependenceVector &dv = dvs[k]; +            if (dv.is_data_dependence()) { +              if (dv.lbounds[dep_dim] != -posInfinity) +                dv.lbounds[dep_dim] -= shift_amount; +              if (dv.ubounds[dep_dim] != posInfinity) +                dv.ubounds[dep_dim] -= shift_amount; +            } +          } +          j->second = dvs; +        } +    
for (int i = 0; i < dep.vertex.size(); i++) +      if (stmt_nums.find(i) == stmt_nums.end()) +        for (DependenceGraph::EdgeList::iterator j = +               dep.vertex[i].second.begin(); +             j != dep.vertex[i].second.end(); j++) +          if (stmt_nums.find(j->first) != stmt_nums.end()) { +            // dependence from unshifted statement to shifted statement +            std::vector<DependenceVector> dvs = j->second; +            for (int k = 0; k < dvs.size(); k++) { +              DependenceVector &dv = dvs[k]; +              if (dv.is_data_dependence()) { +                if (dv.lbounds[dep_dim] != -posInfinity) +                  dv.lbounds[dep_dim] += shift_amount; +                if (dv.ubounds[dep_dim] != posInfinity) +                  dv.ubounds[dep_dim] += shift_amount; +              } +            } +            j->second = dvs; +          } +  } +} + +void Loop::scale(const std::set<int> &stmt_nums, int level, int scale_amount) { +  std::vector<int> skew_amount(level, 0); +  skew_amount[level - 1] = scale_amount; +  skew(stmt_nums, level, skew_amount); +} + +void Loop::reverse(const std::set<int> &stmt_nums, int level) { +  scale(stmt_nums, level, -1); +} + +void Loop::fuse(const std::set<int> &stmt_nums, int level) { +  if (stmt_nums.size() == 0 || stmt_nums.size() == 1) +    return; +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  int dim = 2 * level - 1; +  // check for sanity of parameters +  std::vector<int> ref_lex; +  int ref_stmt_num; +  for (std::set<int>::const_iterator i = stmt_nums.begin(); +       i != stmt_nums.end(); i++) { +    if (*i < 0 || *i >= stmt.size()) +      throw std::invalid_argument( +        "invalid statement number " + to_string(*i)); +    if (level <= 0 +        || (level > (stmt[*i].xform.n_out() - 1) / 2 +            || level > stmt[*i].loop_level.size())) +      throw 
std::invalid_argument( +        "invalid loop level " + to_string(level)); +    if (ref_lex.size() == 0) { +      ref_lex = getLexicalOrder(*i); +      ref_stmt_num = *i; +    } else { +      std::vector<int> lex = getLexicalOrder(*i); +      for (int j = 0; j < dim - 1; j += 2) +        if (lex[j] != ref_lex[j]) +          throw std::invalid_argument( +            "statements for fusion must be in the same level-" +            + to_string(level - 1) + " subloop"); +    } +  } +   +  // collect lexicographical order values from to-be-fused statements +  std::set<int> lex_values; +  for (std::set<int>::const_iterator i = stmt_nums.begin(); +       i != stmt_nums.end(); i++) { +    std::vector<int> lex = getLexicalOrder(*i); +    lex_values.insert(lex[dim - 1]); +  } +  if (lex_values.size() == 1) +    return; +  // negative dependence would prevent fusion +   +  int dep_dim = get_dep_dim_of(ref_stmt_num, level); +   +  for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); +       i++) { +    ref_lex[dim - 1] = *i; +    std::set<int> a = getStatements(ref_lex, dim - 1); +    std::set<int>::iterator j = i; +    j++; +    for (; j != lex_values.end(); j++) { +      ref_lex[dim - 1] = *j; +      std::set<int> b = getStatements(ref_lex, dim - 1); +      for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++) +        for (std::set<int>::iterator jj = b.begin(); jj != b.end(); +             jj++) { +          std::vector<DependenceVector> dvs; +          dvs = dep.getEdge(*ii, *jj); +          for (int k = 0; k < dvs.size(); k++) +            if (dvs[k].isCarried(dep_dim) +                && dvs[k].hasNegative(dep_dim)) +              throw loop_error( +                "loop error: statements " + to_string(*ii) +                + " and " + to_string(*jj) +                + " cannot be fused together due to negative dependence"); +          dvs = dep.getEdge(*jj, *ii); +          for (int k = 0; k < dvs.size(); k++) +            if 
(dvs[k].isCarried(dep_dim) +                && dvs[k].hasNegative(dep_dim)) +              throw loop_error( +                "loop error: statements " + to_string(*jj) +                + " and " + to_string(*ii) +                + " cannot be fused together due to negative dependence"); +        } +    } +  } +   +  std::set<int> same_loop = getStatements(ref_lex, dim - 3); +   +  std::vector<std::set<int> > s = sort_by_same_loops(same_loop, level); +   +  std::set<int> s1; +  std::set<int> s2; +  std::set<int> s4; +  std::vector<std::set<int> > s3; +  for (std::set<int>::iterator kk = stmt_nums.begin(); kk != stmt_nums.end(); +       kk++) +    for (int i = 0; i < s.size(); i++) +      if (s[i].find(*kk) != s[i].end()) { +        s1.insert(s[i].begin(), s[i].end()); +        s2.insert(i); +      } +   +  s3.push_back(s1); +  for (int i = 0; i < s.size(); i++) +    if (s2.find(i) == s2.end()) { +      s3.push_back(s[i]); +      s4.insert(s[i].begin(), s[i].end()); +    } +  try { +    std::vector<std::set<int> > s5; +    s5.push_back(s1); +    s5.push_back(s4); +     +    //Dependence Check for Ordering Constraint +    //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5, +    //      dep, dep_dim); +     +    Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s3, dep, +                                                                    dep_dim); +     +    s = typed_fusion(g); +  } catch (const loop_error &e) { +     +    throw loop_error( +      "statements cannot be fused together due to negative dependence"); +     +  } +   +  if (s3.size() == s.size()) { +    int order = 0; +    for (int i = 0; i < s.size(); i++) { +       +      for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); +           it++) { +         +        assign_const(stmt[*it].xform, 2 * level - 2, order); +         +      } +       +      order++; +    } +  } else if (s3.size() > s.size()) { +     +    int order = 0; +    for (int j = 0; j < 
s.size(); j++) { +      std::set<int>::iterator it3; +      for (it3 = s1.begin(); it3 != s1.end(); it3++) { +        if (s[j].find(*it3) != s[j].end()) +          break; +      } +      if (it3 != s1.end()) { +        for (std::set<int>::iterator it = s1.begin(); it != s1.end(); +             it++) +          assign_const(stmt[*it].xform, 2 * level - 2, order); +         +        order++; +         +      } +       +      for (int i = 0; i < s3.size(); i++) { +        std::set<int>::iterator it2; +         +        for (it2 = s3[i].begin(); it2 != s3[i].end(); it2++) { +          if (s[j].find(*it2) != s[j].end()) +            break; +        } +         +        if (it2 != s3[i].end()) { +          for (std::set<int>::iterator it = s3[i].begin(); +               it != s3[i].end(); it++) +            assign_const(stmt[*it].xform, 2 * level - 2, order); +           +          order++; +           +        } +      } +    } +     +  } else +    throw loop_error("Typed Fusion Error"); +   +} + + + +void Loop::distribute(const std::set<int> &stmt_nums, int level) { +  if (stmt_nums.size() == 0 || stmt_nums.size() == 1) +    return; +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +  int dim = 2 * level - 1; +  int ref_stmt_num; +  // check for sanity of parameters +  std::vector<int> ref_lex; +  for (std::set<int>::const_iterator i = stmt_nums.begin(); +       i != stmt_nums.end(); i++) { +    if (*i < 0 || *i >= stmt.size()) +      throw std::invalid_argument( +        "invalid statement number " + to_string(*i)); +    if (level < 1 +        || (level > (stmt[*i].xform.n_out() - 1) / 2 +            || level > stmt[*i].loop_level.size())) +      throw std::invalid_argument( +        "invalid loop level " + to_string(level)); +    if (ref_lex.size() == 0) { +      ref_lex = getLexicalOrder(*i); +      ref_stmt_num = *i; +    } else { +      std::vector<int> 
lex = getLexicalOrder(*i); +      for (int j = 0; j <= dim - 1; j += 2) +        if (lex[j] != ref_lex[j]) +          throw std::invalid_argument( +            "statements for distribution must be in the same level-" +            + to_string(level) + " subloop"); +    } +  } +  // find SCC in the to-be-distributed loop +  int dep_dim = get_dep_dim_of(ref_stmt_num, level); +  std::set<int> same_loop = getStatements(ref_lex, dim - 1); +  Graph<int, Empty> g; +  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); +       i++) +    g.insert(*i); +  for (int i = 0; i < g.vertex.size(); i++) +    for (int j = i + 1; j < g.vertex.size(); j++) { +      std::vector<DependenceVector> dvs; +      dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); +      for (int k = 0; k < dvs.size(); k++) +        if (dvs[k].isCarried(dep_dim)) { +          g.connect(i, j); +          break; +        } +      dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); +      for (int k = 0; k < dvs.size(); k++) +        if (dvs[k].isCarried(dep_dim)) { +          g.connect(j, i); +          break; +        } +    } +  std::vector<std::set<int> > s = g.topoSort(); +  // find statements that cannot be distributed due to dependence cycle +  Graph<std::set<int>, Empty> g2; +  for (int i = 0; i < s.size(); i++) { +    std::set<int> t; +    for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) +      if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) +        t.insert(g.vertex[*j].first); +    if (!t.empty()) +      g2.insert(t); +  } +  for (int i = 0; i < g2.vertex.size(); i++) +    for (int j = i + 1; j < g2.vertex.size(); j++) +      for (std::set<int>::iterator ii = g2.vertex[i].first.begin(); +           ii != g2.vertex[i].first.end(); ii++) +        for (std::set<int>::iterator jj = g2.vertex[j].first.begin(); +             jj != g2.vertex[j].first.end(); jj++) { +          std::vector<DependenceVector> dvs; +          dvs = dep.getEdge(*ii, 
*jj); +          for (int k = 0; k < dvs.size(); k++) +            if (dvs[k].isCarried(dep_dim)) { +              g2.connect(i, j); +              break; +            } +          dvs = dep.getEdge(*jj, *ii); +          for (int k = 0; k < dvs.size(); k++) +            if (dvs[k].isCarried(dep_dim)) { +              g2.connect(j, i); +              break; +            } +        } +  std::vector<std::set<int> > s2 = g2.topoSort(); +  // nothing to distribute +  if (s2.size() == 1) +    throw loop_error( +      "loop error: no statement can be distributed due to dependence cycle"); +  std::vector<std::set<int> > s3; +  for (int i = 0; i < s2.size(); i++) { +    std::set<int> t; +    for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) +      std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), +                     g2.vertex[*j].first.end(), inserter(t, t.begin())); +    s3.push_back(t); +  } +  // associate other affected statements with the right distributed statements +  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); +       i++) +    if (stmt_nums.find(*i) == stmt_nums.end()) { +      bool is_inserted = false; +      int potential_insertion_point = 0; +      for (int j = 0; j < s3.size(); j++) { +        for (std::set<int>::iterator k = s3[j].begin(); +             k != s3[j].end(); k++) { +          std::vector<DependenceVector> dvs; +          dvs = dep.getEdge(*i, *k); +          for (int kk = 0; kk < dvs.size(); kk++) +            if (dvs[kk].isCarried(dep_dim)) { +              s3[j].insert(*i); +              is_inserted = true; +              break; +            } +          dvs = dep.getEdge(*k, *i); +          for (int kk = 0; kk < dvs.size(); kk++) +            if (dvs[kk].isCarried(dep_dim)) +              potential_insertion_point = j; +        } +        if (is_inserted) +          break; +      } +      if (!is_inserted) +        s3[potential_insertion_point].insert(*i); +    } +  // set 
lexicographical order after distribution +  int order = ref_lex[dim - 1]; +  shiftLexicalOrder(ref_lex, dim - 1, s3.size() - 1); +  for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end(); +       i++) { +    for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++) +      assign_const(stmt[*j].xform, dim - 1, order); +    order++; +  } +  // no need to update dependence graph +  ; +  return; +} + diff --git a/src/loop_datacopy.cc b/src/loop_datacopy.cc new file mode 100644 index 0000000..8d11b0a --- /dev/null +++ b/src/loop_datacopy.cc @@ -0,0 +1,2166 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: +   Various data copy schemes. + + Notes: + + History: +   02/20/09 Created by Chun Chen by splitting original datacopy from loop.cc +*****************************************************************************/ + +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +// +// data copy function by referring arrays by numbers. +// e.g. 
A[i] = A[i-1] + B[i] +//      parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, +                    bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +  // check for sanity of parameters +  std::set<int> same_loop; +  for (int i = 0; i < array_ref_nums.size(); i++) { +    int stmt_num = array_ref_nums[i].first; +    if (stmt_num < 0 || stmt_num >= stmt.size()) +      throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); +    if (level <= 0 || level > stmt[stmt_num].loop_level.size()) +      throw std::invalid_argument("invalid loop level " + to_string(level)); +    if (i == 0) { +      std::vector<int> lex = getLexicalOrder(stmt_num); +      same_loop = getStatements(lex, 2*level-2); +    } +    else if (same_loop.find(stmt_num) == same_loop.end()) +      throw std::invalid_argument("array references for data copy must be located in the same subloop"); +  } +   +  // convert array reference numbering scheme to actual array references +  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; +  for (int i = 0; i < array_ref_nums.size(); i++) { +    if (array_ref_nums[i].second.size() == 0) +      continue; +     +    int stmt_num = array_ref_nums[i].first; +    selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); +    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); +    std::vector<bool> selected(refs.size(), false); +    for (int j = 0; j < array_ref_nums[i].second.size(); j++) { +      int ref_num = array_ref_nums[i].second[j]; +      if (ref_num < 0 || ref_num >= refs.size()) { +        for (int k = 0; k < refs.size(); k++) +          delete refs[k]; +        throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + 
to_string(stmt_num)); +      } +      selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); +      selected[ref_num] = true; +    } +    for (int j = 0; j < refs.size(); j++) +      if (!selected[j]) +        delete refs[j]; +  } +  if (selected_refs.size() == 0) +    throw std::invalid_argument("found no array references to copy"); +   +  // do the copy +  return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +// +// data copy function by referring arrays by name. +// e.g. A[i] = A[i-1] + B[i] +//      parameter array_name=A means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, +                    bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +  // check for sanity of parameters +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); +  if (level <= 0 || level > stmt[stmt_num].loop_level.size()) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +   +  // collect array references by name +  std::vector<int> lex = getLexicalOrder(stmt_num); +  int dim = 2*level - 1; +  std::set<int> same_loop = getStatements(lex, dim-1); +   +  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; +  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +    std::vector<IR_ArrayRef *> t; +    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);   +    for (int j = 0; j < refs.size(); j++) +      if (refs[j]->name() == array_name) +        t.push_back(refs[j]); +      else +        delete refs[j]; +    if (t.size() != 0) +      selected_refs.push_back(std::make_pair(*i, t));  +  } +  if (selected_refs.size() == 0) +    throw 
std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); +   +  // do the copy +  return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, +                               bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +  // check for sanity of parameters +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); +  if (level <= 0 || level > stmt[stmt_num].loop_level.size()) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +   +  // collect array references by name +  std::vector<int> lex = getLexicalOrder(stmt_num); +  int dim = 2*level - 1; +  std::set<int> same_loop = getStatements(lex, dim-1); +   +  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; +  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +    selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>())); +     +    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);   +    for (int j = 0; j < refs.size(); j++) +      if (refs[j]->name() == array_name) +        selected_refs[selected_refs.size()-1].second.push_back(refs[j]); +      else +        delete refs[j]; +  } +  if (selected_refs.size() == 0) +    throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); +   +  // do the copy +  return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +bool Loop::datacopy_privatized(const std::vector<std::pair<int, 
std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { +  // check for sanity of parameters +  std::set<int> same_loop; +  for (int i = 0; i < array_ref_nums.size(); i++) { +    int stmt_num = array_ref_nums[i].first; +    if (stmt_num < 0 || stmt_num >= stmt.size()) +      throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); +    if (level <= 0 || level > stmt[stmt_num].loop_level.size()) +      throw std::invalid_argument("invalid loop level " + to_string(level)); +    if (i == 0) { +      std::vector<int> lex = getLexicalOrder(stmt_num); +      same_loop = getStatements(lex, 2*level-2); +    } +    else if (same_loop.find(stmt_num) == same_loop.end()) +      throw std::invalid_argument("array references for data copy must be located in the same subloop"); +  } +   +  // convert array reference numbering scheme to actual array references +  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; +  for (int i = 0; i < array_ref_nums.size(); i++) { +    if (array_ref_nums[i].second.size() == 0) +      continue; +     +    int stmt_num = array_ref_nums[i].first; +    selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); +    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); +    std::vector<bool> selected(refs.size(), false); +    for (int j = 0; j < array_ref_nums[i].second.size(); j++) { +      int ref_num = array_ref_nums[i].second[j]; +      if (ref_num < 0 || ref_num >= refs.size()) { +        for (int k = 0; k < refs.size(); k++) +          delete refs[k]; +        throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); +      } +      selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); +      selected[ref_num] = true; +    } +  
  for (int j = 0; j < refs.size(); j++) +      if (!selected[j]) +        delete refs[j]; +  } +  if (selected_refs.size() == 0) +    throw std::invalid_argument("found no array references to copy"); +   +  // do the copy +  return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +// +// Implement low level datacopy function with lots of options. +// +/*bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, +  const std::vector<int> &privatized_levels, +  bool allow_extra_read, int fastest_changing_dimension, +  int padding_stride, int padding_alignment, int memory_type) { +  if (stmt_refs.size() == 0) +  return true; +   +  // check for sanity of parameters +  IR_ArraySymbol *sym = NULL; +  std::vector<int> lex; +  std::set<int> active; +  if (level <= 0) +  throw std::invalid_argument("invalid loop level " + to_string(level)); +  for (int i = 0; i < privatized_levels.size(); i++) { +  if (i == 0) { +  if (privatized_levels[i] < level) +  throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); +  } +  else if (privatized_levels[i] <= privatized_levels[i-1]) +  throw std::invalid_argument("privatized loop levels must be in ascending order"); +  } +  for (int i = 0; i < stmt_refs.size(); i++) { +  int stmt_num = stmt_refs[i].first; +  active.insert(stmt_num); +  if (stmt_num < 0 || stmt_num >= stmt.size()) +  throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); +  if (privatized_levels.size() != 0) { +  if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) +  throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); +  } +  else { +  if (level > stmt[stmt_num].loop_level.size()) +  throw 
std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); +  } +  for (int j = 0; j < stmt_refs[i].second.size(); j++) { +  if (sym == NULL) { +  sym = stmt_refs[i].second[j]->symbol(); +  lex = getLexicalOrder(stmt_num); +  } +  else { +  IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); +  if (t->name() != sym->name()) { +  delete t; +  delete sym; +  throw std::invalid_argument("try to copy data from different arrays"); +  } +  delete t; +  } +  } +  } +  if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) +  throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); +  if (padding_stride < 0) +  throw std::invalid_argument("invalid temporary array stride requirement"); +  if (padding_alignment == -1 || padding_alignment == 0) +  throw std::invalid_argument("invalid temporary array alignment requirement"); +   +  int dim = 2*level - 1; +  int n_dim = sym->n_dim(); +   +  if (fastest_changing_dimension == -1) +  switch (sym->layout_type()) { +  case IR_ARRAY_LAYOUT_ROW_MAJOR: +  fastest_changing_dimension = n_dim - 1; +  break; +  case IR_ARRAY_LAYOUT_COLUMN_MAJOR: +  fastest_changing_dimension = 0; +  break; +  default: +  throw loop_error("unsupported array layout"); +  } +   +   +  // build iteration spaces for all reads and for all writes separately +  apply_xform(active); +  bool has_write_refs = false; +  bool has_read_refs = false; +  Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); +  Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); +  for (int i = 0; i < stmt_refs.size(); i++) { +  int stmt_num = stmt_refs[i].first; +   +  for (int j = 0; j < stmt_refs[i].second.size(); j++) { +  Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); +  for (int k = 1; k <= mapping.n_inp(); k++) +  mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); 
+  mapping.setup_names(); +  F_And *f_root = mapping.add_and(); +  for (int k = 1; k <= level-1; k++) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(mapping.input_var(k), 1); +  h.update_coef(mapping.output_var(k), -1); +  } +  for (int k = 0; k < privatized_levels.size(); k++) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(mapping.input_var(privatized_levels[k]), 1); +  h.update_coef(mapping.output_var(level+k), -1); +  } +  for (int k = 0; k < n_dim; k++) { +  CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); +  exp2formula(ir, mapping, f_root, freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); +  repr->clear(); +  delete repr; +  } +  Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); +  if (stmt_refs[i].second[j]->is_write()) { +  has_write_refs = true; +  wo_copy_is = Union(wo_copy_is, r); +  wo_copy_is.simplify(2, 4); +  } +  else { +  has_read_refs = true; +  //protonu--removing the next line for now +  ro_copy_is = Union(ro_copy_is, r); +  ro_copy_is.simplify(2, 4); +  //ro_copy_is = ConvexRepresentation(Union(ro_copy_is, r)); +   +  } +  } +  } +   +  if (allow_extra_read) { +  Relation t = DecoupledConvexHull(copy(ro_copy_is)); +  if (t.number_of_conjuncts() > 1) +  ro_copy_is = RectHull(ro_copy_is); +  else +  ro_copy_is = t; +  } +  else { +  Relation t = ConvexRepresentation(copy(ro_copy_is)); +  if (t.number_of_conjuncts() > 1) +  ro_copy_is = RectHull(ro_copy_is); +  else +  ro_copy_is = t; +  } +  wo_copy_is = ConvexRepresentation(wo_copy_is); +   +  if (allow_extra_read) { +  Tuple<Relation> Rs; +  Tuple<int> active; +  for (DNF_Iterator di(ro_copy_is.query_DNF()); di; di++) { +  Rs.append(Relation(ro_copy_is, di.curr())); +  active.append(1); +  } +  Relation the_gcs = Relation::True(ro_copy_is.n_set()); +  for (int i = level-1+privatized_levels.size()+1; i <= 
level-1+privatized_levels.size()+n_dim; i++) { +  Relation r = greatest_common_step(Rs, active, i, Relation::Null()); +  the_gcs = Intersection(the_gcs, r); +  } +   +  ro_copy_is = Approximate(ro_copy_is); +  ro_copy_is = ConvexRepresentation(ro_copy_is); +  ro_copy_is = Intersection(ro_copy_is, the_gcs); +  ro_copy_is.simplify(); +  } +   +   +   +  for (int i = 1; i < level; i++) { +  std::string s = stmt[*active.begin()].IS.input_var(i)->name(); +  wo_copy_is.name_set_var(i, s); +  ro_copy_is.name_set_var(i, s); +  } +  for (int i = 0; i < privatized_levels.size(); i++) { +  std::string s = stmt[*active.begin()].IS.input_var(privatized_levels[i])->name(); +  wo_copy_is.name_set_var(level+i, s); +  ro_copy_is.name_set_var(level+i, s); +  } +  for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { +  std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); +  wo_copy_is.name_set_var(i, s); +  ro_copy_is.name_set_var(i, s); +  } +  tmp_loop_var_name_counter += n_dim; +   +  //protonu--end change +   +  wo_copy_is.setup_names(); +  ro_copy_is.setup_names(); +   +  // build merged iteration space for calculating temporary array size +  bool already_use_recthull = false; +  Relation untampered_copy_is = ConvexRepresentation(Union(copy(wo_copy_is), copy(ro_copy_is))); +  Relation copy_is = untampered_copy_is; +  if (copy_is.number_of_conjuncts() > 1) { +  try { +  copy_is = ConvexHull(copy(untampered_copy_is)); +  } +  catch (const std::overflow_error &e) { +  copy_is = RectHull(copy(untampered_copy_is)); +  already_use_recthull = true; +  } +  } +   +   +  Retry_copy_is: +  // extract temporary array information +  CG_outputBuilder *ocg = ir->builder(); +  std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL +  std::vector<coef_t> index_stride(n_dim, 1); +  std::vector<bool> is_index_eq(n_dim, false); +  std::vector<std::pair<int, CG_outputRepr *> > 
index_sz(0);   +  Relation reduced_copy_is = copy(copy_is); +   +  for (int i = 0; i < n_dim; i++) { +  if (i != 0) +  reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); +  Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); +   +  // extract stride +  EQ_Handle stride_eq; +  { +  bool simple_stride = true; +  int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level-1+privatized_levels.size()+i+1), stride_eq, simple_stride); +  if (strides > 1) { +  throw loop_error("too many strides"); +  } +  else if (strides == 1) { +  int sign = stride_eq.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); +  Constr_Vars_Iter it(stride_eq, true); +  index_stride[i] = abs((*it).coef/sign); +  } +  } +   +  // check if this arary index requires loop +  Conjunct *c = bound.query_DNF()->single_conjunct(); +  for (EQ_Iterator ei(c->EQs()); ei; ei++) { +  if ((*ei).has_wildcards()) +  continue; +   +  int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); +  if (coef != 0) { +  int sign = 1; +  if (coef < 0) { +  coef = -coef; +  sign = -1; +  } +   +  CG_outputRepr *op = NULL; +  for (Constr_Vars_Iter ci(*ei); ci; ci++) { +  switch ((*ci).var->kind()) { +  case Input_Var: +  { +  if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) +  if ((*ci).coef*sign == 1) +  op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); +  else if ((*ci).coef*sign == -1) +  op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); +  else if ((*ci).coef*sign > 1) +  op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); +  else // (*ci).coef*sign < -1 +  op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); +  break; +  } +  case Global_Var: +  { +  Global_Var_ID g = (*ci).var->get_global_var(); +  if ((*ci).coef*sign == 
1) +  op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); +  else if ((*ci).coef*sign == -1) +  op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); +  else if ((*ci).coef*sign > 1) +  op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); +  else // (*ci).coef*sign < -1 +  op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); +  break; +  } +  default: +  throw loop_error("unsupported array index expression"); +  } +  } +  if ((*ei).get_const() != 0) +  op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); +  if (coef != 1) +  op = ocg->CreateIntegerDivide(op, ocg->CreateInt(coef)); +   +  index_lb[i] = op; +  is_index_eq[i] = true; +  break; +  } +  } +  if (is_index_eq[i]) +  continue; +   +  // seperate lower and upper bounds +  std::vector<GEQ_Handle> lb_list, ub_list; +  for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { +  int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); +  if (coef != 0 && (*gi).has_wildcards()) { +  bool clean_bound = true; +  GEQ_Handle h; +  for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) +  if (!findFloorInequality(bound, (*cvi).var, h, bound.set_var(level-1+privatized_levels.size()+i+1))) { +  clean_bound = false; +  break; +  } +  if (!clean_bound) +  continue; +  } +   +  if (coef > 0) +  lb_list.push_back(*gi); +  else if (coef < 0) +  ub_list.push_back(*gi); +  } +  if (lb_list.size() == 0 || ub_list.size() == 0) +  if (already_use_recthull) +  throw loop_error("failed to calcuate array footprint size"); +  else { +  copy_is = RectHull(copy(untampered_copy_is)); +  already_use_recthull = true; +  goto Retry_copy_is; +  } +   +  // build lower bound representation +  Tuple<CG_outputRepr *> lb_repr_list; +  for (int j = 0; j < lb_list.size(); j++) +  lb_repr_list.append(outputLBasRepr(ocg, lb_list[j], bound, +  bound.set_var(level-1+privatized_levels.size()+i+1),  +  
index_stride[i], stride_eq, Relation::True(bound.n_set()), +  std::vector<CG_outputRepr *>(bound.n_set()))); +   +  if (lb_repr_list.size() > 1) +  index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); +  else if (lb_repr_list.size() == 1) +  index_lb[i] = lb_repr_list[1]; +   +  // build temporary array size representation +  { +  Relation cal(copy_is.n_set(), 1); +  F_And *f_root = cal.add_and(); +  for (int j = 0; j < ub_list.size(); j++) +  for (int k = 0; k < lb_list.size(); k++) { +  GEQ_Handle h = f_root->add_GEQ(); +   +  for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { +  switch ((*ci).var->kind()) { +  case Input_Var: +  { +  int pos = (*ci).var->get_position(); +  h.update_coef(cal.input_var(pos), (*ci).coef); +  break; +  } +  case Global_Var: +  { +  Global_Var_ID g = (*ci).var->get_global_var(); +  Variable_ID v; +  if (g->arity() == 0) +  v = cal.get_local(g); +  else +  v = cal.get_local(g, (*ci).var->function_of()); +  h.update_coef(v, (*ci).coef); +  break; +  } +  default: +  throw loop_error("cannot calculate temporay array size statically"); +  } +  } +  h.update_const(ub_list[j].get_const()); +   +  for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { +  switch ((*ci).var->kind()) { +  case Input_Var: +  { +  int pos = (*ci).var->get_position(); +  h.update_coef(cal.input_var(pos), (*ci).coef); +  break; +  } +  case Global_Var: +  { +  Global_Var_ID g = (*ci).var->get_global_var(); +  Variable_ID v; +  if (g->arity() == 0) +  v = cal.get_local(g); +  else +  v = cal.get_local(g, (*ci).var->function_of()); +  h.update_coef(v, (*ci).coef); +  break; +  } +  default: +  throw loop_error("cannot calculate temporay array size statically"); +  } +  } +  h.update_const(lb_list[k].get_const()); +   +  h.update_const(1); +  h.update_coef(cal.output_var(1), -1); +  } +   +  cal = Restrict_Domain(cal, copy(copy_is)); +  for (int j = 1; j <= cal.n_inp(); j++) +  cal = Project(cal, j, Input_Var); +  cal.simplify(); +   +  // pad temporary array size +  // 
TODO: for variable array size, create padding formula +  Conjunct *c = cal.query_DNF()->single_conjunct(); +  bool is_index_bound_const = false; +  for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) +  if ((*gi).is_const(cal.output_var(1))) { +  coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); +  if (padding_stride != 0) { +  size = (size + index_stride[i] - 1) / index_stride[i]; +  if (i == fastest_changing_dimension) +  size = size * padding_stride; +  } +  if (i == fastest_changing_dimension) { +  if (padding_alignment > 1) { // align to boundary for data packing +  int residue = size % padding_alignment; +  if (residue) +  size = size+padding_alignment-residue; +  } +  else if (padding_alignment < -1) {  // un-alignment for memory bank conflicts +  while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1) +  size++; +  } +  } +  index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); +  is_index_bound_const = true; +  } +   +  if (!is_index_bound_const) { +  for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { +  int coef = (*gi).get_coef(cal.output_var(1)); +  if (coef < 0) { +  CG_outputRepr *op = NULL; +  for (Constr_Vars_Iter ci(*gi); ci; ci++) { +  if ((*ci).var != cal.output_var(1)) { +  switch((*ci).var->kind()) { +  case Global_Var: +  { +  Global_Var_ID g = (*ci).var->get_global_var(); +  if ((*ci).coef == 1) +  op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); +  else if ((*ci).coef == -1) +  op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); +  else if ((*ci).coef > 1) +  op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); +  else // (*ci).coef < -1 +  op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); +  break; +  } +  default: +  throw loop_error("failed to generate array index bound code"); +  } +  } +  } +  int c = (*gi).get_const(); +  if (c > 0) +  
op = ocg->CreatePlus(op, ocg->CreateInt(c)); +  else if (c < 0) +  op = ocg->CreateMinus(op, ocg->CreateInt(-c)); +  if (padding_stride != 0) { +  if (i == fastest_changing_dimension) { +  coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); +  coef_t t1 = index_stride[i] / g; +  if (t1 != 1) +  op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); +  coef_t t2 = padding_stride / g; +  if (t2 != 1) +  op = ocg->CreateTimes(op, ocg->CreateInt(t2)); +  } +  else if (index_stride[i] != 1) { +  op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); +  } +  } +   +  index_sz.push_back(std::make_pair(i, op)); +  break; +  } +  } +  } +  } +  } +   +  // change the temporary array index order +  for (int i = 0; i < index_sz.size(); i++) +  if (index_sz[i].first == fastest_changing_dimension) +  switch (sym->layout_type()) { +  case IR_ARRAY_LAYOUT_ROW_MAJOR: +  std::swap(index_sz[index_sz.size()-1], index_sz[i]); +  break; +  case IR_ARRAY_LAYOUT_COLUMN_MAJOR: +  std::swap(index_sz[0], index_sz[i]); +  break; +  default: +  throw loop_error("unsupported array layout"); +  } +   +  // declare temporary array or scalar +  IR_Symbol *tmp_sym; +  if (index_sz.size() == 0) { +  tmp_sym = ir->CreateScalarSymbol(sym, memory_type); +  } +  else { +  std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); +  for (int i = 0; i < index_sz.size(); i++) +  tmp_array_size[i] = index_sz[i].second->clone(); +  tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); +  } +   +  // create temporary array read initialization code +  CG_outputRepr *copy_code_read; +  if (has_read_refs) +  if (index_sz.size() == 0) { +  IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); +   +  std::vector<CG_outputRepr *> rhs_index(n_dim); +  for (int i = 0; i < index_lb.size(); i++) +  if (is_index_eq[i]) +  rhs_index[i] = 
index_lb[i]->clone(); +  else +  rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); +   +  copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); +  } +  else { +  std::vector<CG_outputRepr *> lhs_index(index_sz.size()); +  for (int i = 0; i < index_sz.size(); i++) { +  int cur_index_num = index_sz[i].first; +  CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); +  if (padding_stride != 0) { +  if (i == n_dim-1) { +  coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); +  coef_t t1 = index_stride[cur_index_num] / g; +  if (t1 != 1) +  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); +  coef_t t2 = padding_stride / g; +  if (t2 != 1) +  cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); +  } +  else if (index_stride[cur_index_num] != 1) { +  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); +  } +  } +   +  if (ir->ArrayIndexStartAt() != 0) +  cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); +  lhs_index[i] = cur_index_repr; +  } +   +  IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); +   +  std::vector<CG_outputRepr *> rhs_index(n_dim); +  for (int i = 0; i < index_lb.size(); i++) +  if (is_index_eq[i]) +  rhs_index[i] = index_lb[i]->clone(); +  else +  rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); +   +  copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); +  } 
+   +  // create temporary array write back code +  CG_outputRepr *copy_code_write; +  if (has_write_refs) +  if (index_sz.size() == 0) { +  IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); +   +  std::vector<CG_outputRepr *> rhs_index(n_dim); +  for (int i = 0; i < index_lb.size(); i++) +  if (is_index_eq[i]) +  rhs_index[i] = index_lb[i]->clone(); +  else +  rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); +   +  copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); +  } +  else { +  std::vector<CG_outputRepr *> lhs_index(n_dim); +  for (int i = 0; i < index_lb.size(); i++) +  if (is_index_eq[i]) +  lhs_index[i] = index_lb[i]->clone(); +  else +  lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +  IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); +   +  std::vector<CG_outputRepr *> rhs_index(index_sz.size()); +  for (int i = 0; i < index_sz.size(); i++) { +  int cur_index_num = index_sz[i].first; +  CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); +  if (padding_stride != 0) { +  if (i == n_dim-1) { +  coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); +  coef_t t1 = index_stride[cur_index_num] / g; +  if (t1 != 1) +  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); +  coef_t t2 = padding_stride / g; +  if (t2 != 1) +  cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); +  } +  else if (index_stride[cur_index_num] != 1) { +  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); +  } +  } +   +  if 
(ir->ArrayIndexStartAt() != 0) +  cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); +  rhs_index[i] = cur_index_repr; +  } +  IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); +   +  copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); +  } +   +  // now we can remove those loops for array indexes that are +  // dependent on others +  if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { +  Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); +  F_And *f_root = mapping.add_and(); +  for (int i = 1; i <= level-1+privatized_levels.size(); i++) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(mapping.input_var(i), 1); +  h.update_coef(mapping.output_var(i), -1); +  } +   +  int cur_index = 0; +  std::vector<int> mapped_index(index_sz.size()); +  for (int i = 0; i < n_dim; i++) +  if (!is_index_eq[i]) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); +  switch (sym->layout_type()) { +  case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { +  h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); +  mapped_index[index_sz.size()-cur_index-1] = i; +  break; +  } +  case IR_ARRAY_LAYOUT_ROW_MAJOR: { +  h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); +  mapped_index[cur_index] = i; +  break; +  } +  default: +  throw loop_error("unsupported array layout"); +  } +  cur_index++; +  } +   +  wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); +  ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); +   +  // protonu--replacing Chun's old code  +  for (int i = 1; i <= level-1+privatized_levels.size(); i++) { +  wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); +  
ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); +  } +   +   +   +  for (int i = 0; i < index_sz.size(); i++) { +  wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); +  ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); +  }       +  wo_copy_is.setup_names(); +  ro_copy_is.setup_names(); +  } +   +  // insert read copy statement +  int old_num_stmt = stmt.size(); +  int ro_copy_stmt_num = -1; +  if (has_read_refs) { +  Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); +  { +  F_And *f_root = copy_xform.add_and(); +  for (int i = 1; i <= ro_copy_is.n_set(); i++) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(copy_xform.input_var(i), 1); +  h.update_coef(copy_xform.output_var(2*i), -1); +  } +  for (int i = 1; i <= dim; i+=2) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(copy_xform.output_var(i), -1); +  h.update_const(lex[i-1]); +  } +  for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(copy_xform.output_var(i), 1); +  } +  } +   +  Statement copy_stmt_read; +  copy_stmt_read.IS = ro_copy_is; +  copy_stmt_read.xform = copy_xform; +  copy_stmt_read.code = copy_code_read; +  copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); +  copy_stmt_read.ir_stmt_node = NULL; +  for (int i = 0; i < level-1; i++) { +  copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; +  if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && +  stmt[*(active.begin())].loop_level[i].payload >= level) { +  int j; +  for (j = 0; j < privatized_levels.size(); j++) +  if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) +  break; +  if (j == privatized_levels.size()) +  copy_stmt_read.loop_level[i].payload = -1; +  else +  
copy_stmt_read.loop_level[i].payload = level + j; +  } +  else +  copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; +  copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; +  } +  for (int i = 0; i < privatized_levels.size(); i++) { +  copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; +  copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; +  copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; +  } +  int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); +  for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { +  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; +  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; +  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +  } +  for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { +  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; +  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; +  copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +  } +   +  shiftLexicalOrder(lex, dim-1, 1); +  stmt.push_back(copy_stmt_read); +  ro_copy_stmt_num = stmt.size() - 1; +  dep.insert(); +  } +   +  // insert write copy statement +  int wo_copy_stmt_num = -1; +  if (has_write_refs) { +  Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); +  { +  F_And *f_root = copy_xform.add_and(); +  for (int i = 1; i <= wo_copy_is.n_set(); i++) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(copy_xform.input_var(i), 1); +  
h.update_coef(copy_xform.output_var(2*i), -1); +  } +  for (int i = 1; i <= dim; i+=2) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(copy_xform.output_var(i), -1); +  h.update_const(lex[i-1]); +  } +  for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { +  EQ_Handle h = f_root->add_EQ(); +  h.update_coef(copy_xform.output_var(i), 1); +  } +  } +   +  Statement copy_stmt_write; +  copy_stmt_write.IS = wo_copy_is; +  copy_stmt_write.xform = copy_xform; +  copy_stmt_write.code = copy_code_write; +  copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); +  copy_stmt_write.ir_stmt_node = NULL; +   +  for (int i = 0; i < level-1; i++) { +  copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; +  if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && +  stmt[*(active.begin())].loop_level[i].payload >= level) { +  int j; +  for (j = 0; j < privatized_levels.size(); j++) +  if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) +  break; +  if (j == privatized_levels.size()) +  copy_stmt_write.loop_level[i].payload = -1; +  else +  copy_stmt_write.loop_level[i].payload = level + j; +  } +  else +  copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; +  copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; +  } +  for (int i = 0; i < privatized_levels.size(); i++) { +  copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; +  copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; +  copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; +  } +  int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); +  for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { +  
copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; +  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; +  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +  } +  for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { +  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; +  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; +  copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +  } +   +  lex[dim-1]++; +  shiftLexicalOrder(lex, dim-1, -2); +  stmt.push_back(copy_stmt_write); +  wo_copy_stmt_num = stmt.size() - 1; +  dep.insert(); +  }  +   +  // replace original array accesses with temporary array accesses +  for (int i =0; i < stmt_refs.size(); i++) +  for (int j = 0; j < stmt_refs[i].second.size(); j++) { +  if (index_sz.size() == 0) { +  IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); +  ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); +  } +  else { +  std::vector<CG_outputRepr *> index_repr(index_sz.size()); +  for (int k = 0; k < index_sz.size(); k++) { +  int cur_index_num = index_sz[k].first; +   +  CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); +  if (padding_stride != 0) { +  if (k == n_dim-1) { +  coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); +  coef_t t1 = index_stride[cur_index_num] / g; +  if (t1 != 1) +  cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); +  coef_t t2 = padding_stride / g; +  if (t2 != 1) +  cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); +  } +  else if (index_stride[cur_index_num] != 1) { +  cur_index_repr = 
ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); +  } +  } +   +  if (ir->ArrayIndexStartAt() != 0) +  cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); +  index_repr[k] = cur_index_repr; +  } +   +  IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); +  ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); +  } +  } +   +  // update dependence graph +  int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; +  if (ro_copy_stmt_num != -1) { +  for (int i = 0; i < old_num_stmt; i++) { +  std::vector<std::vector<DependenceVector> > D; +   +  for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { +  if (active.find(i) != active.end() && active.find(j->first) == active.end()) { +  std::vector<DependenceVector> dvs1, dvs2; +  for (int k = 0; k < j->second.size(); k++) { +  DependenceVector dv = j->second[k]; +  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) +  dvs1.push_back(dv); +  else +  dvs2.push_back(dv); +  } +  j->second = dvs2; +  if (dvs1.size() > 0) +  dep.connect(ro_copy_stmt_num, j->first, dvs1); +  } +  else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { +  std::vector<DependenceVector> dvs1, dvs2; +  for (int k = 0; k < j->second.size(); k++) { +  DependenceVector dv = j->second[k]; +  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) +  dvs1.push_back(dv); +  else +  dvs2.push_back(dv); +  } +  j->second = dvs2; +  if (dvs1.size() > 0) +  D.push_back(dvs1); +  } +   +  if (j->second.size() == 0) +  dep.vertex[i].second.erase(j++); +  else +  j++; +  } +   +  for (int j = 0; j < D.size(); j++) +  dep.connect(i, ro_copy_stmt_num, D[j]); +  } +   +  // insert dependences from copy statement loop to copied statements 
+  DependenceVector dv; +  dv.type = DEP_W2R; +  dv.sym = tmp_sym->clone(); +  dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); +  dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); +  for (int i = dep_dim; i < num_dep_dim; i++) { +  dv.lbounds[i] = -posInfinity; +  dv.ubounds[i] = posInfinity; +  }  +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) +  dep.connect(ro_copy_stmt_num, *i, dv); +  } +   +  if (wo_copy_stmt_num != -1) { +  for (int i = 0; i < old_num_stmt; i++) { +  std::vector<std::vector<DependenceVector> > D; +   +  for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { +  if (active.find(i) != active.end() && active.find(j->first) == active.end()) { +  std::vector<DependenceVector> dvs1, dvs2; +  for (int k = 0; k < j->second.size(); k++) { +  DependenceVector dv = j->second[k]; +  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) +  dvs1.push_back(dv); +  else +  dvs2.push_back(dv); +  } +  j->second = dvs2; +  if (dvs1.size() > 0) +  dep.connect(wo_copy_stmt_num, j->first, dvs1); +  } +  else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { +  std::vector<DependenceVector> dvs1, dvs2; +  for (int k = 0; k < j->second.size(); k++) { +  DependenceVector dv = j->second[k]; +  if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) +  dvs1.push_back(dv); +  else +  dvs2.push_back(dv); +  } +  j->second = dvs2; +  if (dvs1.size() > 0) +  D.push_back(dvs1); +  } +   +  if (j->second.size() == 0) +  dep.vertex[i].second.erase(j++); +  else +  j++; +  } +   +  for (int j = 0; j < D.size(); j++) +  dep.connect(i, wo_copy_stmt_num, D[j]); +  } +   +  // insert dependences from copied statements to write statements +  DependenceVector dv; +  dv.type = DEP_W2R; +  dv.sym = tmp_sym->clone(); +  dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); +  dv.ubounds = 
std::vector<coef_t>(num_dep_dim, 0); +  for (int i = dep_dim; i < num_dep_dim; i++) { +  dv.lbounds[i] = -posInfinity; +  dv.ubounds[i] = posInfinity; +  }  +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) +  dep.connect(*i, wo_copy_stmt_num, dv); +   +  } +   +  // update variable name for dependences among copied statements +  for (int i = 0; i < old_num_stmt; i++) { +  if (active.find(i) != active.end()) +  for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) +  if (active.find(j->first) != active.end()) +  for (int k = 0; k < j->second.size(); k++) { +  IR_Symbol *s = tmp_sym->clone(); +  j->second[k].sym = s; +  } +  } +   +  // insert anti-dependence from write statement to read statement +  if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) +  if (dep_dim >= 0) { +  DependenceVector dv; +  dv.type = DEP_R2W; +  dv.sym = tmp_sym->clone(); +  dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); +  dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); +  for (int k = dep_dim; k < num_dep_dim; k++) { +  dv.lbounds[k] = -posInfinity; +  dv.ubounds[k] = posInfinity; +  } +  for (int k = 0; k < dep_dim; k++) { +  if (k != 0) { +  dv.lbounds[k-1] = 0; +  dv.ubounds[k-1] = 0; +  } +  dv.lbounds[k] = 1; +  dv.ubounds[k] = posInfinity; +  dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); +  } +  } +   +   +  // cleanup +  delete sym; +  delete tmp_sym; +  for (int i = 0; i < index_lb.size(); i++) { +  index_lb[i]->clear(); +  delete index_lb[i]; +  } +  for (int i = 0; i < index_sz.size(); i++) { +  index_sz[i].second->clear(); +  delete index_sz[i].second; +  } +   +  return true; +  } +*/ +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, +                               const std::vector<int> &privatized_levels, +                               bool allow_extra_read, int fastest_changing_dimension, +                  
             int padding_stride, int padding_alignment, int memory_type) { +  if (stmt_refs.size() == 0) +    return true; +   +  // check for sanity of parameters +  IR_ArraySymbol *sym = NULL; +  std::vector<int> lex; +  std::set<int> active; +  if (level <= 0) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +  for (int i = 0; i < privatized_levels.size(); i++) { +    if (i == 0) { +      if (privatized_levels[i] < level) +        throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); +    } +    else if (privatized_levels[i] <= privatized_levels[i-1]) +      throw std::invalid_argument("privatized loop levels must be in ascending order"); +  } +  for (int i = 0; i < stmt_refs.size(); i++) { +    int stmt_num = stmt_refs[i].first; +    active.insert(stmt_num); +    if (stmt_num < 0 || stmt_num >= stmt.size()) +      throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); +    if (privatized_levels.size() != 0) { +      if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) +        throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); +    } +    else { +      if (level > stmt[stmt_num].loop_level.size()) +        throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); +    } +    for (int j = 0; j < stmt_refs[i].second.size(); j++) { +      if (sym == NULL) { +        sym = stmt_refs[i].second[j]->symbol(); +        lex = getLexicalOrder(stmt_num); +      } +      else { +        IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); +        if (t->name() != sym->name()) { +          delete t; +          delete sym; +          throw std::invalid_argument("try to copy data from different arrays"); +        } +        delete t; +      } +    } +  } +  if (!(fastest_changing_dimension 
>= -1 && fastest_changing_dimension < sym->n_dim())) +    throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); +  if (padding_stride < 0) +    throw std::invalid_argument("invalid temporary array stride requirement"); +  if (padding_alignment == -1 || padding_alignment == 0) +    throw std::invalid_argument("invalid temporary array alignment requirement"); +   +  int dim = 2*level - 1; +  int n_dim = sym->n_dim(); +   + +  if (fastest_changing_dimension == -1) +    switch (sym->layout_type()) { +    case IR_ARRAY_LAYOUT_ROW_MAJOR: +      fastest_changing_dimension = n_dim - 1; +      break; +    case IR_ARRAY_LAYOUT_COLUMN_MAJOR: +      fastest_changing_dimension = 0; +      break; +    default: +      throw loop_error("unsupported array layout"); +    } + +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  // build iteration spaces for all reads and for all writes separately +  apply_xform(active); +   +  bool has_write_refs = false; +  bool has_read_refs = false; +  Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); +  Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); +  for (int i = 0; i < stmt_refs.size(); i++) { +    int stmt_num = stmt_refs[i].first; +     +    for (int j = 0; j < stmt_refs[i].second.size(); j++) { +      Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); +      for (int k = 1; k <= mapping.n_inp(); k++) +        mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); +      mapping.setup_names(); +      F_And *f_root = mapping.add_and(); +      for (int k = 1; k <= level-1; k++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(mapping.input_var(k), 1); +        h.update_coef(mapping.output_var(k), -1); +      } +      for (int k = 0; k < privatized_levels.size(); k++) { +  
      EQ_Handle h = f_root->add_EQ(); +        h.update_coef(mapping.input_var(privatized_levels[k]), 1); +        h.update_coef(mapping.output_var(level+k), -1); +      } +      for (int k = 0; k < n_dim; k++) { +        CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); +        exp2formula(ir, mapping, f_root, freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); +        repr->clear(); +        delete repr; +      } +      Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); +      if (stmt_refs[i].second[j]->is_write()) { +        has_write_refs = true; +        wo_copy_is = Union(wo_copy_is, r); +        wo_copy_is.simplify(2, 4); +         +         +      } +      else { +        has_read_refs = true; +        ro_copy_is = Union(ro_copy_is, r); +        ro_copy_is.simplify(2, 4); +         +      } +    } +  } +   +  // simplify read and write footprint iteration space +  { +    if (allow_extra_read) +      ro_copy_is = SimpleHull(ro_copy_is, true, true); +    else +      ro_copy_is = ConvexRepresentation(ro_copy_is); +     +    wo_copy_is = ConvexRepresentation(wo_copy_is); +    if (wo_copy_is.number_of_conjuncts() > 1) { +      Relation t = SimpleHull(wo_copy_is, true, true); +      if (Must_Be_Subset(copy(t), copy(ro_copy_is))) +        wo_copy_is = t; +      else if (Must_Be_Subset(copy(wo_copy_is), copy(ro_copy_is))) +        wo_copy_is = ro_copy_is; +    } +  } +   +  // make copy statement variable names match the ones in the original statements which +  // already have the same names due to apply_xform +  { +    int ref_stmt = *active.begin(); +    for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) +      if (stmt[*i].IS.n_set() > stmt[ref_stmt].IS.n_set()) +        ref_stmt = *i; +    for (int i = 1; i < level; i++) { +      std::string s = 
stmt[ref_stmt].IS.input_var(i)->name(); +      wo_copy_is.name_set_var(i, s); +      ro_copy_is.name_set_var(i, s); +    } +    for (int i = 0; i < privatized_levels.size(); i++) { +      std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name(); +      wo_copy_is.name_set_var(level+i, s); +      ro_copy_is.name_set_var(level+i, s); +    } +    for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { +      std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); +      wo_copy_is.name_set_var(i, s); +      ro_copy_is.name_set_var(i, s); +    } +    tmp_loop_var_name_counter += n_dim; +    wo_copy_is.setup_names(); +    ro_copy_is.setup_names(); +  } +   +  // build merged footprint iteration space for calculating temporary array size +  Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true); +   +  // extract temporary array information +  CG_outputBuilder *ocg = ir->builder(); +  std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL +  std::vector<coef_t> index_stride(n_dim); +  std::vector<bool> is_index_eq(n_dim, false); +  std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); +  Relation reduced_copy_is = copy(copy_is); +   +  for (int i = 0; i < n_dim; i++) { +    if (i != 0) +      reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); +    Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); +     +    // extract stride +    std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1)); +    if (result.second != NULL) +      index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)))); +    else +      index_stride[i] = 1; +     +    // check if 
this arary index requires loop +    Conjunct *c = bound.query_DNF()->single_conjunct(); +    for (EQ_Iterator ei(c->EQs()); ei; ei++) { +      if ((*ei).has_wildcards()) +        continue; +       +      int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); +      if (coef != 0) { +        int sign = 1; +        if (coef < 0) { +          coef = -coef; +          sign = -1; +        } +         +        CG_outputRepr *op = NULL; +        for (Constr_Vars_Iter ci(*ei); ci; ci++) { +          switch ((*ci).var->kind()) { +          case Input_Var: +          { +            if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) +              if ((*ci).coef*sign == 1) +                op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); +              else if ((*ci).coef*sign == -1) +                op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); +              else if ((*ci).coef*sign > 1) +                op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); +              else // (*ci).coef*sign < -1 +                op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); +            break; +          } +          case Global_Var: +          { +            Global_Var_ID g = (*ci).var->get_global_var(); +            if ((*ci).coef*sign == 1) +              op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); +            else if ((*ci).coef*sign == -1) +              op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); +            else if ((*ci).coef*sign > 1) +              op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); +            else // (*ci).coef*sign < -1 +              op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); +            
break; +          } +          default: +            throw loop_error("unsupported array index expression"); +          } +        } +        if ((*ei).get_const() != 0) +          op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); +        if (coef != 1) +          op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef)); +         +        index_lb[i] = op; +        is_index_eq[i] = true; +        break; +      } +    } +    if (is_index_eq[i]) +      continue; +     +    // seperate lower and upper bounds +    std::vector<GEQ_Handle> lb_list, ub_list; +    std::set<Variable_ID> excluded_floor_vars; +    excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1)); +    for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { +      int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); +      if (coef != 0 && (*gi).has_wildcards()) { +        bool clean_bound = true; +        GEQ_Handle h; +        for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) +          if (!find_floor_definition(bound, (*cvi).var, excluded_floor_vars).first) { +            clean_bound = false; +            break; +          } +        if (!clean_bound) +          continue; +      } +       +      if (coef > 0) +        lb_list.push_back(*gi); +      else if (coef < 0) +        ub_list.push_back(*gi); +    } +    if (lb_list.size() == 0 || ub_list.size() == 0) +      throw loop_error("failed to calcuate array footprint size"); +     +    // build lower bound representation +    std::vector<CG_outputRepr *> lb_repr_list; +    for (int j = 0; j < lb_list.size(); j++){ +      if(this->known.n_set() == 0) +        lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, Relation::True(bound.n_set()), std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); +      else +        
lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, this->known, std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); +    } +    if (lb_repr_list.size() > 1) +      index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); +    else if (lb_repr_list.size() == 1) +      index_lb[i] = lb_repr_list[0]; +     +    // build temporary array size representation +    { +      Relation cal(copy_is.n_set(), 1); +      F_And *f_root = cal.add_and(); +      for (int j = 0; j < ub_list.size(); j++) +        for (int k = 0; k < lb_list.size(); k++) { +          GEQ_Handle h = f_root->add_GEQ(); +           +          for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: +            { +              int pos = (*ci).var->get_position(); +              h.update_coef(cal.input_var(pos), (*ci).coef); +              break; +            } +            case Global_Var: +            { +              Global_Var_ID g = (*ci).var->get_global_var(); +              Variable_ID v; +              if (g->arity() == 0) +                v = cal.get_local(g); +              else +                v = cal.get_local(g, (*ci).var->function_of()); +              h.update_coef(v, (*ci).coef); +              break; +            } +            default: +              throw loop_error("cannot calculate temporay array size statically"); +            } +          } +          h.update_const(ub_list[j].get_const()); +           +          for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: +            { +              int pos = (*ci).var->get_position(); +              h.update_coef(cal.input_var(pos), (*ci).coef); +              break; +            } +            case Global_Var: +            { +              
Global_Var_ID g = (*ci).var->get_global_var(); +              Variable_ID v; +              if (g->arity() == 0) +                v = cal.get_local(g); +              else +                v = cal.get_local(g, (*ci).var->function_of()); +              h.update_coef(v, (*ci).coef); +              break; +            } +            default: +              throw loop_error("cannot calculate temporay array size statically"); +            } +          } +          h.update_const(lb_list[k].get_const()); +           +          h.update_const(1); +          h.update_coef(cal.output_var(1), -1); +        } +       +      cal = Restrict_Domain(cal, copy(copy_is)); +      for (int j = 1; j <= cal.n_inp(); j++) +        cal = Project(cal, j, Input_Var); +      cal.simplify(); +       +      // pad temporary array size +      // TODO: for variable array size, create padding formula +      Conjunct *c = cal.query_DNF()->single_conjunct(); +      bool is_index_bound_const = false; +      for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) +        if ((*gi).is_const(cal.output_var(1))) { +          coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); +          if (padding_stride != 0) { +            size = (size + index_stride[i] - 1) / index_stride[i]; +            if (i == fastest_changing_dimension) +              size = size * padding_stride; +          } +          if (i == fastest_changing_dimension) { +            if (padding_alignment > 1) { // align to boundary for data packing +              int residue = size % padding_alignment; +              if (residue) +                size = size+padding_alignment-residue; +            } +            else if (padding_alignment < -1) {  // un-alignment for memory bank conflicts +              while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1) +                size++; +            } +          } +          index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); +          
is_index_bound_const = true; +        } +       +      if (!is_index_bound_const) { +        for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { +          int coef = (*gi).get_coef(cal.output_var(1)); +          if (coef < 0) { +            CG_outputRepr *op = NULL; +            for (Constr_Vars_Iter ci(*gi); ci; ci++) { +              if ((*ci).var != cal.output_var(1)) { +                switch((*ci).var->kind()) { +                case Global_Var: +                { +                  Global_Var_ID g = (*ci).var->get_global_var(); +                  if ((*ci).coef == 1) +                    op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); +                  else if ((*ci).coef == -1) +                    op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); +                  else if ((*ci).coef > 1) +                    op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); +                  else // (*ci).coef < -1 +                    op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); +                  break; +                } +                default: +                  throw loop_error("failed to generate array index bound code"); +                } +              } +            } +            int c = (*gi).get_const(); +            if (c > 0) +              op = ocg->CreatePlus(op, ocg->CreateInt(c)); +            else if (c < 0) +              op = ocg->CreateMinus(op, ocg->CreateInt(-c)); +            if (padding_stride != 0) { +              if (i == fastest_changing_dimension) { +                coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); +                coef_t t1 = index_stride[i] / g; +                if (t1 != 1) +                  op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); +                coef_t t2 = padding_stride / g; +       
         if (t2 != 1) +                  op = ocg->CreateTimes(op, ocg->CreateInt(t2)); +              } +              else if (index_stride[i] != 1) { +                op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); +              } +            } +             +            index_sz.push_back(std::make_pair(i, op)); +            break; +          } +        } +      } +    } +  } +   +  // change the temporary array index order +  for (int i = 0; i < index_sz.size(); i++) +    if (index_sz[i].first == fastest_changing_dimension) +      switch (sym->layout_type()) { +      case IR_ARRAY_LAYOUT_ROW_MAJOR: +        std::swap(index_sz[index_sz.size()-1], index_sz[i]); +        break; +      case IR_ARRAY_LAYOUT_COLUMN_MAJOR: +        std::swap(index_sz[0], index_sz[i]); +        break; +      default: +        throw loop_error("unsupported array layout"); +      } +   +  // declare temporary array or scalar +  IR_Symbol *tmp_sym; +  if (index_sz.size() == 0) { +    tmp_sym = ir->CreateScalarSymbol(sym, memory_type); +  } +  else { +    std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); +    for (int i = 0; i < index_sz.size(); i++) +      tmp_array_size[i] = index_sz[i].second->clone(); +    tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); +  } +   +  // create temporary array read initialization code +  CG_outputRepr *copy_code_read; +  if (has_read_refs) +    if (index_sz.size() == 0) { +      IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); +       +      std::vector<CG_outputRepr *> rhs_index(n_dim); +      for (int i = 0; i < index_lb.size(); i++) +        if (is_index_eq[i]) +          rhs_index[i] = index_lb[i]->clone(); +        else +          rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +      IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, 
rhs_index); +       +      copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); +    } +    else { +      std::vector<CG_outputRepr *> lhs_index(index_sz.size()); +      for (int i = 0; i < index_sz.size(); i++) { +        int cur_index_num = index_sz[i].first; +        CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); +        if (padding_stride != 0) { +          if (i == n_dim-1) { +            coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); +            coef_t t1 = index_stride[cur_index_num] / g; +            if (t1 != 1) +              cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); +            coef_t t2 = padding_stride / g; +            if (t2 != 1) +              cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); +          } +          else if (index_stride[cur_index_num] != 1) { +            cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); +          } +        } +         +        if (ir->ArrayIndexStartAt() != 0) +          cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); +        lhs_index[i] = cur_index_repr; +      } +       +      IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); +       +      std::vector<CG_outputRepr *> rhs_index(n_dim); +      for (int i = 0; i < index_lb.size(); i++) +        if (is_index_eq[i]) +          rhs_index[i] = index_lb[i]->clone(); +        else +          rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +      IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); +       +      copy_code_read = ir->builder()->CreateAssignment(0, 
tmp_array_ref->convert(), copied_array_ref->convert()); +    } +   +  // create temporary array write back code +  CG_outputRepr *copy_code_write; +  if (has_write_refs) +    if (index_sz.size() == 0) { +      IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); +       +      std::vector<CG_outputRepr *> rhs_index(n_dim); +      for (int i = 0; i < index_lb.size(); i++) +        if (is_index_eq[i]) +          rhs_index[i] = index_lb[i]->clone(); +        else +          rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +      IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); +       +      copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); +    } +    else { +      std::vector<CG_outputRepr *> lhs_index(n_dim); +      for (int i = 0; i < index_lb.size(); i++) +        if (is_index_eq[i]) +          lhs_index[i] = index_lb[i]->clone(); +        else +          lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); +      IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); +       +      std::vector<CG_outputRepr *> rhs_index(index_sz.size()); +      for (int i = 0; i < index_sz.size(); i++) { +        int cur_index_num = index_sz[i].first; +        CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); +        if (padding_stride != 0) { +          if (i == n_dim-1) { +            coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); +            coef_t t1 = index_stride[cur_index_num] / g; +            if (t1 != 1) +              cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); +            coef_t t2 = padding_stride / g; +            if (t2 != 1) +     
         cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); +          } +          else if (index_stride[cur_index_num] != 1) { +            cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); +          } +        } +         +        if (ir->ArrayIndexStartAt() != 0) +          cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); +        rhs_index[i] = cur_index_repr; +      } +      IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); +       +      copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); +    } +   +  // now we can remove those loops for array indexes that are +  // dependent on others +  if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { +    Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); +    F_And *f_root = mapping.add_and(); +    for (int i = 1; i <= level-1+privatized_levels.size(); i++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.input_var(i), 1); +      h.update_coef(mapping.output_var(i), -1); +    } +     +    int cur_index = 0; +    std::vector<int> mapped_index(index_sz.size()); +    for (int i = 0; i < n_dim; i++) +      if (!is_index_eq[i]) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); +        switch (sym->layout_type()) { +        case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { +          h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); +          mapped_index[index_sz.size()-cur_index-1] = i; +          break; +        } +        case IR_ARRAY_LAYOUT_ROW_MAJOR: { +          h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); +          
mapped_index[cur_index] = i; +          break; +        } +        default: +          throw loop_error("unsupported array layout"); +        } +        cur_index++; +      } +     +    wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); +    ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); +    for (int i = 1; i <= level-1+privatized_levels.size(); i++) { +      wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); +      ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); +    } +    for (int i = 0; i < index_sz.size(); i++) { +      wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); +      ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); +    } +    wo_copy_is.setup_names(); +    ro_copy_is.setup_names(); +  } +   +  // insert read copy statement +  int old_num_stmt = stmt.size(); +  int ro_copy_stmt_num = -1; +  if (has_read_refs) { +    Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); +    { +      F_And *f_root = copy_xform.add_and(); +      for (int i = 1; i <= ro_copy_is.n_set(); i++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(copy_xform.input_var(i), 1); +        h.update_coef(copy_xform.output_var(2*i), -1); +      } +      for (int i = 1; i <= dim; i+=2) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(copy_xform.output_var(i), -1); +        h.update_const(lex[i-1]); +      } +      for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(copy_xform.output_var(i), 1); +      } +    } +     +    Statement copy_stmt_read; +    copy_stmt_read.IS = ro_copy_is; +    copy_stmt_read.xform = copy_xform; +    copy_stmt_read.code = copy_code_read; +    copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); +    
copy_stmt_read.ir_stmt_node = NULL; +    for (int i = 0; i < level-1; i++) { +      copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; +      if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && +          stmt[*(active.begin())].loop_level[i].payload >= level) { +        int j; +        for (j = 0; j < privatized_levels.size(); j++) +          if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) +            break; +        if (j == privatized_levels.size()) +          copy_stmt_read.loop_level[i].payload = -1; +        else +          copy_stmt_read.loop_level[i].payload = level + j; +      } +      else +        copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; +      copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; +    } +    for (int i = 0; i < privatized_levels.size(); i++) { +      copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; +      copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; +      copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; +    } +    int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); +    for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { +      copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; +      copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; +      copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +    } +    for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { +      copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; +     
 copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; +      copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +    } +     +     +    shiftLexicalOrder(lex, dim-1, 1); +    stmt.push_back(copy_stmt_read); +    ro_copy_stmt_num = stmt.size() - 1; +    dep.insert(); +  } +   +  // insert write copy statement +  int wo_copy_stmt_num = -1; +  if (has_write_refs) { +    Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); +    { +      F_And *f_root = copy_xform.add_and(); +      for (int i = 1; i <= wo_copy_is.n_set(); i++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(copy_xform.input_var(i), 1); +        h.update_coef(copy_xform.output_var(2*i), -1); +      } +      for (int i = 1; i <= dim; i+=2) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(copy_xform.output_var(i), -1); +        h.update_const(lex[i-1]); +      } +      for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(copy_xform.output_var(i), 1); +      } +    } +     +    Statement copy_stmt_write; +    copy_stmt_write.IS = wo_copy_is; +    copy_stmt_write.xform = copy_xform; +    copy_stmt_write.code = copy_code_write; +    copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); +    copy_stmt_write.ir_stmt_node = NULL; +     +    for (int i = 0; i < level-1; i++) { +      copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; +      if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && +          stmt[*(active.begin())].loop_level[i].payload >= level) { +        int j; +        for (j = 0; j < privatized_levels.size(); j++) +          if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) +            break; +        if (j == privatized_levels.size()) +          copy_stmt_write.loop_level[i].payload = -1; +        else +          
copy_stmt_write.loop_level[i].payload = level + j; +      } +      else +        copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; +      copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; +    } +    for (int i = 0; i < privatized_levels.size(); i++) { +      copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; +      copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; +      copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; +    } +    int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); +    for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { +      copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; +      copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; +      copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +    } +    for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { +      copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; +      copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; +      copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; +    } +    lex[dim-1]++; +    shiftLexicalOrder(lex, dim-1, -2); +    stmt.push_back(copy_stmt_write); +    wo_copy_stmt_num = stmt.size() - 1; +    dep.insert(); +  } +   +  // replace original array accesses with temporary array accesses +  for (int i =0; i < stmt_refs.size(); i++) +    for (int j = 0; j < stmt_refs[i].second.size(); j++) { +      if (index_sz.size() == 0) { +        IR_ScalarRef *tmp_scalar_ref = 
ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); +        ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); +      } +      else { +        std::vector<CG_outputRepr *> index_repr(index_sz.size()); +        for (int k = 0; k < index_sz.size(); k++) { +          int cur_index_num = index_sz[k].first; +           +          CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); +          if (padding_stride != 0) { +            if (k == n_dim-1) { +              coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); +              coef_t t1 = index_stride[cur_index_num] / g; +              if (t1 != 1) +                cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); +              coef_t t2 = padding_stride / g; +              if (t2 != 1) +                cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); +            } +            else if (index_stride[cur_index_num] != 1) { +              cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); +            } +          } +           +          if (ir->ArrayIndexStartAt() != 0) +            cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); +          index_repr[k] = cur_index_repr; +        } +         +        IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); +        ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); +      } +    } +   +  // update dependence graph +  int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; +  if (ro_copy_stmt_num != -1) { +    for (int i = 0; i < old_num_stmt; i++) { +      std::vector<std::vector<DependenceVector> > D; +       +      for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != 
dep.vertex[i].second.end();) { +        if (active.find(i) != active.end() && active.find(j->first) == active.end()) { +          std::vector<DependenceVector> dvs1, dvs2; +          for (int k = 0; k < j->second.size(); k++) { +            DependenceVector dv = j->second[k]; +            if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) +              dvs1.push_back(dv); +            else +              dvs2.push_back(dv); +          } +          j->second = dvs2; +          if (dvs1.size() > 0) +            dep.connect(ro_copy_stmt_num, j->first, dvs1); +        } +        else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { +          std::vector<DependenceVector> dvs1, dvs2; +          for (int k = 0; k < j->second.size(); k++) { +            DependenceVector dv = j->second[k]; +            if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) +              dvs1.push_back(dv); +            else +              dvs2.push_back(dv); +          } +          j->second = dvs2; +          if (dvs1.size() > 0) +            D.push_back(dvs1); +        } +         +        if (j->second.size() == 0) +          dep.vertex[i].second.erase(j++); +        else +          j++; +      } +       +      for (int j = 0; j < D.size(); j++) +        dep.connect(i, ro_copy_stmt_num, D[j]); +    } +     +    // insert dependences from copy statement loop to copied statements +    DependenceVector dv; +    dv.type = DEP_W2R; +    dv.sym = tmp_sym->clone(); +    dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); +    dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); +    for (int i = dep_dim; i < dep.num_dim(); i++) { +      dv.lbounds[i] = -posInfinity; +      dv.ubounds[i] = posInfinity; +    } +    for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) +      dep.connect(ro_copy_stmt_num, *i, dv); +  } +   +  if (wo_copy_stmt_num != -1) { 
+    for (int i = 0; i < old_num_stmt; i++) { +      std::vector<std::vector<DependenceVector> > D; +       +      for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { +        if (active.find(i) != active.end() && active.find(j->first) == active.end()) { +          std::vector<DependenceVector> dvs1, dvs2; +          for (int k = 0; k < j->second.size(); k++) { +            DependenceVector dv = j->second[k]; +            if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) +              dvs1.push_back(dv); +            else +              dvs2.push_back(dv); +          } +          j->second = dvs2; +          if (dvs1.size() > 0) +            dep.connect(wo_copy_stmt_num, j->first, dvs1); +        } +        else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { +          std::vector<DependenceVector> dvs1, dvs2; +          for (int k = 0; k < j->second.size(); k++) { +            DependenceVector dv = j->second[k]; +            if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) +              dvs1.push_back(dv); +            else +              dvs2.push_back(dv); +          } +          j->second = dvs2; +          if (dvs1.size() > 0) +            D.push_back(dvs1); +        } +         +        if (j->second.size() == 0) +          dep.vertex[i].second.erase(j++); +        else +          j++; +      } +       +      for (int j = 0; j < D.size(); j++) +        dep.connect(i, wo_copy_stmt_num, D[j]); +    } +     +    // insert dependences from copied statements to write statements +    DependenceVector dv; +    dv.type = DEP_W2R; +    dv.sym = tmp_sym->clone(); +    dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); +    dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); +    for (int i = dep_dim; i < dep.num_dim(); i++) { +      dv.lbounds[i] = -posInfinity; +      
dv.ubounds[i] = posInfinity; +    } +    for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) +      dep.connect(*i, wo_copy_stmt_num, dv); +     +  } +   +  // update variable name for dependences among copied statements +  for (int i = 0; i < old_num_stmt; i++) { +    if (active.find(i) != active.end()) +      for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) +        if (active.find(j->first) != active.end()) +          for (int k = 0; k < j->second.size(); k++) { +            IR_Symbol *s = tmp_sym->clone(); +            j->second[k].sym = s; +          } +  } +   +  // insert anti-dependence from write statement to read statement +  if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) +    if (dep_dim >= 0) { +      DependenceVector dv; +      dv.type = DEP_R2W; +      dv.sym = tmp_sym->clone(); +      dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); +      dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); +      for (int k = dep_dim; k < dep.num_dim(); k++) { +        dv.lbounds[k] = -posInfinity; +        dv.ubounds[k] = posInfinity; +      } +      for (int k = 0; k < dep_dim; k++) { +        if (k != 0) { +          dv.lbounds[k-1] = 0; +          dv.ubounds[k-1] = 0; +        } +        dv.lbounds[k] = 1; +        dv.ubounds[k] = posInfinity; +        dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); +      } +    } +   +  // cleanup +  delete sym; +  delete tmp_sym; +  for (int i = 0; i < index_lb.size(); i++) { +    index_lb[i]->clear(); +    delete index_lb[i]; +  } +  for (int i = 0; i < index_sz.size(); i++) { +    index_sz[i].second->clear(); +    delete index_sz[i].second; +  } +   +  return true; +} diff --git a/src/loop_extra.cc b/src/loop_extra.cc new file mode 100644 index 0000000..dac05bf --- /dev/null +++ b/src/loop_extra.cc @@ -0,0 +1,224 @@ +/***************************************************************************** + Copyright (C) 2010 University 
of Utah + All Rights Reserved. + + Purpose: +   Additional loop transformations. + + Notes: + + History: +   07/31/10 Created by Chun Chen +*****************************************************************************/ + +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + +void Loop::shift_to(int stmt_num, int level, int absolute_position) { +  // combo +  tile(stmt_num, level, 1, level, CountedTile); +  std::vector<int> lex = getLexicalOrder(stmt_num); +  std::set<int> active = getStatements(lex, 2*level-2); +  shift(active, level, absolute_position); +   +  // remove unnecessary tiled loop since tile size is one +  for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { +    int n = stmt[*i].xform.n_out(); +    Relation mapping(n, n-2); +    F_And *f_root = mapping.add_and(); +    for (int j = 1; j <= 2*level; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(j), 1); +      h.update_coef(mapping.input_var(j), -1); +    } +    for (int j = 2*level+3; j <= n; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(j-2), 1); +      h.update_coef(mapping.input_var(j), -1); +    } +    stmt[*i].xform = Composition(mapping, stmt[*i].xform); +    stmt[*i].xform.simplify(); +     +    for (int j = 0; j < stmt[*i].loop_level.size(); j++) +      if (j != level-1 && +          stmt[*i].loop_level[j].type == LoopLevelTile && +          stmt[*i].loop_level[j].payload >= level) +        stmt[*i].loop_level[j].payload--; +     +    stmt[*i].loop_level.erase(stmt[*i].loop_level.begin()+level-1); +  } +} + + +std::set<int> Loop::unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level) { +  std::set<int> cleanup_stmts = unroll(stmt_num, level, unroll_amount,std::vector< std::vector<std::string> >(), cleanup_split_level); +  for 
(std::set<int>::iterator i = cleanup_stmts.begin(); i != cleanup_stmts.end(); i++) +    unroll(*i, level, 0); +   +  return cleanup_stmts; +} + +void Loop::peel(int stmt_num, int level, int peel_amount) { +  // check for sanity of parameters +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); +  if (level <= 0 || level > stmt[stmt_num].loop_level.size()) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +   +  if (peel_amount == 0) +    return; +   +  std::set<int> subloop = getSubLoopNest(stmt_num, level); +  std::vector<Relation> Rs; +  for (std::set<int>::iterator i = subloop.begin(); i != subloop.end(); i++) { +    Relation r = getNewIS(*i); +    Relation f(r.n_set(), level); +    F_And *f_root = f.add_and(); +    for (int j = 1; j <= level; j++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(f.input_var(2*j), 1); +      h.update_coef(f.output_var(j), -1); +    } +    r = Composition(f, r); +    r.simplify(); +    Rs.push_back(r); +  } +  Relation hull = SimpleHull(Rs); +   +  if (peel_amount > 0) { +    GEQ_Handle bound_eq; +    bool found_bound = false; +    for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) +      if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { +        bound_eq = *e; +        found_bound = true; +        break; +      } +    if (!found_bound) +      for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) +        if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { +          bool is_bound = true; +          for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { +            std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); +            if (!result.first) { +              is_bound = false; +              break; +            } +          } +          if (is_bound) { +            bound_eq = *e; +            found_bound = true; +            
break; +          } +        } +    if (!found_bound) +      throw loop_error("can't find lower bound for peeling at loop level " + to_string(level)); +     +    for (int i = 1; i <= peel_amount; i++) { +      Relation r(level); +      F_Exists *f_exists = r.add_and()->add_exists(); +      F_And *f_root = f_exists->add_and(); +      GEQ_Handle h = f_root->add_GEQ(); +      std::map<Variable_ID, Variable_ID> exists_mapping; +      for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) +        switch (cvi.curr_var()->kind()) { +        case Input_Var: +          h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); +          break; +        case Wildcard_Var: { +          Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); +          h.update_coef(v, cvi.curr_coef()); +          break; +        } +        case Global_Var: { +          Global_Var_ID g = cvi.curr_var()->get_global_var(); +          Variable_ID v; +          if (g->arity() == 0) +            v = r.get_local(g); +          else +            v = r.get_local(g, cvi.curr_var()->function_of()); +          h.update_coef(v, cvi.curr_coef()); +          break; +        } +        default: +          assert(false); +        } +      h.update_const(bound_eq.get_const() - i); +      r.simplify(); +       +      split(stmt_num, level, r); +    } +  } +  else { // peel_amount < 0 +    GEQ_Handle bound_eq; +    bool found_bound = false; +    for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) +      if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { +        bound_eq = *e; +        found_bound = true; +        break; +      } +    if (!found_bound) +      for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) +        if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { +          bool is_bound = true; +          for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { +            std::pair<bool, 
GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); +            if (!result.first) { +              is_bound = false; +              break; +            } +          } +          if (is_bound) { +            bound_eq = *e; +            found_bound = true; +            break; +          } +        } +    if (!found_bound) +      throw loop_error("can't find upper bound for peeling at loop level " + to_string(level)); +     +    for (int i = 1; i <= -peel_amount; i++) { +      Relation r(level); +      F_Exists *f_exists = r.add_and()->add_exists(); +      F_And *f_root = f_exists->add_and(); +      GEQ_Handle h = f_root->add_GEQ(); +      std::map<Variable_ID, Variable_ID> exists_mapping; +      for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) +        switch (cvi.curr_var()->kind()) { +        case Input_Var: +          h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); +          break; +        case Wildcard_Var: { +          Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); +          h.update_coef(v, cvi.curr_coef()); +          break; +        } +        case Global_Var: { +          Global_Var_ID g = cvi.curr_var()->get_global_var(); +          Variable_ID v; +          if (g->arity() == 0) +            v = r.get_local(g); +          else +            v = r.get_local(g, cvi.curr_var()->function_of()); +          h.update_coef(v, cvi.curr_coef()); +          break; +        } +        default: +          assert(false); +        } +      h.update_const(bound_eq.get_const() - i); +      r.simplify(); +       +      split(stmt_num, level, r); +    } +  } +} + diff --git a/src/loop_tile.cc b/src/loop_tile.cc new file mode 100644 index 0000000..aae8dd8 --- /dev/null +++ b/src/loop_tile.cc @@ -0,0 +1,630 @@ +/* + * loop_tile.cc + * + *  Created on: Nov 12, 2012 + *      Author: anand + */ + +#include <code_gen/codegen.h> +#include "loop.hh" +#include "omegatools.hh" +#include 
"ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + + + +void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, +                TilingMethodType method, int alignment_offset, int alignment_multiple) { +  // check for sanity of parameters +  if (tile_size < 0) +    throw std::invalid_argument("invalid tile size"); +  if (alignment_multiple < 1 || alignment_offset < 0) +    throw std::invalid_argument("invalid alignment for tile"); +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement " + to_string(stmt_num)); +  if (level <= 0) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +  if (level > stmt[stmt_num].loop_level.size()) +    throw std::invalid_argument( +      "there is no loop level " + to_string(level) + " for statement " +      + to_string(stmt_num)); +  if (outer_level <= 0 || outer_level > level) +    throw std::invalid_argument( +      "invalid tile controlling loop level " +      + to_string(outer_level)); +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; +   +  int dim = 2 * level - 1; +  int outer_dim = 2 * outer_level - 1; +  std::vector<int> lex = getLexicalOrder(stmt_num); +  std::set<int> same_tiled_loop = getStatements(lex, dim - 1); +  std::set<int> same_tile_controlling_loop = getStatements(lex, +                                                           outer_dim - 1); +   +  for (std::set<int>::iterator i = same_tiled_loop.begin(); +       i != same_tiled_loop.end(); i++) { +    for (DependenceGraph::EdgeList::iterator j = +           dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); +         j++) { +      if (same_tiled_loop.find(j->first) != same_tiled_loop.end()) +        for (int k = 0; k < j->second.size(); k++) { +          DependenceVector dv = j->second[k]; +          int dim2 = level - 1; +          if 
((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { +            while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { +              dim2 = stmt[*i].loop_level[dim2].payload - 1; +            } +            dim2 = stmt[*i].loop_level[dim2].payload; +             +            if (dv.hasNegative(dim2) && (!dv.quasi)) { +              for (int l = outer_level; l < level; l++) +                if (stmt[*i].loop_level[l - 1].type +                    != LoopLevelTile) { +                  if (dv.isCarried( +                        stmt[*i].loop_level[l - 1].payload) +                      && dv.hasPositive( +                        stmt[*i].loop_level[l - 1].payload)) +                    throw loop_error( +                      "loop error: Tiling is illegal, dependence violation!"); +                } else { +                   +                  int dim3 = l - 1; +                  while (stmt[*i].loop_level[l - 1].type +                         != LoopLevelTile) { +                    dim3 = +                      stmt[*i].loop_level[l - 1].payload +                      - 1; +                     +                  } +                   +                  dim3 = stmt[*i].loop_level[l - 1].payload; +                  if (dim3 < level - 1) +                    if (dv.isCarried(dim3) +                        && dv.hasPositive(dim3)) +                      throw loop_error( +                        "loop error: Tiling is illegal, dependence violation!"); +                } +            } +          } +        } +    } +  } +  // special case for no tiling +  if (tile_size == 0) { +    for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); +         i != same_tile_controlling_loop.end(); i++) { +      Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); +      F_And *f_root = r.add_and(); +      for (int j = 1; j <= 2 * outer_level - 1; j++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(r.input_var(j), 1); +        
h.update_coef(r.output_var(j), -1); +      } +      EQ_Handle h1 = f_root->add_EQ(); +      h1.update_coef(r.output_var(2 * outer_level), 1); +      EQ_Handle h2 = f_root->add_EQ(); +      h2.update_coef(r.output_var(2 * outer_level + 1), 1); +      for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(r.input_var(j), 1); +        h.update_coef(r.output_var(j + 2), -1); +      } +       +      stmt[*i].xform = Composition(copy(r), stmt[*i].xform); +    } +  } +  // normal tiling +  else { +    std::set<int> private_stmt; +    for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); +         i != same_tile_controlling_loop.end(); i++) { +//     if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) +//       same_tiled_loop.insert(*i); +       +      // should test dim's value directly but it is ok for now +//    if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) +      if (same_tiled_loop.find(*i) == same_tiled_loop.end() +          && overflow.find(*i) != overflow.end()) +        private_stmt.insert(*i); +    } +     +    // extract the union of the iteration space to be considered +    Relation hull; +    /*{ +      Tuple < Relation > r_list; +      Tuple<int> r_mask; +       +      for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); +      i != same_tile_controlling_loop.end(); i++) +      if (private_stmt.find(*i) == private_stmt.end()) { +      Relation r = project_onto_levels(getNewIS(*i), dim + 1, +      true); +      for (int j = outer_dim; j < dim; j++) +      r = Project(r, j + 1, Set_Var); +      for (int j = 0; j < outer_dim; j += 2) +      r = Project(r, j + 1, Set_Var); +      r_list.append(r); +      r_mask.append(1); +      } +       +      hull = Hull(r_list, r_mask, 1, true); +      }*/ +     +    { +      std::vector<Relation> r_list; 
+       +      for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); +           i != same_tile_controlling_loop.end(); i++) +        if (private_stmt.find(*i) == private_stmt.end()) { +          Relation r = getNewIS(*i); +          for (int j = dim + 2; j <= r.n_set(); j++) +            r = Project(r, r.set_var(j)); +          for (int j = outer_dim; j < dim; j++) +            r = Project(r, j + 1, Set_Var); +          for (int j = 0; j < outer_dim; j += 2) +            r = Project(r, j + 1, Set_Var); +          r.simplify(2, 4); +          r_list.push_back(r); +        } +       +      hull = SimpleHull(r_list); +      // hull = Hull(r_list, std::vector<bool>(r_list.size(), true), 1, true); +    } +     +    // extract the bound of the dimension to be tiled +    Relation bound = get_loop_bound(hull, dim); +    if (!bound.has_single_conjunct()) { +      // further simplify the bound +      hull = Approximate(hull); +      bound = get_loop_bound(hull, dim); +       +      int i = outer_dim - 2; +      while (!bound.has_single_conjunct() && i >= 0) { +        hull = Project(hull, i + 1, Set_Var); +        bound = get_loop_bound(hull, dim); +        i -= 2; +      } +       +      if (!bound.has_single_conjunct()) +        throw loop_error("cannot handle tile bounds"); +    } +     +    // separate lower and upper bounds +    std::vector<GEQ_Handle> lb_list, ub_list; +    { +      Conjunct *c = bound.query_DNF()->single_conjunct(); +      for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { +        int coef = (*gi).get_coef(bound.set_var(dim + 1)); +        if (coef < 0) +          ub_list.push_back(*gi); +        else if (coef > 0) +          lb_list.push_back(*gi); +      } +    } +    if (lb_list.size() == 0) +      throw loop_error( +        "unable to calculate tile controlling loop lower bound"); +    if (ub_list.size() == 0) +      throw loop_error( +        "unable to calculate tile controlling loop upper bound"); +     +    // find the simplest 
lower bound for StridedTile or simplest iteration count for CountedTile +    int simplest_lb = 0, simplest_ub = 0; +    if (method == StridedTile) { +      int best_cost = INT_MAX; +      for (int i = 0; i < lb_list.size(); i++) { +        int cost = 0; +        for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { +          switch ((*ci).var->kind()) { +          case Input_Var: { +            cost += 5; +            break; +          } +          case Global_Var: { +            cost += 2; +            break; +          } +          default: +            cost += 15; +            break; +          } +        } +         +        if (cost < best_cost) { +          best_cost = cost; +          simplest_lb = i; +        } +      } +    } else if (method == CountedTile) { +      std::map<Variable_ID, coef_t> s1, s2, s3; +      int best_cost = INT_MAX; +      for (int i = 0; i < lb_list.size(); i++) +        for (int j = 0; j < ub_list.size(); j++) { +          int cost = 0; +           +          for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: { +              s1[(*ci).var] += (*ci).coef; +              break; +            } +            case Global_Var: { +              s2[(*ci).var] += (*ci).coef; +              break; +            } +            case Exists_Var: +            case Wildcard_Var: { +              s3[(*ci).var] += (*ci).coef; +              break; +            } +            default: +              cost = INT_MAX - 2; +              break; +            } +          } +           +          for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: { +              s1[(*ci).var] += (*ci).coef; +              break; +            } +            case Global_Var: { +              s2[(*ci).var] += (*ci).coef; +              break; +            } +            case Exists_Var: +            case Wildcard_Var: { +              
s3[(*ci).var] += (*ci).coef; +              break; +            } +            default: +              if (cost == INT_MAX - 2) +                cost = INT_MAX - 1; +              else +                cost = INT_MAX - 3; +              break; +            } +          } +           +          if (cost == 0) { +            for (std::map<Variable_ID, coef_t>::iterator k = +                   s1.begin(); k != s1.end(); k++) +              if ((*k).second != 0) +                cost += 5; +            for (std::map<Variable_ID, coef_t>::iterator k = +                   s2.begin(); k != s2.end(); k++) +              if ((*k).second != 0) +                cost += 2; +            for (std::map<Variable_ID, coef_t>::iterator k = +                   s3.begin(); k != s3.end(); k++) +              if ((*k).second != 0) +                cost += 15; +          } +           +          if (cost < best_cost) { +            best_cost = cost; +            simplest_lb = i; +            simplest_ub = j; +          } +        } +    } +     +    // prepare the new transformation relations +    for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); +         i != same_tile_controlling_loop.end(); i++) { +      Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); +      F_And *f_root = r.add_and(); +      for (int j = 0; j < outer_dim - 1; j++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(r.output_var(j + 1), 1); +        h.update_coef(r.input_var(j + 1), -1); +      } +       +      for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(r.output_var(j + 3), 1); +        h.update_coef(r.input_var(j + 1), -1); +      } +       +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(r.output_var(outer_dim), 1); +      h.update_const(-lex[outer_dim - 1]); +       +      stmt[*i].xform = Composition(r, stmt[*i].xform); +    } +     +    // add tiling constraints. 
+    for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); +         i != same_tile_controlling_loop.end(); i++) { +      F_And *f_super_root = stmt[*i].xform.and_with_and(); +      F_Exists *f_exists = f_super_root->add_exists(); +      F_And *f_root = f_exists->add_and(); +       +      // create a lower bound variable for easy formula creation later +      Variable_ID aligned_lb; +      { +        Variable_ID lb = f_exists->declare(); +        coef_t coef = lb_list[simplest_lb].get_coef( +          bound.set_var(dim + 1)); +        if (coef == 1) { // e.g. if i >= m+5, then LB = m+5 +          EQ_Handle h = f_root->add_EQ(); +          h.update_coef(lb, 1); +          for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: { +              int pos = (*ci).var->get_position(); +              if (pos != dim + 1) +                h.update_coef(stmt[*i].xform.output_var(pos), +                              (*ci).coef); +              break; +            } +            case Global_Var: { +              Global_Var_ID g = (*ci).var->get_global_var(); +              Variable_ID v; +              if (g->arity() == 0) +                v = stmt[*i].xform.get_local(g); +              else +                v = stmt[*i].xform.get_local(g, +                                             (*ci).var->function_of()); +              h.update_coef(v, (*ci).coef); +              break; +            } +            default: +              throw loop_error("cannot handle tile bounds"); +            } +          } +          h.update_const(lb_list[simplest_lb].get_const()); +        } else { // e.g. 
if 2i >= m+5, then m+5 <= 2*LB < m+5+2 +          GEQ_Handle h1 = f_root->add_GEQ(); +          GEQ_Handle h2 = f_root->add_GEQ(); +          for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: { +              int pos = (*ci).var->get_position(); +              if (pos == dim + 1) { +                h1.update_coef(lb, (*ci).coef); +                h2.update_coef(lb, -(*ci).coef); +              } else { +                h1.update_coef(stmt[*i].xform.output_var(pos), +                               (*ci).coef); +                h2.update_coef(stmt[*i].xform.output_var(pos), +                               -(*ci).coef); +              } +              break; +            } +            case Global_Var: { +              Global_Var_ID g = (*ci).var->get_global_var(); +              Variable_ID v; +              if (g->arity() == 0) +                v = stmt[*i].xform.get_local(g); +              else +                v = stmt[*i].xform.get_local(g, +                                             (*ci).var->function_of()); +              h1.update_coef(v, (*ci).coef); +              h2.update_coef(v, -(*ci).coef); +              break; +            } +            default: +              throw loop_error("cannot handle tile bounds"); +            } +          } +          h1.update_const(lb_list[simplest_lb].get_const()); +          h2.update_const(-lb_list[simplest_lb].get_const()); +          h2.update_const(coef - 1); +        } +         +        Variable_ID offset_lb; +        if (alignment_offset == 0) +          offset_lb = lb; +        else { +          EQ_Handle h = f_root->add_EQ(); +          offset_lb = f_exists->declare(); +          h.update_coef(offset_lb, 1); +          h.update_coef(lb, -1); +          h.update_const(alignment_offset); +        } +         +        if (alignment_multiple == 1) { // trivial +          aligned_lb = offset_lb; +        } else { // e.g. 
to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB +          aligned_lb = f_exists->declare(); +          Variable_ID e = f_exists->declare(); +           +          EQ_Handle h = f_root->add_EQ(); +          h.update_coef(aligned_lb, 1); +          h.update_coef(e, -alignment_multiple); +           +          GEQ_Handle h1 = f_root->add_GEQ(); +          GEQ_Handle h2 = f_root->add_GEQ(); +          h1.update_coef(e, alignment_multiple); +          h2.update_coef(e, -alignment_multiple); +          h1.update_coef(offset_lb, -1); +          h2.update_coef(offset_lb, 1); +          h1.update_const(alignment_multiple - 1); +        } +      } +       +      // create an upper bound variable for easy formula creation later +      Variable_ID ub = f_exists->declare(); +      { +        coef_t coef = -ub_list[simplest_ub].get_coef( +          bound.set_var(dim + 1)); +        if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 +          EQ_Handle h = f_root->add_EQ(); +          h.update_coef(ub, -1); +          for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: { +              int pos = (*ci).var->get_position(); +              if (pos != dim + 1) +                h.update_coef(stmt[*i].xform.output_var(pos), +                              (*ci).coef); +              break; +            } +            case Global_Var: { +              Global_Var_ID g = (*ci).var->get_global_var(); +              Variable_ID v; +              if (g->arity() == 0) +                v = stmt[*i].xform.get_local(g); +              else +                v = stmt[*i].xform.get_local(g, +                                             (*ci).var->function_of()); +              h.update_coef(v, (*ci).coef); +              break; +            } +            default: +              throw loop_error("cannot handle tile bounds"); +            } +          } +          
h.update_const(ub_list[simplest_ub].get_const()); +        } else { // e.g. if 2i <= m+5, then m+5-2 < 2*UB <= m+5 +          GEQ_Handle h1 = f_root->add_GEQ(); +          GEQ_Handle h2 = f_root->add_GEQ(); +          for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { +            switch ((*ci).var->kind()) { +            case Input_Var: { +              int pos = (*ci).var->get_position(); +              if (pos == dim + 1) { +                h1.update_coef(ub, -(*ci).coef); +                h2.update_coef(ub, (*ci).coef); +              } else { +                h1.update_coef(stmt[*i].xform.output_var(pos), +                               -(*ci).coef); +                h2.update_coef(stmt[*i].xform.output_var(pos), +                               (*ci).coef); +              } +              break; +            } +            case Global_Var: { +              Global_Var_ID g = (*ci).var->get_global_var(); +              Variable_ID v; +              if (g->arity() == 0) +                v = stmt[*i].xform.get_local(g); +              else +                v = stmt[*i].xform.get_local(g, +                                             (*ci).var->function_of()); +              h1.update_coef(v, -(*ci).coef); +              h2.update_coef(v, (*ci).coef); +              break; +            } +            default: +              throw loop_error("cannot handle tile bounds"); +            } +          } +          h1.update_const(-ub_list[simplest_ub].get_const()); +          h2.update_const(ub_list[simplest_ub].get_const()); +          h1.update_const(coef - 1); +        } +      } +       +      // insert tile controlling loop constraints +      if (method == StridedTile) { // e.g. 
ii = LB + 32 * alpha && alpha >= 0 +        Variable_ID e = f_exists->declare(); +        GEQ_Handle h1 = f_root->add_GEQ(); +        h1.update_coef(e, 1); +         +        EQ_Handle h2 = f_root->add_EQ(); +        h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); +        h2.update_coef(e, -tile_size); +        h2.update_coef(aligned_lb, -1); +      } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) +        GEQ_Handle h1 = f_root->add_GEQ(); +        h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); +         +        GEQ_Handle h2 = f_root->add_GEQ(); +        h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), +                       -tile_size); +        h2.update_coef(aligned_lb, -1); +        h2.update_coef(ub, 1); +      } +       +      // special care for private statements like overflow assignment +      if (private_stmt.find(*i) != private_stmt.end()) { // e.g. ii <= UB +        GEQ_Handle h = f_root->add_GEQ(); +        h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); +        h.update_coef(ub, 1); +      } +      // if (private_stmt.find(*i) != private_stmt.end()) { +      //   if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii +      //     GEQ_Handle h = f_root->add_GEQ(); +      //     h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); +      //     h.update_coef(ub, 1); +       +      //     stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); +      //     f_root = stmt[*i].xform.and_with_and(); +      //     EQ_Handle h1 = f_root->add_EQ(); +      //     h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); +      //     h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); +      //   } +      //   else if (method == StridedTile) { // e.g. 
ii <= UB since i does not exist +      //     GEQ_Handle h = f_root->add_GEQ(); +      //     h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); +      //     h.update_coef(ub, 1); +      //   } +      // } +       +      // restrict original loop index inside the tile +      else { +        if (method == StridedTile) { // e.g. ii <= i < ii + tile_size +          GEQ_Handle h1 = f_root->add_GEQ(); +          h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); +          h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), +                         -1); +           +          GEQ_Handle h2 = f_root->add_GEQ(); +          h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); +          h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); +          h2.update_const(tile_size - 1); +        } else if (method == CountedTile) { // e.g. LB+32*ii <= i < LB+32*ii+tile_size +          GEQ_Handle h1 = f_root->add_GEQ(); +          h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), +                         -tile_size); +          h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); +          h1.update_coef(aligned_lb, -1); +           +          GEQ_Handle h2 = f_root->add_GEQ(); +          h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), +                         tile_size); +          h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); +          h2.update_const(tile_size - 1); +          h2.update_coef(aligned_lb, 1); +        } +      } +    } +  } +   +  // update loop level information +  for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); +       i != same_tile_controlling_loop.end(); i++) { +    for (int j = 1; j <= stmt[*i].loop_level.size(); j++) +      switch (stmt[*i].loop_level[j - 1].type) { +      case LoopLevelOriginal: +        break; +      case LoopLevelTile: +        if (stmt[*i].loop_level[j - 1].payload >= outer_level) +          stmt[*i].loop_level[j - 1].payload++; +        break; +      
default: +        throw loop_error( +          "unknown loop level type for statement " +          + to_string(*i)); +      } +     +    LoopLevel ll; +    ll.type = LoopLevelTile; +    ll.payload = level + 1; +    ll.parallel_level = 0; +    stmt[*i].loop_level.insert( +      stmt[*i].loop_level.begin() + (outer_level - 1), ll); +  } +} + diff --git a/src/loop_unroll.cc b/src/loop_unroll.cc new file mode 100644 index 0000000..9bc6acf --- /dev/null +++ b/src/loop_unroll.cc @@ -0,0 +1,1166 @@ +/* + * loop_unroll.cc + * + *  Created on: Nov 12, 2012 + *      Author: anand + */ + +#include <code_gen/codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" +#include <math.h> + +using namespace omega; + + +std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, +                           std::vector<std::vector<std::string> > idxNames, +                           int cleanup_split_level) { +  // check for sanity of parameters +  // check for sanity of parameters +  if (unroll_amount < 0) +    throw std::invalid_argument( +      "invalid unroll amount " + to_string(unroll_amount)); +  if (stmt_num < 0 || stmt_num >= stmt.size()) +    throw std::invalid_argument("invalid statement " + to_string(stmt_num)); +  if (level <= 0 || level > stmt[stmt_num].loop_level.size()) +    throw std::invalid_argument("invalid loop level " + to_string(level)); +   +  if (cleanup_split_level == 0) +    cleanup_split_level = level; +  if (cleanup_split_level > level) +    throw std::invalid_argument( +      "cleanup code must be split at or outside the unrolled loop level " +      + to_string(level)); +  if (cleanup_split_level <= 0) +    throw std::invalid_argument( +      "invalid split loop level " + to_string(cleanup_split_level)); +   +  // invalidate saved codegen computation +  delete last_compute_cgr_; +  last_compute_cgr_ = NULL; +  delete last_compute_cg_; +  last_compute_cg_ = NULL; + 
  +  int dim = 2 * level - 1; +  std::vector<int> lex = getLexicalOrder(stmt_num); +  std::set<int> same_loop = getStatements(lex, dim - 1); +   +  // nothing to do +  if (unroll_amount == 1) +    return std::set<int>(); +   +  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); +       i++) { +    std::vector<std::pair<int, DependenceVector> > D; +    int n = stmt[*i].xform.n_out(); +    for (DependenceGraph::EdgeList::iterator j = +           dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); +         j++) { +      if (same_loop.find(j->first) != same_loop.end()) +        for (int k = 0; k < j->second.size(); k++) { +          DependenceVector dv = j->second[k]; +          int dim2 = level - 1; +          if (dv.type != DEP_CONTROL) { +             +            while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { +              dim2 = stmt[*i].loop_level[dim2].payload - 1; +            } +            dim2 = stmt[*i].loop_level[dim2].payload; +             +            /*if (dv.isCarried(dim2) +              && (dv.hasNegative(dim2) && !dv.quasi)) +              throw loop_error( +              "loop error: Unrolling is illegal, dependence violation!"); +               +              if (dv.isCarried(dim2) +              && (dv.hasPositive(dim2) && dv.quasi)) +              throw loop_error( +              "loop error: Unrolling is illegal, dependence violation!"); +            */ +            bool safe = false; +             +            if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { +              if (dv.quasi) +                throw loop_error( +                  "loop error: a quasi dependence with a positive carried distance"); +              if (!dv.quasi) { +                if (dv.lbounds[dim2] != posInfinity) { +                  //if (dv.lbounds[dim2] != negInfinity) +                  if (dv.lbounds[dim2] > unroll_amount) +                    safe = true; +                } else +                  safe = true; 
+              }/* else { +                  if (dv.ubounds[dim2] != negInfinity) { +                  if (dv.ubounds[dim2] != posInfinity) +                  if ((-(dv.ubounds[dim2])) > unroll_amount) +                  safe = true; +                  } else +                  safe = true; +                  }*/ +               +              if (!safe) { +                for (int l = level + 1; l <= (n - 1) / 2; l++) { +                  int dim3 = l - 1; +                   +                  if (stmt[*i].loop_level[dim3].type +                      != LoopLevelTile) +                    dim3 = +                      stmt[*i].loop_level[dim3].payload; +                  else { +                    while (stmt[*i].loop_level[dim3].type +                           == LoopLevelTile) { +                      dim3 = +                        stmt[*i].loop_level[dim3].payload +                        - 1; +                    } +                    dim3 = +                      stmt[*i].loop_level[dim3].payload; +                  } +                   +                  if (dim3 > dim2) { +                     +                    if (dv.hasPositive(dim3)) +                      break; +                    else if (dv.hasNegative(dim3)) +                      throw loop_error( +                        "loop error: Unrolling is illegal, dependence violation!"); +                  } +                } +              } +            } +          } +        } +    } +  } +  // extract the intersection of the iteration space to be considered +  Relation hull = Relation::True(level); +  apply_xform(same_loop); +  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); +       i++) { +    if (stmt[*i].IS.is_upper_bound_satisfiable()) { +      Relation mapping(stmt[*i].IS.n_set(), level); +      F_And *f_root = mapping.add_and(); +      for (int j = 1; j <= level; j++) { +        EQ_Handle h = f_root->add_EQ(); +        h.update_coef(mapping.input_var(j), 1); 
+        h.update_coef(mapping.output_var(j), -1); +      } +      hull = Intersection(hull, +                          Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); +      hull.simplify(2, 4); +       +    } +  } +  for (int i = 1; i <= level; i++) { +    std::string name = tmp_loop_var_name_prefix + to_string(i); +    hull.name_set_var(i, name); +  } +  hull.setup_names(); +   +  // extract the exact loop bound of the dimension to be unrolled +  if (is_single_loop_iteration(hull, level, this->known)) +    return std::set<int>(); +  Relation bound = get_loop_bound(hull, level, this->known); +  if (!bound.has_single_conjunct() || !bound.is_satisfiable() +      || bound.is_tautology()) +    throw loop_error("unable to extract loop bound for unrolling"); +   +  // extract the loop stride +  coef_t stride; +  std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, +                                                                  bound.set_var(level)); +  if (result.second == NULL) +    stride = 1; +  else +    stride = abs(result.first.get_coef(result.second)) +      / gcd(abs(result.first.get_coef(result.second)), +            abs(result.first.get_coef(bound.set_var(level)))); +   +  // separate lower and upper bounds +  std::vector<GEQ_Handle> lb_list, ub_list; +  { +    Conjunct *c = bound.query_DNF()->single_conjunct(); +    for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { +      int coef = (*gi).get_coef(bound.set_var(level)); +      if (coef < 0) +        ub_list.push_back(*gi); +      else if (coef > 0) +        lb_list.push_back(*gi); +    } +  } +   +  // simplify overflow expression for each pair of upper and lower bounds +  std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table( +    lb_list.size(), +    std::vector<std::map<Variable_ID, int> >(ub_list.size(), +                                             std::map<Variable_ID, int>())); +  bool is_overflow_simplifiable = true; +  for (int i = 0; i < lb_list.size(); i++) { + 
   if (!is_overflow_simplifiable) +      break; +     +    for (int j = 0; j < ub_list.size(); j++) { +      // lower bound or upper bound has non-unit coefficient, can't simplify +      if (ub_list[j].get_coef(bound.set_var(level)) != -1 +          || lb_list[i].get_coef(bound.set_var(level)) != 1) { +        is_overflow_simplifiable = false; +        break; +      } +       +      for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { +        switch ((*ci).var->kind()) { +        case Input_Var: { +          if ((*ci).var != bound.set_var(level)) +            overflow_table[i][j][(*ci).var] += (*ci).coef; +           +          break; +        } +        case Global_Var: { +          Global_Var_ID g = (*ci).var->get_global_var(); +          Variable_ID v; +          if (g->arity() == 0) +            v = bound.get_local(g); +          else +            v = bound.get_local(g, (*ci).var->function_of()); +          overflow_table[i][j][(*ci).var] += (*ci).coef; +          break; +        } +        default: +          throw loop_error("failed to calculate overflow amount"); +        } +      } +      overflow_table[i][j][NULL] += ub_list[j].get_const(); +       +      for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { +        switch ((*ci).var->kind()) { +        case Input_Var: { +          if ((*ci).var != bound.set_var(level)) { +            overflow_table[i][j][(*ci).var] += (*ci).coef; +            if (overflow_table[i][j][(*ci).var] == 0) +              overflow_table[i][j].erase( +                overflow_table[i][j].find((*ci).var)); +          } +          break; +        } +        case Global_Var: { +          Global_Var_ID g = (*ci).var->get_global_var(); +          Variable_ID v; +          if (g->arity() == 0) +            v = bound.get_local(g); +          else +            v = bound.get_local(g, (*ci).var->function_of()); +          overflow_table[i][j][(*ci).var] += (*ci).coef; +          if (overflow_table[i][j][(*ci).var] == 0) +            
overflow_table[i][j].erase( +              overflow_table[i][j].find((*ci).var)); +          break; +        } +        default: +          throw loop_error("failed to calculate overflow amount"); +        } +      } +      overflow_table[i][j][NULL] += lb_list[i].get_const(); +       +      overflow_table[i][j][NULL] += stride; +      if (unroll_amount == 0 +          || (overflow_table[i][j].size() == 1 +              && overflow_table[i][j][NULL] / stride +              < unroll_amount)) +        unroll_amount = overflow_table[i][j][NULL] / stride; +    } +  } +   +  // loop iteration count can't be determined, bail out gracefully +  if (unroll_amount == 0) +    return std::set<int>(); +   +  // further simply overflow calculation using coefficients' modular +  if (is_overflow_simplifiable) { +    for (int i = 0; i < lb_list.size(); i++) +      for (int j = 0; j < ub_list.size(); j++) +        if (stride == 1) { +          for (std::map<Variable_ID, int>::iterator k = +                 overflow_table[i][j].begin(); +               k != overflow_table[i][j].end();) +            if ((*k).first != NULL) { +              int t = int_mod_hat((*k).second, unroll_amount); +              if (t == 0) { +                overflow_table[i][j].erase(k++); +              } else { +                int t2 = hull.query_variable_mod((*k).first, +                                                 unroll_amount); +                if (t2 != INT_MAX) { +                  overflow_table[i][j][NULL] += t * t2; +                  overflow_table[i][j].erase(k++); +                } else { +                  (*k).second = t; +                  k++; +                } +              } +            } else +              k++; +           +          overflow_table[i][j][NULL] = int_mod_hat( +            overflow_table[i][j][NULL], unroll_amount); +           +          // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula +          for 
(std::map<Variable_ID, int>::iterator k = +                 overflow_table[i][j].begin(); +               k != overflow_table[i][j].end(); k++) +            if ((*k).second < 0) +              (*k).second += unroll_amount; +        } +  } +   +  // build overflow statement +  CG_outputBuilder *ocg = ir->builder(); +  CG_outputRepr *overflow_code = NULL; +  Relation cond_upper(level), cond_lower(level); +  Relation overflow_constraint(0); +  F_And *overflow_constraint_root = overflow_constraint.add_and(); +  std::vector<Free_Var_Decl *> over_var_list; +  if (is_overflow_simplifiable && lb_list.size() == 1) { +    for (int i = 0; i < ub_list.size(); i++) { +      if (overflow_table[0][i].size() == 1) { +        // upper splitting condition +        GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); +        h.update_const( +          ((overflow_table[0][i][NULL] / stride) % unroll_amount) +          * -stride); +      } else { +        // upper splitting condition +        std::string over_name = overflow_var_name_prefix +          + to_string(overflow_var_name_counter++); +        Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); +        over_var_list.push_back(over_free_var); +        GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); +        h.update_coef(cond_upper.get_local(over_free_var), -stride); +         +        // insert constraint 0 <= overflow < unroll_amount +        Variable_ID v = overflow_constraint.get_local(over_free_var); +        GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); +        h1.update_coef(v, 1); +        GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); +        h2.update_coef(v, -1); +        h2.update_const(unroll_amount - 1); +         +        // create overflow assignment +        bound.setup_names(); // hack to fix omega relation variable names issue +        CG_outputRepr *rhs = NULL; +        bool is_split_illegal = false; +        for (std::map<Variable_ID, int>::iterator j = +               
overflow_table[0][i].begin(); +             j != overflow_table[0][i].end(); j++) +          if ((*j).first != NULL) { +            if ((*j).first->kind() == Input_Var +                && (*j).first->get_position() +                >= cleanup_split_level) +              is_split_illegal = true; +             +            CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); +            if ((*j).second != 1) +              t = ocg->CreateTimes(ocg->CreateInt((*j).second), +                                   t); +            rhs = ocg->CreatePlus(rhs, t); +          } else if ((*j).second != 0) +            rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); +         +        if (is_split_illegal) { +          rhs->clear(); +          delete rhs; +          throw loop_error( +            "cannot split cleanup code at loop level " +            + to_string(cleanup_split_level) +            + " due to overflow variable data dependence"); +        } +         +        if (stride != 1) +          rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); +        rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); +         +        CG_outputRepr *lhs = ocg->CreateIdent(over_name); +        init_code = ocg->StmtListAppend(init_code, +                                        ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); +        lhs = ocg->CreateIdent(over_name); +        overflow_code = ocg->StmtListAppend(overflow_code, +                                            ocg->CreateAssignment(0, lhs, rhs)); +      } +    } +     +    // lower splitting condition +    GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); +  } else if (is_overflow_simplifiable && ub_list.size() == 1) { +    for (int i = 0; i < lb_list.size(); i++) { +       +      if (overflow_table[i][0].size() == 1) { +        // lower splitting condition +        GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); +        h.update_const(overflow_table[i][0][NULL] * -stride); +      } 
else { +        // lower splitting condition +        std::string over_name = overflow_var_name_prefix +          + to_string(overflow_var_name_counter++); +        Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); +        over_var_list.push_back(over_free_var); +        GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); +        h.update_coef(cond_lower.get_local(over_free_var), -stride); +         +        // insert constraint 0 <= overflow < unroll_amount +        Variable_ID v = overflow_constraint.get_local(over_free_var); +        GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); +        h1.update_coef(v, 1); +        GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); +        h2.update_coef(v, -1); +        h2.update_const(unroll_amount - 1); +         +        // create overflow assignment +        bound.setup_names(); // hack to fix omega relation variable names issue +        CG_outputRepr *rhs = NULL; +        for (std::map<Variable_ID, int>::iterator j = +               overflow_table[0][i].begin(); +             j != overflow_table[0][i].end(); j++) +          if ((*j).first != NULL) { +            CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); +            if ((*j).second != 1) +              t = ocg->CreateTimes(ocg->CreateInt((*j).second), +                                   t); +            rhs = ocg->CreatePlus(rhs, t); +          } else if ((*j).second != 0) +            rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); +         +        if (stride != 1) +          rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); +        rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); +         +        CG_outputRepr *lhs = ocg->CreateIdent(over_name); +        init_code = ocg->StmtListAppend(init_code, +                                        ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); +        lhs = ocg->CreateIdent(over_name); +        overflow_code = ocg->StmtListAppend(overflow_code, +   
                                         ocg->CreateAssignment(0, lhs, rhs)); +      } +    } +     +    // upper splitting condition +    GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); +  } else { +    std::string over_name = overflow_var_name_prefix +      + to_string(overflow_var_name_counter++); +    Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); +    over_var_list.push_back(over_free_var); +     +    std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list; +    for (int i = 0; i < lb_list.size(); i++) { +      lb_repr_list.push_back( +        output_lower_bound_repr(ocg, lb_list[i], +                                bound.set_var(dim + 1), result.first, result.second, +                                bound, Relation::True(bound.n_set()), +                                std::vector<std::pair<CG_outputRepr *, int> >( +                                  bound.n_set(), +                                  std::make_pair( +                                    static_cast<CG_outputRepr *>(NULL), +                                    0)))); +      GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); +    } +    for (int i = 0; i < ub_list.size(); i++) { +      ub_repr_list.push_back( +        output_upper_bound_repr(ocg, ub_list[i], +                                bound.set_var(dim + 1), bound, +                                std::vector<std::pair<CG_outputRepr *, int> >( +                                  bound.n_set(), +                                  std::make_pair( +                                    static_cast<CG_outputRepr *>(NULL), +                                    0)))); +      GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); +      h.update_coef(cond_upper.get_local(over_free_var), -stride); +    } +     +    CG_outputRepr *lbRepr, *ubRepr; +    if (lb_repr_list.size() > 1) +      lbRepr = ocg->CreateInvoke("max", lb_repr_list); +    else if (lb_repr_list.size() == 1) +      lbRepr = lb_repr_list[0]; +     +    if 
(ub_repr_list.size() > 1) +      ubRepr = ocg->CreateInvoke("min", ub_repr_list); +    else if (ub_repr_list.size() == 1) +      ubRepr = ub_repr_list[0]; +     +    // create overflow assignment +    CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), +                                         ocg->CreateInt(1)); +    if (stride != 1) +      rhs = ocg->CreateIntegerFloor(rhs, ocg->CreateInt(stride)); +    rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); +    CG_outputRepr *lhs = ocg->CreateIdent(over_name); +    init_code = ocg->StmtListAppend(init_code, +                                    ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); +    lhs = ocg->CreateIdent(over_name); +    overflow_code = ocg->CreateAssignment(0, lhs, rhs); +     +    // insert constraint 0 <= overflow < unroll_amount +    Variable_ID v = overflow_constraint.get_local(over_free_var); +    GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); +    h1.update_coef(v, 1); +    GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); +    h2.update_coef(v, -1); +    h2.update_const(unroll_amount - 1); +  } +   +  // insert overflow statement +  int overflow_stmt_num = -1; +  if (overflow_code != NULL) { +    // build iteration space for overflow statement +    Relation mapping(level, cleanup_split_level - 1); +    F_And *f_root = mapping.add_and(); +    for (int i = 1; i < cleanup_split_level; i++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(i), 1); +      h.update_coef(mapping.input_var(i), -1); +    } +    Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull))); +    for (int i = 1; i < cleanup_split_level; i++) +      overflow_IS.name_set_var(i, hull.set_var(i)->name()); +    overflow_IS.setup_names(); +     +    // build dumb transformation relation for overflow statement +    Relation overflow_xform(cleanup_split_level - 1, +                            2 * (cleanup_split_level - 1) + 1); +    f_root = 
overflow_xform.add_and(); +    for (int i = 1; i <= cleanup_split_level - 1; i++) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(overflow_xform.output_var(2 * i), 1); +      h.update_coef(overflow_xform.input_var(i), -1); +       +      h = f_root->add_EQ(); +      h.update_coef(overflow_xform.output_var(2 * i - 1), 1); +      h.update_const(-lex[2 * i - 2]); +    } +    EQ_Handle h = f_root->add_EQ(); +    h.update_coef( +      overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), +      1); +    h.update_const(-lex[2 * (cleanup_split_level - 1)]); +     +    shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1); +    Statement overflow_stmt; +     +    overflow_stmt.code = overflow_code; +    overflow_stmt.IS = overflow_IS; +    overflow_stmt.xform = overflow_xform; +    overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1); +    overflow_stmt.ir_stmt_node = NULL; +    for (int i = 0; i < level - 1; i++) { +      overflow_stmt.loop_level[i].type = +        stmt[stmt_num].loop_level[i].type; +      if (stmt[stmt_num].loop_level[i].type == LoopLevelTile +          && stmt[stmt_num].loop_level[i].payload >= level) +        overflow_stmt.loop_level[i].payload = -1; +      else +        overflow_stmt.loop_level[i].payload = +          stmt[stmt_num].loop_level[i].payload; +      overflow_stmt.loop_level[i].parallel_level = +        stmt[stmt_num].loop_level[i].parallel_level; +    } +     +    stmt.push_back(overflow_stmt); +    dep.insert(); +    overflow_stmt_num = stmt.size() - 1; +    overflow[overflow_stmt_num] = over_var_list; +     +    // update the global known information on overflow variable +    this->known = Intersection(this->known, +                               Extend_Set(copy(overflow_constraint), +                                          this->known.n_set() - overflow_constraint.n_set())); +     +    // update dependence graph +    DependenceVector dv; +    dv.type = DEP_CONTROL; +    for (std::set<int>::iterator i = 
same_loop.begin(); +         i != same_loop.end(); i++) +      dep.connect(overflow_stmt_num, *i, dv); +    dv.type = DEP_W2W; +    { +      IR_ScalarSymbol *overflow_sym = NULL; +      std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef( +        overflow_code); +      for (int i = scalars.size() - 1; i >= 0; i--) +        if (scalars[i]->is_write()) { +          overflow_sym = scalars[i]->symbol(); +          break; +        } +      for (int i = scalars.size() - 1; i >= 0; i--) +        delete scalars[i]; +      dv.sym = overflow_sym; +    } +    dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); +    dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); +    int dep_dim = get_last_dep_dim_before(stmt_num, level); +    for (int i = dep_dim + 1; i < dep.num_dim(); i++) { +      dv.lbounds[i] = -posInfinity; +      dv.ubounds[i] = posInfinity; +    } +    for (int i = 0; i <= dep_dim; i++) { +      if (i != 0) { +        dv.lbounds[i - 1] = 0; +        dv.ubounds[i - 1] = 0; +      } +      dv.lbounds[i] = 1; +      dv.ubounds[i] = posInfinity; +      dep.connect(overflow_stmt_num, overflow_stmt_num, dv); +    } +  } +   +  // split the loop so it can be fully unrolled +  std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper); +  std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower); +  new_stmts.insert(new_stmts2.begin(), new_stmts2.end()); +   +  // check if unrolled statements can be trivially lumped together as one statement +  bool can_be_lumped = true; +  if (can_be_lumped) { +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) +      if (*i != stmt_num) { +        if (stmt[*i].loop_level.size() +            != stmt[stmt_num].loop_level.size()) { +          can_be_lumped = false; +          break; +        } +        for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) +          if (!(stmt[*i].loop_level[j].type +                == stmt[stmt_num].loop_level[j].type + 
               && stmt[*i].loop_level[j].payload +                == stmt[stmt_num].loop_level[j].payload)) { +            can_be_lumped = false; +            break; +          } +        if (!can_be_lumped) +          break; +        std::vector<int> lex2 = getLexicalOrder(*i); +        for (int j = 2 * level; j < lex.size() - 1; j += 2) +          if (lex[j] != lex2[j]) { +            can_be_lumped = false; +            break; +          } +        if (!can_be_lumped) +          break; +      } +  } +  if (can_be_lumped) { +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) +      if (is_inner_loop_depend_on_level(stmt[*i].IS, level, +                                        this->known)) { +        can_be_lumped = false; +        break; +      } +  } +  if (can_be_lumped) { +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) +      if (*i != stmt_num) { +        if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) +              && Must_Be_Subset(copy(stmt[stmt_num].IS), +                                copy(stmt[*i].IS)))) { +          can_be_lumped = false; +          break; +        } +      } +  } +  if (can_be_lumped) { +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) { +      for (DependenceGraph::EdgeList::iterator j = +             dep.vertex[*i].second.begin(); +           j != dep.vertex[*i].second.end(); j++) +        if (same_loop.find(j->first) != same_loop.end()) { +          for (int k = 0; k < j->second.size(); k++) +            if (j->second[k].type == DEP_CONTROL +                || j->second[k].type == DEP_UNKNOWN) { +              can_be_lumped = false; +              break; +            } +          if (!can_be_lumped) +            break; +        } +      if (!can_be_lumped) +        break; +    } +  } +   +  // insert unrolled statements +  int old_num_stmt = stmt.size(); +  if (!can_be_lumped) { +    
std::map<int, std::vector<int> > what_stmt_num; +     +    for (int j = 1; j < unroll_amount; j++) { +      for (std::set<int>::iterator i = same_loop.begin(); +           i != same_loop.end(); i++) { +        Statement new_stmt; +         +        std::vector<std::string> loop_vars; +        std::vector<CG_outputRepr *> subs; +        loop_vars.push_back(stmt[*i].IS.set_var(level)->name()); +        subs.push_back( +          ocg->CreatePlus( +            ocg->CreateIdent( +              stmt[*i].IS.set_var(level)->name()), +            ocg->CreateInt(j * stride))); +        new_stmt.code = ocg->CreateSubstitutedStmt(0, +                                                   stmt[*i].code->clone(), loop_vars, subs); +         +        new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); +        add_loop_stride(new_stmt.IS, bound, level - 1, +                        unroll_amount * stride); +         +        new_stmt.xform = copy(stmt[*i].xform); +         +        new_stmt.loop_level = stmt[*i].loop_level; +        new_stmt.ir_stmt_node = NULL; +        stmt.push_back(new_stmt); +        dep.insert(); +        what_stmt_num[*i].push_back(stmt.size() - 1); +      } +    } +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) +      add_loop_stride(stmt[*i].IS, bound, level - 1, +                      unroll_amount * stride); +     +    // update dependence graph +    if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { +      int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; +      int new_stride = unroll_amount * stride; +      for (int i = 0; i < old_num_stmt; i++) { +        std::vector<std::pair<int, DependenceVector> > D; +         +        for (DependenceGraph::EdgeList::iterator j = +               dep.vertex[i].second.begin(); +             j != dep.vertex[i].second.end();) { +          if (same_loop.find(i) != same_loop.end()) { +            if (same_loop.find(j->first) != 
same_loop.end()) { +              for (int k = 0; k < j->second.size(); k++) { +                DependenceVector dv = j->second[k]; +                if (dv.type == DEP_CONTROL +                    || dv.type == DEP_UNKNOWN) { +                  D.push_back(std::make_pair(j->first, dv)); +                  for (int kk = 0; kk < unroll_amount - 1; +                       kk++) +                    if (what_stmt_num[i][kk] != -1 +                        && what_stmt_num[j->first][kk] +                        != -1) +                      dep.connect(what_stmt_num[i][kk], +                                  what_stmt_num[j->first][kk], +                                  dv); +                } else { +                  coef_t lb = dv.lbounds[dep_dim]; +                  coef_t ub = dv.ubounds[dep_dim]; +                  if (ub == lb +                      && int_mod(lb, +                                 static_cast<coef_t>(new_stride)) +                      == 0) { +                    D.push_back( +                      std::make_pair(j->first, dv)); +                    for (int kk = 0; kk < unroll_amount - 1; +                         kk++) +                      if (what_stmt_num[i][kk] != -1 +                          && what_stmt_num[j->first][kk] +                          != -1) +                        dep.connect( +                          what_stmt_num[i][kk], +                          what_stmt_num[j->first][kk], +                          dv); +                  } else if (lb == -posInfinity +                             && ub == posInfinity) { +                    D.push_back( +                      std::make_pair(j->first, dv)); +                    for (int kk = 0; kk < unroll_amount; +                         kk++) +                      if (kk == 0) +                        D.push_back( +                          std::make_pair(j->first, +                                         dv)); +                      else if (what_stmt_num[j->first][kk +     
                                                  - 1] != -1) +                        D.push_back( +                          std::make_pair( +                            what_stmt_num[j->first][kk +                                                    - 1], +                            dv)); +                    for (int t = 0; t < unroll_amount - 1; +                         t++) +                      if (what_stmt_num[i][t] != -1) +                        for (int kk = 0; +                             kk < unroll_amount; +                             kk++) +                          if (kk == 0) +                            dep.connect( +                              what_stmt_num[i][t], +                              j->first, dv); +                          else if (what_stmt_num[j->first][kk +                                                           - 1] != -1) +                            dep.connect( +                              what_stmt_num[i][t], +                              what_stmt_num[j->first][kk +                                                      - 1], +                              dv); +                  } else { +                    for (int kk = 0; kk < unroll_amount; +                         kk++) { +                      if (lb != -posInfinity) { +                        if (kk * stride +                            < int_mod(lb, +                                      static_cast<coef_t>(new_stride))) +                          dv.lbounds[dep_dim] = +                            floor( +                              static_cast<double>(lb) +                              / new_stride) +                            * new_stride +                            + new_stride; +                        else +                          dv.lbounds[dep_dim] = +                            floor( +                              static_cast<double>(lb) +                              / new_stride) +                            * new_stride; +               
       } +                      if (ub != posInfinity) { +                        if (kk * stride +                            > int_mod(ub, +                                      static_cast<coef_t>(new_stride))) +                          dv.ubounds[dep_dim] = +                            floor( +                              static_cast<double>(ub) +                              / new_stride) +                            * new_stride +                            - new_stride; +                        else +                          dv.ubounds[dep_dim] = +                            floor( +                              static_cast<double>(ub) +                              / new_stride) +                            * new_stride; +                      } +                      if (dv.ubounds[dep_dim] +                          >= dv.lbounds[dep_dim]) { +                        if (kk == 0) +                          D.push_back( +                            std::make_pair( +                              j->first, +                              dv)); +                        else if (what_stmt_num[j->first][kk +                                                         - 1] != -1) +                          D.push_back( +                            std::make_pair( +                              what_stmt_num[j->first][kk +                                                      - 1], +                              dv)); +                      } +                    } +                    for (int t = 0; t < unroll_amount - 1; +                         t++) +                      if (what_stmt_num[i][t] != -1) +                        for (int kk = 0; +                             kk < unroll_amount; +                             kk++) { +                          if (lb != -posInfinity) { +                            if (kk * stride +                                < int_mod( +                                  lb + t +                                  + 1, +            
                      static_cast<coef_t>(new_stride))) +                              dv.lbounds[dep_dim] = +                                floor( +                                  static_cast<double>(lb +                                                      + (t +                                                         + 1) +                                                      * stride) +                                  / new_stride) +                                * new_stride +                                + new_stride; +                            else +                              dv.lbounds[dep_dim] = +                                floor( +                                  static_cast<double>(lb +                                                      + (t +                                                         + 1) +                                                      * stride) +                                  / new_stride) +                                * new_stride; +                          } +                          if (ub != posInfinity) { +                            if (kk * stride +                                > int_mod( +                                  ub + t +                                  + 1, +                                  static_cast<coef_t>(new_stride))) +                              dv.ubounds[dep_dim] = +                                floor( +                                  static_cast<double>(ub +                                                      + (t +                                                         + 1) +                                                      * stride) +                                  / new_stride) +                                * new_stride +                                - new_stride; +                            else +                              dv.ubounds[dep_dim] = +                                floor( +                                  static_cast<double>(ub +      
                                                + (t +                                                         + 1) +                                                      * stride) +                                  / new_stride) +                                * new_stride; +                          } +                          if (dv.ubounds[dep_dim] +                              >= dv.lbounds[dep_dim]) { +                            if (kk == 0) +                              dep.connect( +                                what_stmt_num[i][t], +                                j->first, +                                dv); +                            else if (what_stmt_num[j->first][kk +                                                             - 1] != -1) +                              dep.connect( +                                what_stmt_num[i][t], +                                what_stmt_num[j->first][kk +                                                        - 1], +                                dv); +                          } +                        } +                  } +                } +              } +               +              dep.vertex[i].second.erase(j++); +            } else { +              for (int kk = 0; kk < unroll_amount - 1; kk++) +                if (what_stmt_num[i][kk] != -1) +                  dep.connect(what_stmt_num[i][kk], j->first, +                              j->second); +               +              j++; +            } +          } else { +            if (same_loop.find(j->first) != same_loop.end()) +              for (int k = 0; k < j->second.size(); k++) +                for (int kk = 0; kk < unroll_amount - 1; kk++) +                  if (what_stmt_num[j->first][kk] != -1) +                    D.push_back( +                      std::make_pair( +                        what_stmt_num[j->first][kk], +                        j->second[k])); +            j++; +          } +        } +         +        for (int 
j = 0; j < D.size(); j++) +          dep.connect(i, D[j].first, D[j].second); +      } +    } +     +    // reset lexical order for the unrolled loop body +    std::set<int> new_same_loop; +     +    int count = 0; +     +    for (std::map<int, std::vector<int> >::iterator i = +           what_stmt_num.begin(); i != what_stmt_num.end(); i++) { +       +      new_same_loop.insert(i->first); +      for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2) +        assign_const(stmt[i->first].xform, k, +                     get_const(stmt[(what_stmt_num.begin())->first].xform, k, +                               Output_Var) + count); +      count++; +      for (int j = 0; j < i->second.size(); j++) { +        new_same_loop.insert(i->second[j]); +        for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k += +               2) +          assign_const(stmt[i->second[j]].xform, k, +                       get_const( +                         stmt[(what_stmt_num.begin())->first].xform, +                         k, Output_Var) + count); +        count++; +      } +    } +    setLexicalOrder(dim + 1, new_same_loop, 0, idxNames); +  } else { +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) +      add_loop_stride(stmt[*i].IS, bound, level - 1, +                      unroll_amount * stride); +     +    int max_level = stmt[stmt_num].loop_level.size(); +    std::vector<std::pair<int, int> > stmt_order; +    for (std::set<int>::iterator i = same_loop.begin(); +         i != same_loop.end(); i++) +      stmt_order.push_back( +        std::make_pair( +          get_const(stmt[*i].xform, 2 * max_level, +                    Output_Var), *i)); +    sort(stmt_order.begin(), stmt_order.end()); +     +    Statement new_stmt; +    new_stmt.code = NULL; +    for (int j = 1; j < unroll_amount; j++) +      for (int i = 0; i < stmt_order.size(); i++) { +        std::vector<std::string> loop_vars; +        std::vector<CG_outputRepr *> 
subs; +        loop_vars.push_back( +          stmt[stmt_order[i].second].IS.set_var(level)->name()); +        subs.push_back( +          ocg->CreatePlus( +            ocg->CreateIdent( +              stmt[stmt_order[i].second].IS.set_var( +                level)->name()), +            ocg->CreateInt(j * stride))); +        CG_outputRepr *code = ocg->CreateSubstitutedStmt(0, +                                                         stmt[stmt_order[i].second].code->clone(), loop_vars, +                                                         subs); +        new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); +      } +     +    new_stmt.IS = copy(stmt[stmt_num].IS); +    new_stmt.xform = copy(stmt[stmt_num].xform); +    assign_const(new_stmt.xform, 2 * max_level, +                 stmt_order[stmt_order.size() - 1].first + 1); +    new_stmt.loop_level = stmt[stmt_num].loop_level; +    new_stmt.ir_stmt_node = NULL; +    stmt.push_back(new_stmt); +    dep.insert(); +     +    // update dependence graph +    if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { +      int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; +      int new_stride = unroll_amount * stride; +      for (int i = 0; i < old_num_stmt; i++) { +        std::vector<std::pair<int, std::vector<DependenceVector> > > D; +         +        for (DependenceGraph::EdgeList::iterator j = +               dep.vertex[i].second.begin(); +             j != dep.vertex[i].second.end();) { +          if (same_loop.find(i) != same_loop.end()) { +            if (same_loop.find(j->first) != same_loop.end()) { +              std::vector<DependenceVector> dvs11, dvs12, dvs22, +                dvs21; +              for (int k = 0; k < j->second.size(); k++) { +                DependenceVector dv = j->second[k]; +                if (dv.type == DEP_CONTROL +                    || dv.type == DEP_UNKNOWN) { +                  if (i == j->first) { +                    dvs11.push_back(dv); +          
          dvs22.push_back(dv); +                  } else +                    throw loop_error( +                      "unrolled statements lumped together illegally"); +                } else { +                  coef_t lb = dv.lbounds[dep_dim]; +                  coef_t ub = dv.ubounds[dep_dim]; +                  if (ub == lb +                      && int_mod(lb, +                                 static_cast<coef_t>(new_stride)) +                      == 0) { +                    dvs11.push_back(dv); +                    dvs22.push_back(dv); +                  } else { +                    if (lb != -posInfinity) +                      dv.lbounds[dep_dim] = ceil( +                        static_cast<double>(lb) +                        / new_stride) +                        * new_stride; +                    if (ub != posInfinity) +                      dv.ubounds[dep_dim] = floor( +                        static_cast<double>(ub) +                        / new_stride) +                        * new_stride; +                    if (dv.ubounds[dep_dim] +                        >= dv.lbounds[dep_dim]) +                      dvs11.push_back(dv); +                     +                    if (lb != -posInfinity) +                      dv.lbounds[dep_dim] = ceil( +                        static_cast<double>(lb) +                        / new_stride) +                        * new_stride; +                    if (ub != posInfinity) +                      dv.ubounds[dep_dim] = ceil( +                        static_cast<double>(ub) +                        / new_stride) +                        * new_stride; +                    if (dv.ubounds[dep_dim] +                        >= dv.lbounds[dep_dim]) +                      dvs21.push_back(dv); +                     +                    if (lb != -posInfinity) +                      dv.lbounds[dep_dim] = floor( +                        static_cast<double>(lb) +                        / new_stride) +                        
* new_stride; +                    if (ub != posInfinity) +                      dv.ubounds[dep_dim] = floor( +                        static_cast<double>(ub +                                            - stride) +                        / new_stride) +                        * new_stride; +                    if (dv.ubounds[dep_dim] +                        >= dv.lbounds[dep_dim]) +                      dvs12.push_back(dv); +                     +                    if (lb != -posInfinity) +                      dv.lbounds[dep_dim] = floor( +                        static_cast<double>(lb) +                        / new_stride) +                        * new_stride; +                    if (ub != posInfinity) +                      dv.ubounds[dep_dim] = ceil( +                        static_cast<double>(ub +                                            - stride) +                        / new_stride) +                        * new_stride; +                    if (dv.ubounds[dep_dim] +                        >= dv.lbounds[dep_dim]) +                      dvs22.push_back(dv); +                  } +                } +              } +              if (dvs11.size() > 0) +                D.push_back(std::make_pair(i, dvs11)); +              if (dvs22.size() > 0) +                dep.connect(old_num_stmt, old_num_stmt, dvs22); +              if (dvs12.size() > 0) +                D.push_back( +                  std::make_pair(old_num_stmt, dvs12)); +              if (dvs21.size() > 0) +                dep.connect(old_num_stmt, i, dvs21); +               +              dep.vertex[i].second.erase(j++); +            } else { +              dep.connect(old_num_stmt, j->first, j->second); +              j++; +            } +          } else { +            if (same_loop.find(j->first) != same_loop.end()) +              D.push_back( +                std::make_pair(old_num_stmt, j->second)); +            j++; +          } +        } +         +        for (int j = 0; j < D.size(); 
j++) +          dep.connect(i, D[j].first, D[j].second); +      } +    } +  } +   +  return new_stmts; +} + + diff --git a/src/omegatools.cc b/src/omegatools.cc new file mode 100644 index 0000000..3aac404 --- /dev/null +++ b/src/omegatools.cc @@ -0,0 +1,1185 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: +   Useful tools involving Omega manipulation. + + Notes: + + History: +   01/2006 Created by Chun Chen. +   03/2009 Upgrade Omega's interaction with compiler to IR_Code, by Chun Chen. +*****************************************************************************/ + +#include <code_gen/codegen.h> +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +namespace { +  struct DependenceLevel { +    Relation r; +    int level; +    int dir; // direction upto current level: +    // -1:negative, 0: undetermined, 1: postive +    std::vector<coef_t> lbounds; +    std::vector<coef_t> ubounds; +    DependenceLevel(const Relation &_r, int _dims): +      r(_r), level(0), dir(0), lbounds(_dims), ubounds(_dims) {} +  }; +} + + + + +std::string tmp_e() { +  static int counter = 1; +  return std::string("e")+to_string(counter++); +} + +void exp2formula(IR_Code *ir, Relation &r, F_And *f_root, std::vector<Free_Var_Decl*> &freevars, +                 CG_outputRepr *repr, Variable_ID lhs, char side, IR_CONDITION_TYPE rel, bool destroy) { +   +  switch (ir->QueryExpOperation(repr)) { +  case IR_OP_CONSTANT: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +    IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[0])); +    if (!ref->is_integer()) +      throw ir_exp_error("non-integer constant coefficient"); +     +    coef_t c = ref->integer(); +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      GEQ_Handle h = 
f_root->add_GEQ(); +      h.update_coef(lhs, 1); +      if (rel == IR_COND_GE) +        h.update_const(-c); +      else +        h.update_const(-c-1); +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      GEQ_Handle h = f_root->add_GEQ(); +      h.update_coef(lhs, -1); +      if (rel == IR_COND_LE) +        h.update_const(c); +      else +        h.update_const(c-1); +    } +    else if (rel == IR_COND_EQ) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(lhs, 1); +      h.update_const(-c); +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    delete v[0]; +    delete ref; +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_VARIABLE: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +    IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0])); +     +    std::string s = ref->name(); +    Variable_ID e = find_index(r, s, side); +     +    if (e == NULL) { // must be free variable +      Free_Var_Decl *t = NULL; +      for (unsigned i = 0; i < freevars.size(); i++) { +        std::string ss = freevars[i]->base_name(); +        if (s == ss) { +          t = freevars[i]; +          break; +        } +      } +       +      if (t == NULL) { +        t = new Free_Var_Decl(s); +        freevars.insert(freevars.end(), t); +      } +       +      e = r.get_local(t); +    } +     +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      GEQ_Handle h = f_root->add_GEQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e, -1); +      if (rel == IR_COND_GT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      GEQ_Handle h = f_root->add_GEQ(); +      h.update_coef(lhs, -1); +      h.update_coef(e, 1); +      if (rel == IR_COND_LT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_EQ) { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e, -1); +    } +    else +  
    throw std::invalid_argument("unsupported condition type"); +     +    //  delete v[0]; +    delete ref; +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_ASSIGNMENT: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +    exp2formula(ir, r, f_root, freevars, v[0], lhs, side, rel, true); +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_PLUS: +  { +    F_Exists *f_exists = f_root->add_exists(); +    Variable_ID e1 = f_exists->declare(tmp_e()); +    Variable_ID e2 = f_exists->declare(tmp_e()); +    F_And *f_and = f_exists->add_and(); +     +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e1, -1); +      h.update_coef(e2, -1); +      if (rel == IR_COND_GT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, -1); +      h.update_coef(e1, 1); +      h.update_coef(e2, 1); +      if (rel == IR_COND_LT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_EQ) { +      EQ_Handle h = f_and->add_EQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e1, -1); +      h.update_coef(e2, -1); +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +    exp2formula(ir, r, f_and, freevars, v[0], e1, side, IR_COND_EQ, true); +    exp2formula(ir, r, f_and, freevars, v[1], e2, side, IR_COND_EQ, true); +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_MINUS: +  { +    F_Exists *f_exists = f_root->add_exists(); +    Variable_ID e1 = f_exists->declare(tmp_e()); +    Variable_ID e2 = f_exists->declare(tmp_e()); +    F_And *f_and = f_exists->add_and(); +     +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e1, -1); 
+      h.update_coef(e2, 1); +      if (rel == IR_COND_GT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, -1); +      h.update_coef(e1, 1); +      h.update_coef(e2, -1); +      if (rel == IR_COND_LT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_EQ) { +      EQ_Handle h = f_and->add_EQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e1, -1); +      h.update_coef(e2, 1); +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +    exp2formula(ir, r, f_and, freevars, v[0], e1, side, IR_COND_EQ, true); +    exp2formula(ir, r, f_and, freevars, v[1], e2, side, IR_COND_EQ, true); +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_MULTIPLY: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +     +    coef_t coef; +    CG_outputRepr *term; +    if (ir->QueryExpOperation(v[0]) == IR_OP_CONSTANT) { +      IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[0])); +      coef = ref->integer(); +      delete v[0]; +      delete ref; +      term = v[1]; +    } +    else if (ir->QueryExpOperation(v[1]) == IR_OP_CONSTANT) { +      IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[1])); +      coef = ref->integer(); +      delete v[1]; +      delete ref; +      term = v[0]; +    } +    else +      throw ir_exp_error("not presburger expression"); +     +    F_Exists *f_exists = f_root->add_exists(); +    Variable_ID e = f_exists->declare(tmp_e()); +    F_And *f_and = f_exists->add_and(); +     +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e, -coef); +      if (rel == IR_COND_GT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      GEQ_Handle h = 
f_and->add_GEQ(); +      h.update_coef(lhs, -1); +      h.update_coef(e, coef); +      if (rel == IR_COND_LT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_EQ) { +      EQ_Handle h = f_and->add_EQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e, -coef); +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    exp2formula(ir, r, f_and, freevars, term, e, side, IR_COND_EQ, true); +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_DIVIDE: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +     +    assert(ir->QueryExpOperation(v[1]) == IR_OP_CONSTANT); +    IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[1])); +    coef_t coef = ref->integer(); +    delete v[1]; +    delete ref; +     +    F_Exists *f_exists = f_root->add_exists(); +    Variable_ID e = f_exists->declare(tmp_e()); +    F_And *f_and = f_exists->add_and(); +     +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, coef); +      h.update_coef(e, -1); +      if (rel == IR_COND_GT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, -coef); +      h.update_coef(e, 1); +      if (rel == IR_COND_LT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_EQ) { +      EQ_Handle h = f_and->add_EQ(); +      h.update_coef(lhs, coef); +      h.update_coef(e, -1); +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    exp2formula(ir, r, f_and, freevars, v[0], e, side, IR_COND_EQ, true); +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_POSITIVE: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +     +    exp2formula(ir, r, f_root, freevars, v[0], lhs, side, rel, true); +    if (destroy) +      delete repr; +    break; +  } +  case 
IR_OP_NEGATIVE: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +     +    F_Exists *f_exists = f_root->add_exists(); +    Variable_ID e = f_exists->declare(tmp_e()); +    F_And *f_and = f_exists->add_and(); +     +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e, 1); +      if (rel == IR_COND_GT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      GEQ_Handle h = f_and->add_GEQ(); +      h.update_coef(lhs, -1); +      h.update_coef(e, -1); +      if (rel == IR_COND_LT) +        h.update_const(-1); +    } +    else if (rel == IR_COND_EQ) { +      EQ_Handle h = f_and->add_EQ(); +      h.update_coef(lhs, 1); +      h.update_coef(e, 1); +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    exp2formula(ir, r, f_and, freevars, v[0], e, side, IR_COND_EQ, true); +    if (destroy) +      delete repr; +    break; +  } +  case IR_OP_MIN: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +     +    F_Exists *f_exists = f_root->add_exists(); +     +    if (rel == IR_COND_GE || rel == IR_COND_GT) { +      F_Or *f_or = f_exists->add_and()->add_or(); +      for (int i = 0; i < v.size(); i++) { +        Variable_ID e = f_exists->declare(tmp_e()); +        F_And *f_and = f_or->add_and(); +        GEQ_Handle h = f_and->add_GEQ(); +        h.update_coef(lhs, 1); +        h.update_coef(e, -1); +        if (rel == IR_COND_GT) +          h.update_const(-1); +         +        exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); +      } +    } +    else if (rel == IR_COND_LE || rel == IR_COND_LT) { +      F_And *f_and = f_exists->add_and(); +      for (int i = 0; i < v.size(); i++) { +        Variable_ID e = f_exists->declare(tmp_e());         +        GEQ_Handle h = f_and->add_GEQ(); +        h.update_coef(lhs, -1); +        h.update_coef(e, 1); +     
   if (rel == IR_COND_LT) +          h.update_const(-1); +         +        exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); +      } +    } +    else if (rel == IR_COND_EQ) { +      F_Or *f_or = f_exists->add_and()->add_or(); +      for (int i = 0; i < v.size(); i++) { +        Variable_ID e = f_exists->declare(tmp_e()); +        F_And *f_and = f_or->add_and(); +         +        EQ_Handle h = f_and->add_EQ(); +        h.update_coef(lhs, 1); +        h.update_coef(e, -1); +         +        exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, false); +         +        for (int j = 0; j < v.size(); j++) +          if (j != i) { +            Variable_ID e2 = f_exists->declare(tmp_e()); +            GEQ_Handle h2 = f_and->add_GEQ(); +            h2.update_coef(e, -1); +            h2.update_coef(e2, 1); +             +            exp2formula(ir, r, f_and, freevars, v[j], e2, side, IR_COND_EQ, false); +          } +      } +       +      for (int i = 0; i < v.size(); i++) +        delete v[i]; +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    if (destroy) +      delete repr; +  } +  case IR_OP_MAX: +  { +    std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); +     +    F_Exists *f_exists = f_root->add_exists(); +     +    if (rel == IR_COND_LE || rel == IR_COND_LT) { +      F_Or *f_or = f_exists->add_and()->add_or(); +      for (int i = 0; i < v.size(); i++) { +        Variable_ID e = f_exists->declare(tmp_e()); +        F_And *f_and = f_or->add_and(); +        GEQ_Handle h = f_and->add_GEQ(); +        h.update_coef(lhs, -1); +        h.update_coef(e, 1); +        if (rel == IR_COND_LT) +          h.update_const(-1); +         +        exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); +      } +    } +    else if (rel == IR_COND_GE || rel == IR_COND_GT) { +      F_And *f_and = f_exists->add_and(); +      for (int i = 0; i < v.size(); i++) { +        Variable_ID e = 
f_exists->declare(tmp_e());         +        GEQ_Handle h = f_and->add_GEQ(); +        h.update_coef(lhs, 1); +        h.update_coef(e, -1); +        if (rel == IR_COND_GT) +          h.update_const(-1); +         +        exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); +      } +    } +    else if (rel == IR_COND_EQ) { +      F_Or *f_or = f_exists->add_and()->add_or(); +      for (int i = 0; i < v.size(); i++) { +        Variable_ID e = f_exists->declare(tmp_e()); +        F_And *f_and = f_or->add_and(); +         +        EQ_Handle h = f_and->add_EQ(); +        h.update_coef(lhs, 1); +        h.update_coef(e, -1); +         +        exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, false); +         +        for (int j = 0; j < v.size(); j++) +          if (j != i) { +            Variable_ID e2 = f_exists->declare(tmp_e()); +            GEQ_Handle h2 = f_and->add_GEQ(); +            h2.update_coef(e, 1); +            h2.update_coef(e2, -1); +             +            exp2formula(ir, r, f_and, freevars, v[j], e2, side, IR_COND_EQ, false); +          } +      } +       +      for (int i = 0; i < v.size(); i++) +        delete v[i]; +    } +    else +      throw std::invalid_argument("unsupported condition type"); +     +    if (destroy) +      delete repr; +  } +  case IR_OP_NULL: +    break; +  default: +    throw ir_exp_error("unsupported operand type"); +  } +} + +Relation arrays2relation(IR_Code *ir, std::vector<Free_Var_Decl*> &freevars, +                         const IR_ArrayRef *ref_src, const Relation &IS_w, +                         const IR_ArrayRef *ref_dst, const Relation &IS_r) { +  Relation &IS1 = const_cast<Relation &>(IS_w); +  Relation &IS2 = const_cast<Relation &>(IS_r); +   +  Relation r(IS1.n_set(), IS2.n_set()); +   +  for (int i = 1; i <= IS1.n_set(); i++) +    r.name_input_var(i, IS1.set_var(i)->name()); +   +  for (int i = 1; i <= IS2.n_set(); i++) +    r.name_output_var(i, IS2.set_var(i)->name()+"'"); +   
+  IR_Symbol *sym_src = ref_src->symbol(); +  IR_Symbol *sym_dst = ref_dst->symbol(); +  if (*sym_src != *sym_dst) { +    r.add_or(); // False Relation +    delete sym_src; +    delete sym_dst; +    return r; +  } +  else { +    delete sym_src; +    delete sym_dst; +  } +   +  F_And *f_root = r.add_and(); +   +  for (int i = 0; i < ref_src->n_dim(); i++) { +    F_Exists *f_exists = f_root->add_exists(); +    Variable_ID e1 = f_exists->declare(tmp_e()); +    Variable_ID e2 = f_exists->declare(tmp_e()); +    F_And *f_and = f_exists->add_and(); +     +    CG_outputRepr *repr_src = ref_src->index(i); +    CG_outputRepr *repr_dst = ref_dst->index(i); +     +    bool has_complex_formula = false; +    try { +      exp2formula(ir, r, f_and, freevars, repr_src, e1, 'w', IR_COND_EQ, false); +      exp2formula(ir, r, f_and, freevars, repr_dst, e2, 'r', IR_COND_EQ, false); +    } +    catch (const ir_exp_error &e) { +      has_complex_formula = true; +    } +     +    if (!has_complex_formula) { +      EQ_Handle h = f_and->add_EQ(); +      h.update_coef(e1, 1); +      h.update_coef(e2, -1); +    } +     +    repr_src->clear(); +    repr_dst->clear(); +    delete repr_src; +    delete repr_dst; +  } +   +  // add iteration space restriction +  r = Restrict_Domain(r, copy(IS1)); +  r = Restrict_Range(r, copy(IS2)); +   +  // reset the output variable names lost in restriction +  for (int i = 1; i <= IS2.n_set(); i++) +    r.name_output_var(i, IS2.set_var(i)->name()+"'"); +   +  return r; +} + +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > relation2dependences (const IR_ArrayRef *ref_src, const IR_ArrayRef *ref_dst, const Relation &r) { +  assert(r.n_inp() == r.n_out()); +   +  std::vector<DependenceVector> dependences1, dependences2;   +  std::stack<DependenceLevel> working; +  working.push(DependenceLevel(r, r.n_inp())); +   +  while (!working.empty()) { +    DependenceLevel dep = working.top(); +    working.pop(); +     +    // No dependence exists, 
move on. +    if (!dep.r.is_satisfiable()) +      continue; +     +    if (dep.level == r.n_inp()) { +      DependenceVector dv; +       +      // for loop independent dependence, use lexical order to +      // determine the correct source and destination +      if (dep.dir == 0) { +        if (*ref_src == *ref_dst) +          continue; // trivial self zero-dependence +         +        if (ref_src->is_write()) { +          if (ref_dst->is_write()) +            dv.type = DEP_W2W; +          else +            dv.type = DEP_W2R; +        } +        else { +          if (ref_dst->is_write()) +            dv.type = DEP_R2W; +          else +            dv.type = DEP_R2R; +        } +         +      } +      else if (dep.dir == 1) { +        if (ref_src->is_write()) { +          if (ref_dst->is_write()) +            dv.type = DEP_W2W; +          else +            dv.type = DEP_W2R; +        } +        else { +          if (ref_dst->is_write()) +            dv.type = DEP_R2W; +          else +            dv.type = DEP_R2R; +        } +      } +      else { // dep.dir == -1 +        if (ref_dst->is_write()) { +          if (ref_src->is_write()) +            dv.type = DEP_W2W; +          else +            dv.type = DEP_W2R; +        } +        else { +          if (ref_src->is_write()) +            dv.type = DEP_R2W; +          else +            dv.type = DEP_R2R; +        } +      } +       +      dv.lbounds = dep.lbounds; +      dv.ubounds = dep.ubounds; +      dv.sym = ref_src->symbol(); +       +      if (dep.dir == 0 || dep.dir == 1) +        dependences1.push_back(dv); +      else +        dependences2.push_back(dv); +    } +    else { +      // now work on the next dimension level +      int level = ++dep.level; +       +      coef_t lbound, ubound; +      Relation delta = Deltas(copy(dep.r)); +      delta.query_variable_bounds(delta.set_var(level), lbound, ubound); +       +      if (dep.dir == 0) { +        if (lbound > 0) { +          dep.dir = 1; +          
dep.lbounds[level-1] = lbound; +          dep.ubounds[level-1] = ubound; +           +          working.push(dep); +        } +        else if (ubound < 0) { +          dep.dir = -1; +          dep.lbounds[level-1] = -ubound; +          dep.ubounds[level-1] = -lbound; +           +          working.push(dep); +        } +        else { +          // split the dependence vector into flow- and anti-dependence +          // for the first non-zero distance, also separate zero distance +          // at this level. +          { +            DependenceLevel dep2 = dep; +             +            dep2.lbounds[level-1] =  0; +            dep2.ubounds[level-1] =  0; +             +            F_And *f_root = dep2.r.and_with_and(); +            EQ_Handle h = f_root->add_EQ(); +            h.update_coef(dep2.r.input_var(level), 1); +            h.update_coef(dep2.r.output_var(level), -1); +             +            working.push(dep2); +          } +           +          if (lbound < 0 && *ref_src != *ref_dst) { +            DependenceLevel dep2 = dep; +             +            F_And *f_root = dep2.r.and_with_and(); +            GEQ_Handle h = f_root->add_GEQ(); +            h.update_coef(dep2.r.input_var(level), 1); +            h.update_coef(dep2.r.output_var(level), -1); +            h.update_const(-1); +             +            // get tighter bounds under new constraints +            coef_t lbound, ubound; +            delta = Deltas(copy(dep2.r)); +            delta.query_variable_bounds(delta.set_var(level), +                                        lbound, ubound); +             +            dep2.dir = -1;             +            dep2.lbounds[level-1] = max(-ubound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness +            dep2.ubounds[level-1] = -lbound; +             +            working.push(dep2); +          } +           +          if (ubound > 0) { +            DependenceLevel dep2 = dep; +             +            F_And *f_root = 
dep2.r.and_with_and(); +            GEQ_Handle h = f_root->add_GEQ(); +            h.update_coef(dep2.r.input_var(level), -1); +            h.update_coef(dep2.r.output_var(level), 1); +            h.update_const(-1); +             +            // get tighter bonds under new constraints +            coef_t lbound, ubound; +            delta = Deltas(copy(dep2.r)); +            delta.query_variable_bounds(delta.set_var(level), +                                        lbound, ubound); +            dep2.dir = 1; +            dep2.lbounds[level-1] = max(lbound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness +            dep2.ubounds[level-1] = ubound; +             +            working.push(dep2); +          } +        } +      } +      // now deal with dependence vector with known direction +      // determined at previous levels +      else { +        // For messy bounds, further test to see if the dependence distance +        // can be reduced to positive/negative.  This is an omega hack. +        if (lbound == negInfinity && ubound == posInfinity) { +          { +            Relation t = dep.r; +            F_And *f_root = t.and_with_and(); +            GEQ_Handle h = f_root->add_GEQ(); +            h.update_coef(t.input_var(level), 1); +            h.update_coef(t.output_var(level), -1); +            h.update_const(-1); +             +            if (!t.is_satisfiable()) { +              lbound = 0; +            } +          } +          { +            Relation t = dep.r; +            F_And *f_root = t.and_with_and(); +            GEQ_Handle h = f_root->add_GEQ(); +            h.update_coef(t.input_var(level), -1); +            h.update_coef(t.output_var(level), 1); +            h.update_const(-1); +             +            if (!t.is_satisfiable()) { +              ubound = 0; +            } +          } +        } +         +        // Same thing as above, test to see if zero dependence +        // distance possible. 
+        if (lbound == 0 || ubound == 0) { +          Relation t = dep.r; +          F_And *f_root = t.and_with_and(); +          EQ_Handle h = f_root->add_EQ(); +          h.update_coef(t.input_var(level), 1); +          h.update_coef(t.output_var(level), -1); +           +          if (!t.is_satisfiable()) { +            if (lbound == 0) +              lbound = 1; +            if (ubound == 0) +              ubound = -1; +          } +        } +         +        if (dep.dir == -1) { +          dep.lbounds[level-1] = -ubound; +          dep.ubounds[level-1] = -lbound; +        } +        else { // dep.dir == 1 +          dep.lbounds[level-1] = lbound; +          dep.ubounds[level-1] = ubound; +        } +         +        working.push(dep); +      } +    } +  } +   +  return std::make_pair(dependences1, dependences2); +} + +void exp2constraint(IR_Code *ir, Relation &r, F_And *f_root, +                    std::vector<Free_Var_Decl *> &freevars, +                    CG_outputRepr *repr, bool destroy) { +  IR_CONDITION_TYPE cond = ir->QueryBooleanExpOperation(repr); +  switch (cond) { +  case IR_COND_LT: +  case IR_COND_LE: +  case IR_COND_EQ: +  case IR_COND_GT: +  case IR_COND_GE: { +    F_Exists *f_exist = f_root->add_exists(); +    Variable_ID e = f_exist->declare(); +    F_And *f_and = f_exist->add_and(); +    std::vector<omega::CG_outputRepr *> op = ir->QueryExpOperand(repr); +    exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, true); +    exp2formula(ir, r, f_and, freevars, op[1], e, 's', cond, true); +    if (destroy) +      delete repr; +    break; +  } +  case IR_COND_NE: { +    F_Exists *f_exist = f_root->add_exists(); +    Variable_ID e = f_exist->declare(); +    F_Or *f_or = f_exist->add_or(); +    F_And *f_and = f_or->add_and(); +    std::vector<omega::CG_outputRepr *> op = ir->QueryExpOperand(repr); +    exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, false); +    exp2formula(ir, r, f_and, freevars, op[1], e, 's', 
IR_COND_GT, false); +     +    f_and = f_or->add_and(); +    exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, true); +    exp2formula(ir, r, f_and, freevars, op[1], e, 's', IR_COND_LT, true); +     +    if (destroy) +      delete repr; +    break; +  }     +  default: +    throw ir_exp_error("unrecognized conditional expression"); +  } +} + +bool is_single_loop_iteration(const Relation &r, int level, const Relation &known) { +  int n = r.n_set(); +  Relation r1 = Intersection(copy(r), Extend_Set(copy(known), n-known.n_set())); +   +  Relation mapping(n, n); +  F_And *f_root = mapping.add_and(); +  for (int i = 1; i <= level; i++) { +    EQ_Handle h = f_root->add_EQ(); +    h.update_coef(mapping.input_var(i), 1); +    h.update_coef(mapping.output_var(i), -1); +  } +  r1 = Range(Restrict_Domain(mapping, r1)); +  r1.simplify(); +   +  Variable_ID v = r1.set_var(level); +  for (DNF_Iterator di(r1.query_DNF()); di; di++) { +    bool is_single = false; +    for (EQ_Iterator ei((*di)->EQs()); ei; ei++) +      if ((*ei).get_coef(v) != 0 && !(*ei).has_wildcards()) { +        is_single = true; +        break; +      } +     +    if (!is_single) +      return false; +  } +   +  return true; +} + + +bool is_single_iteration(const Relation &r, int dim) { +  assert(r.is_set()); +  const int n = r.n_set(); +   +  if (dim >= n) +    return true; +   +  Relation bound = get_loop_bound(r, dim); +   +  for (DNF_Iterator di(bound.query_DNF()); di; di++) { +    bool is_single = false; +    for (EQ_Iterator ei((*di)->EQs()); ei; ei++) +      if (!(*ei).has_wildcards()) { +        is_single = true; +        break; +      } +     +    if (!is_single) +      return false; +  } +   +  return true; +} + +void assign_const(Relation &r, int dim, int val) { +  const int n = r.n_out(); +   +  Relation mapping(n, n); +  F_And *f_root = mapping.add_and(); +   +  for (int i = 1; i <= n; i++) { +    if (i != dim+1) { +      EQ_Handle h = f_root->add_EQ(); +      
h.update_coef(mapping.output_var(i), 1); +      h.update_coef(mapping.input_var(i), -1); +    } +    else { +      EQ_Handle h = f_root->add_EQ(); +      h.update_coef(mapping.output_var(i), 1); +      h.update_const(-val); +    } +  } +   +  r = Composition(mapping, r); +} + + +int get_const(const Relation &r, int dim, Var_Kind type) { +  Relation &rr = const_cast<Relation &>(r); +   +  Variable_ID v; +  switch (type) { +  case Input_Var: +    v = rr.input_var(dim+1); +    break; +  case Output_Var: +    v = rr.output_var(dim+1); +    break; +  default: +    throw std::invalid_argument("unsupported variable type"); +  } +   +  for (DNF_Iterator di(rr.query_DNF()); di; di++) +    for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) +      if ((*ei).is_const(v)) +        return (*ei).get_const(); +   +  throw std::runtime_error("cannot get variable's constant value"); +} + +Relation get_loop_bound(const Relation &r, int dim) { +  assert(r.is_set()); +  const int n = r.n_set(); +   +  Relation mapping(n,n); +  F_And *f_root = mapping.add_and(); +  for (int i = 1; i <= dim+1; i++) { +    EQ_Handle h = f_root->add_EQ(); +    h.update_coef(mapping.input_var(i), 1); +    h.update_coef(mapping.output_var(i), -1); +  } +  Relation r1 = Range(Restrict_Domain(mapping, copy(r))); +  for (int i = 1; i <= n; i++) +    r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name()); +  r1.setup_names(); +  Relation r2 = Project(copy(r1), dim+1, Set_Var); +   +  return Gist(r1, r2, 1); +} + +Relation get_loop_bound(const Relation &r, int level, const Relation &known) { +  int n = r.n_set(); +  Relation r1 = Intersection(copy(r), Extend_Set(copy(known), n-known.n_set())); +   +  Relation mapping(n, n); +  F_And *f_root = mapping.add_and(); +  for (int i = 1; i <= level; i++) { +    EQ_Handle h = f_root->add_EQ(); +    h.update_coef(mapping.input_var(i), 1); +    h.update_coef(mapping.output_var(i), -1); +  } +  r1 = Range(Restrict_Domain(mapping, r1)); +  Relation r2 = Project(copy(r1), 
level, Set_Var);
+  r1 = Gist(r1, r2, 1);
+
+  for (int i = 1; i <= n; i++)
+    r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name());
+  r1.setup_names();
+
+  return r1;
+}
+
+
+
+Relation get_max_loop_bound(const std::vector<Relation> &r, int dim) {  // Union of get_loop_bound(r[i], dim) over every satisfiable r[i]
+  if (r.size() == 0)
+    return Relation::Null();  // nothing to combine
+
+  const int n = r[0].n_set();
+  Relation res(Relation::False(n));  // start from False so the first Union yields that bound unchanged
+  for (std::vector<Relation>::size_type i = 0; i < r.size(); i++) {  // unsigned index: matches the type of r.size()
+    Relation &t = const_cast<Relation &>(r[i]);  // presumably is_satisfiable() is a non-const member -- confirm
+    if (t.is_satisfiable())
+      res = Union(get_loop_bound(t, dim), res);
+  }
+
+  res.simplify();
+
+  return res;
+}
+
+Relation get_min_loop_bound(const std::vector<Relation> &r, int dim) {  // Intersection of get_loop_bound(r[i], dim) over every satisfiable r[i]
+  if (r.size() == 0)
+    return Relation::Null();  // nothing to combine
+
+  const int n = r[0].n_set();
+  Relation res(Relation::True(n));  // start from True so the first Intersection yields that bound unchanged
+  for (std::vector<Relation>::size_type i = 0; i < r.size(); i++) {  // unsigned index: matches the type of r.size()
+    Relation &t = const_cast<Relation &>(r[i]);  // presumably is_satisfiable() is a non-const member -- confirm
+    if (t.is_satisfiable())
+      res = Intersection(get_loop_bound(t, dim), res);
+  }
+
+  res.simplify();
+
+  return res;
+}
+
+void add_loop_stride(Relation &r, const Relation &bound_, int dim, int stride) {  // ANDs into r, per conjunct of bound_: var(dim+1) == e1 + stride*e2
+  F_And *f_root = r.and_with_and();
+  Relation &bound = const_cast<Relation &>(bound_);  // query_DNF() is called through a non-const reference
+  for (DNF_Iterator di(bound.query_DNF()); di; di++) {
+    F_Exists *f_exists = f_root->add_exists();
+    Variable_ID e1 = f_exists->declare(tmp_e());  // e1: existential constrained by the copied lower bounds below
+    Variable_ID e2 = f_exists->declare(tmp_e());  // e2: existential multiplier of stride
+    F_And *f_and = f_exists->add_and();
+    EQ_Handle stride_eq = f_and->add_EQ();  // stride_eq: e1 + stride*e2 - var(dim+1) == 0
+    stride_eq.update_coef(e1, 1);
+    stride_eq.update_coef(e2, stride);
+    if (!r.is_set())
+      stride_eq.update_coef(r.output_var(dim+1), -1);
+    else
+      stride_eq.update_coef(r.set_var(dim+1), -1);
+    F_Or *f_or = f_and->add_or();
+
+    for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) {
+      if ((*gi).get_coef(bound.set_var(dim+1)) > 0) {  // positive coefficient on var(dim+1): a lower bound
+        // copy the lower bound constraint
+        EQ_Handle h1 = f_or->add_and()->add_EQ();  // equality form, one alternative of the OR
+        GEQ_Handle h2 = f_and->add_GEQ();  // inequality form, added to the AND
+        for (Constr_Vars_Iter ci(*gi); ci; ci++) {
+          switch ((*ci).var->kind()) {
+            // case Set_Var:
+          case Input_Var: {
+            int pos = (*ci).var->get_position();
+            if (pos == dim + 1) {  // the strided variable itself is replaced by e1
+              h1.update_coef(e1, (*ci).coef);
+              h2.update_coef(e1, (*ci).coef);
+            }
+            else {
+              if (!r.is_set()) {
+                h1.update_coef(r.output_var(pos), (*ci).coef);
+                h2.update_coef(r.output_var(pos), (*ci).coef);
+              }
+              else {
+                h1.update_coef(r.set_var(pos), (*ci).coef);
+                h2.update_coef(r.set_var(pos), (*ci).coef);
+              }
+            }
+            break;
+          }
+          case Global_Var: {
+            Global_Var_ID g = (*ci).var->get_global_var();
+            h1.update_coef(r.get_local(g, (*ci).var->function_of()), (*ci).coef);
+            h2.update_coef(r.get_local(g, (*ci).var->function_of()), (*ci).coef);
+            break;
+          }
+          default:  // variables of any other kind are not copied
+            break;
+          }
+        }
+        h1.update_const((*gi).get_const());
+        h2.update_const((*gi).get_const());
+      }
+    }
+  }
+}
+
+
+bool is_inner_loop_depend_on_level(const Relation &r, int level, const Relation &known) {  // true if a constraint left after the Gist mentions set_var(level)
+  Relation r1 = Intersection(copy(r), Extend_Set(copy(known), r.n_set()-known.n_set()));
+  Relation r2 = copy(r1);
+  for (int i = level+1; i <= r2.n_set(); i++)  // project away every set variable above level
+    r2 = Project(r2, r2.set_var(i));
+  r2.simplify(2, 4);
+  Relation r3 = Gist(r1, r2);  // r1 simplified under the assumption that r2 holds
+
+  Variable_ID v = r3.set_var(level);
+  for (DNF_Iterator di(r3.query_DNF()); di; di++) {
+    for (EQ_Iterator ei = (*di)->EQs(); ei; ei++)
+      if ((*ei).get_coef(v) != 0)
+        return true;
+
+    for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++)
+      if ((*gi).get_coef(v) != 0)
+        return true;
+  }
+
+  return false;
+}
+
+Relation adjust_loop_bound(const Relation &r, int level, int adjustment) {  // offsets variable `level` by -adjustment in the gisted part of r
+  if (adjustment == 0)
+    return copy(r);  // nothing to shift
+
+  const int n = r.n_set();
+  Relation r1 = copy(r);
+  for (int i = level+1; i <= r1.n_set(); i++)  // r1: r with every set variable above level projected away
+    r1 = Project(r1, r1.set_var(i));
+  r1.simplify(2, 4);
+  Relation r2 = Gist(copy(r), copy(r1));  // r2: r simplified under the assumption that r1 holds
+
+  Relation mapping(n, n);
+  F_And *f_root = mapping.add_and();
+  for (int i = 1; i <= n; i++)
+    if (i == level) {
+      EQ_Handle h = f_root->add_EQ();
+      h.update_coef(mapping.input_var(level), -1);
+      h.update_coef(mapping.output_var(level), 1);
+      h.update_const(static_cast<coef_t>(adjustment));  // output_var(level) == input_var(level) - adjustment
+    }
+    else {
+      EQ_Handle h = f_root->add_EQ();  // identity on every other position
+      h.update_coef(mapping.input_var(i), -1);
+      h.update_coef(mapping.output_var(i), 1);
+    }
+
+  r2 = Range(Restrict_Domain(mapping, r2));
+  r1 = Intersection(r1, r2);
+  r1.simplify();
+
+  for (int i = 1; i <= n; i++)  // carry r's set-variable names over to the result
+    r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name());
+  r1.setup_names();
+  return r1;
+}
+
+Relation permute_relation(const std::vector<int> &pi) {  // n-to-n relation with output_var(i+1) == input_var(pi[i]+1)
+  const int n = pi.size();
+
+  Relation r(n, n);
+  F_And *f_root = r.add_and();
+
+  for (int i = 0; i < n; i++) {
+    EQ_Handle h = f_root->add_EQ();
+    h.update_coef(r.output_var(i+1), 1);
+    h.update_coef(r.input_var(pi[i]+1), -1);
+  }
+
+  return r;
+}
+
+Variable_ID find_index(Relation &r, const std::string &s, char side) {  // variable of r named s (s + "'" on the output side); NULL if none
+  // Omega quirks: assure the names are propagated inside the relation
+  r.setup_names();
+
+  if (r.is_set()) { // side == 's'
+    for (int i = 1; i <= r.n_set(); i++) {
+      std::string ss = r.set_var(i)->name();
+      if (s == ss) {
+        return r.set_var(i);
+      }
+    }
+  }
+  else if (side == 'w') {
+    for (int i = 1; i <= r.n_inp(); i++) {
+      std::string ss = r.input_var(i)->name();
+      if (s == ss) {
+        return r.input_var(i);
+      }
+    }
+  }
+  else { // side == 'r'
+    for (int i = 1; i <= r.n_out(); i++) {
+      std::string ss = r.output_var(i)->name();
+      if (s+"'" == ss) {  // output-side names are matched with a trailing apostrophe
+        return r.output_var(i);
+      }
+    }
+  }
+
+  return NULL;
+}
+
diff --git a/src/parse_expr.ll b/src/parse_expr.ll
new file mode 100644
index 0000000..a9b389f
--- /dev/null
+++ b/src/parse_expr.ll
@@ -0,0 +1,24 @@
+%{
+// some C++ code
+#include "chill_run_util.hh"
+#include "parse_expr.tab.hh"
+%}
+
+%option noyywrap
+
+%%
+[ \t]+                  /*ignore*/
+\n                      /*ignore*/
+L[0-9]+                 { yylval.val = atoi(&yytext[1]); return LEVEL; }
+[0-9]+                  { yylval.val = atoi(yytext); return NUMBER; }
+\<\=                    return LE;
+\>\=                    return GE;
+\=(\=)?                 return EQ;
+[a-zA-Z_][a-zA-Z_0-9]*  {
+                           yylval.str_val = new char[yyleng+1];  // +1 for the terminating NUL copied by strcpy
+                           strcpy(yylval.str_val, yytext);
+                           return VARIABLE;
+                         }
+.                        return (int)yytext[0];
+%%
+
diff --git a/src/parse_expr.yy b/src/parse_expr.yy
new file mode 100644
index 0000000..c2943c2
--- /dev/null
+++ b/src/parse_expr.yy
@@ -0,0 +1,85 @@
+%{
+#include "chill_run_util.hh"
+#include "parse_expr.ll.hh"
+
+extern int yydebug;
+
+void yyerror(const char*);
+int yyparse();  // no %parse-param in this grammar: the parser takes no arguments and reports through return_rel
+
+static simap_vec_t* return_rel; // used as the return value for yyparse
+
+%}
+
+%union {
+  int val;
+  char* str_val;
+  simap_t* cond_item;
+  simap_vec_t* cond;
+}
+
+%token <val> NUMBER
+%token <val> LEVEL
+%token <str_val> VARIABLE
+
+%left LE GE EQ '<' '>'
+%left '-' '+' '*' '/'
+
+/*the final output from this language should be an Omega Relation object*/
+%type <cond> cond prog
+%type <cond_item> expr add_expr mul_expr neg_expr
+
+%%
+prog : cond                      { return_rel = make_prog($1); }
+;
+
+cond : expr '>' expr             { $$ = make_cond_gt($1, $3); }
+     | expr '<' expr             { $$ = make_cond_lt($1, $3); }
+     | expr GE expr              { $$ = make_cond_ge($1, $3); }
+     | expr LE expr              { $$ = make_cond_le($1, $3); }
+     | expr EQ expr              { $$ = make_cond_eq($1, $3); }
+;
+
+expr : add_expr                  { $$ = $1; }
+;
+
+add_expr : add_expr '+' mul_expr { $$ = make_cond_item_add($1,$3); }
+         | add_expr '-' mul_expr { $$ = make_cond_item_sub($1,$3); }
+         | mul_expr              { $$ = $1; }
+;
+
+mul_expr : mul_expr '*' neg_expr { $$ = make_cond_item_mul($1,$3); }
+         | neg_expr              { $$ = $1; }
+;
+
+neg_expr : '-' neg_expr          { $$ = make_cond_item_neg($2); }
+         | '(' expr ')'          { $$ = $2; }
+         | NUMBER                { $$ = make_cond_item_number($1); }
+         | LEVEL                 { $$ = make_cond_item_level($1); }
+         | VARIABLE              { $$ = make_cond_item_variable($1); }
+;
+%%
+
+void yyerror(const char* msg) {  // parser error callback: prints msg on stderr
+  fprintf(stderr, "Parse error: %s\n", msg);
+}
+
+simap_vec_t* parse_relation_vector(const char* expr) {  // parses expr; returns the make_prog() result, or NULL if parsing fails
+  yydebug=0;
+  YY_BUFFER_STATE state;
+
+  //if(yylex_init()) {
+  //   TODO: error out or something
+  //}
+
+  state = yy_scan_string(expr);  // scan from the in-memory string
+
+  if(yyparse()) {
+    return_rel = NULL;  // parse failed: never hand back the result left over from an earlier call
+  }
+
+  yy_delete_buffer(state);
+  yylex_destroy();
+  return return_rel;
+}
+
|
