diff options
Diffstat (limited to 'chill/src')
-rw-r--r-- | chill/src/chill_run.cc | 394 | ||||
-rw-r--r-- | chill/src/chill_run_util.cc | 129 | ||||
-rw-r--r-- | chill/src/chillmodule.cc | 1834 | ||||
-rw-r--r-- | chill/src/dep.cc | 567 | ||||
-rw-r--r-- | chill/src/ir_rose.cc | 2296 | ||||
-rw-r--r-- | chill/src/ir_rose_utils.cc | 88 | ||||
-rw-r--r-- | chill/src/irtools.cc | 279 | ||||
-rw-r--r-- | chill/src/loop.cc | 1870 | ||||
-rw-r--r-- | chill/src/loop_basic.cc | 1538 | ||||
-rw-r--r-- | chill/src/loop_datacopy.cc | 2166 | ||||
-rw-r--r-- | chill/src/loop_extra.cc | 224 | ||||
-rw-r--r-- | chill/src/loop_tile.cc | 630 | ||||
-rw-r--r-- | chill/src/loop_unroll.cc | 1166 | ||||
-rw-r--r-- | chill/src/omegatools.cc | 2312 | ||||
-rw-r--r-- | chill/src/parse_expr.ll | 24 | ||||
-rw-r--r-- | chill/src/parse_expr.yy | 85 |
16 files changed, 15602 insertions, 0 deletions
diff --git a/chill/src/chill_run.cc b/chill/src/chill_run.cc new file mode 100644 index 0000000..59cd6e5 --- /dev/null +++ b/chill/src/chill_run.cc @@ -0,0 +1,394 @@ +#include "chilldebug.h" + +// this is a little messy. the Makefile should be able to define one or the other +#ifndef PYTHON +#ifndef LUA +#define LUA +#endif +#endif + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +//#include "chill_env.hh" + +#include "loop.hh" +#include <omega.h> +#include "ir_code.hh" + +#ifdef CUDACHILL + +#ifdef BUILD_ROSE +#include "loop_cuda_rose.hh" +#include "ir_cudarose.hh" +#elif BUILD_SUIF +#include "loop_cuda.hh" +#include "ir_cudasuif.hh" +#endif + +#else + +#ifdef BUILD_ROSE +#include "ir_rose.hh" +#elif BUILD_SUIF +#include "ir_suif.hh" +#endif + +#endif + +#ifdef LUA +#define lua_c //Get the configuration defines for doing an interactive shell +#include <lua.hpp> //All lua includes wrapped in extern "C" +#include "chill_env.hh" // Lua wrapper functions for CHiLL +#elif PYTHON +#include "chillmodule.hh" // Python wrapper functions for CHiLL +#endif + +//--- +// CHiLL globals +//--- +Loop *myloop = NULL; +IR_Code *ir_code = NULL; +bool repl_stop = false; +bool is_interactive = false; + +std::vector<IR_Control *> ir_controls; +std::vector<int> loops; + +// this whole section belongs somewhere else +#ifdef LUA +//--- +// Interactive mode functions, directly copied out of lua.c +//--- +// The Lua interpreter state +static lua_State *globalL = NULL; +static const char *progname = "CHiLL"; + +static void lstop (lua_State *L, lua_Debug *ar) { + (void)ar; /* unused arg. 
*/ + lua_sethook(L, NULL, 0, 0); + luaL_error(L, "interrupted!"); +} + + +static void laction (int i) { + signal(i, SIG_DFL); /* if another SIGINT happens before lstop, + terminate process (default action) */ + lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1); +} + + +static void l_message (const char *pname, const char *msg) { + if (pname) fprintf(stderr, "%s: ", pname); + fprintf(stderr, "%s\n", msg); + fflush(stderr); // ? does this do anything ? +} + + +static int report (lua_State *L, int status) { + if (status && !lua_isnil(L, -1)) { + const char *msg = lua_tostring(L, -1); + if (msg == NULL) msg = "(error object is not a string)"; + l_message(progname, msg); + lua_pop(L, 1); + } + return status; +} + + +static int traceback (lua_State *L) { + if (!lua_isstring(L, 1)) /* 'message' not a string? */ + return 1; /* keep it intact */ + lua_getfield(L, LUA_GLOBALSINDEX, "debug"); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + return 1; + } + lua_getfield(L, -1, "traceback"); + if (!lua_isfunction(L, -1)) { + lua_pop(L, 2); + return 1; + } + lua_pushvalue(L, 1); /* pass error message */ + lua_pushinteger(L, 2); /* skip this function and traceback */ + lua_call(L, 2, 1); /* call debug.traceback */ + return 1; +} + + +static int docall (lua_State *L, int narg, int clear) { + DEBUG_PRINT("\ndocall()\n"); + int status; + int base = lua_gettop(L) - narg; /* function index */ + lua_pushcfunction(L, traceback); /* push traceback function */ + lua_insert(L, base); /* put it under chunk and args */ + signal(SIGINT, laction); + + DEBUG_PRINT("status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base);\n"); + + status = lua_pcall(L, narg, (clear ? 
0 : LUA_MULTRET), base); + signal(SIGINT, SIG_DFL); + lua_remove(L, base); /* remove traceback function */ + /* force a complete garbage collection in case of errors */ + if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); + return status; +} + +static int dofile (lua_State *L, const char *name) { + int status = luaL_loadfile(L, name) || docall(L, 0, 1); + return report(L, status); +} + +static const char *get_prompt (lua_State *L, int firstline) { + const char *p; + lua_getfield(L, LUA_GLOBALSINDEX, firstline ? "_PROMPT" : "_PROMPT2"); + p = lua_tostring(L, -1); + if (p == NULL) p = (firstline ? LUA_PROMPT : LUA_PROMPT2); + lua_pop(L, 1); /* remove global */ + return p; +} + + +static int incomplete (lua_State *L, int status) { + if (status == LUA_ERRSYNTAX) { + size_t lmsg; + const char *msg = lua_tolstring(L, -1, &lmsg); + const char *tp = msg + lmsg - (sizeof(LUA_QL("<eof>")) - 1); + if (strstr(msg, LUA_QL("<eof>")) == tp) { + lua_pop(L, 1); + return 1; + } + } + return 0; /* else... */ +} + + +static int pushline (lua_State *L, int firstline) { + char buffer[LUA_MAXINPUT]; + char *b = buffer; + size_t l; + const char *prmt = get_prompt(L, firstline); + if (lua_readline(L, b, prmt) == 0) + return 0; /* no input */ + l = strlen(b); + if (l > 0 && b[l-1] == '\n') /* line ends with newline? */ + b[l-1] = '\0'; /* remove it */ + if (firstline && b[0] == '=') /* first line starts with `=' ? */ + lua_pushfstring(L, "return %s", b+1); /* change it to `return' */ + else + lua_pushstring(L, b); + lua_freeline(L, b); + return 1; +} + + +static int loadline (lua_State *L) { + int status; + lua_settop(L, 0); + if (!pushline(L, 1)) + return -1; /* no input */ + for (;;) { /* repeat until gets a complete line */ + status = luaL_loadbuffer(L, lua_tostring(L, 1), lua_strlen(L, 1), "=stdin"); + if (!incomplete(L, status)) break; /* cannot try to add lines? */ + if (!pushline(L, 0)) /* no more input? */ + return -1; + lua_pushliteral(L, "\n"); /* add a new line... 
*/ + lua_insert(L, -2); /* ...between the two lines */ + lua_concat(L, 3); /* join them */ + } + lua_saveline(L, 1); + lua_remove(L, 1); /* remove line */ + return status; +} + + +static void dotty (lua_State *L) { + int status; + const char *oldprogname = progname; + progname = NULL; + while ((status = loadline(L)) != -1) { + if (status == 0) status = docall(L, 0, 0); + report(L, status); + if(repl_stop) + break; + if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ + lua_getglobal(L, "print"); + lua_insert(L, 1); + if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) + l_message(progname, lua_pushfstring(L, + "error calling " LUA_QL("print") " (%s)", + lua_tostring(L, -1))); + } + } + lua_settop(L, 0); /* clear stack */ + fputs("\n", stdout); + fflush(stdout); + progname = oldprogname; +} +#endif + +//--- +//--- + +//--- +// CHiLL program main +// Initialize state and run script or interactive mode +//--- +int main( int argc, char* argv[] ) +{ + DEBUG_PRINT("%s main()\n", argv[0]); + if (argc > 2) { + fprintf(stderr, "Usage: %s [script_file]\n", argv[0]); + exit(-1); + } + + int fail = 0; + +#ifdef PYTHON + // Create PYTHON interpreter + /* Pass argv[0] to the Python interpreter */ + Py_SetProgramName(argv[0]); + + /* Initialize the Python interpreter. Required. */ + Py_Initialize(); + + /* Add a static module */ + initchill(); + + if (argc == 2) { +/* #ifdef CUDACHILL --- This code is for translating lua to python before interprating. 
--- + //DEBUG_PRINT("\ncalling python\n"); + // file interpretlua.py has routines to read the lua transformation file + PyRun_SimpleString("from interpretlua import *"); + //DEBUG_PRINT("DONE calling python import of functions\n\n"); + char pythoncommand[800]; + sprintf(pythoncommand, "\n\ndopytransform(\"%s\")\0", argv[1]); + //DEBUG_PRINT("in C, running python command '%s'\n", pythoncommand); + + PyRun_SimpleString( pythoncommand ); + #else*/ + FILE* f = fopen(argv[1], "r"); + if(!f){ + printf("can't open script file \"%s\"\n", argv[1]); + exit(-1); + } + PyRun_SimpleFile(f, argv[1]); + fclose(f); + } + if (argc == 1) { + //--- + // Run a CHiLL interpreter + //--- + printf("CHiLL v0.2.1 (built on %s)\n", CHILL_BUILD_DATE); + printf("Copyright (C) 2008 University of Southern California\n"); + printf("Copyright (C) 2009-2012 University of Utah\n"); + //is_interactive = true; // let the lua interpreter know. + fflush(stdout); + // TODO: read lines of python code. + //Not sure if we should set fail from interactive mode + printf("CHiLL ending...\n"); + fflush(stdout); + } + + //printf("DONE with PyRun_SimpleString()\n"); +// #endif --- endif for CUDACHILL --- +#endif + //END python setup +#ifdef LUA + + //Create interpreter + lua_State* L = lua_open(); + globalL = L; + + //Initialize the std libs + luaL_openlibs(L); + + //Initialize globals + register_globals(L); + + //Register CHiLL functions + register_functions(L); + + if (argc == 2) { + //--- + // Run a CHiLL script from a file + //--- + + //Check that the file can be opened + FILE* f = fopen(argv[1],"r"); + if(!f){ + printf("can't open script file \"%s\"\n", argv[1]); + exit(-1); + } + fclose(f); + + DEBUG_PRINT("\n*********************evaluating file '%s'\n", argv[1]); + + //Evaluate the file + fail = dofile(L, argv[1]); + if(!fail){ + fprintf(stderr, "script success!\n"); + } + } + if (argc == 1 && isatty((int)fileno(stdin))) { + //--- + // Run a CHiLL interpreter + //--- + printf("CUDA-CHiLL v0.2.1 (built on 
%s)\n", CHILL_BUILD_DATE); + printf("Copyright (C) 2008 University of Southern California\n"); + printf("Copyright (C) 2009-2012 University of Utah\n"); + is_interactive = true; // let the lua interpreter know. + fflush(stdout); + dotty(L); + //Not sure if we should set fail from interactive mode + printf("CUDA-CHiLL ending...\n"); + fflush(stdout); + } +#endif + + + if (!fail && ir_code != NULL && myloop != NULL && myloop->stmt.size() != 0 && !myloop->stmt[0].xform.is_null()) { +#ifdef CUDACHILL + int lnum; + #ifdef PYTHON + lnum = 0; + #else + lnum = get_loop_num( L ); + #endif + #ifdef BUILD_ROSE + ((IR_cudaroseCode *)(ir_code))->commit_loop(myloop, lnum); + #elif BUILD_SUIF + ((IR_cudasuifCode *)(ir_code))->commit_loop(myloop, lnum); + #endif +#else + int lnum_start; + int lnum_end; + #ifdef PYTHON + lnum_start = get_loop_num_start(); + lnum_end = get_loop_num_end(); + DEBUG_PRINT("calling ROSE code gen? loop num %d\n", lnum); + #else + lnum_start = get_loop_num_start(L); + lnum_end = get_loop_num_end(L); + DEBUG_PRINT("calling ROSE code gen? 
/*
 * Format an int as its decimal text.
 *
 * The scratch buffer is sized for any int value including the sign and the
 * terminating NUL (the previous 4-byte buffer overflowed for every value
 * that needs more than three characters), and snprintf bounds the write.
 */
static std::string to_string(int ival) {
  char buffer[32];
  snprintf(buffer, sizeof(buffer), "%d", ival);
  return std::string(buffer);
}
delete lhs; + return nvec; +} + +simap_t* make_cond_item_add(simap_t* lhs, simap_t* rhs) { + for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) + (*rhs)[it->first] += it->second; + delete lhs; + return rhs; +} + +simap_t* make_cond_item_sub(simap_t* lhs, simap_t* rhs) { + for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) + (*rhs)[it->first] -= it->second; + delete lhs; + return rhs; +} + +simap_t* make_cond_item_mul(simap_t* lhs, simap_t* rhs) { + (*lhs)[to_string(0)] += 0; + (*rhs)[to_string(0)] += 0; + if(rhs->size() == 1) { + int t = (*rhs)[to_string(0)]; + for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) + it->second *= t; + delete rhs; + return lhs; + } + else if(rhs->size() == 1) { + int t = (*lhs)[to_string(0)]; + for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) + it->second *= t; + delete lhs; + return rhs; + } + else { + fprintf(stderr, "require Presburger formula"); + delete lhs; + delete rhs; + // exit(2); <-- this may be a boost feature + } +} + +simap_t* make_cond_item_neg(simap_t* expr) { + for (simap_t::iterator it = expr->begin(); it != expr->end(); it++) { + it->second = -(it->second); + } + return expr; +} + +simap_t* make_cond_item_number(int n) { + simap_t* nmap = new simap_t(); + (*nmap)[to_string(0)] = n; + return nmap; +} + +simap_t* make_cond_item_variable(const char* var) { + simap_t* nmap = new simap_t(); + (*nmap)[std::string(var)] = 1; + return nmap; +} + +simap_t* make_cond_item_level(int n) { + simap_t* nmap = new simap_t(); + (*nmap)[to_string(n)] = 1; + return nmap; +} + +/*simap_t* make_cond_item_variable(const char* varname) { + simap_t* nmap = new simap_t(); +#ifdef PYTHON + PyObject* globals = PyEval_GetGlobals(); + PyObject* itemval = PyDict_GetItemString(globals, varname); + +#elif LUA +#endif +}*/ diff --git a/chill/src/chillmodule.cc b/chill/src/chillmodule.cc new file mode 100644 index 0000000..fbeb477 --- /dev/null +++ b/chill/src/chillmodule.cc @@ -0,0 +1,1834 @@ 
+ +// chill interface to python + +#include "chilldebug.h" + +#ifdef CUDACHILL + +#include "rose.h" // ?? +#include "loop_cuda_rose.hh" +#include "ir_rose.hh" +#include "ir_cudarose.hh" + +#include <vector> + +#else + +#include "chill_run_util.hh" + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <omega.h> +#include "loop.hh" +#include "ir_code.hh" +#ifdef BUILD_ROSE +#include "ir_rose.hh" +#elif BUILD_SUIF +#include "ir_suif.hh" +#endif + +#endif + +#include "chillmodule.hh" + +// TODO +#undef _POSIX_C_SOURCE +#undef _XOPEN_SOURCE +#include <Python.h> + +using namespace omega; + +// -- Cuda CHiLL global variables -- +#ifdef CUDACHILL + +extern LoopCuda *myloop; +extern IR_Code *ir_code; +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +#else + +extern Loop *myloop; +extern IR_Code *ir_code; +extern bool is_interactive; +extern bool repl_stop; + +std::string procedure_name; +std::string source_filename; + +int loop_start_num; +int loop_end_num; + +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +#endif + +// ----------------------- // +// CHiLL support functions // +// ----------------------- // +#ifndef CUDACHILL +// not sure yet if this actually needs to be exposed to the python interface +// these four functions are here to maintain similarity to the Lua interface +int get_loop_num_start() { + return loop_start_num; +} + +int get_loop_num_end() { + return loop_end_num; +} + +static void set_loop_num_start(int start_num) { + loop_start_num = start_num; +} + +static void set_loop_num_end(int end_num) { + loop_end_num = end_num; +} + +// TODO: finalize_loop(int,int) and init_loop(int,int) are identical to thier Lua counterparts. 
+// consider integrating them + +void finalize_loop(int loop_num_start, int loop_num_end) { + if (loop_num_start == loop_num_end) { + ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); + ir_controls[loops[loop_num_start]] = NULL; + } + else { + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) + parm.push_back(ir_controls[i]); + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + ir_code->ReplaceCode(block, myloop->getCode()); + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + delete ir_controls[i]; + ir_controls[i] = NULL; + } + } + delete myloop; +} +void finalize_loop() { + int loop_num_start = get_loop_num_start(); + int loop_num_end = get_loop_num_end(); + finalize_loop(loop_num_start, loop_num_end); +} +static void init_loop(int loop_num_start, int loop_num_end) { + if (source_filename.empty()) { + fprintf(stderr, "source file not set when initializing the loop"); + if (!is_interactive) + exit(2); + } + else { + if (ir_code == NULL) { + #ifdef BUILD_ROSE + if (procedure_name.empty()) + procedure_name = "main"; + #elif BUILD_SUIF + if (procedure_number == -1) + procedure_number = 0; + #endif + + #ifdef BUILD_ROSE + ir_code = new IR_roseCode(source_filename.c_str(), procedure_name.c_str()); + #elif BUILD_SUIF + ir_code = new IR_suifCode(source_filename.c_str(), procedure_name.c_str()); + #endif + + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + } + delete block; + } + if (myloop != NULL && myloop->isInitialized()) { + finalize_loop(); + } + } + set_loop_num_start(loop_num_start); + set_loop_num_end(loop_num_end); + if (loop_num_end < loop_num_start) { + fprintf(stderr, "the last loop must be after the start loop"); + if (!is_interactive) + exit(2); + } + if (loop_num_end >= 
loops.size()) { + fprintf(stderr, "loop %d does not exist", loop_num_end); + if (!is_interactive) + exit(2); + } + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + if (ir_controls[i] == NULL) { + fprintf(stderr, "loop has already been processed"); + if (!is_interactive) + exit(2); + } + parm.push_back(ir_controls[i]); + } + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + myloop = new Loop(block); + delete block; + //if (is_interactive) printf("%s ", PROMPT_STRING); +} +#endif + +// ----------------------- // +// Python support funcions // +// ----------------------- // + +// -- CHiLL support -- // +static void strict_arg_num(PyObject* args, int arg_num, const char* fname = NULL) { + int arg_given = PyTuple_Size(args); + char msg[128]; + if(arg_num != arg_given) { + if(fname) + sprintf(msg, "%s: expected %i arguments, was given %i.", fname, arg_num, arg_given); + else + sprintf(msg, "Expected %i argumets, was given %i.", arg_num, arg_given); + throw std::runtime_error(msg); + } +} + +static int strict_arg_range(PyObject* args, int arg_min, int arg_max, const char* fname = NULL) { + int arg_given = PyTuple_Size(args); + char msg[128]; + if(arg_given < arg_min || arg_given > arg_max) { + if(fname) + sprintf(msg, "%s: expected %i to %i arguments, was given %i.", fname, arg_min, arg_max, arg_given); + else + sprintf(msg, "Expected %i to %i, argumets, was given %i.", arg_min, arg_max, arg_given); + throw std::runtime_error(msg); + } + return arg_given; +} + +static int intArg(PyObject* args, int index, int dval = 0) { + if(PyTuple_Size(args) <= index) + return dval; + int ival; + PyObject *item = PyTuple_GetItem(args, index); + Py_INCREF(item); + if (PyInt_Check(item)) ival = PyInt_AsLong(item); + else { + fprintf(stderr, "argument at index %i is not an int\n", index); + exit(-1); + } + return ival; +} + +static std::string strArg(PyObject* args, int index, const char* dval = NULL) { + 
if(PyTuple_Size(args) <= index) + return dval; + std::string strval; + PyObject *item = PyTuple_GetItem(args, index); + Py_INCREF(item); + if (PyString_Check(item)) strval = strdup(PyString_AsString(item)); + else { + fprintf(stderr, "argument at index %i is not an string\n", index); + exit(-1); + } + return strval; +} + +static bool boolArg(PyObject* args, int index, bool dval = false) { + if(PyTuple_Size(args) <= index) + return dval; + bool bval; + PyObject* item = PyTuple_GetItem(args, index); + Py_INCREF(item); + return (bool)PyObject_IsTrue(item); +} + +static bool tostringintmapvector(PyObject* args, int index, std::vector<std::map<std::string,int> >& vec) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + //TODO: Typecheck + int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + std::map<std::string,int> map; + PyObject* dict = PyList_GetItem(seq, i); + PyObject* keys = PyDict_Keys(dict); + //TODO: Typecheck + int dict_len = PyList_Size(keys); + for(int j = 0; j < dict_len; j++) { + PyObject* key = PyList_GetItem(keys, j); + PyObject* value = PyDict_GetItem(dict, key); + std::string str_key = strdup(PyString_AsString(key)); + int int_value = PyInt_AsLong(value); + map[str_key] = int_value; + } + vec.push_back(map); + } + return true; +} + +static bool tointvector(PyObject* seq, std::vector<int>& vec) { + //TODO: Typecheck + int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + PyObject* item = PyList_GetItem(seq, i); + vec.push_back(PyInt_AsLong(item)); + } + return true; +} + +static bool tointvector(PyObject* args, int index, std::vector<int>& vec) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + return tointvector(seq, vec); +} + +static bool tointset(PyObject* args, int index, std::set<int>& set) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + //TODO: Typecheck + 
int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + PyObject* item = PyList_GetItem(seq, i); + set.insert(PyInt_AsLong(item)); + } + return true; +} +static bool tointmatrix(PyObject* args, int index, std::vector<std::vector<int> >& mat) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq_one = PyTuple_GetItem(args, index); + int seq_one_len = PyList_Size(seq_one); + for(int i = 0; i < seq_one_len; i++) { + std::vector<int> vec; + PyObject* seq_two = PyList_GetItem(seq_one, i); + int seq_two_len = PyList_Size(seq_two); + for(int j = 0; j < seq_two_len; j++) { + PyObject* item = PyList_GetItem(seq_two, j); + vec.push_back(PyInt_AsLong(item)); + } + mat.push_back(vec); + } + return true; +} + +#ifdef CUDACHILL +// ------------------------------ // +// Cuda CHiLL interface functions // +// ------------------------------ // + +static PyObject * +chill_print_code(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC print_code() PY\n"); + + myloop->printCode(); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); + +} + +static PyObject * +chill_print_ri(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_print_ri() called from python\n"); + myloop->printRuntimeInfo(); + DEBUG_PRINT("\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_idx(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_print_idx() called from python\n"); + myloop->printIndexes(); + DEBUG_PRINT("\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_dep(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_print_dep()\n"); + std::cout << myloop->dep; + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_space(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_print_space()\n"); + for (int i = 0; i < myloop->stmt.size(); i++) { + DEBUG_PRINT("s%d: ", i+1); + Relation r; + if (!myloop->stmt[i].xform.is_null()) + r = 
Composition(copy(myloop->stmt[i].xform), copy(myloop->stmt[i].IS)); + else + r = copy(myloop->stmt[i].IS); + r.simplify(2, 4); + r.print(); + } + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_num_statements(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_num_statements() called from python\n"); + int num = myloop->stmt.size(); + //DEBUG_PRINT("C num_statement() = %d\n", num); + return Py_BuildValue( "i", num ); // BEWARE "d" is DOUBLE, not int +} + +static PyObject * +chill_does_var_exist( PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_does_var_exist()\n"); + int yesno = 0; + // TODO if (myloop->symbolExists(symName)) yesno = 1; + DEBUG_PRINT("*** chill_does_var_exist *** UNIMPLEMENTED\n"); + return Py_BuildValue( "i", yesno); // there seems to be no boolean type +} + + +static PyObject * +chill_add_sync(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_add_sync() *UNTESTED*\n"); + int sstmt = -123; + // char index_name[180]; + static char Buffer[1024]; + static char *index_name = &Buffer[0]; + + if (!PyArg_ParseTuple(args, "is", &sstmt, &index_name)){ + fprintf(stderr, "chill_add_sync, can't parse statement number and name passed from python\n"); + exit(-1); + } + + DEBUG_PRINT("chill_add_sync, statement %d index_name '%s'\n", + sstmt, index_name); + std::string idxName( index_name); // ?? 
+ myloop->addSync(sstmt, idxName); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_rename_index(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_rename_index() called from python\n"); + int sstmt; + //char oldname[80], newname[80]; + static char old[1024], newn[1024]; + + static char *oldname = &old[0], *newname=&newn[0]; + + if (!PyArg_ParseTuple(args, "iss", &sstmt, &oldname, &newname)){ + fprintf(stderr, "chill_rename_index, can't parse statement number and names passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("chill_rename_index, statement %d oldname '%s' newname '%s'\n", + //sstmt, oldname, newname); + + std::string idxName(oldname); + std::string newName(newname); + + //DEBUG_PRINT("calling myloop->renameIndex( %d, %s, %s )\n", + //sstmt, idxName.c_str(), newName.c_str()); + + myloop->renameIndex(sstmt, idxName, newName); + + //DEBUG_PRINT("after myloop->renameIndex()\n"); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + + +//THIS NEEDS TO MOVE + + + +static PyObject * +chill_permute_v2(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C permute_v2()\n"); + //int tot = sizeof(args); + //int things = tot / sizeof(PyObject *); + //DEBUG_PRINT("tot %d bytes, %d things\n", tot, things); + + int sstmt = -123; + PyObject *pyObj; + + //if (!PyArg_ParseTuple( args, "iO", &sstmt, &pyObj)) { + //if (!PyArg_ParseTuple( args, "i", &sstmt)) { + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + Py_XINCREF(pyObj); + + // the ONLY arg is a tuple. 
figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in order tuple\n", tupleSize); + + // first has to be the statement number + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); + Py_XINCREF(tupleItem); + if (PyInt_Check( tupleItem )) sstmt = PyInt_AsLong( tupleItem ); + else { + fflush(stdout); + fprintf(stderr, "first tuple item in chill_permute_v2 is not an int?\n"); + exit(-1); + } + + //DEBUG_PRINT("stmt %d\n", sstmt); + + char **strings; + std::vector<std::string> order; + std::string *cppstrptr; + std::string cppstr; + + strings = (char **) malloc( sizeof(char *) * tupleSize ) ; // too big + for (int i=1; i<tupleSize; i++) { + tupleItem = PyTuple_GetItem(pyObj, i); + Py_XINCREF(tupleItem); + int im1 = i-1; // offset needed for the actual string vector + if (PyString_Check( tupleItem)) { + strings[im1] = strdup(PyString_AsString(tupleItem)); + //DEBUG_PRINT("item %d = '%s'\n", i, strings[im1]); + //cppstrptr = new std::string( strings[im1] ); + //order.push_back( &(new std::string( strings[im1] ))); + //order.push_back( &cppstrptr ); + + cppstr = strings[im1]; + order.push_back( cppstr ); + } + else { + fprintf(stderr, "later parameter was not a string?\n"); + exit(-1); + } + + } + + myloop->permute_cuda(sstmt,order); + //DEBUG_PRINT("returned from permute_cuda()\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + +static PyObject * +chill_tile_v2_3arg( PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("in chillmodule.cc, chill_tile_v2_3arg()\n"); + + int sstmt, level, tile_size, outer_level; + //char index_name[80], control_name[80]; + static char *index_name, *control_name; + int tiling_method; + + if (!PyArg_ParseTuple(args, "iii", &sstmt, &level, &outer_level)) { + fprintf(stderr,"chill_tile_v2, can't parse parameters passed from python\n"); + exit(-1); + } + + // 3 parameter version + //DEBUG_PRINT("chill_tile_v2( %d %d %d) (3 parameter version) \n", + //sstmt,level,outer_level); + 
myloop->tile_cuda(sstmt,level,outer_level); + //DEBUG_PRINT("chill_tile_v2 3 parameter version returning normally\n"); + Py_RETURN_NONE; +} + + +static PyObject * +chill_tile_v2_7arg( PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("in chillmodule.cc, chill_tile_v2_7arg()\n"); + + int sstmt, level, tile_size, outer_level; + //char index_name[80], control_name[80]; + static char iname[1024], cname[1024]; + static char *index_name = &iname[0], *control_name=&cname[0]; + int tiling_method; + + if (!PyArg_ParseTuple(args, "iiiissi", + &sstmt, &level, &tile_size, &outer_level, + &index_name, &control_name, &tiling_method)){ + fprintf(stderr, "chill_tile_v2_7arg, can't parse parameters passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("7 parameter version was called?\n"); + + // 7 parameter version was called + //DEBUG_PRINT("tile_v2( %d, %d, %d, %d ... )\n", + // sstmt, level, tile_size, outer_level); + + //DEBUG_PRINT("tile_v2( %d, %d, %d, %d, %s, %s, %d)\n", + //sstmt,level,tile_size,outer_level,index_name, control_name, tiling_method); + + TilingMethodType method = StridedTile; + if (tiling_method == 0) method = StridedTile; + else if (tiling_method == 1) method = CountedTile; + else fprintf(stderr, "ERROR: tile_v2 illegal tiling method, using StridedTile\n"); + + //DEBUG_PRINT("outer level %d\n", outer_level); + //DEBUG_PRINT("calling myloop->tile_cuda( %d, %d, %d, %d, %s, %s, method)\n", + // sstmt, level, tile_size, outer_level, index_name, control_name); + + // BUH level+1? 
+ myloop->tile_cuda(sstmt, level, tile_size, outer_level, index_name, control_name, method); + Py_RETURN_NONE; +} + + +static PyObject * +chill_cur_indices(PyObject *self, PyObject *args) +{ + int stmt_num = -123; + if (!PyArg_ParseTuple(args, "i", &stmt_num)){ + fprintf(stderr, "chill_cur_indides, can't parse statement number passed from python\n"); + exit(-1); + } + //DEBUG_PRINT("cur_indices( %d )\n", stmt_num); + + char formatstring[1024]; + for (int i=0; i<1024; i++) formatstring[i] = '\0'; + + int num = myloop->idxNames[stmt_num].size(); + for(int i=0; i<num; i++){ + //DEBUG_PRINT("myloop->idxNames[%d] index %d = '%s'\n", + //stmt_num, i, myloop->idxNames[stmt_num][i].c_str()); + + // backwards, works because all entries are the same + //sprintf(formatstring, "i %s", formatstring); + strcat( formatstring, "s "); + // put this in a list or something to pass back to python + } + + int l = strlen(formatstring); + if (l > 0) formatstring[l-1] = '\0'; + + //DEBUG_PRINT("%d current indices, format string '%s'\n\n",num,formatstring); + //DEBUG_PRINT("%d current indices\n\n", num); + + //return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(),myloop->idxNames[stmt_num][1].c_str() ); + + // I don't know a clean way to do this. 
+ if (num == 2) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str()); + if (num == 3) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str()); + if (num == 4) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str()); + if (num == 5) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str()); + if (num == 6) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str()); + if (num == 7) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str()); + if (num == 8) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str()); + if (num == 9) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + 
myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str()); + if (num == 10) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str()); + if (num == 11) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str()); + if (num == 12) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str()); + if (num == 13) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + 
myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str()); + if (num == 14) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str()); + if (num == 15) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str()); + if (num == 16) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + 
myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str()); + if (num == 17) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str(), + myloop->idxNames[stmt_num][16].c_str()); + if (num == 18) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str(), + myloop->idxNames[stmt_num][16].c_str(), + myloop->idxNames[stmt_num][17].c_str()); + + fprintf(stderr, "going to die horribly, num=%d\n", num); +} + + 
+static PyObject * +chill_block_indices(PyObject *self, PyObject *args) { + + // I'm unsure what the legal states are here + // is it always "bx", or ("bx" and "by") ? + int howmany = 0; + char *loopnames[2]; + if (myloop->cu_bx > 1) { + loopnames[howmany] = strdup("bx"); + howmany++; + } + if (myloop->cu_by > 1) { + loopnames[howmany] = strdup("by"); + howmany++; + } + + if (howmany == 0) return Py_BuildValue("()"); + if (howmany == 1) return Py_BuildValue("(s)", loopnames[0]); + if (howmany == 2) return Py_BuildValue("(ss)", loopnames[0], loopnames[1]); + fprintf(stderr, "chill_block_indices(), gonna die, howmany == %d", howmany); + exit(666); + + Py_RETURN_NONE; +} + +static PyObject * +chill_thread_indices(PyObject *self, PyObject *args) { + + // I'm unsure what the legal states are here + // is it always "tx", or ("tx" and "ty") or ("tx" and "ty" and "tz") ? + int howmany = 0; + char *loopnames[3]; + if (myloop->cu_tx > 1) { + loopnames[howmany++] = strdup("tx"); + } + if (myloop->cu_ty > 1) { + loopnames[howmany++] = strdup("ty"); + } + if (myloop->cu_tz > 1) { + loopnames[howmany++] = strdup("tz"); + } + + if (howmany == 0) return Py_BuildValue("()"); + if (howmany == 1) return Py_BuildValue("(s)", + loopnames[0]); + if (howmany == 2) return Py_BuildValue("(ss)", + loopnames[0], + loopnames[1]); + if (howmany == 3) return Py_BuildValue("(sss)", + loopnames[0], + loopnames[1], + loopnames[2]); + + fprintf(stderr, "chill_thread_indices(), gonna die, howmany == %d", howmany); + exit(999); +} + + + + + +static PyObject * +block_dims(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("block_dims() returning %d %d\n", myloop->cu_bx, myloop->cu_by); + Py_BuildValue( "i i", myloop->cu_bx, myloop->cu_by); +} + + +static PyObject * +thread_dims(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("thread_dims() returning %d %d %d\n", + //myloop->cu_tx, myloop->cu_ty, myloop->cu_tz); + + Py_BuildValue( "i i i", myloop->cu_tx, myloop->cu_ty, myloop->cu_tz); +} + + +static 
PyObject * +chill_hard_loop_bounds(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("hard_loop_bounds("); + int sstmt, level; // input parameters + int upper, lower; // output + + if (!PyArg_ParseTuple(args, "ii", &sstmt, &level)){ + fprintf(stderr, "hard_loop_bounds, "); + fprintf(stderr, "can't parse statement numbers passed from python\n"); + exit(-1); + } + //DEBUG_PRINT(" %d, %d )\n", sstmt, level); + + myloop->extractCudaUB(sstmt, level, upper, lower); + + //DEBUG_PRINT("lower %d upper %d\n", lower, upper); + + Py_BuildValue( "i i", lower, upper); +} + + +static PyObject * +chill_datacopy9(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\n\n\n***** datacopy_v2() 9ARGS\n"); + + int sstmt; + int level; + std::string cppstr; + std::string array_name; + std::vector<std::string> new_idxs; + bool allow_extra_read; + int fastest_changing_dimension; + int padding_stride; + int padding_alignment; + bool cuda_shared; + + PyObject *pyObj; + + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + Py_XINCREF( pyObj ); + + //if (PyList_Check(pyObj)) fprintf(stderr, "it's a list\n"); + //if (PyTuple_Check(pyObj)) fprintf(stderr, "it's a tuple\n"); + + + + // the ONLY arg is a tuple. 
figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in object tuple\n", tupleSize); + + // first has to be the statement number + PyObject *tupleItem1 = PyTuple_GetItem(pyObj, 0); + Py_INCREF(tupleItem1); + if (PyInt_Check( tupleItem1)) sstmt = PyInt_AsLong( tupleItem1 ); + else { + fprintf(stderr, "second tuple item in chill_datacopy9 is not an int?\n"); + exit(-1); + } + //DEBUG_PRINT("stmt %d\n", sstmt); + + PyObject *tupleItem2 = PyTuple_GetItem(pyObj, 1); // second item is level + Py_INCREF(tupleItem2); + if (PyInt_Check( tupleItem2 )) level = PyInt_AsLong( tupleItem2); + else { + fprintf(stderr, "second tuple item in chill_datacopy9 is not an int?\n"); + exit(-1); + } + //DEBUG_PRINT("level %d\n", level ); + + // third item is array name + PyObject *tupleItem3 = PyTuple_GetItem(pyObj, 2); + Py_INCREF(tupleItem3); + array_name = strdup(PyString_AsString(tupleItem3)); + //DEBUG_PRINT("array name '%s'\n", array_name.c_str()); + + + // integer number of indices + PyObject *tupleItem4 = PyTuple_GetItem(pyObj, 3); + Py_INCREF(tupleItem4); + int numindex= PyInt_AsLong( tupleItem4 ); + //DEBUG_PRINT("%d indices\n", numindex); + + + PyObject *tupleItemTEMP; + for (int i=0; i<numindex; i++) { + tupleItemTEMP = PyTuple_GetItem(pyObj, 4+i); + Py_INCREF(tupleItemTEMP); + cppstr = strdup(PyString_AsString(tupleItemTEMP)); + new_idxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + PyObject *tupleItem5 = PyTuple_GetItem(pyObj, 4+numindex); + Py_INCREF(tupleItem5); + allow_extra_read = PyInt_AsLong( tupleItem5 ); + + PyObject *tupleItem6 = PyTuple_GetItem(pyObj, 5+numindex); + Py_INCREF(tupleItem6); + fastest_changing_dimension = PyInt_AsLong( tupleItem6 ); + + PyObject *tupleItem7 = PyTuple_GetItem(pyObj, 6+numindex); + Py_INCREF(tupleItem7); + padding_stride = PyInt_AsLong( tupleItem7 ); + + PyObject *tupleItem8 = PyTuple_GetItem(pyObj, 7+numindex); + Py_INCREF(tupleItem8); + padding_alignment = PyInt_AsLong( 
tupleItem8 ); + + PyObject *tupleItem9 = PyTuple_GetItem(pyObj, 8+numindex); + Py_INCREF(tupleItem9); + cuda_shared = PyInt_AsLong( tupleItem9 ); + + + //DEBUG_PRINT("calling myloop->datacopy_cuda()\n"); + + // corruption happenes in here??? + myloop->datacopy_cuda(sstmt, level, array_name, new_idxs, + allow_extra_read, fastest_changing_dimension, + padding_stride, padding_alignment, cuda_shared); + + DEBUG_PRINT("before attempt (after actual datacopy)\n"); + //myloop->printCode(); // attempt to debug + DEBUG_PRINT("back from attempt\n"); + + //DEBUG_PRINT("datacopy_9args returning\n"); + + Py_RETURN_NONE; +} + + + + + +static PyObject * +chill_datacopy_privatized(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C datacopy_privatized\n"); + PyObject *pyObj; + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); // statement number + Py_XINCREF(tupleItem); + int sstmt = PyInt_AsLong( tupleItem ); + + tupleItem = PyTuple_GetItem(pyObj, 1); // start_loop + Py_XINCREF(tupleItem); + std::string start_loop = strdup(PyString_AsString(tupleItem)); + int level = myloop->findCurLevel(sstmt, start_loop); + + + tupleItem = PyTuple_GetItem(pyObj, 2); // array_name + Py_XINCREF(tupleItem); + std::string array_name = strdup(PyString_AsString(tupleItem)); + + // things to hold constant - first a count, then the things + tupleItem = PyTuple_GetItem(pyObj, 3); // how many things in the array + Py_XINCREF(tupleItem); + int howmany = PyInt_AsLong( tupleItem ); + + //DEBUG_PRINT("%d things to hold constant: ", howmany); + std::vector<std::string> holdconstant; + std::string cppstr; + + for (int i=0; i<howmany; i++) { + tupleItem = PyTuple_GetItem(pyObj, 4+i); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + holdconstant.push_back( cppstr ); // add at end + } + + std::vector<int> privatized_levels(howmany); + for(int 
i=0; i<howmany; i++) { + privatized_levels[i] = myloop->findCurLevel(sstmt, holdconstant[i]); + //DEBUG_PRINT("privatized_levels[ %d ] = %d\n", i, privatized_levels[i] ); + } + + bool allow_extra_read = false; + int fastest_changing_dimension = -1; + int padding_stride = 1; + int padding_alignment = 1; + bool cuda_shared = false; + + + myloop->datacopy_privatized_cuda(sstmt, level, array_name, privatized_levels, + allow_extra_read, fastest_changing_dimension, + padding_stride, padding_alignment, + cuda_shared); + + + Py_RETURN_NONE; +} + + + + + + +static PyObject * +chill_unroll(PyObject *self, PyObject *args) +{ + int sstmt, level, unroll_amount; + + if (!PyArg_ParseTuple(args, "iii", &sstmt, &level, &unroll_amount)) { + fprintf(stderr, "chill_unroll, can't parse parameters passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("chill_unroll( %d, %d, %d)\n", sstmt, level, unroll_amount ); + bool does_expand = myloop->unroll_cuda(sstmt,level,unroll_amount); + + // TODO return the boolean? + Py_RETURN_NONE; +} + + + + +static PyObject * +chill_cudaize_v2(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("cudaize_v2\n"); + PyObject *pyObj; + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + + // the ONLY arg is a tuple. 
figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in tuple\n", tupleSize); + + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); //the kernel name + Py_XINCREF(tupleItem); + std::string kernel_name = strdup(PyString_AsString(tupleItem)); + + std::map<std::string, int> array_sizes; + tupleItem = PyTuple_GetItem(pyObj, 1); // number of array sizes + Py_XINCREF(tupleItem); + int numarraysizes = PyInt_AsLong( tupleItem ); + + std::string cppstr; + int offset = 2; + for (int i=0; i<numarraysizes; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer size + int siz = PyInt_AsLong( tupleItem ); + + //DEBUG_PRINT("arraysize for %s = %d\n", cppstr.c_str(), siz); + array_sizes.insert( std::make_pair( cppstr, siz )); + } + + + std::vector<std::string> blockIdxs; + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer number of blocks + Py_XINCREF(tupleItem); + int numblocks = PyInt_AsLong( tupleItem ); + //DEBUG_PRINT("%d blocks\n", numblocks); + for (int i=0; i<numblocks; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + cppstr = strdup(PyString_AsString(tupleItem)); + blockIdxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + std::vector<std::string> threadIdxs; + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer number of threads + Py_XINCREF(tupleItem); + int numthreads= PyInt_AsLong( tupleItem ); + //DEBUG_PRINT("%d threads\n", numthreads); + for (int i=0; i<numthreads; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + threadIdxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + + myloop->cudaize_v2(kernel_name, array_sizes, blockIdxs, threadIdxs); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + + +static PyObject *get_loop_num() { + // TODO 
get_loop_num() it's a global value? + fprintf(stderr, "get_loop_num() UNIMPLEMENTED\n"); + exit(-1); +} + + + + +static PyObject * +chill_copy_to_texture(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C copy_to_texture() called from python \n"); + const char *array_name; + if (!PyArg_ParseTuple(args, "s", &array_name)){ + fprintf(stderr, "chill_copy_to_texture can't parse array name\n"); + exit(-1); + } + //DEBUG_PRINT("array name = %s\n", array_name); + myloop->copy_to_texture(array_name); + + Py_RETURN_NONE; +} + + + + + + + +static PyObject * +chill_init(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("C chill_init() called from python as read_IR()\n"); + DEBUG_PRINT("C init( "); + const char *filename; + const char *procname; + if (!PyArg_ParseTuple(args, "ss", &filename, &procname)){ + fprintf(stderr, "umwut? can't parse file name and procedure name?\n"); + exit(-1); + } + + int loop_num = 0; + + DEBUG_PRINT("%s, 0, 0 )\n", filename); + + DEBUG_PRINT("GETTING IR CODE in chill_init() in chillmodule.cc\n"); + DEBUG_PRINT("ir_code = new IR_cudaroseCode(%s, %s);\n",filename, procname); + ir_code = new IR_cudaroseCode(filename, procname); //this produces 15000 lines of output + fflush(stdout); + + + + + //protonu--here goes my initializations + //A lot of this code was lifted from Chun's parser.yy + //the plan is now to create the LoopCuda object directly + IR_Block *block = ir_code->GetCode(); + DEBUG_PRINT("ir_code->FindOneLevelControlStructure(block); chillmodule.cc\n"); + ir_controls = ir_code->FindOneLevelControlStructure(block); + + int loop_count = 0; + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) { + loops.push_back(i); + loop_count++; + } + } + delete block; + + + std::vector<IR_Control *> parm; + for(int j = 0; j < loop_count; j++) + parm.push_back(ir_controls[loops[j]]); + + + DEBUG_PRINT("block = ir_code->MergeNeighboringControlStructures(parm);\n"); + block = 
ir_code->MergeNeighboringControlStructures(parm); + + //DEBUG_PRINT("myloop = new LoopCuda(block, loop_num); in chillmodule.cc\n"); + myloop = new LoopCuda(block, loop_num); + fflush(stdout); DEBUG_PRINT("back\n"); + delete block; + + //end-protonu + + fflush(stdout); + DEBUG_PRINT("myloop->original();\n"); + myloop->original(); + fflush(stdout); + DEBUG_PRINT("myloop->useIdxNames=true;\n"); + myloop->useIdxNames=true;//Use idxName in code_gen + //register_v2(L); + + fflush(stdout); + DEBUG_PRINT("chill_init DONE\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); + +} + +#else +// ------------------------- // +// CHiLL interface functions // +// ------------------------- // + +static PyObject* chill_source(PyObject* self, PyObject* args) { + strict_arg_num(args, 1, "source"); + source_filename = strArg(args, 0); + Py_RETURN_NONE; +} + +static PyObject* chill_procedure(PyObject* self, PyObject* args) { + if(!procedure_name.empty()) { + fprintf(stderr, "only one procedure can be handled in a script"); + if(!is_interactive) + exit(2); + } + procedure_name = strArg(args, 0); + Py_RETURN_NONE; +} + +static PyObject* chill_loop(PyObject* self, PyObject* args) { + // loop (n) + // loop (n:m) + + int nargs = PyTuple_Size(args); + int start_num; + int end_num; + if(nargs == 1) { + start_num = intArg(args, 0); + end_num = start_num; + } + else if(nargs == 2) { + start_num = intArg(args, 0); + end_num = intArg(args, 1); + } + else { + fprintf(stderr, "loop takes one or two arguments"); + if(!is_interactive) + exit(2); + } + set_loop_num_start(start_num); + set_loop_num_end(end_num); + init_loop(start_num, end_num); + Py_RETURN_NONE; +} + +static PyObject* chill_print_code(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_code"); + myloop->printCode(); + printf("\n"); + Py_RETURN_NONE; +} + +static PyObject* chill_print_dep(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_dep"); + myloop->printDependenceGraph(); + Py_RETURN_NONE; +} + 
+static PyObject* chill_print_space(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_space"); + myloop->printIterationSpace(); + Py_RETURN_NONE; +} + +static PyObject* chill_exit(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "exit"); + repl_stop = true; + Py_RETURN_NONE; +} + +static void add_known(std::string cond_expr) { + int num_dim = myloop->known.n_set(); + std::vector<std::map<std::string, int> >* cond; + cond = parse_relation_vector(cond_expr.c_str()); + + Relation rel(num_dim); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else + throw std::invalid_argument("only symbolic variables are allowed in known condition"); + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("symbolic variable " + it->first + " not found"); + else + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->addKnown(rel); +} + +static PyObject* chill_known(PyObject* self, PyObject* args) { + strict_arg_num(args, 1, "known"); + if (PyList_Check(PyTuple_GetItem(args, 0))) { + PyObject* list = PyTuple_GetItem(args, 0); + for (int i = 0; i < PyList_Size(list); i++) { + add_known(std::string(PyString_AsString(PyList_GetItem(list, i)))); + } + } + else { + add_known(strArg(args, 0)); + } + Py_RETURN_NONE; +} + +static PyObject* chill_remove_dep(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "remove_dep"); + int from = intArg(args, 0); + int to = intArg(args, 1); + myloop->removeDependence(from, to); + Py_RETURN_NONE; +} + +static PyObject* 
chill_original(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "original"); + myloop->original(); + Py_RETURN_NONE; +} + +static PyObject* chill_permute(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 1, 3, "permute"); + if((nargs < 1) || (nargs > 3)) + throw std::runtime_error("incorrect number of arguments in permute"); + if(nargs == 1) { + // premute ( vector ) + std::vector<int> pi; + if(!tointvector(args, 0, pi)) + throw std::runtime_error("first arg in permute(pi) must be an int vector"); + myloop->permute(pi); + } + else if (nargs == 2) { + // permute ( set, vector ) + std::set<int> active; + std::vector<int> pi; + if(!tointset(args, 0, active)) + throw std::runtime_error("the first argument in permute(active, pi) must be an int set"); + if(!tointvector(args, 1, pi)) + throw std::runtime_error("the second argument in permute(active, pi) must be an int vector"); + myloop->permute(active, pi); + } + else if (nargs == 3) { + int stmt_num = intArg(args, 1); + int level = intArg(args, 2); + std::vector<int> pi; + if(!tointvector(args, 3, pi)) + throw std::runtime_error("the third argument in permute(stmt_num, level, pi) must be an int vector"); + myloop->permute(stmt_num, level, pi); + } + Py_RETURN_NONE; +} + +static PyObject* chill_pragma(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "pragma"); + int stmt_num = intArg(args, 1); + int level = intArg(args, 1); + std::string pragmaText = strArg(args, 2); + myloop->pragma(stmt_num, level, pragmaText); + Py_RETURN_NONE; +} + +static PyObject* chill_prefetch(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "prefetch"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string prefetchText = strArg(args, 2); + int hint = intArg(args, 3); + myloop->prefetch(stmt_num, level, prefetchText, hint); + Py_RETURN_NONE; +} + +static PyObject* chill_tile(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 7, "tile"); + int 
stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int tile_size = intArg(args, 2); + if(nargs == 3) { + myloop->tile(stmt_num, level, tile_size); + } + else if(nargs >= 4) { + int outer_level = intArg(args, 3); + if(nargs >= 5) { + TilingMethodType method = StridedTile; + int imethod = intArg(args, 4, 2); //< don't know if a default value is needed + // check method input against expected values + if (imethod == 0) + method = StridedTile; + else if (imethod == 1) + method = CountedTile; + else + throw std::runtime_error("5th argument must be either strided or counted"); + if(nargs >= 6) { + int alignment_offset = intArg(args, 5); + if(nargs == 7) { + int alignment_multiple = intArg(args, 6, 1); + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset, alignment_multiple); + } + if(nargs == 6) + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset); + } + if(nargs == 5) + myloop->tile(stmt_num, level, tile_size, outer_level, method); + } + if(nargs == 4) + myloop->tile(stmt_num, level, tile_size, outer_level); + } + Py_RETURN_NONE; +} + +static void chill_datacopy_vec(PyObject* args) { + // Overload 1: bool datacopy( + // const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, + // int level, + // bool allow_extra_read = false, + // int fastest_changing_dimension = -1, + // int padding_stride = 1, + // int padding_alignment = 4, + // int memory_type = 0); + std::vector<std::pair<int, std::vector<int> > > array_ref_nums; + // expect list(tuple(int,list(int))) + // or dict(int,list(int)) + if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { + PyObject* list = PyTuple_GetItem(args, 0); + for(int i = 0; i < PyList_Size(list); i ++) { + PyObject* tup = PyList_GetItem(list, i); + int index = PyLong_AsLong(PyTuple_GetItem(tup, 0)); + std::vector<int> vec; + tointvector(PyTuple_GetItem(tup, 1), vec); + array_ref_nums.push_back(std::pair<int, std::vector<int> >(index, vec)); + } + } + else 
if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { + PyObject* dict = PyTuple_GetItem(args, 0); + PyObject* klist = PyDict_Keys(dict); + for(int ki = 0; ki < PyList_Size(klist); ki++) { + PyObject* index = PyList_GetItem(klist, ki); + std::vector<int> vec; + tointvector(PyDict_GetItem(dict,index), vec); + array_ref_nums.push_back(std::pair<int, std::vector<int> >(PyLong_AsLong(index), vec)); + } + Py_DECREF(klist); + } + else { + //TODO: this should never happen + } + int level = intArg(args, 1); + bool allow_extra_read = boolArg(args, 2, false); + int fastest_changing_dimension = intArg(args, 3, -1); + int padding_stride = intArg(args, 4, 1); + int padding_alignment = intArg(args, 5, 4); + int memory_type = intArg(args, 6, 0); + myloop->datacopy(array_ref_nums, level, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +static void chill_datacopy_int(PyObject* args) { + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string array_name = strArg(args,2,0); + bool allow_extra_read = boolArg(args,3,false); + int fastest_changing_dimension = intArg(args, 4, -1); + int padding_stride = intArg(args, 5, 1); + int padding_alignment = intArg(args, 6, 4); + int memory_type = intArg(args, 7, 0); + myloop->datacopy(stmt_num, level, array_name, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +static PyObject* chill_datacopy(PyObject* self, PyObject* args) { + // Overload 2: bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + int nargs = strict_arg_range(args, 3, 7, "datacopy"); + if(PyList_CheckExact(PyTuple_GetItem(args,0)) || PyDict_CheckExact(PyTuple_GetItem(args, 0))) { + chill_datacopy_vec(args); + } + else { + chill_datacopy_int(args); + } + Py_RETURN_NONE; +} + +static PyObject* 
chill_datacopy_privatized(PyObject* self, PyObject* args) { + // bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); + int nargs = strict_arg_range(args, 4, 9, "datacopy_privatized"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string array_name = strArg(args, 2); + std::vector<int> privatized_levels; + tointvector(args, 3, privatized_levels); + bool allow_extra_read = boolArg(args, 4, false); + int fastest_changing_dimension = intArg(args, 5, -1); + int padding_stride = intArg(args, 6, 1); + int padding_alignment = intArg(args, 7, 1); + int memory_type = intArg(args, 8); + myloop->datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + Py_RETURN_NONE; +} + +static PyObject* chill_unroll(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 4, "unroll"); + //std::set<int> unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >(), int cleanup_split_level = 0); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int unroll_amount = intArg(args, 2); + std::vector< std::vector<std::string> > idxNames = std::vector< std::vector<std::string> >(); + int cleanup_split_level = intArg(args, 3); + myloop->unroll(stmt_num, level, unroll_amount, idxNames, cleanup_split_level); + Py_RETURN_NONE; +} + +static PyObject* chill_unroll_extra(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 4, "unroll_extra"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int unroll_amount = intArg(args, 2); + int cleanup_split_level = intArg(args, 3, 0); + myloop->unroll_extra(stmt_num, level, 
unroll_amount, cleanup_split_level); + Py_RETURN_NONE; +} + +static PyObject* chill_split(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "split"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int num_dim = myloop->stmt[stmt_num].xform.n_out(); + + std::vector<std::map<std::string, int> >* cond; + std::string cond_expr = strArg(args, 2); + cond = parse_relation_vector(cond_expr.c_str()); + + Relation rel((num_dim-1)/2); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else { + if (dim > (num_dim-1)/2) + throw std::invalid_argument("invalid loop level " + to_string(dim) + " in split condition"); + h.update_coef(rel.set_var(dim), it->second); + } + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("unrecognized variable " + to_string(it->first.c_str())); + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->split(stmt_num,level,rel); + Py_RETURN_NONE; +} + +static PyObject* chill_nonsingular(PyObject* self, PyObject* args) { + std::vector< std::vector<int> > mat; + tointmatrix(args, 0, mat); + myloop->nonsingular(mat); + Py_RETURN_NONE; +} + +static PyObject* chill_skew(PyObject* self, PyObject* args) { + std::set<int> stmt_nums; + std::vector<int> skew_amounts; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + tointvector(args, 2, skew_amounts); + myloop->skew(stmt_nums, level, skew_amounts); + Py_RETURN_NONE; +} + +static PyObject* chill_scale(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + 
std::set<int> stmt_nums; + int level = intArg(args, 1); + int scale_amount = intArg(args, 2); + tointset(args, 0, stmt_nums); + myloop->scale(stmt_nums, level, scale_amount); + Py_RETURN_NONE; +} + +static PyObject* chill_reverse(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmt_nums; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + myloop->reverse(stmt_nums, level); + Py_RETURN_NONE; +} + +static PyObject* chill_shift(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + std::set<int> stmt_nums; + int level = intArg(args, 1); + int shift_amount = intArg(args, 2); + tointset(args, 0, stmt_nums); + myloop->shift(stmt_nums, level, shift_amount); + Py_RETURN_NONE; +} + +static PyObject* chill_shift_to(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int absolute_pos = intArg(args, 2); + myloop->shift_to(stmt_num, level, absolute_pos); + Py_RETURN_NONE; +} + +static PyObject* chill_peel(PyObject* self, PyObject* args) { + strict_arg_range(args, 2, 3); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int amount = intArg(args, 2); + myloop->peel(stmt_num, level, amount); + Py_RETURN_NONE; +} + +static PyObject* chill_fuse(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmt_nums; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + myloop->fuse(stmt_nums, level); + Py_RETURN_NONE; +} + +static PyObject* chill_distribute(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmts; + int level = intArg(args, 1); + tointset(args, 0, stmts); + myloop->distribute(stmts, level); + Py_RETURN_NONE; +} + +static PyObject * +chill_num_statements(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_num_statements() called from python\n"); + int num = myloop->stmt.size(); + //DEBUG_PRINT("C num_statement() = %d\n", num); + return Py_BuildValue( "i", num ); // 
BEWARE "d" is DOUBLE, not int +} +#endif + +#ifdef CUDACHILL +static PyMethodDef ChillMethods[] = { + + // python name C routine parameter passing comment + {"print_code", chill_print_code, METH_VARARGS, "print the code at this point"}, + {"print_ri", chill_print_ri , METH_VARARGS, "print Runtime Info "}, + {"print_idx", chill_print_idx , METH_VARARGS, "print indices "}, + {"print_dep", chill_print_dep , METH_VARARGS, "print dep, dependecies?"}, + {"print_space", chill_print_space, METH_VARARGS, "print something or other "}, + {"add_sync", chill_add_sync, METH_VARARGS, "add sync, whatever that is"}, + {"rename_index", chill_rename_index, METH_VARARGS, "rename a loop index"}, + {"permute", chill_permute, METH_VARARGS, "change the order of loops?"}, + {"tile3", chill_tile_v2_3arg, METH_VARARGS, "something to do with tile"}, + {"tile7", chill_tile_v2_7arg, METH_VARARGS, "something to do with tile"}, + {"thread_dims", thread_dims, METH_VARARGS, "tx, ty, tz "}, + {"block_dims", block_dims, METH_VARARGS, "bx, by"}, + {"thread_indices", chill_thread_indices, METH_VARARGS, "bx, by"}, + {"block_indices", chill_block_indices, METH_VARARGS, "bx, by"}, + {"hard_loop_bounds", chill_hard_loop_bounds, METH_VARARGS, "lower, upper"}, + {"unroll", chill_unroll, METH_VARARGS, "unroll a loop"}, + {"cudaize", chill_cudaize_v2, METH_VARARGS, "dunno"}, + {"datacopy_privatized", chill_datacopy_privatized, METH_VARARGS, "dunno"}, + + {"datacopy_9arg", chill_datacopy9, METH_VARARGS, "datacopy with 9 arguments"}, + {"copy_to_texture", chill_copy_to_texture, METH_VARARGS, "copy to texture mem"}, + {"read_IR", chill_init, METH_VARARGS, "read an Intermediate Representation file"}, + {"cur_indices", chill_cur_indices, METH_VARARGS, "currently active indices"}, + {"num_statements", chill_num_statements, METH_VARARGS, "number of statements in ... 
something"}, + {NULL, NULL, 0, NULL} /* Sentinel */ + + //{"copy_to_constant", chill_copy_to_constant, METH_VARARGS, "copy to constant mem"}, + +}; +#else +static PyMethodDef ChillMethods[] = { + + //python name C routine parameter passing comment + {"source", chill_source, METH_VARARGS, "set source file for chill script"}, + {"procedure", chill_procedure, METH_VARARGS, "set the name of the procedure"}, + {"loop", chill_loop, METH_VARARGS, "indicate which loop to optimize"}, + {"print_code", chill_print_code, METH_VARARGS, "print generated code"}, + {"print_dep", chill_print_dep, METH_VARARGS, "print the dependencies graph"}, + {"print_space", chill_print_space, METH_VARARGS, "print space"}, + {"exit", chill_exit, METH_VARARGS, "exit the interactive consule"}, + {"known", chill_known, METH_VARARGS, "knwon"}, + {"remove_dep", chill_remove_dep, METH_VARARGS, "remove dependency i suppose"}, + {"original", chill_original, METH_VARARGS, "original"}, + {"permute", chill_permute, METH_VARARGS, "permute"}, + {"pragma", chill_pragma, METH_VARARGS, "pragma"}, + {"prefetch", chill_prefetch, METH_VARARGS, "prefetch"}, + {"tile", chill_tile, METH_VARARGS, "tile"}, + {"datacopy", chill_datacopy, METH_VARARGS, "datacopy"}, + {"datacopy_privitized", chill_datacopy_privatized, METH_VARARGS, "datacopy_privatized"}, + {"unroll", chill_unroll, METH_VARARGS, "unroll"}, + {"unroll_extra", chill_unroll_extra, METH_VARARGS, "unroll_extra"}, + {"split", chill_split, METH_VARARGS, "split"}, + {"nonsingular", chill_nonsingular, METH_VARARGS, "nonsingular"}, + {"skew", chill_skew, METH_VARARGS, "skew"}, + {"scale", chill_scale, METH_VARARGS, "scale"}, + {"reverse", chill_reverse, METH_VARARGS, "reverse"}, + {"shift", chill_shift, METH_VARARGS, "shift"}, + {"shift_to", chill_shift_to, METH_VARARGS, "shift_to"}, + {"peel", chill_peel, METH_VARARGS, "peel"}, + {"fuse", chill_fuse, METH_VARARGS, "fuse"}, + {"distribute", chill_distribute, METH_VARARGS, "distribute"}, + {"num_statements", 
chill_num_statements, METH_VARARGS, "number of statements in the current loop"}, + {NULL, NULL, 0, NULL} +}; +#endif + +static void register_globals(PyObject* m) { + // Preset globals + PyModule_AddStringConstant(m, "VERSION", CHILL_BUILD_VERSION); + PyModule_AddStringConstant(m, "dest", "C"); + PyModule_AddStringConstant(m, "C", "C"); + // Tile method + PyModule_AddIntConstant(m, "strided", 0); + PyModule_AddIntConstant(m, "counted", 1); + // Memory mode + PyModule_AddIntConstant(m, "global", 0); + PyModule_AddIntConstant(m, "shared", 1); + PyModule_AddIntConstant(m, "textured", 2); + // Bool flags + PyModule_AddIntConstant(m, "sync", 1); +} + +PyMODINIT_FUNC +initchill(void) // pass C methods to python +{ + DEBUG_PRINT("in C, initchill() to set up C methods to be called from python\n"); + PyObject* m = Py_InitModule("chill", ChillMethods); + register_globals(m); +} diff --git a/chill/src/dep.cc b/chill/src/dep.cc new file mode 100644 index 0000000..a675d03 --- /dev/null +++ b/chill/src/dep.cc @@ -0,0 +1,567 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Data dependence vector and graph. + + Notes: + All dependence vectors are normalized, i.e., the first non-zero distance + must be positve. Thus the correct dependence meaning can be given based on + source/destination pair's read/write type. Suppose for a dependence vector + 1, 0~5, -3), we want to permute the first and the second dimension, + the result would be two dependence vectors (0, 1, -3) and (1~5, 1, -3). + All operations on dependence vectors are non-destructive, i.e., new + dependence vectors are returned. + + History: + 01/2006 Created by Chun Chen. 
+ 03/2009 Use IR_Ref interface in source and destination arrays -chun +*****************************************************************************/ + +#include "dep.hh" + +//----------------------------------------------------------------------------- +// Class: DependeceVector +//----------------------------------------------------------------------------- + +std::ostream& operator<<(std::ostream &os, const DependenceVector &d) { + if (d.sym != NULL) { + os << d.sym->name(); + os << ':'; + if (d.quasi) + os << "_quasi"; + + } + + switch (d.type) { + case DEP_W2R: + os << "true"; + if (d.is_reduction) + os << "_reduction"; + break; + case DEP_R2W: + os << "anti"; + break; + case DEP_W2W: + os << "output"; + break; + case DEP_R2R: + os << "input"; + break; + case DEP_CONTROL: + os << "control"; + break; + default: + os << "unknown"; + break; + } + + os << '('; + + for (int i = 0; i < d.lbounds.size(); i++) { + omega::coef_t lbound = d.lbounds[i]; + omega::coef_t ubound = d.ubounds[i]; + + if (lbound == ubound) + os << lbound; + else { + if (lbound == -posInfinity) + if (ubound == posInfinity) + os << '*'; + else { + if (ubound == -1) + os << '-'; + else + os << ubound << '-'; + } + else if (ubound == posInfinity) { + if (lbound == 1) + os << '+'; + else + os << lbound << '+'; + } else + os << lbound << '~' << ubound; + } + + if (i < d.lbounds.size() - 1) + os << ", "; + } + + os << ')'; + + return os; +} + +// DependenceVector::DependenceVector(int size): +// lbounds(std::vector<coef_t>(size, 0)), +// ubounds(std::vector<coef_t>(size, 0)) { +// src = NULL; +// dst = NULL; +// } + +DependenceVector::DependenceVector(const DependenceVector &that) { + if (that.sym != NULL) + this->sym = that.sym->clone(); + else + this->sym = NULL; + this->type = that.type; + this->lbounds = that.lbounds; + this->ubounds = that.ubounds; + quasi = that.quasi; + is_scalar_dependence = that.is_scalar_dependence; + is_reduction = that.is_reduction; +} + +DependenceVector 
&DependenceVector::operator=(const DependenceVector &that) { + if (this != &that) { + delete this->sym; + if (that.sym != NULL) + this->sym = that.sym->clone(); + else + this->sym = NULL; + this->type = that.type; + this->lbounds = that.lbounds; + this->ubounds = that.ubounds; + quasi = that.quasi; + is_scalar_dependence = that.is_scalar_dependence; + is_reduction = that.is_reduction; + } + return *this; +} +DependenceType DependenceVector::getType() const { + return type; +} + +bool DependenceVector::is_data_dependence() const { + if (type == DEP_W2R || type == DEP_R2W || type == DEP_W2W + || type == DEP_R2R) + return true; + else + return false; +} + +bool DependenceVector::is_control_dependence() const { + if (type == DEP_CONTROL) + return true; + else + return false; +} + +bool DependenceVector::has_negative_been_carried_at(int dim) const { + if (!is_data_dependence()) + throw std::invalid_argument("only works for data dependences"); + + if (dim < 0 || dim >= lbounds.size()) + return false; + + for (int i = 0; i < dim; i++) + if (lbounds[i] > 0 || ubounds[i] < 0) + return false; + + if (lbounds[dim] < 0) + return true; + else + return false; +} + + +bool DependenceVector::has_been_carried_at(int dim) const { + if (!is_data_dependence()) + throw std::invalid_argument("only works for data dependences"); + + if (dim < 0 || dim >= lbounds.size()) + return false; + + for (int i = 0; i < dim; i++) + if (lbounds[i] > 0 || ubounds[i] < 0) + return false; + + if ((lbounds[dim] != 0) || (ubounds[dim] !=0)) + return true; + + return false; +} + +bool DependenceVector::has_been_carried_before(int dim) const { + if (!is_data_dependence()) + throw std::invalid_argument("only works for data dependences"); + + if (dim < 0) + return false; + if (dim > lbounds.size()) + dim = lbounds.size(); + + for (int i = 0; i < dim; i++) { + if (lbounds[i] > 0) + return true; + if (ubounds[i] < 0) + return true; + } + + return false; +} + +bool DependenceVector::isZero() const { + return 
isZero(lbounds.size() - 1); +} + +bool DependenceVector::isZero(int dim) const { + if (dim >= lbounds.size()) + throw std::invalid_argument("invalid dependence dimension"); + + for (int i = 0; i <= dim; i++) + if (lbounds[i] != 0 || ubounds[i] != 0) + return false; + + return true; +} + +bool DependenceVector::isPositive() const { + for (int i = 0; i < lbounds.size(); i++) + if (lbounds[i] != 0 || ubounds[i] != 0) { + if (lbounds[i] < 0) + return false; + else if (lbounds[i] > 0) + return true; + } + + return false; +} + +bool DependenceVector::isNegative() const { + for (int i = 0; i < lbounds.size(); i++) + if (lbounds[i] != 0 || ubounds[i] != 0) { + if (ubounds[i] > 0) + return false; + else if (ubounds[i] < 0) + return true; + } + + return false; +} + +bool DependenceVector::isAllPositive() const { + for (int i = 0; i < lbounds.size(); i++) + if (lbounds[i] < 0) + return false; + + return true; +} + +bool DependenceVector::isAllNegative() const { + for (int i = 0; i < ubounds.size(); i++) + if (ubounds[i] > 0) + return false; + + return true; +} + +bool DependenceVector::hasPositive(int dim) const { + if (dim >= lbounds.size()) + throw std::invalid_argument("invalid dependence dimension"); + + if (lbounds[dim] > 0) + //av: changed from ubounds to lbounds may have side effects + return true; + else + return false; +} + +bool DependenceVector::hasNegative(int dim) const { + if (dim >= lbounds.size()) + throw std::invalid_argument("invalid dependence dimension"); + + if (ubounds[dim] < 0) + //av: changed from lbounds to ubounds may have side effects + return true; + else + return false; +} + +bool DependenceVector::isCarried(int dim, omega::coef_t distance) const { + if (distance <= 0) + throw std::invalid_argument("invalid dependence distance size"); + + if (dim > lbounds.size()) + dim = lbounds.size(); + + for (int i = 0; i < dim; i++) + if (lbounds[i] > 0) + return false; + else if (ubounds[i] < 0) + return false; + + if (dim >= lbounds.size()) + return true; + 
+ if (lbounds[dim] > distance) + return false; + else if (ubounds[dim] < -distance) + return false; + + return true; +} + +bool DependenceVector::canPermute(const std::vector<int> &pi) const { + if (pi.size() != lbounds.size()) + throw std::invalid_argument( + "permute dimensionality do not match dependence space"); + + for (int i = 0; i < pi.size(); i++) { + if (lbounds[pi[i]] > 0) + return true; + else if (lbounds[pi[i]] < 0) + return false; + } + + return true; +} + +std::vector<DependenceVector> DependenceVector::normalize() const { + std::vector<DependenceVector> result; + + DependenceVector dv(*this); + for (int i = 0; i < dv.lbounds.size(); i++) { + if (dv.lbounds[i] < 0 && dv.ubounds[i] >= 0) { + omega::coef_t t = dv.ubounds[i]; + dv.ubounds[i] = -1; + result.push_back(dv); + dv.lbounds[i] = 0; + dv.ubounds[i] = t; + } + if (dv.lbounds[i] == 0 && dv.ubounds[i] > 0) { + dv.lbounds[i] = 1; + result.push_back(dv); + dv.lbounds[i] = 0; + dv.ubounds[i] = 0; + } + if (dv.lbounds[i] == 0 && dv.ubounds[i] == 0) + continue; + else + break; + } + + result.push_back(dv); + return result; +} + +std::vector<DependenceVector> DependenceVector::permute( + const std::vector<int> &pi) const { + if (pi.size() != lbounds.size()) + throw std::invalid_argument( + "permute dimensionality do not match dependence space"); + + const int n = lbounds.size(); + + DependenceVector dv(*this); + for (int i = 0; i < n; i++) { + dv.lbounds[i] = lbounds[pi[i]]; + dv.ubounds[i] = ubounds[pi[i]]; + } + + int violated = 0; + + for (int i = 0; i < n; i++) { + if (dv.lbounds[i] > 0) + break; + else if (dv.lbounds[i] < 0) + violated = 1; + } + + if (((violated == 1) && !quasi) && !is_scalar_dependence) { + throw ir_error("dependence violation"); + + } + + return dv.normalize(); +} + +DependenceVector DependenceVector::reverse() const { + const int n = lbounds.size(); + + DependenceVector dv(*this); + switch (type) { + case DEP_W2R: + dv.type = DEP_R2W; + break; + case DEP_R2W: + dv.type = 
DEP_W2R; + break; + default: + dv.type = type; + } + + for (int i = 0; i < n; i++) { + dv.lbounds[i] = -ubounds[i]; + dv.ubounds[i] = -lbounds[i]; + } + dv.quasi = true; + + return dv; +} + +// std::vector<DependenceVector> DependenceVector::matrix(const std::vector<std::vector<int> > &M) const { +// if (M.size() != lbounds.size()) +// throw std::invalid_argument("(non)unimodular transformation dimensionality does not match dependence space"); + +// const int n = lbounds.size(); +// DependenceVector dv; +// if (sym != NULL) +// dv.sym = sym->clone(); +// else +// dv.sym = NULL; +// dv.type = type; + +// for (int i = 0; i < n; i++) { +// assert(M[i].size() == n+1 || M[i].size() == n); + +// omega::coef_t lb, ub; +// if (M[i].size() == n+1) +// lb = ub = M[i][n]; +// else +// lb = ub = 0; + +// for (int j = 0; j < n; j++) { +// int c = M[i][j]; +// if (c == 0) +// continue; + +// if (c > 0) { +// if (lbounds[j] == -posInfinity) +// lb = -posInfinity; +// else if (lb != -posInfinity) +// lb += c * lbounds[j]; +// if (ubounds[j] == posInfinity) +// ub = posInfinity; +// else if (ub != posInfinity) +// ub += c * ubounds[j]; +// } +// else { +// if (ubounds[j] == posInfinity) +// lb = -posInfinity; +// else if (lb != -posInfinity) +// lb += c * ubounds[j]; +// if (lbounds[j] == -posInfinity) +// ub = posInfinity; +// else if (ub != posInfinity) +// ub += c * lbounds[j]; +// } +// } +// dv.lbounds.push_back(lb); +// dv.ubounds.push_back(ub); +// } +// dv.is_reduction = is_reduction; + +// return dv.normalize(); +// } + +//----------------------------------------------------------------------------- +// Class: DependenceGraph +//----------------------------------------------------------------------------- + +DependenceGraph DependenceGraph::permute(const std::vector<int> &pi, + const std::set<int> &active) const { + DependenceGraph g; + + for (int i = 0; i < vertex.size(); i++) + g.insert(vertex[i].first); + + for (int i = 0; i < vertex.size(); i++) + for 
(EdgeList::const_iterator j = vertex[i].second.begin(); + j != vertex[i].second.end(); j++) { + if (active.empty() + || (active.find(i) != active.end() + && active.find(j->first) != active.end())) { + for (int k = 0; k < j->second.size(); k++) { + std::vector<DependenceVector> dv = j->second[k].permute(pi); + g.connect(i, j->first, dv); + } + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + for (int d = 0; d < pi.size(); d++) + if (pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + g.connect(i, j->first, dv); + } + } + + return g; +} + +// DependenceGraph DependenceGraph::matrix(const std::vector<std::vector<int> > &M) const { +// DependenceGraph g; + +// for (int i = 0; i < vertex.size(); i++) +// g.insert(vertex[i].first); + +// for (int i = 0; i < vertex.size(); i++) +// for (EdgeList::const_iterator j = vertex[i].second.begin(); j != vertex[i].second.end(); j++) +// for (int k = 0; k < j->second.size(); k++) +// g.connect(i, j->first, j->second[k].matrix(M)); + +// return g; +// } + +DependenceGraph DependenceGraph::subspace(int dim) const { + DependenceGraph g; + + for (int i = 0; i < vertex.size(); i++) + g.insert(vertex[i].first); + + for (int i = 0; i < vertex.size(); i++) + for (EdgeList::const_iterator j = vertex[i].second.begin(); + j != vertex[i].second.end(); j++) + + for (int k = 0; k < j->second.size(); k++) { + if(j->second[k].type != DEP_CONTROL){ + if (j->second[k].isCarried(dim)) + g.connect(i, j->first, j->second[k]); + }else + g.connect(i, j->first, j->second[k]); + + } + + return g; +} + +bool DependenceGraph::isPositive() const { + for (int i = 0; i < vertex.size(); i++) + for (EdgeList::const_iterator j = vertex[i].second.begin(); + j != vertex[i].second.end(); j++) + for (int k = 0; k < 
j->second.size(); k++) + if (!j->second[k].isPositive()) + return false; + + return true; +} + +bool DependenceGraph::hasPositive(int dim) const { + for (int i = 0; i < vertex.size(); i++) + for (EdgeList::const_iterator j = vertex[i].second.begin(); + j != vertex[i].second.end(); j++) + for (int k = 0; k < j->second.size(); k++) + if (!j->second[k].hasPositive(dim)) + return false; + + return true; +} + +bool DependenceGraph::hasNegative(int dim) const { + for (int i = 0; i < vertex.size(); i++) + for (EdgeList::const_iterator j = vertex[i].second.begin(); + j != vertex[i].second.end(); j++) + for (int k = 0; k < j->second.size(); k++) + if (!j->second[k].hasNegative(dim)) + return false; + + return true; +} diff --git a/chill/src/ir_rose.cc b/chill/src/ir_rose.cc new file mode 100644 index 0000000..5acb175 --- /dev/null +++ b/chill/src/ir_rose.cc @@ -0,0 +1,2296 @@ +/***************************************************************************** + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + CHiLL's rose interface. + + Notes: + Array supports mixed pointer and array type in a single declaration. + + History: + 02/23/2009 Created by Chun Chen. 
+*****************************************************************************/ +#include <string> +#include "ir_rose.hh" +#include "ir_rose_utils.hh" +#include <code_gen/rose_attributes.h> +#include <code_gen/CG_roseRepr.h> +#include <code_gen/CG_roseBuilder.h> + +using namespace SageBuilder; +using namespace SageInterface; +using namespace omega; +// ---------------------------------------------------------------------------- +// Class: IR_roseScalarSymbol +// ---------------------------------------------------------------------------- + +std::string IR_roseScalarSymbol::name() const { + return vs_->get_name().getString(); +} + +int IR_roseScalarSymbol::size() const { + return (vs_->get_type()->memoryUsage()) / (vs_->get_type()->numberOfNodes()); +} + +bool IR_roseScalarSymbol::operator==(const IR_Symbol &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseScalarSymbol *l_that = + static_cast<const IR_roseScalarSymbol *>(&that); + return this->vs_ == l_that->vs_; +} + +IR_Symbol *IR_roseScalarSymbol::clone() const { + return NULL; +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseArraySymbol +// ---------------------------------------------------------------------------- + +std::string IR_roseArraySymbol::name() const { + return (vs_->get_declaration()->get_name().getString()); +} + +int IR_roseArraySymbol::elem_size() const { + + SgType *tn = vs_->get_type(); + SgType* arrType; + + int elemsize; + + if (arrType = isSgArrayType(tn)) { + while (isSgArrayType(arrType)) { + arrType = arrType->findBaseType(); + } + } else if (arrType = isSgPointerType(tn)) { + while (isSgPointerType(arrType)) { + arrType = arrType->findBaseType(); + } + } + + elemsize = (int) arrType->memoryUsage() / arrType->numberOfNodes(); + return elemsize; +} + +int IR_roseArraySymbol::n_dim() const { + int dim = 0; + SgType* arrType = isSgArrayType(vs_->get_type()); + SgType* ptrType = 
isSgPointerType(vs_->get_type()); + if (arrType != NULL) { + while (isSgArrayType(arrType)) { + arrType = isSgArrayType(arrType)->get_base_type(); + dim++; + } + } else if (ptrType != NULL) { + while (isSgPointerType(ptrType)) { + ptrType = isSgPointerType(ptrType)->get_base_type(); + dim++; + } + } + + // Manu:: fortran support + if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) { + + if (arrType != NULL) { + dim = 0; + SgExprListExp * dimList = isSgArrayType(vs_->get_type())->get_dim_info(); + SgExpressionPtrList::iterator it = dimList->get_expressions().begin(); + for(;it != dimList->get_expressions().end(); it++) { + dim++; + } + } else if (ptrType != NULL) { + //std::cout << "pntrType \n"; + ; // not sure if this case will happen + } + } + + return dim; +} + +omega::CG_outputRepr *IR_roseArraySymbol::size(int dim) const { + + SgArrayType* arrType = isSgArrayType(vs_->get_type()); + // SgExprListExp* dimList = arrType->get_dim_info(); + int count = 0; + SgExpression* expr; + SgType* pntrType = isSgPointerType(vs_->get_type()); + + if (arrType != NULL) { + SgExprListExp* dimList = arrType->get_dim_info(); + if (!static_cast<const IR_roseCode *>(ir_)->is_fortran_) { + SgExpressionPtrList::iterator it = + dimList->get_expressions().begin(); + + while ((it != dimList->get_expressions().end()) && (count < dim)) { + it++; + count++; + } + + expr = *it; + } else { + SgExpressionPtrList::reverse_iterator i = + dimList->get_expressions().rbegin(); + for (; (i != dimList->get_expressions().rend()) && (count < dim); + i++) { + + count++; + } + + expr = *i; + } + } else if (pntrType != NULL) { + + while (count < dim) { + pntrType = (isSgPointerType(pntrType))->get_base_type(); + count++; + } + if (isSgPointerType(pntrType)) + expr = new SgExpression; + } + + if (!expr) + throw ir_error("Index variable is NULL!!"); + + // Manu :: debug + std::cout << "---------- size :: " << isSgNode(expr)->unparseToString().c_str() << "\n"; + + return new omega::CG_roseRepr(expr); + 
+} + +IR_ARRAY_LAYOUT_TYPE IR_roseArraySymbol::layout_type() const { + if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) + return IR_ARRAY_LAYOUT_COLUMN_MAJOR; + else + return IR_ARRAY_LAYOUT_ROW_MAJOR; + +} + +bool IR_roseArraySymbol::operator==(const IR_Symbol &that) const { + + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseArraySymbol *l_that = + static_cast<const IR_roseArraySymbol *>(&that); + return this->vs_ == l_that->vs_; + +} + +IR_Symbol *IR_roseArraySymbol::clone() const { + return new IR_roseArraySymbol(ir_, vs_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseConstantRef +// ---------------------------------------------------------------------------- + +bool IR_roseConstantRef::operator==(const IR_Ref &that) const { + + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseConstantRef *l_that = + static_cast<const IR_roseConstantRef *>(&that); + + if (this->type_ != l_that->type_) + return false; + + if (this->type_ == IR_CONSTANT_INT) + return this->i_ == l_that->i_; + else + return this->f_ == l_that->f_; + +} + +omega::CG_outputRepr *IR_roseConstantRef::convert() { + if (type_ == IR_CONSTANT_INT) { + omega::CG_roseRepr *result = new omega::CG_roseRepr( + isSgExpression(buildIntVal(static_cast<int>(i_)))); + delete this; + return result; + } else + throw ir_error("constant type not supported"); + +} + +IR_Ref *IR_roseConstantRef::clone() const { + if (type_ == IR_CONSTANT_INT) + return new IR_roseConstantRef(ir_, i_); + else if (type_ == IR_CONSTANT_FLOAT) + return new IR_roseConstantRef(ir_, f_); + else + throw ir_error("constant type not supported"); + +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseScalarRef +// ---------------------------------------------------------------------------- + +bool IR_roseScalarRef::is_write() const { + /* if (ins_pos_ != NULL && op_pos_ == -1) + return true; + 
else + return false; + */ + + if (is_write_ == 1) + return true; + + return false; +} + +IR_ScalarSymbol *IR_roseScalarRef::symbol() const { + return new IR_roseScalarSymbol(ir_, vs_->get_symbol()); +} + +bool IR_roseScalarRef::operator==(const IR_Ref &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseScalarRef *l_that = + static_cast<const IR_roseScalarRef *>(&that); + + if (this->ins_pos_ == NULL) + return this->vs_ == l_that->vs_; + else + return this->ins_pos_ == l_that->ins_pos_ + && this->op_pos_ == l_that->op_pos_; +} + +omega::CG_outputRepr *IR_roseScalarRef::convert() { + omega::CG_roseRepr *result = new omega::CG_roseRepr(isSgExpression(vs_)); + delete this; + return result; + +} + +IR_Ref * IR_roseScalarRef::clone() const { + //if (ins_pos_ == NULL) + return new IR_roseScalarRef(ir_, vs_, this->is_write_); + //else + // return new IR_roseScalarRef(ir_, , op_pos_); + +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseArrayRef +// ---------------------------------------------------------------------------- + +bool IR_roseArrayRef::is_write() const { + SgAssignOp* assignment; + + if (is_write_ == 1 || is_write_ == 0) + return is_write_; + if (assignment = isSgAssignOp(ia_->get_parent())) { + if (assignment->get_lhs_operand() == ia_) + return true; + } else if (SgExprStatement* expr_stmt = isSgExprStatement( + ia_->get_parent())) { + SgExpression* exp = expr_stmt->get_expression(); + + if (exp) { + if (assignment = isSgAssignOp(exp)) { + if (assignment->get_lhs_operand() == ia_) + return true; + + } + } + + } + return false; +} + +omega::CG_outputRepr *IR_roseArrayRef::index(int dim) const { + + SgExpression *current = isSgExpression(ia_); + SgExpression* expr; + int count = 0; + + while (isSgPntrArrRefExp(current)) { + current = isSgPntrArrRefExp(current)->get_lhs_operand(); + count++; + } + + current = ia_; + + while (count > dim) { + expr = 
isSgPntrArrRefExp(current)->get_rhs_operand(); + current = isSgPntrArrRefExp(current)->get_lhs_operand(); + count--; + } + + // Manu:: fortran support + if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) { + expr = isSgPntrArrRefExp(ia_)->get_rhs_operand(); + count = 0; + if (isSgExprListExp(expr)) { + SgExpressionPtrList::iterator indexList = isSgExprListExp(expr)->get_expressions().begin(); + while (count < dim) { + indexList++; + count++; + } + expr = isSgExpression(*indexList); + } + } + + if (!expr) + throw ir_error("Index variable is NULL!!"); + + + omega::CG_roseRepr* ind = new omega::CG_roseRepr(expr); + + return ind->clone(); + +} + +IR_ArraySymbol *IR_roseArrayRef::symbol() const { + + SgExpression *current = isSgExpression(ia_); + + SgVarRefExp* base; + SgVariableSymbol *arrSymbol; + while (isSgPntrArrRefExp(current) || isSgUnaryOp(current)) { + if (isSgPntrArrRefExp(current)) + current = isSgPntrArrRefExp(current)->get_lhs_operand(); + else if (isSgUnaryOp(current)) + /* To handle support for addressof operator and pointer dereference + * both of which are unary ops + */ + current = isSgUnaryOp(current)->get_operand(); + } + if (base = isSgVarRefExp(current)) { + arrSymbol = (SgVariableSymbol*) (base->get_symbol()); + std::string x = arrSymbol->get_name().getString(); + } else + throw ir_error("Array Symbol is not a variable?!"); + + return new IR_roseArraySymbol(ir_, arrSymbol); + +} + +bool IR_roseArrayRef::operator==(const IR_Ref &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseArrayRef *l_that = static_cast<const IR_roseArrayRef *>(&that); + + return this->ia_ == l_that->ia_; +} + +omega::CG_outputRepr *IR_roseArrayRef::convert() { + omega::CG_roseRepr *temp = new omega::CG_roseRepr( + isSgExpression(this->ia_)); + omega::CG_outputRepr *result = temp->clone(); +// delete this; // Commented by Manu + return result; +} + +IR_Ref *IR_roseArrayRef::clone() const { + return new IR_roseArrayRef(ir_, ia_, 
is_write_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseLoop +// ---------------------------------------------------------------------------- + +IR_ScalarSymbol *IR_roseLoop::index() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + SgVariableSymbol* vs = NULL; + if (tf) { + SgForInitStatement* list = tf->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = initStatements.begin(); + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) + if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) + vs = var_ref->get_symbol(); + } else if (tfortran) { + SgExpression* init = tfortran->get_initialization(); + + if (SgAssignOp* op = isSgAssignOp(init)) + if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) + vs = var_ref->get_symbol(); + + } + + if (vs == NULL) + throw ir_error("Index variable is NULL!!"); + + return new IR_roseScalarSymbol(ir_, vs); +} + +omega::CG_outputRepr *IR_roseLoop::lower_bound() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + + SgExpression* lowerBound = NULL; + + if (tf) { + SgForInitStatement* list = tf->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = initStatements.begin(); + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) { + lowerBound = op->get_rhs_operand(); + //Rose sometimes introduces an unnecessary cast which is a unary op + if (isSgUnaryOp(lowerBound)) + lowerBound = isSgUnaryOp(lowerBound)->get_operand(); + + } + } else if (tfortran) { + SgExpression* init = tfortran->get_initialization(); + + if (SgAssignOp* op = isSgAssignOp(init)) + lowerBound = op->get_rhs_operand(); + } + + if 
(lowerBound == NULL) + throw ir_error("Lower Bound is NULL!!"); + + return new omega::CG_roseRepr(lowerBound); +} + +omega::CG_outputRepr *IR_roseLoop::upper_bound() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + SgExpression* upperBound = NULL; + if (tf) { + SgBinaryOp* test_expr = isSgBinaryOp(tf->get_test_expr()); + if (test_expr == NULL) + throw ir_error("Test Expression is NULL!!"); + + upperBound = test_expr->get_rhs_operand(); + //Rose sometimes introduces an unnecessary cast which is a unary op + if (isSgUnaryOp(upperBound)) + upperBound = isSgUnaryOp(upperBound)->get_operand(); + if (upperBound == NULL) + throw ir_error("Upper Bound is NULL!!"); + } else if (tfortran) { + + upperBound = tfortran->get_bound(); + + } + + return new omega::CG_roseRepr(upperBound); + +} + +IR_CONDITION_TYPE IR_roseLoop::stop_cond() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + + if (tf) { + SgExpression* stopCond = NULL; + SgExpression* test_expr = tf->get_test_expr(); + + if (isSgLessThanOp(test_expr)) + return IR_COND_LT; + else if (isSgLessOrEqualOp(test_expr)) + return IR_COND_LE; + else if (isSgGreaterThanOp(test_expr)) + return IR_COND_GT; + else if (isSgGreaterOrEqualOp(test_expr)) + return IR_COND_GE; + + else + throw ir_error("loop stop condition unsupported"); + } else if (tfortran) { + SgExpression* increment = tfortran->get_increment(); + if (!isSgNullExpression(increment)) { + if (isSgMinusOp(increment) + && !isSgBinaryOp(isSgMinusOp(increment)->get_operand())) + return IR_COND_GE; + else + return IR_COND_LE; + } else { + return IR_COND_LE; // Manu:: if increment is not present, assume it to be 1. Just a workaround, not sure if it will be correct for all cases. 
+ SgExpression* lowerBound = NULL; + SgExpression* upperBound = NULL; + SgExpression* init = tfortran->get_initialization(); + SgIntVal* ub; + SgIntVal* lb; + if (SgAssignOp* op = isSgAssignOp(init)) + lowerBound = op->get_rhs_operand(); + + upperBound = tfortran->get_bound(); + + if ((upperBound != NULL) && (lowerBound != NULL)) { + + if ((ub = isSgIntVal(isSgValueExp(upperBound))) && (lb = + isSgIntVal(isSgValueExp(lowerBound)))) { + if (ub->get_value() > lb->get_value()) + return IR_COND_LE; + else + return IR_COND_GE; + } else + throw ir_error("loop stop condition unsupported"); + + } else + throw ir_error("malformed fortran loop bounds!!"); + + } + } + +} + +IR_Block *IR_roseLoop::body() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + SgNode* loop_body = NULL; + SgStatement* body_statements = NULL; + + if (tf) { + body_statements = tf->get_loop_body(); + } else if (tfortran) { + body_statements = isSgStatement(tfortran->get_body()); + + } + + loop_body = isSgNode(body_statements); + + SgStatementPtrList list; + if (isSgBasicBlock(loop_body)) { + list = isSgBasicBlock(loop_body)->get_statements(); + + if (list.size() == 1) + loop_body = isSgNode(*(list.begin())); + } + + if (loop_body == NULL) + throw ir_error("for loop body is NULL!!"); + + return new IR_roseBlock(ir_, loop_body); +} + +int IR_roseLoop::step_size() const { + + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + + if (tf) { + SgExpression *increment = tf->get_increment(); + + if (isSgPlusPlusOp(increment)) + return 1; + if (isSgMinusMinusOp(increment)) + return -1; + else if (SgAssignOp* assignment = isSgAssignOp(increment)) { + SgBinaryOp* stepsize = isSgBinaryOp(assignment->get_lhs_operand()); + if (stepsize == NULL) + throw ir_error("Step size expression is NULL!!"); + SgIntVal* step = isSgIntVal(stepsize->get_lhs_operand()); + return step->get_value(); + } else if (SgBinaryOp* inc = 
isSgPlusAssignOp(increment)) { + SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); + return (step->get_value()); + } else if (SgBinaryOp * inc = isSgMinusAssignOp(increment)) { + SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); + return -(step->get_value()); + } else if (SgBinaryOp * inc = isSgCompoundAssignOp(increment)) { + SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); + return (step->get_value()); + } + + } else if (tfortran) { + + SgExpression* increment = tfortran->get_increment(); + + if (!isSgNullExpression(increment)) { + if (isSgMinusOp(increment)) { + if (SgValueExp *inc = isSgValueExp( + isSgMinusOp(increment)->get_operand())) + if (isSgIntVal(inc)) + return -(isSgIntVal(inc)->get_value()); + } else { + if (SgValueExp* inc = isSgValueExp(increment)) + if (isSgIntVal(inc)) + return isSgIntVal(inc)->get_value(); + } + } else { + return 1; // Manu:: if increment is not present, assume it to be 1. Just a workaround, not sure if it will be correct for all cases. + SgExpression* lowerBound = NULL; + SgExpression* upperBound = NULL; + SgExpression* init = tfortran->get_initialization(); + SgIntVal* ub; + SgIntVal* lb; + if (SgAssignOp* op = isSgAssignOp(init)) + lowerBound = op->get_rhs_operand(); + + upperBound = tfortran->get_bound(); + + if ((upperBound != NULL) && (lowerBound != NULL)) { + + if ((ub = isSgIntVal(isSgValueExp(upperBound))) && (lb = + isSgIntVal(isSgValueExp(lowerBound)))) { + if (ub->get_value() > lb->get_value()) + return 1; + else + return -1; + } else + throw ir_error("loop stop condition unsupported"); + + } else + throw ir_error("loop stop condition unsupported"); + + } + + } + +} + +IR_Block *IR_roseLoop::convert() { + const IR_Code *ir = ir_; + SgNode *tnl = isSgNode(tf_); + delete this; + return new IR_roseBlock(ir, tnl); +} + +IR_Control *IR_roseLoop::clone() const { + + return new IR_roseLoop(ir_, tf_); + +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseBlock +// 
---------------------------------------------------------------------------- + +omega::CG_outputRepr *IR_roseBlock::original() const { + + omega::CG_outputRepr * tnl; + + if (isSgBasicBlock(tnl_)) { + + SgStatementPtrList *bb = new SgStatementPtrList(); + SgStatementPtrList::iterator it; + for (it = (isSgBasicBlock(tnl_)->get_statements()).begin(); + it != (isSgBasicBlock(tnl_)->get_statements()).end() + && (*it != start_); it++) + ; + + if (it != (isSgBasicBlock(tnl_)->get_statements()).end()) { + for (; it != (isSgBasicBlock(tnl_)->get_statements()).end(); it++) { + bb->push_back(*it); + if ((*it) == end_) + break; + } + } + tnl = new omega::CG_roseRepr(bb); + //block = tnl->clone(); + + } else { + tnl = new omega::CG_roseRepr(tnl_); + + //block = tnl->clone(); + } + + return tnl; + +} +omega::CG_outputRepr *IR_roseBlock::extract() const { + + std::string x = tnl_->unparseToString(); + + omega::CG_roseRepr * tnl; + + omega::CG_outputRepr* block; + + if (isSgBasicBlock(tnl_)) { + + SgStatementPtrList *bb = new SgStatementPtrList(); + SgStatementPtrList::iterator it; + for (it = (isSgBasicBlock(tnl_)->get_statements()).begin(); + it != (isSgBasicBlock(tnl_)->get_statements()).end() + && (*it != start_); it++) + ; + + if (it != (isSgBasicBlock(tnl_)->get_statements()).end()) { + for (; it != (isSgBasicBlock(tnl_)->get_statements()).end(); it++) { + bb->push_back(*it); + if ((*it) == end_) + break; + } + } + tnl = new omega::CG_roseRepr(bb); + block = tnl->clone(); + + } else { + tnl = new omega::CG_roseRepr(tnl_); + + block = tnl->clone(); + } + + delete tnl; + return block; +} + +IR_Control *IR_roseBlock::clone() const { + return new IR_roseBlock(ir_, tnl_, start_, end_); + +} +// ---------------------------------------------------------------------------- +// Class: IR_roseIf +// ---------------------------------------------------------------------------- +omega::CG_outputRepr *IR_roseIf::condition() const { + SgNode *tnl = 
isSgNode(isSgIfStmt(ti_)->get_conditional()); + SgExpression* exp = NULL; + if (SgExprStatement* stmt = isSgExprStatement(tnl)) + exp = stmt->get_expression(); + /* + SgExpression *op = iter(tnl); + if (iter.is_empty()) + throw ir_error("unrecognized if structure"); + tree_node *tn = iter.step(); + if (!iter.is_empty()) + throw ir_error("unrecognized if structure"); + if (!tn->is_instr()) + throw ir_error("unrecognized if structure"); + instruction *ins = static_cast<tree_instr *>(tn)->instr(); + if (!ins->opcode() == io_bfalse) + throw ir_error("unrecognized if structure"); + operand op = ins->src_op(0);*/ + if (exp == NULL) + return new omega::CG_roseRepr(tnl); + else + return new omega::CG_roseRepr(exp); +} + +IR_Block *IR_roseIf::then_body() const { + SgNode *tnl = isSgNode(isSgIfStmt(ti_)->get_true_body()); + + //tree_node_list *tnl = ti_->then_part(); + if (tnl == NULL) + return NULL; + /* + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return NULL; */ + + return new IR_roseBlock(ir_, tnl); +} + +IR_Block *IR_roseIf::else_body() const { + SgNode *tnl = isSgNode(isSgIfStmt(ti_)->get_false_body()); + + //tree_node_list *tnl = ti_->else_part(); + + if (tnl == NULL) + return NULL; + /* + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return NULL;*/ + + return new IR_roseBlock(ir_, tnl); +} + +IR_Block *IR_roseIf::convert() { + const IR_Code *ir = ir_; + /* SgNode *tnl = ti_->get_parent(); + SgNode *start, *end; + start = end = ti_; + + //tree_node_list *tnl = ti_->parent(); + //tree_node_list_e *start, *end; + //start = end = ti_->list_e(); + */ + delete this; + return new IR_roseBlock(ir, ti_); +} + +IR_Control *IR_roseIf::clone() const { + return new IR_roseIf(ir_, ti_); +} + +// -----------------------------------------------------------y----------------- +// Class: IR_roseCode_Global_Init +// ---------------------------------------------------------------------------- + +IR_roseCode_Global_Init *IR_roseCode_Global_Init::pinstance = 0; + 
+IR_roseCode_Global_Init * IR_roseCode_Global_Init::Instance(char** argv) { + if (pinstance == 0) { + pinstance = new IR_roseCode_Global_Init; + pinstance->project = frontend(2, argv); + + } + return pinstance; +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseCode +// ---------------------------------------------------------------------------- + +IR_roseCode::IR_roseCode(const char *filename, const char* proc_name) : + IR_Code() { + + SgProject* project; + + char* argv[2]; + int counter = 0; + argv[0] = (char*) malloc(5 * sizeof(char)); + argv[1] = (char*) malloc((strlen(filename) + 1) * sizeof(char)); + strcpy(argv[0], "rose"); + strcpy(argv[1], filename); + + project = (IR_roseCode_Global_Init::Instance(argv))->project; + //main_ssa = new ssa_unfiltered_cfg::SSA_UnfilteredCfg(project); + //main_ssa->run(); + firstScope = getFirstGlobalScope(project); + SgFilePtrList& file_list = project->get_fileList(); + + for (SgFilePtrList::iterator it = file_list.begin(); it != file_list.end(); + it++) { + file = isSgSourceFile(*it); + if (file->get_outputLanguage() == SgFile::e_Fortran_output_language) + is_fortran_ = true; + else + is_fortran_ = false; + + // Manu:: debug + // if (is_fortran_) + // std::cout << "Input is a fortran file\n"; + // else + // std::cout << "Input is a C file\n"; + + root = file->get_globalScope(); + + if (!is_fortran_) { // Manu:: this macro should not be created if the input code is in fortran + buildCpreprocessorDefineDeclaration(root, + "#define __rose_lt(x,y) ((x)<(y)?(x):(y))", + PreprocessingInfo::before); + buildCpreprocessorDefineDeclaration(root, + "#define __rose_gt(x,y) ((x)>(y)?(x):(y))", + PreprocessingInfo::before); + } + + symtab_ = isSgScopeStatement(root)->get_symbol_table(); + SgDeclarationStatementPtrList& declList = root->get_declarations(); + + p = declList.begin(); + + while (p != declList.end()) { + func = isSgFunctionDeclaration(*p); + if (func) { + if 
(!strcmp((func->get_name().getString()).c_str(), proc_name)) + break; + + } + p++; + counter++; + } + if (p != declList.end()) + break; + + } + + symtab2_ = func->get_definition()->get_symbol_table(); + symtab3_ = func->get_definition()->get_body()->get_symbol_table(); + // ocg_ = new omega::CG_roseBuilder(func->get_definition()->get_body()->get_symbol_table() , isSgNode(func->get_definition()->get_body())); + // Manu:: added is_fortran_ parameter + ocg_ = new omega::CG_roseBuilder(is_fortran_, root, firstScope, + func->get_definition()->get_symbol_table(), + func->get_definition()->get_body()->get_symbol_table(), + isSgNode(func->get_definition()->get_body())); + + i_ = 0; /*i_ handling may need revision */ + + free(argv[1]); + free(argv[0]); + +} + +IR_roseCode::~IR_roseCode() { +} + +void IR_roseCode::finalizeRose() { + // Moved this out of the deconstructor + // ???? + SgProject* project = (IR_roseCode_Global_Init::Instance(NULL))->project; + // -- Causes coredump. commented out for now -- // + // processes attributes left in Rose Ast + //postProcessRoseCodeInsertion(project); + project->unparse(); + //backend((IR_roseCode_Global_Init::Instance(NULL))->project); +} + +IR_ScalarSymbol *IR_roseCode::CreateScalarSymbol(const IR_Symbol *sym, int) { + char str1[14]; + if (typeid(*sym) == typeid(IR_roseScalarSymbol)) { + SgType *tn = + static_cast<const IR_roseScalarSymbol *>(sym)->vs_->get_type(); + sprintf(str1, "newVariable%i\0", i_); + SgVariableDeclaration* defn = buildVariableDeclaration(str1, tn); + i_++; + + SgInitializedNamePtrList& variables = defn->get_variables(); + SgInitializedNamePtrList::const_iterator i = variables.begin(); + SgInitializedName* initializedName = *i; + SgVariableSymbol* vs = new SgVariableSymbol(initializedName); + + prependStatement(defn, + isSgScopeStatement(func->get_definition()->get_body())); + vs->set_parent(symtab_); + symtab_->insert(str1, vs); + + if (vs == NULL) + throw ir_error("in CreateScalarSymbol: vs is NULL!!"); + + 
return new IR_roseScalarSymbol(this, vs); + } else if (typeid(*sym) == typeid(IR_roseArraySymbol)) { + SgType *tn1 = + static_cast<const IR_roseArraySymbol *>(sym)->vs_->get_type(); + while (isSgArrayType(tn1) || isSgPointerType(tn1)) { + if (isSgArrayType(tn1)) + tn1 = isSgArrayType(tn1)->get_base_type(); + else if (isSgPointerType(tn1)) + tn1 = isSgPointerType(tn1)->get_base_type(); + else + throw ir_error( + "in CreateScalarSymbol: symbol not an array nor a pointer!"); + } + + sprintf(str1, "newVariable%i\0", i_); + i_++; + + SgVariableDeclaration* defn1 = buildVariableDeclaration(str1, tn1); + SgInitializedNamePtrList& variables1 = defn1->get_variables(); + + SgInitializedNamePtrList::const_iterator i1 = variables1.begin(); + SgInitializedName* initializedName1 = *i1; + + SgVariableSymbol *vs1 = new SgVariableSymbol(initializedName1); + prependStatement(defn1, + isSgScopeStatement(func->get_definition()->get_body())); + + vs1->set_parent(symtab_); + symtab_->insert(str1, vs1); + + if (vs1 == NULL) + throw ir_error("in CreateScalarSymbol: vs1 is NULL!!"); + + return new IR_roseScalarSymbol(this, vs1); + } else + throw std::bad_typeid(); + +} + +IR_ArraySymbol *IR_roseCode::CreateArraySymbol(const IR_Symbol *sym, + std::vector<omega::CG_outputRepr *> &size, int) { + SgType *tn; + char str1[14]; + + if (typeid(*sym) == typeid(IR_roseScalarSymbol)) { + tn = static_cast<const IR_roseScalarSymbol *>(sym)->vs_->get_type(); + } else if (typeid(*sym) == typeid(IR_roseArraySymbol)) { + tn = static_cast<const IR_roseArraySymbol *>(sym)->vs_->get_type(); + while (isSgArrayType(tn) || isSgPointerType(tn)) { + if (isSgArrayType(tn)) + tn = isSgArrayType(tn)->get_base_type(); + else if (isSgPointerType(tn)) + tn = isSgPointerType(tn)->get_base_type(); + else + throw ir_error( + "in CreateScalarSymbol: symbol not an array nor a pointer!"); + } + } else + throw std::bad_typeid(); + + + // Manu:: Fortran support + std::vector<SgExpression *>exprs; + SgExprListExp *exprLstExp; + 
SgExpression* sizeExpression = new SgNullExpression(); + SgArrayType* arrayType = new SgArrayType(tn,sizeExpression); + sizeExpression->set_parent(arrayType); + + if (!is_fortran_) { + for (int i = size.size() - 1; i >= 0; i--) { + tn = buildArrayType(tn,static_cast<omega::CG_roseRepr *>(size[i])->GetExpression()); + } + } else { // Manu:: required for fortran support + for (int i = size.size() - 1; i >= 0; i--) { + exprs.push_back(static_cast<omega::CG_roseRepr *>(size[i])->GetExpression()); + } + } + + if (is_fortran_) { + exprLstExp = buildExprListExp(exprs); + arrayType->set_dim_info(exprLstExp); + exprLstExp->set_parent(arrayType); + arrayType->set_rank(exprLstExp->get_expressions().size()); + } + + static int rose_array_counter = 1; + SgVariableDeclaration* defn2; + std::string s; + if (!is_fortran_) { + s = std::string("_P") + omega::to_string(rose_array_counter++); + defn2 = buildVariableDeclaration(const_cast<char *>(s.c_str()), tn); + } else {// Manu:: fortran support + s = std::string("f_P") + omega::to_string(rose_array_counter++); + defn2 = buildVariableDeclaration(const_cast<char *>(s.c_str()), arrayType); + } + + + SgInitializedNamePtrList& variables2 = defn2->get_variables(); + + SgInitializedNamePtrList::const_iterator i2 = variables2.begin(); + SgInitializedName* initializedName2 = *i2; + SgVariableSymbol *vs = new SgVariableSymbol(initializedName2); + + prependStatement(defn2, + isSgScopeStatement(func->get_definition()->get_body())); + + vs->set_parent(symtab_); + symtab_->insert(SgName(s.c_str()), vs); + + return new IR_roseArraySymbol(this, vs); +} + +IR_ScalarRef *IR_roseCode::CreateScalarRef(const IR_ScalarSymbol *sym) { + return new IR_roseScalarRef(this, + buildVarRefExp(static_cast<const IR_roseScalarSymbol *>(sym)->vs_)); + +} + +IR_ArrayRef *IR_roseCode::CreateArrayRef(const IR_ArraySymbol *sym, + std::vector<omega::CG_outputRepr *> &index) { + + int t; + + if (sym->n_dim() != index.size()) + throw std::invalid_argument("incorrect array 
symbol dimensionality"); + + const IR_roseArraySymbol *l_sym = + static_cast<const IR_roseArraySymbol *>(sym); + + SgVariableSymbol *vs = l_sym->vs_; + SgExpression* ia1 = buildVarRefExp(vs); + + + + if (is_fortran_) { // Manu:: fortran support + std::vector<SgExpression *>exprs; + for (int i = 0 ; i < index.size(); i++) { + exprs.push_back(static_cast<omega::CG_roseRepr *>(index[i])->GetExpression()); + } + SgExprListExp *exprLstExp; + exprLstExp = buildExprListExp(exprs); + ia1 = buildPntrArrRefExp(ia1,exprLstExp); + } else { + for (int i = 0; i < index.size(); i++) { +/* + if (is_fortran_) + t = index.size() - i - 1; + else + t = i; +*/ + + // std::string y = + // isSgNode( + // static_cast<omega::CG_roseRepr *>(index[i])->GetExpression())->unparseToString(); + ia1 = buildPntrArrRefExp(ia1, + static_cast<omega::CG_roseRepr *>(index[i])->GetExpression()); + + } + } + + SgPntrArrRefExp *ia = isSgPntrArrRefExp(ia1); + //std::string z = isSgNode(ia)->unparseToString(); + + return new IR_roseArrayRef(this, ia, -1); + +} + +std::vector<IR_ScalarRef *> IR_roseCode::FindScalarRef( + const omega::CG_outputRepr *repr) const { + std::vector<IR_ScalarRef *> scalars; + SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); + SgStatementPtrList *list = + static_cast<const omega::CG_roseRepr *>(repr)->GetList(); + SgStatement* stmt; + SgExpression * exp; + + if (list != NULL) { + for (SgStatementPtrList::iterator it = (*list).begin(); + it != (*list).end(); it++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(*it)); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + } + + else if (tnl != NULL) { + if (stmt = isSgStatement(tnl)) { + if (isSgBasicBlock(stmt)) { + SgStatementPtrList& stmts = + isSgBasicBlock(stmt)->get_statements(); + for (int i = 0; i < stmts.size(); i++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(stmts[i])); + std::vector<IR_ScalarRef *> 
a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + + } else if (isSgForStatement(stmt)) { + + SgForStatement *tnf = isSgForStatement(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tnf->get_loop_body())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } else if (isSgFortranDo(stmt)) { + SgFortranDo *tfortran = isSgFortranDo(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tfortran->get_body())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } else if (isSgIfStmt(stmt)) { + SgIfStmt* tni = isSgIfStmt(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(tni->get_conditional())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); + a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); + a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } else if (isSgExprStatement(stmt)) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgExpression( + isSgExprStatement(stmt)->get_expression())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + + } + } + } else { + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + if (isSgVarRefExp(op) + && (!isSgArrayType(isSgVarRefExp(op)->get_type()))) { + /* if ((isSgAssignOp(isSgNode(op)->get_parent())) + && ((isSgAssignOp(isSgNode(op)->get_parent())->get_lhs_operand()) + == op)) + scalars.push_back( + new IR_roseScalarRef(this, + isSgAssignOp(isSgNode(op)->get_parent()), -1)); + else + */ + if 
(SgBinaryOp* op_ = isSgBinaryOp( + isSgVarRefExp(op)->get_parent())) { + if (SgCompoundAssignOp *op__ = isSgCompoundAssignOp(op_)) { + if (isSgCompoundAssignOp(op_)->get_lhs_operand() + == isSgVarRefExp(op)) { + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), + 1)); + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), + 0)); + } + } + } else if (SgAssignOp* assmt = isSgAssignOp( + isSgVarRefExp(op)->get_parent())) { + + if (assmt->get_lhs_operand() == isSgVarRefExp(op)) + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), 1)); + } else if (SgAssignOp * assmt = isSgAssignOp( + isSgVarRefExp(op)->get_parent())) { + + if (assmt->get_rhs_operand() == isSgVarRefExp(op)) + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), 0)); + } else + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), 0)); + } else if (isSgAssignOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_lhs_operand()); + std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_rhs_operand()); + std::vector<IR_ScalarRef *> a2 = FindScalarRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(scalars)); + + } else if (isSgBinaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_lhs_operand()); + std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_rhs_operand()); + std::vector<IR_ScalarRef *> a2 = FindScalarRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(scalars)); + } else if (isSgUnaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgUnaryOp(op)->get_operand()); + std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); + delete 
r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + } + + } + return scalars; + +} + +std::vector<IR_ArrayRef *> IR_roseCode::FindArrayRef( + const omega::CG_outputRepr *repr) const { + std::vector<IR_ArrayRef *> arrays; + SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); + SgStatementPtrList* list = + static_cast<const omega::CG_roseRepr *>(repr)->GetList(); + SgStatement* stmt; + SgExpression * exp; + + if (list != NULL) { + for (SgStatementPtrList::iterator it = (*list).begin(); + it != (*list).end(); it++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(*it)); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + } else if (tnl != NULL) { + if (stmt = isSgStatement(tnl)) { + if (isSgBasicBlock(stmt)) { + SgStatementPtrList& stmts = + isSgBasicBlock(stmt)->get_statements(); + for (int i = 0; i < stmts.size(); i++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(stmts[i])); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + + } else if (isSgForStatement(stmt)) { + + SgForStatement *tnf = isSgForStatement(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tnf->get_loop_body())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } else if (isSgFortranDo(stmt)) { + SgFortranDo *tfortran = isSgFortranDo(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tfortran->get_body())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } else if (isSgIfStmt(stmt)) { + SgIfStmt* tni = isSgIfStmt(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(tni->get_conditional())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), 
back_inserter(arrays)); + r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); + a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); + a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } else if (isSgExprStatement(stmt)) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgExpression( + isSgExprStatement(stmt)->get_expression())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + + } + } + } else { + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + if (isSgPntrArrRefExp(op)) { + + SgVarRefExp* base; + SgExpression* op2; + if (isSgCompoundAssignOp(isSgPntrArrRefExp(op)->get_parent())) { + IR_roseArrayRef *ref1 = new IR_roseArrayRef(this, + isSgPntrArrRefExp(op), 0); + arrays.push_back(ref1); + IR_roseArrayRef *ref2 = new IR_roseArrayRef(this, + isSgPntrArrRefExp(op), 1); + arrays.push_back(ref2); + } else { + IR_roseArrayRef *ref3 = new IR_roseArrayRef(this, + isSgPntrArrRefExp(op), -1); + arrays.push_back(ref3); + + while (isSgPntrArrRefExp(op)) { + op2 = isSgPntrArrRefExp(op)->get_rhs_operand(); + op = isSgPntrArrRefExp(op)->get_lhs_operand(); + omega::CG_roseRepr *r = new omega::CG_roseRepr(op2); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + + } + } + /* base = isSgVarRefExp(op); + SgVariableSymbol *arrSymbol = (SgVariableSymbol*)(base->get_symbol()); + SgArrayType *arrType = isSgArrayType(arrSymbol->get_type()); + + SgExprListExp* dimList = arrType->get_dim_info(); + + if(dimList != NULL){ + SgExpressionPtrList::iterator it = dimList->get_expressions().begin(); + SgExpression *expr; + + + for (int i = 0; it != dimList->get_expressions().end(); it++, i++) + { + expr = *it; + + omega::CG_roseRepr *r = new omega::CG_roseRepr(expr); + 
std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + + } + arrays.push_back(ref); + */ + } else if (isSgAssignOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_lhs_operand()); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_rhs_operand()); + std::vector<IR_ArrayRef *> a2 = FindArrayRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(arrays)); + + } else if (isSgBinaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_lhs_operand()); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_rhs_operand()); + std::vector<IR_ArrayRef *> a2 = FindArrayRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(arrays)); + } else if (isSgUnaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgUnaryOp(op)->get_operand()); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + } + + } + return arrays; + + /* std::string x; + SgStatement* stmt = isSgStatement(tnl); + SGExprStatement* expr_statement = isSgExprStatement(stmt); + SgExpression* exp= NULL; + if(expr_statement == NULL){ + if(! 
(SgExpression* exp = isSgExpression(tnl)) + throw ir_error("FindArrayRef: Not a stmt nor an expression!!"); + + if( expr_statement != NULL){ + for(int i=0; i < tnl->get_numberOfTraversalSuccessors(); i++){ + + SgNode* tn = isSgStatement(tnl); + SgStatement* stmt = isSgStatement(tn); + if(stmt != NULL){ + SgExprStatement* expr_statement = isSgExprStatement(tn); + if(expr_statement != NULL) + x = isSgNode(expr_statement)->unparseToString(); + exp = expr_statement->get_expression(); + + } + else{ + + exp = isSgExpression(tn); + } + if(exp != NULL){ + x = isSgNode(exp)->unparseToString(); + + if(SgPntrArrRefExp* arrRef = isSgPntrArrRefExp(exp) ){ + if(arrRef == NULL) + throw ir_error("something wrong"); + IR_roseArrayRef *ref = new IR_roseArrayRef(this, arrRef); + arrays.push_back(ref); + } + + omega::CG_outputRepr *r = new omega::CG_roseRepr(isSgNode(exp->get_rhs_operand())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + + omega::CG_outputRepr *r1 = new omega::CG_roseRepr(isSgNode(exp->get_lhs_operand())); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + + } + }*/ + +} + +std::vector<IR_Control *> IR_roseCode::FindOneLevelControlStructure( + const IR_Block *block) const { + + std::vector<IR_Control *> controls; + int i; + int j; + int begin; + int end; + SgNode* tnl_ = + ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_); + + if (isSgForStatement(tnl_)) + controls.push_back(new IR_roseLoop(this, tnl_)); + else if (isSgFortranDo(tnl_)) + controls.push_back(new IR_roseLoop(this, tnl_)); + else if (isSgIfStmt(tnl_)) + controls.push_back(new IR_roseIf(this, tnl_)); + + else if (isSgBasicBlock(tnl_)) { + + SgStatementPtrList& stmts = isSgBasicBlock(tnl_)->get_statements(); + + for (i = 0; i < stmts.size(); i++) { + if (isSgNode(stmts[i]) + == ((static_cast<IR_roseBlock *>(const_cast<IR_Block 
*>(block)))->start_)) + begin = i; + if (isSgNode(stmts[i]) + == ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->end_)) + end = i; + } + + SgNode* start = NULL; + SgNode* prev = NULL; + for (i = begin; i <= end; i++) { + if (isSgForStatement(stmts[i]) || isSgFortranDo(stmts[i])) { + if (start != NULL) { + controls.push_back( + new IR_roseBlock(this, + (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, + start, prev)); + start = NULL; + } + controls.push_back(new IR_roseLoop(this, isSgNode(stmts[i]))); + } else if (isSgIfStmt(stmts[i])) { + if (start != NULL) { + controls.push_back( + new IR_roseBlock(this, + (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, + start, prev)); + start = NULL; + } + controls.push_back(new IR_roseIf(this, isSgNode(stmts[i]))); + + } else if (start == NULL) + start = isSgNode(stmts[i]); + + prev = isSgNode(stmts[i]); + } + + if ((start != NULL) && (start != isSgNode(stmts[begin]))) + controls.push_back( + new IR_roseBlock(this, + (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, + start, prev)); + } + + return controls; + +} + +/*std::vector<IR_Control *> IR_roseCode::FindOneLevelControlStructure(const IR_Block *block) const { + + std::vector<IR_Control *> controls; + int i; + int j; + SgNode* tnl_ = ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_); + + + if(isSgForStatement(tnl_)) + controls.push_back(new IR_roseLoop(this,tnl_)); + + else if(isSgBasicBlock(tnl_)){ + + SgStatementPtrList& stmts = isSgBasicBlock(tnl_)->get_statements(); + + for(i =0; i < stmts.size(); i++){ + if(isSgNode(stmts[i]) == ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->start_)) + break; + } + + + SgNode* start= NULL; + SgNode* prev= NULL; + for(; i < stmts.size(); i++){ + if ( isSgForStatement(stmts[i]) || isSgFortranDo(stmts[i])){ + if(start != NULL){ + controls.push_back(new IR_roseBlock(this, (static_cast<IR_roseBlock *>(const_cast<IR_Block 
*>(block)))->tnl_ , start, prev)); + start = NULL; + } + controls.push_back(new IR_roseLoop(this, isSgNode(stmts[i]))); + } + else if( start == NULL ) + start = isSgNode(stmts[i]); + + prev = isSgNode(stmts[i]); + } + + if((start != NULL) && (start != isSgNode(stmts[0]))) + controls.push_back(new IR_roseBlock(this, (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, start, prev)); + } + + return controls; + + } + +*/ +IR_Block *IR_roseCode::MergeNeighboringControlStructures( + const std::vector<IR_Control *> &controls) const { + if (controls.size() == 0) + return NULL; + + SgNode *tnl = NULL; + SgNode *start, *end; + for (int i = 0; i < controls.size(); i++) { + switch (controls[i]->type()) { + case IR_CONTROL_LOOP: { + SgNode *tf = static_cast<IR_roseLoop *>(controls[i])->tf_; + if (tnl == NULL) { + tnl = tf->get_parent(); + start = end = tf; + } else { + if (tnl != tf->get_parent()) + throw ir_error("controls to merge not at the same level"); + end = tf; + } + break; + } + case IR_CONTROL_BLOCK: { + if (tnl == NULL) { + tnl = static_cast<IR_roseBlock *>(controls[0])->tnl_; + start = static_cast<IR_roseBlock *>(controls[0])->start_; + end = static_cast<IR_roseBlock *>(controls[0])->end_; + } else { + if (tnl != static_cast<IR_roseBlock *>(controls[0])->tnl_) + throw ir_error("controls to merge not at the same level"); + end = static_cast<IR_roseBlock *>(controls[0])->end_; + } + break; + } + default: + throw ir_error("unrecognized control to merge"); + } + } + + return new IR_roseBlock(controls[0]->ir_, tnl, start, end); +} + +IR_Block *IR_roseCode::GetCode() const { + SgFunctionDefinition* def = NULL; + SgBasicBlock* block = NULL; + if (func != 0) { + if (def = func->get_definition()) { + if (block = def->get_body()) + return new IR_roseBlock(this, + func->get_definition()->get_body()); + } + } + + return NULL; + +} + +void IR_roseCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) { + /* SgStatementPtrList *tnl = + 
static_cast<omega::CG_roseRepr *>(repr)->GetList(); + SgNode *tf_old; + */ + SgStatementPtrList *tnl = + static_cast<omega::CG_roseRepr *>(repr)->GetList(); + SgNode* node_ = static_cast<omega::CG_roseRepr *>(repr)->GetCode(); + SgNode * tf_old; + + /* May need future revision it tnl has more than one statement */ + + switch (old->type()) { + + case IR_CONTROL_LOOP: + tf_old = static_cast<IR_roseLoop *>(old)->tf_; + break; + case IR_CONTROL_BLOCK: + tf_old = static_cast<IR_roseBlock *>(old)->start_; + break; + + default: + throw ir_error("control structure to be replaced not supported"); + break; + } + + std::string y = tf_old->unparseToString(); + SgStatement *s = isSgStatement(tf_old); + if (s != 0) { + SgStatement *p = isSgStatement(tf_old->get_parent()); + + if (p != 0) { + SgStatement* temp = s; + if (tnl != NULL) { + SgStatementPtrList::iterator it = (*tnl).begin(); + p->insert_statement(temp, *it, true); + temp = *it; + p->remove_statement(s); + it++; + for (; it != (*tnl).end(); it++) { + p->insert_statement(temp, *it, false); + temp = *it; + } + } else if (node_ != NULL) { + if (!isSgStatement(node_)) + throw ir_error("Replacing Code not a statement!"); + else { + SgStatement* replace_ = isSgStatement(node_); + p->insert_statement(s, replace_, true); + p->remove_statement(s); + + } + } else { + throw ir_error("Replacing Code not a statement!"); + } + } else + throw ir_error("Replacing Code not a statement!"); + } else + throw ir_error("Replacing Code not a statement!"); + + delete old; + delete repr; + /* May need future revision it tnl has more than one statement */ + /* + switch (old->type()) { + + case IR_CONTROL_LOOP: + tf_old = static_cast<IR_roseLoop *>(old)->tf_; + break; + case IR_CONTROL_BLOCK: + tf_old = static_cast<IR_roseBlock *>(old)->start_; + break; + + default: + throw ir_error("control structure to be replaced not supported"); + break; + } + + // std::string y = tf_old->unparseToString(); + SgStatement *s = isSgStatement(tf_old); + if (s 
!= 0) { + SgStatement *p = isSgStatement(tf_old->get_parent()); + + if (p != 0) { + // SgStatement* it2 = isSgStatement(tnl); + + // if(it2 != NULL){ + p->replace_statement(s, *tnl); + // } + // else { + // throw ir_error("Replacing Code not a statement!"); + // } + } else + throw ir_error("Replacing Code not a statement!"); + } else + throw ir_error("Replacing Code not a statement!"); + // y = tnl->unparseToString(); + delete old; + delete repr; + */ +} + +void IR_roseCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) { + + SgExpression* op = static_cast<omega::CG_roseRepr *>(repr)->GetExpression(); + + if (typeid(*old) == typeid(IR_roseArrayRef)) { + SgPntrArrRefExp* ia_orig = static_cast<IR_roseArrayRef *>(old)->ia_; + SgExpression* parent = isSgExpression(isSgNode(ia_orig)->get_parent()); + std::string x = isSgNode(op)->unparseToString(); + std::string y = isSgNode(ia_orig)->unparseToString(); + if (parent != NULL) { + std::string z = isSgNode(parent)->unparseToString(); + parent->replace_expression(ia_orig, op); + isSgNode(op)->set_parent(isSgNode(parent)); + + /* if(isSgBinaryOp(parent)) + { + if(isSgBinaryOp(parent)->get_lhs_operand() == ia_orig){ + isSgBinaryOp(parent)->set_lhs_operand(op); + }else if(isSgBinaryOp(parent)->get_rhs_operand() == ia_orig){ + isSgBinaryOp(parent)->set_rhs_operand(op); + + + } + else + parent->replace_expression(ia_orig, op); + */ + } else { + SgStatement* parent_stmt = isSgStatement( + isSgNode(ia_orig)->get_parent()); + if (parent_stmt != NULL) + parent_stmt->replace_expression(ia_orig, op); + else + throw ir_error( + "ReplaceExpression: parent neither expression nor statement"); + } + } else + throw ir_error("replacing a scalar variable not implemented"); + + delete old; +} + +/*std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > IR_roseCode::FindScalarDeps( + const omega::CG_outputRepr *repr1, const omega::CG_outputRepr *repr2, + std::vector<std::string> index, int i, int j) { + + 
std::vector<DependenceVector> dvs1; + std::vector<DependenceVector> dvs2; + SgNode *tnl_1 = static_cast<const omega::CG_roseRepr *>(repr1)->GetCode(); + SgNode *tnl_2 = static_cast<const omega::CG_roseRepr *>(repr2)->GetCode(); + SgStatementPtrList* list_1 = + static_cast<const omega::CG_roseRepr *>(repr1)->GetList(); + SgStatementPtrList output_list_1; + + std::map<SgVarRefExp*, IR_ScalarRef*> read_scalars_1; + std::map<SgVarRefExp*, IR_ScalarRef*> write_scalars_1; + std::set<std::string> indices; + //std::set<VirtualCFG::CFGNode> reaching_defs_1; + std::set<std::string> def_vars_1; + + populateLists(tnl_1, list_1, output_list_1); + populateScalars(repr1, read_scalars_1, write_scalars_1, indices, index); + //def_vars_1); + //findDefinitions(output_list_1, reaching_defs_1, write_scalars_1); + //def_vars_1); + if (repr1 == repr2) + checkSelfDependency(output_list_1, dvs1, read_scalars_1, + write_scalars_1, index, i, j); + else { + SgStatementPtrList* list_2 = + static_cast<const omega::CG_roseRepr *>(repr2)->GetList(); + SgStatementPtrList output_list_2; + + std::map<SgVarRefExp*, IR_ScalarRef*> read_scalars_2; + std::map<SgVarRefExp*, IR_ScalarRef*> write_scalars_2; + //std::set<VirtualCFG::CFGNode> reaching_defs_2; + std::set<std::string> def_vars_2; + + populateLists(tnl_2, list_2, output_list_2); + populateScalars(repr2, read_scalars_2, write_scalars_2, indices, index); + //def_vars_2); + + checkDependency(output_list_2, dvs1, read_scalars_2, write_scalars_1, + index, i, j); + checkDependency(output_list_1, dvs1, read_scalars_1, write_scalars_2, + index, i, j); + checkWriteDependency(output_list_2, dvs1, write_scalars_2, + write_scalars_1, index, i, j); + checkWriteDependency(output_list_1, dvs1, write_scalars_1, + write_scalars_2, index, i, j); + } + + return std::make_pair(dvs1, dvs2); + //populateLists(tnl_2, list_2, list2); + + } +*/

// Classify the top-level operation of the expression carried by repr.
//
// A conditional expression whose test is `>` is reported as IR_OP_MAX and
// one whose test is `<` as IR_OP_MIN. Anything that is not recognised,
// including a conditional with any other test, is IR_OP_UNKNOWN.
IR_OPERATION_TYPE IR_roseCode::QueryExpOperation(
    const omega::CG_outputRepr *repr) const {
  SgExpression *op =
      static_cast<const omega::CG_roseRepr *>(repr)->GetExpression();

  if (isSgValueExp(op))
    return IR_OP_CONSTANT;
  if (isSgVarRefExp(op) || isSgPntrArrRefExp(op))
    return IR_OP_VARIABLE;
  if (isSgAssignOp(op) || isSgCompoundAssignOp(op))
    return IR_OP_ASSIGNMENT;
  if (isSgAddOp(op))
    return IR_OP_PLUS;
  if (isSgSubtractOp(op))
    return IR_OP_MINUS;
  if (isSgMultiplyOp(op))
    return IR_OP_MULTIPLY;
  if (isSgDivideOp(op))
    return IR_OP_DIVIDE;
  if (isSgMinusOp(op))
    return IR_OP_NEGATIVE;

  if (SgConditionalExp *conditional = isSgConditionalExp(op)) {
    SgExpression *cond = conditional->get_conditional_exp();
    if (isSgGreaterThanOp(cond))
      return IR_OP_MAX;
    if (isSgLessThanOp(cond))
      return IR_OP_MIN;
    // Any other test used to fall off the end of the function without a
    // return value (undefined behaviour); report it as unknown instead.
    return IR_OP_UNKNOWN;
  }

  if (isSgUnaryAddOp(op))
    return IR_OP_POSITIVE;
  if (isSgNullExpression(op))
    return IR_OP_NULL;

  return IR_OP_UNKNOWN;
}

/*void IR_roseCode::populateLists(SgNode* tnl_1, SgStatementPtrList* list_1, + SgStatementPtrList& output_list_1) { + if ((tnl_1 == NULL) && (list_1 != NULL)) { + output_list_1 = *list_1; + } else if (tnl_1 != NULL) { + + if (isSgForStatement(tnl_1)) { + SgStatement* check = isSgForStatement(tnl_1)->get_loop_body(); + if (isSgBasicBlock(check)) { + output_list_1 = isSgBasicBlock(check)->get_statements(); + + } else + output_list_1.push_back(check); + + } else if (isSgBasicBlock(tnl_1)) + output_list_1 = isSgBasicBlock(tnl_1)->get_statements(); + else if (isSgExprStatement(tnl_1)) + output_list_1.push_back(isSgExprStatement(tnl_1)); + else + //if (isSgIfStmt(tnl_1)) { + + throw ir_error( + "Statement type not handled, (probably IF statement)!!"); + + } + + } + + void IR_roseCode::populateScalars(const omega::CG_outputRepr *repr1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::set<std::string> &indices, std::vector<std::string> &index) { + + //std::set<std::string> &def_vars) { + std::vector<IR_ScalarRef *>
scalars = FindScalarRef(repr1); + + for (int k = 0; k < index.size(); k++) + indices.insert(index[k]); + + for (int k = 0; k < scalars.size(); k++) + if (indices.find(scalars[k]->name()) == indices.end()) { + if (scalars[k]->is_write()) { + write_scalars_1.insert( + std::pair<SgVarRefExp*, IR_ScalarRef*>( + (isSgVarRefExp( + static_cast<const omega::CG_roseRepr *>(scalars[k]->convert())->GetExpression())), + scalars[k])); + + } else + + read_scalars_1.insert( + std::pair<SgVarRefExp*, IR_ScalarRef*>( + (isSgVarRefExp( + static_cast<const omega::CG_roseRepr *>(scalars[k]->convert())->GetExpression())), + scalars[k])); + } + + } + + + void IR_roseCode::checkWriteDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j) { + + for (std::map<SgVarRefExp*, IR_ScalarRef*>::iterator it = + read_scalars_1.begin(); it != read_scalars_1.end(); it++) { + SgVarRefExp* var__ = it->first; + + ssa_unfiltered_cfg::SSA_UnfilteredCfg::NodeReachingDefTable to_compare = + main_ssa->getReachingDefsBefore(isSgNode(var__)); + + for (ssa_unfiltered_cfg::SSA_UnfilteredCfg::NodeReachingDefTable::iterator it4 = + to_compare.begin(); it4 != to_compare.end(); it4++) { + ssa_unfiltered_cfg::SSA_UnfilteredCfg::VarName var_ = it4->first; + for (int j = 0; j < var_.size(); j++) { + int found = 0; + if (var_[j] == var__->get_symbol()->get_declaration()) { + + ssa_unfiltered_cfg::ReachingDef::ReachingDefPtr to_compare_2 = + it4->second; + + if (to_compare_2->isPhiFunction()) { + std::set<VirtualCFG::CFGNode> to_compare_set = + to_compare_2->getActualDefinitions(); + for (std::set<VirtualCFG::CFGNode>::iterator cfg_it = + to_compare_set.begin(); + cfg_it != to_compare_set.end(); cfg_it++) { + + if (isSgAssignOp(cfg_it->getNode()) + || isSgCompoundAssignOp(cfg_it->getNode())) + if (SgVarRefExp* variable = + 
isSgVarRefExp( + isSgBinaryOp(cfg_it->getNode())->get_lhs_operand())) { + + if (write_scalars_1.find(variable) + != write_scalars_1.end()) { + + + //end debug + found = 1; + DependenceVector dv1; + dv1.sym = it->second->symbol(); + dv1.is_scalar_dependence = true; + + int max = (j > i) ? j : i; + int start = index.size() - max; + + //1.lbounds.push_back(0); + //1.ubounds.push_back(0); + //dv2.sym = + // read_scalars_2.find(*di)->second->symbol(); + for (int k = 0; k < index.size(); k++) { + if (k >= max) { + dv1.lbounds.push_back( + negInfinity); + dv1.ubounds.push_back(-1); + } else { + dv1.lbounds.push_back(0); + dv1.ubounds.push_back(0); + + } + + } + dvs1.push_back(dv1); + break; + } + } + } + + } + + } + if (found == 1) + break; + } + } + } + } + void IR_roseCode::checkDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j) { + + for (SgStatementPtrList::iterator it2 = output_list_1.begin(); + it2 != output_list_1.end(); it2++) { + + std::set<SgVarRefExp*> vars_1 = main_ssa->getUsesAtNode( + isSgNode(isSgExprStatement(*it2)->get_expression())); + + std::set<SgVarRefExp*>::iterator di; + + for (di = vars_1.begin(); di != vars_1.end(); di++) { + int found = 0; + if (read_scalars_1.find(*di) != read_scalars_1.end()) { + + ssa_unfiltered_cfg::ReachingDef::ReachingDefPtr to_compare = + main_ssa->getDefinitionForUse(*di); + if (to_compare->isPhiFunction()) { + + std::set<VirtualCFG::CFGNode> to_compare_set = + to_compare->getActualDefinitions(); + + for (std::set<VirtualCFG::CFGNode>::iterator cfg_it = + to_compare_set.begin(); + cfg_it != to_compare_set.end(); cfg_it++) { + + + if (SgAssignOp* definition = isSgAssignOp( + cfg_it->getNode())) + if (SgVarRefExp* variable = isSgVarRefExp( + definition->get_lhs_operand())) { + + if (write_scalars_1.find(variable) + != 
write_scalars_1.end()) { + + found = 1; + DependenceVector dv1; + //DependenceVector dv2; + dv1.sym = + read_scalars_1.find(*di)->second->symbol(); + dv1.is_scalar_dependence = true; + + int max = (j > i) ? j : i; + int start = index.size() - max; + + //1.lbounds.push_back(0); + //1.ubounds.push_back(0); + //dv2.sym = + // read_scalars_2.find(*di)->second->symbol(); + for (int k = 0; k < index.size(); k++) { + if (k >= max) { + dv1.lbounds.push_back(negInfinity); + dv1.ubounds.push_back(-1); + } else { + dv1.lbounds.push_back(0); + dv1.ubounds.push_back(0); + + } + + } + dvs1.push_back(dv1); + break; + } + } + } + } + if (found == 1) + break; + } + } + } + + } + + void IR_roseCode::checkSelfDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j) { + + for (SgStatementPtrList::iterator it2 = output_list_1.begin(); + it2 != output_list_1.end(); it2++) { + + std::set<SgVarRefExp*> vars_1 = main_ssa->getUsesAtNode( + isSgNode(isSgExprStatement(*it2)->get_expression())); + + std::set<SgVarRefExp*>::iterator di; + + for (di = vars_1.begin(); di != vars_1.end(); di++) { + + if (read_scalars_1.find(*di) != read_scalars_1.end()) { + + ssa_unfiltered_cfg::ReachingDef::ReachingDefPtr to_compare = + main_ssa->getDefinitionForUse(*di); + if (to_compare->isPhiFunction()) { + + std::set<VirtualCFG::CFGNode> to_compare_set = + to_compare->getActualDefinitions(); + int found = 0; + for (std::set<VirtualCFG::CFGNode>::iterator cfg_it = + to_compare_set.begin(); + cfg_it != to_compare_set.end(); cfg_it++) { + + if (isSgAssignOp(cfg_it->getNode()) + || isSgCompoundAssignOp(cfg_it->getNode())) + if (SgVarRefExp* variable = + isSgVarRefExp( + isSgBinaryOp(cfg_it->getNode())->get_lhs_operand())) { + + if (write_scalars_1.find(variable) + == write_scalars_1.end()) { + + + found = 1; + 
DependenceVector dv1; + dv1.sym = + read_scalars_1.find(*di)->second->symbol(); + dv1.is_scalar_dependence = true; + + int max = (j > i) ? j : i; + int start = index.size() - max; + + //1.lbounds.push_back(0); + //1.ubounds.push_back(0); + //dv2.sym = + // read_scalars_2.find(*di)->second->symbol(); + for (int k = 0; k < index.size(); k++) { + if (k >= max) { + dv1.lbounds.push_back(negInfinity); + dv1.ubounds.push_back(-1); + } else { + dv1.lbounds.push_back(0); + dv1.ubounds.push_back(0); + + } + + } + dvs1.push_back(dv1); + break; + } + } + } + } + + } + } + } + + } +*/ +IR_CONDITION_TYPE IR_roseCode::QueryBooleanExpOperation( + const omega::CG_outputRepr *repr) const { + SgExpression* op2 = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + SgNode* op; + + if (op2 == NULL) { + op = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); + + if (op != NULL) { + if (isSgExprStatement(op)) + op2 = isSgExprStatement(op)->get_expression(); + else + return IR_COND_UNKNOWN; + } else + return IR_COND_UNKNOWN; + } + + if (isSgEqualityOp(op2)) + return IR_COND_EQ; + else if (isSgNotEqualOp(op2)) + return IR_COND_NE; + else if (isSgLessThanOp(op2)) + return IR_COND_LT; + else if (isSgLessOrEqualOp(op2)) + return IR_COND_LE; + else if (isSgGreaterThanOp(op2)) + return IR_COND_GT; + else if (isSgGreaterOrEqualOp(op2)) + return IR_COND_GE; + + return IR_COND_UNKNOWN; + +} + +std::vector<omega::CG_outputRepr *> IR_roseCode::QueryExpOperand( + const omega::CG_outputRepr *repr) const { + std::vector<omega::CG_outputRepr *> v; + SgExpression* op1; + SgExpression* op2; + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + omega::CG_roseRepr *repr1; + + if (isSgValueExp(op) || isSgVarRefExp(op)) { + omega::CG_roseRepr *repr = new omega::CG_roseRepr(op); + v.push_back(repr); + } else if (isSgAssignOp(op)) { + op1 = isSgAssignOp(op)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + /*may be a 
problem as assignOp is a binaryop destop might be needed */ + } else if (isSgMinusOp(op)) { + op1 = isSgMinusOp(op)->get_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + } else if (isSgUnaryAddOp(op)) { + op1 = isSgUnaryAddOp(op)->get_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + } else if ((isSgAddOp(op) || isSgSubtractOp(op)) + || (isSgMultiplyOp(op) || isSgDivideOp(op))) { + op1 = isSgBinaryOp(op)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(op)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + } else if (isSgConditionalExp(op)) { + SgExpression* cond = isSgConditionalExp(op)->get_conditional_exp(); + op1 = isSgBinaryOp(cond)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(cond)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + } else if (isSgCompoundAssignOp(op)) { + SgExpression* cond = isSgCompoundAssignOp(op); + op1 = isSgBinaryOp(cond)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(cond)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + + } else if (isSgBinaryOp(op)) { + + op1 = isSgBinaryOp(op)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(op)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + } + + else + throw ir_error("operation not supported"); + + return v; +} + +IR_Ref *IR_roseCode::Repr2Ref(const omega::CG_outputRepr *repr) const { + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + + if (SgValueExp* im = isSgValueExp(op)) { + if (isSgIntVal(im)) + return new IR_roseConstantRef(this, + static_cast<omega::coef_t>(isSgIntVal(im)->get_value())); + else if (isSgUnsignedIntVal(im)) + return new 
IR_roseConstantRef(this, + static_cast<omega::coef_t>(isSgUnsignedIntVal(im)->get_value())); + else if (isSgLongIntVal(im)) + return new IR_roseConstantRef(this, + static_cast<omega::coef_t>(isSgLongIntVal(im)->get_value())); + else if (isSgFloatVal(im)) + return new IR_roseConstantRef(this, isSgFloatVal(im)->get_value()); + else + assert(0); + + } else if (isSgVarRefExp(op)) + return new IR_roseScalarRef(this, isSgVarRefExp(op)); + else + assert(0); + +} + diff --git a/chill/src/ir_rose_utils.cc b/chill/src/ir_rose_utils.cc new file mode 100644 index 0000000..fbce2f1 --- /dev/null +++ b/chill/src/ir_rose_utils.cc @@ -0,0 +1,88 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009 University of Utah + All Rights Reserved. + + Purpose: + SUIF interface utilities. + + Notes: + + Update history: + 01/2006 created by Chun Chen +*****************************************************************************/ + +//#include <suif1.h> +//#include <useful.h> +//#include <vector> +//#include <algorithm> +//#include <code_gen/CG_suifRepr.h> +#include "ir_rose_utils.hh" + + + +std::vector<SgForStatement *> find_loops(SgNode *tnl) { + std::vector<SgForStatement *> result; + + //tree_node_list_iter iter(tnl); + + /*while (!iter.is_empty()) { + tree_node *tn = iter.step(); + if (tn->kind() == TREE_FOR) + result.push_back(static_cast<tree_for *>(tn)); + } + */ + + SgStatementPtrList& blockStatements = isSgBasicBlock(tnl)->get_statements(); + for(SgStatementPtrList::const_iterator j = blockStatements.begin(); j != blockStatements.end(); j++) + if(isSgForStatement(*j)) + result.push_back(isSgForStatement(*j)); + + return result; +} + +std::vector<SgForStatement *> find_deepest_loops(SgStatementPtrList& tnl) { + + std::vector<SgForStatement *> loops; + + + + for(SgStatementPtrList::const_iterator j = tnl.begin(); j != tnl.end(); j++) + { + std::vector<SgForStatement *> t = 
find_deepest_loops(isSgNode(*j)); + if (t.size() > loops.size()) + loops = t; + } + + + + return loops; + +} + + + + + + + + +std::vector<SgForStatement *> find_deepest_loops(SgNode *tn) { + if (isSgForStatement(tn)) { + std::vector<SgForStatement *> loops; + + SgForStatement *tnf = static_cast<SgForStatement*>(tn); + loops.insert(loops.end(), tnf); + std::vector<SgForStatement*> t = find_deepest_loops(isSgNode(tnf->get_loop_body())); + std::copy(t.begin(), t.end(), std::back_inserter(loops)); + + return loops; + } + else if (isSgBasicBlock(tn)) { + SgBasicBlock *tnb = static_cast<SgBasicBlock*>(tn); + return find_deepest_loops(tnb->get_statements()); + } + else + return std::vector<SgForStatement *>(); +} + diff --git a/chill/src/irtools.cc b/chill/src/irtools.cc new file mode 100644 index 0000000..4ab6c85 --- /dev/null +++ b/chill/src/irtools.cc @@ -0,0 +1,279 @@ +/***************************************************************************** + Copyright (C) 2010 University of Utah + All Rights Reserved. + + Purpose: + Useful tools to analyze code in compiler IR format. + + Notes: + + History: + 06/2010 Created by Chun Chen. +*****************************************************************************/ + +#include <iostream> +#include <code_gen/CG_outputBuilder.h> +#include "irtools.hh" +#include "omegatools.hh" +#include "chill_error.hh" + +using namespace omega; + +// Build IR tree from the source code. Block type node can only be +// leaf, i.e., there is no further structures inside a block allowed. 
+std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *parent) { + std::vector<ir_tree_node *> result; + + switch (control->type()) { + case IR_CONTROL_BLOCK: { + std::vector<IR_Control *> controls = control->ir_->FindOneLevelControlStructure(static_cast<IR_Block *>(control)); + if (controls.size() == 0) { + ir_tree_node *node = new ir_tree_node; + node->content = control; + node->parent = parent; + node->payload = -1; + result.push_back(node); + } + else { + delete control; + for (int i = 0; i < controls.size(); i++) + switch (controls[i]->type()) { + case IR_CONTROL_BLOCK: { + std::vector<ir_tree_node *> t = build_ir_tree(controls[i], parent); + result.insert(result.end(), t.begin(), t.end()); + break; + } + case IR_CONTROL_LOOP: { + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]; + node->parent = parent; + node->children = build_ir_tree(static_cast<IR_Loop *>(controls[i])->body(), node); + node->payload = -1; + result.push_back(node); + break; + } + case IR_CONTROL_IF: { + static int unique_if_identifier = 0; + + IR_Block *block = static_cast<IR_If *>(controls[i])->then_body(); + if (block != NULL) { + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]; + node->parent = parent; + node->children = build_ir_tree(block, node); + node->payload = unique_if_identifier+1; + result.push_back(node); + } + + + block = static_cast<IR_If *>(controls[i])->else_body(); + if ( block != NULL) { + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]->clone(); + node->parent = parent; + node->children = build_ir_tree(block, node); + node->payload = unique_if_identifier; + result.push_back(node); + } + + unique_if_identifier += 2; + break; + } + default: + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]; + node->parent = parent; + node->payload = -1; + result.push_back(node); + break; + } + } + break; + } + case IR_CONTROL_LOOP: { + ir_tree_node *node = new ir_tree_node; + 
node->content = control; + node->parent = parent; + node->children = build_ir_tree(static_cast<const IR_Loop *>(control)->body(), node); + node->payload = -1; + result.push_back(node); + break; + } + default: + ir_tree_node *node = new ir_tree_node; + node->content = control; + node->parent = parent; + node->payload = -1; + result.push_back(node); + break; + } + + return result; +} + + +// Extract statements from IR tree. Statements returned are ordered in +// lexical order in the source code. +std::vector<ir_tree_node *> extract_ir_stmts(const std::vector<ir_tree_node *> &ir_tree) { + std::vector<ir_tree_node *> result; + for (int i = 0; i < ir_tree.size(); i++) + switch (ir_tree[i]->content->type()) { + case IR_CONTROL_BLOCK: + result.push_back(ir_tree[i]); + break; + case IR_CONTROL_LOOP: { + // clear loop payload from previous unsuccessful initialization process + ir_tree[i]->payload = -1; + + std::vector<ir_tree_node *> t = extract_ir_stmts(ir_tree[i]->children); + result.insert(result.end(), t.begin(), t.end()); + break; + } + case IR_CONTROL_IF: { + std::vector<ir_tree_node *> t = extract_ir_stmts(ir_tree[i]->children); + result.insert(result.end(), t.begin(), t.end()); + break; + } + default: + throw std::invalid_argument("invalid ir tree"); + } + + return result; +} + + +bool is_dependence_valid(ir_tree_node *src_node, ir_tree_node *dst_node, + const DependenceVector &dv, bool before) { + std::set<ir_tree_node *> loop_nodes; + ir_tree_node *itn = src_node; + + if (!dv.is_scalar_dependence) { + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + loop_nodes.insert(itn); + } + + int last_dim = -1; + itn = dst_node; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP + && loop_nodes.find(itn) != loop_nodes.end() + && itn->payload > last_dim) + last_dim = itn->payload; + } + + if (last_dim == -1) + return true; + + for (int i = 0; i <= last_dim; i++) { + if 
(dv.lbounds[i] > 0) + return true; + else if (dv.lbounds[i] < 0) + return false; + } + + if (before) + return true; + else + return false; + } + + return true; + +} + + + +// Test data dependences between two statements. The first statement +// in parameter must be lexically before the second statement in +// parameter. Returned dependences are all lexicographically +// positive. The first vector in returned pair is dependences from the +// first statement to the second statement and the second vector in +// returned pair is in reverse order. +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_data_dependences( + IR_Code *ir, const CG_outputRepr *repr1, const Relation &IS1, + const CG_outputRepr *repr2, const Relation &IS2, + std::vector<Free_Var_Decl*> &freevar, std::vector<std::string> index, + int i, int j) { + std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > result; + + if (repr1 == repr2) { + std::vector<IR_ArrayRef *> access = ir->FindArrayRef(repr1); + + for (int i = 0; i < access.size(); i++) { + IR_ArrayRef *a = access[i]; + IR_ArraySymbol *sym_a = a->symbol(); + for (int j = i; j < access.size(); j++) { + IR_ArrayRef *b = access[j]; + IR_ArraySymbol *sym_b = b->symbol(); + + if (*sym_a == *sym_b && (a->is_write() || b->is_write())) { + Relation r = arrays2relation(ir, freevar, a, IS1, b, IS2); + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + relation2dependences(a, b, r); + result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end()); + } + delete sym_b; + } + delete sym_a; + + } + + for (int i = 0; i < access.size(); i++) + delete access[i]; + } else { + std::vector<IR_ArrayRef *> access1 = ir->FindArrayRef(repr1); + std::vector<IR_ArrayRef *> access2 = ir->FindArrayRef(repr2); + + for (int i = 0; i < access1.size(); i++) { + IR_ArrayRef *a = access1[i]; + IR_ArraySymbol 
*sym_a = a->symbol(); + + for (int j = 0; j < access2.size(); j++) { + IR_ArrayRef *b = access2[j]; + IR_ArraySymbol *sym_b = b->symbol(); + if (*sym_a == *sym_b && (a->is_write() || b->is_write())) { + Relation r = arrays2relation(ir, freevar, a, IS1, b, IS2); + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + relation2dependences(a, b, r); + + result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end()); + } + delete sym_b; + } + delete sym_a; + } + + for (int i = 0; i < access1.size(); i++) + delete access1[i]; + for (int i = 0; i < access2.size(); i++) + delete access2[i]; + } + /*std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + ir->FindScalarDeps(repr1, repr2, index, i, j); + + + result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end());*/ + /*result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end()); + */ + + return result; +} + diff --git a/chill/src/loop.cc b/chill/src/loop.cc new file mode 100644 index 0000000..0a82f7a --- /dev/null +++ b/chill/src/loop.cc @@ -0,0 +1,1870 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Core loop transformation functionality. + + Notes: + "level" (starting from 1) means loop level and it corresponds to "dim" + (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., + c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 + in transformed iteration space, and variable 4 in Omega relation. + All c's are constant numbers only and they will not show up as actual loops. 
+ Formula: + dim = 2*level - 1 + var = dim + 1 + + History: + 10/2005 Created by Chun Chen. + 09/2009 Expand tile functionality, -chun + 10/2009 Initialize unfusible loop nest without bailing out, -chun +*****************************************************************************/ + +#include <limits.h> +#include <math.h> +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include <iostream> +#include <algorithm> +#include <map> +#include "loop.hh" +#include "omegatools.hh" +#include "irtools.hh" +#include "chill_error.hh" +#include <string.h> +#include <list> +using namespace omega; + +const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change +const std::string Loop::overflow_var_name_prefix = std::string("over"); + +//----------------------------------------------------------------------------- +// Class Loop +//----------------------------------------------------------------------------- +// --begin Anand: Added from CHiLL 0.2 + +bool Loop::isInitialized() const { + return stmt.size() != 0 && !stmt[0].xform.is_null(); +} + +//--end Anand: added from CHiLL 0.2 + +bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, + std::vector<ir_tree_node *> &ir_stmt) { + + ir_stmt = extract_ir_stmts(ir_tree); + stmt_nesting_level_.resize(ir_stmt.size()); + std::vector<int> stmt_nesting_level(ir_stmt.size()); + for (int i = 0; i < ir_stmt.size(); i++) { + ir_stmt[i]->payload = i; + int t = 0; + ir_tree_node *itn = ir_stmt[i]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + t++; + } + stmt_nesting_level_[i] = t; + stmt_nesting_level[i] = t; + } + + stmt = std::vector<Statement>(ir_stmt.size()); + int n_dim = -1; + int max_loc; + //std::vector<std::string> index; + for (int i = 0; i < ir_stmt.size(); i++) { + int max_nesting_level = -1; + int loc; + for (int j = 0; j < ir_stmt.size(); j++) + if 
(stmt_nesting_level[j] > max_nesting_level) { + max_nesting_level = stmt_nesting_level[j]; + loc = j; + } + + // most deeply nested statement acting as a reference point + if (n_dim == -1) { + n_dim = max_nesting_level; + max_loc = loc; + + index = std::vector<std::string>(n_dim); + + ir_tree_node *itn = ir_stmt[loc]; + int cur_dim = n_dim - 1; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + index[cur_dim] = + static_cast<IR_Loop *>(itn->content)->index()->name(); + itn->payload = cur_dim--; + } + } + } + + // align loops by names, temporary solution + ir_tree_node *itn = ir_stmt[loc]; + int depth = stmt_nesting_level_[loc] - 1; + /* while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); + for (int j = 0; j < n_dim; j++) + if (index[j] == name) { + itn->payload = j; + break; + } + if (itn->payload == -1) + throw loop_error("no complex alignment yet"); + } + } + */ + for (int t = depth; t >= 0; t--) { + int y = t; + ir_tree_node *itn = ir_stmt[loc]; + + while ((itn->parent != NULL) && (y >= 0)) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + y--; + } + + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + CG_outputBuilder *ocg = ir->builder(); + + itn->payload = depth - t; + + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + + std::vector<CG_outputRepr *> index_expr; + std::vector<std::string> old_index; + CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); + index_expr.push_back(repl); + old_index.push_back( + static_cast<IR_Loop *>(itn->content)->index()->name()); + code = ocg->CreateSubstitutedStmt(0, code, old_index, + index_expr); + + replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); + //stmt[loc].code = code; + + } + } + + // set relation variable names + Relation 
r(n_dim); + F_And *f_root = r.add_and(); + itn = ir_stmt[loc]; + int temp_depth = depth; + while (itn->parent != NULL) { + + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + r.name_set_var(itn->payload + 1, index[temp_depth]); + + temp_depth--; + } + //static_cast<IR_Loop *>(itn->content)->index()->name()); + } + + /*while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); + }*/ + + // extract information from loop/if structures + std::vector<bool> processed(n_dim, false); + std::vector<std::string> vars_to_be_reversed; + itn = ir_stmt[loc]; + while (itn->parent != NULL) { + itn = itn->parent; + + switch (itn->content->type()) { + case IR_CONTROL_LOOP: { + IR_Loop *lp = static_cast<IR_Loop *>(itn->content); + Variable_ID v = r.set_var(itn->payload + 1); + int c; + + try { + c = lp->step_size(); + if (c > 0) { + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + cond, true); + else + throw ir_error("loop condition not supported"); + + } else if (c < 0) { + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *lb = lp->lower_bound(); + lb = ocg->CreateMinus(NULL, lb); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + ub = ocg->CreateMinus(NULL, ub); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_GE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LE, true); + else if (cond == IR_COND_GT) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LT, true); + else + throw ir_error("loop condition not supported"); + + vars_to_be_reversed.push_back(lp->index()->name()); + } else + throw 
ir_error("loop step size zero"); + } catch (const ir_error &e) { + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + + if (abs(c) != 1) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(); + F_And *f_and = f_exists->add_and(); + Stride_Handle h = f_and->add_stride(abs(c)); + if (c > 0) + h.update_coef(e, 1); + else + h.update_coef(e, -1); + h.update_coef(v, -1); + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, + true); + } + + processed[itn->payload] = true; + break; + } + case IR_CONTROL_IF: { + CG_outputRepr *cond = + static_cast<IR_If *>(itn->content)->condition(); + try { + if (itn->payload % 2 == 1) + exp2constraint(ir, r, f_root, freevar, cond, true); + else { + F_Not *f_not = f_root->add_not(); + F_And *f_and = f_not->add_and(); + exp2constraint(ir, r, f_and, freevar, cond, true); + } + } catch (const ir_error &e) { + std::vector<ir_tree_node *> *t; + if (itn->parent == NULL) + t = &ir_tree; + else + t = &(itn->parent->children); + int id = itn->payload; + int i = t->size() - 1; + while (i >= 0) { + if ((*t)[i] == itn) { + for (int j = 0; j < itn->children.size(); j++) + delete itn->children[j]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + } else if ((*t)[i]->payload >> 1 == id >> 1) { + delete (*t)[i]; + t->erase(t->begin() + i); + } + i--; + } + return false; + } + + break; + } + default: + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + } + + // add information for missing loops + for (int j = 0; j < n_dim; j++) + if (!processed[j]) { + ir_tree_node *itn = ir_stmt[max_loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() 
== IR_CONTROL_LOOP + && itn->payload == j) + break; + } + + Variable_ID v = r.set_var(j + 1); + if (loc < max_loc) { + + CG_outputBuilder *ocg = ir->builder(); + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, + false); + + /* if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->lower_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->lower_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, false); + + + + } + + } + */ + + } else { // loc > max_loc + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, + false); + /*if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->upper_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->upper_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, 
false); + + + } + } + */ + } + } + + r.setup_names(); + r.simplify(); + + // insert the statement + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> reverse_expr; + // Build one negated identifier per reversed loop index. The two vectors are + // passed to CreateSubstitutedStmt as parallel lists, so reverse_expr[k] must + // correspond to vars_to_be_reversed[k]: iterate 0-based over every entry. + for (int j = 0; j < vars_to_be_reversed.size(); j++) { + CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); + repl = ocg->CreateMinus(NULL, repl); + reverse_expr.push_back(repl); + } + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed, + reverse_expr); + stmt[loc].code = code; + stmt[loc].IS = r; + stmt[loc].loop_level = std::vector<LoopLevel>(n_dim); + stmt[loc].ir_stmt_node = ir_stmt[loc]; + for (int i = 0; i < n_dim; i++) { + stmt[loc].loop_level[i].type = LoopLevelOriginal; + stmt[loc].loop_level[i].payload = i; + stmt[loc].loop_level[i].parallel_level = 0; + } + + stmt_nesting_level[loc] = -1; + } + + return true; +} + +Loop::Loop(const IR_Control *control) { + + last_compute_cgr_ = NULL; + last_compute_cg_ = NULL; + + ir = const_cast<IR_Code *>(control->ir_); + init_code = NULL; + cleanup_code = NULL; + tmp_loop_var_name_counter = 1; + overflow_var_name_counter = 1; + known = Relation::True(0); + + ir_tree = build_ir_tree(control->clone(), NULL); + // std::vector<ir_tree_node *> ir_stmt; + + while (!init_loop(ir_tree, ir_stmt)) { + } + + + + for (int i = 0; i < stmt.size(); i++) { + std::map<int, CG_outputRepr*>::iterator it = replace.find(i); + + if (it != replace.end()) + stmt[i].code = it->second; + else + stmt[i].code = stmt[i].code; + } + + if (stmt.size() != 0) + dep = DependenceGraph(stmt[0].IS.n_set()); + else + dep = DependenceGraph(0); + // init the dependence graph + for (int i = 0; i < stmt.size(); i++) + dep.insert(); + + for (int i = 0; i < stmt.size(); i++) + for (int j = i; j < stmt.size(); j++) { + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = test_data_dependences( + ir, stmt[i].code, stmt[i].IS, stmt[j].code, 
stmt[j].IS, + freevar, index, stmt_nesting_level_[i], + stmt_nesting_level_[j]); + + for (int k = 0; k < dv.first.size(); k++) { + if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], + true)) + dep.connect(i, j, dv.first[k]); + else { + dep.connect(j, i, dv.first[k].reverse()); + } + + } + for (int k = 0; k < dv.second.size(); k++) + if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], + false)) + dep.connect(j, i, dv.second[k]); + else { + dep.connect(i, j, dv.second[k].reverse()); + } + // std::pair<std::vector<DependenceVector>, + // std::vector<DependenceVector> > dv_ = test_data_dependences( + + } + + + + // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] + for (int i = 0; i < stmt.size(); i++) { + int n = stmt[i].IS.n_set(); + stmt[i].xform = Relation(n, 2 * n + 1); + F_And *f_root = stmt[i].xform.add_and(); + + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(2 * j), 1); + h.update_coef(stmt[i].xform.input_var(j), -1); + } + + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(j), 1); + } + stmt[i].xform.simplify(); + } + + if (stmt.size() != 0) + num_dep_dim = stmt[0].IS.n_set(); + else + num_dep_dim = 0; + // debug + /*for (int i = 0; i < stmt.size(); i++) { + std::cout << i << ": "; + //stmt[i].xform.print(); + stmt[i].IS.print(); + std::cout << std::endl; + + }*/ + //end debug +} + +Loop::~Loop() { + + delete last_compute_cgr_; + delete last_compute_cg_; + + for (int i = 0; i < stmt.size(); i++) + if (stmt[i].code != NULL) { + stmt[i].code->clear(); + delete stmt[i].code; + } + + for (int i = 0; i < ir_tree.size(); i++) + delete ir_tree[i]; + + if (init_code != NULL) { + init_code->clear(); + delete init_code; + } + if (cleanup_code != NULL) { + cleanup_code->clear(); + delete cleanup_code; + } +} + +int Loop::get_dep_dim_of(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= 
stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1 || level > stmt[stmt_num].loop_level.size()) + return -1; + + int trip_count = 0; + while (true) { + switch (stmt[stmt_num].loop_level[level - 1].type) { + case LoopLevelOriginal: + return stmt[stmt_num].loop_level[level - 1].payload; + case LoopLevelTile: + level = stmt[stmt_num].loop_level[level - 1].payload; + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + break; + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(stmt_num)); + } + trip_count++; + if (trip_count >= stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + } +} + +int Loop::get_last_dep_dim_before(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + level = stmt[stmt_num].loop_level.size() + 1; + + for (int i = level - 1; i >= 1; i--) + if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) + return stmt[stmt_num].loop_level[i - 1].payload; + + return -1; +} + +void Loop::print_internal_loop_structure() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * 
stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +CG_outputRepr *Loop::getCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return NULL; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::vector<CG_outputRepr *> stmts(m); + for (int i = 0; i < m; i++) + stmts[i] = stmt[i].code; + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts); + + if (init_code != NULL) + repr = ocg->StmtListAppend(init_code->clone(), repr); + if (cleanup_code != NULL) + repr = ocg->StmtListAppend(repr, cleanup_code->clone()); + + return repr; +} + +void Loop::printCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + 
std::string repr = last_compute_cgr_->printString(); + std::cout << repr << std::endl; +} + +void Loop::printIterationSpace() const { + for (int i = 0; i < stmt.size(); i++) { + std::cout << "s" << i << ": "; + Relation r = getNewIS(i); + for (int j = 1; j <= r.n_inp(); j++) + r.name_input_var(j, CodeGen::loop_var_name_prefix + to_string(j)); + r.setup_names(); + r.print(); + } +} + +void Loop::printDependenceGraph() const { + if (dep.edgeCount() == 0) + std::cout << "no dependence exists" << std::endl; + else { + std::cout << "dependence graph:" << std::endl; + std::cout << dep; + } +} + +Relation Loop::getNewIS(int stmt_num) const { + Relation result; + + if (stmt[stmt_num].xform.is_null()) { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].IS.n_set() - this->known.n_set()); + result = Intersection(copy(stmt[stmt_num].IS), known); + } else { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].xform.n_out() - this->known.n_set()); + result = Intersection( + Range( + Restrict_Domain(copy(stmt[stmt_num].xform), + copy(stmt[stmt_num].IS))), known); + } + + result.simplify(2, 4); + + return result; +} + +std::vector<Relation> Loop::getNewIS() const { + const int m = stmt.size(); + + std::vector<Relation> new_IS(m); + for (int i = 0; i < m; i++) + new_IS[i] = getNewIS(i); + + return new_IS; +} + +// Attach a pragma with text pragmaText to the code of statement stmt_num at the +// given loop level, via the output builder's CreatePragmaAttribute. +// Throws std::invalid_argument when stmt_num does not index an existing statement. +void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { + // check sanity of parameters: stmt_num must index an existing statement, + // otherwise stmt[stmt_num] below would be read out of bounds + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePragmaAttribute(code, level, pragmaText); +} +/* +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, const std::string &indexName, int offset, int hint) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + 
CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, indexName, int offset, hint); +} +*/ + +// Attach a prefetch attribute for array arrName (with the given hint) to the +// code of statement stmt_num at the given loop level, via the output builder. +// Throws std::invalid_argument when stmt_num does not index an existing statement. +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) { + // check sanity of parameters: stmt_num must index an existing statement, + // otherwise stmt[stmt_num] below would be read out of bounds + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, hint); +} + +std::vector<int> Loop::getLexicalOrder(int stmt_num) const { + assert(stmt_num < stmt.size()); + + const int n = stmt[stmt_num].xform.n_out(); + std::vector<int> lex(n, 0); + + for (int i = 0; i < n; i += 2) + lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); + + return lex; +} + +// find the sub loop nest specified by stmt_num and level, +// only iteration space satisfiable statements returned. +std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); + + std::set<int> working; + for (int i = 0; i < stmt.size(); i++) + if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable() + && stmt[i].loop_level.size() >= level) + working.insert(i); + + for (int i = 1; i <= level; i++) { + int a = getLexicalOrder(stmt_num, i); + for (std::set<int>::iterator j = working.begin(); j != working.end();) { + int b = getLexicalOrder(*j, i); + if (b != a) + working.erase(j++); + else + ++j; + } + } + + return working; +} + +int Loop::getLexicalOrder(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); + + Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform; + for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++) + if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { + bool is_const = true; + for (Constr_Vars_Iter cvi(*e); cvi; cvi++) 
+ if (cvi.curr_var() != r.output_var(2 * level - 1)) { + is_const = false; + break; + } + if (is_const) { + int t = static_cast<int>((*e).get_const()); + return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t; + } + } + + throw loop_error( + "can't find lexical order for statement " + to_string(stmt_num) + + "'s loop level " + to_string(level)); +} + +std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { + const int m = stmt.size(); + + std::set<int> same_loops; + for (int i = 0; i < m; i++) { + if (dim < 0) + same_loops.insert(i); + else { + std::vector<int> a_lex = getLexicalOrder(i); + int j; + for (j = 0; j <= dim; j += 2) + if (lex[j] != a_lex[j]) + break; + if (j > dim) + same_loops.insert(i); + } + + } + + return same_loops; +} + +void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { + const int m = stmt.size(); + + if (amount == 0) + return; + + for (int i = 0; i < m; i++) { + std::vector<int> lex2 = getLexicalOrder(i); + + bool need_shift = true; + + for (int j = 0; j < dim; j++) + if (lex2[j] != lex[j]) { + need_shift = false; + break; + } + + if (!need_shift) + continue; + + if (amount > 0) { + if (lex2[dim] < lex[dim]) + continue; + } else if (amount < 0) { + if (lex2[dim] > lex[dim]) + continue; + } + + assign_const(stmt[i].xform, dim, lex2[dim] + amount); + } +} + +std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, + int level) { + + std::set<int> not_nested_at_this_level; + std::map<ir_tree_node*, std::set<int> > sorted_by_loop; + std::map<int, std::set<int> > sorted_by_lex_order; + std::vector<std::set<int> > to_return; + bool lex_order_already_set = false; + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + if (stmt[*it].ir_stmt_node == NULL) + lex_order_already_set = true; + } + + if (lex_order_already_set) { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + std::map<int, std::set<int> 
>::iterator it2 = + sorted_by_lex_order.find( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var)); + + if (it2 != sorted_by_lex_order.end()) + it2->second.insert(*it); + else { + + std::set<int> to_insert; + + to_insert.insert(*it); + + sorted_by_lex_order.insert( + std::pair<int, std::set<int> >( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var), to_insert)); + + } + + } + + for (std::map<int, std::set<int> >::iterator it2 = + sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); + it2++) + to_return.push_back(it2->second); + + } else { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + ir_tree_node* itn = stmt[*it].ir_stmt_node; + itn = itn->parent; + while ((itn != NULL) && (itn->payload != level - 1)) + itn = itn->parent; + + if (itn == NULL) + not_nested_at_this_level.insert(*it); + else { + std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.find(itn); + + if (it2 != sorted_by_loop.end()) + it2->second.insert(*it); + else { + std::set<int> to_insert; + + to_insert.insert(*it); + + sorted_by_loop.insert( + std::pair<ir_tree_node*, std::set<int> >(itn, + to_insert)); + + } + + } + + } + if (not_nested_at_this_level.size() > 0) { + for (std::set<int>::iterator it = not_nested_at_this_level.begin(); + it != not_nested_at_this_level.end(); it++) { + std::set<int> temp; + temp.insert(*it); + to_return.push_back(temp); + + } + } + for (std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++) + to_return.push_back(it2->second); + } + return to_return; +} + +void update_successors(int n, int node_num[], int cant_fuse_with[], + Graph<std::set<int>, bool> &g, std::list<int> &work_list) { + + std::set<int> disconnect; + for (Graph<std::set<int>, bool>::EdgeList::iterator i = + g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { + int m = i->first; + + if (node_num[m] != -1) + throw loop_error("Graph input for 
fusion has cycles not a DAG!!"); + + std::vector<bool> check_ = g.getEdge(n, m); + + bool has_bad_edge_path = false; + for (int i = 0; i < check_.size(); i++) + if (!check_[i]) { + has_bad_edge_path = true; + break; + } + if (has_bad_edge_path) + cant_fuse_with[m] = std::max(cant_fuse_with[m], node_num[n]); + else + cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]); + disconnect.insert(m); + } + + + for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end(); + i++) { + g.disconnect(n, *i); + + bool no_incoming_edges = true; + for (int j = 0; j < g.vertex.size(); j++) + if (j != *i) + if (g.hasEdge(j, *i)) { + no_incoming_edges = false; + break; + } + + + if (no_incoming_edges) + work_list.push_back(*i); + } + +} + +Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( + std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) { + Graph<std::set<int>, bool> g; + + for (int i = 0; i < s.size(); i++) + g.insert(s[i]); + + for (int i = 0; i < s.size(); i++) { + + for (int j = i + 1; j < s.size(); j++) { + bool has_true_edge_i_to_j = false; + bool has_true_edge_j_to_i = false; + bool is_connected_i_to_j = false; + bool is_connected_j_to_i = false; + for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end(); + ii++) { + + for (std::set<int>::iterator jj = s[j].begin(); + jj != s[j].end(); jj++) { + + std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_i_to_j = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_i_to_j = true; + //break + } + } + + //if (is_connected) + + // break; + // if (has_true_edge_i_to_j && !is_connected_i_to_j) + // g.connect(i, j, true); + dvs = dep.getEdge(*jj, *ii); + for 
(int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (is_connected_i_to_j || has_true_edge_i_to_j) + throw loop_error( + "Graph input for fusion has cycles not a DAG!!"); + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_j_to_i = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_j_to_i = true; + //break; + } + } + + // if (is_connected) + //break; + // if (is_connected) + //break; + } + + + //if (is_connected) + // break; + } + + + if (is_connected_i_to_j) + g.connect(i, j, false); + else if (has_true_edge_i_to_j) + g.connect(i, j, true); + + if (is_connected_j_to_i) + g.connect(j, i, false); + else if (has_true_edge_j_to_i) + g.connect(j, i, true); + + + } + } + return g; +} + +std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) { + + bool roots[g.vertex.size()]; + + for (int i = 0; i < g.vertex.size(); i++) + roots[i] = true; + + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + + if (g.hasEdge(i, j)) + roots[j] = false; + + if (g.hasEdge(j, i)) + roots[i] = false; + + } + + std::list<int> work_list; + int cant_fuse_with[g.vertex.size()]; + std::vector<std::set<int> > s; + //Each Fused set's representative node + + int node_to_fused_nodes[g.vertex.size()]; + int node_num[g.vertex.size()]; + for (int i = 0; i < g.vertex.size(); i++) { + if (roots[i] == true) + work_list.push_back(i); + cant_fuse_with[i] = 0; + node_to_fused_nodes[i] = 0; + node_num[i] = -1; + } + // topological sort according to chun's permute algorithm + // std::vector<std::set<int> > s = g.topoSort(); + std::vector<std::set<int> > s2 = g.topoSort(); + if (work_list.empty() || (s2.size() != g.vertex.size())) { + + std::cout << s2.size() << "\t" << g.vertex.size() << std::endl; + throw loop_error("Input for fusion not a 
DAG!!"); + + + } + int fused_nodes_counter = 0; + while (!work_list.empty()) { + int n = work_list.front(); + //int n_ = g.vertex[n].first; + work_list.pop_front(); + int node; + if (cant_fuse_with[n] == 0) + node = 0; + else + node = cant_fuse_with[n]; + + if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) { + int rep_node = node_to_fused_nodes[node]; + node_num[n] = node_num[rep_node]; + + try { + update_successors(n, node_num, cant_fuse_with, g, work_list); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + + } + for (std::set<int>::iterator it = g.vertex[n].first.begin(); + it != g.vertex[n].first.end(); it++) + s[node].insert(*it); + } else { + //std::set<int> new_node; + //new_node.insert(n_); + s.push_back(g.vertex[n].first); + node_to_fused_nodes[node] = n; + node_num[n] = ++node; + try { + update_successors(n, node_num, cant_fuse_with, g, work_list); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + + } + fused_nodes_counter++; + } + } + + return s; +} + +void Loop::setLexicalOrder(int dim, const std::set<int> &active, + int starting_order, std::vector<std::vector<std::string> > idxNames) { + if (active.size() == 0) + return; + + // check for sanity of parameters + if (dim < 0 || dim % 2 != 0) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + std::vector<int> lex; + int ref_stmt_num; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if ((*i) < 0 || (*i) >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (dim >= stmt[*i].xform.n_out()) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + if (i == active.begin()) { + lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex2 = getLexicalOrder(*i); + for 
(int j = 0; j < dim; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements are not in the same sub loop nest"); + } + } + + // separate statements by current loop level types + int level = (dim + 2) / 2; + std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; + std::set<int> active_by_no_level; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (level > stmt[*i].loop_level.size()) + active_by_no_level.insert(*i); + else + active_by_level_type[std::make_pair( + stmt[*i].loop_level[level - 1].type, + stmt[*i].loop_level[level - 1].payload)].insert(*i); + } + + // further separate statements due to control dependences + std::vector<std::set<int> > active_by_level_type_splitted; + for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = + active_by_level_type.begin(); i != active_by_level_type.end(); i++) + active_by_level_type_splitted.push_back(i->second); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) + for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { + std::set<int> controlled, not_controlled; + for (std::set<int>::iterator k = + active_by_level_type_splitted[j].begin(); + k != active_by_level_type_splitted[j].end(); k++) { + std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); + bool is_controlled = false; + // compare (==), not assign (=): only an actual control dependence + // edge from *i to *k marks *k as controlled + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].type == DEP_CONTROL) { + is_controlled = true; + break; + } + if (is_controlled) + controlled.insert(*k); + else + not_controlled.insert(*k); + } + if (controlled.size() != 0 && not_controlled.size() != 0) { + active_by_level_type_splitted.erase( + active_by_level_type_splitted.begin() + j); + active_by_level_type_splitted.push_back(controlled); + active_by_level_type_splitted.push_back(not_controlled); + } + } + + // set lexical order separating loops with different loop types first + if (active_by_level_type_splitted.size() + 
active_by_no_level.size() > 1) { + int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + + Graph<std::set<int>, Empty> g; + for (std::vector<std::set<int> >::iterator i = + active_by_level_type_splitted.begin(); + i != active_by_level_type_splitted.end(); i++) + g.insert(*i); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, + *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, + *ii); + // find the sub loop nest specified by stmt_num and level, + // only iteration space satisfiable statements returned. 
+ for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + std::vector<std::set<int> > s = g.topoSort(); + if (s.size() != g.vertex.size()) + throw loop_error( + "cannot separate statements with different loop types at loop level " + + to_string(level)); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; + int sz = cur_scc.size(); + if (sz == 1) { + int cur_stmt = *(cur_scc.begin()); + assign_const(stmt[cur_stmt].xform, dim, order); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + order++; + } else { + setLexicalOrder(dim, cur_scc, order, idxNames); + order += sz; + } + } + } + // set lexical order seperating single iteration statements and loops + else { + std::set<int> true_singles; + std::set<int> nonsingles; + std::map<coef_t, std::set<int> > fake_singles; + std::set<int> fake_singles_; + + // sort out statements that do not require loops + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) { + Relation cur_IS = getNewIS(*i); + if (is_single_iteration(cur_IS, dim + 1)) { + bool is_all_single = true; + for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) + if (!is_single_iteration(cur_IS, j)) { + is_all_single = false; + break; + } + if (is_all_single) + true_singles.insert(*i); + else { + fake_singles_.insert(*i); + try { + fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( + *i); + } catch (const std::exception &e) { + fake_singles[posInfinity].insert(*i); + } + } + } else + nonsingles.insert(*i); + } + + + // split nonsingles forcibly according to negative dependences present (loop unfusible) + int dep_dim = get_dep_dim_of(ref_stmt_num, 
level); + + if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { + + bool dummy_level_found = false; + + std::vector<std::set<int> > s; + + s = sort_by_same_loops(active, level); + bool further_levels_exist = false; + + if (!idxNames.empty()) + if (level <= idxNames[ref_stmt_num].size()) + if (idxNames[ref_stmt_num][level - 1].length() == 0) { + // && s.size() == 1) { + int order1 = 0; + dummy_level_found = true; + + for (int i = level; i < idxNames[ref_stmt_num].size(); + i++) + if (idxNames[ref_stmt_num][i].length() > 0) + further_levels_exist = true; + + } + + //if (!dummy_level_found) { + + if (s.size() > 1) { + + Graph<std::set<int>, bool> g = construct_induced_graph_at_level( + s, dep, dep_dim); + s = typed_fusion(g); + } + int order = 0; + for (int i = 0; i < s.size(); i++) { + + for (std::set<int>::iterator it = s[i].begin(); + it != s[i].end(); it++) + assign_const(stmt[*it].xform, dim, order); + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) + setLexicalOrder(dim + 2, s[i], order, idxNames); + + order++; + } + //} + /* else { + + int order1 = 0; + int order = 0; + for (std::set<int>::iterator i = active.begin(); + i != active.end(); i++) { + if (!further_levels_exist) + assign_const(stmt[*i].xform, dim, order1++); + else + assign_const(stmt[*i].xform, dim, order1); + + } + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist) + setLexicalOrder(dim + 2, active, order, idxNames); + } + */ + } else { + int dummy_order = 0; + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) + assign_const(stmt[*i].xform, dim, dummy_order++); + } + /*for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i+1; j < g2.vertex.size(); j++) { + std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(i, j); + 
break; + } + dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(j, i); + break; + } + } + + std::vector<std::set<int> > s2 = g2.packed_topoSort(); + + std::vector<std::set<int> > splitted_nonsingles; + for (int i = 0; i < s2.size(); i++) { + std::set<int> cur_scc; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + cur_scc.insert(g2.vertex[*j].first); + splitted_nonsingles.push_back(cur_scc); + } + */ + //convert to dependence graph for grouped statements + //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + /*int order = 0; + for (std::set<int>::iterator j = active.begin(); j != active.end(); + j++) { + std::set<int> continuous; + std::cout<< active.size()<<std::endl; + while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) { + continuous.insert(*j); + j++; + } + + printf("continuous size is %d\n", continuous.size()); + + + + if (continuous.size() > 0) { + std::vector<std::set<int> > s = typed_fusion(continuous, dep, + dep_dim); + + for (int i = 0; i < s.size(); i++) { + for (std::set<int>::iterator l = s[i].begin(); + l != s[i].end(); l++) { + assign_const(stmt[*l].xform, dim + 2, order); + setLexicalOrder(dim + 2, s[i]); + } + order++; + } + } + + if (j != active.end()) { + assign_const(stmt[*j].xform, dim + 2, order); + + for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2) + assign_const(stmt[*j].xform, k, 0); + order++; + } + + if( j == active.end()) + break; + } + */ + + + // assign lexical order + /*int order = starting_order; + for (int i = 0; i < s.size(); i++) { + // translate each SCC into original statements + std::set<int> cur_scc; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + copy(s[i].begin(), s[i].end(), + inserter(cur_scc, cur_scc.begin())); + + // now assign the constant + for 
(std::set<int>::iterator j = cur_scc.begin(); + j != cur_scc.end(); j++) + assign_const(stmt[*j].xform, dim, order); + + if (cur_scc.size() > 1) + setLexicalOrder(dim + 2, cur_scc); + else if (cur_scc.size() == 1) { + int cur_stmt = *(cur_scc.begin()); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + } + + if (cur_scc.size() > 0) + order++; + } + */ + } +} + +// Applies the pending transformation of every statement by forwarding all +// statement indices to apply_xform(std::set<int> &). +void Loop::apply_xform() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + apply_xform(active); +} + +// Applies the pending transformation of the single statement stmt_num. +void Loop::apply_xform(int stmt_num) { + std::set<int> active; + active.insert(stmt_num); + apply_xform(active); +} + +// For each statement in `active`: rewrites its code in terms of freshly named +// loop variables, maps its iteration space through the loop positions of its +// xform, and then replaces xform by a mapping that sends input j to output 2j +// and pins the odd outputs to the statement's current lexical-order values. +// Advances tmp_loop_var_name_counter by the deepest loop nest seen. +void Loop::apply_xform(std::set<int> &active) { + int max_n = 0; + + CG_outputBuilder *ocg = ir->builder(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].loop_level.size(); + if (n > max_n) + max_n = n; + + std::vector<int> lex = getLexicalOrder(*i); + + // projection keeping only the even (loop) positions: output j = input 2j + Relation mapping(2 * n + 1, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + mapping = Composition(mapping, stmt[*i].xform); + mapping.simplify(); + + // match omega input/output variables to variable names in the code + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); + for (int j = 1; j <= n; j++) + mapping.name_output_var(j, + tmp_loop_var_name_prefix + + to_string(tmp_loop_var_name_counter + j - 1)); + mapping.setup_names(); + + Relation known = Extend_Set(copy(this->known), + mapping.n_out() - this->known.n_set()); + //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); + std::vector<std::string> loop_vars; + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); 
+ std::vector<CG_outputRepr *> subs = output_substitutions(ocg, + Inverse(copy(mapping)), + std::vector<std::pair<CG_outputRepr *, int> >(mapping.n_out(), + std::make_pair(static_cast<CG_outputRepr *>(NULL), 0))); + stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars, + subs); + stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); + stmt[*i].IS.simplify(); + + // replace original transformation relation with straight 1-1 mapping + mapping = Relation(n, 2 * n + 1); + f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(j), -1); + } + // odd output positions keep the lexical-order constants read before the rewrite + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_const(-lex[j - 1]); + } + stmt[*i].xform = mapping; + } + + tmp_loop_var_name_counter += max_n; +} + +// Intersects `cond` into this->known. Whichever of the two relations has fewer +// set variables is first extended to the arity of the other. The cached +// codegen results are deleted and reset to NULL. +void Loop::addKnown(const Relation &cond) { + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int n1 = this->known.n_set(); + + Relation r = copy(cond); + int n2 = r.n_set(); + + if (n1 < n2) + this->known = Extend_Set(this->known, n2 - n1); + else if (n1 > n2) + r = Extend_Set(r, n1 - n2); + + this->known = Intersection(this->known, r); +} + +// Disconnects stmt_num_from -> stmt_num_to in the dependence graph. +// Throws std::invalid_argument when either number is >= stmt.size(). +void Loop::removeDependence(int stmt_num_from, int stmt_num_to) { + // check for sanity of parameters + if (stmt_num_from >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_from)); + if (stmt_num_to >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_to)); + + dep.disconnect(stmt_num_from, stmt_num_to); +} + +// Prints one line per statement to std::cout: "s<index + 1>: ", then for each +// loop level the lexical-order value followed by "(dim:N)", "(tile:N)" or +// "(unknown)", then the remaining even-position lexical-order values. +void Loop::dump() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < 
lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +// Applies the affine loop transformation described by matrix T to every +// statement and to the dependence graph, then resets the lexical order. +// T must have num_dep_dim rows, each with num_dep_dim coefficients optionally +// followed by one constant term. Throws std::invalid_argument when T has the +// wrong shape or when some statement is not nested in exactly num_dep_dim +// LoopLevelOriginal loops. Returns true, also when there are no statements. +// NOTE(review): nothing in this function verifies that T is nonsingular. +bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { + if (stmt.size() == 0) + return true; + + // check for sanity of parameters + for (int i = 0; i < stmt.size(); i++) { + if (stmt[i].loop_level.size() != num_dep_dim) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + for (int j = 0; j < stmt[i].loop_level.size(); j++) + if (stmt[i].loop_level[j].type != LoopLevelOriginal) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + } + if (T.size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + // validate every row of T; the row count is T.size(), which is unrelated + // to the number of statements + for (int i = 0; i < T.size(); i++) + if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + // build relation from matrix: loop output i is the T-weighted sum of the + // loop inputs plus the optional constant of row i + Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); + F_And *f_root = mapping.add_and(); + for (int i = 0; i < num_dep_dim; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (i + 1)), -1); + for (int j = 0; j < num_dep_dim; j++) + if (T[i][j] != 0) + h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); + if (T[i].size() == num_dep_dim + 1) + h.update_const(T[i][num_dep_dim]); + } + for (int i = 1; i 
<= 2 * num_dep_dim + 1; i += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), -1); + h.update_coef(mapping.input_var(i), 1); + } + + // update transformation relations + for (int i = 0; i < stmt.size(); i++) + stmt[i].xform = Composition(copy(mapping), stmt[i].xform); + + // update dependence graph + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + switch (dv.type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim), ubounds( + num_dep_dim); + for (int p = 0; p < num_dep_dim; p++) { + coef_t lb = 0; + coef_t ub = 0; + for (int q = 0; q < num_dep_dim; q++) { + if (T[p][q] > 0) { + if (lb == -posInfinity + || dv.lbounds[q] == -posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.lbounds[q]; + if (ub == posInfinity + || dv.ubounds[q] == posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.ubounds[q]; + } else if (T[p][q] < 0) { + if (lb == -posInfinity + || dv.ubounds[q] == posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.ubounds[q]; + if (ub == posInfinity + || dv.lbounds[q] == -posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.lbounds[q]; + } + } + if (T[p].size() == num_dep_dim + 1) { + if (lb != -posInfinity) + lb += T[p][num_dep_dim]; + if (ub != posInfinity) + ub += T[p][num_dep_dim]; + } + lbounds[p] = lb; + ubounds[p] = ub; + } + dv.lbounds = lbounds; + dv.ubounds = ubounds; + + break; + } + default: + ; + } + } + j->second = dvs; + } + + // set constant loop values + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); + + return true; +} + + +bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, + const DependenceVector &dv, 
bool before) { + std::vector<int> lex_i = getLexicalOrder(i); + std::vector<int> lex_j = getLexicalOrder(j); + int last_dim; + if (!dv.is_scalar_dependence) { + for (last_dim = 0; + last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]); + last_dim++) + ; + last_dim = last_dim / 2; + if (last_dim == 0) + return true; + + for (int i = 0; i < last_dim; i++) { + if (dv.lbounds[i] > 0) + return true; + else if (dv.lbounds[i] < 0) + return false; + } + } + if (before) + return true; + + return false; + +} + diff --git a/chill/src/loop_basic.cc b/chill/src/loop_basic.cc new file mode 100644 index 0000000..f5234b9 --- /dev/null +++ b/chill/src/loop_basic.cc @@ -0,0 +1,1538 @@ +/* + * loop_basic.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include "loop.hh" +#include "chill_error.hh" +#include <omega.h> +#include "omegatools.hh" +#include <string.h> + +using namespace omega; + +void Loop::permute(const std::vector<int> &pi) { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + + permute(active, pi); +} + +void Loop::original() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); +} +void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) { + // check for sanity of parameters + int starting_order; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num)); + std::set<int> active; + if (level < 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + else if (level == 0) { + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + level = 1; + starting_order = 0; + } else { + std::vector<int> lex = getLexicalOrder(stmt_num); + active = getStatements(lex, 2 * level - 2); + starting_order = lex[2 * level - 2]; + lex[2 * level - 2]++; + shiftLexicalOrder(lex, 2 * level - 2, active.size() - 1); + } + 
std::vector<int> pi_inverse(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) { + if (pi[i] >= level + pi.size() || pi[i] < level + || pi_inverse[pi[i] - level] != 0) + throw std::invalid_argument("invalid permuation"); + pi_inverse[pi[i] - level] = level + i; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + if (level + pi.size() - 1 > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid permutation for statement " + to_string(*i)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // Update transformation relations + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= 2 * level - 2; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = level; j <= level + pi.size() - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * pi[j - level]), -1); + } + for (int j = level; j <= level + pi.size() - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j - 1), 1); + h.update_coef(mapping.input_var(2 * j - 1), -1); + } + for (int j = 2 * (level + pi.size() - 1) + 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) + t.push_back(stmt[stmt_num].loop_level[pi[i] - 1].payload); + int max_dep_dim = -1; + int min_dep_dim = dep.num_dim(); 
+ for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(dep.num_dim()); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i - min_dep_dim]; + for (int i = max_dep_dim + 1; i < dep.num_dim(); i++) + dep_pi[i] = i; + + dep.permute(dep_pi, active); + + // update the dependence graph + DependenceGraph g(dep.num_dim()); + for (int i = 0; i < dep.vertex.size(); i++) + g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + if ((active.find(i) != active.end() + && active.find(j->first) != active.end())) { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(dep.num_dim()); + std::vector<coef_t> ubounds(dep.num_dim()); + for (int d = 0; d < dep.num_dim(); d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < dep.num_dim(); d++) + if (dep_pi[d] != d) { + 
dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information: position j inside the permuted range + // receives the loop that used to sit at position pi_inverse[j - level] + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level + pi.size()) { + switch (stmt[*i].loop_level[pi_inverse[j - level] - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[pi_inverse[j - level] + - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + // carry the parallel level of the loop that moves into position j, + // exactly as the LoopLevelOriginal case above does + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } else { + // levels outside the permuted range stay in place; only a tile's + // reference to a permuted level is remapped + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 
1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2 * level - 2, active, starting_order); +} +// Permutes, for the statements in `active`, the pi.size() consecutive loops +// that start at the smallest level named in pi. Does nothing when `active` or +// pi is empty. Throws std::invalid_argument when pi is not a permutation of +// consecutive levels >= 1, when a statement number is out of range, when a +// statement's loop nest is shallower than the permuted range, when the +// statements differ in lexical order before the permuted range, or when their +// permuted loop levels differ in type or payload from the first statement's. +void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) { + if (active.size() == 0 || pi.size() == 0) + return; + + // check for sanity of parameters + int level = pi[0]; + for (int i = 1; i < pi.size(); i++) + if (pi[i] < level) + level = pi[i]; + if (level < 1) + throw std::invalid_argument("invalid permuation"); + // reverse_pi[k] is the new level of the loop originally at level + k; + // an entry left at 0 means pi named some level twice + std::vector<int> reverse_pi(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) + if (pi[i] >= level + pi.size()) + throw std::invalid_argument("invalid permutation"); + else + reverse_pi[pi[i] - level] = i + level; + for (int i = 0; i < reverse_pi.size(); i++) + if (reverse_pi[i] == 0) + throw std::invalid_argument("invalid permuation"); + int ref_stmt_num; + std::vector<int> lex; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(*i)); + // every active statement, including the reference one, must be nested + // deeply enough; its loop_level is indexed up to level + pi.size() - 1 + if (level + pi.size() - 1 > stmt[*i].loop_level.size()) + throw std::invalid_argument("invalid permuation"); + if (i == active.begin()) { + ref_stmt_num = *i; + lex = getLexicalOrder(*i); + } else { + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < 2 * level - 3; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements to permute must be in the same subloop"); + for (int j = 0; j < pi.size(); j++) + if (!(stmt[*i].loop_level[level + j - 1].type + == stmt[ref_stmt_num].loop_level[level + j - 1].type + && stmt[*i].loop_level[level + j - 1].payload + == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) + throw std::invalid_argument( + "permuted loops must have the same loop level types"); + } + } + // invalidate saved codegen computation + 
delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // Update transformation relations + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 0; j < pi.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (level + j)), 1); + h.update_coef(mapping.input_var(2 * pi[j]), -1); + } + for (int j = 1; j < level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) + t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload); + int max_dep_dim = -1; + int min_dep_dim = num_dep_dim; + for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(num_dep_dim); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i - min_dep_dim]; + for (int i = max_dep_dim + 1; i < num_dep_dim; i++) + dep_pi[i] = i; + 
+ dep.permute(dep_pi, active); + + // update the dependence graph + DependenceGraph g(dep.num_dim()); + for (int i = 0; i < dep.vertex.size(); i++) + g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { // + if ((active.find(i) != active.end() + && active.find(j->first) != active.end())) { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim); + std::vector<coef_t> ubounds(num_dep_dim); + for (int d = 0; d < num_dep_dim; d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < num_dep_dim; d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level + pi.size()) { + switch 
(stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[reverse_pi[j - level] + - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } else { + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2 * level - 2, active); +} + +std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || 
level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + std::set<int> result; + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + Relation cond2 = copy(cond); + cond2.simplify(); + cond2 = EQs_to_GEQs(cond2); + Conjunct *c = cond2.single_conjunct(); + int cur_lex = lex[dim - 1]; + + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int max_level = (*gi).max_tuple_pos(); + Relation single_cond(max_level); + single_cond.and_with_GEQ(*gi); + + // TODO: should decide where to place newly created statements with + // complementary split condition from dependence graph. + bool place_after; + if (max_level == 0) + place_after = true; + else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) + place_after = true; + else + place_after = false; + + bool temp_place_after; // = place_after; + bool assigned = false; + int part1_to_part2; + int part2_to_part1; + // original statements with split condition, + // new statements with complement of split condition + int old_num_stmt = stmt.size(); + std::map<int, int> what_stmt_num; + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + int n = stmt[*i].IS.n_set(); + Relation part1, part2; + if (max_level > n) { + part1 = copy(stmt[*i].IS); + part2 = Relation::False(0); + } else { + part1 = Intersection(copy(stmt[*i].IS), + Extend_Set(copy(single_cond), n - max_level)); + part2 = Intersection(copy(stmt[*i].IS), + Extend_Set(Complement(copy(single_cond)), + n - max_level)); + } + + //split dependence check + + if (max_level > level) { + + DNF_Iterator di1(stmt[*i].IS.query_DNF()); + DNF_Iterator di2(part1.query_DNF()); + for (; di1 && di2; di1++, di2++) { + //printf("In next conjunct,\n"); + EQ_Iterator ei1 = (*di1)->EQs(); + EQ_Iterator ei2 = (*di2)->EQs(); + for (; ei1 && ei2; ei1++, ei2++) { + //printf(" In next equality 
constraint,\n"); + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + dimension = + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + GEQ_Iterator gi1 = (*di1)->GEQs(); + GEQ_Iterator gi2 = (*di2)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = + 
stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); + j++) { + for (int k = 0; k < j->second.size(); + k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + DNF_Iterator di3(stmt[*i].IS.query_DNF()); + DNF_Iterator di4(part2.query_DNF()); // + for (; di3 && di4; di3++, di4++) { + EQ_Iterator ei1 = (*di3)->EQs(); + EQ_Iterator ei2 = (*di4)->EQs(); + for (; ei1 && ei2; ei1++, ei2++) { + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + GEQ_Iterator gi1 = (*di3)->GEQs(); + 
GEQ_Iterator gi2 = (*di4)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type // + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + stmt[*i].IS = part1; + + if (Intersection(copy(part2), + Extend_Set(copy(this->known), n - this->known.n_set())).is_upper_bound_satisfiable()) { + Statement new_stmt; + new_stmt.code = stmt[*i].code->clone(); + new_stmt.IS = part2; + new_stmt.xform = copy(stmt[*i].xform); + new_stmt.ir_stmt_node = NULL; + new_stmt.loop_level = stmt[*i].loop_level; + + stmt_nesting_level_.push_back(stmt_nesting_level_[*i]); + + /*std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + test_data_dependences(ir, stmt[*i].code, part1, + stmt[*i].code, part2, freevar, index, + stmt_nesting_level_[*i], + stmt_nesting_level_[stmt.size() - 1]); + + + + + for (int k = 0; k < dv.first.size(); k++) + part1_to_part2++; + if 
(part1_to_part2 > 0 && part2_to_part1 > 0) + throw loop_error( + "loop error: Aborting, split resulted in impossible dependence cycle!"); + + for (int k = 0; k < dv.second.size(); k++) + part2_to_part1++; + + + + if (part1_to_part2 > 0 && part2_to_part1 > 0) + throw loop_error( + "loop error: Aborting, split resulted in impossible dependence cycle!"); + + + + if (part2_to_part1 > 0){ + temp_place_after = false; + assigned = true; + + }else if (part1_to_part2 > 0){ + temp_place_after = true; + + assigned = true; + } + + */ + + if (place_after) + assign_const(new_stmt.xform, dim - 1, cur_lex + 1); + else + assign_const(new_stmt.xform, dim - 1, cur_lex - 1); + + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i] = stmt.size() - 1; + if (*i == stmt_num) + result.insert(stmt.size() - 1); + } + + } + // make adjacent lexical number available for new statements + if (place_after) { + lex[dim - 1] = cur_lex + 1; + shiftLexicalOrder(lex, dim - 1, 1); + } else { + lex[dim - 1] = cur_lex - 1; + shiftLexicalOrder(lex, dim - 1, -1); + } + // update dependence graph + int dep_dim = get_dep_dim_of(stmt_num, level); + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(i) != what_stmt_num.end() + && what_stmt_num.find(j->first) + != what_stmt_num.end()) + dep.connect(what_stmt_num[i], + what_stmt_num[j->first], j->second); + if (place_after + && what_stmt_num.find(j->first) + != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + 
if (dvs.size() > 0) + D.push_back( + std::make_pair(what_stmt_num[j->first], + dvs)); + } else if (!place_after + && what_stmt_num.find(i) + != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + dep.connect(what_stmt_num[i], j->first, dvs); + + } + } else { + if (what_stmt_num.find(i) != what_stmt_num.end()) + dep.connect(what_stmt_num[i], j->first, j->second); + } + } else if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(j->first) != what_stmt_num.end()) + D.push_back( + std::make_pair(what_stmt_num[j->first], + j->second)); + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + + } + + return result; +} + +void Loop::skew(const std::set<int> &stmt_nums, int level, + const std::vector<int> &skew_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + throw std::invalid_argument("invalid skewing formula"); + } + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation 
r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) + if (j != 2 * level) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(2 * level), -1); + for (int j = 0; j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence between skewed statements + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + coef_t lb = 0; + coef_t ub = 0; + for (int kk = 0; kk < skew_amount.size(); kk++) { + int cur_dep_dim = get_dep_dim_of(*i, kk + 1); + if (skew_amount[kk] > 0) { + if (lb != -posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] + != -posInfinity) + lb += skew_amount[kk] + * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] + != posInfinity) + ub += skew_amount[kk] + * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + ub = posInfinity; + } + } else if (skew_amount[kk] < 0) { + if (lb != -posInfinity + && 
stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] + != posInfinity) + lb += skew_amount[kk] + * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] + != -posInfinity) + ub += skew_amount[kk] + * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + ub = posInfinity; + } + } + } + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + if ((dv.isCarried(dep_dim) + && dv.hasPositive(dep_dim)) && dv.quasi) + dv.quasi = false; + + if ((dv.isCarried(dep_dim) + && dv.hasNegative(dep_dim)) && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + if ((dv.isCarried(dep_dim) + && dv.hasPositive(dep_dim)) && dv.quasi) + dv.quasi = false; + + if ((dv.isCarried(dep_dim) + && dv.hasNegative(dep_dim)) && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + } + } + j->second = dvs; + } else { + // dependence from skewed statement to unskewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unskewed statement to skewed statement becomes jumbled, + // put 
distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + } +} + + +void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + } + + // do nothing + if (shift_amount == 0) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + if (j == 2 * level) + h.update_const(shift_amount); + } + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) == stmt_nums.end()) 
{ + // dependence from shifted statement to unshifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] -= shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] -= shift_amount; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unshifted statement to shifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] += shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] += shift_amount; + } + } + j->second = dvs; + } + } +} + +void Loop::scale(const std::set<int> &stmt_nums, int level, int scale_amount) { + std::vector<int> skew_amount(level, 0); + skew_amount[level - 1] = scale_amount; + skew(stmt_nums, level, skew_amount); +} + +void Loop::reverse(const std::set<int> &stmt_nums, int level) { + scale(stmt_nums, level, -1); +} + +void Loop::fuse(const std::set<int> &stmt_nums, int level) { + if (stmt_nums.size() == 0 || stmt_nums.size() == 1) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + // check for sanity of parameters + std::vector<int> ref_lex; + int ref_stmt_num; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + 
to_string(*i)); + if (level <= 0 + || (level > (stmt[*i].xform.n_out() - 1) / 2 + || level > stmt[*i].loop_level.size())) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + if (ref_lex.size() == 0) { + ref_lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex = getLexicalOrder(*i); + for (int j = 0; j < dim - 1; j += 2) + if (lex[j] != ref_lex[j]) + throw std::invalid_argument( + "statements for fusion must be in the same level-" + + to_string(level - 1) + " subloop"); + } + } + + // collect lexicographical order values from to-be-fused statements + std::set<int> lex_values; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + std::vector<int> lex = getLexicalOrder(*i); + lex_values.insert(lex[dim - 1]); + } + if (lex_values.size() == 1) + return; + // negative dependence would prevent fusion + + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + + for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); + i++) { + ref_lex[dim - 1] = *i; + std::set<int> a = getStatements(ref_lex, dim - 1); + std::set<int>::iterator j = i; + j++; + for (; j != lex_values.end(); j++) { + ref_lex[dim - 1] = *j; + std::set<int> b = getStatements(ref_lex, dim - 1); + for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++) + for (std::set<int>::iterator jj = b.begin(); jj != b.end(); + jj++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim) + && dvs[k].hasNegative(dep_dim)) + throw loop_error( + "loop error: statements " + to_string(*ii) + + " and " + to_string(*jj) + + " cannot be fused together due to negative dependence"); + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim) + && dvs[k].hasNegative(dep_dim)) + throw loop_error( + "loop error: statements " + to_string(*jj) + + " and " + to_string(*ii) + + " cannot be fused 
together due to negative dependence"); + } + } + } + + std::set<int> same_loop = getStatements(ref_lex, dim - 3); + + std::vector<std::set<int> > s = sort_by_same_loops(same_loop, level); + + std::set<int> s1; + std::set<int> s2; + std::set<int> s4; + std::vector<std::set<int> > s3; + for (std::set<int>::iterator kk = stmt_nums.begin(); kk != stmt_nums.end(); + kk++) + for (int i = 0; i < s.size(); i++) + if (s[i].find(*kk) != s[i].end()) { + s1.insert(s[i].begin(), s[i].end()); + s2.insert(i); + } + + s3.push_back(s1); + for (int i = 0; i < s.size(); i++) + if (s2.find(i) == s2.end()) { + s3.push_back(s[i]); + s4.insert(s[i].begin(), s[i].end()); + } + try { + std::vector<std::set<int> > s5; + s5.push_back(s1); + s5.push_back(s4); + + //Dependence Check for Ordering Constraint + //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5, + // dep, dep_dim); + + Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s3, dep, + dep_dim); + + s = typed_fusion(g); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + } + + if (s3.size() == s.size()) { + int order = 0; + for (int i = 0; i < s.size(); i++) { + + for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); + it++) { + + assign_const(stmt[*it].xform, 2 * level - 2, order); + + } + + order++; + } + } else if (s3.size() > s.size()) { + + int order = 0; + for (int j = 0; j < s.size(); j++) { + std::set<int>::iterator it3; + for (it3 = s1.begin(); it3 != s1.end(); it3++) { + if (s[j].find(*it3) != s[j].end()) + break; + } + if (it3 != s1.end()) { + for (std::set<int>::iterator it = s1.begin(); it != s1.end(); + it++) + assign_const(stmt[*it].xform, 2 * level - 2, order); + + order++; + + } + + for (int i = 0; i < s3.size(); i++) { + std::set<int>::iterator it2; + + for (it2 = s3[i].begin(); it2 != s3[i].end(); it2++) { + if (s[j].find(*it2) != s[j].end()) + break; + } + + if (it2 != s3[i].end()) { + for 
(std::set<int>::iterator it = s3[i].begin(); + it != s3[i].end(); it++) + assign_const(stmt[*it].xform, 2 * level - 2, order); + + order++; + + } + } + } + + } else + throw loop_error("Typed Fusion Error"); + +} + + + +void Loop::distribute(const std::set<int> &stmt_nums, int level) { + if (stmt_nums.size() == 0 || stmt_nums.size() == 1) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + int dim = 2 * level - 1; + int ref_stmt_num; + // check for sanity of parameters + std::vector<int> ref_lex; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 + || (level > (stmt[*i].xform.n_out() - 1) / 2 + || level > stmt[*i].loop_level.size())) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + if (ref_lex.size() == 0) { + ref_lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex = getLexicalOrder(*i); + for (int j = 0; j <= dim - 1; j += 2) + if (lex[j] != ref_lex[j]) + throw std::invalid_argument( + "statements for distribution must be in the same level-" + + to_string(level) + " subloop"); + } + } + // find SCC in the to-be-distributed loop + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + std::set<int> same_loop = getStatements(ref_lex, dim - 1); + Graph<int, Empty> g; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) + g.insert(*i); + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g.connect(i, j); + break; + } + dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); + for (int k = 0; k < 
dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g.connect(j, i); + break; + } + } + std::vector<std::set<int> > s = g.topoSort(); + // find statements that cannot be distributed due to dependence cycle + Graph<std::set<int>, Empty> g2; + for (int i = 0; i < s.size(); i++) { + std::set<int> t; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) + t.insert(g.vertex[*j].first); + if (!t.empty()) + g2.insert(t); + } + for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i + 1; j < g2.vertex.size(); j++) + for (std::set<int>::iterator ii = g2.vertex[i].first.begin(); + ii != g2.vertex[i].first.end(); ii++) + for (std::set<int>::iterator jj = g2.vertex[j].first.begin(); + jj != g2.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g2.connect(i, j); + break; + } + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g2.connect(j, i); + break; + } + } + std::vector<std::set<int> > s2 = g2.topoSort(); + // nothing to distribute + if (s2.size() == 1) + throw loop_error( + "loop error: no statement can be distributed due to dependence cycle"); + std::vector<std::set<int> > s3; + for (int i = 0; i < s2.size(); i++) { + std::set<int> t; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), + g2.vertex[*j].first.end(), inserter(t, t.begin())); + s3.push_back(t); + } + // associate other affected statements with the right distributed statements + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) + if (stmt_nums.find(*i) == stmt_nums.end()) { + bool is_inserted = false; + int potential_insertion_point = 0; + for (int j = 0; j < s3.size(); j++) { + for (std::set<int>::iterator k = s3[j].begin(); + k != 
s3[j].end(); k++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*i, *k); + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].isCarried(dep_dim)) { + s3[j].insert(*i); + is_inserted = true; + break; + } + dvs = dep.getEdge(*k, *i); + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].isCarried(dep_dim)) + potential_insertion_point = j; + } + if (is_inserted) + break; + } + if (!is_inserted) + s3[potential_insertion_point].insert(*i); + } + // set lexicographical order after distribution + int order = ref_lex[dim - 1]; + shiftLexicalOrder(ref_lex, dim - 1, s3.size() - 1); + for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end(); + i++) { + for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++) + assign_const(stmt[*j].xform, dim - 1, order); + order++; + } + // no need to update dependence graph + ; + return; +} + diff --git a/chill/src/loop_datacopy.cc b/chill/src/loop_datacopy.cc new file mode 100644 index 0000000..36acb01 --- /dev/null +++ b/chill/src/loop_datacopy.cc @@ -0,0 +1,2166 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Various data copy schemes. + + Notes: + + History: + 02/20/09 Created by Chun Chen by splitting original datacopy from loop.cc +*****************************************************************************/ + +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +// +// data copy function by referring arrays by numbers. +// e.g. 
A[i] = A[i-1] + B[i] +// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + std::set<int> same_loop; + for (int i = 0; i < array_ref_nums.size(); i++) { + int stmt_num = array_ref_nums[i].first; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (i == 0) { + std::vector<int> lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, 2*level-2); + } + else if (same_loop.find(stmt_num) == same_loop.end()) + throw std::invalid_argument("array references for data copy must be located in the same subloop"); + } + + // convert array reference numbering scheme to actual array references + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (int i = 0; i < array_ref_nums.size(); i++) { + if (array_ref_nums[i].second.size() == 0) + continue; + + int stmt_num = array_ref_nums[i].first; + selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + std::vector<bool> selected(refs.size(), false); + for (int j = 0; j < array_ref_nums[i].second.size(); j++) { + int ref_num = array_ref_nums[i].second[j]; + if (ref_num < 0 || ref_num >= refs.size()) { + for (int k = 0; k < refs.size(); k++) + delete refs[k]; + throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + } + selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected[ref_num] = true; + } + for (int j = 
0; j < refs.size(); j++)
+      if (!selected[j])
+        delete refs[j];
+  }
+  if (selected_refs.size() == 0)
+    throw std::invalid_argument("found no array references to copy");
+
+  // do the copy
+  return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+}
+
+//
+// data copy function by referring arrays by name.
+// e.g. A[i] = A[i-1] + B[i]
+//      parameter array_name=A means to copy data touched by A[i-1] and A[i]
+//
+// Looks at every statement that getStatements() returns for stmt_num's
+// lexical order at position 2*level-2, keeps the references whose name
+// equals array_name and forwards them to datacopy_privatized() with an
+// empty list of privatized levels.  Statements without a matching
+// reference are not forwarded at all.
+//
+// The references returned by ir->FindArrayRef() are released here when
+// they are not selected; the selected ones are handed on.
+//
+// Throws std::invalid_argument for an out-of-range stmt_num or level, and
+// when no reference named array_name is found.
+//
+bool Loop::datacopy(int stmt_num, int level, const std::string &array_name,
+                    bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+  // check for sanity of parameters
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+  if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument("invalid loop level " + to_string(level));
+
+  // collect array references by name
+  std::vector<int> lex = getLexicalOrder(stmt_num);
+  int dim = 2*level - 1;
+  std::set<int> same_loop = getStatements(lex, dim-1);
+
+  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
+    std::vector<IR_ArrayRef *> t;
+    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+    for (int j = 0; j < refs.size(); j++)
+      if (refs[j]->name() == array_name)
+        t.push_back(refs[j]);
+      else
+        delete refs[j];  // not the array being copied
+    if (t.size() != 0)
+      selected_refs.push_back(std::make_pair(*i, t));
+  }
+  if (selected_refs.size() == 0)
+    throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
+
+  // do the copy
+  return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+}
+
+
+//
+// privatized data copy function by referring arrays by name.
+//
+// Same selection by name as Loop::datacopy(stmt_num, level, array_name, ...)
+// but privatized_levels is passed through, and every statement found by
+// getStatements() keeps an entry in the forwarded list even when it holds
+// no matching reference.
+// NOTE(review): the empty entries are presumably what lets the low-level
+// routine treat the whole subloop as taking part in the copy -- confirm
+// against the live implementation before changing that.
+//
+// Throws std::invalid_argument for an out-of-range stmt_num or level, and
+// when no statement contains a reference named array_name.
+//
+bool Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels,
+                               bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+  // check for sanity of parameters
+  if (stmt_num < 0 || stmt_num >= stmt.size())
+    throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+  if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+    throw std::invalid_argument("invalid loop level " + to_string(level));
+
+  // collect array references by name
+  std::vector<int> lex = getLexicalOrder(stmt_num);
+  int dim = 2*level - 1;
+  std::set<int> same_loop = getStatements(lex, dim-1);
+
+  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+  bool found_ref = false;
+  for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) {
+    selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>()));
+
+    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code);
+    for (int j = 0; j < refs.size(); j++)
+      if (refs[j]->name() == array_name) {
+        selected_refs.back().second.push_back(refs[j]);
+        found_ref = true;
+      }
+      else
+        delete refs[j];  // not the array being copied
+  }
+  // selected_refs holds one entry per statement, so its size cannot tell
+  // whether array_name was found anywhere; test the matches themselves.
+  if (!found_ref)
+    throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy");
+
+  // do the copy
+  return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+}
+
+
+//
+// privatized data copy function by referring arrays by number.
+//
+// array_ref_nums pairs a statement number with positions into the list
+// that ir->FindArrayRef() returns for that statement.  All statements must
+// be valid, deep enough for level, and belong to the set getStatements()
+// returns for the first statement.  Entries with an empty position list
+// are ignored.  A position given more than once is taken once.
+//
+// Throws std::invalid_argument for a bad statement number, level or
+// reference number, for statements outside the same subloop, and when
+// nothing is selected.  No array reference obtained here stays allocated
+// when one of these exceptions is raised by this function.
+//
+bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) {
+  // check for sanity of parameters
+  std::set<int> same_loop;
+  for (int i = 0; i < array_ref_nums.size(); i++) {
+    int stmt_num = array_ref_nums[i].first;
+    if (stmt_num < 0 || stmt_num >= stmt.size())
+      throw std::invalid_argument("invalid statement number " + to_string(stmt_num));
+    if (level <= 0 || level > stmt[stmt_num].loop_level.size())
+      throw std::invalid_argument("invalid loop level " + to_string(level));
+    if (i == 0) {
+      // the first statement defines the subloop all others must share
+      std::vector<int> lex = getLexicalOrder(stmt_num);
+      same_loop = getStatements(lex, 2*level-2);
+    }
+    else if (same_loop.find(stmt_num) == same_loop.end())
+      throw std::invalid_argument("array references for data copy must be located in the same subloop");
+  }
+
+  // convert array reference numbering scheme to actual array references
+  std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs;
+  for (int i = 0; i < array_ref_nums.size(); i++) {
+    if (array_ref_nums[i].second.size() == 0)
+      continue;
+
+    int stmt_num = array_ref_nums[i].first;
+    std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code);
+    std::vector<bool> selected(refs.size(), false);
+    std::vector<IR_ArrayRef *> chosen;
+    for (int j = 0; j < array_ref_nums[i].second.size(); j++) {
+      int ref_num = array_ref_nums[i].second[j];
+      if (ref_num < 0 || ref_num >= refs.size()) {
+        // nothing is handed on once we throw: release this statement's
+        // references and the ones already kept for earlier statements
+        for (int k = 0; k < refs.size(); k++)
+          delete refs[k];
+        for (int k = 0; k < selected_refs.size(); k++)
+          for (int m = 0; m < selected_refs[k].second.size(); m++)
+            delete selected_refs[k].second[m];
+        throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num));
+      }
+      if (selected[ref_num])
+        continue;  // listed twice; the same pointer must not be handed on twice
+      chosen.push_back(refs[ref_num]);
+      selected[ref_num] = true;
+    }
+    for (int j = 0; j < refs.size(); j++)
+      if (!selected[j])
+        delete refs[j];
+    selected_refs.push_back(std::make_pair(stmt_num, chosen));
+  }
+  if (selected_refs.size() == 0)
+    throw std::invalid_argument("found no array references to copy");
+
+  // do the copy
+  return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type);
+}
+
+
+//
+// Implement low level datacopy function with lots of options.
+// +/*bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, + const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, + int padding_stride, int padding_alignment, int memory_type) { + if (stmt_refs.size() == 0) + return true; + + // check for sanity of parameters + IR_ArraySymbol *sym = NULL; + std::vector<int> lex; + std::set<int> active; + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + for (int i = 0; i < privatized_levels.size(); i++) { + if (i == 0) { + if (privatized_levels[i] < level) + throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); + } + else if (privatized_levels[i] <= privatized_levels[i-1]) + throw std::invalid_argument("privatized loop levels must be in ascending order"); + } + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + active.insert(stmt_num); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (privatized_levels.size() != 0) { + if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); + } + else { + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); + } + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (sym == NULL) { + sym = stmt_refs[i].second[j]->symbol(); + lex = getLexicalOrder(stmt_num); + } + else { + IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); + if (t->name() != sym->name()) { + delete t; + delete sym; + throw std::invalid_argument("try to copy data from different arrays"); + } + delete t; + } + } + } + if 
(!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) + throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); + if (padding_stride < 0) + throw std::invalid_argument("invalid temporary array stride requirement"); + if (padding_alignment == -1 || padding_alignment == 0) + throw std::invalid_argument("invalid temporary array alignment requirement"); + + int dim = 2*level - 1; + int n_dim = sym->n_dim(); + + if (fastest_changing_dimension == -1) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported array layout"); + } + + + // build iteration spaces for all reads and for all writes separately + apply_xform(active); + bool has_write_refs = false; + bool has_read_refs = false; + Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); + for (int k = 1; k <= mapping.n_inp(); k++) + mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); + mapping.setup_names(); + F_And *f_root = mapping.add_and(); + for (int k = 1; k <= level-1; k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(k), 1); + h.update_coef(mapping.output_var(k), -1); + } + for (int k = 0; k < privatized_levels.size(); k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(privatized_levels[k]), 1); + h.update_coef(mapping.output_var(level+k), -1); + } + for (int k = 0; k < n_dim; k++) { + CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); + exp2formula(ir, mapping, f_root, 
freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); + repr->clear(); + delete repr; + } + Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + if (stmt_refs[i].second[j]->is_write()) { + has_write_refs = true; + wo_copy_is = Union(wo_copy_is, r); + wo_copy_is.simplify(2, 4); + } + else { + has_read_refs = true; + //protonu--removing the next line for now + ro_copy_is = Union(ro_copy_is, r); + ro_copy_is.simplify(2, 4); + //ro_copy_is = ConvexRepresentation(Union(ro_copy_is, r)); + + } + } + } + + if (allow_extra_read) { + Relation t = DecoupledConvexHull(copy(ro_copy_is)); + if (t.number_of_conjuncts() > 1) + ro_copy_is = RectHull(ro_copy_is); + else + ro_copy_is = t; + } + else { + Relation t = ConvexRepresentation(copy(ro_copy_is)); + if (t.number_of_conjuncts() > 1) + ro_copy_is = RectHull(ro_copy_is); + else + ro_copy_is = t; + } + wo_copy_is = ConvexRepresentation(wo_copy_is); + + if (allow_extra_read) { + Tuple<Relation> Rs; + Tuple<int> active; + for (DNF_Iterator di(ro_copy_is.query_DNF()); di; di++) { + Rs.append(Relation(ro_copy_is, di.curr())); + active.append(1); + } + Relation the_gcs = Relation::True(ro_copy_is.n_set()); + for (int i = level-1+privatized_levels.size()+1; i <= level-1+privatized_levels.size()+n_dim; i++) { + Relation r = greatest_common_step(Rs, active, i, Relation::Null()); + the_gcs = Intersection(the_gcs, r); + } + + ro_copy_is = Approximate(ro_copy_is); + ro_copy_is = ConvexRepresentation(ro_copy_is); + ro_copy_is = Intersection(ro_copy_is, the_gcs); + ro_copy_is.simplify(); + } + + + + for (int i = 1; i < level; i++) { + std::string s = stmt[*active.begin()].IS.input_var(i)->name(); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + for (int i = 0; i < privatized_levels.size(); i++) { + std::string s = 
stmt[*active.begin()].IS.input_var(privatized_levels[i])->name(); + wo_copy_is.name_set_var(level+i, s); + ro_copy_is.name_set_var(level+i, s); + } + for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { + std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + tmp_loop_var_name_counter += n_dim; + + //protonu--end change + + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + + // build merged iteration space for calculating temporary array size + bool already_use_recthull = false; + Relation untampered_copy_is = ConvexRepresentation(Union(copy(wo_copy_is), copy(ro_copy_is))); + Relation copy_is = untampered_copy_is; + if (copy_is.number_of_conjuncts() > 1) { + try { + copy_is = ConvexHull(copy(untampered_copy_is)); + } + catch (const std::overflow_error &e) { + copy_is = RectHull(copy(untampered_copy_is)); + already_use_recthull = true; + } + } + + + Retry_copy_is: + // extract temporary array information + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL + std::vector<coef_t> index_stride(n_dim, 1); + std::vector<bool> is_index_eq(n_dim, false); + std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); + Relation reduced_copy_is = copy(copy_is); + + for (int i = 0; i < n_dim; i++) { + if (i != 0) + reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); + + // extract stride + EQ_Handle stride_eq; + { + bool simple_stride = true; + int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level-1+privatized_levels.size()+i+1), stride_eq, simple_stride); + if (strides > 1) { + throw loop_error("too many strides"); + } + else if (strides == 1) { + int sign = 
stride_eq.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + Constr_Vars_Iter it(stride_eq, true); + index_stride[i] = abs((*it).coef/sign); + } + } + + // check if this arary index requires loop + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (EQ_Iterator ei(c->EQs()); ei; ei++) { + if ((*ei).has_wildcards()) + continue; + + int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0) { + int sign = 1; + if (coef < 0) { + coef = -coef; + sign = -1; + } + + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*ei); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); + } + } + if ((*ei).get_const() != 0) + op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + if (coef != 1) + op = 
ocg->CreateIntegerDivide(op, ocg->CreateInt(coef)); + + index_lb[i] = op; + is_index_eq[i] = true; + break; + } + } + if (is_index_eq[i]) + continue; + + // seperate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0 && (*gi).has_wildcards()) { + bool clean_bound = true; + GEQ_Handle h; + for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) + if (!findFloorInequality(bound, (*cvi).var, h, bound.set_var(level-1+privatized_levels.size()+i+1))) { + clean_bound = false; + break; + } + if (!clean_bound) + continue; + } + + if (coef > 0) + lb_list.push_back(*gi); + else if (coef < 0) + ub_list.push_back(*gi); + } + if (lb_list.size() == 0 || ub_list.size() == 0) + if (already_use_recthull) + throw loop_error("failed to calcuate array footprint size"); + else { + copy_is = RectHull(copy(untampered_copy_is)); + already_use_recthull = true; + goto Retry_copy_is; + } + + // build lower bound representation + Tuple<CG_outputRepr *> lb_repr_list; + for (int j = 0; j < lb_list.size(); j++) + lb_repr_list.append(outputLBasRepr(ocg, lb_list[j], bound, + bound.set_var(level-1+privatized_levels.size()+i+1), + index_stride[i], stride_eq, Relation::True(bound.n_set()), + std::vector<CG_outputRepr *>(bound.n_set()))); + + if (lb_repr_list.size() > 1) + index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + index_lb[i] = lb_repr_list[1]; + + // build temporary array size representation + { + Relation cal(copy_is.n_set(), 1); + F_And *f_root = cal.add_and(); + for (int j = 0; j < ub_list.size(); j++) + for (int k = 0; k < lb_list.size(); k++) { + GEQ_Handle h = f_root->add_GEQ(); + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case 
Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(ub_list[j].get_const()); + + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(lb_list[k].get_const()); + + h.update_const(1); + h.update_coef(cal.output_var(1), -1); + } + + cal = Restrict_Domain(cal, copy(copy_is)); + for (int j = 1; j <= cal.n_inp(); j++) + cal = Project(cal, j, Input_Var); + cal.simplify(); + + // pad temporary array size + // TODO: for variable array size, create padding formula + Conjunct *c = cal.query_DNF()->single_conjunct(); + bool is_index_bound_const = false; + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) + if ((*gi).is_const(cal.output_var(1))) { + coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); + if (padding_stride != 0) { + size = (size + index_stride[i] - 1) / index_stride[i]; + if (i == fastest_changing_dimension) + size = size * padding_stride; + } + if (i == fastest_changing_dimension) { + if (padding_alignment > 1) { // align to boundary for data packing + int residue = size % padding_alignment; + if (residue) + size = size+padding_alignment-residue; + } + else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + while (gcd(size, 
static_cast<coef_t>(-padding_alignment)) != 1) + size++; + } + } + index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); + is_index_bound_const = true; + } + + if (!is_index_bound_const) { + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { + int coef = (*gi).get_coef(cal.output_var(1)); + if (coef < 0) { + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + if ((*ci).var != cal.output_var(1)) { + switch((*ci).var->kind()) { + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); + } + } + } + int c = (*gi).get_const(); + if (c > 0) + op = ocg->CreatePlus(op, ocg->CreateInt(c)); + else if (c < 0) + op = ocg->CreateMinus(op, ocg->CreateInt(-c)); + if (padding_stride != 0) { + if (i == fastest_changing_dimension) { + coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[i] / g; + if (t1 != 1) + op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + op = ocg->CreateTimes(op, ocg->CreateInt(t2)); + } + else if (index_stride[i] != 1) { + op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } + } + + index_sz.push_back(std::make_pair(i, op)); + break; + } + } + } + } + } + + // change the temporary array index order + for (int i = 0; i < index_sz.size(); i++) + if (index_sz[i].first 
== fastest_changing_dimension) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size()-1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); + } + + // declare temporary array or scalar + IR_Symbol *tmp_sym; + if (index_sz.size() == 0) { + tmp_sym = ir->CreateScalarSymbol(sym, memory_type); + } + else { + std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) + tmp_array_size[i] = index_sz[i].second->clone(); + tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); + } + + // create temporary array read initialization code + CG_outputRepr *copy_code_read; + if (has_read_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, 
ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + lhs_index[i] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + } + + // create temporary array write back code + CG_outputRepr *copy_code_write; + if (has_write_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + lhs_index[i] = index_lb[i]->clone(); + else + lhs_index[i] = 
ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); + + std::vector<CG_outputRepr *> rhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + rhs_index[i] = cur_index_repr; + } + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); + } + + // now we can remove those loops for array indexes that are + // dependent on others + if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { + Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + + int cur_index = 0; + 
std::vector<int> mapped_index(index_sz.size()); + for (int i = 0; i < n_dim; i++) + if (!is_index_eq[i]) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); + mapped_index[index_sz.size()-cur_index-1] = i; + break; + } + case IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); + } + cur_index++; + } + + wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); + ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); + + // protonu--replacing Chun's old code + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + } + + + + for (int i = 0; i < index_sz.size(); i++) { + wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + } + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // insert read copy statement + int old_num_stmt = stmt.size(); + int ro_copy_stmt_num = -1; + if (has_read_refs) { + Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= ro_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + 
h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_read; + copy_stmt_read.IS = ro_copy_is; + copy_stmt_read.xform = copy_xform; + copy_stmt_read.code = copy_code_read; + copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); + copy_stmt_read.ir_stmt_node = NULL; + for (int i = 0; i < level-1; i++) { + copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_read.loop_level[i].payload = -1; + else + copy_stmt_read.loop_level[i].payload = level + j; + } + else + copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + 
copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + shiftLexicalOrder(lex, dim-1, 1); + stmt.push_back(copy_stmt_read); + ro_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // insert write copy statement + int wo_copy_stmt_num = -1; + if (has_write_refs) { + Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= wo_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_write; + copy_stmt_write.IS = wo_copy_is; + copy_stmt_write.xform = copy_xform; + copy_stmt_write.code = copy_code_write; + copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); + copy_stmt_write.ir_stmt_node = NULL; + + for (int i = 0; i < level-1; i++) { + copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_write.loop_level[i].payload = -1; + else + 
copy_stmt_write.loop_level[i].payload = level + j; + } + else + copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + lex[dim-1]++; + shiftLexicalOrder(lex, dim-1, -2); + stmt.push_back(copy_stmt_write); + wo_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // replace original array accesses with temporary array accesses + for (int i =0; i < stmt_refs.size(); i++) + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + ir->ReplaceExpression(stmt_refs[i].second[j], 
tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> index_repr(index_sz.size()); + for (int k = 0; k < index_sz.size(); k++) { + int cur_index_num = index_sz[k].first; + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (k == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + index_repr[k] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); + } + } + + // update dependence graph + int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; + if (ro_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + 
dep.connect(ro_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, ro_copy_stmt_num, D[j]); + } + + // insert dependences from copy statement loop to copied statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int i = dep_dim; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(ro_copy_stmt_num, *i, dv); + } + + if (wo_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(wo_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < 
j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, wo_copy_stmt_num, D[j]); + } + + // insert dependences from copied statements to write statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int i = dep_dim; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(*i, wo_copy_stmt_num, dv); + + } + + // update variable name for dependences among copied statements + for (int i = 0; i < old_num_stmt; i++) { + if (active.find(i) != active.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (active.find(j->first) != active.end()) + for (int k = 0; k < j->second.size(); k++) { + IR_Symbol *s = tmp_sym->clone(); + j->second[k].sym = s; + } + } + + // insert anti-dependence from write statement to read statement + if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) + if (dep_dim >= 0) { + DependenceVector dv; + dv.type = DEP_R2W; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int k = dep_dim; k < num_dep_dim; k++) { + dv.lbounds[k] = -posInfinity; + dv.ubounds[k] = posInfinity; + } + for (int k = 0; k < dep_dim; k++) { + if (k != 0) { + dv.lbounds[k-1] = 0; + dv.ubounds[k-1] = 0; + } + dv.lbounds[k] = 1; + dv.ubounds[k] = posInfinity; + dep.connect(wo_copy_stmt_num, 
ro_copy_stmt_num, dv); + } + } + + + // cleanup + delete sym; + delete tmp_sym; + for (int i = 0; i < index_lb.size(); i++) { + index_lb[i]->clear(); + delete index_lb[i]; + } + for (int i = 0; i < index_sz.size(); i++) { + index_sz[i].second->clear(); + delete index_sz[i].second; + } + + return true; + } +*/ +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, + const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, + int padding_stride, int padding_alignment, int memory_type) { + if (stmt_refs.size() == 0) + return true; + + // check for sanity of parameters + IR_ArraySymbol *sym = NULL; + std::vector<int> lex; + std::set<int> active; + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + for (int i = 0; i < privatized_levels.size(); i++) { + if (i == 0) { + if (privatized_levels[i] < level) + throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); + } + else if (privatized_levels[i] <= privatized_levels[i-1]) + throw std::invalid_argument("privatized loop levels must be in ascending order"); + } + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + active.insert(stmt_num); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (privatized_levels.size() != 0) { + if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); + } + else { + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); + } + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (sym == NULL) { + sym = 
stmt_refs[i].second[j]->symbol(); + lex = getLexicalOrder(stmt_num); + } + else { + IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); + if (t->name() != sym->name()) { + delete t; + delete sym; + throw std::invalid_argument("try to copy data from different arrays"); + } + delete t; + } + } + } + if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) + throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); + if (padding_stride < 0) + throw std::invalid_argument("invalid temporary array stride requirement"); + if (padding_alignment == -1 || padding_alignment == 0) + throw std::invalid_argument("invalid temporary array alignment requirement"); + + int dim = 2*level - 1; + int n_dim = sym->n_dim(); + + + if (fastest_changing_dimension == -1) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported array layout"); + } + + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // build iteration spaces for all reads and for all writes separately + apply_xform(active); + + bool has_write_refs = false; + bool has_read_refs = false; + Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); + for (int k = 1; k <= mapping.n_inp(); k++) + mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); + mapping.setup_names(); + F_And *f_root = mapping.add_and(); + for (int k = 1; k <= level-1; 
k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(k), 1); + h.update_coef(mapping.output_var(k), -1); + } + for (int k = 0; k < privatized_levels.size(); k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(privatized_levels[k]), 1); + h.update_coef(mapping.output_var(level+k), -1); + } + for (int k = 0; k < n_dim; k++) { + CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); + exp2formula(ir, mapping, f_root, freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); + repr->clear(); + delete repr; + } + Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + if (stmt_refs[i].second[j]->is_write()) { + has_write_refs = true; + wo_copy_is = Union(wo_copy_is, r); + wo_copy_is.simplify(2, 4); + + + } + else { + has_read_refs = true; + ro_copy_is = Union(ro_copy_is, r); + ro_copy_is.simplify(2, 4); + + } + } + } + + // simplify read and write footprint iteration space + { + if (allow_extra_read) + ro_copy_is = SimpleHull(ro_copy_is, true, true); + else + ro_copy_is = ConvexRepresentation(ro_copy_is); + + wo_copy_is = ConvexRepresentation(wo_copy_is); + if (wo_copy_is.number_of_conjuncts() > 1) { + Relation t = SimpleHull(wo_copy_is, true, true); + if (Must_Be_Subset(copy(t), copy(ro_copy_is))) + wo_copy_is = t; + else if (Must_Be_Subset(copy(wo_copy_is), copy(ro_copy_is))) + wo_copy_is = ro_copy_is; + } + } + + // make copy statement variable names match the ones in the original statements which + // already have the same names due to apply_xform + { + int ref_stmt = *active.begin(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + if (stmt[*i].IS.n_set() > stmt[ref_stmt].IS.n_set()) + ref_stmt = *i; + for (int i = 1; i < level; i++) { + std::string s = stmt[ref_stmt].IS.input_var(i)->name(); + wo_copy_is.name_set_var(i, s); + 
ro_copy_is.name_set_var(i, s); + } + for (int i = 0; i < privatized_levels.size(); i++) { + std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name(); + wo_copy_is.name_set_var(level+i, s); + ro_copy_is.name_set_var(level+i, s); + } + for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { + std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + tmp_loop_var_name_counter += n_dim; + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // build merged footprint iteration space for calculating temporary array size + Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true); + + // extract temporary array information + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL + std::vector<coef_t> index_stride(n_dim); + std::vector<bool> is_index_eq(n_dim, false); + std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); + Relation reduced_copy_is = copy(copy_is); + + for (int i = 0; i < n_dim; i++) { + if (i != 0) + reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); + + // extract stride + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1)); + if (result.second != NULL) + index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)))); + else + index_stride[i] = 1; + + // check if this arary index requires loop + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (EQ_Iterator ei(c->EQs()); ei; ei++) { + if ((*ei).has_wildcards()) + continue; + + int coef = 
(*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0) { + int sign = 1; + if (coef < 0) { + coef = -coef; + sign = -1; + } + + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*ei); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); + } + } + if ((*ei).get_const() != 0) + op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + if (coef != 1) + op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef)); + + index_lb[i] = op; + is_index_eq[i] = true; + break; + } + } + if (is_index_eq[i]) + continue; + + // seperate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + std::set<Variable_ID> excluded_floor_vars; + excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1)); + for 
(GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0 && (*gi).has_wildcards()) { + bool clean_bound = true; + GEQ_Handle h; + for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) + if (!find_floor_definition(bound, (*cvi).var, excluded_floor_vars).first) { + clean_bound = false; + break; + } + if (!clean_bound) + continue; + } + + if (coef > 0) + lb_list.push_back(*gi); + else if (coef < 0) + ub_list.push_back(*gi); + } + if (lb_list.size() == 0 || ub_list.size() == 0) + throw loop_error("failed to calcuate array footprint size"); + + // build lower bound representation + std::vector<CG_outputRepr *> lb_repr_list; + for (int j = 0; j < lb_list.size(); j++){ + if(this->known.n_set() == 0) + lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, Relation::True(bound.n_set()), std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); + else + lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, this->known, std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); + } + if (lb_repr_list.size() > 1) + index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + index_lb[i] = lb_repr_list[0]; + + // build temporary array size representation + { + Relation cal(copy_is.n_set(), 1); + F_And *f_root = cal.add_and(); + for (int j = 0; j < ub_list.size(); j++) + for (int k = 0; k < lb_list.size(); k++) { + GEQ_Handle h = f_root->add_GEQ(); + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case 
Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(ub_list[j].get_const()); + + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(lb_list[k].get_const()); + + h.update_const(1); + h.update_coef(cal.output_var(1), -1); + } + + cal = Restrict_Domain(cal, copy(copy_is)); + for (int j = 1; j <= cal.n_inp(); j++) + cal = Project(cal, j, Input_Var); + cal.simplify(); + + // pad temporary array size + // TODO: for variable array size, create padding formula + Conjunct *c = cal.query_DNF()->single_conjunct(); + bool is_index_bound_const = false; + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) + if ((*gi).is_const(cal.output_var(1))) { + coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); + if (padding_stride != 0) { + size = (size + index_stride[i] - 1) / index_stride[i]; + if (i == fastest_changing_dimension) + size = size * padding_stride; + } + if (i == fastest_changing_dimension) { + if (padding_alignment > 1) { // align to boundary for data packing + int residue = size % padding_alignment; + if (residue) + size = size+padding_alignment-residue; + } + else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + while (gcd(size, 
static_cast<coef_t>(-padding_alignment)) != 1) + size++; + } + } + index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); + is_index_bound_const = true; + } + + if (!is_index_bound_const) { + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { + int coef = (*gi).get_coef(cal.output_var(1)); + if (coef < 0) { + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + if ((*ci).var != cal.output_var(1)) { + switch((*ci).var->kind()) { + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); + } + } + } + int c = (*gi).get_const(); + if (c > 0) + op = ocg->CreatePlus(op, ocg->CreateInt(c)); + else if (c < 0) + op = ocg->CreateMinus(op, ocg->CreateInt(-c)); + if (padding_stride != 0) { + if (i == fastest_changing_dimension) { + coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[i] / g; + if (t1 != 1) + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + op = ocg->CreateTimes(op, ocg->CreateInt(t2)); + } + else if (index_stride[i] != 1) { + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } + } + + index_sz.push_back(std::make_pair(i, op)); + break; + } + } + } + } + } + + // change the temporary array index order + for (int i = 0; i < index_sz.size(); i++) + if (index_sz[i].first == 
fastest_changing_dimension) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size()-1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); + } + + // declare temporary array or scalar + IR_Symbol *tmp_sym; + if (index_sz.size() == 0) { + tmp_sym = ir->CreateScalarSymbol(sym, memory_type); + } + else { + std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) + tmp_array_size[i] = index_sz[i].second->clone(); + tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); + } + + // create temporary array read initialization code + CG_outputRepr *copy_code_read; + if (has_read_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, 
ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + lhs_index[i] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + } + + // create temporary array write back code + CG_outputRepr *copy_code_write; + if (has_write_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + lhs_index[i] = index_lb[i]->clone(); + else + lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); 
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); + + std::vector<CG_outputRepr *> rhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + rhs_index[i] = cur_index_repr; + } + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); + } + + // now we can remove those loops for array indexes that are + // dependent on others + if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { + Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + + int cur_index = 0; + std::vector<int> mapped_index(index_sz.size()); + for (int i = 0; i < n_dim; i++) + if 
(!is_index_eq[i]) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); + mapped_index[index_sz.size()-cur_index-1] = i; + break; + } + case IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); + } + cur_index++; + } + + wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); + ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + } + for (int i = 0; i < index_sz.size(); i++) { + wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + } + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // insert read copy statement + int old_num_stmt = stmt.size(); + int ro_copy_stmt_num = -1; + if (has_read_refs) { + Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= ro_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + 
h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_read; + copy_stmt_read.IS = ro_copy_is; + copy_stmt_read.xform = copy_xform; + copy_stmt_read.code = copy_code_read; + copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); + copy_stmt_read.ir_stmt_node = NULL; + for (int i = 0; i < level-1; i++) { + copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_read.loop_level[i].payload = -1; + else + copy_stmt_read.loop_level[i].payload = level + j; + } + else + copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < 
index_sz.size(); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + + shiftLexicalOrder(lex, dim-1, 1); + stmt.push_back(copy_stmt_read); + ro_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // insert write copy statement + int wo_copy_stmt_num = -1; + if (has_write_refs) { + Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= wo_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_write; + copy_stmt_write.IS = wo_copy_is; + copy_stmt_write.xform = copy_xform; + copy_stmt_write.code = copy_code_write; + copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); + copy_stmt_write.ir_stmt_node = NULL; + + for (int i = 0; i < level-1; i++) { + copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_write.loop_level[i].payload = -1; + else + copy_stmt_write.loop_level[i].payload = level + j; + } + else + copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + 
copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + lex[dim-1]++; + shiftLexicalOrder(lex, dim-1, -2); + stmt.push_back(copy_stmt_write); + wo_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // replace original array accesses with temporary array accesses + for (int i =0; i < stmt_refs.size(); i++) + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> index_repr(index_sz.size()); + for (int k = 0; k < index_sz.size(); k++) { + int cur_index_num = 
index_sz[k].first; + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (k == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + index_repr[k] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); + } + } + + // update dependence graph + int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; + if (ro_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(ro_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, 
dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, ro_copy_stmt_num, D[j]); + } + + // insert dependences from copy statement loop to copied statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int i = dep_dim; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(ro_copy_stmt_num, *i, dv); + } + + if (wo_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(wo_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) + 
dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, wo_copy_stmt_num, D[j]); + } + + // insert dependences from copied statements to write statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int i = dep_dim; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(*i, wo_copy_stmt_num, dv); + + } + + // update variable name for dependences among copied statements + for (int i = 0; i < old_num_stmt; i++) { + if (active.find(i) != active.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (active.find(j->first) != active.end()) + for (int k = 0; k < j->second.size(); k++) { + IR_Symbol *s = tmp_sym->clone(); + j->second[k].sym = s; + } + } + + // insert anti-dependence from write statement to read statement + if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) + if (dep_dim >= 0) { + DependenceVector dv; + dv.type = DEP_R2W; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int k = dep_dim; k < dep.num_dim(); k++) { + dv.lbounds[k] = -posInfinity; + dv.ubounds[k] = posInfinity; + } + for (int k = 0; k < dep_dim; k++) { + if (k != 0) { + dv.lbounds[k-1] = 0; + dv.ubounds[k-1] = 0; + } + dv.lbounds[k] = 1; + dv.ubounds[k] = posInfinity; + dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); + } + } + + // cleanup + delete sym; + delete tmp_sym; + for (int i = 0; i < index_lb.size(); i++) { + index_lb[i]->clear(); + delete 
index_lb[i]; + } + for (int i = 0; i < index_sz.size(); i++) { + index_sz[i].second->clear(); + delete index_sz[i].second; + } + + return true; +} diff --git a/chill/src/loop_extra.cc b/chill/src/loop_extra.cc new file mode 100644 index 0000000..2412403 --- /dev/null +++ b/chill/src/loop_extra.cc @@ -0,0 +1,224 @@ +/***************************************************************************** + Copyright (C) 2010 University of Utah + All Rights Reserved. + + Purpose: + Additional loop transformations. + + Notes: + + History: + 07/31/10 Created by Chun Chen +*****************************************************************************/ + +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + +void Loop::shift_to(int stmt_num, int level, int absolute_position) { + // combo + tile(stmt_num, level, 1, level, CountedTile); + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> active = getStatements(lex, 2*level-2); + shift(active, level, absolute_position); + + // remove unnecessary tiled loop since tile size is one + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n-2); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= 2*level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 2*level+3; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j-2), 1); + h.update_coef(mapping.input_var(j), -1); + } + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + + for (int j = 0; j < stmt[*i].loop_level.size(); j++) + if (j != level-1 && + stmt[*i].loop_level[j].type == LoopLevelTile && + stmt[*i].loop_level[j].payload >= level) + stmt[*i].loop_level[j].payload--; + + 
stmt[*i].loop_level.erase(stmt[*i].loop_level.begin()+level-1); + } +} + + +std::set<int> Loop::unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level) { + std::set<int> cleanup_stmts = unroll(stmt_num, level, unroll_amount,std::vector< std::vector<std::string> >(), cleanup_split_level); + for (std::set<int>::iterator i = cleanup_stmts.begin(); i != cleanup_stmts.end(); i++) + unroll(*i, level, 0); + + return cleanup_stmts; +} + +void Loop::peel(int stmt_num, int level, int peel_amount) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + if (peel_amount == 0) + return; + + std::set<int> subloop = getSubLoopNest(stmt_num, level); + std::vector<Relation> Rs; + for (std::set<int>::iterator i = subloop.begin(); i != subloop.end(); i++) { + Relation r = getNewIS(*i); + Relation f(r.n_set(), level); + F_And *f_root = f.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(f.input_var(2*j), 1); + h.update_coef(f.output_var(j), -1); + } + r = Composition(f, r); + r.simplify(); + Rs.push_back(r); + } + Relation hull = SimpleHull(Rs); + + if (peel_amount > 0) { + GEQ_Handle bound_eq; + bool found_bound = false; + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { + bound_eq = *e; + found_bound = true; + break; + } + if (!found_bound) + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { + bool is_bound = true; + for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { + std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); + if (!result.first) { + is_bound = false; + break; 
+ } + } + if (is_bound) { + bound_eq = *e; + found_bound = true; + break; + } + } + if (!found_bound) + throw loop_error("can't find lower bound for peeling at loop level " + to_string(level)); + + for (int i = 1; i <= peel_amount; i++) { + Relation r(level); + F_Exists *f_exists = r.add_and()->add_exists(); + F_And *f_root = f_exists->add_and(); + GEQ_Handle h = f_root->add_GEQ(); + std::map<Variable_ID, Variable_ID> exists_mapping; + for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) + switch (cvi.curr_var()->kind()) { + case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); + } + h.update_const(bound_eq.get_const() - i); + r.simplify(); + + split(stmt_num, level, r); + } + } + else { // peel_amount < 0 + GEQ_Handle bound_eq; + bool found_bound = false; + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { + bound_eq = *e; + found_bound = true; + break; + } + if (!found_bound) + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { + bool is_bound = true; + for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { + std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); + if (!result.first) { + is_bound = false; + break; + } + } + if (is_bound) { + bound_eq = *e; + found_bound = true; + break; + } + } + if (!found_bound) + throw loop_error("can't find upper bound for peeling at loop level " + 
to_string(level)); + + for (int i = 1; i <= -peel_amount; i++) { + Relation r(level); + F_Exists *f_exists = r.add_and()->add_exists(); + F_And *f_root = f_exists->add_and(); + GEQ_Handle h = f_root->add_GEQ(); + std::map<Variable_ID, Variable_ID> exists_mapping; + for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) + switch (cvi.curr_var()->kind()) { + case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); + } + h.update_const(bound_eq.get_const() - i); + r.simplify(); + + split(stmt_num, level, r); + } + } +} + diff --git a/chill/src/loop_tile.cc b/chill/src/loop_tile.cc new file mode 100644 index 0000000..ad1d3b7 --- /dev/null +++ b/chill/src/loop_tile.cc @@ -0,0 +1,630 @@ +/* + * loop_tile.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include <codegen.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + + + +void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, + TilingMethodType method, int alignment_offset, int alignment_multiple) { + // check for sanity of parameters + if (tile_size < 0) + throw std::invalid_argument("invalid tile size"); + if (alignment_multiple < 1 || alignment_offset < 0) + throw std::invalid_argument("invalid alignment for tile"); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); 
+ if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "there is no loop level " + to_string(level) + " for statement " + + to_string(stmt_num)); + if (outer_level <= 0 || outer_level > level) + throw std::invalid_argument( + "invalid tile controlling loop level " + + to_string(outer_level)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + int outer_dim = 2 * outer_level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_tiled_loop = getStatements(lex, dim - 1); + std::set<int> same_tile_controlling_loop = getStatements(lex, + outer_dim - 1); + + for (std::set<int>::iterator i = same_tiled_loop.begin(); + i != same_tiled_loop.end(); i++) { + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + j++) { + if (same_tiled_loop.find(j->first) != same_tiled_loop.end()) + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { + dim2 = stmt[*i].loop_level[dim2].payload - 1; + } + dim2 = stmt[*i].loop_level[dim2].payload; + + if (dv.hasNegative(dim2) && (!dv.quasi)) { + for (int l = outer_level; l < level; l++) + if (stmt[*i].loop_level[l - 1].type + != LoopLevelTile) { + if (dv.isCarried( + stmt[*i].loop_level[l - 1].payload) + && dv.hasPositive( + stmt[*i].loop_level[l - 1].payload)) + throw loop_error( + "loop error: Tiling is illegal, dependence violation!"); + } else { + + int dim3 = l - 1; + while (stmt[*i].loop_level[l - 1].type + != LoopLevelTile) { + dim3 = + stmt[*i].loop_level[l - 1].payload + - 1; + + } + + dim3 = stmt[*i].loop_level[l - 1].payload; + if (dim3 < level - 1) + if (dv.isCarried(dim3) + && dv.hasPositive(dim3)) + throw loop_error( + 
"loop error: Tiling is illegal, dependence violation!"); + } + } + } + } + } + } + // special case for no tiling + if (tile_size == 0) { + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 1; j <= 2 * outer_level - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h1 = f_root->add_EQ(); + h1.update_coef(r.output_var(2 * outer_level), 1); + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(r.output_var(2 * outer_level + 1), 1); + for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j + 2), -1); + } + + stmt[*i].xform = Composition(copy(r), stmt[*i].xform); + } + } + // normal tiling + else { + std::set<int> private_stmt; + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { +// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) +// same_tiled_loop.insert(*i); + + // should test dim's value directly but it is ok for now +// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) + if (same_tiled_loop.find(*i) == same_tiled_loop.end() + && overflow.find(*i) != overflow.end()) + private_stmt.insert(*i); + } + + // extract the union of the iteration space to be considered + Relation hull; + /*{ + Tuple < Relation > r_list; + Tuple<int> r_mask; + + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) + if (private_stmt.find(*i) == private_stmt.end()) { + Relation r = project_onto_levels(getNewIS(*i), dim + 1, + true); + for (int j = outer_dim; j < dim; j++) + r = 
Project(r, j + 1, Set_Var); + for (int j = 0; j < outer_dim; j += 2) + r = Project(r, j + 1, Set_Var); + r_list.append(r); + r_mask.append(1); + } + + hull = Hull(r_list, r_mask, 1, true); + }*/ + + { + std::vector<Relation> r_list; + + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) + if (private_stmt.find(*i) == private_stmt.end()) { + Relation r = getNewIS(*i); + for (int j = dim + 2; j <= r.n_set(); j++) + r = Project(r, r.set_var(j)); + for (int j = outer_dim; j < dim; j++) + r = Project(r, j + 1, Set_Var); + for (int j = 0; j < outer_dim; j += 2) + r = Project(r, j + 1, Set_Var); + r.simplify(2, 4); + r_list.push_back(r); + } + + hull = SimpleHull(r_list); + // hull = Hull(r_list, std::vector<bool>(r_list.size(), true), 1, true); + } + + // extract the bound of the dimension to be tiled + Relation bound = get_loop_bound(hull, dim); + if (!bound.has_single_conjunct()) { + // further simplify the bound + hull = Approximate(hull); + bound = get_loop_bound(hull, dim); + + int i = outer_dim - 2; + while (!bound.has_single_conjunct() && i >= 0) { + hull = Project(hull, i + 1, Set_Var); + bound = get_loop_bound(hull, dim); + i -= 2; + } + + if (!bound.has_single_conjunct()) + throw loop_error("cannot handle tile bounds"); + } + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(dim + 1)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + if (lb_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop lower bound"); + if (ub_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop upper bound"); + + // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile + int simplest_lb = 0, simplest_ub = 
0; + if (method == StridedTile) { + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) { + int cost = 0; + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + cost += 5; + break; + } + case Global_Var: { + cost += 2; + break; + } + default: + cost += 15; + break; + } + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + } + } + } else if (method == CountedTile) { + std::map<Variable_ID, coef_t> s1, s2, s3; + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) { + int cost = 0; + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + cost = INT_MAX - 2; + break; + } + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + if (cost == INT_MAX - 2) + cost = INT_MAX - 1; + else + cost = INT_MAX - 3; + break; + } + } + + if (cost == 0) { + for (std::map<Variable_ID, coef_t>::iterator k = + s1.begin(); k != s1.end(); k++) + if ((*k).second != 0) + cost += 5; + for (std::map<Variable_ID, coef_t>::iterator k = + s2.begin(); k != s2.end(); k++) + if ((*k).second != 0) + cost += 2; + for (std::map<Variable_ID, coef_t>::iterator k = + s3.begin(); k != s3.end(); k++) + if ((*k).second != 0) + cost += 15; + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + simplest_ub = j; + } + } + } + + // prepare the new transformation relations + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != 
same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 0; j < outer_dim - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 1), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 3), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(outer_dim), 1); + h.update_const(-lex[outer_dim - 1]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + } + + // add tiling constraints. + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + F_And *f_super_root = stmt[*i].xform.and_with_and(); + F_Exists *f_exists = f_super_root->add_exists(); + F_And *f_root = f_exists->add_and(); + + // create a lower bound variable for easy formula creation later + Variable_ID aligned_lb; + { + Variable_ID lb = f_exists->declare(); + coef_t coef = lb_list[simplest_lb].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. if i >= m+5, then LB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lb, 1); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(lb_list[simplest_lb].get_const()); + } else { // e.g. 
if 2i >= m+5, then m+5 <= 2*LB < m+5+2 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(lb, (*ci).coef); + h2.update_coef(lb, -(*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, (*ci).coef); + h2.update_coef(v, -(*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(lb_list[simplest_lb].get_const()); + h2.update_const(-lb_list[simplest_lb].get_const()); + h2.update_const(coef - 1); + } + + Variable_ID offset_lb; + if (alignment_offset == 0) + offset_lb = lb; + else { + EQ_Handle h = f_root->add_EQ(); + offset_lb = f_exists->declare(); + h.update_coef(offset_lb, 1); + h.update_coef(lb, -1); + h.update_const(alignment_offset); + } + + if (alignment_multiple == 1) { // trivial + aligned_lb = offset_lb; + } else { // e.g. 
to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB + aligned_lb = f_exists->declare(); + Variable_ID e = f_exists->declare(); + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(aligned_lb, 1); + h.update_coef(e, -alignment_multiple); + + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + h1.update_coef(e, alignment_multiple); + h2.update_coef(e, -alignment_multiple); + h1.update_coef(offset_lb, -1); + h2.update_coef(offset_lb, 1); + h1.update_const(alignment_multiple - 1); + } + } + + // create an upper bound variable for easy formula creation later + Variable_ID ub = f_exists->declare(); + { + coef_t coef = -ub_list[simplest_ub].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(ub, -1); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(ub_list[simplest_ub].get_const()); + } else { // e.g. 
if 2i <= m+5, then m+5-2 < 2*UB <= m+5 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(ub, -(*ci).coef); + h2.update_coef(ub, (*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, -(*ci).coef); + h2.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(-ub_list[simplest_ub].get_const()); + h2.update_const(ub_list[simplest_ub].get_const()); + h1.update_const(coef - 1); + } + } + + // insert tile controlling loop constraints + if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 + Variable_ID e = f_exists->declare(); + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(e, 1); + + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_coef(e, -tile_size); + h2.update_coef(aligned_lb, -1); + } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h2.update_coef(aligned_lb, -1); + h2.update_coef(ub, 1); + } + + // special care for private statements like overflow assignment + if (private_stmt.find(*i) != private_stmt.end()) { // e.g. 
ii <= UB + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); + h.update_coef(ub, 1); + } + // if (private_stmt.find(*i) != private_stmt.end()) { + // if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + + // stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); + // f_root = stmt[*i].xform.and_with_and(); + // EQ_Handle h1 = f_root->add_EQ(); + // h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); + // h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // } + // else if (method == StridedTile) { // e.g. ii <= UB since i does not exist + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + // } + // } + + // restrict original loop index inside the tile + else { + if (method == StridedTile) { // e.g. ii <= i < ii + tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_const(tile_size - 1); + } else if (method == CountedTile) { // e.g. 
LB+32*ii <= i < LB+32*ii+tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(aligned_lb, -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + tile_size); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_const(tile_size - 1); + h2.update_coef(aligned_lb, 1); + } + } + } + } + + // update loop level information + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + break; + case LoopLevelTile: + if (stmt[*i].loop_level[j - 1].payload >= outer_level) + stmt[*i].loop_level[j - 1].payload++; + break; + default: + throw loop_error( + "unknown loop level type for statement " + + to_string(*i)); + } + + LoopLevel ll; + ll.type = LoopLevelTile; + ll.payload = level + 1; + ll.parallel_level = 0; + stmt[*i].loop_level.insert( + stmt[*i].loop_level.begin() + (outer_level - 1), ll); + } +} + diff --git a/chill/src/loop_unroll.cc b/chill/src/loop_unroll.cc new file mode 100644 index 0000000..b75b738 --- /dev/null +++ b/chill/src/loop_unroll.cc @@ -0,0 +1,1166 @@ +/* + * loop_unroll.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" +#include <math.h> + +using namespace omega; + + +std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, + std::vector<std::vector<std::string> > idxNames, + int cleanup_split_level) { + // check for sanity of parameters + // check for sanity of parameters + if (unroll_amount < 0) + throw std::invalid_argument( + "invalid unroll amount " + to_string(unroll_amount)); + if 
(stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + if (cleanup_split_level == 0) + cleanup_split_level = level; + if (cleanup_split_level > level) + throw std::invalid_argument( + "cleanup code must be split at or outside the unrolled loop level " + + to_string(level)); + if (cleanup_split_level <= 0) + throw std::invalid_argument( + "invalid split loop level " + to_string(cleanup_split_level)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + // nothing to do + if (unroll_amount == 1) + return std::set<int>(); + + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) { + std::vector<std::pair<int, DependenceVector> > D; + int n = stmt[*i].xform.n_out(); + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + j++) { + if (same_loop.find(j->first) != same_loop.end()) + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if (dv.type != DEP_CONTROL) { + + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { + dim2 = stmt[*i].loop_level[dim2].payload - 1; + } + dim2 = stmt[*i].loop_level[dim2].payload; + + /*if (dv.isCarried(dim2) + && (dv.hasNegative(dim2) && !dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + + if (dv.isCarried(dim2) + && (dv.hasPositive(dim2) && dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + */ + bool safe = false; + + if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { + 
if (dv.quasi) + throw loop_error( + "loop error: a quasi dependence with a positive carried distance"); + if (!dv.quasi) { + if (dv.lbounds[dim2] != posInfinity) { + //if (dv.lbounds[dim2] != negInfinity) + if (dv.lbounds[dim2] > unroll_amount) + safe = true; + } else + safe = true; + }/* else { + if (dv.ubounds[dim2] != negInfinity) { + if (dv.ubounds[dim2] != posInfinity) + if ((-(dv.ubounds[dim2])) > unroll_amount) + safe = true; + } else + safe = true; + }*/ + + if (!safe) { + for (int l = level + 1; l <= (n - 1) / 2; l++) { + int dim3 = l - 1; + + if (stmt[*i].loop_level[dim3].type + != LoopLevelTile) + dim3 = + stmt[*i].loop_level[dim3].payload; + else { + while (stmt[*i].loop_level[dim3].type + == LoopLevelTile) { + dim3 = + stmt[*i].loop_level[dim3].payload + - 1; + } + dim3 = + stmt[*i].loop_level[dim3].payload; + } + + if (dim3 > dim2) { + + if (dv.hasPositive(dim3)) + break; + else if (dv.hasNegative(dim3)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + } + } + } + } + } + } + } + } + // extract the intersection of the iteration space to be considered + Relation hull = Relation::True(level); + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) { + if (stmt[*i].IS.is_upper_bound_satisfiable()) { + Relation mapping(stmt[*i].IS.n_set(), level); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(j), 1); + h.update_coef(mapping.output_var(j), -1); + } + hull = Intersection(hull, + Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); + hull.simplify(2, 4); + + } + } + for (int i = 1; i <= level; i++) { + std::string name = tmp_loop_var_name_prefix + to_string(i); + hull.name_set_var(i, name); + } + hull.setup_names(); + + // extract the exact loop bound of the dimension to be unrolled + if (is_single_loop_iteration(hull, level, this->known)) + return std::set<int>(); + Relation 
bound = get_loop_bound(hull, level, this->known); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() + || bound.is_tautology()) + throw loop_error("unable to extract loop bound for unrolling"); + + // extract the loop stride + coef_t stride; + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, + bound.set_var(level)); + if (result.second == NULL) + stride = 1; + else + stride = abs(result.first.get_coef(result.second)) + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(level)))); + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + + // simplify overflow expression for each pair of upper and lower bounds + std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table( + lb_list.size(), + std::vector<std::map<Variable_ID, int> >(ub_list.size(), + std::map<Variable_ID, int>())); + bool is_overflow_simplifiable = true; + for (int i = 0; i < lb_list.size(); i++) { + if (!is_overflow_simplifiable) + break; + + for (int j = 0; j < ub_list.size(); j++) { + // lower bound or upper bound has non-unit coefficient, can't simplify + if (ub_list[j].get_coef(bound.set_var(level)) != -1 + || lb_list[i].get_coef(bound.set_var(level)) != 1) { + is_overflow_simplifiable = false; + break; + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) + overflow_table[i][j][(*ci).var] += (*ci).coef; + + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] 
+= (*ci).coef; + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += ub_list[j].get_const(); + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) { + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += lb_list[i].get_const(); + + overflow_table[i][j][NULL] += stride; + if (unroll_amount == 0 + || (overflow_table[i][j].size() == 1 + && overflow_table[i][j][NULL] / stride + < unroll_amount)) + unroll_amount = overflow_table[i][j][NULL] / stride; + } + } + + // loop iteration count can't be determined, bail out gracefully + if (unroll_amount == 0) + return std::set<int>(); + + // further simply overflow calculation using coefficients' modular + if (is_overflow_simplifiable) { + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) + if (stride == 1) { + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end();) + if ((*k).first != NULL) { + int t = int_mod_hat((*k).second, unroll_amount); + if (t == 0) { + overflow_table[i][j].erase(k++); + } else { + int t2 = hull.query_variable_mod((*k).first, + unroll_amount); + if (t2 != INT_MAX) { + overflow_table[i][j][NULL] += t * t2; + overflow_table[i][j].erase(k++); + } else { + 
(*k).second = t; + k++; + } + } + } else + k++; + + overflow_table[i][j][NULL] = int_mod_hat( + overflow_table[i][j][NULL], unroll_amount); + + // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end(); k++) + if ((*k).second < 0) + (*k).second += unroll_amount; + } + } + + // build overflow statement + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *overflow_code = NULL; + Relation cond_upper(level), cond_lower(level); + Relation overflow_constraint(0); + F_And *overflow_constraint_root = overflow_constraint.add_and(); + std::vector<Free_Var_Decl *> over_var_list; + if (is_overflow_simplifiable && lb_list.size() == 1) { + for (int i = 0; i < ub_list.size(); i++) { + if (overflow_table[0][i].size() == 1) { + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_const( + ((overflow_table[0][i][NULL] / stride) % unroll_amount) + * -stride); + } else { + // upper splitting condition + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); // hack to fix omega relation variable names issue + CG_outputRepr *rhs = NULL; + bool is_split_illegal = false; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != 
overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + if ((*j).first->kind() == Input_Var + && (*j).first->get_position() + >= cleanup_split_level) + is_split_illegal = true; + + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (is_split_illegal) { + rhs->clear(); + delete rhs; + throw loop_error( + "cannot split cleanup code at loop level " + + to_string(cleanup_split_level) + + " due to overflow variable data dependence"); + } + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); + } else if (is_overflow_simplifiable && ub_list.size() == 1) { + for (int i = 0; i < lb_list.size(); i++) { + + if (overflow_table[i][0].size() == 1) { + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_const(overflow_table[i][0][NULL] * -stride); + } else { + // lower splitting condition + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_coef(cond_lower.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = 
overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); // hack to fix omega relation variable names issue + CG_outputRepr *rhs = NULL; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); + } else { + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + + std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list; + for (int i = 0; i < lb_list.size(); i++) { + lb_repr_list.push_back( + output_lower_bound_repr(ocg, lb_list[i], + bound.set_var(dim + 1), result.first, result.second, + bound, Relation::True(bound.n_set()), + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)))); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + } + for (int i = 0; i < ub_list.size(); i++) { + ub_repr_list.push_back( + 
output_upper_bound_repr(ocg, ub_list[i], + bound.set_var(dim + 1), bound, + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)))); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + } + + CG_outputRepr *lbRepr, *ubRepr; + if (lb_repr_list.size() > 1) + lbRepr = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + lbRepr = lb_repr_list[0]; + + if (ub_repr_list.size() > 1) + ubRepr = ocg->CreateInvoke("min", ub_repr_list); + else if (ub_repr_list.size() == 1) + ubRepr = ub_repr_list[0]; + + // create overflow assignment + CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), + ocg->CreateInt(1)); + if (stride != 1) + rhs = ocg->CreateIntegerFloor(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->CreateAssignment(0, lhs, rhs); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + } + + // insert overflow statement + int overflow_stmt_num = -1; + if (overflow_code != NULL) { + // build iteration space for overflow statement + Relation mapping(level, cleanup_split_level - 1); + F_And *f_root = mapping.add_and(); + for (int i = 1; i < cleanup_split_level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_coef(mapping.input_var(i), -1); + } + Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull))); + for (int i = 1; i < 
cleanup_split_level; i++) + overflow_IS.name_set_var(i, hull.set_var(i)->name()); + overflow_IS.setup_names(); + + // build dumb transformation relation for overflow statement + Relation overflow_xform(cleanup_split_level - 1, + 2 * (cleanup_split_level - 1) + 1); + f_root = overflow_xform.add_and(); + for (int i = 1; i <= cleanup_split_level - 1; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i), 1); + h.update_coef(overflow_xform.input_var(i), -1); + + h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i - 1), 1); + h.update_const(-lex[2 * i - 2]); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef( + overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), + 1); + h.update_const(-lex[2 * (cleanup_split_level - 1)]); + + shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1); + Statement overflow_stmt; + + overflow_stmt.code = overflow_code; + overflow_stmt.IS = overflow_IS; + overflow_stmt.xform = overflow_xform; + overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1); + overflow_stmt.ir_stmt_node = NULL; + for (int i = 0; i < level - 1; i++) { + overflow_stmt.loop_level[i].type = + stmt[stmt_num].loop_level[i].type; + if (stmt[stmt_num].loop_level[i].type == LoopLevelTile + && stmt[stmt_num].loop_level[i].payload >= level) + overflow_stmt.loop_level[i].payload = -1; + else + overflow_stmt.loop_level[i].payload = + stmt[stmt_num].loop_level[i].payload; + overflow_stmt.loop_level[i].parallel_level = + stmt[stmt_num].loop_level[i].parallel_level; + } + + stmt.push_back(overflow_stmt); + dep.insert(); + overflow_stmt_num = stmt.size() - 1; + overflow[overflow_stmt_num] = over_var_list; + + // update the global known information on overflow variable + this->known = Intersection(this->known, + Extend_Set(copy(overflow_constraint), + this->known.n_set() - overflow_constraint.n_set())); + + // update dependence graph + DependenceVector dv; + dv.type = DEP_CONTROL; + for (std::set<int>::iterator i = 
same_loop.begin(); + i != same_loop.end(); i++) + dep.connect(overflow_stmt_num, *i, dv); + dv.type = DEP_W2W; + { + IR_ScalarSymbol *overflow_sym = NULL; + std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef( + overflow_code); + for (int i = scalars.size() - 1; i >= 0; i--) + if (scalars[i]->is_write()) { + overflow_sym = scalars[i]->symbol(); + break; + } + for (int i = scalars.size() - 1; i >= 0; i--) + delete scalars[i]; + dv.sym = overflow_sym; + } + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + int dep_dim = get_last_dep_dim_before(stmt_num, level); + for (int i = dep_dim + 1; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (int i = 0; i <= dep_dim; i++) { + if (i != 0) { + dv.lbounds[i - 1] = 0; + dv.ubounds[i - 1] = 0; + } + dv.lbounds[i] = 1; + dv.ubounds[i] = posInfinity; + dep.connect(overflow_stmt_num, overflow_stmt_num, dv); + } + } + + // split the loop so it can be fully unrolled + std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper); + std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower); + new_stmts.insert(new_stmts2.begin(), new_stmts2.end()); + + // check if unrolled statements can be trivially lumped together as one statement + bool can_be_lumped = true; + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (stmt[*i].loop_level.size() + != stmt[stmt_num].loop_level.size()) { + can_be_lumped = false; + break; + } + for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) + if (!(stmt[*i].loop_level[j].type + == stmt[stmt_num].loop_level[j].type + && stmt[*i].loop_level[j].payload + == stmt[stmt_num].loop_level[j].payload)) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 2 * level; j < lex.size() - 1; j += 2) + if (lex[j] 
!= lex2[j]) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (is_inner_loop_depend_on_level(stmt[*i].IS, level, + this->known)) { + can_be_lumped = false; + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) + && Must_Be_Subset(copy(stmt[stmt_num].IS), + copy(stmt[*i].IS)))) { + can_be_lumped = false; + break; + } + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) + if (j->second[k].type == DEP_CONTROL + || j->second[k].type == DEP_UNKNOWN) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + if (!can_be_lumped) + break; + } + } + + // insert unrolled statements + int old_num_stmt = stmt.size(); + if (!can_be_lumped) { + std::map<int, std::vector<int> > what_stmt_num; + + for (int j = 1; j < unroll_amount; j++) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + Statement new_stmt; + + std::vector<std::string> loop_vars; + std::vector<CG_outputRepr *> subs; + loop_vars.push_back(stmt[*i].IS.set_var(level)->name()); + subs.push_back( + ocg->CreatePlus( + ocg->CreateIdent( + stmt[*i].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); + new_stmt.code = ocg->CreateSubstitutedStmt(0, + stmt[*i].code->clone(), loop_vars, subs); + + new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); + add_loop_stride(new_stmt.IS, bound, level - 1, + unroll_amount * stride); + + new_stmt.xform = copy(stmt[*i].xform); + + 
new_stmt.loop_level = stmt[*i].loop_level; + new_stmt.ir_stmt_node = NULL; + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i].push_back(stmt.size() - 1); + } + } + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + || dv.type == DEP_UNKNOWN) { + D.push_back(std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect(what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect( + what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else if (lb == -posInfinity + && ub == posInfinity) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount; + kk++) + if (kk == 0) + D.push_back( + std::make_pair(j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + 
dv)); + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } else { + for (int kk = 0; kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod(lb, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod(ub, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) + / new_stride) + * new_stride + - new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) + / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + D.push_back( + std::make_pair( + j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); + } + } + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod( + lb + t + + 1, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod( + ub + t + + 1, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride + 
- new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, + dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } + } + } + } + } + + dep.vertex[i].second.erase(j++); + } else { + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[i][kk] != -1) + dep.connect(what_stmt_num[i][kk], j->first, + j->second); + + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + for (int k = 0; k < j->second.size(); k++) + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[j->first][kk] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk], + j->second[k])); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + + // reset lexical order for the unrolled loop body + std::set<int> new_same_loop; + + int count = 0; + + for (std::map<int, std::vector<int> >::iterator i = + what_stmt_num.begin(); i != what_stmt_num.end(); i++) { + + new_same_loop.insert(i->first); + for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2) + assign_const(stmt[i->first].xform, k, + get_const(stmt[(what_stmt_num.begin())->first].xform, k, + Output_Var) + count); + count++; + for (int j = 0; j < i->second.size(); j++) { + new_same_loop.insert(i->second[j]); + for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k += + 2) + assign_const(stmt[i->second[j]].xform, k, + get_const( + stmt[(what_stmt_num.begin())->first].xform, + k, Output_Var) + count); + count++; + } + } + setLexicalOrder(dim + 1, new_same_loop, 0, idxNames); + } else { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + 
int max_level = stmt[stmt_num].loop_level.size(); + std::vector<std::pair<int, int> > stmt_order; + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + stmt_order.push_back( + std::make_pair( + get_const(stmt[*i].xform, 2 * max_level, + Output_Var), *i)); + sort(stmt_order.begin(), stmt_order.end()); + + Statement new_stmt; + new_stmt.code = NULL; + for (int j = 1; j < unroll_amount; j++) + for (int i = 0; i < stmt_order.size(); i++) { + std::vector<std::string> loop_vars; + std::vector<CG_outputRepr *> subs; + loop_vars.push_back( + stmt[stmt_order[i].second].IS.set_var(level)->name()); + subs.push_back( + ocg->CreatePlus( + ocg->CreateIdent( + stmt[stmt_order[i].second].IS.set_var( + level)->name()), + ocg->CreateInt(j * stride))); + CG_outputRepr *code = ocg->CreateSubstitutedStmt(0, + stmt[stmt_order[i].second].code->clone(), loop_vars, + subs); + new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); + } + + new_stmt.IS = copy(stmt[stmt_num].IS); + new_stmt.xform = copy(stmt[stmt_num].xform); + assign_const(new_stmt.xform, 2 * max_level, + stmt_order[stmt_order.size() - 1].first + 1); + new_stmt.loop_level = stmt[stmt_num].loop_level; + new_stmt.ir_stmt_node = NULL; + stmt.push_back(new_stmt); + dep.insert(); + + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + std::vector<DependenceVector> dvs11, dvs12, dvs22, + dvs21; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + || dv.type == 
DEP_UNKNOWN) { + if (i == j->first) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else + throw loop_error( + "unrolled statements lumped together illegally"); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else { + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs11.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs21.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs12.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs22.push_back(dv); + } + } + } + if (dvs11.size() > 0) + D.push_back(std::make_pair(i, dvs11)); + if (dvs22.size() > 0) + dep.connect(old_num_stmt, old_num_stmt, dvs22); + if (dvs12.size() > 0) + D.push_back( + std::make_pair(old_num_stmt, dvs12)); + if (dvs21.size() > 0) + dep.connect(old_num_stmt, i, dvs21); + + dep.vertex[i].second.erase(j++); + } else { + dep.connect(old_num_stmt, j->first, 
j->second); + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + D.push_back( + std::make_pair(old_num_stmt, j->second)); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + } + + return new_stmts; +} + + diff --git a/chill/src/omegatools.cc b/chill/src/omegatools.cc new file mode 100644 index 0000000..d88fd2a --- /dev/null +++ b/chill/src/omegatools.cc @@ -0,0 +1,2312 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Useful tools involving Omega manipulation. + + Notes: + + History: + 01/2006 Created by Chun Chen. + 03/2009 Upgrade Omega's interaction with compiler to IR_Code, by Chun Chen. +*****************************************************************************/ + +#include <codegen.h> +// #include <code_gen/output_repr.h> +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +namespace { + struct DependenceLevel { + Relation r; + int level; + int dir; // direction upto current level: + // -1:negative, 0: undetermined, 1: postive + std::vector<coef_t> lbounds; + std::vector<coef_t> ubounds; + DependenceLevel(const Relation &_r, int _dims): + r(_r), level(0), dir(0), lbounds(_dims), ubounds(_dims) {} + }; +} + + + + +std::string tmp_e() { + static int counter = 1; + return std::string("e")+to_string(counter++); +} + + + +//----------------------------------------------------------------------------- +// Convert expression tree to omega relation. "destroy" means shallow +// deallocation of "repr", not freeing the actual code inside. 
+// ----------------------------------------------------------------------------- +void exp2formula(IR_Code *ir, Relation &r, F_And *f_root, std::vector<Free_Var_Decl*> &freevars, + CG_outputRepr *repr, Variable_ID lhs, char side, IR_CONDITION_TYPE rel, bool destroy) { + +// void exp2formula(IR_Code *ir, Relation &r, F_And *f_root, std::vector<Free_Var_Decl*> &freevars, +// CG_outputRepr *repr, Variable_ID lhs, char side, char rel, bool destroy) { + + switch (ir->QueryExpOperation(repr)) { + case IR_OP_CONSTANT: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[0])); + if (!ref->is_integer()) + throw ir_exp_error("non-integer constant coefficient"); + + coef_t c = ref->integer(); + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, 1); + if (rel == IR_COND_GE) + h.update_const(-c); + else + h.update_const(-c-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, -1); + if (rel == IR_COND_LE) + h.update_const(c); + else + h.update_const(c-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lhs, 1); + h.update_const(-c); + } + else + throw std::invalid_argument("unsupported condition type"); + + delete v[0]; + delete ref; + if (destroy) + delete repr; + break; + } + case IR_OP_VARIABLE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0])); + + std::string s = ref->name(); + Variable_ID e = find_index(r, s, side); + + if (e == NULL) { // must be free variable + Free_Var_Decl *t = NULL; + for (unsigned i = 0; i < freevars.size(); i++) { + std::string ss = freevars[i]->base_name(); + if (s == ss) { + t = freevars[i]; + break; + } + } + + if (t == NULL) { + t = new Free_Var_Decl(s); + freevars.insert(freevars.end(), t); + } + + e = r.get_local(t); + } + + 
if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + } + else + throw std::invalid_argument("unsupported condition type"); + + // delete v[0]; + delete ref; + if (destroy) + delete repr; + break; + } + case IR_OP_ASSIGNMENT: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_root, freevars, v[0], lhs, side, rel, true); + if (destroy) + delete repr; + break; + } + case IR_OP_PLUS: + { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e1 = f_exists->declare(tmp_e()); + Variable_ID e2 = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e1, 1); + h.update_coef(e2, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, -1); + } + else + throw std::invalid_argument("unsupported condition type"); + + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, v[0], e1, side, IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, v[1], e2, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_MINUS: + { + F_Exists *f_exists = f_root->add_exists(); + 
Variable_ID e1 = f_exists->declare(tmp_e()); + Variable_ID e2 = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, 1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e1, 1); + h.update_coef(e2, -1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, 1); + } + else + throw std::invalid_argument("unsupported condition type"); + + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, v[0], e1, side, IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, v[1], e2, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_MULTIPLY: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + coef_t coef; + CG_outputRepr *term; + if (ir->QueryExpOperation(v[0]) == IR_OP_CONSTANT) { + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[0])); + coef = ref->integer(); + delete v[0]; + delete ref; + term = v[1]; + } + else if (ir->QueryExpOperation(v[1]) == IR_OP_CONSTANT) { + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[1])); + coef = ref->integer(); + delete v[1]; + delete ref; + term = v[0]; + } + else + throw ir_exp_error("not presburger expression"); + + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -coef); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + 
GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, coef); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -coef); + } + else + throw std::invalid_argument("unsupported condition type"); + + exp2formula(ir, r, f_and, freevars, term, e, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_DIVIDE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + assert(ir->QueryExpOperation(v[1]) == IR_OP_CONSTANT); + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[1])); + coef_t coef = ref->integer(); + delete v[1]; + delete ref; + + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, coef); + h.update_coef(e, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -coef); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, coef); + h.update_coef(e, -1); + } + else + throw std::invalid_argument("unsupported condition type"); + + exp2formula(ir, r, f_and, freevars, v[0], e, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_POSITIVE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + exp2formula(ir, r, f_root, freevars, v[0], lhs, side, rel, true); + if (destroy) + delete repr; + break; + } + case IR_OP_NEGATIVE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == 
IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, 1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, -1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, 1); + } + else + throw std::invalid_argument("unsupported condition type"); + + exp2formula(ir, r, f_and, freevars, v[0], e, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_MIN: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + F_Exists *f_exists = f_root->add_exists(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + F_And *f_and = f_exists->add_and(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_EQ) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, false); + + for (int j = 0; j < v.size(); j++) + if (j != i) { + 
Variable_ID e2 = f_exists->declare(tmp_e()); + GEQ_Handle h2 = f_and->add_GEQ(); + h2.update_coef(e, -1); + h2.update_coef(e2, 1); + + exp2formula(ir, r, f_and, freevars, v[j], e2, side, IR_COND_EQ, false); + } + } + + for (int i = 0; i < v.size(); i++) + delete v[i]; + } + else + throw std::invalid_argument("unsupported condition type"); + + if (destroy) + delete repr; + } + case IR_OP_MAX: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + F_Exists *f_exists = f_root->add_exists(); + + if (rel == IR_COND_LE || rel == IR_COND_LT) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_GE || rel == IR_COND_GT) { + F_And *f_and = f_exists->add_and(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_EQ) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, false); + + for (int j = 0; j < v.size(); j++) + if (j != i) { + Variable_ID e2 = f_exists->declare(tmp_e()); + GEQ_Handle h2 = f_and->add_GEQ(); + h2.update_coef(e, 1); + h2.update_coef(e2, -1); + + exp2formula(ir, r, f_and, freevars, v[j], e2, side, IR_COND_EQ, false); + } + } + + for (int i = 0; i < v.size(); i++) + 
delete v[i]; + } + else + throw std::invalid_argument("unsupported condition type"); + + if (destroy) + delete repr; + } + case IR_OP_NULL: + break; + default: + throw ir_exp_error("unsupported operand type"); + } +} + + +//----------------------------------------------------------------------------- +// Build dependence relation for two array references. +// ----------------------------------------------------------------------------- +Relation arrays2relation(IR_Code *ir, std::vector<Free_Var_Decl*> &freevars, + const IR_ArrayRef *ref_src, const Relation &IS_w, + const IR_ArrayRef *ref_dst, const Relation &IS_r) { + Relation &IS1 = const_cast<Relation &>(IS_w); + Relation &IS2 = const_cast<Relation &>(IS_r); + + Relation r(IS1.n_set(), IS2.n_set()); + + for (int i = 1; i <= IS1.n_set(); i++) + r.name_input_var(i, IS1.set_var(i)->name()); + + for (int i = 1; i <= IS2.n_set(); i++) + r.name_output_var(i, IS2.set_var(i)->name()+"'"); + + IR_Symbol *sym_src = ref_src->symbol(); + IR_Symbol *sym_dst = ref_dst->symbol(); + if (*sym_src != *sym_dst) { + r.add_or(); // False Relation + delete sym_src; + delete sym_dst; + return r; + } + else { + delete sym_src; + delete sym_dst; + } + + F_And *f_root = r.add_and(); + + for (int i = 0; i < ref_src->n_dim(); i++) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e1 = f_exists->declare(tmp_e()); + Variable_ID e2 = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + CG_outputRepr *repr_src = ref_src->index(i); + CG_outputRepr *repr_dst = ref_dst->index(i); + + bool has_complex_formula = false; + try { + exp2formula(ir, r, f_and, freevars, repr_src, e1, 'w', IR_COND_EQ, false); + exp2formula(ir, r, f_and, freevars, repr_dst, e2, 'r', IR_COND_EQ, false); + } + catch (const ir_exp_error &e) { + has_complex_formula = true; + } + + if (!has_complex_formula) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(e1, 1); + h.update_coef(e2, -1); + } + + repr_src->clear(); + repr_dst->clear(); + delete 
repr_src; + delete repr_dst; + } + + // add iteration space restriction + r = Restrict_Domain(r, copy(IS1)); + r = Restrict_Range(r, copy(IS2)); + + // reset the output variable names lost in restriction + for (int i = 1; i <= IS2.n_set(); i++) + r.name_output_var(i, IS2.set_var(i)->name()+"'"); + + return r; +} + + +//----------------------------------------------------------------------------- +// Convert array dependence relation into set of dependence vectors, assuming +// ref_w is lexicographically before ref_r in the source code. +// ----------------------------------------------------------------------------- +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > relation2dependences (const IR_ArrayRef *ref_src, const IR_ArrayRef *ref_dst, const Relation &r) { + assert(r.n_inp() == r.n_out()); + + std::vector<DependenceVector> dependences1, dependences2; + std::stack<DependenceLevel> working; + working.push(DependenceLevel(r, r.n_inp())); + + while (!working.empty()) { + DependenceLevel dep = working.top(); + working.pop(); + + // No dependence exists, move on. 
+ if (!dep.r.is_satisfiable()) + continue; + + if (dep.level == r.n_inp()) { + DependenceVector dv; + + // for loop independent dependence, use lexical order to + // determine the correct source and destination + if (dep.dir == 0) { + if (*ref_src == *ref_dst) + continue; // trivial self zero-dependence + + if (ref_src->is_write()) { + if (ref_dst->is_write()) + dv.type = DEP_W2W; + else + dv.type = DEP_W2R; + } + else { + if (ref_dst->is_write()) + dv.type = DEP_R2W; + else + dv.type = DEP_R2R; + } + + } + else if (dep.dir == 1) { + if (ref_src->is_write()) { + if (ref_dst->is_write()) + dv.type = DEP_W2W; + else + dv.type = DEP_W2R; + } + else { + if (ref_dst->is_write()) + dv.type = DEP_R2W; + else + dv.type = DEP_R2R; + } + } + else { // dep.dir == -1 + if (ref_dst->is_write()) { + if (ref_src->is_write()) + dv.type = DEP_W2W; + else + dv.type = DEP_W2R; + } + else { + if (ref_src->is_write()) + dv.type = DEP_R2W; + else + dv.type = DEP_R2R; + } + } + + dv.lbounds = dep.lbounds; + dv.ubounds = dep.ubounds; + dv.sym = ref_src->symbol(); + + if (dep.dir == 0 || dep.dir == 1) + dependences1.push_back(dv); + else + dependences2.push_back(dv); + } + else { + // now work on the next dimension level + int level = ++dep.level; + + coef_t lbound, ubound; + Relation delta = Deltas(copy(dep.r)); + delta.query_variable_bounds(delta.set_var(level), lbound, ubound); + + if (dep.dir == 0) { + if (lbound > 0) { + dep.dir = 1; + dep.lbounds[level-1] = lbound; + dep.ubounds[level-1] = ubound; + + working.push(dep); + } + else if (ubound < 0) { + dep.dir = -1; + dep.lbounds[level-1] = -ubound; + dep.ubounds[level-1] = -lbound; + + working.push(dep); + } + else { + // split the dependence vector into flow- and anti-dependence + // for the first non-zero distance, also separate zero distance + // at this level. 
+ { + DependenceLevel dep2 = dep; + + dep2.lbounds[level-1] = 0; + dep2.ubounds[level-1] = 0; + + F_And *f_root = dep2.r.and_with_and(); + EQ_Handle h = f_root->add_EQ(); + h.update_coef(dep2.r.input_var(level), 1); + h.update_coef(dep2.r.output_var(level), -1); + + working.push(dep2); + } + + if (lbound < 0 && *ref_src != *ref_dst) { + DependenceLevel dep2 = dep; + + F_And *f_root = dep2.r.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(dep2.r.input_var(level), 1); + h.update_coef(dep2.r.output_var(level), -1); + h.update_const(-1); + + // get tighter bounds under new constraints + coef_t lbound, ubound; + delta = Deltas(copy(dep2.r)); + delta.query_variable_bounds(delta.set_var(level), + lbound, ubound); + + dep2.dir = -1; + dep2.lbounds[level-1] = max(-ubound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness + dep2.ubounds[level-1] = -lbound; + + working.push(dep2); + } + + if (ubound > 0) { + DependenceLevel dep2 = dep; + + F_And *f_root = dep2.r.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(dep2.r.input_var(level), -1); + h.update_coef(dep2.r.output_var(level), 1); + h.update_const(-1); + + // get tighter bonds under new constraints + coef_t lbound, ubound; + delta = Deltas(copy(dep2.r)); + delta.query_variable_bounds(delta.set_var(level), + lbound, ubound); + dep2.dir = 1; + dep2.lbounds[level-1] = max(lbound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness + dep2.ubounds[level-1] = ubound; + + working.push(dep2); + } + } + } + // now deal with dependence vector with known direction + // determined at previous levels + else { + // For messy bounds, further test to see if the dependence distance + // can be reduced to positive/negative. This is an omega hack. 
+ if (lbound == negInfinity && ubound == posInfinity) { + { + Relation t = dep.r; + F_And *f_root = t.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(t.input_var(level), 1); + h.update_coef(t.output_var(level), -1); + h.update_const(-1); + + if (!t.is_satisfiable()) { + lbound = 0; + } + } + { + Relation t = dep.r; + F_And *f_root = t.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(t.input_var(level), -1); + h.update_coef(t.output_var(level), 1); + h.update_const(-1); + + if (!t.is_satisfiable()) { + ubound = 0; + } + } + } + + // Same thing as above, test to see if zero dependence + // distance possible. + if (lbound == 0 || ubound == 0) { + Relation t = dep.r; + F_And *f_root = t.and_with_and(); + EQ_Handle h = f_root->add_EQ(); + h.update_coef(t.input_var(level), 1); + h.update_coef(t.output_var(level), -1); + + if (!t.is_satisfiable()) { + if (lbound == 0) + lbound = 1; + if (ubound == 0) + ubound = -1; + } + } + + if (dep.dir == -1) { + dep.lbounds[level-1] = -ubound; + dep.ubounds[level-1] = -lbound; + } + else { // dep.dir == 1 + dep.lbounds[level-1] = lbound; + dep.ubounds[level-1] = ubound; + } + + working.push(dep); + } + } + } + + return std::make_pair(dependences1, dependences2); +} + + +//----------------------------------------------------------------------------- +// Convert a boolean expression to omega relation. "destroy" means shallow +// deallocation of "repr", not freeing the actual code inside. 
+//----------------------------------------------------------------------------- +void exp2constraint(IR_Code *ir, Relation &r, F_And *f_root, + std::vector<Free_Var_Decl *> &freevars, + CG_outputRepr *repr, bool destroy) { + IR_CONDITION_TYPE cond = ir->QueryBooleanExpOperation(repr); + switch (cond) { + case IR_COND_LT: + case IR_COND_LE: + case IR_COND_EQ: + case IR_COND_GT: + case IR_COND_GE: { + F_Exists *f_exist = f_root->add_exists(); + Variable_ID e = f_exist->declare(); + F_And *f_and = f_exist->add_and(); + std::vector<omega::CG_outputRepr *> op = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, op[1], e, 's', cond, true); + if (destroy) + delete repr; + break; + } + case IR_COND_NE: { + F_Exists *f_exist = f_root->add_exists(); + Variable_ID e = f_exist->declare(); + F_Or *f_or = f_exist->add_or(); + F_And *f_and = f_or->add_and(); + std::vector<omega::CG_outputRepr *> op = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, false); + exp2formula(ir, r, f_and, freevars, op[1], e, 's', IR_COND_GT, false); + + f_and = f_or->add_and(); + exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, op[1], e, 's', IR_COND_LT, true); + + if (destroy) + delete repr; + break; + } + default: + throw ir_exp_error("unrecognized conditional expression"); + } +} + + + + + +// inline void exp2formula(IR_Code *ir, Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// const CG_outputRepr *repr, Variable_ID lhs, char side, char rel) { +// exp2formula(ir, r, f_root, freevars, const_cast<CG_outputRepr *>(repr), lhs, side, rel, false); +// } + + + + + + + +//----------------------------------------------------------------------------- +// Convert suif expression tree to omega relation. 
+//----------------------------------------------------------------------------- + +// void suif2formula(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// operand op, Variable_ID lhs, +// char side, char rel) { +// if (op.is_immed()) { +// immed im = op.immediate(); + +// if (im.is_integer()) { +// int c = im.integer(); + +// if (rel == '>') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_const(-1*c); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_const(c); +// } +// else { // '=' +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_const(-1*c); +// } +// } +// else { +// return; //add Function in the future +// } +// } +// else if (op.is_symbol()) { +// String s = op.symbol()->name(); +// Variable_ID e = find_index(r, s, side); + +// if (e == NULL) { // must be free variable +// Free_Var_Decl *t = NULL; +// for (unsigned i = 0; i < freevars.size(); i++) { +// String ss = freevars[i]->base_name(); +// if (s == ss) { +// t = freevars[i]; +// break; +// } +// } + +// if (t == NULL) { +// t = new Free_Var_Decl(s); +// freevars.insert(freevars.end(), t); +// } + +// e = r.get_local(t); +// } + +// if (rel == '>') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -1); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e, 1); +// } +// else { // '=' +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -1); +// } +// } +// else if (op.is_instr()) +// suif2formula(r, f_root, freevars, op.instr(), lhs, side, rel); +// } + + +// void suif2formula(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// instruction *ins, Variable_ID lhs, +// char side, char rel) { +// if (ins->opcode() == io_cpy) { +// suif2formula(r, f_root, freevars, ins->src_op(0), lhs, side, rel); +// } +// else 
if (ins->opcode() == io_add || ins->opcode() == io_sub) { +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e1 = f_exists->declare(tmp_e()); +// Variable_ID e2 = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// int add_or_sub = ins->opcode() == io_add ? 1 : -1; +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e1, -1); +// h.update_coef(e2, -1 * add_or_sub); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e1, 1); +// h.update_coef(e2, 1 * add_or_sub); +// } +// else { // '=' +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e1, -1); +// h.update_coef(e2, -1 * add_or_sub); +// } + +// suif2formula(r, f_and, freevars, ins->src_op(0), e1, side, '='); +// suif2formula(r, f_and, freevars, ins->src_op(1), e2, side, '='); +// } +// else if (ins->opcode() == io_mul) { +// operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// if (!op1.is_immed() && !op2.is_immed()) +// return; // add Function in the future +// else { +// operand op; +// immed im; +// if (op1.is_immed()) { +// im = op1.immediate(); +// op = op2; +// } +// else { +// im = op2.immediate(); +// op = op1; +// } + +// if (!im.is_integer()) +// return; //add Function in the future +// else { +// int c = im.integer(); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -c); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e, c); +// } +// else { +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -c); +// } + +// suif2formula(r, f_and, freevars, op, e, side, '='); +// } +// } +// } +// else if (ins->opcode() == io_div) { +// 
operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// if (!op2.is_immed()) +// return; //add Function in the future +// else { +// immed im = op2.immediate(); + +// if (!im.is_integer()) +// return; //add Function in the future +// else { +// int c = im.integer(); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, c); +// h.update_coef(e, -1); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -c); +// h.update_coef(e, 1); +// } +// else { +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, c); +// h.update_coef(e, -1); +// } + +// suif2formula(r, f_and, freevars, op1, e, side, '='); +// } +// } +// } +// else if (ins->opcode() == io_neg) { +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, 1); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e, -1); +// } +// else { +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, 1); +// } + +// suif2formula(r, f_and, freevars, ins->src_op(0), e, side, '='); +// } +// else if (ins->opcode() == io_min) { +// operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e1 = f_exists->declare(tmp_e()); +// Variable_ID e2 = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// GEQ_Handle h1 = f_and1->add_GEQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// F_And *f_and2 = f_or->add_and(); +// GEQ_Handle h2 = 
f_and2->add_GEQ(); +// h2.update_coef(lhs, 1); +// h2.update_coef(e2, -1); +// } +// else if (rel == '<') { +// GEQ_Handle h1 = f_and->add_GEQ(); +// h1.update_coef(lhs, -1); +// h1.update_coef(e1, 1); +// GEQ_Handle h2 = f_and->add_GEQ(); +// h2.update_coef(lhs, -1); +// h2.update_coef(e2, 1); +// } +// else { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// EQ_Handle h1 = f_and1->add_EQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// GEQ_Handle h2 = f_and1->add_GEQ(); +// h2.update_coef(e1, -1); +// h2.update_coef(e2, 1); +// F_And *f_and2 = f_or->add_and(); +// EQ_Handle h3 = f_and2->add_EQ(); +// h3.update_coef(lhs, 1); +// h3.update_coef(e2, -1); +// GEQ_Handle h4 = f_and2->add_GEQ(); +// h4.update_coef(e1, 1); +// h4.update_coef(e2, -1); +// } + +// suif2formula(r, f_and, freevars, op1, e1, side, '='); +// suif2formula(r, f_and, freevars, op2, e2, side, '='); +// } +// else if (ins->opcode() == io_max) { +// operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e1 = f_exists->declare(tmp_e()); +// Variable_ID e2 = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h1 = f_and->add_GEQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// GEQ_Handle h2 = f_and->add_GEQ(); +// h2.update_coef(lhs, 1); +// h2.update_coef(e2, -1); +// } +// else if (rel == '<') { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// GEQ_Handle h1 = f_and1->add_GEQ(); +// h1.update_coef(lhs, -1); +// h1.update_coef(e1, 1); +// F_And *f_and2 = f_or->add_and(); +// GEQ_Handle h2 = f_and2->add_GEQ(); +// h2.update_coef(lhs, -1); +// h2.update_coef(e2, 1); +// } +// else { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// EQ_Handle h1 = f_and1->add_EQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// GEQ_Handle h2 = f_and1->add_GEQ(); +// 
h2.update_coef(e1, 1); +// h2.update_coef(e2, -1); +// F_And *f_and2 = f_or->add_and(); +// EQ_Handle h3 = f_and2->add_EQ(); +// h3.update_coef(lhs, 1); +// h3.update_coef(e2, -1); +// GEQ_Handle h4 = f_and2->add_GEQ(); +// h4.update_coef(e1, -1); +// h4.update_coef(e2, 1); +// } + +// suif2formula(r, f_and, freevars, op1, e1, side, '='); +// suif2formula(r, f_and, freevars, op2, e2, side, '='); +// } +// } + +//----------------------------------------------------------------------------- +// Generate iteration space constraints +//----------------------------------------------------------------------------- + +// void add_loop_stride_constraints(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// tree_for *tnf, char side) { + +// std::string name(tnf->index()->name()); +// int dim = 0; +// for (;dim < r.n_set(); dim++) +// if (r.set_var(dim+1)->name() == name) +// break; + +// Relation bound = get_loop_bound(r, dim); + +// operand op = tnf->step_op(); +// if (!op.is_null()) { +// if (op.is_immed()) { +// immed im = op.immediate(); +// if (im.is_integer()) { +// int c = im.integer(); + +// if (c != 1 && c != -1) +// add_loop_stride(r, bound, dim, c); +// } +// else +// assert(0); // messy stride +// } +// else +// assert(0); // messy stride +// } +// } + +// void add_loop_bound_constraints(IR_Code *ir, Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// tree_for *tnf, +// char upper_or_lower, char side, IR_CONDITION_TYPE rel) { +// Variable_ID v = find_index(r, tnf->index()->name(), side); + +// tree_node_list *tnl; + +// if (upper_or_lower == 'u') +// tnl = tnf->ub_list(); +// else +// tnl = tnf->lb_list(); + +// tree_node_list_iter iter(tnl); +// while (!iter.is_empty()) { +// tree_node *tn = iter.step(); +// if (tn->kind() != TREE_INSTR) +// break; // messy bounds + +// instruction *ins = static_cast<tree_instr *>(tn)->instr(); + + +// if (upper_or_lower == 'u' && (tnf->test() == FOR_SLT || tnf->test() == FOR_ULT)) { 
+// operand op1(ins->clone()); +// operand op2(new in_ldc(type_s32, operand(), immed(1))); +// instruction *t = new in_rrr(io_sub, op1.type(), operand(), op1, op2); + +// CG_suifRepr *repr = new CG_suifRepr(operand(t)); +// exp2formula(ir, r, f_root, freevars, repr, v, side, rel, true); +// delete t; +// } +// else if (tnf->test() == FOR_SLT || tnf->test() == FOR_SLTE || tnf->test() == FOR_ULT || tnf->test() == FOR_ULTE) { +// CG_suifRepr *repr = new CG_suifRepr(operand(ins)); +// exp2formula(ir, r, f_root, freevars, repr, v, side, rel, true); +// } +// else +// assert(0); +// } +// } + + +// Relation loop_iteration_space(std::vector<Free_Var_Decl*> &freevars, +// tree_node *tn, std::vector<tree_for*> &loops) { +// Relation r(loops.size()); +// for (unsigned i = 0; i < loops.size(); i++) { +// String s = loops[i]->index()->name(); +// r.name_set_var(i+1, s); +// } + +// F_And *f_root = r.add_and(); + +// std::vector<tree_for *> outer = find_outer_loops(tn); +// std::vector<LexicalOrderType> loops_lex(loops.size(), LEX_UNKNOWN); + +// for (unsigned i = 0; i < outer.size(); i++) { +// unsigned j; + +// for (j = 0; j < loops.size(); j++) { +// if (outer[i] == loops[j]) { +// loops_lex[j] = LEX_MATCH; +// break; +// } else if (outer[i]->index() == loops[j]->index()) { +// loops_lex[j] = lexical_order(outer[i],loops[j]); +// break; +// } +// } + +// if (j != loops.size()) { +// add_loop_bound_constraints(r, f_root, freevars, outer[i], 'l', 's', '>'); +// add_loop_bound_constraints(r, f_root, freevars, outer[i], 'u', 's', '<'); +// add_loop_stride_constraints(r,f_root, freevars, outer[i], 's'); +// } +// } + +// // Add degenerated constraints for non-enclosing loops for this +// // statement. We treat low-dim space as part of whole +// // iteration space. 
+// LexicalOrderType lex = LEX_MATCH; +// for (unsigned i = 0; i < loops.size(); i++) { +// if (loops_lex[i] != 0) { +// if (lex == LEX_MATCH) +// lex = loops_lex[i]; +// continue; +// } + +// if (lex == LEX_MATCH) { +// for (unsigned j = i+1; j < loops.size(); j++) { +// if (loops_lex[j] == LEX_BEFORE || loops_lex[j] == LEX_AFTER) { +// lex = loops_lex[j]; +// break; +// } +// } +// } + +// if (lex == LEX_MATCH) +// lex = lexical_order(tn, loops[i]); + +// if (lex == LEX_BEFORE) +// add_loop_bound_constraints(r, f_root, freevars, loops[i], 'l', 's', '='); +// else +// add_loop_bound_constraints(r, f_root, freevars, loops[i], 'u', 's', '='); +// } + +// return r; +// } + +// Relation arrays2relation(std::vector<Free_Var_Decl*> &freevars, +// in_array *ia_w, const Relation &IS1_, +// in_array *ia_r, const Relation &IS2_) { +// Relation &IS1 = const_cast<Relation &>(IS1_); +// Relation &IS2 = const_cast<Relation &>(IS2_); + +// Relation r(IS1.n_set(), IS2.n_set()); + +// for (int i = 1; i <= IS1.n_set(); i++) +// r.name_input_var(i, IS1.set_var(i)->name()); + +// for (int i = 1; i <= IS2.n_set(); i++) +// r.name_output_var(i, IS2.set_var(i)->name()+"'"); + +// if (get_sym_of_array(ia_w) != get_sym_of_array(ia_r)) { +// r.add_or(); // False Relation +// return r; +// } + +// F_And *f_root = r.add_and(); + +// for (unsigned i = 0; i < ia_w->dims(); i++) { +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// suif2formula(r, f_and, freevars, ia_w->index(i), e, 'w', '='); +// suif2formula(r, f_and, freevars, ia_r->index(i), e, 'r', '='); +// } + +// // add iteration space restriction +// r = Restrict_Domain(r, copy(IS1)); +// r = Restrict_Range(r, copy(IS2)); + +// // reset the output variable names lost in restriction +// for (int i = 1; i <= IS2.n_set(); i++) +// r.name_output_var(i, IS2.set_var(i)->name()+"'"); + +// return r; +// } + + +// std::vector<DependenceVector> 
relation2dependences (IR_Code *ir, in_array *ia_w, in_array *ia_r, const Relation &r) { +// assert(r.n_inp() == r.n_out()); + +// std::vector<DependenceVector> dependences; + +// std::stack<DependenceLevel> working; +// working.push(DependenceLevel(r, r.n_inp())); + +// while (!working.empty()) { +// DependenceLevel dep = working.top(); +// working.pop(); + +// // No dependence exists, move on. +// if (!dep.r.is_satisfiable()) +// continue; + +// if (dep.level == r.n_inp()) { +// DependenceVector dv; + +// // for loop independent dependence, use lexical order to +// // determine the correct source and destination +// if (dep.dir == 0) { +// LexicalOrderType order = lexical_order(ia_w->parent(), ia_r->parent()); + +// if (order == LEX_MATCH) +// continue; //trivial self zero-dependence +// else if (order == LEX_AFTER) { +// dv.src = new IR_suifArrayRef(ir, ia_r); +// dv.dst = new IR_suifArrayRef(ir, ia_w); +// } +// else { +// dv.src = new IR_suifArrayRef(ir, ia_w); +// dv.dst = new IR_suifArrayRef(ir,ia_r); +// } +// } +// else if (dep.dir == 1) { +// dv.src = new IR_suifArrayRef(ir, ia_w); +// dv.dst = new IR_suifArrayRef(ir, ia_r); +// } +// else { // dep.dir == -1 +// dv.src = new IR_suifArrayRef(ir, ia_r); +// dv.dst = new IR_suifArrayRef(ir, ia_w); +// } + +// dv.lbounds = dep.lbounds; +// dv.ubounds = dep.ubounds; + +// // // set the dependence type +// // if (is_lhs(dv.source) && is_lhs(dv.dest)) +// // dv.type = 'o'; +// // else if (!is_lhs(dv.source) && ! 
is_lhs(dv.dest)) +// // dv.type = 'i'; +// // else if (is_lhs(dv.source)) +// // dv.type = 'f'; +// // else +// // dv.type = 'a'; + +// dependences.push_back(dv); +// } +// else { +// // now work on the next dimension level +// int level = ++dep.level; + +// coef_t lbound, ubound; +// Relation delta = Deltas(copy(dep.r)); +// delta.query_variable_bounds(delta.set_var(level), lbound, ubound); + +// if (dep.dir == 0) { +// if (lbound > 0) { +// dep.dir = 1; +// dep.lbounds[level-1] = lbound; +// dep.ubounds[level-1] = ubound; + +// working.push(dep); +// } +// else if (ubound < 0) { +// dep.dir = -1; +// dep.lbounds[level-1] = -ubound; +// dep.ubounds[level-1] = -lbound; + +// working.push(dep); +// } +// else { +// // split the dependence vector into flow- and anti-dependence +// // for the first non-zero distance, also separate zero distance +// // at this level. +// { +// DependenceLevel dep2 = dep; + +// dep2.lbounds[level-1] = 0; +// dep2.ubounds[level-1] = 0; + +// F_And *f_root = dep2.r.and_with_and(); +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(dep2.r.input_var(level), 1); +// h.update_coef(dep2.r.output_var(level), -1); + +// working.push(dep2); +// } + +// if (lbound < 0 && ia_w != ia_r) { +// DependenceLevel dep2 = dep; + +// F_And *f_root = dep2.r.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(dep2.r.input_var(level), 1); +// h.update_coef(dep2.r.output_var(level), -1); +// h.update_const(-1); + +// // get tighter bounds under new constraints +// coef_t lbound, ubound; +// delta = Deltas(copy(dep2.r)); +// delta.query_variable_bounds(delta.set_var(level), +// lbound, ubound); + +// dep2.dir = -1; +// dep2.lbounds[level-1] = max(-ubound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness +// dep2.ubounds[level-1] = -lbound; + +// working.push(dep2); +// } + +// if (ubound > 0) { +// DependenceLevel dep2 = dep; + +// F_And *f_root = dep2.r.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// 
h.update_coef(dep2.r.input_var(level), -1); +// h.update_coef(dep2.r.output_var(level), 1); +// h.update_const(-1); + +// // get tighter bonds under new constraints +// coef_t lbound, ubound; +// delta = Deltas(copy(dep2.r)); +// delta.query_variable_bounds(delta.set_var(level), +// lbound, ubound); +// dep2.dir = 1; +// dep2.lbounds[level-1] = max(lbound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness +// dep2.ubounds[level-1] = ubound; + +// working.push(dep2); +// } +// } +// } +// // now deal with dependence vector with known direction +// // determined at previous levels +// else { +// // For messy bounds, further test to see if the dependence distance +// // can be reduced to positive/negative. This is an omega hack. +// if (lbound == negInfinity && ubound == posInfinity) { +// { +// Relation t = dep.r; +// F_And *f_root = t.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(t.input_var(level), 1); +// h.update_coef(t.output_var(level), -1); +// h.update_const(-1); + +// if (!t.is_satisfiable()) { +// lbound = 0; +// } +// } +// { +// Relation t = dep.r; +// F_And *f_root = t.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(t.input_var(level), -1); +// h.update_coef(t.output_var(level), 1); +// h.update_const(-1); + +// if (!t.is_satisfiable()) { +// ubound = 0; +// } +// } +// } + +// // Same thing as above, test to see if zero dependence +// // distance possible. 
+// if (lbound == 0 || ubound == 0) { +// Relation t = dep.r; +// F_And *f_root = t.and_with_and(); +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(t.input_var(level), 1); +// h.update_coef(t.output_var(level), -1); + +// if (!t.is_satisfiable()) { +// if (lbound == 0) +// lbound = 1; +// if (ubound == 0) +// ubound = -1; +// } +// } + +// if (dep.dir == -1) { +// dep.lbounds[level-1] = -ubound; +// dep.ubounds[level-1] = -lbound; +// } +// else { // dep.dir == 1 +// dep.lbounds[level-1] = lbound; +// dep.ubounds[level-1] = ubound; +// } + +// working.push(dep); +// } +// } +// } + +// return dependences; +// } + +//----------------------------------------------------------------------------- +// Determine whether the loop (starting from 0) in the iteration space +// has only one iteration. +//----------------------------------------------------------------------------- +bool is_single_loop_iteration(const Relation &r, int level, const Relation &known) { + int n = r.n_set(); + Relation r1 = Intersection(copy(r), Extend_Set(copy(known), n-known.n_set())); + + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + r1 = Range(Restrict_Domain(mapping, r1)); + r1.simplify(); + + Variable_ID v = r1.set_var(level); + for (DNF_Iterator di(r1.query_DNF()); di; di++) { + bool is_single = false; + for (EQ_Iterator ei((*di)->EQs()); ei; ei++) + if ((*ei).get_coef(v) != 0 && !(*ei).has_wildcards()) { + is_single = true; + break; + } + + if (!is_single) + return false; + } + + return true; +} + + + + +bool is_single_iteration(const Relation &r, int dim) { + assert(r.is_set()); + const int n = r.n_set(); + + if (dim >= n) + return true; + + Relation bound = get_loop_bound(r, dim); + +// if (!bound.has_single_conjunct()) +// return false; + +// Conjunct *c = bound.query_DNF()->single_conjunct(); + + for 
(DNF_Iterator di(bound.query_DNF()); di; di++) { + bool is_single = false; + for (EQ_Iterator ei((*di)->EQs()); ei; ei++) + if (!(*ei).has_wildcards()) { + is_single = true; + break; + } + + if (!is_single) + return false; + } + + return true; + + + + +// Relation r = copy(r_); +// const int n = r.n_set(); + +// if (dim >= n) +// return true; + +// Relation bound = get_loop_bound(r, dim); +// bound = Approximate(bound); +// Conjunct *c = bound.query_DNF()->single_conjunct(); + +// return c->n_GEQs() == 0; + + + + + +// Relation r = copy(r_); +// r.simplify(); +// const int n = r.n_set(); + +// if (dim >= n) +// return true; + +// for (DNF_Iterator i(r.query_DNF()); i; i++) { +// std::vector<bool> is_single(n); +// for (int j = 0; j < dim; j++) +// is_single[j] = true; +// for (int j = dim; j < n; j++) +// is_single[j] = false; + +// bool found_new_single = true; +// while (found_new_single) { +// found_new_single = false; + +// for (EQ_Iterator j = (*i)->EQs(); j; j++) { +// int saved_pos = -1; +// for (Constr_Vars_Iter k(*j); k; k++) +// if ((*k).var->kind() == Set_Var || (*k).var->kind() == Input_Var) { +// int pos = (*k).var->get_position() - 1; +// if (!is_single[pos]) +// if (saved_pos == -1) +// saved_pos = pos; +// else { +// saved_pos = -1; +// break; +// } +// } + +// if (saved_pos != -1) { +// is_single[saved_pos] = true; +// found_new_single = true; +// } +// } + +// if (is_single[dim]) +// break; +// } + +// if (!is_single[dim]) +// return false; +// } + +// return true; +} + +//----------------------------------------------------------------------------- +// Set/get the value of a variable which is know to be constant. 
+//----------------------------------------------------------------------------- +void assign_const(Relation &r, int dim, int val) { + const int n = r.n_out(); + + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + + for (int i = 1; i <= n; i++) { + if (i != dim+1) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_coef(mapping.input_var(i), -1); + } + else { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_const(-val); + } + } + + r = Composition(mapping, r); +} + + +int get_const(const Relation &r, int dim, Var_Kind type) { +// Relation rr = copy(r); + Relation &rr = const_cast<Relation &>(r); + + Variable_ID v; + switch (type) { + // case Set_Var: + // v = rr.set_var(dim+1); + // break; + case Input_Var: + v = rr.input_var(dim+1); + break; + case Output_Var: + v = rr.output_var(dim+1); + break; + default: + throw std::invalid_argument("unsupported variable type"); + } + + for (DNF_Iterator di(rr.query_DNF()); di; di++) + for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) + if ((*ei).is_const(v)) + return (*ei).get_const(); + + throw std::runtime_error("cannot get variable's constant value"); +} + + + + + + +//--------------------------------------------------------------------------- +// Get the bound for a specific loop. 
+//--------------------------------------------------------------------------- +Relation get_loop_bound(const Relation &r, int dim) { + assert(r.is_set()); + const int n = r.n_set(); + +// Relation r1 = project_onto_levels(copy(r), dim+1, true); + Relation mapping(n,n); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= dim+1; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + Relation r1 = Range(Restrict_Domain(mapping, copy(r))); + for (int i = 1; i <= n; i++) + r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name()); + r1.setup_names(); + Relation r2 = Project(copy(r1), dim+1, Set_Var); + + return Gist(r1, r2, 1); +} + +Relation get_loop_bound(const Relation &r, int level, const Relation &known) { + int n = r.n_set(); + Relation r1 = Intersection(copy(r), Extend_Set(copy(known), n-known.n_set())); + + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + r1 = Range(Restrict_Domain(mapping, r1)); + Relation r2 = Project(copy(r1), level, Set_Var); + r1 = Gist(r1, r2, 1); + + for (int i = 1; i <= n; i++) + r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name()); + r1.setup_names(); + + return r1; +} + + + +Relation get_max_loop_bound(const std::vector<Relation> &r, int dim) { + if (r.size() == 0) + return Relation::Null(); + + const int n = r[0].n_set(); + Relation res(Relation::False(n)); + for (int i = 0; i < r.size(); i++) { + Relation &t = const_cast<Relation &>(r[i]); + if (t.is_satisfiable()) + res = Union(get_loop_bound(t, dim), res); + } + + res.simplify(); + + return res; +} + +Relation get_min_loop_bound(const std::vector<Relation> &r, int dim) { + if (r.size() == 0) + return Relation::Null(); + + const int n = r[0].n_set(); + Relation res(Relation::True(n)); + for (int i = 0; i < 
r.size(); i++) { + Relation &t = const_cast<Relation &>(r[i]); + if (t.is_satisfiable()) + res = Intersection(get_loop_bound(t, dim), res); + } + + res.simplify(); + + return res; +} + +//----------------------------------------------------------------------------- +// Add strident to a loop. +// Issues: +// - Don't work with relations with multiple disjuncts. +// - Omega's dealing with max lower bound is awkward. +//----------------------------------------------------------------------------- +void add_loop_stride(Relation &r, const Relation &bound_, int dim, int stride) { + F_And *f_root = r.and_with_and(); + Relation &bound = const_cast<Relation &>(bound_); + for (DNF_Iterator di(bound.query_DNF()); di; di++) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e1 = f_exists->declare(tmp_e()); + Variable_ID e2 = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + EQ_Handle stride_eq = f_and->add_EQ(); + stride_eq.update_coef(e1, 1); + stride_eq.update_coef(e2, stride); + if (!r.is_set()) + stride_eq.update_coef(r.output_var(dim+1), -1); + else + stride_eq.update_coef(r.set_var(dim+1), -1); + F_Or *f_or = f_and->add_or(); + + for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) { + if ((*gi).get_coef(bound.set_var(dim+1)) > 0) { + // copy the lower bound constraint + EQ_Handle h1 = f_or->add_and()->add_EQ(); + GEQ_Handle h2 = f_and->add_GEQ(); + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + switch ((*ci).var->kind()) { + // case Set_Var: + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(e1, (*ci).coef); + h2.update_coef(e1, (*ci).coef); + } + else { + if (!r.is_set()) { + h1.update_coef(r.output_var(pos), (*ci).coef); + h2.update_coef(r.output_var(pos), (*ci).coef); + } + else { + h1.update_coef(r.set_var(pos), (*ci).coef); + h2.update_coef(r.set_var(pos), (*ci).coef); + } + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + h1.update_coef(r.get_local(g, 
(*ci).var->function_of()), (*ci).coef); + h2.update_coef(r.get_local(g, (*ci).var->function_of()), (*ci).coef); + break; + } + default: + break; + } + } + h1.update_const((*gi).get_const()); + h2.update_const((*gi).get_const()); + } + } + } +} + + +bool is_inner_loop_depend_on_level(const Relation &r, int level, const Relation &known) { + Relation r1 = Intersection(copy(r), Extend_Set(copy(known), r.n_set()-known.n_set())); + Relation r2 = copy(r1); + for (int i = level+1; i <= r2.n_set(); i++) + r2 = Project(r2, r2.set_var(i)); + r2.simplify(2, 4); + Relation r3 = Gist(r1, r2); + + Variable_ID v = r3.set_var(level); + for (DNF_Iterator di(r3.query_DNF()); di; di++) { + for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) + if ((*ei).get_coef(v) != 0) + return true; + + for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) + if ((*gi).get_coef(v) != 0) + return true; + } + + return false; +} + + +//----------------------------------------------------------------------------- +// Suppose loop dim is i. Replace i with i+adjustment in loop bounds. +// e.g. 
do i = 1, n +// do j = i, n +// after call with dim = 0 and adjustment = 1: +// do i = 1, n +// do j = i+1, n +// ----------------------------------------------------------------------------- +Relation adjust_loop_bound(const Relation &r, int level, int adjustment) { + if (adjustment == 0) + return copy(r); + + const int n = r.n_set(); + Relation r1 = copy(r); + for (int i = level+1; i <= r1.n_set(); i++) + r1 = Project(r1, r1.set_var(i)); + r1.simplify(2, 4); + Relation r2 = Gist(copy(r), copy(r1)); + + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= n; i++) + if (i == level) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level), -1); + h.update_coef(mapping.output_var(level), 1); + h.update_const(static_cast<coef_t>(adjustment)); + } + else { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), -1); + h.update_coef(mapping.output_var(i), 1); + } + + r2 = Range(Restrict_Domain(mapping, r2)); + r1 = Intersection(r1, r2); + r1.simplify(); + + for (int i = 1; i <= n; i++) + r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name()); + r1.setup_names(); + return r1; +} + + +// commented out on 07/14/2010 +// void adjust_loop_bound(Relation &r, int dim, int adjustment, std::vector<Free_Var_Decl *> globals) { +// assert(r.is_set()); + +// if (adjustment == 0) +// return; + +// const int n = r.n_set(); +// Tuple<std::string> name(n); +// for (int i = 1; i <= n; i++) +// name[i] = r.set_var(i)->name(); + +// Relation r1 = project_onto_levels(copy(r), dim+1, true); +// Relation r2 = Gist(copy(r), copy(r1)); + +// // remove old bogus global variable conditions since we are going to +// // update the value. 
+// if (globals.size() > 0) +// r1 = Gist(r1, project_onto_levels(copy(r), 0, true)); + +// Relation r4 = Relation::True(n); + +// for (DNF_Iterator di(r2.query_DNF()); di; di++) { +// for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) { +// EQ_Handle h = r4.and_with_EQ(*ei); + +// Variable_ID v = r2.set_var(dim+1); +// coef_t c = (*ei).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); + +// for (int i = 0; i < globals.size(); i++) { +// Variable_ID v = r2.get_local(globals[i]); +// coef_t c = (*ei).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// } + +// for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) { +// GEQ_Handle h = r4.and_with_GEQ(*gi); + +// Variable_ID v = r2.set_var(dim+1); +// coef_t c = (*gi).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); + +// for (int i = 0; i < globals.size(); i++) { +// Variable_ID v = r2.get_local(globals[i]); +// coef_t c = (*gi).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// } +// } +// r = Intersection(r1, r4); +// // } +// // else +// // r = Intersection(r1, r2); + +// for (int i = 1; i <= n; i++) +// r.name_set_var(i, name[i]); +// r.setup_names(); +// } + + +// void adjust_loop_bound(Relation &r, int dim, int adjustment) { +// assert(r.is_set()); +// const int n = r.n_set(); +// Tuple<String> name(n); +// for (int i = 1; i <= n; i++) +// name[i] = r.set_var(i)->name(); + +// Relation r1 = project_onto_levels(copy(r), dim+1, true); +// Relation r2 = Gist(r, copy(r1)); + +// Relation r3(n, n); +// F_And *f_root = r3.add_and(); +// for (int i = 0; i < n; i++) { +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(r3.output_var(i+1), 1); +// h.update_coef(r3.input_var(i+1), -1); +// if (i == dim) +// h.update_const(adjustment); +// } + +// r2 = Range(Restrict_Domain(r3, r2)); +// r = Intersection(r1, r2); + +// for (int i = 1; i <= n; i++) +// r.name_set_var(i, name[i]); +// r.setup_names(); +// } + +// void adjust_loop_bound(Relation &r, int dim, 
Free_Var_Decl *global_var, int adjustment) { +// assert(r.is_set()); +// const int n = r.n_set(); +// Tuple<String> name(n); +// for (int i = 1; i <= n; i++) +// name[i] = r.set_var(i)->name(); + +// Relation r1 = project_onto_levels(copy(r), dim+1, true); +// Relation r2 = Gist(r, copy(r1)); + +// Relation r3(n); +// Variable_ID v = r2.get_local(global_var); + +// for (DNF_Iterator di(r2.query_DNF()); di; di++) { +// for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) { +// coef_t c = (*ei).get_coef(v); +// EQ_Handle h = r3.and_with_EQ(*ei); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) { +// coef_t c = (*gi).get_coef(v); +// GEQ_Handle h = r3.and_with_GEQ(*gi); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// } + +// r = Intersection(r1, r3); +// for (int i = 1; i <= n; i++) +// r.name_set_var(i, name[i]); +// r.setup_names(); +// } + + + +//------------------------------------------------------------------------------ +// If the dimension has value posInfinity, the statement should be privatized +// at this dimension. +//------------------------------------------------------------------------------ +// boolean is_private_statement(const Relation &r, int dim) { +// int n; +// if (r.is_set()) +// n = r.n_set(); +// else +// n = r.n_out(); + +// if (dim >= n) +// return false; + +// try { +// coef_t c; +// if (r.is_set()) +// c = get_const(r, dim, Set_Var); +// else +// c = get_const(r, dim, Output_Var); +// if (c == posInfinity) +// return true; +// else +// return false; +// } +// catch (loop_error e){ +// } + +// return false; +// } + + + +// // ---------------------------------------------------------------------------- +// // Calculate v mod dividend based on equations inside relation r. +// // Return posInfinity if it is not a constant. 
+// // ---------------------------------------------------------------------------- +// static coef_t mod_(const Relation &r_, Variable_ID v, int dividend, std::set<Variable_ID> &working_on) { +// assert(dividend > 0); +// if (v->kind() == Forall_Var || v->kind() == Exists_Var || v->kind() == Wildcard_Var) +// return posInfinity; + +// working_on.insert(v); + +// Relation &r = const_cast<Relation &>(r_); +// Conjunct *c = r.query_DNF()->single_conjunct(); + +// for (EQ_Iterator ei(c->EQs()); ei; ei++) { +// int coef = mod((*ei).get_coef(v), dividend); +// if (coef != 1 && coef != dividend - 1 ) +// continue; + +// coef_t result = 0; +// for (Constr_Vars_Iter cvi(*ei); cvi; cvi++) +// if ((*cvi).var != v) { +// int p = mod((*cvi).coef, dividend); + +// if (p == 0) +// continue; + +// if (working_on.find((*cvi).var) != working_on.end()) { +// result = posInfinity; +// break; +// } + +// coef_t q = mod_(r, (*cvi).var, dividend, working_on); +// if (q == posInfinity) { +// result = posInfinity; +// break; +// } +// result += p * q; +// } + +// if (result != posInfinity) { +// result += (*ei).get_const(); +// if (coef == 1) +// result = -result; +// working_on.erase(v); + +// return mod(result, dividend); +// } +// } + +// working_on.erase(v); +// return posInfinity; +// } + + +// coef_t mod(const Relation &r, Variable_ID v, int dividend) { +// std::set<Variable_ID> working_on = std::set<Variable_ID>(); + +// return mod_(r, v, dividend, working_on); +// } + + + +//----------------------------------------------------------------------------- +// Generate mapping relation for permuation. 
+//----------------------------------------------------------------------------- +Relation permute_relation(const std::vector<int> &pi) { + const int n = pi.size(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + + for (int i = 0; i < n; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(i+1), 1); + h.update_coef(r.input_var(pi[i]+1), -1); + } + + return r; +} + + + +//--------------------------------------------------------------------------- +// Find the position index variable in a Relation by name. +//--------------------------------------------------------------------------- +Variable_ID find_index(Relation &r, const std::string &s, char side) { + // Omega quirks: assure the names are propagated inside the relation + r.setup_names(); + + if (r.is_set()) { // side == 's' + for (int i = 1; i <= r.n_set(); i++) { + std::string ss = r.set_var(i)->name(); + if (s == ss) { + return r.set_var(i); + } + } + } + else if (side == 'w') { + for (int i = 1; i <= r.n_inp(); i++) { + std::string ss = r.input_var(i)->name(); + if (s == ss) { + return r.input_var(i); + } + } + } + else { // side == 'r' + for (int i = 1; i <= r.n_out(); i++) { + std::string ss = r.output_var(i)->name(); + if (s+"'" == ss) { + return r.output_var(i); + } + } + } + + return NULL; +} + +// EQ_Handle get_eq(const Relation &r, int dim, Var_Kind type) { +// Variable_ID v; +// switch (type) { +// case Set_Var: +// v = r.set_var(dim+1); +// break; +// case Input_Var: +// v = r.input_var(dim+1); +// break; +// case Output_Var: +// v = r.output_var(dim+1); +// break; +// default: +// return NULL; +// } +// for (DNF_iterator di(r.query_DNF()); di; di++) +// for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) +// if ((*ei).get_coef(v) != 0) +// return (*ei); + +// return NULL; +// } + + +// std::Pair<Relation, Relation> split_loop(const Relation &r, const Relation &cond) { +// Relation r1 = Intersection(copy(r), copy(cond)); +// Relation r2 = Intersection(copy(r), 
Complement(copy(cond))); + +// return std::Pair<Relation, Relation>(r1, r2); +// } diff --git a/chill/src/parse_expr.ll b/chill/src/parse_expr.ll new file mode 100644 index 0000000..a9b389f --- /dev/null +++ b/chill/src/parse_expr.ll @@ -0,0 +1,24 @@ +%{ +// some C++ code +#include "chill_run_util.hh" +#include "parse_expr.tab.hh" +%} + +%option noyywrap + +%% +[ \t]+ /*ignore*/ +\n /*ignore*/ +L[0-9]+ { yylval.val = atoi(&yytext[1]); return LEVEL; } +[0-9]+ { yylval.val = atoi(yytext); return NUMBER; } +\<\= return LE; +\>\= return GE; +\=(\=)? return EQ; +[a-zA-Z_][a-zA-Z_0-9]* { + yylval.str_val = new char[yyleng+1]; + strcpy(yylval.str_val, yytext); + return VARIABLE; + } +. return (int)yytext[0]; +%% + diff --git a/chill/src/parse_expr.yy b/chill/src/parse_expr.yy new file mode 100644 index 0000000..c2943c2 --- /dev/null +++ b/chill/src/parse_expr.yy @@ -0,0 +1,85 @@ +%{ +#include "chill_run_util.hh" +#include "parse_expr.ll.hh" + +extern int yydebug; + +void yyerror(const char*); +int yyparse(simap_vec_t** rel); + +static simap_vec_t* return_rel; // used as the return value for yyparse + +%} + +%union { + int val; + char* str_val; + simap_t* cond_item; + simap_vec_t* cond; +} + +%token <val> NUMBER +%token <val> LEVEL +%token <str_val> VARIABLE + +%left LE GE EQ '<' '>' +%left '-' '+' '*' '/' + +/*the final output from this language should be an Omega Relation object*/ +%type <cond> cond prog +%type <cond_item> expr add_expr mul_expr neg_expr + +%% +prog : cond { return_rel = make_prog($1); } +; + +cond : expr '>' expr { $$ = make_cond_gt($1, $3); } + | expr '<' expr { $$ = make_cond_lt($1, $3); } + | expr GE expr { $$ = make_cond_ge($1, $3); } + | expr LE expr { $$ = make_cond_le($1, $3); } + | expr EQ expr { $$ = make_cond_eq($1, $3); } +; + +expr : add_expr { $$ = $1; } +; + +add_expr : add_expr '+' mul_expr { $$ = make_cond_item_add($1,$3); } + | add_expr '-' mul_expr { $$ = make_cond_item_sub($1,$3); } + | mul_expr { $$ = $1; } +; + +mul_expr : mul_expr 
'*' neg_expr { $$ = make_cond_item_mul($1,$3); } + | neg_expr { $$ = $1; } +; + +neg_expr : '-' neg_expr { $$ = make_cond_item_neg($2); } + | '(' expr ')' { $$ = $2; } + | NUMBER { $$ = make_cond_item_number($1); } + | LEVEL { $$ = make_cond_item_level($1); } + | VARIABLE { $$ = make_cond_item_variable($1); } +; +%% + +void yyerror(const char* msg) { + fprintf(stderr, "Parse error: %s", msg); +} + +simap_vec_t* parse_relation_vector(const char* expr) { + yydebug=0; + YY_BUFFER_STATE state; + + //if(yylex_init()) { + // TODO: error out or something + //} + + state = yy_scan_string(expr); + + if(yyparse()) { + // TODO: error out or something + } + + yy_delete_buffer(state); + yylex_destroy(); + return return_rel; +} + |