diff options
author | Tuowen Zhao <ztuowen@gmail.com> | 2016-09-17 03:22:53 +0000 |
---|---|---|
committer | Tuowen Zhao <ztuowen@gmail.com> | 2016-09-17 03:22:53 +0000 |
commit | 75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5 (patch) | |
tree | 498ac06b4cf78568b807fafd2619856afff69c28 /chill/src/chillmodule.cc | |
parent | 29efa7b1a0d089e02a70f73f348f11878955287c (diff) | |
download | chill-75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5.tar.gz chill-75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5.tar.bz2 chill-75ff98e4d65862ff5b36b533b4f6e3ea71ede1d5.zip |
cmake build
Diffstat (limited to 'chill/src/chillmodule.cc')
-rw-r--r-- | chill/src/chillmodule.cc | 1834 |
1 files changed, 1834 insertions, 0 deletions
diff --git a/chill/src/chillmodule.cc b/chill/src/chillmodule.cc new file mode 100644 index 0000000..fbeb477 --- /dev/null +++ b/chill/src/chillmodule.cc @@ -0,0 +1,1834 @@ + +// chill interface to python + +#include "chilldebug.h" + +#ifdef CUDACHILL + +#include "rose.h" // ?? +#include "loop_cuda_rose.hh" +#include "ir_rose.hh" +#include "ir_cudarose.hh" + +#include <vector> + +#else + +#include "chill_run_util.hh" + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <omega.h> +#include "loop.hh" +#include "ir_code.hh" +#ifdef BUILD_ROSE +#include "ir_rose.hh" +#elif BUILD_SUIF +#include "ir_suif.hh" +#endif + +#endif + +#include "chillmodule.hh" + +// TODO +#undef _POSIX_C_SOURCE +#undef _XOPEN_SOURCE +#include <Python.h> + +using namespace omega; + +// -- Cuda CHiLL global variables -- +#ifdef CUDACHILL + +extern LoopCuda *myloop; +extern IR_Code *ir_code; +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +#else + +extern Loop *myloop; +extern IR_Code *ir_code; +extern bool is_interactive; +extern bool repl_stop; + +std::string procedure_name; +std::string source_filename; + +int loop_start_num; +int loop_end_num; + +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +#endif + +// ----------------------- // +// CHiLL support functions // +// ----------------------- // +#ifndef CUDACHILL +// not sure yet if this actually needs to be exposed to the python interface +// these four functions are here to maintain similarity to the Lua interface +int get_loop_num_start() { + return loop_start_num; +} + +int get_loop_num_end() { + return loop_end_num; +} + +static void set_loop_num_start(int start_num) { + loop_start_num = start_num; +} + +static void set_loop_num_end(int end_num) { + loop_end_num = end_num; +} + +// TODO: finalize_loop(int,int) and init_loop(int,int) are identical to thier Lua counterparts. +// consider integrating them + +void finalize_loop(int loop_num_start, int loop_num_end) { + if (loop_num_start == loop_num_end) { + ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); + ir_controls[loops[loop_num_start]] = NULL; + } + else { + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) + parm.push_back(ir_controls[i]); + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + ir_code->ReplaceCode(block, myloop->getCode()); + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + delete ir_controls[i]; + ir_controls[i] = NULL; + } + } + delete myloop; +} +void finalize_loop() { + int loop_num_start = get_loop_num_start(); + int loop_num_end = get_loop_num_end(); + finalize_loop(loop_num_start, loop_num_end); +} +static void init_loop(int loop_num_start, int loop_num_end) { + if (source_filename.empty()) { + fprintf(stderr, "source file not set when initializing the loop"); + if (!is_interactive) + exit(2); + } + else { + if (ir_code == NULL) { + #ifdef BUILD_ROSE + if (procedure_name.empty()) + procedure_name = "main"; + #elif BUILD_SUIF + if (procedure_number == -1) + procedure_number = 0; + #endif + + #ifdef BUILD_ROSE + ir_code = new IR_roseCode(source_filename.c_str(), procedure_name.c_str()); + #elif BUILD_SUIF + ir_code = new IR_suifCode(source_filename.c_str(), procedure_name.c_str()); + #endif + + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + } + delete block; + } + if (myloop != NULL && myloop->isInitialized()) { + finalize_loop(); + } + } + set_loop_num_start(loop_num_start); + set_loop_num_end(loop_num_end); + if (loop_num_end < loop_num_start) { + fprintf(stderr, "the last loop must be after the start loop"); + if (!is_interactive) + exit(2); + } + if (loop_num_end >= loops.size()) { + fprintf(stderr, "loop %d does not exist", loop_num_end); + if (!is_interactive) + exit(2); + } + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + if (ir_controls[i] == NULL) { + fprintf(stderr, "loop has already been processed"); + if (!is_interactive) + exit(2); + } + parm.push_back(ir_controls[i]); + } + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + myloop = new Loop(block); + delete block; + //if (is_interactive) printf("%s ", PROMPT_STRING); +} +#endif + +// ----------------------- // +// Python support funcions // +// ----------------------- // + +// -- CHiLL support -- // +static void strict_arg_num(PyObject* args, int arg_num, const char* fname = NULL) { + int arg_given = PyTuple_Size(args); + char msg[128]; + if(arg_num != arg_given) { + if(fname) + sprintf(msg, "%s: expected %i arguments, was given %i.", fname, arg_num, arg_given); + else + sprintf(msg, "Expected %i argumets, was given %i.", arg_num, arg_given); + throw std::runtime_error(msg); + } +} + +static int strict_arg_range(PyObject* args, int arg_min, int arg_max, const char* fname = NULL) { + int arg_given = PyTuple_Size(args); + char msg[128]; + if(arg_given < arg_min || arg_given > arg_max) { + if(fname) + sprintf(msg, "%s: expected %i to %i arguments, was given %i.", fname, arg_min, arg_max, arg_given); + else + sprintf(msg, "Expected %i to %i, argumets, was given %i.", arg_min, arg_max, arg_given); + throw std::runtime_error(msg); + } + return arg_given; +} + +static int intArg(PyObject* args, int index, int dval = 0) { + if(PyTuple_Size(args) <= index) + return dval; + int ival; + PyObject *item = PyTuple_GetItem(args, index); + Py_INCREF(item); + if (PyInt_Check(item)) ival = PyInt_AsLong(item); + else { + fprintf(stderr, "argument at index %i is not an int\n", index); + exit(-1); + } + return ival; +} + +static std::string strArg(PyObject* args, int index, const char* dval = NULL) { + if(PyTuple_Size(args) <= index) + return dval; + std::string strval; + PyObject *item = PyTuple_GetItem(args, index); + Py_INCREF(item); + if (PyString_Check(item)) strval = strdup(PyString_AsString(item)); + else { + fprintf(stderr, "argument at index %i is not an string\n", index); + exit(-1); + } + return strval; +} + +static bool boolArg(PyObject* args, int index, bool dval = false) { + if(PyTuple_Size(args) <= index) + return dval; + bool bval; + PyObject* item = PyTuple_GetItem(args, index); + Py_INCREF(item); + return (bool)PyObject_IsTrue(item); +} + +static bool tostringintmapvector(PyObject* args, int index, std::vector<std::map<std::string,int> >& vec) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + //TODO: Typecheck + int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + std::map<std::string,int> map; + PyObject* dict = PyList_GetItem(seq, i); + PyObject* keys = PyDict_Keys(dict); + //TODO: Typecheck + int dict_len = PyList_Size(keys); + for(int j = 0; j < dict_len; j++) { + PyObject* key = PyList_GetItem(keys, j); + PyObject* value = PyDict_GetItem(dict, key); + std::string str_key = strdup(PyString_AsString(key)); + int int_value = PyInt_AsLong(value); + map[str_key] = int_value; + } + vec.push_back(map); + } + return true; +} + +static bool tointvector(PyObject* seq, std::vector<int>& vec) { + //TODO: Typecheck + int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + PyObject* item = PyList_GetItem(seq, i); + vec.push_back(PyInt_AsLong(item)); + } + return true; +} + +static bool tointvector(PyObject* args, int index, std::vector<int>& vec) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + return tointvector(seq, vec); +} + +static bool tointset(PyObject* args, int index, std::set<int>& set) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + //TODO: Typecheck + int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + PyObject* item = PyList_GetItem(seq, i); + set.insert(PyInt_AsLong(item)); + } + return true; +} +static bool tointmatrix(PyObject* args, int index, std::vector<std::vector<int> >& mat) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq_one = PyTuple_GetItem(args, index); + int seq_one_len = PyList_Size(seq_one); + for(int i = 0; i < seq_one_len; i++) { + std::vector<int> vec; + PyObject* seq_two = PyList_GetItem(seq_one, i); + int seq_two_len = PyList_Size(seq_two); + for(int j = 0; j < seq_two_len; j++) { + PyObject* item = PyList_GetItem(seq_two, j); + vec.push_back(PyInt_AsLong(item)); + } + mat.push_back(vec); + } + return true; +} + +#ifdef CUDACHILL +// ------------------------------ // +// Cuda CHiLL interface functions // +// ------------------------------ // + +static PyObject * +chill_print_code(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC print_code() PY\n"); + + myloop->printCode(); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); + +} + +static PyObject * +chill_print_ri(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_print_ri() called from python\n"); + myloop->printRuntimeInfo(); + DEBUG_PRINT("\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_idx(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_print_idx() called from python\n"); + myloop->printIndexes(); + DEBUG_PRINT("\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_dep(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_print_dep()\n"); + std::cout << myloop->dep; + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_space(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_print_space()\n"); + for (int i = 0; i < myloop->stmt.size(); i++) { + DEBUG_PRINT("s%d: ", i+1); + Relation r; + if (!myloop->stmt[i].xform.is_null()) + r = Composition(copy(myloop->stmt[i].xform), copy(myloop->stmt[i].IS)); + else + r = copy(myloop->stmt[i].IS); + r.simplify(2, 4); + r.print(); + } + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_num_statements(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_num_statements() called from python\n"); + int num = myloop->stmt.size(); + //DEBUG_PRINT("C num_statement() = %d\n", num); + return Py_BuildValue( "i", num ); // BEWARE "d" is DOUBLE, not int +} + +static PyObject * +chill_does_var_exist( PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_does_var_exist()\n"); + int yesno = 0; + // TODO if (myloop->symbolExists(symName)) yesno = 1; + DEBUG_PRINT("*** chill_does_var_exist *** UNIMPLEMENTED\n"); + return Py_BuildValue( "i", yesno); // there seems to be no boolean type +} + + +static PyObject * +chill_add_sync(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_add_sync() *UNTESTED*\n"); + int sstmt = -123; + // char index_name[180]; + static char Buffer[1024]; + static char *index_name = &Buffer[0]; + + if (!PyArg_ParseTuple(args, "is", &sstmt, &index_name)){ + fprintf(stderr, "chill_add_sync, can't parse statement number and name passed from python\n"); + exit(-1); + } + + DEBUG_PRINT("chill_add_sync, statement %d index_name '%s'\n", + sstmt, index_name); + std::string idxName( index_name); // ?? + myloop->addSync(sstmt, idxName); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_rename_index(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_rename_index() called from python\n"); + int sstmt; + //char oldname[80], newname[80]; + static char old[1024], newn[1024]; + + static char *oldname = &old[0], *newname=&newn[0]; + + if (!PyArg_ParseTuple(args, "iss", &sstmt, &oldname, &newname)){ + fprintf(stderr, "chill_rename_index, can't parse statement number and names passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("chill_rename_index, statement %d oldname '%s' newname '%s'\n", + //sstmt, oldname, newname); + + std::string idxName(oldname); + std::string newName(newname); + + //DEBUG_PRINT("calling myloop->renameIndex( %d, %s, %s )\n", + //sstmt, idxName.c_str(), newName.c_str()); + + myloop->renameIndex(sstmt, idxName, newName); + + //DEBUG_PRINT("after myloop->renameIndex()\n"); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + + +//THIS NEEDS TO MOVE + + + +static PyObject * +chill_permute_v2(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C permute_v2()\n"); + //int tot = sizeof(args); + //int things = tot / sizeof(PyObject *); + //DEBUG_PRINT("tot %d bytes, %d things\n", tot, things); + + int sstmt = -123; + PyObject *pyObj; + + //if (!PyArg_ParseTuple( args, "iO", &sstmt, &pyObj)) { + //if (!PyArg_ParseTuple( args, "i", &sstmt)) { + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + Py_XINCREF(pyObj); + + // the ONLY arg is a tuple. figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in order tuple\n", tupleSize); + + // first has to be the statement number + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); + Py_XINCREF(tupleItem); + if (PyInt_Check( tupleItem )) sstmt = PyInt_AsLong( tupleItem ); + else { + fflush(stdout); + fprintf(stderr, "first tuple item in chill_permute_v2 is not an int?\n"); + exit(-1); + } + + //DEBUG_PRINT("stmt %d\n", sstmt); + + char **strings; + std::vector<std::string> order; + std::string *cppstrptr; + std::string cppstr; + + strings = (char **) malloc( sizeof(char *) * tupleSize ) ; // too big + for (int i=1; i<tupleSize; i++) { + tupleItem = PyTuple_GetItem(pyObj, i); + Py_XINCREF(tupleItem); + int im1 = i-1; // offset needed for the actual string vector + if (PyString_Check( tupleItem)) { + strings[im1] = strdup(PyString_AsString(tupleItem)); + //DEBUG_PRINT("item %d = '%s'\n", i, strings[im1]); + //cppstrptr = new std::string( strings[im1] ); + //order.push_back( &(new std::string( strings[im1] ))); + //order.push_back( &cppstrptr ); + + cppstr = strings[im1]; + order.push_back( cppstr ); + } + else { + fprintf(stderr, "later parameter was not a string?\n"); + exit(-1); + } + + } + + myloop->permute_cuda(sstmt,order); + //DEBUG_PRINT("returned from permute_cuda()\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + +static PyObject * +chill_tile_v2_3arg( PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("in chillmodule.cc, chill_tile_v2_3arg()\n"); + + int sstmt, level, tile_size, outer_level; + //char index_name[80], control_name[80]; + static char *index_name, *control_name; + int tiling_method; + + if (!PyArg_ParseTuple(args, "iii", &sstmt, &level, &outer_level)) { + fprintf(stderr,"chill_tile_v2, can't parse parameters passed from python\n"); + exit(-1); + } + + // 3 parameter version + //DEBUG_PRINT("chill_tile_v2( %d %d %d) (3 parameter version) \n", + //sstmt,level,outer_level); + myloop->tile_cuda(sstmt,level,outer_level); + //DEBUG_PRINT("chill_tile_v2 3 parameter version returning normally\n"); + Py_RETURN_NONE; +} + + +static PyObject * +chill_tile_v2_7arg( PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("in chillmodule.cc, chill_tile_v2_7arg()\n"); + + int sstmt, level, tile_size, outer_level; + //char index_name[80], control_name[80]; + static char iname[1024], cname[1024]; + static char *index_name = &iname[0], *control_name=&cname[0]; + int tiling_method; + + if (!PyArg_ParseTuple(args, "iiiissi", + &sstmt, &level, &tile_size, &outer_level, + &index_name, &control_name, &tiling_method)){ + fprintf(stderr, "chill_tile_v2_7arg, can't parse parameters passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("7 parameter version was called?\n"); + + // 7 parameter version was called + //DEBUG_PRINT("tile_v2( %d, %d, %d, %d ... )\n", + // sstmt, level, tile_size, outer_level); + + //DEBUG_PRINT("tile_v2( %d, %d, %d, %d, %s, %s, %d)\n", + //sstmt,level,tile_size,outer_level,index_name, control_name, tiling_method); + + TilingMethodType method = StridedTile; + if (tiling_method == 0) method = StridedTile; + else if (tiling_method == 1) method = CountedTile; + else fprintf(stderr, "ERROR: tile_v2 illegal tiling method, using StridedTile\n"); + + //DEBUG_PRINT("outer level %d\n", outer_level); + //DEBUG_PRINT("calling myloop->tile_cuda( %d, %d, %d, %d, %s, %s, method)\n", + // sstmt, level, tile_size, outer_level, index_name, control_name); + + // BUH level+1? + myloop->tile_cuda(sstmt, level, tile_size, outer_level, index_name, control_name, method); + Py_RETURN_NONE; +} + + +static PyObject * +chill_cur_indices(PyObject *self, PyObject *args) +{ + int stmt_num = -123; + if (!PyArg_ParseTuple(args, "i", &stmt_num)){ + fprintf(stderr, "chill_cur_indides, can't parse statement number passed from python\n"); + exit(-1); + } + //DEBUG_PRINT("cur_indices( %d )\n", stmt_num); + + char formatstring[1024]; + for (int i=0; i<1024; i++) formatstring[i] = '\0'; + + int num = myloop->idxNames[stmt_num].size(); + for(int i=0; i<num; i++){ + //DEBUG_PRINT("myloop->idxNames[%d] index %d = '%s'\n", + //stmt_num, i, myloop->idxNames[stmt_num][i].c_str()); + + // backwards, works because all entries are the same + //sprintf(formatstring, "i %s", formatstring); + strcat( formatstring, "s "); + // put this in a list or something to pass back to python + } + + int l = strlen(formatstring); + if (l > 0) formatstring[l-1] = '\0'; + + //DEBUG_PRINT("%d current indices, format string '%s'\n\n",num,formatstring); + //DEBUG_PRINT("%d current indices\n\n", num); + + //return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(),myloop->idxNames[stmt_num][1].c_str() ); + + // I don't know a clean way to do this. + if (num == 2) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str()); + if (num == 3) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str()); + if (num == 4) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str()); + if (num == 5) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str()); + if (num == 6) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str()); + if (num == 7) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str()); + if (num == 8) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str()); + if (num == 9) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str()); + if (num == 10) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str()); + if (num == 11) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str()); + if (num == 12) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str()); + if (num == 13) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str()); + if (num == 14) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str()); + if (num == 15) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str()); + if (num == 16) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str()); + if (num == 17) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str(), + myloop->idxNames[stmt_num][16].c_str()); + if (num == 18) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str(), + myloop->idxNames[stmt_num][16].c_str(), + myloop->idxNames[stmt_num][17].c_str()); + + fprintf(stderr, "going to die horribly, num=%d\n", num); +} + + +static PyObject * +chill_block_indices(PyObject *self, PyObject *args) { + + // I'm unsure what the legal states are here + // is it always "bx", or ("bx" and "by") ? + int howmany = 0; + char *loopnames[2]; + if (myloop->cu_bx > 1) { + loopnames[howmany] = strdup("bx"); + howmany++; + } + if (myloop->cu_by > 1) { + loopnames[howmany] = strdup("by"); + howmany++; + } + + if (howmany == 0) return Py_BuildValue("()"); + if (howmany == 1) return Py_BuildValue("(s)", loopnames[0]); + if (howmany == 2) return Py_BuildValue("(ss)", loopnames[0], loopnames[1]); + fprintf(stderr, "chill_block_indices(), gonna die, howmany == %d", howmany); + exit(666); + + Py_RETURN_NONE; +} + +static PyObject * +chill_thread_indices(PyObject *self, PyObject *args) { + + // I'm unsure what the legal states are here + // is it always "tx", or ("tx" and "ty") or ("tx" and "ty" and "tz") ? + int howmany = 0; + char *loopnames[3]; + if (myloop->cu_tx > 1) { + loopnames[howmany++] = strdup("tx"); + } + if (myloop->cu_ty > 1) { + loopnames[howmany++] = strdup("ty"); + } + if (myloop->cu_tz > 1) { + loopnames[howmany++] = strdup("tz"); + } + + if (howmany == 0) return Py_BuildValue("()"); + if (howmany == 1) return Py_BuildValue("(s)", + loopnames[0]); + if (howmany == 2) return Py_BuildValue("(ss)", + loopnames[0], + loopnames[1]); + if (howmany == 3) return Py_BuildValue("(sss)", + loopnames[0], + loopnames[1], + loopnames[2]); + + fprintf(stderr, "chill_thread_indices(), gonna die, howmany == %d", howmany); + exit(999); +} + + + + + +static PyObject * +block_dims(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("block_dims() returning %d %d\n", myloop->cu_bx, myloop->cu_by); + Py_BuildValue( "i i", myloop->cu_bx, myloop->cu_by); +} + + +static PyObject * +thread_dims(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("thread_dims() returning %d %d %d\n", + //myloop->cu_tx, myloop->cu_ty, myloop->cu_tz); + + Py_BuildValue( "i i i", myloop->cu_tx, myloop->cu_ty, myloop->cu_tz); +} + + +static PyObject * +chill_hard_loop_bounds(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("hard_loop_bounds("); + int sstmt, level; // input parameters + int upper, lower; // output + + if (!PyArg_ParseTuple(args, "ii", &sstmt, &level)){ + fprintf(stderr, "hard_loop_bounds, "); + fprintf(stderr, "can't parse statement numbers passed from python\n"); + exit(-1); + } + //DEBUG_PRINT(" %d, %d )\n", sstmt, level); + + myloop->extractCudaUB(sstmt, level, upper, lower); + + //DEBUG_PRINT("lower %d upper %d\n", lower, upper); + + Py_BuildValue( "i i", lower, upper); +} + + +static PyObject * +chill_datacopy9(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\n\n\n***** datacopy_v2() 9ARGS\n"); + + int sstmt; + int level; + std::string cppstr; + std::string array_name; + std::vector<std::string> new_idxs; + bool allow_extra_read; + int fastest_changing_dimension; + int padding_stride; + int padding_alignment; + bool cuda_shared; + + PyObject *pyObj; + + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + Py_XINCREF( pyObj ); + + //if (PyList_Check(pyObj)) fprintf(stderr, "it's a list\n"); + //if (PyTuple_Check(pyObj)) fprintf(stderr, "it's a tuple\n"); + + + + // the ONLY arg is a tuple. figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in object tuple\n", tupleSize); + + // first has to be the statement number + PyObject *tupleItem1 = PyTuple_GetItem(pyObj, 0); + Py_INCREF(tupleItem1); + if (PyInt_Check( tupleItem1)) sstmt = PyInt_AsLong( tupleItem1 ); + else { + fprintf(stderr, "second tuple item in chill_datacopy9 is not an int?\n"); + exit(-1); + } + //DEBUG_PRINT("stmt %d\n", sstmt); + + PyObject *tupleItem2 = PyTuple_GetItem(pyObj, 1); // second item is level + Py_INCREF(tupleItem2); + if (PyInt_Check( tupleItem2 )) level = PyInt_AsLong( tupleItem2); + else { + fprintf(stderr, "second tuple item in chill_datacopy9 is not an int?\n"); + exit(-1); + } + //DEBUG_PRINT("level %d\n", level ); + + // third item is array name + PyObject *tupleItem3 = PyTuple_GetItem(pyObj, 2); + Py_INCREF(tupleItem3); + array_name = strdup(PyString_AsString(tupleItem3)); + //DEBUG_PRINT("array name '%s'\n", array_name.c_str()); + + + // integer number of indices + PyObject *tupleItem4 = PyTuple_GetItem(pyObj, 3); + Py_INCREF(tupleItem4); + int numindex= PyInt_AsLong( tupleItem4 ); + //DEBUG_PRINT("%d indices\n", numindex); + + + PyObject *tupleItemTEMP; + for (int i=0; i<numindex; i++) { + tupleItemTEMP = PyTuple_GetItem(pyObj, 4+i); + Py_INCREF(tupleItemTEMP); + cppstr = strdup(PyString_AsString(tupleItemTEMP)); + new_idxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + PyObject *tupleItem5 = PyTuple_GetItem(pyObj, 4+numindex); + Py_INCREF(tupleItem5); + allow_extra_read = PyInt_AsLong( tupleItem5 ); + + PyObject *tupleItem6 = PyTuple_GetItem(pyObj, 5+numindex); + Py_INCREF(tupleItem6); + fastest_changing_dimension = PyInt_AsLong( tupleItem6 ); + + PyObject *tupleItem7 = PyTuple_GetItem(pyObj, 6+numindex); + Py_INCREF(tupleItem7); + padding_stride = PyInt_AsLong( tupleItem7 ); + + PyObject *tupleItem8 = PyTuple_GetItem(pyObj, 7+numindex); + Py_INCREF(tupleItem8); + padding_alignment = PyInt_AsLong( tupleItem8 ); + + PyObject *tupleItem9 = PyTuple_GetItem(pyObj, 8+numindex); + Py_INCREF(tupleItem9); + cuda_shared = PyInt_AsLong( tupleItem9 ); + + + //DEBUG_PRINT("calling myloop->datacopy_cuda()\n"); + + // corruption happenes in here??? + myloop->datacopy_cuda(sstmt, level, array_name, new_idxs, + allow_extra_read, fastest_changing_dimension, + padding_stride, padding_alignment, cuda_shared); + + DEBUG_PRINT("before attempt (after actual datacopy)\n"); + //myloop->printCode(); // attempt to debug + DEBUG_PRINT("back from attempt\n"); + + //DEBUG_PRINT("datacopy_9args returning\n"); + + Py_RETURN_NONE; +} + + + + + +static PyObject * +chill_datacopy_privatized(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C datacopy_privatized\n"); + PyObject *pyObj; + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); // statement number + Py_XINCREF(tupleItem); + int sstmt = PyInt_AsLong( tupleItem ); + + tupleItem = PyTuple_GetItem(pyObj, 1); // start_loop + Py_XINCREF(tupleItem); + std::string start_loop = strdup(PyString_AsString(tupleItem)); + int level = myloop->findCurLevel(sstmt, start_loop); + + + tupleItem = PyTuple_GetItem(pyObj, 2); // array_name + Py_XINCREF(tupleItem); + std::string array_name = strdup(PyString_AsString(tupleItem)); + + // things to hold constant - first a count, then the things + tupleItem = PyTuple_GetItem(pyObj, 3); // how many things in the array + Py_XINCREF(tupleItem); + int howmany = PyInt_AsLong( tupleItem ); + + //DEBUG_PRINT("%d things to hold constant: ", howmany); + std::vector<std::string> holdconstant; + std::string cppstr; + + for (int i=0; i<howmany; i++) { + tupleItem = PyTuple_GetItem(pyObj, 4+i); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + holdconstant.push_back( cppstr ); // add at end + } + + std::vector<int> privatized_levels(howmany); + for(int i=0; i<howmany; i++) { + privatized_levels[i] = myloop->findCurLevel(sstmt, holdconstant[i]); + //DEBUG_PRINT("privatized_levels[ %d ] = %d\n", i, privatized_levels[i] ); + } + + bool allow_extra_read = false; + int fastest_changing_dimension = -1; + int padding_stride = 1; + int padding_alignment = 1; + bool cuda_shared = false; + + + myloop->datacopy_privatized_cuda(sstmt, level, array_name, privatized_levels, + allow_extra_read, fastest_changing_dimension, + padding_stride, padding_alignment, + cuda_shared); + + + Py_RETURN_NONE; +} + + + + + + +static PyObject * +chill_unroll(PyObject *self, PyObject *args) +{ + int sstmt, level, unroll_amount; + + if (!PyArg_ParseTuple(args, "iii", &sstmt, &level, &unroll_amount)) { + fprintf(stderr, "chill_unroll, can't parse parameters passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("chill_unroll( %d, %d, %d)\n", sstmt, level, unroll_amount ); + bool does_expand = myloop->unroll_cuda(sstmt,level,unroll_amount); + + // TODO return the boolean? + Py_RETURN_NONE; +} + + + + +static PyObject * +chill_cudaize_v2(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("cudaize_v2\n"); + PyObject *pyObj; + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + + // the ONLY arg is a tuple. figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in tuple\n", tupleSize); + + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); //the kernel name + Py_XINCREF(tupleItem); + std::string kernel_name = strdup(PyString_AsString(tupleItem)); + + std::map<std::string, int> array_sizes; + tupleItem = PyTuple_GetItem(pyObj, 1); // number of array sizes + Py_XINCREF(tupleItem); + int numarraysizes = PyInt_AsLong( tupleItem ); + + std::string cppstr; + int offset = 2; + for (int i=0; i<numarraysizes; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer size + int siz = PyInt_AsLong( tupleItem ); + + //DEBUG_PRINT("arraysize for %s = %d\n", cppstr.c_str(), siz); + array_sizes.insert( std::make_pair( cppstr, siz )); + } + + + std::vector<std::string> blockIdxs; + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer number of blocks + Py_XINCREF(tupleItem); + int numblocks = PyInt_AsLong( tupleItem ); + //DEBUG_PRINT("%d blocks\n", numblocks); + for (int i=0; i<numblocks; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + cppstr = strdup(PyString_AsString(tupleItem)); + blockIdxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + std::vector<std::string> threadIdxs; + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer number of threads + Py_XINCREF(tupleItem); + int numthreads= PyInt_AsLong( tupleItem ); + //DEBUG_PRINT("%d threads\n", numthreads); + for (int i=0; i<numthreads; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + threadIdxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + + myloop->cudaize_v2(kernel_name, array_sizes, blockIdxs, threadIdxs); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + + +static PyObject *get_loop_num() { + // TODO get_loop_num() it's a global value? + fprintf(stderr, "get_loop_num() UNIMPLEMENTED\n"); + exit(-1); +} + + + + +static PyObject * +chill_copy_to_texture(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C copy_to_texture() called from python \n"); + const char *array_name; + if (!PyArg_ParseTuple(args, "s", &array_name)){ + fprintf(stderr, "chill_copy_to_texture can't parse array name\n"); + exit(-1); + } + //DEBUG_PRINT("array name = %s\n", array_name); + myloop->copy_to_texture(array_name); + + Py_RETURN_NONE; +} + + + + + + + +static PyObject * +chill_init(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("C chill_init() called from python as read_IR()\n"); + DEBUG_PRINT("C init( "); + const char *filename; + const char *procname; + if (!PyArg_ParseTuple(args, "ss", &filename, &procname)){ + fprintf(stderr, "umwut? can't parse file name and procedure name?\n"); + exit(-1); + } + + int loop_num = 0; + + DEBUG_PRINT("%s, 0, 0 )\n", filename); + + DEBUG_PRINT("GETTING IR CODE in chill_init() in chillmodule.cc\n"); + DEBUG_PRINT("ir_code = new IR_cudaroseCode(%s, %s);\n",filename, procname); + ir_code = new IR_cudaroseCode(filename, procname); //this produces 15000 lines of output + fflush(stdout); + + + + + //protonu--here goes my initializations + //A lot of this code was lifted from Chun's parser.yy + //the plan is now to create the LoopCuda object directly + IR_Block *block = ir_code->GetCode(); + DEBUG_PRINT("ir_code->FindOneLevelControlStructure(block); chillmodule.cc\n"); + ir_controls = ir_code->FindOneLevelControlStructure(block); + + int loop_count = 0; + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) { + loops.push_back(i); + loop_count++; + } + } + delete block; + + + std::vector<IR_Control *> parm; + for(int j = 0; j < loop_count; j++) + parm.push_back(ir_controls[loops[j]]); + + + DEBUG_PRINT("block = ir_code->MergeNeighboringControlStructures(parm);\n"); + block = ir_code->MergeNeighboringControlStructures(parm); + + //DEBUG_PRINT("myloop = new LoopCuda(block, loop_num); in chillmodule.cc\n"); + myloop = new LoopCuda(block, loop_num); + fflush(stdout); DEBUG_PRINT("back\n"); + delete block; + + //end-protonu + + fflush(stdout); + DEBUG_PRINT("myloop->original();\n"); + myloop->original(); + fflush(stdout); + DEBUG_PRINT("myloop->useIdxNames=true;\n"); + myloop->useIdxNames=true;//Use idxName in code_gen + //register_v2(L); + + fflush(stdout); + DEBUG_PRINT("chill_init DONE\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); + +} + +#else +// ------------------------- // +// CHiLL interface functions // +// ------------------------- // + +static PyObject* chill_source(PyObject* self, PyObject* args) { + strict_arg_num(args, 1, "source"); + source_filename = strArg(args, 0); + Py_RETURN_NONE; +} + +static PyObject* chill_procedure(PyObject* self, PyObject* args) { + if(!procedure_name.empty()) { + fprintf(stderr, "only one procedure can be handled in a script"); + if(!is_interactive) + exit(2); + } + procedure_name = strArg(args, 0); + Py_RETURN_NONE; +} + +static PyObject* chill_loop(PyObject* self, PyObject* args) { + // loop (n) + // loop (n:m) + + int nargs = PyTuple_Size(args); + int start_num; + int end_num; + if(nargs == 1) { + start_num = intArg(args, 0); + end_num = start_num; + } + else if(nargs == 2) { + start_num = intArg(args, 0); + end_num = intArg(args, 1); + } + else { + fprintf(stderr, "loop takes one or two arguments"); + if(!is_interactive) + exit(2); + } + set_loop_num_start(start_num); + set_loop_num_end(end_num); + init_loop(start_num, end_num); + Py_RETURN_NONE; +} + +static PyObject* chill_print_code(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_code"); + myloop->printCode(); + printf("\n"); + Py_RETURN_NONE; +} + +static PyObject* chill_print_dep(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_dep"); + myloop->printDependenceGraph(); + Py_RETURN_NONE; +} + +static PyObject* chill_print_space(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_space"); + myloop->printIterationSpace(); + Py_RETURN_NONE; +} + +static PyObject* chill_exit(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "exit"); + repl_stop = true; + Py_RETURN_NONE; +} + +static void add_known(std::string cond_expr) { + int num_dim = myloop->known.n_set(); + std::vector<std::map<std::string, int> >* cond; + cond = parse_relation_vector(cond_expr.c_str()); + + Relation rel(num_dim); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else + throw std::invalid_argument("only symbolic variables are allowed in known condition"); + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("symbolic variable " + it->first + " not found"); + else + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->addKnown(rel); +} + +static PyObject* chill_known(PyObject* self, PyObject* args) { + strict_arg_num(args, 1, "known"); + if (PyList_Check(PyTuple_GetItem(args, 0))) { + PyObject* list = PyTuple_GetItem(args, 0); + for (int i = 0; i < PyList_Size(list); i++) { + add_known(std::string(PyString_AsString(PyList_GetItem(list, i)))); + } + } + else { + add_known(strArg(args, 0)); + } + Py_RETURN_NONE; +} + +static PyObject* chill_remove_dep(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "remove_dep"); + int from = intArg(args, 0); + int to = intArg(args, 1); + myloop->removeDependence(from, to); + Py_RETURN_NONE; +} + +static PyObject* chill_original(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "original"); + myloop->original(); + Py_RETURN_NONE; +} + +static PyObject* chill_permute(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 1, 3, "permute"); + if((nargs < 1) || (nargs > 3)) + throw std::runtime_error("incorrect number of arguments in permute"); + if(nargs == 1) { + // premute ( vector ) + std::vector<int> pi; + if(!tointvector(args, 0, pi)) + throw std::runtime_error("first arg in permute(pi) must be an int vector"); + myloop->permute(pi); + } + else if (nargs == 2) { + // permute ( set, vector ) + std::set<int> active; + std::vector<int> pi; + if(!tointset(args, 0, active)) + throw std::runtime_error("the first argument in permute(active, pi) must be an int set"); + if(!tointvector(args, 1, pi)) + throw std::runtime_error("the second argument in permute(active, pi) must be an int vector"); + myloop->permute(active, pi); + } + else if (nargs == 3) { + int stmt_num = intArg(args, 1); + int level = intArg(args, 2); + std::vector<int> pi; + if(!tointvector(args, 3, pi)) + throw std::runtime_error("the third argument in permute(stmt_num, level, pi) must be an int vector"); + myloop->permute(stmt_num, level, pi); + } + Py_RETURN_NONE; +} + +static PyObject* chill_pragma(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "pragma"); + int stmt_num = intArg(args, 1); + int level = intArg(args, 1); + std::string pragmaText = strArg(args, 2); + myloop->pragma(stmt_num, level, pragmaText); + Py_RETURN_NONE; +} + +static PyObject* chill_prefetch(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "prefetch"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string prefetchText = strArg(args, 2); + int hint = intArg(args, 3); + myloop->prefetch(stmt_num, level, prefetchText, hint); + Py_RETURN_NONE; +} + +static PyObject* chill_tile(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 7, "tile"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int tile_size = intArg(args, 2); + if(nargs == 3) { + myloop->tile(stmt_num, level, tile_size); + } + else if(nargs >= 4) { + int outer_level = intArg(args, 3); + if(nargs >= 5) { + TilingMethodType method = StridedTile; + int imethod = intArg(args, 4, 2); //< don't know if a default value is needed + // check method input against expected values + if (imethod == 0) + method = StridedTile; + else if (imethod == 1) + method = CountedTile; + else + throw std::runtime_error("5th argument must be either strided or counted"); + if(nargs >= 6) { + int alignment_offset = intArg(args, 5); + if(nargs == 7) { + int alignment_multiple = intArg(args, 6, 1); + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset, alignment_multiple); + } + if(nargs == 6) + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset); + } + if(nargs == 5) + myloop->tile(stmt_num, level, tile_size, outer_level, method); + } + if(nargs == 4) + myloop->tile(stmt_num, level, tile_size, outer_level); + } + Py_RETURN_NONE; +} + +static void chill_datacopy_vec(PyObject* args) { + // Overload 1: bool datacopy( + // const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, + // int level, + // bool allow_extra_read = false, + // int fastest_changing_dimension = -1, + // int padding_stride = 1, + // int padding_alignment = 4, + // int memory_type = 0); + std::vector<std::pair<int, std::vector<int> > > array_ref_nums; + // expect list(tuple(int,list(int))) + // or dict(int,list(int)) + if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { + PyObject* list = PyTuple_GetItem(args, 0); + for(int i = 0; i < PyList_Size(list); i ++) { + PyObject* tup = PyList_GetItem(list, i); + int index = PyLong_AsLong(PyTuple_GetItem(tup, 0)); + std::vector<int> vec; + tointvector(PyTuple_GetItem(tup, 1), vec); + array_ref_nums.push_back(std::pair<int, std::vector<int> >(index, vec)); + } + } + else if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { + PyObject* dict = PyTuple_GetItem(args, 0); + PyObject* klist = PyDict_Keys(dict); + for(int ki = 0; ki < PyList_Size(klist); ki++) { + PyObject* index = PyList_GetItem(klist, ki); + std::vector<int> vec; + tointvector(PyDict_GetItem(dict,index), vec); + array_ref_nums.push_back(std::pair<int, std::vector<int> >(PyLong_AsLong(index), vec)); + } + Py_DECREF(klist); + } + else { + //TODO: this should never happen + } + int level = intArg(args, 1); + bool allow_extra_read = boolArg(args, 2, false); + int fastest_changing_dimension = intArg(args, 3, -1); + int padding_stride = intArg(args, 4, 1); + int padding_alignment = intArg(args, 5, 4); + int memory_type = intArg(args, 6, 0); + myloop->datacopy(array_ref_nums, level, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +static void chill_datacopy_int(PyObject* args) { + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string array_name = strArg(args,2,0); + bool allow_extra_read = boolArg(args,3,false); + int fastest_changing_dimension = intArg(args, 4, -1); + int padding_stride = intArg(args, 5, 1); + int padding_alignment = intArg(args, 6, 4); + int memory_type = intArg(args, 7, 0); + myloop->datacopy(stmt_num, level, array_name, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +static PyObject* chill_datacopy(PyObject* self, PyObject* args) { + // Overload 2: bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + int nargs = strict_arg_range(args, 3, 7, "datacopy"); + if(PyList_CheckExact(PyTuple_GetItem(args,0)) || PyDict_CheckExact(PyTuple_GetItem(args, 0))) { + chill_datacopy_vec(args); + } + else { + chill_datacopy_int(args); + } + Py_RETURN_NONE; +} + +static PyObject* chill_datacopy_privatized(PyObject* self, PyObject* args) { + // bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); + int nargs = strict_arg_range(args, 4, 9, "datacopy_privatized"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string array_name = strArg(args, 2); + std::vector<int> privatized_levels; + tointvector(args, 3, privatized_levels); + bool allow_extra_read = boolArg(args, 4, false); + int fastest_changing_dimension = intArg(args, 5, -1); + int padding_stride = intArg(args, 6, 1); + int padding_alignment = intArg(args, 7, 1); + int memory_type = intArg(args, 8); + myloop->datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + Py_RETURN_NONE; +} + +static PyObject* chill_unroll(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 4, "unroll"); + //std::set<int> unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >(), int cleanup_split_level = 0); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int unroll_amount = intArg(args, 2); + std::vector< std::vector<std::string> > idxNames = std::vector< std::vector<std::string> >(); + int cleanup_split_level = intArg(args, 3); + myloop->unroll(stmt_num, level, unroll_amount, idxNames, cleanup_split_level); + Py_RETURN_NONE; +} + +static PyObject* chill_unroll_extra(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 4, "unroll_extra"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int unroll_amount = intArg(args, 2); + int cleanup_split_level = intArg(args, 3, 0); + myloop->unroll_extra(stmt_num, level, unroll_amount, cleanup_split_level); + Py_RETURN_NONE; +} + +static PyObject* chill_split(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "split"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int num_dim = myloop->stmt[stmt_num].xform.n_out(); + + std::vector<std::map<std::string, int> >* cond; + std::string cond_expr = strArg(args, 2); + cond = parse_relation_vector(cond_expr.c_str()); + + Relation rel((num_dim-1)/2); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else { + if (dim > (num_dim-1)/2) + throw std::invalid_argument("invalid loop level " + to_string(dim) + " in split condition"); + h.update_coef(rel.set_var(dim), it->second); + } + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("unrecognized variable " + to_string(it->first.c_str())); + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->split(stmt_num,level,rel); + Py_RETURN_NONE; +} + +static PyObject* chill_nonsingular(PyObject* self, PyObject* args) { + std::vector< std::vector<int> > mat; + tointmatrix(args, 0, mat); + myloop->nonsingular(mat); + Py_RETURN_NONE; +} + +static PyObject* chill_skew(PyObject* self, PyObject* args) { + std::set<int> stmt_nums; + std::vector<int> skew_amounts; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + tointvector(args, 2, skew_amounts); + myloop->skew(stmt_nums, level, skew_amounts); + Py_RETURN_NONE; +} + +static PyObject* chill_scale(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + std::set<int> stmt_nums; + int level = intArg(args, 1); + int scale_amount = intArg(args, 2); + tointset(args, 0, stmt_nums); + myloop->scale(stmt_nums, level, scale_amount); + Py_RETURN_NONE; +} + +static PyObject* chill_reverse(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmt_nums; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + myloop->reverse(stmt_nums, level); + Py_RETURN_NONE; +} + +static PyObject* chill_shift(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + std::set<int> stmt_nums; + int level = intArg(args, 1); + int shift_amount = intArg(args, 2); + tointset(args, 0, stmt_nums); + myloop->shift(stmt_nums, level, shift_amount); + Py_RETURN_NONE; +} + +static PyObject* chill_shift_to(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int absolute_pos = intArg(args, 2); + myloop->shift_to(stmt_num, level, absolute_pos); + Py_RETURN_NONE; +} + +static PyObject* chill_peel(PyObject* self, PyObject* args) { + strict_arg_range(args, 2, 3); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int amount = intArg(args, 2); + myloop->peel(stmt_num, level, amount); + Py_RETURN_NONE; +} + +static PyObject* chill_fuse(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmt_nums; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + myloop->fuse(stmt_nums, level); + Py_RETURN_NONE; +} + +static PyObject* chill_distribute(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmts; + int level = intArg(args, 1); + tointset(args, 0, stmts); + myloop->distribute(stmts, level); + Py_RETURN_NONE; +} + +static PyObject * +chill_num_statements(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_num_statements() called from python\n"); + int num = myloop->stmt.size(); + //DEBUG_PRINT("C num_statement() = %d\n", num); + return Py_BuildValue( "i", num ); // BEWARE "d" is DOUBLE, not int +} +#endif + +#ifdef CUDACHILL +static PyMethodDef ChillMethods[] = { + + // python name C routine parameter passing comment + {"print_code", chill_print_code, METH_VARARGS, "print the code at this point"}, + {"print_ri", chill_print_ri , METH_VARARGS, "print Runtime Info "}, + {"print_idx", chill_print_idx , METH_VARARGS, "print indices "}, + {"print_dep", chill_print_dep , METH_VARARGS, "print dep, dependecies?"}, + {"print_space", chill_print_space, METH_VARARGS, "print something or other "}, + {"add_sync", chill_add_sync, METH_VARARGS, "add sync, whatever that is"}, + {"rename_index", chill_rename_index, METH_VARARGS, "rename a loop index"}, + {"permute", chill_permute, METH_VARARGS, "change the order of loops?"}, + {"tile3", chill_tile_v2_3arg, METH_VARARGS, "something to do with tile"}, + {"tile7", chill_tile_v2_7arg, METH_VARARGS, "something to do with tile"}, + {"thread_dims", thread_dims, METH_VARARGS, "tx, ty, tz "}, + {"block_dims", block_dims, METH_VARARGS, "bx, by"}, + {"thread_indices", chill_thread_indices, METH_VARARGS, "bx, by"}, + {"block_indices", chill_block_indices, METH_VARARGS, "bx, by"}, + {"hard_loop_bounds", chill_hard_loop_bounds, METH_VARARGS, "lower, upper"}, + {"unroll", chill_unroll, METH_VARARGS, "unroll a loop"}, + {"cudaize", chill_cudaize_v2, METH_VARARGS, "dunno"}, + {"datacopy_privatized", chill_datacopy_privatized, METH_VARARGS, "dunno"}, + + {"datacopy_9arg", chill_datacopy9, METH_VARARGS, "datacopy with 9 arguments"}, + {"copy_to_texture", chill_copy_to_texture, METH_VARARGS, "copy to texture mem"}, + {"read_IR", chill_init, METH_VARARGS, "read an Intermediate Representation file"}, + {"cur_indices", chill_cur_indices, METH_VARARGS, "currently active indices"}, + {"num_statements", chill_num_statements, METH_VARARGS, "number of statements in ... something"}, + {NULL, NULL, 0, NULL} /* Sentinel */ + + //{"copy_to_constant", chill_copy_to_constant, METH_VARARGS, "copy to constant mem"}, + +}; +#else +static PyMethodDef ChillMethods[] = { + + //python name C routine parameter passing comment + {"source", chill_source, METH_VARARGS, "set source file for chill script"}, + {"procedure", chill_procedure, METH_VARARGS, "set the name of the procedure"}, + {"loop", chill_loop, METH_VARARGS, "indicate which loop to optimize"}, + {"print_code", chill_print_code, METH_VARARGS, "print generated code"}, + {"print_dep", chill_print_dep, METH_VARARGS, "print the dependencies graph"}, + {"print_space", chill_print_space, METH_VARARGS, "print space"}, + {"exit", chill_exit, METH_VARARGS, "exit the interactive consule"}, + {"known", chill_known, METH_VARARGS, "knwon"}, + {"remove_dep", chill_remove_dep, METH_VARARGS, "remove dependency i suppose"}, + {"original", chill_original, METH_VARARGS, "original"}, + {"permute", chill_permute, METH_VARARGS, "permute"}, + {"pragma", chill_pragma, METH_VARARGS, "pragma"}, + {"prefetch", chill_prefetch, METH_VARARGS, "prefetch"}, + {"tile", chill_tile, METH_VARARGS, "tile"}, + {"datacopy", chill_datacopy, METH_VARARGS, "datacopy"}, + {"datacopy_privitized", chill_datacopy_privatized, METH_VARARGS, "datacopy_privatized"}, + {"unroll", chill_unroll, METH_VARARGS, "unroll"}, + {"unroll_extra", chill_unroll_extra, METH_VARARGS, "unroll_extra"}, + {"split", chill_split, METH_VARARGS, "split"}, + {"nonsingular", chill_nonsingular, METH_VARARGS, "nonsingular"}, + {"skew", chill_skew, METH_VARARGS, "skew"}, + {"scale", chill_scale, METH_VARARGS, "scale"}, + {"reverse", chill_reverse, METH_VARARGS, "reverse"}, + {"shift", chill_shift, METH_VARARGS, "shift"}, + {"shift_to", chill_shift_to, METH_VARARGS, "shift_to"}, + {"peel", chill_peel, METH_VARARGS, "peel"}, + {"fuse", chill_fuse, METH_VARARGS, "fuse"}, + {"distribute", chill_distribute, METH_VARARGS, "distribute"}, + {"num_statements", chill_num_statements, METH_VARARGS, "number of statements in the current loop"}, + {NULL, NULL, 0, NULL} +}; +#endif + +static void register_globals(PyObject* m) { + // Preset globals + PyModule_AddStringConstant(m, "VERSION", CHILL_BUILD_VERSION); + PyModule_AddStringConstant(m, "dest", "C"); + PyModule_AddStringConstant(m, "C", "C"); + // Tile method + PyModule_AddIntConstant(m, "strided", 0); + PyModule_AddIntConstant(m, "counted", 1); + // Memory mode + PyModule_AddIntConstant(m, "global", 0); + PyModule_AddIntConstant(m, "shared", 1); + PyModule_AddIntConstant(m, "textured", 2); + // Bool flags + PyModule_AddIntConstant(m, "sync", 1); +} + +PyMODINIT_FUNC +initchill(void) // pass C methods to python +{ + DEBUG_PRINT("in C, initchill() to set up C methods to be called from python\n"); + PyObject* m = Py_InitModule("chill", ChillMethods); + register_globals(m); +} |