summaryrefslogtreecommitdiff
path: root/loop_cuda_rose.cc
diff options
context:
space:
mode:
Diffstat (limited to 'loop_cuda_rose.cc')
-rw-r--r--loop_cuda_rose.cc3734
1 files changed, 3734 insertions, 0 deletions
diff --git a/loop_cuda_rose.cc b/loop_cuda_rose.cc
new file mode 100644
index 0000000..c5633ee
--- /dev/null
+++ b/loop_cuda_rose.cc
@@ -0,0 +1,3734 @@
+/*****************************************************************************
+ Copyright (C) 2009 University of Utah
+ All Rights Reserved.
+
+ Purpose:
+ Cudaize methods
+
+ Notes:
+
+ History:
+ 1/7/10 Created by Gabe Rudy by migrating code from loop.cc
+ 31/1/11 Modified by Protonu Basu
+*****************************************************************************/
+#define TRANSFORMATION_FILE_INFO Sg_File_Info::generateDefaultFileInfoForTransformationNode()
+#include <code_gen/CG_stringBuilder.h>
+#include <codegen.h>
+#include <code_gen/CG_utils.h>
+#include <code_gen/CG_outputRepr.h>
+#include "loop_cuda_rose.hh"
+#include "loop.hh"
+#include <math.h>
+//#include <useful.h>
+#include "omegatools.hh"
+#include "ir_cudarose.hh"
+#include "ir_rose.hh"
+#include "ir_rose_utils.hh"
+#include "chill_error.hh"
+#include <vector>
+#include "Outliner.hh"
+//#define DEBUG
+using namespace omega;
+using namespace SageBuilder;
+using namespace SageInterface;
+//using namespace Outliner;
+//using namespace ASTtools;
+char *k_cuda_texture_memory; //protonu--added to track texture memory type
+//extern char *omega::k_cuda_texture_memory; //protonu--added to track texture memory type
+extern char *omega::k_ocg_comment;
+
+static int cudaDebug;
+class CudaStaticInit {
+public:
+ CudaStaticInit() {
+ cudaDebug = 0; //Change this to 1 for debug
+ }
+};
+static CudaStaticInit junkInitInstance__;
+
+std::string& upcase(std::string& s) {
+ for (int i = 0; i < s.size(); i++)
+ s[i] = toupper(s[i]);
+ return s;
+}
+
+void printVs(const std::vector<std::string>& curOrder) {
+ if (!cudaDebug) return;
+ for (int i = 0; i < curOrder.size(); i++) {
+ if (i > 0)
+ printf(",");
+ printf("%s", curOrder[i].c_str());
+ }
+ printf("\n");
+}
+
+void printVS(const std::vector<std::string>& curOrder) {
+ if(!cudaDebug) return;
+ for (int i = 0; i < curOrder.size(); i++) {
+ if (i > 0)
+ printf(",");
+ printf("%s", curOrder[i].c_str());
+ }
+ printf("\n");
+}
+
+LoopCuda::~LoopCuda() {
+ const int m = stmt.size();
+ for (int i = 0; i < m; i++)
+ stmt[i].code->clear();
+}
+
+bool LoopCuda::symbolExists(std::string s) {
+
+ if (body_symtab->find_variable(SgName(s.c_str()))
+ || parameter_symtab->find_variable(SgName(s.c_str())))
+ return true;
+ if (globals->lookup_variable_symbol(SgName(s.c_str())))
+ return true;
+ for (int i = 0; i < idxNames.size(); i++)
+ for (int j = 0; j < idxNames[i].size(); j++)
+ if (strcmp(idxNames[i][j].c_str(), s.c_str()) == 0)
+ return true;
+ return false;
+}
+
+void LoopCuda::addSync(int stmt_num, std::string idxName) {
+ //we store these and code-gen inserts sync to omega comments where stmt
+ //in loop that has idxName being generated
+ syncs.push_back(make_pair(stmt_num, idxName));
+}
+
+void LoopCuda::renameIndex(int stmt_num, std::string idx, std::string newName) {
+ int level = findCurLevel(stmt_num, idx);
+ if (idxNames.size() <= stmt_num || idxNames[stmt_num].size() < level)
+ throw std::runtime_error("Invalid statment number of index");
+ idxNames[stmt_num][level - 1] = newName.c_str();
+}
+
+enum Type {
+ Int
+};
+
+SgNode* wrapInIfFromMinBound(SgNode* then_part, SgForStatement* loop,
+ SgScopeStatement* symtab, SgVariableSymbol* bound_sym) {
+ // CG_roseBuilder *ocg = new CG_roseBuilder(
+
+ SgBinaryOp* test_expr = isSgBinaryOp(loop->get_test_expr());
+ SgExpression* upperBound;
+ SgExpression* conditional;
+ upperBound = test_expr->get_rhs_operand();
+ CG_outputRepr *ifstmt;
+
+ SgCallExpression *call;
+ if (call = isSgCallExpression(upperBound))
+ if (isSgVarRefExp(call->get_function())->get_symbol()->get_name().getString()
+ == "__rose_lt") {
+ SgExprListExp* arg_list = call->get_args();
+ SgExpression *if_bound = *(arg_list->get_expressions().begin());
+ /*This relies on the minimum expression being the rhs operand of
+ * the min instruction.
+ */
+ SgIfStmt *ifstmt = buildIfStmt(
+ buildLessOrEqualOp(buildVarRefExp(bound_sym), if_bound),
+ isSgStatement(then_part), NULL);
+ return isSgNode(ifstmt);
+
+ }
+
+/* if (isSgConditionalExp(upperBound)) {
+ conditional = isSgConditionalExp(upperBound)->get_conditional_exp();
+
+ if (isSgBinaryOp(conditional)) {
+ SgBinaryOp* binop = isSgBinaryOp(conditional);
+
+ if (isSgLessThanOp(binop) || isSgLessOrEqualOp(binop)) {
+ SgIfStmt *ifstmt = buildIfStmt(
+ buildLessOrEqualOp(buildVarRefExp(bound_sym),
+ test_expr), isSgStatement(then_part), NULL);
+ return isSgNode(ifstmt);
+ }
+
+ }
+
+ }
+*/
+ return then_part;
+}
+
+/**
+ * This would be better if it was done by a CHiLL xformation instead of at codegen
+ *
+ * state:
+ * for(...)
+ * for(...)
+ * cur_body
+ * stmt1
+ *
+ * stm1 is in-between two loops that are going to be reduced. The
+ * solution is to put stmt1 at the end of cur_body but conditionally run
+ * in on the last step of the for loop.
+ *
+ * A CHiLL command that would work better:
+ *
+ * for(...)
+ * stmt0
+ * for(for i=0; i<n; i++)
+ * cur_body
+ * stmt1
+ * =>
+ * for(...)
+ * for(for i=0; i<n; i++)
+ * if(i==0) stmt0
+ * cur_body
+ * if(i==n-1) stmt1
+ */
+
+std::vector<SgForStatement*> findCommentedFors(const char* index, SgNode* tnl) {
+ std::vector<SgForStatement *> result;
+ bool next_loop_ok = false;
+
+ if (isSgBasicBlock(tnl)) {
+
+ SgStatementPtrList& list = isSgBasicBlock(tnl)->get_statements();
+
+ for (SgStatementPtrList::iterator it = list.begin(); it != list.end();
+ it++) {
+ std::vector<SgForStatement*> t = findCommentedFors(index,
+ isSgNode(*it));
+ std::copy(t.begin(), t.end(), back_inserter(result));
+ }
+ } else if (isSgForStatement(tnl)) {
+
+ AstTextAttribute* att =
+ (AstTextAttribute*) (isSgNode(tnl)->getAttribute(
+ "omega_comment"));
+ std::string comment = att->toString();
+
+ if (comment.find("~cuda~") != std::string::npos
+ && comment.find("preferredIdx: ") != std::string::npos) {
+ std::string idx = comment.substr(
+ comment.find("preferredIdx: ") + 14, std::string::npos);
+ if (idx.find(" ") != std::string::npos)
+ idx = idx.substr(0, idx.find(" "));
+ if (strcmp(idx.c_str(), index) == 0)
+ next_loop_ok = true;
+ }
+
+ if (next_loop_ok) {
+ //printf("found loop %s\n", static_cast<tree_for *>(tn)->index()->name());
+ result.push_back(isSgForStatement(tnl));
+ } else {
+ //printf("looking down for loop %s\n", static_cast<tree_for *>(tn)->index()->name());
+ std::vector<SgForStatement*> t = findCommentedFors(index,
+ isSgForStatement(tnl)->get_loop_body());
+ std::copy(t.begin(), t.end(), back_inserter(result));
+ }
+ next_loop_ok = false;
+ } else if (isSgIfStmt(tnl)) {
+ //printf("looking down if\n");
+ SgIfStmt *tni = isSgIfStmt(tnl);
+ std::vector<SgForStatement*> t = findCommentedFors(index,
+ tni->get_true_body());
+ std::copy(t.begin(), t.end(), back_inserter(result));
+ }
+
+ return result;
+}
+
+SgNode* forReduce(SgForStatement* loop, SgVariableSymbol* reduceIndex,
+ SgScopeStatement* body_syms) {
+ //We did the replacements all at once with recursiveFindPreferedIdxs
+ //replacements r;
+ //r.oldsyms.append(loop->index());
+ //r.newsyms.append(reduceIndex);
+ //tree_for* new_loop = (tree_for*)loop->clone_helper(&r, true);
+ SgForStatement* new_loop = loop;
+
+ //return body one loops in
+ SgNode* tnl = loop_body_at_level(new_loop, 1);
+ //wrap in conditional if necessary
+ tnl = wrapInIfFromMinBound(tnl, new_loop, body_syms, reduceIndex);
+ return tnl;
+}
+
+void recursiveFindRefs(SgNode* code, std::set<const SgVariableSymbol *>& syms,
+ SgFunctionDefinition* def) {
+
+ SgStatement* s = isSgStatement(code);
+ // L = {symbols defined within 's'}, local variables declared within 's'
+ ASTtools::VarSymSet_t L;
+ ASTtools::collectDefdVarSyms(s, L);
+ //dump (L, "L = ");
+
+ // U = {symbols used within 's'}
+ ASTtools::VarSymSet_t U;
+ ASTtools::collectRefdVarSyms(s, U);
+ //dump (U, "U = ");
+
+ // U - L = {symbols used within 's' but not defined in 's'}
+ // variable references to non-local-declared variables
+ ASTtools::VarSymSet_t diff_U_L;
+ set_difference(U.begin(), U.end(), L.begin(), L.end(),
+ inserter(diff_U_L, diff_U_L.begin()));
+ //dump (diff_U_L, "U - L = ");
+
+ // Q = {symbols defined within the function surrounding 's' that are
+ // visible at 's'}, including function parameters
+ ASTtools::VarSymSet_t Q;
+ ASTtools::collectLocalVisibleVarSyms(def->get_declaration(), s, Q);
+// dump (Q, "Q = ");
+
+ // (U - L) \cap Q = {variables that need to be passed as parameters
+ // to the outlined function}
+ // a sub set of variables that are not globally visible (no need to pass at all)
+ // It excludes the variables with a scope between global and the enclosing function
+ set_intersection(diff_U_L.begin(), diff_U_L.end(), Q.begin(), Q.end(),
+ inserter(syms, syms.begin()));
+
+ /* std::vector<SgVariableSymbol *> scalars;
+ //SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode();
+ SgStatement* stmt;
+ SgExpression* exp;
+ if (tnl != NULL) {
+ if(stmt = isSgStatement(tnl)){
+ if(isSgBasicBlock(stmt)){
+ SgStatementPtrList& stmts = isSgBasicBlock(stmt)->get_statements();
+ for(int i =0; i < stmts.size(); i++){
+ //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(stmts[i]));
+ std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(stmts[i]));
+ //delete r;
+ std::copy(a.begin(), a.end(), back_inserter(scalars));
+ }
+
+ }
+ else if(isSgForStatement(stmt)){
+
+ SgForStatement *tnf = isSgForStatement(stmt);
+ //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tnf->get_loop_body()));
+ std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tnf->get_loop_body()));
+ //delete r;
+ std::copy(a.begin(), a.end(), back_inserter(scalars));
+ }
+ else if(isSgFortranDo(stmt)){
+ SgFortranDo *tfortran = isSgFortranDo(stmt);
+ omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tfortran->get_body()));
+ std::vector<SgVariableSymbol *> a = recursiveFindRefs(r);
+ delete r;
+ std::copy(a.begin(), a.end(), back_inserter(scalars));
+ }
+
+ else if(isSgIfStmt(stmt) ){
+ SgIfStmt* tni = isSgIfStmt(stmt);
+ //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(tni->get_conditional()));
+ std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tni->get_conditional()));
+ //delete r;
+ std::copy(a.begin(), a.end(), back_inserter(scalars));
+ //r = new omega::CG_roseRepr(isSgNode(tni->get_true_body()));
+ a = recursiveFindRefs(isSgNode(tni->get_true_body()));
+ //delete r;
+ std::copy(a.begin(), a.end(), back_inserter(scalars));
+ //r = new omega::CG_roseRepr(isSgNode(tni->get_false_body()));
+ a = recursiveFindRefs(isSgNode(tni->get_false_body()));
+ //delete r;
+ std::copy(a.begin(), a.end(), back_inserter(scalars));
+ }
+ else if(isSgExprStatement(stmt)) {
+ //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgExpression(isSgExprStatement(stmt)->get_expression()));
+ std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(isSgExprStatement(stmt)->get_expression()));
+ //delete r;
+ std::copy(a.begin(), a.end(), back_inserter(scalars));
+
+ }
+ }
+ }
+ else{
+ SgExpression* op = isSgExpression(tnl);
+ if(isSgVarRefExp(op)){
+
+ scalars.push_back(isSgVarRefExp(op)->get_symbol());
+
+ }
+ else if( isSgAssignOp(op)){
+ //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgAssignOp(op)->get_lhs_operand());
+ std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_lhs_operand()));
+ //delete r1;
+ std::copy(a1.begin(), a1.end(), back_inserter(scalars));
+ //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgAssignOp(op)->get_rhs_operand());
+ std::vector<SgVariableSymbol *> a2 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_rhs_operand()));
+ //delete r2;
+ std::copy(a2.begin(), a2.end(), back_inserter(scalars));
+
+ }
+ else if(isSgBinaryOp(op)){
+ // omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_lhs_operand());
+ std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgBinaryOp(op)->get_lhs_operand()));
+ //delete r1;
+ std::copy(a1.begin(), a1.end(), back_inserter(scalars));
+ //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_rhs_operand());
+ std::vector<SgVariableSymbol *> a2 = recursiveFindRefs((isSgBinaryOp(op)->get_rhs_operand()));
+ //delete r2;
+ std::copy(a2.begin(), a2.end(), back_inserter(scalars));
+ }
+ else if(isSgUnaryOp(op)){
+ //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgUnaryOp(op)->get_operand());
+ std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgUnaryOp(op)->get_operand()));
+ //delete r1;
+ std::copy(a1.begin(), a1.end(), back_inserter(scalars));
+ }
+
+ }
+ return scalars;
+
+
+ */
+
+}
+
+SgNode* recursiveFindReplacePreferedIdxs(SgNode* code, SgSymbolTable* body_syms,
+ SgSymbolTable* param_syms, SgScopeStatement* body,
+ std::map<std::string, SgVariableSymbol*>& loop_idxs,
+ SgGlobal* globalscope, bool sync = false) {
+ //tree_node_list* tnl = new tree_node_list;
+ //tree_node_list_iter tnli(code);
+ SgVariableSymbol* idxSym = 0;
+ std::vector<SgStatement*> r1;
+ std::vector<SgNode*> r2;
+ SgNode* tnli;
+ SgNode* tnli1;
+ SgNode* tnli2;
+ SgBasicBlock * clone;
+
+ if (isSgForStatement(code)) {
+ AstTextAttribute* att =
+ (AstTextAttribute*) (isSgNode(code)->getAttribute(
+ "omega_comment"));
+
+ std::string comment;
+ if (att != NULL)
+ comment = att->toString();
+
+ if (comment.find("~cuda~") != std::string::npos
+ && comment.find("preferredIdx: ") != std::string::npos) {
+ std::string idx = comment.substr(
+ comment.find("preferredIdx: ") + 14, std::string::npos);
+ if (idx.find(" ") != std::string::npos)
+ idx = idx.substr(0, idx.find(" "));
+ if (loop_idxs.find(idx) != loop_idxs.end())
+ idxSym = loop_idxs.find(idx)->second;
+ //Get the proc variable sybol for this preferred index
+ if (idxSym == 0) {
+ idxSym = body_syms->find_variable(idx.c_str());
+ if (!idxSym)
+ idxSym = param_syms->find_variable(idx.c_str());
+ //printf("idx not found: lookup %p\n", idxSym);
+ if (!idxSym) {
+ SgVariableDeclaration* defn = buildVariableDeclaration(
+ SgName((char*) idx.c_str()), buildIntType());
+ //idxSym = new var_sym(type_s32, (char*)idx.c_str());
+ SgInitializedNamePtrList& variables = defn->get_variables();
+ SgInitializedNamePtrList::const_iterator i =
+ variables.begin();
+ SgInitializedName* initializedName = *i;
+ SgVariableSymbol* vs = new SgVariableSymbol(
+ initializedName);
+ prependStatement(defn, body);
+ vs->set_parent(body_syms);
+ body_syms->insert(SgName((char*) idx.c_str()), vs);
+ idxSym = vs;
+ //printf("idx created and inserted\n");
+ }
+ //Now insert into our map for future
+ if (cudaDebug)
+ std::cout << idx << "\n\n";
+ loop_idxs.insert(make_pair(idx, idxSym));
+ }
+ //See if we have a sync as well
+ if (comment.find("sync") != std::string::npos) {
+ //printf("Inserting sync after current block\n");
+ sync = true;
+ }
+
+ }
+ if (idxSym) {
+ SgForInitStatement* list =
+ isSgForStatement(code)->get_for_init_stmt();
+ SgStatementPtrList& initStatements = list->get_init_stmt();
+ SgStatementPtrList::const_iterator j = initStatements.begin();
+ const SgVariableSymbol* index;
+
+ if (SgExprStatement *expr = isSgExprStatement(*j))
+ if (SgAssignOp* op = isSgAssignOp(expr->get_expression()))
+ if (SgVarRefExp* var_ref = isSgVarRefExp(
+ op->get_lhs_operand()))
+ index = var_ref->get_symbol();
+
+ std::vector<SgVarRefExp *> array = substitute(code, index, NULL,
+ isSgNode(body_syms));
+
+ for (int j = 0; j < array.size(); j++)
+ array[j]->set_symbol(idxSym);
+ }
+
+ SgStatement* body_ = isSgStatement(
+ recursiveFindReplacePreferedIdxs(
+ isSgNode((isSgForStatement(code)->get_loop_body())),
+ body_syms, param_syms, body, loop_idxs, globalscope));
+
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code);
+ omega::CG_outputRepr* block = tnl->clone();
+ tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+
+ isSgForStatement(tnli)->set_loop_body(body_);
+ body_->set_parent(tnli);
+
+ if (idxSym) {
+ SgForInitStatement* list =
+ isSgForStatement(tnli)->get_for_init_stmt();
+ SgStatementPtrList& initStatements = list->get_init_stmt();
+ SgStatementPtrList::const_iterator j = initStatements.begin();
+ const SgVariableSymbol* index;
+
+ if (SgExprStatement *expr = isSgExprStatement(*j))
+ if (SgAssignOp* op = isSgAssignOp(expr->get_expression()))
+ if (SgVarRefExp* var_ref = isSgVarRefExp(
+ op->get_lhs_operand()))
+ index = var_ref->get_symbol();
+
+ std::vector<SgVarRefExp *> array = substitute(tnli, index, NULL,
+ isSgNode(body_syms));
+
+ for (int j = 0; j < array.size(); j++)
+ array[j]->set_symbol(idxSym);
+ }
+ // std::cout << isSgNode(body_)->unparseToString() << "\n\n";
+ if (att != NULL)
+ tnli->setAttribute("omega_comment", att);
+
+ if (sync) {
+ SgName name_syncthreads("__syncthreads");
+ SgFunctionSymbol * syncthreads_symbol =
+ globalscope->lookup_function_symbol(name_syncthreads);
+
+ // Create a call to __syncthreads():
+ SgFunctionCallExp * syncthreads_call = buildFunctionCallExp(
+ syncthreads_symbol, buildExprListExp());
+
+ SgExprStatement* stmt = buildExprStatement(syncthreads_call);
+
+ /* if (SgBasicBlock* bb = isSgBasicBlock(
+ isSgForStatement(code)->get_loop_body()))
+ appendStatement(isSgStatement(stmt), bb);
+
+ else if (SgStatement* ss = isSgStatement(
+ isSgForStatement(code)->get_loop_body())) {
+ SgBasicBlock* bb2 = buildBasicBlock();
+
+ isSgNode(ss)->set_parent(bb2);
+ appendStatement(ss, bb2);
+
+ appendStatement(isSgStatement(stmt), bb2);
+ isSgNode(stmt)->set_parent(bb2);
+ isSgForStatement(code)->set_loop_body(bb2);
+ isSgNode(bb2)->set_parent(code);
+ }
+ */
+
+ SgBasicBlock* bb2 = buildBasicBlock();
+
+ bb2->append_statement(isSgStatement(tnli));
+ bb2->append_statement(stmt);
+ /* SgNode* parent = code->get_parent();
+ if(!isSgStatement(parent))
+ throw loop_error("Parent not a statement");
+
+ if(isSgForStatement(parent)){
+ if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss);
+ omega::CG_outputRepr* block= tnl->clone();
+
+ SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+ SgBasicBlock* bb2 = buildBasicBlock();
+
+ isSgNode(new_ss)->set_parent(bb2);
+ appendStatement(isSgStatement(new_ss), bb2);
+ appendStatement(isSgStatement(stmt), bb2);
+ isSgNode(stmt)->set_parent(bb2);
+
+ isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2));
+
+ }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body()))
+ isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false);
+ else
+ throw loop_error("parent statement type undefined!!");
+
+ }
+ else if(isSgBasicBlock(parent))
+ isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false);
+ else
+ throw loop_error("parent statement type undefined!!");
+
+ //tnl->print();
+ *
+ *
+ */
+ sync = true;
+ return isSgNode(bb2);
+
+ } else
+ return tnli;
+ } else if (isSgIfStmt(code)) {
+ SgStatement* body_ = isSgStatement(
+ recursiveFindReplacePreferedIdxs(
+ isSgNode((isSgIfStmt(code)->get_true_body())),
+ body_syms, param_syms, body, loop_idxs, globalscope));
+
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code);
+ omega::CG_outputRepr* block = tnl->clone();
+ tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+
+ isSgIfStmt(tnli)->set_true_body(body_);
+
+ if ((isSgIfStmt(code)->get_false_body()))
+ isSgIfStmt(tnli)->set_false_body(
+ isSgStatement(
+ recursiveFindReplacePreferedIdxs(
+ isSgNode(
+ (isSgIfStmt(code)->get_false_body())),
+ body_syms, param_syms, body, loop_idxs,
+ globalscope)));
+
+ return tnli;
+ } else if (isSgStatement(code) && !isSgBasicBlock(code)) {
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code);
+ omega::CG_outputRepr* block = tnl->clone();
+ tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+
+ return tnli;
+
+ } else if (isSgBasicBlock(code)) {
+ SgStatementPtrList& tnl = isSgBasicBlock(code)->get_statements();
+
+ SgStatementPtrList::iterator temp;
+ clone = buildBasicBlock();
+ bool sync_found = false;
+ for (SgStatementPtrList::const_iterator it = tnl.begin();
+ it != tnl.end(); it++) {
+
+ if (isSgForStatement(*it)) {
+ AstTextAttribute* att =
+ (AstTextAttribute*) (isSgNode(*it)->getAttribute(
+ "omega_comment"));
+
+ std::string comment;
+ if (att != NULL)
+ comment = att->toString();
+
+ if (comment.find("~cuda~") != std::string::npos
+ && comment.find("preferredIdx: ")
+ != std::string::npos) {
+ std::string idx = comment.substr(
+ comment.find("preferredIdx: ") + 14,
+ std::string::npos);
+ if (idx.find(" ") != std::string::npos)
+ idx = idx.substr(0, idx.find(" "));
+ //printf("sym_tab preferred index: %s\n", idx.c_str());
+ if (loop_idxs.find(idx) != loop_idxs.end())
+ idxSym = loop_idxs.find(idx)->second;
+ //Get the proc variable sybol for this preferred index
+ if (idxSym == 0) {
+ idxSym = body_syms->find_variable(idx.c_str());
+ if (!idxSym)
+ idxSym = param_syms->find_variable(idx.c_str());
+ //printf("idx not found: lookup %p\n", idxSym);
+ if (!idxSym) {
+ SgVariableDeclaration* defn =
+ buildVariableDeclaration(
+ SgName((char*) idx.c_str()),
+ buildIntType());
+ //idxSym = new var_sym(type_s32, (char*)idx.c_str());
+ SgInitializedNamePtrList& variables =
+ defn->get_variables();
+ SgInitializedNamePtrList::const_iterator i =
+ variables.begin();
+ SgInitializedName* initializedName = *i;
+ SgVariableSymbol* vs = new SgVariableSymbol(
+ initializedName);
+ prependStatement(defn, body);
+ vs->set_parent(body_syms);
+ body_syms->insert(SgName((char*) idx.c_str()), vs);
+ //printf("idx created and inserted\n");
+ idxSym = vs;
+ }
+ //Now insert into our map for future
+ if (cudaDebug)
+ std::cout << idx << "\n\n";
+ loop_idxs.insert(make_pair(idx, idxSym));
+
+ }
+ //See if we have a sync as well
+ if (comment.find("sync") != std::string::npos) {
+ //printf("Inserting sync after current block\n");
+ sync = true;
+ }
+
+ }
+ if (idxSym) {
+ SgForInitStatement* list =
+ isSgForStatement(*it)->get_for_init_stmt();
+ SgStatementPtrList& initStatements = list->get_init_stmt();
+ SgStatementPtrList::const_iterator j =
+ initStatements.begin();
+ const SgVariableSymbol* index;
+
+ if (SgExprStatement *expr = isSgExprStatement(*j))
+ if (SgAssignOp* op = isSgAssignOp(
+ expr->get_expression()))
+ if (SgVarRefExp* var_ref = isSgVarRefExp(
+ op->get_lhs_operand()))
+ index = var_ref->get_symbol();
+
+ std::vector<SgVarRefExp *> array = substitute(*it, index,
+ NULL, isSgNode(body_syms));
+
+ for (int j = 0; j < array.size(); j++)
+ array[j]->set_symbol(idxSym);
+
+ }
+
+ SgStatement* body_ =
+ isSgStatement(
+ recursiveFindReplacePreferedIdxs(
+ isSgNode(
+ (isSgForStatement(*it)->get_loop_body())),
+ body_syms, param_syms, body, loop_idxs,
+ globalscope));
+
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it);
+ omega::CG_outputRepr* block = tnl->clone();
+ tnli =
+ static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+
+ isSgForStatement(tnli)->set_loop_body(body_);
+ body_->set_parent(tnli);
+ if (idxSym) {
+ SgForInitStatement* list =
+ isSgForStatement(tnli)->get_for_init_stmt();
+ SgStatementPtrList& initStatements = list->get_init_stmt();
+ SgStatementPtrList::const_iterator j =
+ initStatements.begin();
+ const SgVariableSymbol* index;
+
+ if (SgExprStatement *expr = isSgExprStatement(*j))
+ if (SgAssignOp* op = isSgAssignOp(
+ expr->get_expression()))
+ if (SgVarRefExp* var_ref = isSgVarRefExp(
+ op->get_lhs_operand()))
+ index = var_ref->get_symbol();
+
+ std::vector<SgVarRefExp *> array = substitute(tnli, index,
+ NULL, isSgNode(body_syms));
+
+ for (int j = 0; j < array.size(); j++)
+ array[j]->set_symbol(idxSym);
+ }
+ idxSym = 0;
+ // std::cout << isSgNode(body_)->unparseToString() << "\n\n";
+ if (att != NULL)
+ tnli->setAttribute("omega_comment", att);
+ clone->append_statement(isSgStatement(tnli));
+ if (sync) {
+ SgName name_syncthreads("__syncthreads");
+ SgFunctionSymbol * syncthreads_symbol =
+ globalscope->lookup_function_symbol(
+ name_syncthreads);
+
+ // Create a call to __syncthreads():
+ SgFunctionCallExp * syncthreads_call = buildFunctionCallExp(
+ syncthreads_symbol, buildExprListExp());
+
+ SgExprStatement* stmt = buildExprStatement(
+ syncthreads_call);
+
+ /* if (SgBasicBlock* bb = isSgBasicBlock(
+ isSgForStatement(code)->get_loop_body()))
+ appendStatement(isSgStatement(stmt), bb);
+
+ else if (SgStatement* ss = isSgStatement(
+ isSgForStatement(code)->get_loop_body())) {
+ SgBasicBlock* bb2 = buildBasicBlock();
+
+ isSgNode(ss)->set_parent(bb2);
+ appendStatement(ss, bb2);
+
+ appendStatement(isSgStatement(stmt), bb2);
+ isSgNode(stmt)->set_parent(bb2);
+ isSgForStatement(code)->set_loop_body(bb2);
+ isSgNode(bb2)->set_parent(code);
+ }
+ */
+
+ //SgBasicBlock* bb2 = buildBasicBlock();
+ clone->append_statement(stmt);
+ /* SgNode* parent = code->get_parent();
+ if(!isSgStatement(parent))
+ throw loop_error("Parent not a statement");
+
+ if(isSgForStatement(parent)){
+ if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss);
+ omega::CG_outputRepr* block= tnl->clone();
+
+ SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+ SgBasicBlock* bb2 = buildBasicBlock();
+
+ isSgNode(new_ss)->set_parent(bb2);
+ appendStatement(isSgStatement(new_ss), bb2);
+ appendStatement(isSgStatement(stmt), bb2);
+ isSgNode(stmt)->set_parent(bb2);
+
+ isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2));
+
+ }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body()))
+ isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false);
+ else
+ throw loop_error("parent statement type undefined!!");
+
+ }
+ else if(isSgBasicBlock(parent))
+ isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false);
+ else
+ throw loop_error("parent statement type undefined!!");
+
+ //tnl->print();
+ *
+ *
+ */
+ sync = true;
+ // return isSgNode(bb2);
+
+ }
+
+ // return tnli;
+ } else if (isSgIfStmt(*it)) {
+ SgStatement* body_ = isSgStatement(
+ recursiveFindReplacePreferedIdxs(
+ isSgNode((isSgIfStmt(*it)->get_true_body())),
+ body_syms, param_syms, body, loop_idxs,
+ globalscope));
+
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it);
+ omega::CG_outputRepr* block = tnl->clone();
+ tnli1 =
+ static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+
+ isSgIfStmt(tnli1)->set_true_body(body_);
+
+ if ((isSgIfStmt(*it)->get_false_body()))
+ isSgIfStmt(tnli1)->set_false_body(
+ isSgStatement(
+ recursiveFindReplacePreferedIdxs(
+ isSgNode(
+ (isSgIfStmt(*it)->get_false_body())),
+ body_syms, param_syms, body,
+ loop_idxs, globalscope)));
+
+ clone->append_statement(isSgStatement(tnli1));
+ //return tnli;
+ } else if (isSgStatement(*it)) {
+ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it);
+ omega::CG_outputRepr* block = tnl->clone();
+ tnli2 =
+ static_cast<const omega::CG_roseRepr *>(block)->GetCode();
+
+ clone->append_statement(isSgStatement(tnli2));
+ //return tnli;
+
+ }
+ }
+
+ return isSgNode(clone);
+
+ }
+
+ /* if (!isSgBasicBlock(
+ recursiveFindReplacePreferedIdxs(isSgNode(*it), body_syms,
+ param_syms, body, loop_idxs, globalscope))) {
+ SgStatement *to_push = isSgStatement(
+ recursiveFindReplacePreferedIdxs(isSgNode(*it),
+ body_syms, param_syms, body, loop_idxs,
+ globalscope, sync));
+ clone->append_statement(to_push);
+
+ if ((sync_found) && isSgForStatement(to_push)) {
+ SgName name_syncthreads("__syncthreads");
+ SgFunctionSymbol * syncthreads_symbol =
+ globalscope->lookup_function_symbol(
+ name_syncthreads);
+
+ // Create a call to __syncthreads():
+ SgFunctionCallExp * syncthreads_call = buildFunctionCallExp(
+ syncthreads_symbol, buildExprListExp());
+
+ SgExprStatement* stmt = buildExprStatement(
+ syncthreads_call);
+
+ clone->append_statement(isSgStatement(stmt));
+ }
+ // std::cout<<isSgNode(*it)->unparseToString()<<"\n\n";
+ } else {
+
+ SgStatementPtrList& tnl2 = isSgBasicBlock(
+ recursiveFindReplacePreferedIdxs(isSgNode(*it),
+ body_syms, param_syms, body, loop_idxs,
+ globalscope))->get_statements();
+ for (SgStatementPtrList::const_iterator it2 = tnl2.begin();
+ it2 != tnl2.end(); it2++) {
+ clone->append_statement(*it2);
+
+ sync_found = true;
+ // std::cout<<isSgNode(*it2)->unparseToString()<<"\n\n";
+ }
+ }
+
+ }
+ return isSgNode(clone);
+ }
+ */
+// return tnl;
+}
+
+// loop_vars -> array references
+// loop_idxs -> <idx_name,idx_sym> map for when we encounter a loop with a different preferredIndex
+// dim_vars -> out param, fills with <old,new> var_sym pair for 2D array dimentions (messy stuff)
+SgNode* swapVarReferences(SgNode* code,
+ std::set<const SgVariableSymbol *>& syms, SgSymbolTable* param,
+ SgSymbolTable* body, SgScopeStatement* body_stmt) {
+ //Iterate over every expression, looking up each variable and type
+ //reference used and possibly replacing it or adding it to our symbol
+ //table
+ //
+ //We use the built-in cloning helper methods to seriously help us with this!
+
+ //Need to do a recursive mark
+
+ std::set<const SgVariableSymbol *>::iterator myIterator;
+ for (myIterator = syms.begin(); myIterator != syms.end(); myIterator++) {
+ SgName var_name = (*myIterator)->get_name();
+ std::string x = var_name.getString();
+
+ if ((param->find_variable(var_name) == NULL)
+ && (body->find_variable(var_name) == NULL)) {
+ SgInitializedName* decl = (*myIterator)->get_declaration();
+
+ SgVariableSymbol* dvs = new SgVariableSymbol(decl);
+ SgVariableDeclaration* var_decl = buildVariableDeclaration(
+ dvs->get_name(), dvs->get_type());
+
+ AstTextAttribute* att = (AstTextAttribute*) (isSgNode(
+ decl->get_declaration())->getAttribute("__shared__"));
+ if (isSgNode(decl->get_declaration())->attributeExists(
+ "__shared__"))
+ var_decl->get_declarationModifier().get_storageModifier().setCudaShared();
+
+ appendStatement(var_decl, body_stmt);
+
+ dvs->set_parent(body);
+ body->insert(var_name, dvs);
+ }
+
+ std::vector<SgVarRefExp *> array = substitute(code, *myIterator, NULL,
+ isSgNode(body));
+
+ SgVariableSymbol* var = (SgVariableSymbol*) (*myIterator);
+ for (int j = 0; j < array.size(); j++)
+ array[j]->set_symbol(var);
+ }
+
+ return code;
+}
+
+bool LoopCuda::validIndexes(int stmt, const std::vector<std::string>& idxs) {
+ for (int i = 0; i < idxs.size(); i++) {
+ bool found = false;
+ for (int j = 0; j < idxNames[stmt].size(); j++) {
+ if (strcmp(idxNames[stmt][j].c_str(), idxs[i].c_str()) == 0) {
+ found = true;
+ }
+ }
+ if (!found) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool LoopCuda::cudaize_v2(std::string kernel_name,
+ std::map<std::string, int> array_dims,
+ std::vector<std::string> blockIdxs,
+ std::vector<std::string> threadIdxs) {
+ CG_outputBuilder *ocg = ir->builder();
+ int stmt_num = 0;
+ if (cudaDebug) {
+ printf("cudaize_v2(%s, {", kernel_name.c_str());
+ //for(
+ printf("}, blocks={");
+ printVs(blockIdxs);
+ printf("}, thread={");
+ printVs(threadIdxs);
+ printf("})\n");
+ }
+
+ this->array_dims = array_dims;
+ if (!validIndexes(stmt_num, blockIdxs)) {
+ throw std::runtime_error("One of the indexes in the block list was not "
+ "found in the current set of indexes.");
+ }
+ if (!validIndexes(stmt_num, threadIdxs)) {
+ throw std::runtime_error(
+ "One of the indexes in the thread list was not "
+ "found in the current set of indexes.");
+ }
+ if (blockIdxs.size() == 0)
+ throw std::runtime_error("Cudaize: Need at least one block dimention");
+ int block_level = 0;
+ //Now, we will determine the actual size (if possible, otherwise
+ //complain) for the block dimentions and thread dimentions based on our
+ //indexes and the relations for our stmt;
+ for (int i = 0; i < blockIdxs.size(); i++) {
+ int level = findCurLevel(stmt_num, blockIdxs[i]);
+ int ub, lb;
+ CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb);
+ if (lb != 0) {
+ //attempt to "normalize" the loop with an in-place tile and then re-check our bounds
+ if (cudaDebug)
+ printf(
+ "Cudaize: doing tile at level %d to try and normalize lower bounds\n",
+ level);
+ tile(stmt_num, level, 1, level, CountedTile);
+ idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); //TODO: possibly handle this for all sibling stmts
+ ubrepr = extractCudaUB(stmt_num, level, ub, lb);
+ }
+ if (lb != 0) {
+ char buf[1024];
+ sprintf(buf,
+ "Cudaize: Loop at level %d does not have 0 as it's lower bound",
+ level);
+ throw std::runtime_error(buf);
+ }
+ if (ub < 0) {
+ char buf[1024];
+ sprintf(buf,
+ "Cudaize: Loop at level %d does not have a hard upper bound",
+ level);
+ //Anand: Commenting out error indication for lack of constant upper bound
+ //throw std::runtime_error(buf);
+ }
+ if (cudaDebug)
+ printf("block idx %s level %d lb: %d ub %d\n", blockIdxs[i].c_str(),
+ level, lb, ub);
+ if (i == 0) {
+ block_level = level;
+ if (ubrepr == NULL) {
+ cu_bx = ub + 1;
+ cu_bx_repr = NULL;
+ } else {
+ cu_bx = 0;
+ cu_bx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
+ }
+ idxNames[stmt_num][level - 1] = "bx";
+ } else if (i == 1) {
+ if (ubrepr == NULL) {
+ cu_by = ub + 1;
+ cu_by_repr = NULL;
+ } else {
+ cu_by = 0;
+ cu_by_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
+ }
+ idxNames[stmt_num][level - 1] = "by";
+ }
+ }
+ if (!cu_by && !cu_by_repr)
+ block_level = 0;
+ int thread_level1 = 0;
+ int thread_level2 = 0;
+ for (int i = 0; i < threadIdxs.size(); i++) {
+ int level = findCurLevel(stmt_num, threadIdxs[i]);
+ int ub, lb;
+ CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb);
+ if (lb != 0) {
+ //attempt to "normalize" the loop with an in-place tile and then re-check our bounds
+ if (cudaDebug)
+ printf(
+ "Cudaize: doing tile at level %d to try and normalize lower bounds\n",
+ level);
+ tile(stmt_num, level, 1, level, CountedTile);
+ idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), "");
+ ubrepr = extractCudaUB(stmt_num, level, ub, lb);
+ }
+ if (lb != 0) {
+ char buf[1024];
+ sprintf(buf,
+ "Cudaize: Loop at level %d does not have 0 as it's lower bound",
+ level);
+ throw std::runtime_error(buf);
+ }
+ if (ub < 0) {
+ char buf[1024];
+ sprintf(buf,
+ "Cudaize: Loop at level %d does not have a hard upper bound",
+ level);
+ //Anand: Commenting out error indication for lack of constant upper bound
+ //throw std::runtime_error(buf);
+ }
+
+ if (cudaDebug)
+ printf("thread idx %s level %d lb: %d ub %d\n",
+ threadIdxs[i].c_str(), level, lb, ub);
+ if (i == 0) {
+ thread_level1 = level;
+ if (ubrepr == NULL) {
+ cu_tx = ub + 1;
+ cu_tx_repr = NULL;
+ } else {
+ cu_tx = 0;
+ cu_tx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
+ }
+ idxNames[stmt_num][level - 1] = "tx";
+ } else if (i == 1) {
+ thread_level2 = level;
+ if (ubrepr == NULL) {
+ cu_ty = ub + 1;
+ cu_ty_repr = NULL;
+ } else {
+ cu_ty = 0;
+ cu_ty_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
+ }
+ idxNames[stmt_num][level - 1] = "ty";
+ } else if (i == 2) {
+ if (ubrepr == NULL) {
+ cu_tz = ub + 1;
+ cu_tz_repr = NULL;
+ } else {
+ cu_tz = 0;
+ cu_tz_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
+ }
+ idxNames[stmt_num][level - 1] = "tz";
+ }
+ }
+ if (!cu_ty && !cu_ty_repr)
+ thread_level1 = 0;
+ if (!cu_tz && !cu_tz_repr)
+ thread_level2 = 0;
+
+ //Make changes to nonsplitlevels
+ const int m = stmt.size();
+ for (int i = 0; i < m; i++) {
+ if (block_level) {
+ //stmt[i].nonSplitLevels.append((block_level)*2);
+ stmt_nonSplitLevels[i].push_back((block_level) * 2);
+ }
+ if (thread_level1) {
+ //stmt[i].nonSplitLevels.append((thread_level1)*2);
+ stmt_nonSplitLevels[i].push_back((thread_level1) * 2);
+ }
+ if (thread_level2) {
+ //stmt[i].nonSplitLevels.append((thread_level1)*2);
+ stmt_nonSplitLevels[i].push_back((thread_level1) * 2);
+ }
+ }
+
+ if (cudaDebug) {
+ printf("Codegen: current names: ");
+ printVS(idxNames[stmt_num]);
+ }
+ //Set codegen flag
+ code_gen_flags |= GenCudaizeV2;
+
+ //Save array dimention sizes
+ this->array_dims = array_dims;
+ cu_kernel_name = kernel_name.c_str();
+
+}
+
+/*
+ * setupConstantVar
+ * handles constant variable declaration
+ * and adds a global constant variable
+ * parameters:
+ * constant - the constant_memory_mapping object for this loop
+ * arr_def - the VarDefs object for the mapped variable
+ * globals - Rose Global variables
+ * i - an index to keep new variable names unique
+ * symtab - global symbol table
+ */
+static void setupConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, int i, SgSymbolTable* symtab) {
+ char* buf1 = new char[32];
+ snprintf(buf1, 32, "cs%dRef", i+1);
+ arr_def->secondName = buf1;
+
+ char buf2[64];
+ snprintf(buf2, 64, "__device__ __constant__ float");
+
+ SgVariableDeclaration* consvar_decl = buildVariableDeclaration(
+ SgName(std::string(buf1)), buildArrayType(
+ buildOpaqueType(SgName(buf2),globals),
+ arr_def->size_expr));
+ SgInitializedNamePtrList& variables = consvar_decl->get_variables();
+ SgInitializedNamePtrList::const_iterator j = variables.begin();
+ SgInitializedName* initializedName = *j;
+ SgVariableSymbol* consvar_sym = new SgVariableSymbol(initializedName);
+ prependStatement(consvar_decl, globals);
+
+ consvar_sym->set_parent(symtab);
+ symtab->insert(SgName(std::string(buf1)), consvar_sym);
+
+ constant->set_mapped_symbol(arr_def->original_name.c_str(), consvar_sym);
+ constant->set_vardef(arr_def->original_name.c_str(), arr_def);
+}
+
+/*
+ * cudaBindConstantVar
+ * allocs a variable to constant memory
+ * constant - the constant mapping object
+ * arr_def - the VarDefs abject
+ * globals - global symbol table
+ * stmt_list - the GPU functions' statement list
+ */
+static void cudaBindConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) {
+ SgName cudaMemcpyToSymbol_name("cudaMemcpyToSymbol");
+ SgFunctionDeclaration* cudaMemcpyToSymbol_decl = buildNondefiningFunctionDeclaration(
+ cudaMemcpyToSymbol_name, buildVoidType(), buildFunctionParameterList(), globals);
+ SgExprListExp* args = buildExprListExp();
+ args->append_expression(buildCastExp(constant->get_mapped_symbol_exp(arr_def->original_name.c_str()),
+ buildPointerType(buildVoidType())));
+ args->append_expression(buildVarRefExp(arr_def->in_data));
+ args->append_expression(arr_def->size_expr);
+ stmt_list->push_back(buildExprStatement(
+ buildFunctionCallExp(buildFunctionRefExp(cudaMemcpyToSymbol_decl), args)));
+}
+
+static void consmapArrayRefs(constant_memory_mapping* constant, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder* ocg) {
+ // if constant mapping is not being used, ignore this function
+ if(constant == NULL) return;
+ for(int i = 0; i < refs->size(); i++) {
+ IR_ArrayRef* aref = (*refs)[i];
+ if(constant->is_array_mapped(aref->name().c_str())) {
+ // get array reference dimensions
+ int dims = aref->symbol()->n_dim();
+ if(dims > 2) {
+ printf(" \n CHiLL does not handle constant memory mapping for more than 2D arrays.\n");
+ return;
+ }
+
+ SgExpression* varexp = constant->get_mapped_symbol_exp(aref->name().c_str());
+ SgExpression* index_exp;
+ // build index expression
+ if(dims == 1) {
+ index_exp = static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression();
+ }
+ if(dims == 2) {
+ VarDefs* arr_def = constant->get_vardef(aref->name().c_str());
+ CG_outputRepr* i0 = aref->index(0)->clone();
+ CG_outputRepr* i1 = aref->index(1)->clone();
+ CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0]));
+ CG_outputRepr* exp = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1);
+ index_exp = static_cast<omega::CG_roseRepr*>(exp->clone())->GetExpression();
+ }
+ ir->ReplaceExpression(aref, new CG_roseRepr(buildPntrArrRefExp(varexp, index_exp)));
+ }
+ }
+}
+
+/*
+ * setupTexmappingVar
+ * handles texture variable declaration
+ * and adds a global texture object
+ * parameters:
+ * texture - the texture_memory_mapping object
+ * arr_def - the VarDefs object for the mapped variable
+ * globals - Rose Global variables
+ * i - an index to keep the new variable names unique
+ * devptr_sym - the devptr that the original variable is associated with
+ * symtab - GPU function symbol table
+ */
+static void setupTexmappingVar(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, int i, SgVariableSymbol* devptr_sym, SgSymbolTable* symtab) {
+ char* buf1 = new char[32];
+ snprintf(buf1, 32, "tex%dRef", i+1);
+ arr_def->secondName = buf1;
+
+ char buf2[64];
+ // single-dimensional
+ snprintf(buf2, 64, "texture<float, %d, cudaReadModeElementType>", 1);
+ // multi-dimensional
+ // snprintf(buf2, 64, "texture<float, %d, cudaReadModeElemetType>", (int)(arr_def->size_multi_dim.size())); //*/
+
+ SgVariableDeclaration* texvar_decl = buildVariableDeclaration(SgName(std::string(buf1)), buildOpaqueType(buf2, globals));
+
+ SgInitializedNamePtrList& variables = texvar_decl->get_variables();
+ SgInitializedNamePtrList::const_iterator j = variables.begin();
+ SgInitializedName* initializedName = *j;
+ SgVariableSymbol* texvar_sym = new SgVariableSymbol(initializedName);
+ prependStatement(texvar_decl, globals);
+
+ texvar_sym->set_parent(symtab);
+ symtab->insert(SgName(buf1), texvar_sym);
+
+ texture->set_mapped_symbol(arr_def->original_name.c_str(), texvar_sym);
+ texture->set_devptr_symbol(arr_def->original_name.c_str(), devptr_sym);
+ texture->set_vardef(arr_def->original_name.c_str(), arr_def);
+}
+
+
+/*
+ * One dimensional version of cudaBindTexture
+ * see cudaBindTexture for details
+ */
+static SgFunctionCallExp* cudaBindTexture1D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) {
+ SgName cudaBindTexture_name("cudaBindTexture");
+ SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration(
+ cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals);
+
+ SgExprListExp* args = buildExprListExp();
+ args->append_expression(buildIntVal(0));
+ args->append_expression(texture->get_mapped_symbol_exp(arr_def->original_name.c_str()));
+ args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str()));
+ args->append_expression(arr_def->size_expr);
+ return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args);
+}
+
+/*
+ * Two dimensional version of cudaBindTexture
+ * see cudaBindTexture for details
+ */
+//static SgFunctionCallExp* cudaBindTexture2D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) {
+// SgName cudaBindTexture_name("cudaBindTexture2D");
+// SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration(
+// cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals);
+//
+// SgExprListExp* args = buildExprListExp();
+// args->append_expression(buildIntVal(0));
+// args->append_expression(texture->get_tex_mapped_symbol_exp(arr_def->original_name.c_str()));
+// args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str()));
+// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 0)));
+// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 1)));
+// args->append_expression(arr_def->size_expr);
+// return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args);
+//}
+
+/*
+ * cudaBindTexture
+ * binds a variable to a texture
+ * parameters:
+ * texture - the texture mapping object
+ * arr_def - the VarDefs object
+ * globals - global symbol table
+ * stmt_list - the GPU functions' statement list
+ * notes:
+ * only supports binding 1D textures, may need to consider cudaBindTexture2D for 2D textures
+ */
+static void cudaBindTexture(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) {
+ //int dims = (int)(arr_def->size_multi_dim.size());
+ //int dims = texture->get_dims(arr_def->original_name.c_str());
+ //if(dims == 1)
+ stmt_list->push_back(
+ buildExprStatement(cudaBindTexture1D(texture, arr_def, globals)));
+ //if(dims == 2)
+ // stmt_list->push_back(
+ // buildExprStatement(cudaBindTexture2D(texture, arr_def, globals)));
+}
+
+/*
+ * texmapArrayRefs
+ * maps array reference expresions of texture mapped variables to the tex1D function
+ * parameters:
+ * texture - the texture mapping object
+ * refs - a list of all array read operations
+ * globals - global symbol table
+ * ir - handles IR_Code operations
+ * ocg - handles CG_roseBuilder operations
+**/
+static void texmapArrayRefs(texture_memory_mapping* texture, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder *ocg) {
+ // if texture mapping is not being used, ignore this function
+ if(texture == NULL) return;
+ for(int i = 0; i < refs->size(); i++) {
+ IR_ArrayRef* aref = (*refs)[i];
+ if(texture->is_array_mapped(aref->name().c_str())) {
+
+ // get array dimensions
+ VarDefs* arr_def = texture->get_vardef(aref->name().c_str());
+ int dims = aref->symbol()->n_dim();
+ if(dims > 2) {
+ printf(" \n CHiLL does not handle texture mapping for more than 2D arrays.\n");
+ // TODO throw some sort of error. or handle in texture_copy function
+ return;
+ }
+
+ // build texture lookup function declaration
+ char texNDfetch_strName[16];
+ sprintf(texNDfetch_strName, "tex%dDfetch", 1); // for now, only support tex1Dfetch
+ //sprintf(texNDfetch_strName, "tex%dDfetch", dims);
+ SgFunctionDeclaration* fetch_decl = buildNondefiningFunctionDeclaration(
+ SgName(texNDfetch_strName), buildFloatType(), buildFunctionParameterList(), globals);
+
+ // build args
+ SgExprListExp* args = buildExprListExp();
+ args->append_expression(texture->get_mapped_symbol_exp(aref->name().c_str()));
+
+ // set indexing args
+ //for(int i = 0; i < dims; i++) {
+ // args->append_expression((static_cast<omega::CG_roseRepr*>(aref->index(i)->clone()))->GetExpression());
+ //}
+ if(dims == 1) {
+ args->append_expression(static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression());
+ }
+ else if(dims == 2) {
+ CG_outputRepr* i0 = aref->index(0)->clone();
+ CG_outputRepr* i1 = aref->index(1)->clone();
+ CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0]));
+ CG_outputRepr* expr = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1);
+ args->append_expression(static_cast<omega::CG_roseRepr*>(expr->clone())->GetExpression());
+ }
+
+ // build function call and replace original array ref
+ SgFunctionCallExp* fetch_call = buildFunctionCallExp(buildFunctionRefExp(fetch_decl), args);
+ ir->ReplaceExpression(aref, new CG_roseRepr(fetch_call));
+ }
+ }
+}
+
+SgNode* LoopCuda::cudaize_codegen_v2() {
+ if(cudaDebug)
+ printf("cudaize codegen V2\n");
+ CG_roseBuilder *ocg = dynamic_cast<CG_roseBuilder*>(ir->builder());
+ if (!ocg)
+ return false;
+
+ //protonu--adding an annote to track texture memory type
+ //ANNOTE(k_cuda_texture_memory, "cuda texture memory", TRUE);
+ //ANNOTE(k_cuda_constant_memory, "cuda constant memory", TRUE);
+ int tex_mem_on = 0;
+ int cons_mem_on = 0;
+
+
+
+ CG_outputRepr* repr;
+ std::vector<VarDefs> arrayVars;
+ std::vector<VarDefs> localScopedVars;
+
+ std::vector<IR_ArrayRef *> ro_refs;
+ std::vector<IR_ArrayRef *> wo_refs;
+ std::set<std::string> uniqueRefs;
+ std::set<std::string> uniqueWoRefs;
+ std::set<const SgVariableSymbol *> syms;
+ std::set<const SgVariableSymbol *> psyms;
+ std::set<const SgVariableSymbol *> pdSyms;
+ SgStatementPtrList* replacement_list = new SgStatementPtrList;
+
+ for (int j = 0; j < stmt.size(); j++) {
+ std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[j].code);
+ for (int i = 0; i < refs.size(); i++) {
+ //printf("ref %s wo %d\n", static_cast<const char*>(refs[i]->name()), refs[i]->is_write());
+ SgVariableSymbol* var = body_symtab->find_variable(
+ SgName((char*) refs[i]->name().c_str()));
+ SgVariableSymbol* var2 = parameter_symtab->find_variable(
+ SgName((char*) refs[i]->name().c_str()));
+
+ //If the array is not a parameter, then it's a local array and we
+ //want to recreate it as a stack variable in the kernel as opposed to
+ //passing it in.
+ if (var != NULL) {
+ //anand-- needs modification, if variable is parameter it wont be part of the
+ // block's symbol table but the functiond definition's symbol table
+
+ continue;
+ }
+ if (uniqueRefs.find(refs[i]->name()) == uniqueRefs.end()) {
+
+ uniqueRefs.insert(refs[i]->name());
+ if (refs[i]->is_write()) {
+ uniqueWoRefs.insert(refs[i]->name());
+ wo_refs.push_back(refs[i]);
+ } else
+ ro_refs.push_back(refs[i]);
+ }
+ if (refs[i]->is_write()
+ && uniqueWoRefs.find(refs[i]->name())
+ == uniqueWoRefs.end()) {
+ uniqueWoRefs.insert(refs[i]->name());
+ wo_refs.push_back(refs[i]);
+ //printf("adding %s to wo\n", static_cast<const char*>(refs[i]->name()));
+ }
+ pdSyms.insert((const SgVariableSymbol*) var2);
+ }
+ }
+
+ if (cudaDebug) {
+ printf("reading from array ");
+ for (int i = 0; i < ro_refs.size(); i++)
+ printf("'%s' ", ro_refs[i]->name().c_str());
+ printf("and writing to array ");
+ for (int i = 0; i < wo_refs.size(); i++)
+ printf("'%s' ", wo_refs[i]->name().c_str());
+ printf("\n");
+ }
+ const char* gridName = "dimGrid";
+ const char* blockName = "dimBlock";
+
+ //TODO: Could allow for array_dims_vars to be a mapping from array
+ //references to to variable names that define their length.
+ SgVariableSymbol* dim1 = 0;
+ SgVariableSymbol* dim2 = 0;
+
+ for (int i = 0; i < wo_refs.size(); i++) {
+ //TODO: Currently assume all arrays are floats of one or two dimentions
+ SgVariableSymbol* outArray = 0;
+ std::string name = wo_refs[i]->name();
+ outArray = body_symtab->find_variable(SgName((char*) name.c_str()));
+ int size_n_d;
+ if (outArray == NULL)
+ outArray = parameter_symtab->find_variable(
+ SgName((char*) name.c_str()));
+
+ VarDefs v;
+ v.size_multi_dim = std::vector<int>();
+ char buf[32];
+ snprintf(buf, 32, "devO%dPtr", i + 1);
+ v.name = buf;
+ if (isSgPointerType(outArray->get_type())) {
+ if (isSgArrayType(
+ isSgNode(
+ isSgPointerType(outArray->get_type())->get_base_type()))) {
+ // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type();
+ SgType* t =
+ isSgPointerType(outArray->get_type())->get_base_type();
+ /* SgExprListExp* dimList = t->get_dim_info();
+ SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
+ SgExpression* expr=NULL;
+ for (; j != dimList->get_expressions().end(); j++)
+ expr = *j;
+ */
+ while (isSgArrayType(t))
+ t = isSgArrayType(t)->get_base_type();
+
+ if (!isSgType(t)) {
+ char buf[1024];
+ sprintf(buf, "CudaizeCodeGen: Array type undetected!");
+ throw std::runtime_error(buf);
+
+ }
+
+ v.type = t;
+ } else
+ v.type = isSgPointerType(outArray->get_type())->get_base_type();
+ } else if (isSgArrayType(outArray->get_type())) {
+ if (isSgArrayType(
+ isSgNode(
+ isSgArrayType(outArray->get_type())->get_base_type()))) {
+ // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type();
+ SgType* t =
+ isSgArrayType(outArray->get_type())->get_base_type();
+ /* SgExprListExp* dimList = t->get_dim_info();
+ SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
+ SgExpression* expr=NULL;
+ for (; j != dimList->get_expressions().end(); j++)
+ expr = *j;
+ */
+ while (isSgArrayType(t))
+ t = isSgArrayType(t)->get_base_type();
+
+ if (!isSgType(t)) {
+ char buf[1024];
+ sprintf(buf, "CudaizeCodeGen: Array type undetected!");
+ throw std::runtime_error(buf);
+
+ }
+
+ v.type = t;
+ } else
+ v.type = isSgArrayType(outArray->get_type())->get_base_type();
+ } else
+ v.type = buildFloatType();
+ v.tex_mapped = false;
+ v.cons_mapped = false;
+ v.original_name = wo_refs[i]->name();
+ //Size of the array = dim1 * dim2 * num bytes of our array type
+
+ //If our input array is 2D (non-linearized), we want the actual
+ //dimentions of the array
+ CG_outputRepr* size;
+ //Lookup in array_dims
+ std::map<std::string, int>::iterator it = array_dims.find(name.c_str());
+ if (isSgPointerType(outArray->get_type())
+ && isSgArrayType(
+ isSgNode(
+ isSgPointerType(outArray->get_type())->get_base_type()))) {
+ SgType* t = isSgPointerType(outArray->get_type())->get_base_type();
+ /* SgExprListExp* dimList = t->get_dim_info();
+ SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
+ SgExpression* expr=NULL;
+ for (; j != dimList->get_expressions().end(); j++)
+ expr = *j;
+ */
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ size_n_d =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ t = isSgArrayType(t)->get_base_type();
+ while (isSgArrayType(t)) {
+ int dim;
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ dim =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ dim = (int) isSgIntVal(lhs)->get_value()
+ + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ dim = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ dim = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ dim =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ size_n_d *= dim;
+ v.size_multi_dim.push_back(dim);
+ t = isSgArrayType(t)->get_base_type();
+ }
+ //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value());
+
+ if (cudaDebug)
+ printf("Detected Multi-dimensional array sized of %d for %s\n",
+ size_n_d, (char*) wo_refs[i]->name().c_str());
+ size = ocg->CreateInt(size_n_d);
+ } else if (isSgArrayType(outArray->get_type())
+ && isSgArrayType(
+ isSgNode(
+ isSgArrayType(outArray->get_type())->get_base_type()))) {
+ SgType* t = outArray->get_type();
+ /* SgExprListExp* dimList = t->get_dim_info();
+ SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
+ SgExpression* expr=NULL;
+ for (; j != dimList->get_expressions().end(); j++)
+ expr = *j;
+ */
+
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ size_n_d =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ t = isSgArrayType(t)->get_base_type();
+ while (isSgArrayType(t)) {
+ int dim;
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ dim =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ dim = (int) isSgIntVal(lhs)->get_value()
+ + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ dim = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ dim = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ dim =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ size_n_d *= dim;
+ v.size_multi_dim.push_back(dim);
+ t = isSgArrayType(t)->get_base_type();
+ }
+
+ //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value());
+
+ if (cudaDebug)
+ printf("Detected Multi-Dimensional array sized of %d for %s\n",
+ size_n_d, (char*) wo_refs[i]->name().c_str());
+ size = ocg->CreateInt(size_n_d);
+ } else if (it != array_dims.end()) {
+ int ref_size = it->second;
+ //size =
+ // ocg->CreateInt(
+ // isSgIntVal(
+ // isSgArrayType(outArray->get_type())->get_index())->get_value());
+ //v.size_2d = isSgArrayType(outArray->get_type())->get_rank();
+ //v.var_ref_size = ref_size;
+ size = ocg->CreateInt(ref_size);
+
+ } else {
+ if (dim1) {
+ size = ocg->CreateTimes(
+ new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))),
+ new CG_roseRepr(isSgExpression(buildVarRefExp(dim2))));
+ } else {
+ char buf[1024];
+ sprintf(buf,
+ "CudaizeCodeGen: Array reference %s does not have a "
+ "detectable size or specififed dimentions",
+ name.c_str());
+ throw std::runtime_error(buf);
+ }
+ }
+
+ v.size_expr =
+ static_cast<CG_roseRepr*>(ocg->CreateTimes(size,
+ new omega::CG_roseRepr(
+ isSgExpression(buildSizeOfOp(v.type)))))->GetExpression();
+
+ v.in_data = 0;
+ v.out_data = outArray;
+ //Check for in ro_refs and remove it at this point
+ std::vector<IR_ArrayRef *>::iterator it_;
+ for (it_ = ro_refs.begin(); it_ != ro_refs.end(); it_++) {
+ if ((*it_)->name() == wo_refs[i]->name()) {
+ break;
+ }
+ }
+ if (it_ != ro_refs.end()) {
+ v.in_data = outArray;
+ ro_refs.erase(it_);
+ }
+
+ arrayVars.push_back(v);
+
+ }
+
+ //protonu-- assuming that all texture mapped memories were originally read only mems
+ //there should be safety checks for that, will implement those later
+
+ for (int i = 0; i < ro_refs.size(); i++) {
+ SgVariableSymbol* inArray = 0;
+ std::string name = ro_refs[i]->name();
+ inArray = body_symtab->find_variable(SgName((char*) name.c_str()));
+ if (inArray == NULL)
+ inArray = parameter_symtab->find_variable(
+ SgName((char*) name.c_str()));
+
+ VarDefs v;
+ v.size_multi_dim = std::vector<int>();
+ char buf[32];
+ snprintf(buf, 32, "devI%dPtr", i + 1);
+ v.name = buf;
+ int size_n_d;
+ if (isSgPointerType(inArray->get_type())) {
+ if (isSgArrayType(
+ isSgNode(
+ isSgPointerType(inArray->get_type())->get_base_type()))) {
+
+ SgType* t =
+ isSgPointerType(inArray->get_type())->get_base_type();
+
+ while (isSgArrayType(t))
+ t = isSgArrayType(t)->get_base_type();
+
+ if (!isSgType(t)) {
+ char buf[1024];
+ sprintf(buf, "CudaizeCodeGen: Array type undetected!");
+ throw std::runtime_error(buf);
+
+ }
+ v.type = t;
+ } else
+ v.type = isSgPointerType(inArray->get_type())->get_base_type();
+ } else if (isSgArrayType(inArray->get_type())) {
+ if (isSgArrayType(
+ isSgNode(
+ isSgArrayType(inArray->get_type())->get_base_type()))) {
+
+ SgType* t = inArray->get_type();
+ while (isSgArrayType(t))
+ t = isSgArrayType(t)->get_base_type();
+
+ if (!isSgType(t)) {
+ char buf[1024];
+ sprintf(buf, "CudaizeCodeGen: Array type undetected!");
+ throw std::runtime_error(buf);
+
+ }
+ v.type = t;
+ } else
+ v.type = isSgArrayType(inArray->get_type())->get_base_type();
+ }
+
+ else
+ v.type = buildFloatType();
+
+ v.tex_mapped = false;
+ v.cons_mapped = false;
+ v.original_name = ro_refs[i]->name();
+
+ //derick -- adding texture and constant mapping
+ if ( texture != NULL)
+ v.tex_mapped = (texture->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars
+ if (v.tex_mapped){
+ printf("this variable %s is mapped to texture memory", name.c_str());
+ }
+ //derick -- this is commented out until constant memory is implemeted
+ if ( constant_mem != NULL)
+ v.cons_mapped = (constant_mem->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars
+ if (v.cons_mapped){
+ printf("this variable %s is mapped to constant memory", name.c_str());
+ }
+
+ //Size of the array = dim1 * dim2 * num bytes of our array type
+ //If our input array is 2D (non-linearized), we want the actual
+ //dimentions of the array (as it might be less than cu_n
+ CG_outputRepr* size;
+ //Lookup in array_dims
+ std::map<std::string, int>::iterator it = array_dims.find(name.c_str());
+ if (isSgPointerType(inArray->get_type())
+ && isSgArrayType(
+ isSgPointerType(inArray->get_type())->get_base_type())) {
+ //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type());
+ //v.size_2d = t->get_rank();
+ SgType* t = isSgPointerType(inArray->get_type())->get_base_type();
+ /* SgExprListExp* dimList = t->get_dim_info();
+ SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
+ SgExpression* expr=NULL;
+ for (; j != dimList->get_expressions().end(); j++)
+ expr = *j;
+ */
+ //v.size_2d = 1;
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ size_n_d =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ t = isSgArrayType(t)->get_base_type();
+ while (isSgArrayType(t)) {
+ int dim;
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ dim =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ dim = (int) isSgIntVal(lhs)->get_value()
+ + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ dim = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ dim = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ dim =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ size_n_d *= dim;
+ v.size_multi_dim.push_back(dim);
+ t = isSgArrayType(t)->get_base_type();
+ }
+ if (cudaDebug)
+ printf("Detected Multi-dimensional array sized of %d for %s\n",
+ size_n_d, (char*) ro_refs[i]->name().c_str());
+ size = ocg->CreateInt(size_n_d);
+ } else if (isSgArrayType(inArray->get_type())
+ && isSgArrayType(
+ isSgArrayType(inArray->get_type())->get_base_type())) {
+ //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type());
+ //v.size_2d = t->get_rank();
+ SgType* t = inArray->get_type();
+ /* SgExprListExp* dimList = t->get_dim_info();
+ SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
+ SgExpression* expr=NULL;
+ for (; j != dimList->get_expressions().end(); j++)
+ expr = *j;
+ */
+
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d =
+ (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
+ size_n_d = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ size_n_d =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ t = isSgArrayType(t)->get_base_type();
+ while (isSgArrayType(t)) {
+ int dim;
+ if (isSgIntVal(isSgArrayType(t)->get_index()))
+ dim =
+ (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
+ dim = (int) (isSgLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index()))
+ dim = (int) (isSgUnsignedLongLongIntVal(
+ isSgArrayType(t)->get_index())->get_value());
+ else if (isSgAddOp(isSgArrayType(t)->get_index())) {
+ SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
+
+ SgExpression *lhs = op_add->get_lhs_operand();
+ SgExpression *rhs = op_add->get_rhs_operand();
+
+ if (isSgIntVal(lhs))
+ dim = (int) isSgIntVal(lhs)->get_value()
+ + (int) (isSgIntVal(rhs)->get_value());
+ else if (isSgUnsignedIntVal(lhs))
+ dim = (int) isSgUnsignedIntVal(lhs)->get_value()
+ + (int) isSgUnsignedIntVal(rhs)->get_value();
+ else if (isSgUnsignedLongVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongLongIntVal(lhs))
+ dim = (int) (isSgLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongVal(rhs)->get_value());
+ else if (isSgLongIntVal(lhs))
+ dim = (int) (isSgLongIntVal(lhs)->get_value()
+ + isSgLongIntVal(rhs)->get_value());
+ else if (isSgUnsignedLongLongIntVal(lhs))
+ dim =
+ (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
+ + isSgUnsignedLongLongIntVal(rhs)->get_value());
+
+ }
+ size_n_d *= dim;
+ v.size_multi_dim.push_back(dim);
+ t = isSgArrayType(t)->get_base_type();
+ }
+ if (cudaDebug)
+ printf("Detected Multi-Dimensional array sized of %d for %s\n",
+ size_n_d, (char*) ro_refs[i]->name().c_str());
+ size = ocg->CreateInt(size_n_d);
+ }
+
+ else if (it != array_dims.end()) {
+ int ref_size = it->second;
+ // v.var_ref_size = ref_size;
+ size = ocg->CreateInt(ref_size);
+ } else {
+ if (dim1) {
+ size = ocg->CreateTimes(
+ new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))),
+ new CG_roseRepr(isSgExpression(buildVarRefExp(dim2))));
+ } else {
+ char buf[1024];
+ sprintf(buf,
+ "CudaizeCodeGen: Array reference %s does not have a "
+ "detectable size or specififed dimentions",
+ name.c_str());
+ throw std::runtime_error(buf);
+ }
+ }
+ v.size_expr =
+ static_cast<CG_roseRepr*>(ocg->CreateTimes(size,
+ new omega::CG_roseRepr(
+ isSgExpression(buildSizeOfOp(v.type)))))->GetExpression();
+
+ v.in_data = inArray;
+ v.out_data = 0;
+ arrayVars.push_back(v);
+ }
+
+ if (arrayVars.size() < 2) {
+ fprintf(stderr,
+ "cudaize error: Did not find two arrays being accessed\n");
+ return false;
+ }
+
+ //protonu--debugging tool--the printf statement
+ //tex_mem_on signals use of tex mem
+ /* derick -- texmapping near malloc mcopy
+ for(int i=0; i<arrayVars.size(); i++)
+ {
+ //printf("var name %s, tex_mem used %s\n", arrayVars[i].name.c_str(), (arrayVars[i].tex_mapped)?"true":"false");
+ if (arrayVars[i].tex_mapped ) tex_mem_on ++;
+ //if (arrayVars[i].cons_mapped ) cons_mem_on ++;
+ }
+ */
+
+ //Add our mallocs (and input array memcpys)
+ for (int i = 0; i < arrayVars.size(); i++) {
+ if(arrayVars[i].cons_mapped) {
+ setupConstantVar(constant_mem, &arrayVars[i], globals, i, symtab);
+ SgStatementPtrList *tnl = new SgStatementPtrList;
+ cudaBindConstantVar(constant_mem, &arrayVars[i], globals, tnl);
+ setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
+ }
+ else {
+ SgVariableDeclaration* defn = buildVariableDeclaration(
+ SgName(arrayVars[i].name.c_str()),
+ buildPointerType(arrayVars[i].type));
+ SgInitializedNamePtrList& variables = defn->get_variables();
+ SgInitializedNamePtrList::const_iterator j = variables.begin();
+ SgInitializedName* initializedName = *j;
+ SgVariableSymbol* dvs = new SgVariableSymbol(initializedName);
+ prependStatement(defn, func_body);
+
+ dvs->set_parent(body_symtab);
+ body_symtab->insert(SgName(arrayVars[i].name.c_str()), dvs);
+
+// SgVariableSymbol* dvs = body_symtab->find_variable(SgName(arrayVars[i].name.c_str()));
+
+ // if(dvs == NULL)
+ // dvs = parameter_symtab->find_variable(SgName(arrayVars[i].name.c_str()));
+
+ //cudaMalloc args
+ // SgBasicBlock* block = buildBasicBlock();
+ SgName name_cuda_malloc("cudaMalloc");
+ SgFunctionDeclaration * decl_cuda_malloc =
+ buildNondefiningFunctionDeclaration(name_cuda_malloc,
+ buildVoidType(), buildFunctionParameterList(), globals);
+
+ SgName name_cuda_copy("cudaMemcpy");
+ SgFunctionDeclaration * decl_cuda_copy =
+ buildNondefiningFunctionDeclaration(name_cuda_copy,
+ buildVoidType(), buildFunctionParameterList(), globals);
+
+ SgExprListExp* args = buildExprListExp();
+ args->append_expression(
+ buildCastExp(buildAddressOfOp(buildVarRefExp(dvs)),
+ buildPointerType(buildPointerType(buildVoidType()))));
+ args->append_expression(arrayVars[i].size_expr);
+
+// decl_cuda_malloc->get_parameterList()->append_arg
+ SgFunctionCallExp *the_call = buildFunctionCallExp(
+ buildFunctionRefExp(decl_cuda_malloc), args);
+
+ SgExprStatement* stmt = buildExprStatement(the_call);
+
+ // (*replacement_list).push_back (stmt);
+
+ SgStatementPtrList* tnl = new SgStatementPtrList;
+ (*tnl).push_back(stmt);
+ setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
+ if (arrayVars[i].in_data) {
+
+ SgExprListExp * cuda_copy_in_args = buildExprListExp();
+ cuda_copy_in_args->append_expression(
+ isSgExpression(buildVarRefExp(dvs)));
+ cuda_copy_in_args->append_expression(
+ isSgExpression(buildVarRefExp(arrayVars[i].in_data)));
+ CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr);
+ cuda_copy_in_args->append_expression(
+ static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression());
+ cuda_copy_in_args->append_expression(
+ buildOpaqueVarRefExp("cudaMemcpyHostToDevice", globals));
+
+// cuda_copy_in_args->append_expression(
+// new SgVarRefExp(sourceLocation, )
+// );
+ SgFunctionCallExp * cuda_copy_in_func_call = buildFunctionCallExp(
+ buildFunctionRefExp(decl_cuda_copy), cuda_copy_in_args);
+
+ SgExprStatement* stmt = buildExprStatement(cuda_copy_in_func_call);
+
+ SgStatementPtrList *tnl = new SgStatementPtrList;
+ (*tnl).push_back(stmt);
+ setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
+
+ if(arrayVars[i].tex_mapped) {
+ setupTexmappingVar(texture, &arrayVars[i], globals, i, dvs, symtab);
+ SgStatementPtrList *tnl = new SgStatementPtrList;
+ cudaBindTexture(texture, &arrayVars[i], globals, tnl);
+ setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
+ }
+ }
+ }
+ }
+
+ //Build dimGrid dim3 variables based on loop dimentions and ti/tj
+ char blockD1[120];
+ char blockD2[120];
+ if (dim1) {
+ snprintf(blockD1, 120, "%s/%d",
+ dim1->get_declaration()->get_name().getString().c_str(), cu_tx);
+ snprintf(blockD2, 120, "%s/%d",
+ dim2->get_declaration()->get_name().getString().c_str(), cu_ty);
+ } else {
+ snprintf(blockD1, 120, "%d", cu_bx);
+ snprintf(blockD2, 120, "%d", cu_by);
+ //snprintf(blockD1, 120, "%d/%d", cu_nx, cu_tx);
+ //snprintf(blockD2, 120, "%d/%d", cu_ny, cu_ty);
+ }
+
+ SgInitializedName* arg1 = buildInitializedName("i", buildIntType());
+ SgInitializedName* arg2 = buildInitializedName("j", buildIntType());
+ SgInitializedName* arg3 = buildInitializedName("k", buildIntType());
+ SgName type_name("dim3");
+ //SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(type_name);
+
+ //ROSE_ASSERT(type_symbol != NULL);
+
+ //SgClassDeclaration * dim3classdecl = isSgClassDeclaration(
+ // type_symbol->get_declaration());
+
+ SgFunctionDeclaration * funcdecl = buildNondefiningFunctionDeclaration(
+ SgName("dim3"), buildOpaqueType("dim3", globalScope),
+ //isSgType(dim3classdecl->get_type()),
+ buildFunctionParameterList(arg1, arg2, arg3), globalScope);
+
+ if (cu_bx && cu_by)
+ repr = ocg->CreateDim3((const char*) gridName, ocg->CreateInt(cu_bx),
+ ocg->CreateInt(cu_by));
+ else if (cu_bx_repr && cu_by_repr)
+ repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, cu_by_repr);
+ else if (cu_bx_repr)
+ repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr,
+ ocg->CreateInt(1));
+ setup_code = ocg->StmtListAppend(setup_code, repr);
+ //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList();
+
+ //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++)
+ // (*replacement_list).push_back (*it);
+
+ // repr = ocg->CreateDim3((const char*)blockName, cu_tx,cu_ty);
+
+ if (cu_tz > 1 || cu_tz_repr) {
+
+ if (cu_tx && cu_ty && cu_tz)
+ repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx),
+ ocg->CreateInt(cu_ty), ocg->CreateInt(cu_tz));
+ else if (cu_tx_repr && cu_ty_repr && cu_tz_repr)
+ repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr,
+ cu_tz_repr);
+ // SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList();
+
+ // for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++)
+ // (*replacement_list).push_back (*it);
+
+ } else {
+ if (cu_tx && cu_ty)
+ repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx),
+ ocg->CreateInt(cu_ty));
+ else if (cu_tx_repr && cu_ty_repr)
+ repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr);
+ //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList();
+
+ //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++)
+ // (*replacement_list).push_back (*it);
+
+ }
+
+ setup_code = ocg->StmtListAppend(setup_code, repr);
+
+ SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig(
+ buildVarRefExp(gridName), buildVarRefExp(blockName), NULL, NULL);
+ //SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig(buildIntVal(cu_bx), , NULL, NULL);
+ SgExprListExp* iml = new SgExprListExp();
+ SgCastExp* dim_s;
+
+ //Creating Kernel function
+ SgBasicBlock* bb = new SgBasicBlock(TRANSFORMATION_FILE_INFO);
+ SgFunctionDefinition* kernel_defn = new SgFunctionDefinition(
+ TRANSFORMATION_FILE_INFO, bb);
+ SgFunctionDeclaration* kernel_decl_ = new SgFunctionDeclaration(
+ TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn);
+ SgFunctionDeclaration* kernel_decl = new SgFunctionDeclaration(
+ TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn);
+
+ //((kernel_decl->get_declarationModifier()).get_storageModifier()).setStatic();
+
+ kernel_decl->set_definingDeclaration(kernel_decl);
+ kernel_defn->set_parent(kernel_decl);
+ bb->set_parent(kernel_defn);
+ bb->set_endOfConstruct(TRANSFORMATION_FILE_INFO);
+ bb->get_endOfConstruct()->set_parent(bb);
+
+ //SgFunctionSymbol* functionSymbol = new SgFunctionSymbol(kernel_decl_);
+ //globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()),
+ // functionSymbol);
+ SgFunctionSymbol* functionSymbol2 = new SgFunctionSymbol(kernel_decl);
+
+ globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()),
+ functionSymbol2);
+
+ kernel_decl_->set_parent(globals);
+
+ kernel_decl_->set_scope(globals);
+
+ kernel_decl_->setForward();
+
+ globals->prepend_declaration(kernel_decl_);
+
+ kernel_decl->set_endOfConstruct(TRANSFORMATION_FILE_INFO);
+ kernel_decl->get_endOfConstruct()->set_parent(kernel_decl);
+
+ kernel_decl->set_parent(globals);
+ kernel_decl->set_scope(globals);
+
+ kernel_decl->get_definition()->set_endOfConstruct(TRANSFORMATION_FILE_INFO);
+ kernel_decl->get_definition()->get_endOfConstruct()->set_parent(
+ kernel_decl->get_definition());
+
+ globals->append_statement(kernel_decl);
+
+ //printf("%s %s\n", static_cast<const char*>(cu_kernel_name), dims);
+ //--derick - kernel function parameters
+ for (int i = 0; i < arrayVars.size(); i++)
+ //Throw in a type cast if our kernel takes 2D array notation
+ //like (float(*) [1024])
+ {
+ //protonu--throwing in another hack to stop the caller from passing tex mapped
+ //vars to the kernel.
+ if (arrayVars[i].tex_mapped == true || arrayVars[i].cons_mapped)
+ continue;
+ if (!(arrayVars[i].size_multi_dim.empty())) {
+ //snprintf(dims,120,"(float(*) [%d])%s", arrayVars[i].size_2d,
+ // const_cast<char*>(arrayVars[i].name.c_str()));
+
+ SgType* t = arrayVars[i].type;
+ for (int k = arrayVars[i].size_multi_dim.size() - 1; k >= 0; k--) {
+ t = buildArrayType(t,
+ buildIntVal(arrayVars[i].size_multi_dim[k]));
+ }
+ SgVariableSymbol* temp = body_symtab->find_variable(
+ SgName((char*) arrayVars[i].name.c_str()));
+ if (temp == NULL)
+ temp = parameter_symtab->find_variable(
+ SgName((char*) arrayVars[i].name.c_str()));
+
+ dim_s = buildCastExp(buildVarRefExp(temp), buildPointerType(t),
+ SgCastExp::e_C_style_cast);
+
+ //printf("%d %s\n", i, dims);
+ iml->append_expression(dim_s);
+
+ SgInitializedName* id = buildInitializedName(
+ (char*) arrayVars[i].original_name.c_str(),
+ buildPointerType(t));
+ kernel_decl->get_parameterList()->append_arg(id);
+ kernel_decl_->get_parameterList()->append_arg(id);
+ id->set_file_info(TRANSFORMATION_FILE_INFO);
+
+ // DQ (9/8/2007): We now test this, so it has to be set explicitly.
+ id->set_scope(kernel_decl->get_definition());
+
+ // DQ (9/8/2007): Need to add variable symbol to global scope!
+ //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n",globalScope,globalScope->class_name().c_str(),var1_init_name,var1_init_name->get_name().str());
+ SgVariableSymbol *var_symbol = new SgVariableSymbol(id);
+ kernel_decl->get_definition()->insert_symbol(id->get_name(),
+ var_symbol);
+
+ // if(kernel_decl->get_definition()->get_symbol_table()->find((const) id) == NULL)
+
+ } else {
+ //printf("%d %s\n", i, static_cast<const char*>(arrayVars[i].name));
+ SgVariableSymbol* temp = body_symtab->find_variable(
+ SgName((char*) arrayVars[i].name.c_str()));
+ if (temp == NULL)
+ temp = parameter_symtab->find_variable(
+ SgName((char*) arrayVars[i].name.c_str()));
+ iml->append_expression(buildVarRefExp(temp));
+ SgInitializedName* id = buildInitializedName(
+ (char*) arrayVars[i].original_name.c_str(),
+ buildPointerType(arrayVars[i].type));
+ kernel_decl->get_parameterList()->append_arg(id);
+ kernel_decl_->get_parameterList()->append_arg(id);
+ id->set_file_info(TRANSFORMATION_FILE_INFO);
+
+ // DQ (9/8/2007): We now test this, so it has to be set explicitly.
+ id->set_scope(kernel_decl->get_definition());
+
+ // DQ (9/8/2007): Need to add variable symbol to global scope!
+ //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n"$
+ SgVariableSymbol *var_symbol = new SgVariableSymbol(id);
+ kernel_decl->get_definition()->insert_symbol(id->get_name(),
+ var_symbol);
+
+ }
+
+ }
+ if (dim1) {
+ iml->append_expression(buildVarRefExp(dim1));
+ SgInitializedName* id = buildInitializedName(
+ dim1->get_name().getString().c_str(), dim1->get_type());
+ kernel_decl->get_parameterList()->append_arg(id);
+
+ iml->append_expression(buildVarRefExp(dim2));
+ SgInitializedName* id2 = buildInitializedName(
+ dim2->get_name().getString().c_str(), dim2->get_type());
+
+ kernel_decl->get_parameterList()->append_arg(id);
+ kernel_decl_->get_parameterList()->append_arg(id);
+ }
+
+ kernel_decl->get_functionModifier().setCudaKernel();
+ kernel_decl_->get_functionModifier().setCudaKernel();
+ SgCudaKernelCallExp * cuda_call_site = new SgCudaKernelCallExp(
+ TRANSFORMATION_FILE_INFO, buildFunctionRefExp(kernel_decl), iml,config);
+
+ // SgStatementPtrList *tnl2 = new SgStatementPtrList;
+
+ (*replacement_list).push_back(buildExprStatement(cuda_call_site));
+
+ setup_code = ocg->StmtListAppend(setup_code,
+ new CG_roseRepr(replacement_list));
+
+ //cuda free variables
+ for (int i = 0; i < arrayVars.size(); i++) {
+ if (arrayVars[i].out_data) {
+
+ SgName name_cuda_copy("cudaMemcpy");
+ SgFunctionDeclaration * decl_cuda_copyout =
+ buildNondefiningFunctionDeclaration(name_cuda_copy,
+ buildVoidType(), buildFunctionParameterList(),
+ globals);
+
+ SgExprListExp* args = buildExprListExp();
+ SgExprListExp * cuda_copy_out_args = buildExprListExp();
+ cuda_copy_out_args->append_expression(
+ isSgExpression(buildVarRefExp(arrayVars[i].out_data)));
+ cuda_copy_out_args->append_expression(
+ isSgExpression(buildVarRefExp(arrayVars[i].name)));
+ CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr);
+ cuda_copy_out_args->append_expression(
+ static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression());
+ cuda_copy_out_args->append_expression(
+ buildOpaqueVarRefExp("cudaMemcpyDeviceToHost", globals));
+
+// cuda_copy_in_args->append_expression(
+// new SgVarRefExp(sourceLocation, )
+// );
+ SgFunctionCallExp * cuda_copy_out_func_call = buildFunctionCallExp(
+ buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args);
+
+ SgFunctionCallExp *the_call = buildFunctionCallExp(
+ buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args);
+
+ SgExprStatement* stmt = buildExprStatement(the_call);
+
+ SgStatementPtrList* tnl3 = new SgStatementPtrList;
+
+ (*tnl3).push_back(stmt);
+
+ // tree_node_list* tnl = new tree_node_list;
+ // tnl->append(new tree_instr(the_call));
+ setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl3));
+
+ }
+ if(!arrayVars[i].cons_mapped) {
+ SgName name_cuda_free("cudaFree");
+ SgFunctionDeclaration * decl_cuda_free =
+ buildNondefiningFunctionDeclaration(name_cuda_free,
+ buildVoidType(), buildFunctionParameterList(), globals);
+
+ SgExprListExp* args3 = buildExprListExp();
+
+ SgVariableSymbol* tmp = body_symtab->find_variable(
+ SgName(arrayVars[i].name.c_str()));
+ if (tmp == NULL)
+ tmp = parameter_symtab->find_variable(
+ SgName(arrayVars[i].name.c_str()));
+
+ args3->append_expression(buildVarRefExp(tmp));
+
+ SgFunctionCallExp *the_call2 = buildFunctionCallExp(
+ buildFunctionRefExp(decl_cuda_free), args3);
+
+ SgExprStatement* stmt2 = buildExprStatement(the_call2);
+
+ SgStatementPtrList* tnl4 = new SgStatementPtrList;
+
+ (*tnl4).push_back(stmt2);
+ //(*replacement_list).push_back (stmt2);
+
+ setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl4));
+ }
+ }
+
+ // ---------------
+ // BUILD THE KERNEL
+ // ---------------
+
+ //Extract out kernel body
+ SgNode* code = getCode();
+ //Create kernel function body
+ //Add Params
+ std::map<std::string, SgVariableSymbol*> loop_vars;
+ //In-Out arrays
+ for (int i = 0; i < arrayVars.size(); i++) {
+ /* if(arrayVars[i].in_data)
+ fptr = arrayVars[i].in_data->type()->clone();
+ else
+ fptr = arrayVars[i].out_data->type()->clone();
+ */
+
+ // fptr = new_proc_syms->install_type(fptr);
+ std::string name =
+ arrayVars[i].in_data ?
+ arrayVars[i].in_data->get_declaration()->get_name().getString() :
+ arrayVars[i].out_data->get_declaration()->get_name().getString();
+ //SgVariableSymbol* sym = new var_sym(fptr, arrayVars[i].in_data ? arrayVars[i].in_data->name() : arrayVars[i].out_data->name());
+
+ SgVariableSymbol* sym =
+ kernel_decl->get_definition()->get_symbol_table()->find_variable(
+ (const char*) name.c_str());
+ /* SgVariableDeclaration* defn = buildVariableDeclaration(SgName(name.c_str()), buildFloatType());
+ SgInitializedNamePtrList& variables = defn->get_variables();
+ SgInitializedNamePtrList::const_iterator i = variables.begin();
+ SgInitializedName* initializedName = *i;
+ SgVariableSymbol* sym = new SgVariableSymbol(initializedName);
+ prependStatement(defn, isSgScopeStatement(root_));
+
+ vs->set_parent(symtab2_);
+ symtab2_->insert(SgName(_s.c_str()), vs);
+ */
+
+ if (sym != NULL)
+ loop_vars.insert(
+ std::pair<std::string, SgVariableSymbol*>(std::string(name),
+ sym));
+ }
+
+ //Figure out which loop variables will be our thread and block dimention variables
+ std::vector<SgVariableSymbol *> loop_syms;
+ //Get our indexes
+ std::vector<const char*> indexes; // = get_loop_indexes(code,cu_num_reduce);
+ int threadsPos = 0;
+
+ CG_outputRepr *body = NULL;
+ SgFunctionDefinition* func_d = func_definition;
+ //std::vector<SgVariableSymbol *> symbols = recursiveFindRefs(code);
+
+ SgName name_sync("__syncthreads");
+ SgFunctionDeclaration * decl_sync = buildNondefiningFunctionDeclaration(
+ name_sync, buildVoidType(), buildFunctionParameterList(),
+ globalScope);
+
+ recursiveFindRefs(code, syms, func_d);
+
+ //SgFunctionDeclaration* func = Outliner::generateFunction (code, (char*)cu_kernel_name.c_str(), syms, pdSyms, psyms, NULL, globalScope);
+
+ if (cu_bx > 1 || cu_bx_repr) {
+ indexes.push_back("bx");
+ SgName type_name("blockIdx.x");
+ SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
+ type_name);
+ SgVariableDeclaration * var_decl = buildVariableDeclaration("bx",
+ buildIntType(), NULL,
+ isSgScopeStatement(kernel_decl->get_definition()->get_body()));
+ SgStatementPtrList *tnl = new SgStatementPtrList;
+ // (*tnl).push_back(isSgStatement(var_decl));
+ appendStatement(var_decl, kernel_decl->get_definition()->get_body());
+
+ SgVariableSymbol* bx =
+ kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
+ SgName("bx"));
+ SgStatement* assign = isSgStatement(
+ buildAssignStatement(buildVarRefExp(bx),
+ buildOpaqueVarRefExp("blockIdx.x",
+ kernel_decl->get_definition()->get_body())));
+ (*tnl).push_back(assign);
+ // body = ocg->StmtListAppend(body,
+ // new CG_roseRepr(tnl));
+ appendStatement(assign, kernel_decl->get_definition()->get_body());
+
+ }
+ if (cu_by > 1 || cu_by_repr) {
+ indexes.push_back("by");
+ SgName type_name("blockIdx.y");
+ SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
+ type_name);
+ SgVariableDeclaration * var_decl = buildVariableDeclaration("by",
+ buildIntType(), NULL,
+ isSgScopeStatement(kernel_decl->get_definition()->get_body()));
+ // SgStatementPtrList *tnl = new SgStatementPtrList;
+ // (*tnl).push_back(isSgStatement(var_decl));
+ appendStatement(var_decl, kernel_decl->get_definition()->get_body());
+
+ SgVariableSymbol* by =
+ kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
+ SgName("by"));
+ SgStatement* assign = isSgStatement(
+ buildAssignStatement(buildVarRefExp(by),
+ buildOpaqueVarRefExp("blockIdx.y",
+ kernel_decl->get_definition()->get_body())));
+ //(*tnl).push_back(assign);
+ // body = ocg->StmtListAppend(body,
+ // new CG_roseRepr(tnl));
+ appendStatement(assign, kernel_decl->get_definition()->get_body());
+
+ }
+ if (cu_tx_repr || cu_tx > 1) {
+ threadsPos = indexes.size();
+ indexes.push_back("tx");
+ SgName type_name("threadIdx.x");
+ SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
+ type_name);
+ SgVariableDeclaration * var_decl = buildVariableDeclaration("tx",
+ buildIntType(), NULL,
+ isSgScopeStatement(kernel_decl->get_definition()->get_body()));
+ // SgStatementPtrList *tnl = new SgStatementPtrList;
+ // (*tnl).push_back(isSgStatement(var_decl));
+ appendStatement(var_decl, kernel_decl->get_definition()->get_body());
+
+ SgVariableSymbol* tx =
+ kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
+ SgName("tx"));
+ SgStatement* assign = isSgStatement(
+ buildAssignStatement(buildVarRefExp(tx),
+ buildOpaqueVarRefExp("threadIdx.x",
+ kernel_decl->get_definition()->get_body())));
+ //(*tnl).push_back(assign);
+ // body = ocg->StmtListAppend(body,
+ // new CG_roseRepr(tnl));
+ appendStatement(assign, kernel_decl->get_definition()->get_body());
+
+ }
+ if (cu_ty_repr || cu_ty > 1) {
+ indexes.push_back("ty");
+ SgName type_name("threadIdx.y");
+ SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
+ type_name);
+ SgVariableDeclaration * var_decl = buildVariableDeclaration("ty",
+ buildIntType(), NULL,
+ isSgScopeStatement(kernel_decl->get_definition()->get_body()));
+ appendStatement(var_decl, kernel_decl->get_definition()->get_body());
+
+ // SgStatementPtrList *tnl = new SgStatementPtrList;
+ // (*tnl).push_back(isSgStatement(var_decl));
+ SgVariableSymbol* ty =
+ kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
+ SgName("ty"));
+ SgStatement* assign = isSgStatement(
+ buildAssignStatement(buildVarRefExp(ty),
+ buildOpaqueVarRefExp("threadIdx.y",
+ kernel_decl->get_definition()->get_body())));
+ // (*tnl).push_back(assign);
+ // body = ocg->StmtListAppend(body,
+ // new CG_roseRepr(tnl));
+ appendStatement(assign, kernel_decl->get_definition()->get_body());
+
+ }
+ if (cu_tz_repr || cu_tz > 1) {
+ indexes.push_back("tz");
+ SgName type_name("threadIdx.z");
+ SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
+ type_name);
+ SgVariableDeclaration * var_decl = buildVariableDeclaration("tz",
+ buildIntType(), NULL,
+ isSgScopeStatement(kernel_decl->get_definition()->get_body()));
+ // SgStatementPtrList *tnl = new SgStatementPtrList;
+ // (*tnl).push_back(isSgStatement(var_decl));
+ appendStatement(var_decl, kernel_decl->get_definition()->get_body());
+
+ SgVariableSymbol* tz =
+ kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
+ "tz");
+ SgStatement* assign = isSgStatement(
+ buildAssignStatement(buildVarRefExp(tz),
+ buildOpaqueVarRefExp("threadIdx.z",
+ kernel_decl->get_definition()->get_body())));
+ // (*tnl).push_back(assign);
+ // body = ocg->StmtListAppend(body,
+ // new CG_roseRepr(tnl));
+ appendStatement(assign, kernel_decl->get_definition()->get_body());
+
+ }
+
+ std::map<std::string, SgVariableSymbol*> loop_idxs; //map from idx names to their new syms
+
+ SgNode* swapped_ = swapVarReferences(code, syms,
+ kernel_decl->get_definition()->get_symbol_table(),
+ kernel_decl->get_definition()->get_body()->get_symbol_table(),
+ kernel_decl->get_definition()->get_body());
+
+ //std::cout << swapped_->unparseToString() << std::endl << std::endl;
+
+ SgNode *swapped = recursiveFindReplacePreferedIdxs(swapped_,
+ kernel_decl->get_definition()->get_body()->get_symbol_table(),
+ kernel_decl->get_definition()->get_symbol_table(),
+ kernel_decl->get_definition()->get_body(), loop_idxs, globalScope); //in-place swapping
+ //swapped->print();
+
+ if (!isSgBasicBlock(swapped)) {
+ appendStatement(isSgStatement(swapped),
+ kernel_decl->get_definition()->get_body());
+ swapped->set_parent(
+ isSgNode(kernel_decl->get_definition()->get_body()));
+ } else {
+
+ for (SgStatementPtrList::iterator it =
+ isSgBasicBlock(swapped)->get_statements().begin();
+ it != isSgBasicBlock(swapped)->get_statements().end(); it++) {
+ appendStatement(*it, kernel_decl->get_definition()->get_body());
+ (*it)->set_parent(
+ isSgNode(kernel_decl->get_definition()->get_body()));
+
+ }
+
+ }
+
+ for (int i = 0; i < indexes.size(); i++) {
+ std::vector<SgForStatement*> tfs = findCommentedFors(indexes[i],
+ swapped);
+ for (int k = 0; k < tfs.size(); k++) {
+ //printf("replacing %p tfs for index %s\n", tfs[k], indexes[i]);
+ SgNode* newBlock = forReduce(tfs[k], loop_idxs[indexes[i]],
+ kernel_decl->get_definition());
+ //newBlock->print();
+ swap_node_for_node_list(tfs[k], newBlock);
+ //printf("AFTER SWAP\n"); newBlock->print();
+ }
+ }
+
+ //--derick replace array refs of texture mapped vars here
+ body = new CG_roseRepr(kernel_decl->get_definition()->get_body());
+ std::vector<IR_ArrayRef*> refs = ir->FindArrayRef(body);
+ texmapArrayRefs(texture, &refs, globals, ir, ocg);
+ // do the same for constant mapped vars
+ consmapArrayRefs(constant_mem, &refs, globals, ir, ocg);
+
+ return swapped;
+}
+
+//Order taking out dummy variables
+std::vector<std::string> cleanOrder(std::vector<std::string> idxNames) {
+ std::vector<std::string> results;
+ for (int j = 0; j < idxNames.size(); j++) {
+ if (idxNames[j].length() != 0)
+ results.push_back(idxNames[j]);
+ }
+ return results;
+}
+
+//First non-dummy level in ascending order
+int LoopCuda::nonDummyLevel(int stmt, int level) {
+ //level comes in 1-basd and should leave 1-based
+ for (int j = level - 1; j < idxNames[stmt].size(); j++) {
+ if (idxNames[stmt][j].length() != 0) {
+ //printf("found non dummy level of %d with idx: %s when searching for %d\n", j+1, (const char*) idxNames[stmt][j], level);
+ return j + 1;
+ }
+ }
+ char buf[128];
+ sprintf(buf, "%d", level);
+ throw std::runtime_error(
+ std::string("Unable to find a non-dummy level starting from ")
+ + std::string(buf));
+}
+
+int LoopCuda::findCurLevel(int stmt, std::string idx) {
+ for (int j = 0; j < idxNames[stmt].size(); j++) {
+ if (strcmp(idxNames[stmt][j].c_str(), idx.c_str()) == 0)
+ return j + 1;
+ }
+ throw std::runtime_error(
+ std::string("Unable to find index ") + idx
+ + std::string(" in current list of indexes"));
+}
+
+void LoopCuda::permute_cuda(int stmt,
+ const std::vector<std::string>& curOrder) {
+ //printf("curOrder: ");
+ //printVs(curOrder);
+ //printf("idxNames: ");
+ //printVS(idxNames[stmt]);
+ std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt]);
+ bool same = true;
+ std::vector<int> pi;
+ for (int i = 0; i < curOrder.size(); i++) {
+ bool found = false;
+ for (int j = 0; j < cIdxNames.size(); j++) {
+ if (strcmp(cIdxNames[j].c_str(), curOrder[i].c_str()) == 0) {
+ pi.push_back(j + 1);
+ found = true;
+ if (j != i)
+ same = false;
+ }
+ }
+ if (!found) {
+ throw std::runtime_error(
+ "One of the indexes in the permute order were not "
+ "found in the current set of indexes.");
+ }
+ }
+ for (int i = curOrder.size(); i < cIdxNames.size(); i++) {
+ pi.push_back(i);
+ }
+ if (same)
+ return;
+ permute(stmt, pi);
+ //Set old indexe names as new
+ for (int i = 0; i < curOrder.size(); i++) {
+ idxNames[stmt][i] = curOrder[i].c_str(); //what about sibling stmts?
+ }
+}
+
+bool LoopCuda::permute(int stmt_num, const std::vector<int> &pi) {
+// check for sanity of parameters
+ if (stmt_num >= stmt.size() || stmt_num < 0)
+ throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+ const int n = stmt[stmt_num].xform.n_out();
+ if (pi.size() > (n - 1) / 2)
+ throw std::invalid_argument(
+ "iteration space dimensionality does not match permute dimensionality");
+ int first_level = 0;
+ int last_level = 0;
+ for (int i = 0; i < pi.size(); i++) {
+ if (pi[i] > (n - 1) / 2 || pi[i] <= 0)
+ throw std::invalid_argument(
+ "invalid loop level " + to_string(pi[i])
+ + " in permuation");
+
+ if (pi[i] != i + 1) {
+ if (first_level == 0)
+ first_level = i + 1;
+ last_level = i + 1;
+ }
+ }
+ if (first_level == 0)
+ return true;
+
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ std::set<int> active = getStatements(lex, 2 * first_level - 2);
+ Loop::permute(active, pi);
+}
+
+void LoopCuda::tile_cuda(int stmt, int level, int outer_level) {
+ tile_cuda(stmt, level, 1, outer_level, "", "", CountedTile);
+}
+void LoopCuda::tile_cuda(int level, int tile_size, int outer_level,
+ std::string idxName, std::string ctrlName, TilingMethodType method) {
+ tile_cuda(0, level, tile_size, outer_level, idxName, ctrlName, method);
+}
+
+void LoopCuda::tile_cuda(int stmt, int level, int tile_size, int outer_level,
+ std::string idxName, std::string ctrlName, TilingMethodType method) {
+ //Do regular tile but then update the index and control loop variable
+ //names as well as the idxName to reflect the current state of things.
+ //printf("tile(%d,%d,%d,%d)\n", stmt, level, tile_size, outer_level);
+ //printf("idxNames before: ");
+ //printVS(idxNames[stmt]);
+
+ tile(stmt, level, tile_size, outer_level, method);
+
+ if (idxName.size())
+ idxNames[stmt][level - 1] = idxName.c_str();
+ if (tile_size == 1) {
+ //potentially rearrange loops
+ if (outer_level < level) {
+ std::string tmp = idxNames[stmt][level - 1];
+ for (int i = level - 1; i > outer_level - 1; i--) {
+ if (i - 1 >= 0)
+ idxNames[stmt][i] = idxNames[stmt][i - 1];
+ }
+ idxNames[stmt][outer_level - 1] = tmp;
+ }
+ //TODO: even with a tile size of one, you need a insert (of a dummy loop)
+ idxNames[stmt].insert(idxNames[stmt].begin() + (level), "");
+ } else {
+ if (!ctrlName.size())
+ throw std::runtime_error("No ctrl loop name for tile");
+ //insert
+ idxNames[stmt].insert(idxNames[stmt].begin() + (outer_level - 1),
+ ctrlName.c_str());
+ }
+
+ //printf("idxNames after: ");
+ //printVS(idxNames[stmt]);
+}
+
+bool LoopCuda::datacopy_privatized_cuda(int stmt_num, int level,
+ const std::string &array_name,
+ const std::vector<int> &privatized_levels, bool allow_extra_read,
+ int fastest_changing_dimension, int padding_stride,
+ int padding_alignment, bool cuda_shared) {
+ int old_stmts = stmt.size();
+ // printf("before datacopy_privatized:\n");
+ printIS();
+ //datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, cuda_shared);
+ if (cuda_shared)
+ datacopy_privatized(stmt_num, level, array_name, privatized_levels,
+ allow_extra_read, fastest_changing_dimension, padding_stride,
+ padding_alignment, 1);
+ else
+ datacopy_privatized(stmt_num, level, array_name, privatized_levels,
+ allow_extra_read, fastest_changing_dimension, padding_stride,
+ padding_alignment, 0);
+ // printf("after datacopy_privatized:\n");
+ printIS();
+
+ //Adjust idxNames to reflect updated state
+ std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]);
+ int new_stmts = stmt.size();
+ for (int i = old_stmts; i < new_stmts; i++) {
+ //printf("fixing up statement %d\n", i);
+ std::vector<std::string> idxs;
+
+ //protonu-making sure the vector of nonSplitLevels grows along with
+ //the statement structure
+ stmt_nonSplitLevels.push_back(std::vector<int>());
+
+ //Indexes up to level will be the same
+ for (int j = 0; j < level - 1; j++)
+ idxs.push_back(cIdxNames[j]);
+
+ //Expect privatized_levels to match
+ for (int j = 0; j < privatized_levels.size(); j++)
+ idxs.push_back(cIdxNames[privatized_levels[j] - 1]);//level is one-based
+
+ //all further levels should match order they are in originally
+ if (privatized_levels.size()) {
+ int last_privatized = privatized_levels.back();
+ int top_level = last_privatized
+ + (stmt[i].IS.n_set() - idxs.size());
+ //printf("last privatized_levels: %d top_level: %d\n", last_privatized, top_level);
+ for (int j = last_privatized; j < top_level; j++) {
+ idxs.push_back(cIdxNames[j]);
+ //printf("pushing back: %s\n", (const char*)cIdxNames[j]);
+ }
+ }
+ idxNames.push_back(idxs);
+ }
+}
+
+bool LoopCuda::datacopy_cuda(int stmt_num, int level,
+ const std::string &array_name,
+ const std::vector<std::string> new_idxs,
+ bool allow_extra_read, int fastest_changing_dimension,
+ int padding_stride, int padding_alignment, bool cuda_shared) {
+
+ int old_stmts = stmt.size();
+ //datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment,cuda_shared);
+ // printf("before datacopy:\n");
+ // printIS();
+ if (cuda_shared)
+ datacopy(stmt_num, level, array_name, allow_extra_read,
+ fastest_changing_dimension, padding_stride, padding_alignment,
+ 1);
+ else
+ datacopy(stmt_num, level, array_name, allow_extra_read,
+ fastest_changing_dimension, padding_stride, padding_alignment,
+ 0);
+ // printf("after datacopy:\n");
+ printIS();
+
+ //Adjust idxNames to reflect updated state
+ std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]);
+ int new_stmts = stmt.size();
+ for (int i = old_stmts; i < new_stmts; i++) {
+ //printf("fixing up statement %d\n", i);
+ std::vector<std::string> idxs;
+
+ //protonu-making sure the vector of nonSplitLevels grows along with
+ //the statement structure
+ stmt_nonSplitLevels.push_back(std::vector<int>());
+
+ //Indexes up to level will be the same
+ for (int j = 0; j < level - 1; j++)
+ idxs.push_back(cIdxNames[j]);
+
+ //all further levels should get names from new_idxs
+ int top_level = stmt[i].IS.n_set();
+ //printf("top_level: %d level: %d\n", top_level, level);
+ if (new_idxs.size() < top_level - level + 1)
+ throw std::runtime_error(
+ "Need more new index names for new datacopy loop levels");
+
+ for (int j = level - 1; j < top_level; j++) {
+ idxs.push_back(new_idxs[j - level + 1].c_str());
+ //printf("pushing back: %s\n", new_idxs[j-level+1].c_str());
+ }
+ idxNames.push_back(idxs);
+ }
+}
+
+bool LoopCuda::unroll_cuda(int stmt_num, int level, int unroll_amount) {
+ int old_stmts = stmt.size();
+ //bool b= unroll(stmt_num, , unroll_amount);
+
+ int dim = 2 * level - 1;
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ std::set<int> same_loop = getStatements(lex, dim - 1);
+
+ level = nonDummyLevel(stmt_num, level);
+ //printf("unrolling %d at level %d\n", stmt_num,level);
+
+ //protonu--using the new version of unroll, which returns
+ //a set of ints instead of a bool. To keep Gabe's logic
+ //I'll check the size of the set, if it's 0 return true
+ //bool b= unroll(stmt_num, level, unroll_amount);
+ std::set<int> b_set = unroll(stmt_num, level, unroll_amount, idxNames);
+ bool b = false;
+ if (b_set.size() == 0)
+ b = true;
+ //end--protonu
+
+ //Adjust idxNames to reflect updated state
+ std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]);
+ std::vector<std::string> origSource = idxNames[stmt_num];
+ ;
+ //Drop index names at level
+ if (unroll_amount == 0) {
+ //For all statements that were in this unroll together, drop index name for unrolled level
+ idxNames[stmt_num][level - 1] = "";
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++) {
+ //printf("in same loop as %d is %d\n", stmt_num, (*i));
+ //idxNames[(*i)][level-1] = "";
+ idxNames[(*i)] = idxNames[stmt_num];
+ }
+ }
+
+ lex = getLexicalOrder(stmt_num);
+ same_loop = getStatements(lex, dim - 1);
+
+ bool same_as_source = false;
+ int new_stmts = stmt.size();
+ for (int i = old_stmts; i < new_stmts; i++) {
+ //Check whether we had a sync for the statement we are unrolling, if
+ //so, propogate that to newly created statements so that if they are
+ //in a different loop structure, they will also get a syncthreads
+ int size = syncs.size();
+ for (int j = 0; j < size; j++) {
+ if (syncs[j].first == stmt_num)
+ syncs.push_back(make_pair(i, syncs[j].second));
+ }
+
+ //protonu-making sure the vector of nonSplitLevels grows along with
+ //the statement structure
+ stmt_nonSplitLevels.push_back(std::vector<int>());
+
+ //We expect that new statements have a constant for the variable in
+ //stmt[i].IS at level (as seen with print_with_subs), otherwise there
+ //will be a for loop at level and idxNames should match stmt's
+ //idxNames pre-unrolled
+ Relation IS = stmt[i].IS;
+ //Ok, if you know how the hell to get anything out of a Relation, you
+ //should probably be able to do this more elegantly. But for now, I'm
+ //hacking it.
+ std::string s = IS.print_with_subs_to_string();
+ //s looks looks like
+ //{[_t49,8,_t51,_t52,128]: 0 <= _t52 <= 3 && 0 <= _t51 <= 15 && 0 <= _t49 && 64_t49+16_t52+_t51 <= 128}
+ //where level == 5, you see a integer in the input set
+
+ //If that's not an integer and this is the first new statement, then
+ //we think codegen will have a loop at that level. It's not perfect,
+ //not sure if it can be determined without round-tripping to codegen.
+ int sIdx = 0;
+ int eIdx = 0;
+ for (int j = 0; j < level - 1; j++) {
+ sIdx = s.find(",", sIdx + 1);
+ if (sIdx < 0)
+ break;
+ }
+ if (sIdx > 0) {
+ eIdx = s.find("]");
+ int tmp = s.find(",", sIdx + 1);
+ if (tmp > 0 && tmp < eIdx)
+ eIdx = tmp; //", before ]"
+ if (eIdx > 0) {
+ sIdx++;
+ std::string var = s.substr(sIdx, eIdx - sIdx);
+ //printf("%s\n", s.c_str());
+ //printf("set var for stmt %d at level %d is %s\n", i, level, var.c_str());
+ if (atoi(var.c_str()) == 0 && i == old_stmts) {
+ //TODO:Maybe do see if this new statement would be in the same
+ //group as the original and if it would, don't say
+ //same_as_source
+ if (same_loop.find(i) == same_loop.end()) {
+ printf(
+ "stmt %d level %d, newly created unroll statement should have same level indexes as source\n",
+ i, level);
+ same_as_source = true;
+ }
+ }
+ }
+ }
+
+ //printf("fixing up statement %d n_set %d with %d levels\n", i, stmt[i].IS.n_set(), level-1);
+ if (same_as_source)
+ idxNames.push_back(origSource);
+ else
+ idxNames.push_back(idxNames[stmt_num]);
+ }
+
+ return b;
+}
+
+void LoopCuda::copy_to_texture(const char *array_name) {
+ //protonu--placeholder for now
+ //set the bool for using cuda memory as true
+ //in a vector of strings, put the names of arrays to tex mapped
+ if (!texture)
+ texture = new texture_memory_mapping(true, array_name);
+ else
+ texture->add(array_name);
+
+}
+
+//void LoopCuda::copy_to_texture_2d(const char *array_name, int width, int height) {
+// if (!texture)
+// texture = new texture_memory_mapping(true, array_name, width, height);
+// else
+// texture->add(array_name, width, height);
+//}
+
+void LoopCuda::copy_to_constant(const char *array_name) {
+ if(!constant_mem)
+ constant_mem = new constant_memory_mapping(true, array_name);
+ else
+ constant_mem->add(array_name);
+}
+
+//protonu--moving this from Loop
+SgNode* LoopCuda::codegen() {
+ if (code_gen_flags & GenCudaizeV2)
+ return cudaize_codegen_v2();
+ //Do other flagged codegen methods, return plain vanilla generated code
+ return getCode();
+}
+
+//These three are in Omega code_gen.cc and are used as a massive hack to
+//get out some info from MMGenerateCode. Yea for nasty side-effects.
+namespace omega {
+ extern int checkLoopLevel;
+ extern int stmtForLoopCheck;
+ extern int upperBoundForLevel;
+ extern int lowerBoundForLevel;
+}
+
+CG_outputRepr* LoopCuda::extractCudaUB(int stmt_num, int level,
+ int &outUpperBound, int &outLowerBound) {
+ // check for sanity of parameters
+ const int m = stmt.size();
+ if (stmt_num >= m || stmt_num < 0)
+ throw std::invalid_argument("invalid statement " + to_string(stmt_num));
+ const int n = stmt[stmt_num].xform.n_out();
+ if (level > (n - 1) / 2 || level <= 0)
+ throw std::invalid_argument("invalid loop level " + to_string(level));
+
+ int dim = 2 * level - 1;
+
+ std::vector<int> lex = getLexicalOrder(stmt_num);
+ std::set<int> same_loop = getStatements(lex, dim - 1);
+
+ // extract the intersection of the iteration space to be considered
+ Relation hull;
+ {
+ hull = Relation::True(n);
+ for (std::set<int>::iterator i = same_loop.begin();
+ i != same_loop.end(); i++) {
+ Relation r = getNewIS(*i);
+ for (int j = dim + 2; j <= r.n_set(); j++)
+ r = Project(r, r.set_var(j));
+ hull = Intersection(hull, r);
+ hull.simplify(2, 4);
+ }
+
+ for (int i = 2; i <= dim + 1; i += 2) {
+ //std::string name = std::string("_t") + to_string(t_counter++);
+ std::string name = std::string("_t")
+ + to_string(tmp_loop_var_name_counter++);
+ hull.name_set_var(i, name);
+ }
+ hull.setup_names();
+ }
+
+ // extract the exact loop bound of the dimension to be unrolled
+ if (is_single_iteration(hull, dim)) {
+ throw std::runtime_error(
+ "No loop availabe at level to extract upper bound.");
+ }
+ Relation bound = get_loop_bound(hull, dim);
+ if (!bound.has_single_conjunct() || !bound.is_satisfiable()
+ || bound.is_tautology())
+ throw loop_error(
+ "loop error: unable to extract loop bound for cudaize");
+
+ // extract the loop stride
+ EQ_Handle stride_eq;
+ /*int stride = 1;
+ {
+ bool simple_stride = true;
+ int strides = countStrides(bound.query_DNF()->single_conjunct(),
+ bound.set_var(dim + 1), stride_eq, simple_stride);
+ if (strides > 1)
+ throw loop_error("loop error: too many strides");
+ else if (strides == 1) {
+ int sign = stride_eq.get_coef(bound.set_var(dim + 1));
+ // assert(sign == 1 || sign == -1);
+ Constr_Vars_Iter it(stride_eq, true);
+ stride = abs((*it).coef / sign);
+ }
+ }
+ */
+ int stride = 1;
+ {
+
+ coef_t stride;
+ std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
+ bound.set_var(dim + 1));
+ if (result.second == NULL)
+ stride = 1;
+ else
+ stride = abs(result.first.get_coef(result.second))
+ / gcd(abs(result.first.get_coef(result.second)),
+ abs(result.first.get_coef(bound.set_var(dim + 1))));
+
+ if (stride > 1)
+ throw loop_error("loop error: too many strides");
+ /*else if (stride == 1) {
+ int sign = result.first.get_coef(bound.set_var(dim+1));
+ assert(sign == 1 || sign == -1);
+ } */
+ }
+
+ if (stride != 1) {
+ char buf[1024];
+ sprintf(buf, "Cudaize: Loop at level %d has non-one stride of %d",
+ level, stride);
+ throw std::runtime_error(buf);
+ }
+
+ //Use code generation system to build tell us our bound information. We
+ //need a hard upper bound a 0 lower bound.
+
+ checkLoopLevel = level * 2;
+ stmtForLoopCheck = stmt_num;
+ upperBoundForLevel = -1;
+ lowerBoundForLevel = -1;
+ printCode(1, false);
+ checkLoopLevel = 0;
+
+ outUpperBound = upperBoundForLevel;
+ outLowerBound = lowerBoundForLevel;
+
+ if (outUpperBound == -1) {
+
+ CG_result* temp = last_compute_cgr_;
+
+ while (temp) {
+ CG_loop * loop;
+ if (loop = dynamic_cast<CG_loop*>(temp)) {
+ if (loop->level_ == 2 * level) {
+ Relation bound = copy(loop->bounds_);
+ Variable_ID v = bound.set_var(2 * level);
+ for (GEQ_Iterator e(
+ const_cast<Relation &>(bound).single_conjunct()->GEQs());
+ e; e++) {
+ if ((*e).get_coef(v) < 0
+ && (*e).is_const_except_for_global(v))
+ return output_upper_bound_repr(ir->builder(), *e, v,
+ bound,
+ std::vector<std::pair<CG_outputRepr *, int> >(
+ bound.n_set(),
+ std::make_pair(
+ static_cast<CG_outputRepr *>(NULL),
+ 0)));
+ }
+ }
+ if (loop->level_ > 2 * level)
+ break;
+ else
+ temp = loop->body_;
+ } else
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void LoopCuda::printCode(int effort, bool actuallyPrint) const {
+ const int m = stmt.size();
+ if (m == 0)
+ return;
+ const int n = stmt[0].xform.n_out();
+
+ /*or (int i = 0; i < m; i++) {
+ IS[i + 1] = stmt[i].IS;
+ xform[i + 1] = stmt[i].xform;
+
+ //nonSplitLevels[i+1] = stmt[i].nonSplitLevels;
+ }
+ */
+
+ // invalidate saved codegen computation
+ if (last_compute_cgr_ != NULL) {
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ }
+
+ if (last_compute_cg_ != NULL) {
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+ }
+
+ //Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
+ /*CG_stringBuilder *ocg = new CG_stringBuilder();
+ Tuple<CG_outputRepr *> nameInfo;
+ for (int i = 1; i <= m; i++)
+ nameInfo.append(new CG_stringRepr("s" + to_string(i)));
+ */
+
+ // -- replacing MMGenerateCode
+ // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort);
+ // -- in the future, these if statements need to be cleaned up.
+ // -- something like check_lastComputeCG might be a decent protected member function
+ // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_
+ //if (last_compute_cg_ == NULL) {
+ std::vector<Relation> IS(m);
+ std::vector<Relation> xforms(m);
+ std::vector<std::vector<int> > nonSplitLevels(m);
+
+ /* std::vector < std::vector <std::string> > idxTupleNames;
+ if (useIdxNames) {
+ for (int i = 0; i < idxNames.size(); i++) {
+ Tuple<std::string> idxs;
+ for (int j = 0; j < idxNames[i].size(); j++)
+ idxs.append(idxNames[i][j]);
+ idxTupleNames.append(idxs);
+ }
+ }
+ */
+ for (int i = 0; i < m; i++) {
+ IS[i] = stmt[i].IS;
+ xforms[i] = stmt[i].xform;
+ nonSplitLevels[i] = stmt_nonSplitLevels[i];
+ }
+ Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
+
+ last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames,
+ syncs);
+
+ delete last_compute_cgr_; // this was just done above?
+ last_compute_cgr_ = NULL;
+ //}
+
+ if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
+ delete last_compute_cgr_;
+ last_compute_cgr_ = last_compute_cg_->buildAST(effort);
+ last_compute_effort_ = effort;
+ }
+
+ //std::vector<CG_outputRepr *> stmts(m);
+ //for (int i = 0; i < m; i++)
+ // stmts[i] = stmt[i].code;
+ //CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts);
+ // -- end replacing MMGenerateCode
+ std::string repr = last_compute_cgr_->printString();
+
+ if (actuallyPrint)
+ std::cout << repr << std::endl;
+ //std::cout << static_cast<CG_stringRepr*>(repr)->GetString();
+ /*
+ for (int i = 1; i <= m; i++)
+ delete nameInfo[i];
+ */
+
+ //delete ocg;
+}
+
+void LoopCuda::printRuntimeInfo() const {
+ for (int i = 0; i < stmt.size(); i++) {
+ Relation IS = stmt[i].IS;
+ Relation xform = stmt[i].xform;
+ printf("stmt[%d]\n", i);
+ printf("IS\n");
+ IS.print_with_subs();
+
+ printf("xform[%d]\n", i);
+ xform.print_with_subs();
+
+ }
+}
+
+void LoopCuda::printIndexes() const {
+ for (int i = 0; i < stmt.size(); i++) {
+ printf("stmt %d nset %d ", i, stmt[i].IS.n_set());
+
+ for (int j = 0; j < idxNames[i].size(); j++) {
+ if (j > 0)
+ printf(",");
+ printf("%s", idxNames[i][j].c_str());
+ }
+ printf("\n");
+ }
+}
+
+SgNode* LoopCuda::getCode(int effort) const {
+ const int m = stmt.size();
+ if (m == 0)
+ return new SgNode;
+ const int n = stmt[0].xform.n_out();
+ /*
+ Tuple<CG_outputRepr *> ni(m);
+ Tuple < Relation > IS(m);
+ Tuple < Relation > xform(m);
+ vector < vector <int> > nonSplitLevels(m);
+ for (int i = 0; i < m; i++) {
+ ni[i + 1] = stmt[i].code;
+ IS[i + 1] = stmt[i].IS;
+ xform[i + 1] = stmt[i].xform;
+ nonSplitLevels[i + 1] = stmt_nonSplitLevels[i];
+
+ //nonSplitLevels[i+1] = stmt[i].nonSplitLevels;
+ }
+ */
+ //Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
+//#ifdef DEBUG
+//#endif
+ //std::cout << GetString(MMGenerateCode(new CG_stringBuilder(), xform, IS, ni, known,
+ // nonSplitLevels, syncs, idxTupleNames, effort));
+ if (last_compute_cgr_ != NULL) {
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ }
+
+ if (last_compute_cg_ != NULL) {
+ delete last_compute_cg_;
+ last_compute_cg_ = NULL;
+ }
+
+ CG_outputBuilder *ocg = ir->builder();
+ // -- replacing MMGenerateCode
+ // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort);
+ // -- in the future, these if statements need to be cleaned up.
+ // -- something like check_lastComputeCG might be a decent protected member function
+ // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_
+ //if (last_compute_cg_ == NULL) {
+ std::vector<Relation> IS(m);
+ std::vector<Relation> xforms(m);
+ std::vector<std::vector<int> > nonSplitLevels(m);
+ for (int i = 0; i < m; i++) {
+ IS[i] = stmt[i].IS;
+ xforms[i] = stmt[i].xform;
+ nonSplitLevels[i] = stmt_nonSplitLevels[i];
+ }
+
+ /*std::vector < std::vector<std::string> > idxTupleNames;
+ if (useIdxNames) {
+ for (int i = 0; i < idxNames.size(); i++) {
+ std::vector<std::string> idxs;
+ for (int j = 0; j < idxNames[i].size(); j++)
+ idxs.push_back(idxNames[i][j]);
+ idxTupleNames.push_back(idxs);
+ }
+ }
+ */
+ Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
+
+ last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames,
+ syncs);
+ delete last_compute_cgr_;
+ last_compute_cgr_ = NULL;
+ //}
+
+ if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
+ delete last_compute_cgr_;
+ last_compute_cgr_ = last_compute_cg_->buildAST(effort);
+ last_compute_effort_ = effort;
+ }
+
+ std::vector<CG_outputRepr *> stmts(m);
+ for (int i = 0; i < m; i++)
+ stmts[i] = stmt[i].code;
+ CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts);
+ // -- end replacing MMGenerateCode
+
+ //CG_outputRepr *overflow_initialization = ocg->CreateStmtList();
+ CG_outputRepr *overflow_initialization = ocg->StmtListAppend(NULL, NULL);
+ for (std::map<int, std::vector<Free_Var_Decl *> >::const_iterator i =
+ overflow.begin(); i != overflow.end(); i++)
+ for (std::vector<Free_Var_Decl *>::const_iterator j = i->second.begin();
+ j != i->second.end(); j++)
+ //overflow_initialization = ocg->StmtListAppend(overflow_initialization, ocg->CreateStmtList(ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0))));
+ overflow_initialization = ocg->StmtListAppend(
+ overflow_initialization,
+ ocg->StmtListAppend(
+ ocg->CreateAssignment(0,
+ ocg->CreateIdent((*j)->base_name()),
+ ocg->CreateInt(0)), NULL));
+
+ repr = ocg->StmtListAppend(overflow_initialization, repr);
+ SgNode *tnl = static_cast<CG_roseRepr *>(repr)->GetCode();
+ SgStatementPtrList *list = static_cast<CG_roseRepr *>(repr)->GetList();
+
+ if (tnl != NULL)
+ return tnl;
+ else if (tnl == NULL && list != NULL) {
+ SgBasicBlock* bb2 = buildBasicBlock();
+
+ for (SgStatementPtrList::iterator it = (*list).begin();
+ it != (*list).end(); it++)
+ bb2->append_statement(*it);
+
+ tnl = isSgNode(bb2);
+ } else
+ throw loop_error("codegen failed");
+
+ delete repr;
+ /*
+ for (int i = 1; i <= m; i++)
+ delete ni[i];
+ */
+ return tnl;
+
+}
+
+//protonu--adding constructors for the new derived class
+LoopCuda::LoopCuda() :
+ Loop(), code_gen_flags(GenInit) {
+}
+
+LoopCuda::LoopCuda(IR_Control *irc, int loop_num) :
+ Loop(irc) {
+ setup_code = NULL;
+ teardown_code = NULL;
+ code_gen_flags = 0;
+ cu_bx = cu_by = cu_tx = cu_ty = cu_tz = 1;
+ cu_bx_repr = NULL;
+ cu_tx_repr = NULL;
+ cu_by_repr = NULL;
+ cu_ty_repr = NULL;
+ cu_tz_repr = NULL;
+
+ cu_num_reduce = 0;
+ cu_mode = GlobalMem;
+ texture = NULL;
+ constant_mem = NULL;
+
+ int m = stmt.size();
+ //printf("\n the size of stmt(initially) is: %d\n", stmt.size());
+ for (int i = 0; i < m; i++)
+ stmt_nonSplitLevels.push_back(std::vector<int>());
+
+ globals = ((IR_cudaroseCode *) ir)->gsym_;
+ globalScope = ((IR_cudaroseCode *) ir)->first_scope;
+ parameter_symtab = ((IR_cudaroseCode *) ir)->parameter;
+ body_symtab = ((IR_cudaroseCode *) ir)->body;
+ func_body = ((IR_cudaroseCode *) ir)->defn;
+ func_definition = ((IR_cudaroseCode *) ir)->func_defn;
+ std::vector<SgForStatement *> tf = ((IR_cudaroseCode *) ir)->get_loops();
+
+ symtab = tf[loop_num]->get_symbol_table();
+
+ std::vector<SgForStatement *> deepest = find_deepest_loops(
+ isSgNode(tf[loop_num]));
+
+ for (int i = 0; i < deepest.size(); i++) {
+ SgVariableSymbol* vs;
+ SgForInitStatement* list = deepest[i]->get_for_init_stmt();
+ SgStatementPtrList& initStatements = list->get_init_stmt();
+ SgStatementPtrList::const_iterator j = initStatements.begin();
+ if (SgExprStatement *expr = isSgExprStatement(*j))
+ if (SgAssignOp* op = isSgAssignOp(expr->get_expression()))
+ if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand()))
+ vs = var_ref->get_symbol();
+
+ index.push_back(vs->get_name().getString().c_str()); //reflects original code index names
+ }
+
+ for (int i = 0; i < stmt.size(); i++)
+ idxNames.push_back(index); //refects prefered index names (used as handles in cudaize v2)
+ useIdxNames = false;
+
+}
+
+void LoopCuda::printIS() {
+ if (!cudaDebug) return;
+ int k = stmt.size();
+ for (int i = 0; i < k; i++) {
+ printf(" printing statement:%d\n", i);
+ stmt[i].IS.print();
+ }
+}
+