diff options
author | Derick Huth <derickhuth@gmail.com> | 2015-09-24 12:22:41 -0600 |
---|---|---|
committer | Derick Huth <derickhuth@gmail.com> | 2015-09-24 12:22:41 -0600 |
commit | 4631ad76927d433da5d55c3c373a1dfd0f74c9d4 (patch) | |
tree | f8dcba88576ec95e403f0c14efd80e970f30a260 /loop_cuda_rose.cc | |
parent | 6eb2b89896da66a77d0dcdf2d72b98c122826949 (diff) | |
parent | 0cff3f9a3c4ccd434900162ebef4bd814850f481 (diff) | |
download | chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.gz chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.bz2 chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.zip |
Merge pull request #7 from dhuth/master
V0.2.1
Diffstat (limited to 'loop_cuda_rose.cc')
-rw-r--r-- | loop_cuda_rose.cc | 3734 |
1 files changed, 0 insertions, 3734 deletions
diff --git a/loop_cuda_rose.cc b/loop_cuda_rose.cc deleted file mode 100644 index c5633ee..0000000 --- a/loop_cuda_rose.cc +++ /dev/null @@ -1,3734 +0,0 @@ -/***************************************************************************** - Copyright (C) 2009 University of Utah - All Rights Reserved. - - Purpose: - Cudaize methods - - Notes: - - History: - 1/7/10 Created by Gabe Rudy by migrating code from loop.cc - 31/1/11 Modified by Protonu Basu -*****************************************************************************/ -#define TRANSFORMATION_FILE_INFO Sg_File_Info::generateDefaultFileInfoForTransformationNode() -#include <code_gen/CG_stringBuilder.h> -#include <codegen.h> -#include <code_gen/CG_utils.h> -#include <code_gen/CG_outputRepr.h> -#include "loop_cuda_rose.hh" -#include "loop.hh" -#include <math.h> -//#include <useful.h> -#include "omegatools.hh" -#include "ir_cudarose.hh" -#include "ir_rose.hh" -#include "ir_rose_utils.hh" -#include "chill_error.hh" -#include <vector> -#include "Outliner.hh" -//#define DEBUG -using namespace omega; -using namespace SageBuilder; -using namespace SageInterface; -//using namespace Outliner; -//using namespace ASTtools; -char *k_cuda_texture_memory; //protonu--added to track texture memory type -//extern char *omega::k_cuda_texture_memory; //protonu--added to track texture memory type -extern char *omega::k_ocg_comment; - -static int cudaDebug; -class CudaStaticInit { -public: - CudaStaticInit() { - cudaDebug = 0; //Change this to 1 for debug - } -}; -static CudaStaticInit junkInitInstance__; - -std::string& upcase(std::string& s) { - for (int i = 0; i < s.size(); i++) - s[i] = toupper(s[i]); - return s; -} - -void printVs(const std::vector<std::string>& curOrder) { - if (!cudaDebug) return; - for (int i = 0; i < curOrder.size(); i++) { - if (i > 0) - printf(","); - printf("%s", curOrder[i].c_str()); - } - printf("\n"); -} - -void printVS(const std::vector<std::string>& curOrder) { - if(!cudaDebug) return; - for (int i = 0; i < curOrder.size(); i++) { - if (i > 0) - printf(","); - printf("%s", curOrder[i].c_str()); - } - printf("\n"); -} - -LoopCuda::~LoopCuda() { - const int m = stmt.size(); - for (int i = 0; i < m; i++) - stmt[i].code->clear(); -} - -bool LoopCuda::symbolExists(std::string s) { - - if (body_symtab->find_variable(SgName(s.c_str())) - || parameter_symtab->find_variable(SgName(s.c_str()))) - return true; - if (globals->lookup_variable_symbol(SgName(s.c_str()))) - return true; - for (int i = 0; i < idxNames.size(); i++) - for (int j = 0; j < idxNames[i].size(); j++) - if (strcmp(idxNames[i][j].c_str(), s.c_str()) == 0) - return true; - return false; -} - -void LoopCuda::addSync(int stmt_num, std::string idxName) { - //we store these and code-gen inserts sync to omega comments where stmt - //in loop that has idxName being generated - syncs.push_back(make_pair(stmt_num, idxName)); -} - -void LoopCuda::renameIndex(int stmt_num, std::string idx, std::string newName) { - int level = findCurLevel(stmt_num, idx); - if (idxNames.size() <= stmt_num || idxNames[stmt_num].size() < level) - throw std::runtime_error("Invalid statment number of index"); - idxNames[stmt_num][level - 1] = newName.c_str(); -} - -enum Type { - Int -}; - -SgNode* wrapInIfFromMinBound(SgNode* then_part, SgForStatement* loop, - SgScopeStatement* symtab, SgVariableSymbol* bound_sym) { - // CG_roseBuilder *ocg = new CG_roseBuilder( - - SgBinaryOp* test_expr = isSgBinaryOp(loop->get_test_expr()); - SgExpression* upperBound; - SgExpression* conditional; - upperBound = test_expr->get_rhs_operand(); - CG_outputRepr *ifstmt; - - SgCallExpression *call; - if (call = isSgCallExpression(upperBound)) - if (isSgVarRefExp(call->get_function())->get_symbol()->get_name().getString() - == "__rose_lt") { - SgExprListExp* arg_list = call->get_args(); - SgExpression *if_bound = *(arg_list->get_expressions().begin()); - /*This relies on the minimum expression being the rhs operand of - * the min instruction. - */ - SgIfStmt *ifstmt = buildIfStmt( - buildLessOrEqualOp(buildVarRefExp(bound_sym), if_bound), - isSgStatement(then_part), NULL); - return isSgNode(ifstmt); - - } - -/* if (isSgConditionalExp(upperBound)) { - conditional = isSgConditionalExp(upperBound)->get_conditional_exp(); - - if (isSgBinaryOp(conditional)) { - SgBinaryOp* binop = isSgBinaryOp(conditional); - - if (isSgLessThanOp(binop) || isSgLessOrEqualOp(binop)) { - SgIfStmt *ifstmt = buildIfStmt( - buildLessOrEqualOp(buildVarRefExp(bound_sym), - test_expr), isSgStatement(then_part), NULL); - return isSgNode(ifstmt); - } - - } - - } -*/ - return then_part; -} - -/** - * This would be better if it was done by a CHiLL xformation instead of at codegen - * - * state: - * for(...) - * for(...) - * cur_body - * stmt1 - * - * stm1 is in-between two loops that are going to be reduced. The - * solution is to put stmt1 at the end of cur_body but conditionally run - * in on the last step of the for loop. - * - * A CHiLL command that would work better: - * - * for(...) - * stmt0 - * for(for i=0; i<n; i++) - * cur_body - * stmt1 - * => - * for(...) - * for(for i=0; i<n; i++) - * if(i==0) stmt0 - * cur_body - * if(i==n-1) stmt1 - */ - -std::vector<SgForStatement*> findCommentedFors(const char* index, SgNode* tnl) { - std::vector<SgForStatement *> result; - bool next_loop_ok = false; - - if (isSgBasicBlock(tnl)) { - - SgStatementPtrList& list = isSgBasicBlock(tnl)->get_statements(); - - for (SgStatementPtrList::iterator it = list.begin(); it != list.end(); - it++) { - std::vector<SgForStatement*> t = findCommentedFors(index, - isSgNode(*it)); - std::copy(t.begin(), t.end(), back_inserter(result)); - } - } else if (isSgForStatement(tnl)) { - - AstTextAttribute* att = - (AstTextAttribute*) (isSgNode(tnl)->getAttribute( - "omega_comment")); - std::string comment = att->toString(); - - if (comment.find("~cuda~") != std::string::npos - && comment.find("preferredIdx: ") != std::string::npos) { - std::string idx = comment.substr( - comment.find("preferredIdx: ") + 14, std::string::npos); - if (idx.find(" ") != std::string::npos) - idx = idx.substr(0, idx.find(" ")); - if (strcmp(idx.c_str(), index) == 0) - next_loop_ok = true; - } - - if (next_loop_ok) { - //printf("found loop %s\n", static_cast<tree_for *>(tn)->index()->name()); - result.push_back(isSgForStatement(tnl)); - } else { - //printf("looking down for loop %s\n", static_cast<tree_for *>(tn)->index()->name()); - std::vector<SgForStatement*> t = findCommentedFors(index, - isSgForStatement(tnl)->get_loop_body()); - std::copy(t.begin(), t.end(), back_inserter(result)); - } - next_loop_ok = false; - } else if (isSgIfStmt(tnl)) { - //printf("looking down if\n"); - SgIfStmt *tni = isSgIfStmt(tnl); - std::vector<SgForStatement*> t = findCommentedFors(index, - tni->get_true_body()); - std::copy(t.begin(), t.end(), back_inserter(result)); - } - - return result; -} - -SgNode* forReduce(SgForStatement* loop, SgVariableSymbol* reduceIndex, - SgScopeStatement* body_syms) { - //We did the replacements all at once with recursiveFindPreferedIdxs - //replacements r; - //r.oldsyms.append(loop->index()); - //r.newsyms.append(reduceIndex); - //tree_for* new_loop = (tree_for*)loop->clone_helper(&r, true); - SgForStatement* new_loop = loop; - - //return body one loops in - SgNode* tnl = loop_body_at_level(new_loop, 1); - //wrap in conditional if necessary - tnl = wrapInIfFromMinBound(tnl, new_loop, body_syms, reduceIndex); - return tnl; -} - -void recursiveFindRefs(SgNode* code, std::set<const SgVariableSymbol *>& syms, - SgFunctionDefinition* def) { - - SgStatement* s = isSgStatement(code); - // L = {symbols defined within 's'}, local variables declared within 's' - ASTtools::VarSymSet_t L; - ASTtools::collectDefdVarSyms(s, L); - //dump (L, "L = "); - - // U = {symbols used within 's'} - ASTtools::VarSymSet_t U; - ASTtools::collectRefdVarSyms(s, U); - //dump (U, "U = "); - - // U - L = {symbols used within 's' but not defined in 's'} - // variable references to non-local-declared variables - ASTtools::VarSymSet_t diff_U_L; - set_difference(U.begin(), U.end(), L.begin(), L.end(), - inserter(diff_U_L, diff_U_L.begin())); - //dump (diff_U_L, "U - L = "); - - // Q = {symbols defined within the function surrounding 's' that are - // visible at 's'}, including function parameters - ASTtools::VarSymSet_t Q; - ASTtools::collectLocalVisibleVarSyms(def->get_declaration(), s, Q); -// dump (Q, "Q = "); - - // (U - L) \cap Q = {variables that need to be passed as parameters - // to the outlined function} - // a sub set of variables that are not globally visible (no need to pass at all) - // It excludes the variables with a scope between global and the enclosing function - set_intersection(diff_U_L.begin(), diff_U_L.end(), Q.begin(), Q.end(), - inserter(syms, syms.begin())); - - /* std::vector<SgVariableSymbol *> scalars; - //SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); - SgStatement* stmt; - SgExpression* exp; - if (tnl != NULL) { - if(stmt = isSgStatement(tnl)){ - if(isSgBasicBlock(stmt)){ - SgStatementPtrList& stmts = isSgBasicBlock(stmt)->get_statements(); - for(int i =0; i < stmts.size(); i++){ - //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(stmts[i])); - std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(stmts[i])); - //delete r; - std::copy(a.begin(), a.end(), back_inserter(scalars)); - } - - } - else if(isSgForStatement(stmt)){ - - SgForStatement *tnf = isSgForStatement(stmt); - //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tnf->get_loop_body())); - std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tnf->get_loop_body())); - //delete r; - std::copy(a.begin(), a.end(), back_inserter(scalars)); - } - else if(isSgFortranDo(stmt)){ - SgFortranDo *tfortran = isSgFortranDo(stmt); - omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tfortran->get_body())); - std::vector<SgVariableSymbol *> a = recursiveFindRefs(r); - delete r; - std::copy(a.begin(), a.end(), back_inserter(scalars)); - } - - else if(isSgIfStmt(stmt) ){ - SgIfStmt* tni = isSgIfStmt(stmt); - //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(tni->get_conditional())); - std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tni->get_conditional())); - //delete r; - std::copy(a.begin(), a.end(), back_inserter(scalars)); - //r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); - a = recursiveFindRefs(isSgNode(tni->get_true_body())); - //delete r; - std::copy(a.begin(), a.end(), back_inserter(scalars)); - //r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); - a = recursiveFindRefs(isSgNode(tni->get_false_body())); - //delete r; - std::copy(a.begin(), a.end(), back_inserter(scalars)); - } - else if(isSgExprStatement(stmt)) { - //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgExpression(isSgExprStatement(stmt)->get_expression())); - std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(isSgExprStatement(stmt)->get_expression())); - //delete r; - std::copy(a.begin(), a.end(), back_inserter(scalars)); - - } - } - } - else{ - SgExpression* op = isSgExpression(tnl); - if(isSgVarRefExp(op)){ - - scalars.push_back(isSgVarRefExp(op)->get_symbol()); - - } - else if( isSgAssignOp(op)){ - //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgAssignOp(op)->get_lhs_operand()); - std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_lhs_operand())); - //delete r1; - std::copy(a1.begin(), a1.end(), back_inserter(scalars)); - //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgAssignOp(op)->get_rhs_operand()); - std::vector<SgVariableSymbol *> a2 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_rhs_operand())); - //delete r2; - std::copy(a2.begin(), a2.end(), back_inserter(scalars)); - - } - else if(isSgBinaryOp(op)){ - // omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_lhs_operand()); - std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgBinaryOp(op)->get_lhs_operand())); - //delete r1; - std::copy(a1.begin(), a1.end(), back_inserter(scalars)); - //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_rhs_operand()); - std::vector<SgVariableSymbol *> a2 = recursiveFindRefs((isSgBinaryOp(op)->get_rhs_operand())); - //delete r2; - std::copy(a2.begin(), a2.end(), back_inserter(scalars)); - } - else if(isSgUnaryOp(op)){ - //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgUnaryOp(op)->get_operand()); - std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgUnaryOp(op)->get_operand())); - //delete r1; - std::copy(a1.begin(), a1.end(), back_inserter(scalars)); - } - - } - return scalars; - - - */ - -} - -SgNode* recursiveFindReplacePreferedIdxs(SgNode* code, SgSymbolTable* body_syms, - SgSymbolTable* param_syms, SgScopeStatement* body, - std::map<std::string, SgVariableSymbol*>& loop_idxs, - SgGlobal* globalscope, bool sync = false) { - //tree_node_list* tnl = new tree_node_list; - //tree_node_list_iter tnli(code); - SgVariableSymbol* idxSym = 0; - std::vector<SgStatement*> r1; - std::vector<SgNode*> r2; - SgNode* tnli; - SgNode* tnli1; - SgNode* tnli2; - SgBasicBlock * clone; - - if (isSgForStatement(code)) { - AstTextAttribute* att = - (AstTextAttribute*) (isSgNode(code)->getAttribute( - "omega_comment")); - - std::string comment; - if (att != NULL) - comment = att->toString(); - - if (comment.find("~cuda~") != std::string::npos - && comment.find("preferredIdx: ") != std::string::npos) { - std::string idx = comment.substr( - comment.find("preferredIdx: ") + 14, std::string::npos); - if (idx.find(" ") != std::string::npos) - idx = idx.substr(0, idx.find(" ")); - if (loop_idxs.find(idx) != loop_idxs.end()) - idxSym = loop_idxs.find(idx)->second; - //Get the proc variable sybol for this preferred index - if (idxSym == 0) { - idxSym = body_syms->find_variable(idx.c_str()); - if (!idxSym) - idxSym = param_syms->find_variable(idx.c_str()); - //printf("idx not found: lookup %p\n", idxSym); - if (!idxSym) { - SgVariableDeclaration* defn = buildVariableDeclaration( - SgName((char*) idx.c_str()), buildIntType()); - //idxSym = new var_sym(type_s32, (char*)idx.c_str()); - SgInitializedNamePtrList& variables = defn->get_variables(); - SgInitializedNamePtrList::const_iterator i = - variables.begin(); - SgInitializedName* initializedName = *i; - SgVariableSymbol* vs = new SgVariableSymbol( - initializedName); - prependStatement(defn, body); - vs->set_parent(body_syms); - body_syms->insert(SgName((char*) idx.c_str()), vs); - idxSym = vs; - //printf("idx created and inserted\n"); - } - //Now insert into our map for future - if (cudaDebug) - std::cout << idx << "\n\n"; - loop_idxs.insert(make_pair(idx, idxSym)); - } - //See if we have a sync as well - if (comment.find("sync") != std::string::npos) { - //printf("Inserting sync after current block\n"); - sync = true; - } - - } - if (idxSym) { - SgForInitStatement* list = - isSgForStatement(code)->get_for_init_stmt(); - SgStatementPtrList& initStatements = list->get_init_stmt(); - SgStatementPtrList::const_iterator j = initStatements.begin(); - const SgVariableSymbol* index; - - if (SgExprStatement *expr = isSgExprStatement(*j)) - if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) - if (SgVarRefExp* var_ref = isSgVarRefExp( - op->get_lhs_operand())) - index = var_ref->get_symbol(); - - std::vector<SgVarRefExp *> array = substitute(code, index, NULL, - isSgNode(body_syms)); - - for (int j = 0; j < array.size(); j++) - array[j]->set_symbol(idxSym); - } - - SgStatement* body_ = isSgStatement( - recursiveFindReplacePreferedIdxs( - isSgNode((isSgForStatement(code)->get_loop_body())), - body_syms, param_syms, body, loop_idxs, globalscope)); - - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); - omega::CG_outputRepr* block = tnl->clone(); - tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - - isSgForStatement(tnli)->set_loop_body(body_); - body_->set_parent(tnli); - - if (idxSym) { - SgForInitStatement* list = - isSgForStatement(tnli)->get_for_init_stmt(); - SgStatementPtrList& initStatements = list->get_init_stmt(); - SgStatementPtrList::const_iterator j = initStatements.begin(); - const SgVariableSymbol* index; - - if (SgExprStatement *expr = isSgExprStatement(*j)) - if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) - if (SgVarRefExp* var_ref = isSgVarRefExp( - op->get_lhs_operand())) - index = var_ref->get_symbol(); - - std::vector<SgVarRefExp *> array = substitute(tnli, index, NULL, - isSgNode(body_syms)); - - for (int j = 0; j < array.size(); j++) - array[j]->set_symbol(idxSym); - } - // std::cout << isSgNode(body_)->unparseToString() << "\n\n"; - if (att != NULL) - tnli->setAttribute("omega_comment", att); - - if (sync) { - SgName name_syncthreads("__syncthreads"); - SgFunctionSymbol * syncthreads_symbol = - globalscope->lookup_function_symbol(name_syncthreads); - - // Create a call to __syncthreads(): - SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( - syncthreads_symbol, buildExprListExp()); - - SgExprStatement* stmt = buildExprStatement(syncthreads_call); - - /* if (SgBasicBlock* bb = isSgBasicBlock( - isSgForStatement(code)->get_loop_body())) - appendStatement(isSgStatement(stmt), bb); - - else if (SgStatement* ss = isSgStatement( - isSgForStatement(code)->get_loop_body())) { - SgBasicBlock* bb2 = buildBasicBlock(); - - isSgNode(ss)->set_parent(bb2); - appendStatement(ss, bb2); - - appendStatement(isSgStatement(stmt), bb2); - isSgNode(stmt)->set_parent(bb2); - isSgForStatement(code)->set_loop_body(bb2); - isSgNode(bb2)->set_parent(code); - } - */ - - SgBasicBlock* bb2 = buildBasicBlock(); - - bb2->append_statement(isSgStatement(tnli)); - bb2->append_statement(stmt); - /* SgNode* parent = code->get_parent(); - if(!isSgStatement(parent)) - throw loop_error("Parent not a statement"); - - if(isSgForStatement(parent)){ - if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){ - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss); - omega::CG_outputRepr* block= tnl->clone(); - - SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - SgBasicBlock* bb2 = buildBasicBlock(); - - isSgNode(new_ss)->set_parent(bb2); - appendStatement(isSgStatement(new_ss), bb2); - appendStatement(isSgStatement(stmt), bb2); - isSgNode(stmt)->set_parent(bb2); - - isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2)); - - }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body())) - isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false); - else - throw loop_error("parent statement type undefined!!"); - - } - else if(isSgBasicBlock(parent)) - isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false); - else - throw loop_error("parent statement type undefined!!"); - - //tnl->print(); - * - * - */ - sync = true; - return isSgNode(bb2); - - } else - return tnli; - } else if (isSgIfStmt(code)) { - SgStatement* body_ = isSgStatement( - recursiveFindReplacePreferedIdxs( - isSgNode((isSgIfStmt(code)->get_true_body())), - body_syms, param_syms, body, loop_idxs, globalscope)); - - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); - omega::CG_outputRepr* block = tnl->clone(); - tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - - isSgIfStmt(tnli)->set_true_body(body_); - - if ((isSgIfStmt(code)->get_false_body())) - isSgIfStmt(tnli)->set_false_body( - isSgStatement( - recursiveFindReplacePreferedIdxs( - isSgNode( - (isSgIfStmt(code)->get_false_body())), - body_syms, param_syms, body, loop_idxs, - globalscope))); - - return tnli; - } else if (isSgStatement(code) && !isSgBasicBlock(code)) { - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); - omega::CG_outputRepr* block = tnl->clone(); - tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - - return tnli; - - } else if (isSgBasicBlock(code)) { - SgStatementPtrList& tnl = isSgBasicBlock(code)->get_statements(); - - SgStatementPtrList::iterator temp; - clone = buildBasicBlock(); - bool sync_found = false; - for (SgStatementPtrList::const_iterator it = tnl.begin(); - it != tnl.end(); it++) { - - if (isSgForStatement(*it)) { - AstTextAttribute* att = - (AstTextAttribute*) (isSgNode(*it)->getAttribute( - "omega_comment")); - - std::string comment; - if (att != NULL) - comment = att->toString(); - - if (comment.find("~cuda~") != std::string::npos - && comment.find("preferredIdx: ") - != std::string::npos) { - std::string idx = comment.substr( - comment.find("preferredIdx: ") + 14, - std::string::npos); - if (idx.find(" ") != std::string::npos) - idx = idx.substr(0, idx.find(" ")); - //printf("sym_tab preferred index: %s\n", idx.c_str()); - if (loop_idxs.find(idx) != loop_idxs.end()) - idxSym = loop_idxs.find(idx)->second; - //Get the proc variable sybol for this preferred index - if (idxSym == 0) { - idxSym = body_syms->find_variable(idx.c_str()); - if (!idxSym) - idxSym = param_syms->find_variable(idx.c_str()); - //printf("idx not found: lookup %p\n", idxSym); - if (!idxSym) { - SgVariableDeclaration* defn = - buildVariableDeclaration( - SgName((char*) idx.c_str()), - buildIntType()); - //idxSym = new var_sym(type_s32, (char*)idx.c_str()); - SgInitializedNamePtrList& variables = - defn->get_variables(); - SgInitializedNamePtrList::const_iterator i = - variables.begin(); - SgInitializedName* initializedName = *i; - SgVariableSymbol* vs = new SgVariableSymbol( - initializedName); - prependStatement(defn, body); - vs->set_parent(body_syms); - body_syms->insert(SgName((char*) idx.c_str()), vs); - //printf("idx created and inserted\n"); - idxSym = vs; - } - //Now insert into our map for future - if (cudaDebug) - std::cout << idx << "\n\n"; - loop_idxs.insert(make_pair(idx, idxSym)); - - } - //See if we have a sync as well - if (comment.find("sync") != std::string::npos) { - //printf("Inserting sync after current block\n"); - sync = true; - } - - } - if (idxSym) { - SgForInitStatement* list = - isSgForStatement(*it)->get_for_init_stmt(); - SgStatementPtrList& initStatements = list->get_init_stmt(); - SgStatementPtrList::const_iterator j = - initStatements.begin(); - const SgVariableSymbol* index; - - if (SgExprStatement *expr = isSgExprStatement(*j)) - if (SgAssignOp* op = isSgAssignOp( - expr->get_expression())) - if (SgVarRefExp* var_ref = isSgVarRefExp( - op->get_lhs_operand())) - index = var_ref->get_symbol(); - - std::vector<SgVarRefExp *> array = substitute(*it, index, - NULL, isSgNode(body_syms)); - - for (int j = 0; j < array.size(); j++) - array[j]->set_symbol(idxSym); - - } - - SgStatement* body_ = - isSgStatement( - recursiveFindReplacePreferedIdxs( - isSgNode( - (isSgForStatement(*it)->get_loop_body())), - body_syms, param_syms, body, loop_idxs, - globalscope)); - - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); - omega::CG_outputRepr* block = tnl->clone(); - tnli = - static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - - isSgForStatement(tnli)->set_loop_body(body_); - body_->set_parent(tnli); - if (idxSym) { - SgForInitStatement* list = - isSgForStatement(tnli)->get_for_init_stmt(); - SgStatementPtrList& initStatements = list->get_init_stmt(); - SgStatementPtrList::const_iterator j = - initStatements.begin(); - const SgVariableSymbol* index; - - if (SgExprStatement *expr = isSgExprStatement(*j)) - if (SgAssignOp* op = isSgAssignOp( - expr->get_expression())) - if (SgVarRefExp* var_ref = isSgVarRefExp( - op->get_lhs_operand())) - index = var_ref->get_symbol(); - - std::vector<SgVarRefExp *> array = substitute(tnli, index, - NULL, isSgNode(body_syms)); - - for (int j = 0; j < array.size(); j++) - array[j]->set_symbol(idxSym); - } - idxSym = 0; - // std::cout << isSgNode(body_)->unparseToString() << "\n\n"; - if (att != NULL) - tnli->setAttribute("omega_comment", att); - clone->append_statement(isSgStatement(tnli)); - if (sync) { - SgName name_syncthreads("__syncthreads"); - SgFunctionSymbol * syncthreads_symbol = - globalscope->lookup_function_symbol( - name_syncthreads); - - // Create a call to __syncthreads(): - SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( - syncthreads_symbol, buildExprListExp()); - - SgExprStatement* stmt = buildExprStatement( - syncthreads_call); - - /* if (SgBasicBlock* bb = isSgBasicBlock( - isSgForStatement(code)->get_loop_body())) - appendStatement(isSgStatement(stmt), bb); - - else if (SgStatement* ss = isSgStatement( - isSgForStatement(code)->get_loop_body())) { - SgBasicBlock* bb2 = buildBasicBlock(); - - isSgNode(ss)->set_parent(bb2); - appendStatement(ss, bb2); - - appendStatement(isSgStatement(stmt), bb2); - isSgNode(stmt)->set_parent(bb2); - isSgForStatement(code)->set_loop_body(bb2); - isSgNode(bb2)->set_parent(code); - } - */ - - //SgBasicBlock* bb2 = buildBasicBlock(); - clone->append_statement(stmt); - /* SgNode* parent = code->get_parent(); - if(!isSgStatement(parent)) - throw loop_error("Parent not a statement"); - - if(isSgForStatement(parent)){ - if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){ - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss); - omega::CG_outputRepr* block= tnl->clone(); - - SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - SgBasicBlock* bb2 = buildBasicBlock(); - - isSgNode(new_ss)->set_parent(bb2); - appendStatement(isSgStatement(new_ss), bb2); - appendStatement(isSgStatement(stmt), bb2); - isSgNode(stmt)->set_parent(bb2); - - isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2)); - - }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body())) - isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false); - else - throw loop_error("parent statement type undefined!!"); - - } - else if(isSgBasicBlock(parent)) - isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false); - else - throw loop_error("parent statement type undefined!!"); - - //tnl->print(); - * - * - */ - sync = true; - // return isSgNode(bb2); - - } - - // return tnli; - } else if (isSgIfStmt(*it)) { - SgStatement* body_ = isSgStatement( - recursiveFindReplacePreferedIdxs( - isSgNode((isSgIfStmt(*it)->get_true_body())), - body_syms, param_syms, body, loop_idxs, - globalscope)); - - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); - omega::CG_outputRepr* block = tnl->clone(); - tnli1 = - static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - - isSgIfStmt(tnli1)->set_true_body(body_); - - if ((isSgIfStmt(*it)->get_false_body())) - isSgIfStmt(tnli1)->set_false_body( - isSgStatement( - recursiveFindReplacePreferedIdxs( - isSgNode( - (isSgIfStmt(*it)->get_false_body())), - body_syms, param_syms, body, - loop_idxs, globalscope))); - - clone->append_statement(isSgStatement(tnli1)); - //return tnli; - } else if (isSgStatement(*it)) { - omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); - omega::CG_outputRepr* block = tnl->clone(); - tnli2 = - static_cast<const omega::CG_roseRepr *>(block)->GetCode(); - - clone->append_statement(isSgStatement(tnli2)); - //return tnli; - - } - } - - return isSgNode(clone); - - } - - /* if (!isSgBasicBlock( - recursiveFindReplacePreferedIdxs(isSgNode(*it), body_syms, - param_syms, body, loop_idxs, globalscope))) { - SgStatement *to_push = isSgStatement( - recursiveFindReplacePreferedIdxs(isSgNode(*it), - body_syms, param_syms, body, loop_idxs, - globalscope, sync)); - clone->append_statement(to_push); - - if ((sync_found) && isSgForStatement(to_push)) { - SgName name_syncthreads("__syncthreads"); - SgFunctionSymbol * syncthreads_symbol = - globalscope->lookup_function_symbol( - name_syncthreads); - - // Create a call to __syncthreads(): - SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( - syncthreads_symbol, buildExprListExp()); - - SgExprStatement* stmt = buildExprStatement( - syncthreads_call); - - clone->append_statement(isSgStatement(stmt)); - } - // std::cout<<isSgNode(*it)->unparseToString()<<"\n\n"; - } else { - - SgStatementPtrList& tnl2 = isSgBasicBlock( - recursiveFindReplacePreferedIdxs(isSgNode(*it), - body_syms, param_syms, body, loop_idxs, - globalscope))->get_statements(); - for (SgStatementPtrList::const_iterator it2 = tnl2.begin(); - it2 != tnl2.end(); it2++) { - clone->append_statement(*it2); - - sync_found = true; - // std::cout<<isSgNode(*it2)->unparseToString()<<"\n\n"; - } - } - - } - return isSgNode(clone); - } - */ -// return tnl; -} - -// loop_vars -> array references -// loop_idxs -> <idx_name,idx_sym> map for when we encounter a loop with a different preferredIndex -// dim_vars -> out param, fills with <old,new> var_sym pair for 2D array dimentions (messy stuff) -SgNode* swapVarReferences(SgNode* code, - std::set<const SgVariableSymbol *>& syms, SgSymbolTable* param, - SgSymbolTable* body, SgScopeStatement* body_stmt) { - //Iterate over every expression, looking up each variable and type - //reference used and possibly replacing it or adding it to our symbol - //table - // - //We use the built-in cloning helper methods to seriously help us with this! - - //Need to do a recursive mark - - std::set<const SgVariableSymbol *>::iterator myIterator; - for (myIterator = syms.begin(); myIterator != syms.end(); myIterator++) { - SgName var_name = (*myIterator)->get_name(); - std::string x = var_name.getString(); - - if ((param->find_variable(var_name) == NULL) - && (body->find_variable(var_name) == NULL)) { - SgInitializedName* decl = (*myIterator)->get_declaration(); - - SgVariableSymbol* dvs = new SgVariableSymbol(decl); - SgVariableDeclaration* var_decl = buildVariableDeclaration( - dvs->get_name(), dvs->get_type()); - - AstTextAttribute* att = (AstTextAttribute*) (isSgNode( - decl->get_declaration())->getAttribute("__shared__")); - if (isSgNode(decl->get_declaration())->attributeExists( - "__shared__")) - var_decl->get_declarationModifier().get_storageModifier().setCudaShared(); - - appendStatement(var_decl, body_stmt); - - dvs->set_parent(body); - body->insert(var_name, dvs); - } - - std::vector<SgVarRefExp *> array = substitute(code, *myIterator, NULL, - isSgNode(body)); - - SgVariableSymbol* var = (SgVariableSymbol*) (*myIterator); - for (int j = 0; j < array.size(); j++) - array[j]->set_symbol(var); - } - - return code; -} - -bool LoopCuda::validIndexes(int stmt, const std::vector<std::string>& idxs) { - for (int i = 0; i < idxs.size(); i++) { - bool found = false; - for (int j = 0; j < idxNames[stmt].size(); j++) { - if (strcmp(idxNames[stmt][j].c_str(), idxs[i].c_str()) == 0) { - found = true; - } - } - if (!found) { - return false; - } - } - return true; -} - -bool LoopCuda::cudaize_v2(std::string kernel_name, - std::map<std::string, int> array_dims, - std::vector<std::string> blockIdxs, - std::vector<std::string> threadIdxs) { - CG_outputBuilder *ocg = ir->builder(); - int stmt_num = 0; - if (cudaDebug) { - printf("cudaize_v2(%s, {", kernel_name.c_str()); - //for( - printf("}, blocks={"); - printVs(blockIdxs); - printf("}, thread={"); - printVs(threadIdxs); - printf("})\n"); - } - - this->array_dims = array_dims; - if (!validIndexes(stmt_num, blockIdxs)) { - throw std::runtime_error("One of the indexes in the block list was not " - "found in the current set of indexes."); - } - if (!validIndexes(stmt_num, threadIdxs)) { - throw std::runtime_error( - "One of the indexes in the thread list was not " - "found in the current set of indexes."); - } - if (blockIdxs.size() == 0) - throw std::runtime_error("Cudaize: Need at least one block dimention"); - int block_level = 0; - //Now, we will determine the actual size (if possible, otherwise - //complain) for the block dimentions and thread dimentions based on our - //indexes and the relations for our stmt; - for (int i = 0; i < blockIdxs.size(); i++) { - int level = findCurLevel(stmt_num, blockIdxs[i]); - int ub, lb; - CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb); - if (lb != 0) { - //attempt to "normalize" the loop with an in-place tile and then re-check our bounds - if (cudaDebug) - printf( - "Cudaize: doing tile at level %d to try and normalize lower bounds\n", - level); - tile(stmt_num, level, 1, level, CountedTile); - idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); //TODO: possibly handle this for all sibling stmts - ubrepr = extractCudaUB(stmt_num, level, ub, lb); - } - if (lb != 0) { - char buf[1024]; - sprintf(buf, - "Cudaize: Loop at level %d does not have 0 as it's lower bound", - level); - throw std::runtime_error(buf); - } - if (ub < 0) { - char buf[1024]; - sprintf(buf, - "Cudaize: Loop at level %d does not have a hard upper bound", - level); - //Anand: Commenting out error indication for lack of constant upper bound - //throw std::runtime_error(buf); - } - if (cudaDebug) - printf("block idx %s level %d lb: %d ub %d\n", blockIdxs[i].c_str(), - level, lb, ub); - if (i == 0) { - block_level = level; - if (ubrepr == NULL) { - cu_bx = ub + 1; - cu_bx_repr = NULL; - } else { - cu_bx = 0; - cu_bx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); - } - idxNames[stmt_num][level - 1] = "bx"; - } else if (i == 1) { - if (ubrepr == NULL) { - cu_by = ub + 1; - cu_by_repr = NULL; - } else { - cu_by = 0; - cu_by_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); - } - idxNames[stmt_num][level - 1] = "by"; - } - } - if (!cu_by && !cu_by_repr) - block_level = 0; - int thread_level1 = 0; - int thread_level2 = 0; - for (int i = 0; i < threadIdxs.size(); i++) { - int level = findCurLevel(stmt_num, threadIdxs[i]); - int ub, lb; - CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb); - if (lb != 0) { - //attempt to "normalize" the loop with an in-place tile and then re-check our bounds - if (cudaDebug) - printf( - "Cudaize: doing tile at level %d to try and normalize lower bounds\n", - level); - tile(stmt_num, level, 1, level, CountedTile); - idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); - ubrepr = extractCudaUB(stmt_num, level, ub, lb); - } - if (lb != 0) { - char buf[1024]; - sprintf(buf, - "Cudaize: Loop at level %d does not have 0 as it's lower bound", - level); - throw std::runtime_error(buf); - } - if (ub < 0) { - char buf[1024]; - sprintf(buf, - "Cudaize: Loop at level %d does not have a hard upper bound", - level); - //Anand: Commenting out error indication for lack of constant upper bound - //throw std::runtime_error(buf); - } - - if (cudaDebug) - printf("thread idx %s level %d lb: %d ub %d\n", - threadIdxs[i].c_str(), level, lb, ub); - if (i == 0) { - thread_level1 = level; - if (ubrepr == NULL) { - cu_tx = ub + 1; - cu_tx_repr = NULL; - } else { - cu_tx = 0; - cu_tx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); - } - idxNames[stmt_num][level - 1] = "tx"; - } else if (i == 1) { - thread_level2 = level; - if (ubrepr == NULL) { - cu_ty = ub + 1; - cu_ty_repr = NULL; - } else { - cu_ty = 0; - cu_ty_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); - } - idxNames[stmt_num][level - 1] = "ty"; - } else if (i == 2) { - if (ubrepr == NULL) { - cu_tz = ub + 1; - cu_tz_repr = NULL; - } else { - cu_tz = 0; - cu_tz_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); - } - idxNames[stmt_num][level - 1] = "tz"; - } - } - if (!cu_ty && !cu_ty_repr) - thread_level1 = 0; - if (!cu_tz && !cu_tz_repr) - thread_level2 = 0; - - //Make changes to nonsplitlevels - const int m = stmt.size(); - for (int i = 0; i < m; i++) { - if (block_level) { - //stmt[i].nonSplitLevels.append((block_level)*2); - stmt_nonSplitLevels[i].push_back((block_level) * 2); - } - if (thread_level1) { - //stmt[i].nonSplitLevels.append((thread_level1)*2); - stmt_nonSplitLevels[i].push_back((thread_level1) * 2); - } - if (thread_level2) { - //stmt[i].nonSplitLevels.append((thread_level1)*2); - stmt_nonSplitLevels[i].push_back((thread_level1) * 2); - } - } - - if (cudaDebug) { - printf("Codegen: current names: "); - printVS(idxNames[stmt_num]); - } - //Set codegen flag - code_gen_flags |= GenCudaizeV2; - - //Save array dimention sizes - this->array_dims = array_dims; - cu_kernel_name = kernel_name.c_str(); - -} - -/* - * setupConstantVar - * handles constant variable declaration - * and adds a global constant variable - * parameters: - * constant - the constant_memory_mapping object for this loop - * arr_def - the VarDefs object for the mapped variable - * globals - Rose Global variables - * i - an index to keep new variable names unique - * symtab - global symbol table - */ -static void setupConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, int i, SgSymbolTable* symtab) { - char* buf1 = new char[32]; - snprintf(buf1, 32, "cs%dRef", i+1); - arr_def->secondName = buf1; - - char buf2[64]; - snprintf(buf2, 64, "__device__ __constant__ float"); - - SgVariableDeclaration* consvar_decl = buildVariableDeclaration( - SgName(std::string(buf1)), buildArrayType( - buildOpaqueType(SgName(buf2),globals), - arr_def->size_expr)); - SgInitializedNamePtrList& variables = consvar_decl->get_variables(); - SgInitializedNamePtrList::const_iterator j = variables.begin(); - SgInitializedName* initializedName = *j; - SgVariableSymbol* consvar_sym = new SgVariableSymbol(initializedName); - prependStatement(consvar_decl, globals); - - consvar_sym->set_parent(symtab); - symtab->insert(SgName(std::string(buf1)), consvar_sym); - - constant->set_mapped_symbol(arr_def->original_name.c_str(), consvar_sym); - constant->set_vardef(arr_def->original_name.c_str(), arr_def); -} - -/* - * cudaBindConstantVar - * allocs a variable to constant memory - * constant - the constant mapping object - * arr_def - the VarDefs abject - * globals - global symbol table - * stmt_list - the GPU functions' statement list - */ -static void cudaBindConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) { - SgName cudaMemcpyToSymbol_name("cudaMemcpyToSymbol"); - SgFunctionDeclaration* cudaMemcpyToSymbol_decl = buildNondefiningFunctionDeclaration( - cudaMemcpyToSymbol_name, buildVoidType(), buildFunctionParameterList(), globals); - SgExprListExp* args = buildExprListExp(); - args->append_expression(buildCastExp(constant->get_mapped_symbol_exp(arr_def->original_name.c_str()), - buildPointerType(buildVoidType()))); - args->append_expression(buildVarRefExp(arr_def->in_data)); - args->append_expression(arr_def->size_expr); - stmt_list->push_back(buildExprStatement( - buildFunctionCallExp(buildFunctionRefExp(cudaMemcpyToSymbol_decl), args))); -} - -static void consmapArrayRefs(constant_memory_mapping* constant, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder* ocg) { - // if constant mapping is not being used, ignore this function - if(constant == NULL) return; - for(int i = 0; i < refs->size(); i++) { - IR_ArrayRef* aref = (*refs)[i]; - if(constant->is_array_mapped(aref->name().c_str())) { - // get array reference dimensions - int dims = aref->symbol()->n_dim(); - if(dims > 2) { - printf(" \n CHiLL does not handle constant memory mapping for more than 2D arrays.\n"); - return; - } - - SgExpression* varexp = constant->get_mapped_symbol_exp(aref->name().c_str()); - SgExpression* index_exp; - // build index expression - if(dims == 1) { - index_exp = static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression(); - } - if(dims == 2) { - VarDefs* arr_def = constant->get_vardef(aref->name().c_str()); - CG_outputRepr* i0 = aref->index(0)->clone(); - CG_outputRepr* i1 = aref->index(1)->clone(); - CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0])); - CG_outputRepr* exp = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1); - index_exp = static_cast<omega::CG_roseRepr*>(exp->clone())->GetExpression(); - } - ir->ReplaceExpression(aref, new CG_roseRepr(buildPntrArrRefExp(varexp, index_exp))); - } - } -} - -/* - * setupTexmappingVar - * handles texture variable declaration - * and adds a global texture object - * parameters: - * texture - the texture_memory_mapping object - * arr_def - the VarDefs object for the mapped variable - * globals - Rose Global variables - * i - an index to keep the new variable names unique - * devptr_sym - the devptr that the original variable is associated with - * symtab - GPU function symbol table - */ -static void setupTexmappingVar(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, int i, SgVariableSymbol* devptr_sym, SgSymbolTable* symtab) { - char* buf1 = new char[32]; - snprintf(buf1, 32, "tex%dRef", i+1); - arr_def->secondName = buf1; - - char buf2[64]; - // single-dimensional - snprintf(buf2, 64, "texture<float, %d, cudaReadModeElementType>", 1); - // multi-dimensional - // snprintf(buf2, 64, "texture<float, %d, cudaReadModeElemetType>", (int)(arr_def->size_multi_dim.size())); //*/ - - SgVariableDeclaration* texvar_decl = buildVariableDeclaration(SgName(std::string(buf1)), buildOpaqueType(buf2, globals)); - - SgInitializedNamePtrList& variables = texvar_decl->get_variables(); - SgInitializedNamePtrList::const_iterator j = variables.begin(); - SgInitializedName* initializedName = *j; - SgVariableSymbol* texvar_sym = new SgVariableSymbol(initializedName); - prependStatement(texvar_decl, globals); - - texvar_sym->set_parent(symtab); - symtab->insert(SgName(buf1), texvar_sym); - - texture->set_mapped_symbol(arr_def->original_name.c_str(), texvar_sym); - texture->set_devptr_symbol(arr_def->original_name.c_str(), devptr_sym); - texture->set_vardef(arr_def->original_name.c_str(), arr_def); -} - - -/* - * One dimensional version of cudaBindTexture - * see cudaBindTexture for details - */ -static SgFunctionCallExp* cudaBindTexture1D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) { - SgName cudaBindTexture_name("cudaBindTexture"); - SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration( - cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals); - - SgExprListExp* args = buildExprListExp(); - args->append_expression(buildIntVal(0)); - args->append_expression(texture->get_mapped_symbol_exp(arr_def->original_name.c_str())); - args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str())); - args->append_expression(arr_def->size_expr); - return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args); -} - -/* - * Two dimensional version of cudaBindTexture - * see cudaBindTexture for details - */ -//static SgFunctionCallExp* cudaBindTexture2D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) { -// SgName cudaBindTexture_name("cudaBindTexture2D"); -// SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration( -// cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals); -// -// SgExprListExp* args = buildExprListExp(); -// args->append_expression(buildIntVal(0)); -// args->append_expression(texture->get_tex_mapped_symbol_exp(arr_def->original_name.c_str())); -// args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str())); -// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 0))); -// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 1))); -// args->append_expression(arr_def->size_expr); -// return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args); -//} - -/* - * cudaBindTexture - * binds a variable to a texture - * parameters: - * texture - the texture mapping object - * arr_def - the VarDefs object - * globals - global symbol table - * stmt_list - the GPU functions' statement list - * notes: - * only supports binding 1D textures, may need to consider cudaBindTexture2D for 2D textures - */ -static void cudaBindTexture(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) { - //int dims = (int)(arr_def->size_multi_dim.size()); - //int dims = texture->get_dims(arr_def->original_name.c_str()); - //if(dims == 1) - stmt_list->push_back( - buildExprStatement(cudaBindTexture1D(texture, arr_def, globals))); - //if(dims == 2) - // stmt_list->push_back( - // buildExprStatement(cudaBindTexture2D(texture, arr_def, globals))); -} - -/* - * texmapArrayRefs - * maps array reference expresions of texture mapped variables to the tex1D function - * parameters: - * texture - the texture mapping object - * refs - a list of all array read operations - * globals - global symbol table - * ir - handles IR_Code operations - * ocg - handles CG_roseBuilder operations -**/ -static void texmapArrayRefs(texture_memory_mapping* texture, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder *ocg) { - // if texture mapping is not being used, ignore this function - if(texture == NULL) return; - for(int i = 0; i < refs->size(); i++) { - IR_ArrayRef* aref = (*refs)[i]; - if(texture->is_array_mapped(aref->name().c_str())) { - - // get array dimensions - VarDefs* arr_def = texture->get_vardef(aref->name().c_str()); - int dims = aref->symbol()->n_dim(); - if(dims > 2) { - printf(" \n CHiLL does not handle texture mapping for more than 2D arrays.\n"); - // TODO throw some sort of error. or handle in texture_copy function - return; - } - - // build texture lookup function declaration - char texNDfetch_strName[16]; - sprintf(texNDfetch_strName, "tex%dDfetch", 1); // for now, only support tex1Dfetch - //sprintf(texNDfetch_strName, "tex%dDfetch", dims); - SgFunctionDeclaration* fetch_decl = buildNondefiningFunctionDeclaration( - SgName(texNDfetch_strName), buildFloatType(), buildFunctionParameterList(), globals); - - // build args - SgExprListExp* args = buildExprListExp(); - args->append_expression(texture->get_mapped_symbol_exp(aref->name().c_str())); - - // set indexing args - //for(int i = 0; i < dims; i++) { - // args->append_expression((static_cast<omega::CG_roseRepr*>(aref->index(i)->clone()))->GetExpression()); - //} - if(dims == 1) { - args->append_expression(static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression()); - } - else if(dims == 2) { - CG_outputRepr* i0 = aref->index(0)->clone(); - CG_outputRepr* i1 = aref->index(1)->clone(); - CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0])); - CG_outputRepr* expr = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1); - args->append_expression(static_cast<omega::CG_roseRepr*>(expr->clone())->GetExpression()); - } - - // build function call and replace original array ref - SgFunctionCallExp* fetch_call = buildFunctionCallExp(buildFunctionRefExp(fetch_decl), args); - ir->ReplaceExpression(aref, new CG_roseRepr(fetch_call)); - } - } -} - -SgNode* LoopCuda::cudaize_codegen_v2() { - if(cudaDebug) - printf("cudaize codegen V2\n"); - CG_roseBuilder *ocg = dynamic_cast<CG_roseBuilder*>(ir->builder()); - if (!ocg) - return false; - - //protonu--adding an annote to track texture memory type - //ANNOTE(k_cuda_texture_memory, "cuda texture memory", TRUE); - //ANNOTE(k_cuda_constant_memory, "cuda constant memory", TRUE); - int tex_mem_on = 0; - int cons_mem_on = 0; - - - - CG_outputRepr* repr; - std::vector<VarDefs> arrayVars; - std::vector<VarDefs> localScopedVars; - - std::vector<IR_ArrayRef *> ro_refs; - std::vector<IR_ArrayRef *> wo_refs; - std::set<std::string> uniqueRefs; - std::set<std::string> uniqueWoRefs; - std::set<const SgVariableSymbol *> syms; - std::set<const SgVariableSymbol *> psyms; - std::set<const SgVariableSymbol *> pdSyms; - SgStatementPtrList* replacement_list = new SgStatementPtrList; - - for (int j = 0; j < stmt.size(); j++) { - std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[j].code); - for (int i = 0; i < refs.size(); i++) { - //printf("ref %s wo %d\n", static_cast<const char*>(refs[i]->name()), refs[i]->is_write()); - SgVariableSymbol* var = body_symtab->find_variable( - SgName((char*) refs[i]->name().c_str())); - SgVariableSymbol* var2 = parameter_symtab->find_variable( - SgName((char*) refs[i]->name().c_str())); - - //If the array is not a parameter, then it's a local array and we - //want to recreate it as a stack variable in the kernel as opposed to - //passing it in. - if (var != NULL) { - //anand-- needs modification, if variable is parameter it wont be part of the - // block's symbol table but the functiond definition's symbol table - - continue; - } - if (uniqueRefs.find(refs[i]->name()) == uniqueRefs.end()) { - - uniqueRefs.insert(refs[i]->name()); - if (refs[i]->is_write()) { - uniqueWoRefs.insert(refs[i]->name()); - wo_refs.push_back(refs[i]); - } else - ro_refs.push_back(refs[i]); - } - if (refs[i]->is_write() - && uniqueWoRefs.find(refs[i]->name()) - == uniqueWoRefs.end()) { - uniqueWoRefs.insert(refs[i]->name()); - wo_refs.push_back(refs[i]); - //printf("adding %s to wo\n", static_cast<const char*>(refs[i]->name())); - } - pdSyms.insert((const SgVariableSymbol*) var2); - } - } - - if (cudaDebug) { - printf("reading from array "); - for (int i = 0; i < ro_refs.size(); i++) - printf("'%s' ", ro_refs[i]->name().c_str()); - printf("and writing to array "); - for (int i = 0; i < wo_refs.size(); i++) - printf("'%s' ", wo_refs[i]->name().c_str()); - printf("\n"); - } - const char* gridName = "dimGrid"; - const char* blockName = "dimBlock"; - - //TODO: Could allow for array_dims_vars to be a mapping from array - //references to to variable names that define their length. - SgVariableSymbol* dim1 = 0; - SgVariableSymbol* dim2 = 0; - - for (int i = 0; i < wo_refs.size(); i++) { - //TODO: Currently assume all arrays are floats of one or two dimentions - SgVariableSymbol* outArray = 0; - std::string name = wo_refs[i]->name(); - outArray = body_symtab->find_variable(SgName((char*) name.c_str())); - int size_n_d; - if (outArray == NULL) - outArray = parameter_symtab->find_variable( - SgName((char*) name.c_str())); - - VarDefs v; - v.size_multi_dim = std::vector<int>(); - char buf[32]; - snprintf(buf, 32, "devO%dPtr", i + 1); - v.name = buf; - if (isSgPointerType(outArray->get_type())) { - if (isSgArrayType( - isSgNode( - isSgPointerType(outArray->get_type())->get_base_type()))) { - // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type(); - SgType* t = - isSgPointerType(outArray->get_type())->get_base_type(); - /* SgExprListExp* dimList = t->get_dim_info(); - SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); - SgExpression* expr=NULL; - for (; j != dimList->get_expressions().end(); j++) - expr = *j; - */ - while (isSgArrayType(t)) - t = isSgArrayType(t)->get_base_type(); - - if (!isSgType(t)) { - char buf[1024]; - sprintf(buf, "CudaizeCodeGen: Array type undetected!"); - throw std::runtime_error(buf); - - } - - v.type = t; - } else - v.type = isSgPointerType(outArray->get_type())->get_base_type(); - } else if (isSgArrayType(outArray->get_type())) { - if (isSgArrayType( - isSgNode( - isSgArrayType(outArray->get_type())->get_base_type()))) { - // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type(); - SgType* t = - isSgArrayType(outArray->get_type())->get_base_type(); - /* SgExprListExp* dimList = t->get_dim_info(); - SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); - SgExpression* expr=NULL; - for (; j != dimList->get_expressions().end(); j++) - expr = *j; - */ - while (isSgArrayType(t)) - t = isSgArrayType(t)->get_base_type(); - - if (!isSgType(t)) { - char buf[1024]; - sprintf(buf, "CudaizeCodeGen: Array type undetected!"); - throw std::runtime_error(buf); - - } - - v.type = t; - } else - v.type = isSgArrayType(outArray->get_type())->get_base_type(); - } else - v.type = buildFloatType(); - v.tex_mapped = false; - v.cons_mapped = false; - v.original_name = wo_refs[i]->name(); - //Size of the array = dim1 * dim2 * num bytes of our array type - - //If our input array is 2D (non-linearized), we want the actual - //dimentions of the array - CG_outputRepr* size; - //Lookup in array_dims - std::map<std::string, int>::iterator it = array_dims.find(name.c_str()); - if (isSgPointerType(outArray->get_type()) - && isSgArrayType( - isSgNode( - isSgPointerType(outArray->get_type())->get_base_type()))) { - SgType* t = isSgPointerType(outArray->get_type())->get_base_type(); - /* SgExprListExp* dimList = t->get_dim_info(); - SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); - SgExpression* expr=NULL; - for (; j != dimList->get_expressions().end(); j++) - expr = *j; - */ - if (isSgIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - size_n_d = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - t = isSgArrayType(t)->get_base_type(); - while (isSgArrayType(t)) { - int dim; - if (isSgIntVal(isSgArrayType(t)->get_index())) - dim = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - dim = (int) isSgIntVal(lhs)->get_value() - + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - dim = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - dim = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - dim = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - size_n_d *= dim; - v.size_multi_dim.push_back(dim); - t = isSgArrayType(t)->get_base_type(); - } - //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value()); - - if (cudaDebug) - printf("Detected Multi-dimensional array sized of %d for %s\n", - size_n_d, (char*) wo_refs[i]->name().c_str()); - size = ocg->CreateInt(size_n_d); - } else if (isSgArrayType(outArray->get_type()) - && isSgArrayType( - isSgNode( - isSgArrayType(outArray->get_type())->get_base_type()))) { - SgType* t = outArray->get_type(); - /* SgExprListExp* dimList = t->get_dim_info(); - SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); - SgExpression* expr=NULL; - for (; j != dimList->get_expressions().end(); j++) - expr = *j; - */ - - if (isSgIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - size_n_d = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - t = isSgArrayType(t)->get_base_type(); - while (isSgArrayType(t)) { - int dim; - if (isSgIntVal(isSgArrayType(t)->get_index())) - dim = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - dim = (int) isSgIntVal(lhs)->get_value() - + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - dim = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - dim = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - dim = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - size_n_d *= dim; - v.size_multi_dim.push_back(dim); - t = isSgArrayType(t)->get_base_type(); - } - - //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value()); - - if (cudaDebug) - printf("Detected Multi-Dimensional array sized of %d for %s\n", - size_n_d, (char*) wo_refs[i]->name().c_str()); - size = ocg->CreateInt(size_n_d); - } else if (it != array_dims.end()) { - int ref_size = it->second; - //size = - // ocg->CreateInt( - // isSgIntVal( - // isSgArrayType(outArray->get_type())->get_index())->get_value()); - //v.size_2d = isSgArrayType(outArray->get_type())->get_rank(); - //v.var_ref_size = ref_size; - size = ocg->CreateInt(ref_size); - - } else { - if (dim1) { - size = ocg->CreateTimes( - new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))), - new CG_roseRepr(isSgExpression(buildVarRefExp(dim2)))); - } else { - char buf[1024]; - sprintf(buf, - "CudaizeCodeGen: Array reference %s does not have a " - "detectable size or specififed dimentions", - name.c_str()); - throw std::runtime_error(buf); - } - } - - v.size_expr = - static_cast<CG_roseRepr*>(ocg->CreateTimes(size, - new omega::CG_roseRepr( - isSgExpression(buildSizeOfOp(v.type)))))->GetExpression(); - - v.in_data = 0; - v.out_data = outArray; - //Check for in ro_refs and remove it at this point - std::vector<IR_ArrayRef *>::iterator it_; - for (it_ = ro_refs.begin(); it_ != ro_refs.end(); it_++) { - if ((*it_)->name() == wo_refs[i]->name()) { - break; - } - } - if (it_ != ro_refs.end()) { - v.in_data = outArray; - ro_refs.erase(it_); - } - - arrayVars.push_back(v); - - } - - //protonu-- assuming that all texture mapped memories were originally read only mems - //there should be safety checks for that, will implement those later - - for (int i = 0; i < ro_refs.size(); i++) { - SgVariableSymbol* inArray = 0; - std::string name = ro_refs[i]->name(); - inArray = body_symtab->find_variable(SgName((char*) name.c_str())); - if (inArray == NULL) - inArray = parameter_symtab->find_variable( - SgName((char*) name.c_str())); - - VarDefs v; - v.size_multi_dim = std::vector<int>(); - char buf[32]; - snprintf(buf, 32, "devI%dPtr", i + 1); - v.name = buf; - int size_n_d; - if (isSgPointerType(inArray->get_type())) { - if (isSgArrayType( - isSgNode( - isSgPointerType(inArray->get_type())->get_base_type()))) { - - SgType* t = - isSgPointerType(inArray->get_type())->get_base_type(); - - while (isSgArrayType(t)) - t = isSgArrayType(t)->get_base_type(); - - if (!isSgType(t)) { - char buf[1024]; - sprintf(buf, "CudaizeCodeGen: Array type undetected!"); - throw std::runtime_error(buf); - - } - v.type = t; - } else - v.type = isSgPointerType(inArray->get_type())->get_base_type(); - } else if (isSgArrayType(inArray->get_type())) { - if (isSgArrayType( - isSgNode( - isSgArrayType(inArray->get_type())->get_base_type()))) { - - SgType* t = inArray->get_type(); - while (isSgArrayType(t)) - t = isSgArrayType(t)->get_base_type(); - - if (!isSgType(t)) { - char buf[1024]; - sprintf(buf, "CudaizeCodeGen: Array type undetected!"); - throw std::runtime_error(buf); - - } - v.type = t; - } else - v.type = isSgArrayType(inArray->get_type())->get_base_type(); - } - - else - v.type = buildFloatType(); - - v.tex_mapped = false; - v.cons_mapped = false; - v.original_name = ro_refs[i]->name(); - - //derick -- adding texture and constant mapping - if ( texture != NULL) - v.tex_mapped = (texture->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars - if (v.tex_mapped){ - printf("this variable %s is mapped to texture memory", name.c_str()); - } - //derick -- this is commented out until constant memory is implemeted - if ( constant_mem != NULL) - v.cons_mapped = (constant_mem->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars - if (v.cons_mapped){ - printf("this variable %s is mapped to constant memory", name.c_str()); - } - - //Size of the array = dim1 * dim2 * num bytes of our array type - //If our input array is 2D (non-linearized), we want the actual - //dimentions of the array (as it might be less than cu_n - CG_outputRepr* size; - //Lookup in array_dims - std::map<std::string, int>::iterator it = array_dims.find(name.c_str()); - if (isSgPointerType(inArray->get_type()) - && isSgArrayType( - isSgPointerType(inArray->get_type())->get_base_type())) { - //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type()); - //v.size_2d = t->get_rank(); - SgType* t = isSgPointerType(inArray->get_type())->get_base_type(); - /* SgExprListExp* dimList = t->get_dim_info(); - SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); - SgExpression* expr=NULL; - for (; j != dimList->get_expressions().end(); j++) - expr = *j; - */ - //v.size_2d = 1; - if (isSgIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - size_n_d = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - t = isSgArrayType(t)->get_base_type(); - while (isSgArrayType(t)) { - int dim; - if (isSgIntVal(isSgArrayType(t)->get_index())) - dim = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - dim = (int) isSgIntVal(lhs)->get_value() - + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - dim = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - dim = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - dim = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - size_n_d *= dim; - v.size_multi_dim.push_back(dim); - t = isSgArrayType(t)->get_base_type(); - } - if (cudaDebug) - printf("Detected Multi-dimensional array sized of %d for %s\n", - size_n_d, (char*) ro_refs[i]->name().c_str()); - size = ocg->CreateInt(size_n_d); - } else if (isSgArrayType(inArray->get_type()) - && isSgArrayType( - isSgArrayType(inArray->get_type())->get_base_type())) { - //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type()); - //v.size_2d = t->get_rank(); - SgType* t = inArray->get_type(); - /* SgExprListExp* dimList = t->get_dim_info(); - SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); - SgExpression* expr=NULL; - for (; j != dimList->get_expressions().end(); j++) - expr = *j; - */ - - if (isSgIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = - (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) - size_n_d = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - size_n_d = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - size_n_d = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - t = isSgArrayType(t)->get_base_type(); - while (isSgArrayType(t)) { - int dim; - if (isSgIntVal(isSgArrayType(t)->get_index())) - dim = - (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgLongIntVal(isSgArrayType(t)->get_index())) - dim = (int) (isSgLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())) - dim = (int) (isSgUnsignedLongLongIntVal( - isSgArrayType(t)->get_index())->get_value()); - else if (isSgAddOp(isSgArrayType(t)->get_index())) { - SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); - - SgExpression *lhs = op_add->get_lhs_operand(); - SgExpression *rhs = op_add->get_rhs_operand(); - - if (isSgIntVal(lhs)) - dim = (int) isSgIntVal(lhs)->get_value() - + (int) (isSgIntVal(rhs)->get_value()); - else if (isSgUnsignedIntVal(lhs)) - dim = (int) isSgUnsignedIntVal(lhs)->get_value() - + (int) isSgUnsignedIntVal(rhs)->get_value(); - else if (isSgUnsignedLongVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgUnsignedLongVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongLongIntVal(lhs)) - dim = (int) (isSgLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongVal(rhs)->get_value()); - else if (isSgLongIntVal(lhs)) - dim = (int) (isSgLongIntVal(lhs)->get_value() - + isSgLongIntVal(rhs)->get_value()); - else if (isSgUnsignedLongLongIntVal(lhs)) - dim = - (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() - + isSgUnsignedLongLongIntVal(rhs)->get_value()); - - } - size_n_d *= dim; - v.size_multi_dim.push_back(dim); - t = isSgArrayType(t)->get_base_type(); - } - if (cudaDebug) - printf("Detected Multi-Dimensional array sized of %d for %s\n", - size_n_d, (char*) ro_refs[i]->name().c_str()); - size = ocg->CreateInt(size_n_d); - } - - else if (it != array_dims.end()) { - int ref_size = it->second; - // v.var_ref_size = ref_size; - size = ocg->CreateInt(ref_size); - } else { - if (dim1) { - size = ocg->CreateTimes( - new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))), - new CG_roseRepr(isSgExpression(buildVarRefExp(dim2)))); - } else { - char buf[1024]; - sprintf(buf, - "CudaizeCodeGen: Array reference %s does not have a " - "detectable size or specififed dimentions", - name.c_str()); - throw std::runtime_error(buf); - } - } - v.size_expr = - static_cast<CG_roseRepr*>(ocg->CreateTimes(size, - new omega::CG_roseRepr( - isSgExpression(buildSizeOfOp(v.type)))))->GetExpression(); - - v.in_data = inArray; - v.out_data = 0; - arrayVars.push_back(v); - } - - if (arrayVars.size() < 2) { - fprintf(stderr, - "cudaize error: Did not find two arrays being accessed\n"); - return false; - } - - //protonu--debugging tool--the printf statement - //tex_mem_on signals use of tex mem - /* derick -- texmapping near malloc mcopy - for(int i=0; i<arrayVars.size(); i++) - { - //printf("var name %s, tex_mem used %s\n", arrayVars[i].name.c_str(), (arrayVars[i].tex_mapped)?"true":"false"); - if (arrayVars[i].tex_mapped ) tex_mem_on ++; - //if (arrayVars[i].cons_mapped ) cons_mem_on ++; - } - */ - - //Add our mallocs (and input array memcpys) - for (int i = 0; i < arrayVars.size(); i++) { - if(arrayVars[i].cons_mapped) { - setupConstantVar(constant_mem, &arrayVars[i], globals, i, symtab); - SgStatementPtrList *tnl = new SgStatementPtrList; - cudaBindConstantVar(constant_mem, &arrayVars[i], globals, tnl); - setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); - } - else { - SgVariableDeclaration* defn = buildVariableDeclaration( - SgName(arrayVars[i].name.c_str()), - buildPointerType(arrayVars[i].type)); - SgInitializedNamePtrList& variables = defn->get_variables(); - SgInitializedNamePtrList::const_iterator j = variables.begin(); - SgInitializedName* initializedName = *j; - SgVariableSymbol* dvs = new SgVariableSymbol(initializedName); - prependStatement(defn, func_body); - - dvs->set_parent(body_symtab); - body_symtab->insert(SgName(arrayVars[i].name.c_str()), dvs); - -// SgVariableSymbol* dvs = body_symtab->find_variable(SgName(arrayVars[i].name.c_str())); - - // if(dvs == NULL) - // dvs = parameter_symtab->find_variable(SgName(arrayVars[i].name.c_str())); - - //cudaMalloc args - // SgBasicBlock* block = buildBasicBlock(); - SgName name_cuda_malloc("cudaMalloc"); - SgFunctionDeclaration * decl_cuda_malloc = - buildNondefiningFunctionDeclaration(name_cuda_malloc, - buildVoidType(), buildFunctionParameterList(), globals); - - SgName name_cuda_copy("cudaMemcpy"); - SgFunctionDeclaration * decl_cuda_copy = - buildNondefiningFunctionDeclaration(name_cuda_copy, - buildVoidType(), buildFunctionParameterList(), globals); - - SgExprListExp* args = buildExprListExp(); - args->append_expression( - buildCastExp(buildAddressOfOp(buildVarRefExp(dvs)), - buildPointerType(buildPointerType(buildVoidType())))); - args->append_expression(arrayVars[i].size_expr); - -// decl_cuda_malloc->get_parameterList()->append_arg - SgFunctionCallExp *the_call = buildFunctionCallExp( - buildFunctionRefExp(decl_cuda_malloc), args); - - SgExprStatement* stmt = buildExprStatement(the_call); - - // (*replacement_list).push_back (stmt); - - SgStatementPtrList* tnl = new SgStatementPtrList; - (*tnl).push_back(stmt); - setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); - if (arrayVars[i].in_data) { - - SgExprListExp * cuda_copy_in_args = buildExprListExp(); - cuda_copy_in_args->append_expression( - isSgExpression(buildVarRefExp(dvs))); - cuda_copy_in_args->append_expression( - isSgExpression(buildVarRefExp(arrayVars[i].in_data))); - CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr); - cuda_copy_in_args->append_expression( - static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression()); - cuda_copy_in_args->append_expression( - buildOpaqueVarRefExp("cudaMemcpyHostToDevice", globals)); - -// cuda_copy_in_args->append_expression( -// new SgVarRefExp(sourceLocation, ) -// ); - SgFunctionCallExp * cuda_copy_in_func_call = buildFunctionCallExp( - buildFunctionRefExp(decl_cuda_copy), cuda_copy_in_args); - - SgExprStatement* stmt = buildExprStatement(cuda_copy_in_func_call); - - SgStatementPtrList *tnl = new SgStatementPtrList; - (*tnl).push_back(stmt); - setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); - - if(arrayVars[i].tex_mapped) { - setupTexmappingVar(texture, &arrayVars[i], globals, i, dvs, symtab); - SgStatementPtrList *tnl = new SgStatementPtrList; - cudaBindTexture(texture, &arrayVars[i], globals, tnl); - setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); - } - } - } - } - - //Build dimGrid dim3 variables based on loop dimentions and ti/tj - char blockD1[120]; - char blockD2[120]; - if (dim1) { - snprintf(blockD1, 120, "%s/%d", - dim1->get_declaration()->get_name().getString().c_str(), cu_tx); - snprintf(blockD2, 120, "%s/%d", - dim2->get_declaration()->get_name().getString().c_str(), cu_ty); - } else { - snprintf(blockD1, 120, "%d", cu_bx); - snprintf(blockD2, 120, "%d", cu_by); - //snprintf(blockD1, 120, "%d/%d", cu_nx, cu_tx); - //snprintf(blockD2, 120, "%d/%d", cu_ny, cu_ty); - } - - SgInitializedName* arg1 = buildInitializedName("i", buildIntType()); - SgInitializedName* arg2 = buildInitializedName("j", buildIntType()); - SgInitializedName* arg3 = buildInitializedName("k", buildIntType()); - SgName type_name("dim3"); - //SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(type_name); - - //ROSE_ASSERT(type_symbol != NULL); - - //SgClassDeclaration * dim3classdecl = isSgClassDeclaration( - // type_symbol->get_declaration()); - - SgFunctionDeclaration * funcdecl = buildNondefiningFunctionDeclaration( - SgName("dim3"), buildOpaqueType("dim3", globalScope), - //isSgType(dim3classdecl->get_type()), - buildFunctionParameterList(arg1, arg2, arg3), globalScope); - - if (cu_bx && cu_by) - repr = ocg->CreateDim3((const char*) gridName, ocg->CreateInt(cu_bx), - ocg->CreateInt(cu_by)); - else if (cu_bx_repr && cu_by_repr) - repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, cu_by_repr); - else if (cu_bx_repr) - repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, - ocg->CreateInt(1)); - setup_code = ocg->StmtListAppend(setup_code, repr); - //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList(); - - //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) - // (*replacement_list).push_back (*it); - - // repr = ocg->CreateDim3((const char*)blockName, cu_tx,cu_ty); - - if (cu_tz > 1 || cu_tz_repr) { - - if (cu_tx && cu_ty && cu_tz) - repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx), - ocg->CreateInt(cu_ty), ocg->CreateInt(cu_tz)); - else if (cu_tx_repr && cu_ty_repr && cu_tz_repr) - repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr, - cu_tz_repr); - // SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList(); - - // for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) - // (*replacement_list).push_back (*it); - - } else { - if (cu_tx && cu_ty) - repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx), - ocg->CreateInt(cu_ty)); - else if (cu_tx_repr && cu_ty_repr) - repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr); - //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList(); - - //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) - // (*replacement_list).push_back (*it); - - } - - setup_code = ocg->StmtListAppend(setup_code, repr); - - SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig( - buildVarRefExp(gridName), buildVarRefExp(blockName), NULL, NULL); - //SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig(buildIntVal(cu_bx), , NULL, NULL); - SgExprListExp* iml = new SgExprListExp(); - SgCastExp* dim_s; - - //Creating Kernel function - SgBasicBlock* bb = new SgBasicBlock(TRANSFORMATION_FILE_INFO); - SgFunctionDefinition* kernel_defn = new SgFunctionDefinition( - TRANSFORMATION_FILE_INFO, bb); - SgFunctionDeclaration* kernel_decl_ = new SgFunctionDeclaration( - TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn); - SgFunctionDeclaration* kernel_decl = new SgFunctionDeclaration( - TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn); - - //((kernel_decl->get_declarationModifier()).get_storageModifier()).setStatic(); - - kernel_decl->set_definingDeclaration(kernel_decl); - kernel_defn->set_parent(kernel_decl); - bb->set_parent(kernel_defn); - bb->set_endOfConstruct(TRANSFORMATION_FILE_INFO); - bb->get_endOfConstruct()->set_parent(bb); - - //SgFunctionSymbol* functionSymbol = new SgFunctionSymbol(kernel_decl_); - //globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()), - // functionSymbol); - SgFunctionSymbol* functionSymbol2 = new SgFunctionSymbol(kernel_decl); - - globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()), - functionSymbol2); - - kernel_decl_->set_parent(globals); - - kernel_decl_->set_scope(globals); - - kernel_decl_->setForward(); - - globals->prepend_declaration(kernel_decl_); - - kernel_decl->set_endOfConstruct(TRANSFORMATION_FILE_INFO); - kernel_decl->get_endOfConstruct()->set_parent(kernel_decl); - - kernel_decl->set_parent(globals); - kernel_decl->set_scope(globals); - - kernel_decl->get_definition()->set_endOfConstruct(TRANSFORMATION_FILE_INFO); - kernel_decl->get_definition()->get_endOfConstruct()->set_parent( - kernel_decl->get_definition()); - - globals->append_statement(kernel_decl); - - //printf("%s %s\n", static_cast<const char*>(cu_kernel_name), dims); - //--derick - kernel function parameters - for (int i = 0; i < arrayVars.size(); i++) - //Throw in a type cast if our kernel takes 2D array notation - //like (float(*) [1024]) - { - //protonu--throwing in another hack to stop the caller from passing tex mapped - //vars to the kernel. - if (arrayVars[i].tex_mapped == true || arrayVars[i].cons_mapped) - continue; - if (!(arrayVars[i].size_multi_dim.empty())) { - //snprintf(dims,120,"(float(*) [%d])%s", arrayVars[i].size_2d, - // const_cast<char*>(arrayVars[i].name.c_str())); - - SgType* t = arrayVars[i].type; - for (int k = arrayVars[i].size_multi_dim.size() - 1; k >= 0; k--) { - t = buildArrayType(t, - buildIntVal(arrayVars[i].size_multi_dim[k])); - } - SgVariableSymbol* temp = body_symtab->find_variable( - SgName((char*) arrayVars[i].name.c_str())); - if (temp == NULL) - temp = parameter_symtab->find_variable( - SgName((char*) arrayVars[i].name.c_str())); - - dim_s = buildCastExp(buildVarRefExp(temp), buildPointerType(t), - SgCastExp::e_C_style_cast); - - //printf("%d %s\n", i, dims); - iml->append_expression(dim_s); - - SgInitializedName* id = buildInitializedName( - (char*) arrayVars[i].original_name.c_str(), - buildPointerType(t)); - kernel_decl->get_parameterList()->append_arg(id); - kernel_decl_->get_parameterList()->append_arg(id); - id->set_file_info(TRANSFORMATION_FILE_INFO); - - // DQ (9/8/2007): We now test this, so it has to be set explicitly. - id->set_scope(kernel_decl->get_definition()); - - // DQ (9/8/2007): Need to add variable symbol to global scope! - //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n",globalScope,globalScope->class_name().c_str(),var1_init_name,var1_init_name->get_name().str()); - SgVariableSymbol *var_symbol = new SgVariableSymbol(id); - kernel_decl->get_definition()->insert_symbol(id->get_name(), - var_symbol); - - // if(kernel_decl->get_definition()->get_symbol_table()->find((const) id) == NULL) - - } else { - //printf("%d %s\n", i, static_cast<const char*>(arrayVars[i].name)); - SgVariableSymbol* temp = body_symtab->find_variable( - SgName((char*) arrayVars[i].name.c_str())); - if (temp == NULL) - temp = parameter_symtab->find_variable( - SgName((char*) arrayVars[i].name.c_str())); - iml->append_expression(buildVarRefExp(temp)); - SgInitializedName* id = buildInitializedName( - (char*) arrayVars[i].original_name.c_str(), - buildPointerType(arrayVars[i].type)); - kernel_decl->get_parameterList()->append_arg(id); - kernel_decl_->get_parameterList()->append_arg(id); - id->set_file_info(TRANSFORMATION_FILE_INFO); - - // DQ (9/8/2007): We now test this, so it has to be set explicitly. - id->set_scope(kernel_decl->get_definition()); - - // DQ (9/8/2007): Need to add variable symbol to global scope! - //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n"$ - SgVariableSymbol *var_symbol = new SgVariableSymbol(id); - kernel_decl->get_definition()->insert_symbol(id->get_name(), - var_symbol); - - } - - } - if (dim1) { - iml->append_expression(buildVarRefExp(dim1)); - SgInitializedName* id = buildInitializedName( - dim1->get_name().getString().c_str(), dim1->get_type()); - kernel_decl->get_parameterList()->append_arg(id); - - iml->append_expression(buildVarRefExp(dim2)); - SgInitializedName* id2 = buildInitializedName( - dim2->get_name().getString().c_str(), dim2->get_type()); - - kernel_decl->get_parameterList()->append_arg(id); - kernel_decl_->get_parameterList()->append_arg(id); - } - - kernel_decl->get_functionModifier().setCudaKernel(); - kernel_decl_->get_functionModifier().setCudaKernel(); - SgCudaKernelCallExp * cuda_call_site = new SgCudaKernelCallExp( - TRANSFORMATION_FILE_INFO, buildFunctionRefExp(kernel_decl), iml,config); - - // SgStatementPtrList *tnl2 = new SgStatementPtrList; - - (*replacement_list).push_back(buildExprStatement(cuda_call_site)); - - setup_code = ocg->StmtListAppend(setup_code, - new CG_roseRepr(replacement_list)); - - //cuda free variables - for (int i = 0; i < arrayVars.size(); i++) { - if (arrayVars[i].out_data) { - - SgName name_cuda_copy("cudaMemcpy"); - SgFunctionDeclaration * decl_cuda_copyout = - buildNondefiningFunctionDeclaration(name_cuda_copy, - buildVoidType(), buildFunctionParameterList(), - globals); - - SgExprListExp* args = buildExprListExp(); - SgExprListExp * cuda_copy_out_args = buildExprListExp(); - cuda_copy_out_args->append_expression( - isSgExpression(buildVarRefExp(arrayVars[i].out_data))); - cuda_copy_out_args->append_expression( - isSgExpression(buildVarRefExp(arrayVars[i].name))); - CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr); - cuda_copy_out_args->append_expression( - static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression()); - cuda_copy_out_args->append_expression( - buildOpaqueVarRefExp("cudaMemcpyDeviceToHost", globals)); - -// cuda_copy_in_args->append_expression( -// new SgVarRefExp(sourceLocation, ) -// ); - SgFunctionCallExp * cuda_copy_out_func_call = buildFunctionCallExp( - buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args); - - SgFunctionCallExp *the_call = buildFunctionCallExp( - buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args); - - SgExprStatement* stmt = buildExprStatement(the_call); - - SgStatementPtrList* tnl3 = new SgStatementPtrList; - - (*tnl3).push_back(stmt); - - // tree_node_list* tnl = new tree_node_list; - // tnl->append(new tree_instr(the_call)); - setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl3)); - - } - if(!arrayVars[i].cons_mapped) { - SgName name_cuda_free("cudaFree"); - SgFunctionDeclaration * decl_cuda_free = - buildNondefiningFunctionDeclaration(name_cuda_free, - buildVoidType(), buildFunctionParameterList(), globals); - - SgExprListExp* args3 = buildExprListExp(); - - SgVariableSymbol* tmp = body_symtab->find_variable( - SgName(arrayVars[i].name.c_str())); - if (tmp == NULL) - tmp = parameter_symtab->find_variable( - SgName(arrayVars[i].name.c_str())); - - args3->append_expression(buildVarRefExp(tmp)); - - SgFunctionCallExp *the_call2 = buildFunctionCallExp( - buildFunctionRefExp(decl_cuda_free), args3); - - SgExprStatement* stmt2 = buildExprStatement(the_call2); - - SgStatementPtrList* tnl4 = new SgStatementPtrList; - - (*tnl4).push_back(stmt2); - //(*replacement_list).push_back (stmt2); - - setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl4)); - } - } - - // --------------- - // BUILD THE KERNEL - // --------------- - - //Extract out kernel body - SgNode* code = getCode(); - //Create kernel function body - //Add Params - std::map<std::string, SgVariableSymbol*> loop_vars; - //In-Out arrays - for (int i = 0; i < arrayVars.size(); i++) { - /* if(arrayVars[i].in_data) - fptr = arrayVars[i].in_data->type()->clone(); - else - fptr = arrayVars[i].out_data->type()->clone(); - */ - - // fptr = new_proc_syms->install_type(fptr); - std::string name = - arrayVars[i].in_data ? - arrayVars[i].in_data->get_declaration()->get_name().getString() : - arrayVars[i].out_data->get_declaration()->get_name().getString(); - //SgVariableSymbol* sym = new var_sym(fptr, arrayVars[i].in_data ? arrayVars[i].in_data->name() : arrayVars[i].out_data->name()); - - SgVariableSymbol* sym = - kernel_decl->get_definition()->get_symbol_table()->find_variable( - (const char*) name.c_str()); - /* SgVariableDeclaration* defn = buildVariableDeclaration(SgName(name.c_str()), buildFloatType()); - SgInitializedNamePtrList& variables = defn->get_variables(); - SgInitializedNamePtrList::const_iterator i = variables.begin(); - SgInitializedName* initializedName = *i; - SgVariableSymbol* sym = new SgVariableSymbol(initializedName); - prependStatement(defn, isSgScopeStatement(root_)); - - vs->set_parent(symtab2_); - symtab2_->insert(SgName(_s.c_str()), vs); - */ - - if (sym != NULL) - loop_vars.insert( - std::pair<std::string, SgVariableSymbol*>(std::string(name), - sym)); - } - - //Figure out which loop variables will be our thread and block dimention variables - std::vector<SgVariableSymbol *> loop_syms; - //Get our indexes - std::vector<const char*> indexes; // = get_loop_indexes(code,cu_num_reduce); - int threadsPos = 0; - - CG_outputRepr *body = NULL; - SgFunctionDefinition* func_d = func_definition; - //std::vector<SgVariableSymbol *> symbols = recursiveFindRefs(code); - - SgName name_sync("__syncthreads"); - SgFunctionDeclaration * decl_sync = buildNondefiningFunctionDeclaration( - name_sync, buildVoidType(), buildFunctionParameterList(), - globalScope); - - recursiveFindRefs(code, syms, func_d); - - //SgFunctionDeclaration* func = Outliner::generateFunction (code, (char*)cu_kernel_name.c_str(), syms, pdSyms, psyms, NULL, globalScope); - - if (cu_bx > 1 || cu_bx_repr) { - indexes.push_back("bx"); - SgName type_name("blockIdx.x"); - SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( - type_name); - SgVariableDeclaration * var_decl = buildVariableDeclaration("bx", - buildIntType(), NULL, - isSgScopeStatement(kernel_decl->get_definition()->get_body())); - SgStatementPtrList *tnl = new SgStatementPtrList; - // (*tnl).push_back(isSgStatement(var_decl)); - appendStatement(var_decl, kernel_decl->get_definition()->get_body()); - - SgVariableSymbol* bx = - kernel_decl->get_definition()->get_body()->lookup_variable_symbol( - SgName("bx")); - SgStatement* assign = isSgStatement( - buildAssignStatement(buildVarRefExp(bx), - buildOpaqueVarRefExp("blockIdx.x", - kernel_decl->get_definition()->get_body()))); - (*tnl).push_back(assign); - // body = ocg->StmtListAppend(body, - // new CG_roseRepr(tnl)); - appendStatement(assign, kernel_decl->get_definition()->get_body()); - - } - if (cu_by > 1 || cu_by_repr) { - indexes.push_back("by"); - SgName type_name("blockIdx.y"); - SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( - type_name); - SgVariableDeclaration * var_decl = buildVariableDeclaration("by", - buildIntType(), NULL, - isSgScopeStatement(kernel_decl->get_definition()->get_body())); - // SgStatementPtrList *tnl = new SgStatementPtrList; - // (*tnl).push_back(isSgStatement(var_decl)); - appendStatement(var_decl, kernel_decl->get_definition()->get_body()); - - SgVariableSymbol* by = - kernel_decl->get_definition()->get_body()->lookup_variable_symbol( - SgName("by")); - SgStatement* assign = isSgStatement( - buildAssignStatement(buildVarRefExp(by), - buildOpaqueVarRefExp("blockIdx.y", - kernel_decl->get_definition()->get_body()))); - //(*tnl).push_back(assign); - // body = ocg->StmtListAppend(body, - // new CG_roseRepr(tnl)); - appendStatement(assign, kernel_decl->get_definition()->get_body()); - - } - if (cu_tx_repr || cu_tx > 1) { - threadsPos = indexes.size(); - indexes.push_back("tx"); - SgName type_name("threadIdx.x"); - SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( - type_name); - SgVariableDeclaration * var_decl = buildVariableDeclaration("tx", - buildIntType(), NULL, - isSgScopeStatement(kernel_decl->get_definition()->get_body())); - // SgStatementPtrList *tnl = new SgStatementPtrList; - // (*tnl).push_back(isSgStatement(var_decl)); - appendStatement(var_decl, kernel_decl->get_definition()->get_body()); - - SgVariableSymbol* tx = - kernel_decl->get_definition()->get_body()->lookup_variable_symbol( - SgName("tx")); - SgStatement* assign = isSgStatement( - buildAssignStatement(buildVarRefExp(tx), - buildOpaqueVarRefExp("threadIdx.x", - kernel_decl->get_definition()->get_body()))); - //(*tnl).push_back(assign); - // body = ocg->StmtListAppend(body, - // new CG_roseRepr(tnl)); - appendStatement(assign, kernel_decl->get_definition()->get_body()); - - } - if (cu_ty_repr || cu_ty > 1) { - indexes.push_back("ty"); - SgName type_name("threadIdx.y"); - SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( - type_name); - SgVariableDeclaration * var_decl = buildVariableDeclaration("ty", - buildIntType(), NULL, - isSgScopeStatement(kernel_decl->get_definition()->get_body())); - appendStatement(var_decl, kernel_decl->get_definition()->get_body()); - - // SgStatementPtrList *tnl = new SgStatementPtrList; - // (*tnl).push_back(isSgStatement(var_decl)); - SgVariableSymbol* ty = - kernel_decl->get_definition()->get_body()->lookup_variable_symbol( - SgName("ty")); - SgStatement* assign = isSgStatement( - buildAssignStatement(buildVarRefExp(ty), - buildOpaqueVarRefExp("threadIdx.y", - kernel_decl->get_definition()->get_body()))); - // (*tnl).push_back(assign); - // body = ocg->StmtListAppend(body, - // new CG_roseRepr(tnl)); - appendStatement(assign, kernel_decl->get_definition()->get_body()); - - } - if (cu_tz_repr || cu_tz > 1) { - indexes.push_back("tz"); - SgName type_name("threadIdx.z"); - SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( - type_name); - SgVariableDeclaration * var_decl = buildVariableDeclaration("tz", - buildIntType(), NULL, - isSgScopeStatement(kernel_decl->get_definition()->get_body())); - // SgStatementPtrList *tnl = new SgStatementPtrList; - // (*tnl).push_back(isSgStatement(var_decl)); - appendStatement(var_decl, kernel_decl->get_definition()->get_body()); - - SgVariableSymbol* tz = - kernel_decl->get_definition()->get_body()->lookup_variable_symbol( - "tz"); - SgStatement* assign = isSgStatement( - buildAssignStatement(buildVarRefExp(tz), - buildOpaqueVarRefExp("threadIdx.z", - kernel_decl->get_definition()->get_body()))); - // (*tnl).push_back(assign); - // body = ocg->StmtListAppend(body, - // new CG_roseRepr(tnl)); - appendStatement(assign, kernel_decl->get_definition()->get_body()); - - } - - std::map<std::string, SgVariableSymbol*> loop_idxs; //map from idx names to their new syms - - SgNode* swapped_ = swapVarReferences(code, syms, - kernel_decl->get_definition()->get_symbol_table(), - kernel_decl->get_definition()->get_body()->get_symbol_table(), - kernel_decl->get_definition()->get_body()); - - //std::cout << swapped_->unparseToString() << std::endl << std::endl; - - SgNode *swapped = recursiveFindReplacePreferedIdxs(swapped_, - kernel_decl->get_definition()->get_body()->get_symbol_table(), - kernel_decl->get_definition()->get_symbol_table(), - kernel_decl->get_definition()->get_body(), loop_idxs, globalScope); //in-place swapping - //swapped->print(); - - if (!isSgBasicBlock(swapped)) { - appendStatement(isSgStatement(swapped), - kernel_decl->get_definition()->get_body()); - swapped->set_parent( - isSgNode(kernel_decl->get_definition()->get_body())); - } else { - - for (SgStatementPtrList::iterator it = - isSgBasicBlock(swapped)->get_statements().begin(); - it != isSgBasicBlock(swapped)->get_statements().end(); it++) { - appendStatement(*it, kernel_decl->get_definition()->get_body()); - (*it)->set_parent( - isSgNode(kernel_decl->get_definition()->get_body())); - - } - - } - - for (int i = 0; i < indexes.size(); i++) { - std::vector<SgForStatement*> tfs = findCommentedFors(indexes[i], - swapped); - for (int k = 0; k < tfs.size(); k++) { - //printf("replacing %p tfs for index %s\n", tfs[k], indexes[i]); - SgNode* newBlock = forReduce(tfs[k], loop_idxs[indexes[i]], - kernel_decl->get_definition()); - //newBlock->print(); - swap_node_for_node_list(tfs[k], newBlock); - //printf("AFTER SWAP\n"); newBlock->print(); - } - } - - //--derick replace array refs of texture mapped vars here - body = new CG_roseRepr(kernel_decl->get_definition()->get_body()); - std::vector<IR_ArrayRef*> refs = ir->FindArrayRef(body); - texmapArrayRefs(texture, &refs, globals, ir, ocg); - // do the same for constant mapped vars - consmapArrayRefs(constant_mem, &refs, globals, ir, ocg); - - return swapped; -} - -//Order taking out dummy variables -std::vector<std::string> cleanOrder(std::vector<std::string> idxNames) { - std::vector<std::string> results; - for (int j = 0; j < idxNames.size(); j++) { - if (idxNames[j].length() != 0) - results.push_back(idxNames[j]); - } - return results; -} - -//First non-dummy level in ascending order -int LoopCuda::nonDummyLevel(int stmt, int level) { - //level comes in 1-basd and should leave 1-based - for (int j = level - 1; j < idxNames[stmt].size(); j++) { - if (idxNames[stmt][j].length() != 0) { - //printf("found non dummy level of %d with idx: %s when searching for %d\n", j+1, (const char*) idxNames[stmt][j], level); - return j + 1; - } - } - char buf[128]; - sprintf(buf, "%d", level); - throw std::runtime_error( - std::string("Unable to find a non-dummy level starting from ") - + std::string(buf)); -} - -int LoopCuda::findCurLevel(int stmt, std::string idx) { - for (int j = 0; j < idxNames[stmt].size(); j++) { - if (strcmp(idxNames[stmt][j].c_str(), idx.c_str()) == 0) - return j + 1; - } - throw std::runtime_error( - std::string("Unable to find index ") + idx - + std::string(" in current list of indexes")); -} - -void LoopCuda::permute_cuda(int stmt, - const std::vector<std::string>& curOrder) { - //printf("curOrder: "); - //printVs(curOrder); - //printf("idxNames: "); - //printVS(idxNames[stmt]); - std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt]); - bool same = true; - std::vector<int> pi; - for (int i = 0; i < curOrder.size(); i++) { - bool found = false; - for (int j = 0; j < cIdxNames.size(); j++) { - if (strcmp(cIdxNames[j].c_str(), curOrder[i].c_str()) == 0) { - pi.push_back(j + 1); - found = true; - if (j != i) - same = false; - } - } - if (!found) { - throw std::runtime_error( - "One of the indexes in the permute order were not " - "found in the current set of indexes."); - } - } - for (int i = curOrder.size(); i < cIdxNames.size(); i++) { - pi.push_back(i); - } - if (same) - return; - permute(stmt, pi); - //Set old indexe names as new - for (int i = 0; i < curOrder.size(); i++) { - idxNames[stmt][i] = curOrder[i].c_str(); //what about sibling stmts? - } -} - -bool LoopCuda::permute(int stmt_num, const std::vector<int> &pi) { -// check for sanity of parameters - if (stmt_num >= stmt.size() || stmt_num < 0) - throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - const int n = stmt[stmt_num].xform.n_out(); - if (pi.size() > (n - 1) / 2) - throw std::invalid_argument( - "iteration space dimensionality does not match permute dimensionality"); - int first_level = 0; - int last_level = 0; - for (int i = 0; i < pi.size(); i++) { - if (pi[i] > (n - 1) / 2 || pi[i] <= 0) - throw std::invalid_argument( - "invalid loop level " + to_string(pi[i]) - + " in permuation"); - - if (pi[i] != i + 1) { - if (first_level == 0) - first_level = i + 1; - last_level = i + 1; - } - } - if (first_level == 0) - return true; - - std::vector<int> lex = getLexicalOrder(stmt_num); - std::set<int> active = getStatements(lex, 2 * first_level - 2); - Loop::permute(active, pi); -} - -void LoopCuda::tile_cuda(int stmt, int level, int outer_level) { - tile_cuda(stmt, level, 1, outer_level, "", "", CountedTile); -} -void LoopCuda::tile_cuda(int level, int tile_size, int outer_level, - std::string idxName, std::string ctrlName, TilingMethodType method) { - tile_cuda(0, level, tile_size, outer_level, idxName, ctrlName, method); -} - -void LoopCuda::tile_cuda(int stmt, int level, int tile_size, int outer_level, - std::string idxName, std::string ctrlName, TilingMethodType method) { - //Do regular tile but then update the index and control loop variable - //names as well as the idxName to reflect the current state of things. - //printf("tile(%d,%d,%d,%d)\n", stmt, level, tile_size, outer_level); - //printf("idxNames before: "); - //printVS(idxNames[stmt]); - - tile(stmt, level, tile_size, outer_level, method); - - if (idxName.size()) - idxNames[stmt][level - 1] = idxName.c_str(); - if (tile_size == 1) { - //potentially rearrange loops - if (outer_level < level) { - std::string tmp = idxNames[stmt][level - 1]; - for (int i = level - 1; i > outer_level - 1; i--) { - if (i - 1 >= 0) - idxNames[stmt][i] = idxNames[stmt][i - 1]; - } - idxNames[stmt][outer_level - 1] = tmp; - } - //TODO: even with a tile size of one, you need a insert (of a dummy loop) - idxNames[stmt].insert(idxNames[stmt].begin() + (level), ""); - } else { - if (!ctrlName.size()) - throw std::runtime_error("No ctrl loop name for tile"); - //insert - idxNames[stmt].insert(idxNames[stmt].begin() + (outer_level - 1), - ctrlName.c_str()); - } - - //printf("idxNames after: "); - //printVS(idxNames[stmt]); -} - -bool LoopCuda::datacopy_privatized_cuda(int stmt_num, int level, - const std::string &array_name, - const std::vector<int> &privatized_levels, bool allow_extra_read, - int fastest_changing_dimension, int padding_stride, - int padding_alignment, bool cuda_shared) { - int old_stmts = stmt.size(); - // printf("before datacopy_privatized:\n"); - printIS(); - //datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, cuda_shared); - if (cuda_shared) - datacopy_privatized(stmt_num, level, array_name, privatized_levels, - allow_extra_read, fastest_changing_dimension, padding_stride, - padding_alignment, 1); - else - datacopy_privatized(stmt_num, level, array_name, privatized_levels, - allow_extra_read, fastest_changing_dimension, padding_stride, - padding_alignment, 0); - // printf("after datacopy_privatized:\n"); - printIS(); - - //Adjust idxNames to reflect updated state - std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]); - int new_stmts = stmt.size(); - for (int i = old_stmts; i < new_stmts; i++) { - //printf("fixing up statement %d\n", i); - std::vector<std::string> idxs; - - //protonu-making sure the vector of nonSplitLevels grows along with - //the statement structure - stmt_nonSplitLevels.push_back(std::vector<int>()); - - //Indexes up to level will be the same - for (int j = 0; j < level - 1; j++) - idxs.push_back(cIdxNames[j]); - - //Expect privatized_levels to match - for (int j = 0; j < privatized_levels.size(); j++) - idxs.push_back(cIdxNames[privatized_levels[j] - 1]);//level is one-based - - //all further levels should match order they are in originally - if (privatized_levels.size()) { - int last_privatized = privatized_levels.back(); - int top_level = last_privatized - + (stmt[i].IS.n_set() - idxs.size()); - //printf("last privatized_levels: %d top_level: %d\n", last_privatized, top_level); - for (int j = last_privatized; j < top_level; j++) { - idxs.push_back(cIdxNames[j]); - //printf("pushing back: %s\n", (const char*)cIdxNames[j]); - } - } - idxNames.push_back(idxs); - } -} - -bool LoopCuda::datacopy_cuda(int stmt_num, int level, - const std::string &array_name, - const std::vector<std::string> new_idxs, - bool allow_extra_read, int fastest_changing_dimension, - int padding_stride, int padding_alignment, bool cuda_shared) { - - int old_stmts = stmt.size(); - //datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment,cuda_shared); - // printf("before datacopy:\n"); - // printIS(); - if (cuda_shared) - datacopy(stmt_num, level, array_name, allow_extra_read, - fastest_changing_dimension, padding_stride, padding_alignment, - 1); - else - datacopy(stmt_num, level, array_name, allow_extra_read, - fastest_changing_dimension, padding_stride, padding_alignment, - 0); - // printf("after datacopy:\n"); - printIS(); - - //Adjust idxNames to reflect updated state - std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]); - int new_stmts = stmt.size(); - for (int i = old_stmts; i < new_stmts; i++) { - //printf("fixing up statement %d\n", i); - std::vector<std::string> idxs; - - //protonu-making sure the vector of nonSplitLevels grows along with - //the statement structure - stmt_nonSplitLevels.push_back(std::vector<int>()); - - //Indexes up to level will be the same - for (int j = 0; j < level - 1; j++) - idxs.push_back(cIdxNames[j]); - - //all further levels should get names from new_idxs - int top_level = stmt[i].IS.n_set(); - //printf("top_level: %d level: %d\n", top_level, level); - if (new_idxs.size() < top_level - level + 1) - throw std::runtime_error( - "Need more new index names for new datacopy loop levels"); - - for (int j = level - 1; j < top_level; j++) { - idxs.push_back(new_idxs[j - level + 1].c_str()); - //printf("pushing back: %s\n", new_idxs[j-level+1].c_str()); - } - idxNames.push_back(idxs); - } -} - -bool LoopCuda::unroll_cuda(int stmt_num, int level, int unroll_amount) { - int old_stmts = stmt.size(); - //bool b= unroll(stmt_num, , unroll_amount); - - int dim = 2 * level - 1; - std::vector<int> lex = getLexicalOrder(stmt_num); - std::set<int> same_loop = getStatements(lex, dim - 1); - - level = nonDummyLevel(stmt_num, level); - //printf("unrolling %d at level %d\n", stmt_num,level); - - //protonu--using the new version of unroll, which returns - //a set of ints instead of a bool. To keep Gabe's logic - //I'll check the size of the set, if it's 0 return true - //bool b= unroll(stmt_num, level, unroll_amount); - std::set<int> b_set = unroll(stmt_num, level, unroll_amount, idxNames); - bool b = false; - if (b_set.size() == 0) - b = true; - //end--protonu - - //Adjust idxNames to reflect updated state - std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]); - std::vector<std::string> origSource = idxNames[stmt_num]; - ; - //Drop index names at level - if (unroll_amount == 0) { - //For all statements that were in this unroll together, drop index name for unrolled level - idxNames[stmt_num][level - 1] = ""; - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) { - //printf("in same loop as %d is %d\n", stmt_num, (*i)); - //idxNames[(*i)][level-1] = ""; - idxNames[(*i)] = idxNames[stmt_num]; - } - } - - lex = getLexicalOrder(stmt_num); - same_loop = getStatements(lex, dim - 1); - - bool same_as_source = false; - int new_stmts = stmt.size(); - for (int i = old_stmts; i < new_stmts; i++) { - //Check whether we had a sync for the statement we are unrolling, if - //so, propogate that to newly created statements so that if they are - //in a different loop structure, they will also get a syncthreads - int size = syncs.size(); - for (int j = 0; j < size; j++) { - if (syncs[j].first == stmt_num) - syncs.push_back(make_pair(i, syncs[j].second)); - } - - //protonu-making sure the vector of nonSplitLevels grows along with - //the statement structure - stmt_nonSplitLevels.push_back(std::vector<int>()); - - //We expect that new statements have a constant for the variable in - //stmt[i].IS at level (as seen with print_with_subs), otherwise there - //will be a for loop at level and idxNames should match stmt's - //idxNames pre-unrolled - Relation IS = stmt[i].IS; - //Ok, if you know how the hell to get anything out of a Relation, you - //should probably be able to do this more elegantly. But for now, I'm - //hacking it. - std::string s = IS.print_with_subs_to_string(); - //s looks looks like - //{[_t49,8,_t51,_t52,128]: 0 <= _t52 <= 3 && 0 <= _t51 <= 15 && 0 <= _t49 && 64_t49+16_t52+_t51 <= 128} - //where level == 5, you see a integer in the input set - - //If that's not an integer and this is the first new statement, then - //we think codegen will have a loop at that level. It's not perfect, - //not sure if it can be determined without round-tripping to codegen. - int sIdx = 0; - int eIdx = 0; - for (int j = 0; j < level - 1; j++) { - sIdx = s.find(",", sIdx + 1); - if (sIdx < 0) - break; - } - if (sIdx > 0) { - eIdx = s.find("]"); - int tmp = s.find(",", sIdx + 1); - if (tmp > 0 && tmp < eIdx) - eIdx = tmp; //", before ]" - if (eIdx > 0) { - sIdx++; - std::string var = s.substr(sIdx, eIdx - sIdx); - //printf("%s\n", s.c_str()); - //printf("set var for stmt %d at level %d is %s\n", i, level, var.c_str()); - if (atoi(var.c_str()) == 0 && i == old_stmts) { - //TODO:Maybe do see if this new statement would be in the same - //group as the original and if it would, don't say - //same_as_source - if (same_loop.find(i) == same_loop.end()) { - printf( - "stmt %d level %d, newly created unroll statement should have same level indexes as source\n", - i, level); - same_as_source = true; - } - } - } - } - - //printf("fixing up statement %d n_set %d with %d levels\n", i, stmt[i].IS.n_set(), level-1); - if (same_as_source) - idxNames.push_back(origSource); - else - idxNames.push_back(idxNames[stmt_num]); - } - - return b; -} - -void LoopCuda::copy_to_texture(const char *array_name) { - //protonu--placeholder for now - //set the bool for using cuda memory as true - //in a vector of strings, put the names of arrays to tex mapped - if (!texture) - texture = new texture_memory_mapping(true, array_name); - else - texture->add(array_name); - -} - -//void LoopCuda::copy_to_texture_2d(const char *array_name, int width, int height) { -// if (!texture) -// texture = new texture_memory_mapping(true, array_name, width, height); -// else -// texture->add(array_name, width, height); -//} - -void LoopCuda::copy_to_constant(const char *array_name) { - if(!constant_mem) - constant_mem = new constant_memory_mapping(true, array_name); - else - constant_mem->add(array_name); -} - -//protonu--moving this from Loop -SgNode* LoopCuda::codegen() { - if (code_gen_flags & GenCudaizeV2) - return cudaize_codegen_v2(); - //Do other flagged codegen methods, return plain vanilla generated code - return getCode(); -} - -//These three are in Omega code_gen.cc and are used as a massive hack to -//get out some info from MMGenerateCode. Yea for nasty side-effects. -namespace omega { - extern int checkLoopLevel; - extern int stmtForLoopCheck; - extern int upperBoundForLevel; - extern int lowerBoundForLevel; -} - -CG_outputRepr* LoopCuda::extractCudaUB(int stmt_num, int level, - int &outUpperBound, int &outLowerBound) { - // check for sanity of parameters - const int m = stmt.size(); - if (stmt_num >= m || stmt_num < 0) - throw std::invalid_argument("invalid statement " + to_string(stmt_num)); - const int n = stmt[stmt_num].xform.n_out(); - if (level > (n - 1) / 2 || level <= 0) - throw std::invalid_argument("invalid loop level " + to_string(level)); - - int dim = 2 * level - 1; - - std::vector<int> lex = getLexicalOrder(stmt_num); - std::set<int> same_loop = getStatements(lex, dim - 1); - - // extract the intersection of the iteration space to be considered - Relation hull; - { - hull = Relation::True(n); - for (std::set<int>::iterator i = same_loop.begin(); - i != same_loop.end(); i++) { - Relation r = getNewIS(*i); - for (int j = dim + 2; j <= r.n_set(); j++) - r = Project(r, r.set_var(j)); - hull = Intersection(hull, r); - hull.simplify(2, 4); - } - - for (int i = 2; i <= dim + 1; i += 2) { - //std::string name = std::string("_t") + to_string(t_counter++); - std::string name = std::string("_t") - + to_string(tmp_loop_var_name_counter++); - hull.name_set_var(i, name); - } - hull.setup_names(); - } - - // extract the exact loop bound of the dimension to be unrolled - if (is_single_iteration(hull, dim)) { - throw std::runtime_error( - "No loop availabe at level to extract upper bound."); - } - Relation bound = get_loop_bound(hull, dim); - if (!bound.has_single_conjunct() || !bound.is_satisfiable() - || bound.is_tautology()) - throw loop_error( - "loop error: unable to extract loop bound for cudaize"); - - // extract the loop stride - EQ_Handle stride_eq; - /*int stride = 1; - { - bool simple_stride = true; - int strides = countStrides(bound.query_DNF()->single_conjunct(), - bound.set_var(dim + 1), stride_eq, simple_stride); - if (strides > 1) - throw loop_error("loop error: too many strides"); - else if (strides == 1) { - int sign = stride_eq.get_coef(bound.set_var(dim + 1)); - // assert(sign == 1 || sign == -1); - Constr_Vars_Iter it(stride_eq, true); - stride = abs((*it).coef / sign); - } - } - */ - int stride = 1; - { - - coef_t stride; - std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, - bound.set_var(dim + 1)); - if (result.second == NULL) - stride = 1; - else - stride = abs(result.first.get_coef(result.second)) - / gcd(abs(result.first.get_coef(result.second)), - abs(result.first.get_coef(bound.set_var(dim + 1)))); - - if (stride > 1) - throw loop_error("loop error: too many strides"); - /*else if (stride == 1) { - int sign = result.first.get_coef(bound.set_var(dim+1)); - assert(sign == 1 || sign == -1); - } */ - } - - if (stride != 1) { - char buf[1024]; - sprintf(buf, "Cudaize: Loop at level %d has non-one stride of %d", - level, stride); - throw std::runtime_error(buf); - } - - //Use code generation system to build tell us our bound information. We - //need a hard upper bound a 0 lower bound. - - checkLoopLevel = level * 2; - stmtForLoopCheck = stmt_num; - upperBoundForLevel = -1; - lowerBoundForLevel = -1; - printCode(1, false); - checkLoopLevel = 0; - - outUpperBound = upperBoundForLevel; - outLowerBound = lowerBoundForLevel; - - if (outUpperBound == -1) { - - CG_result* temp = last_compute_cgr_; - - while (temp) { - CG_loop * loop; - if (loop = dynamic_cast<CG_loop*>(temp)) { - if (loop->level_ == 2 * level) { - Relation bound = copy(loop->bounds_); - Variable_ID v = bound.set_var(2 * level); - for (GEQ_Iterator e( - const_cast<Relation &>(bound).single_conjunct()->GEQs()); - e; e++) { - if ((*e).get_coef(v) < 0 - && (*e).is_const_except_for_global(v)) - return output_upper_bound_repr(ir->builder(), *e, v, - bound, - std::vector<std::pair<CG_outputRepr *, int> >( - bound.n_set(), - std::make_pair( - static_cast<CG_outputRepr *>(NULL), - 0))); - } - } - if (loop->level_ > 2 * level) - break; - else - temp = loop->body_; - } else - break; - } - } - - return NULL; -} - -void LoopCuda::printCode(int effort, bool actuallyPrint) const { - const int m = stmt.size(); - if (m == 0) - return; - const int n = stmt[0].xform.n_out(); - - /*or (int i = 0; i < m; i++) { - IS[i + 1] = stmt[i].IS; - xform[i + 1] = stmt[i].xform; - - //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; - } - */ - - // invalidate saved codegen computation - if (last_compute_cgr_ != NULL) { - delete last_compute_cgr_; - last_compute_cgr_ = NULL; - } - - if (last_compute_cg_ != NULL) { - delete last_compute_cg_; - last_compute_cg_ = NULL; - } - - //Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - /*CG_stringBuilder *ocg = new CG_stringBuilder(); - Tuple<CG_outputRepr *> nameInfo; - for (int i = 1; i <= m; i++) - nameInfo.append(new CG_stringRepr("s" + to_string(i))); - */ - - // -- replacing MMGenerateCode - // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort); - // -- in the future, these if statements need to be cleaned up. - // -- something like check_lastComputeCG might be a decent protected member function - // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_ - //if (last_compute_cg_ == NULL) { - std::vector<Relation> IS(m); - std::vector<Relation> xforms(m); - std::vector<std::vector<int> > nonSplitLevels(m); - - /* std::vector < std::vector <std::string> > idxTupleNames; - if (useIdxNames) { - for (int i = 0; i < idxNames.size(); i++) { - Tuple<std::string> idxs; - for (int j = 0; j < idxNames[i].size(); j++) - idxs.append(idxNames[i][j]); - idxTupleNames.append(idxs); - } - } - */ - for (int i = 0; i < m; i++) { - IS[i] = stmt[i].IS; - xforms[i] = stmt[i].xform; - nonSplitLevels[i] = stmt_nonSplitLevels[i]; - } - Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - - last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames, - syncs); - - delete last_compute_cgr_; // this was just done above? - last_compute_cgr_ = NULL; - //} - - if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { - delete last_compute_cgr_; - last_compute_cgr_ = last_compute_cg_->buildAST(effort); - last_compute_effort_ = effort; - } - - //std::vector<CG_outputRepr *> stmts(m); - //for (int i = 0; i < m; i++) - // stmts[i] = stmt[i].code; - //CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts); - // -- end replacing MMGenerateCode - std::string repr = last_compute_cgr_->printString(); - - if (actuallyPrint) - std::cout << repr << std::endl; - //std::cout << static_cast<CG_stringRepr*>(repr)->GetString(); - /* - for (int i = 1; i <= m; i++) - delete nameInfo[i]; - */ - - //delete ocg; -} - -void LoopCuda::printRuntimeInfo() const { - for (int i = 0; i < stmt.size(); i++) { - Relation IS = stmt[i].IS; - Relation xform = stmt[i].xform; - printf("stmt[%d]\n", i); - printf("IS\n"); - IS.print_with_subs(); - - printf("xform[%d]\n", i); - xform.print_with_subs(); - - } -} - -void LoopCuda::printIndexes() const { - for (int i = 0; i < stmt.size(); i++) { - printf("stmt %d nset %d ", i, stmt[i].IS.n_set()); - - for (int j = 0; j < idxNames[i].size(); j++) { - if (j > 0) - printf(","); - printf("%s", idxNames[i][j].c_str()); - } - printf("\n"); - } -} - -SgNode* LoopCuda::getCode(int effort) const { - const int m = stmt.size(); - if (m == 0) - return new SgNode; - const int n = stmt[0].xform.n_out(); - /* - Tuple<CG_outputRepr *> ni(m); - Tuple < Relation > IS(m); - Tuple < Relation > xform(m); - vector < vector <int> > nonSplitLevels(m); - for (int i = 0; i < m; i++) { - ni[i + 1] = stmt[i].code; - IS[i + 1] = stmt[i].IS; - xform[i + 1] = stmt[i].xform; - nonSplitLevels[i + 1] = stmt_nonSplitLevels[i]; - - //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; - } - */ - //Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); -//#ifdef DEBUG -//#endif - //std::cout << GetString(MMGenerateCode(new CG_stringBuilder(), xform, IS, ni, known, - // nonSplitLevels, syncs, idxTupleNames, effort)); - if (last_compute_cgr_ != NULL) { - delete last_compute_cgr_; - last_compute_cgr_ = NULL; - } - - if (last_compute_cg_ != NULL) { - delete last_compute_cg_; - last_compute_cg_ = NULL; - } - - CG_outputBuilder *ocg = ir->builder(); - // -- replacing MMGenerateCode - // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort); - // -- in the future, these if statements need to be cleaned up. - // -- something like check_lastComputeCG might be a decent protected member function - // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_ - //if (last_compute_cg_ == NULL) { - std::vector<Relation> IS(m); - std::vector<Relation> xforms(m); - std::vector<std::vector<int> > nonSplitLevels(m); - for (int i = 0; i < m; i++) { - IS[i] = stmt[i].IS; - xforms[i] = stmt[i].xform; - nonSplitLevels[i] = stmt_nonSplitLevels[i]; - } - - /*std::vector < std::vector<std::string> > idxTupleNames; - if (useIdxNames) { - for (int i = 0; i < idxNames.size(); i++) { - std::vector<std::string> idxs; - for (int j = 0; j < idxNames[i].size(); j++) - idxs.push_back(idxNames[i][j]); - idxTupleNames.push_back(idxs); - } - } - */ - Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); - - last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames, - syncs); - delete last_compute_cgr_; - last_compute_cgr_ = NULL; - //} - - if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { - delete last_compute_cgr_; - last_compute_cgr_ = last_compute_cg_->buildAST(effort); - last_compute_effort_ = effort; - } - - std::vector<CG_outputRepr *> stmts(m); - for (int i = 0; i < m; i++) - stmts[i] = stmt[i].code; - CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts); - // -- end replacing MMGenerateCode - - //CG_outputRepr *overflow_initialization = ocg->CreateStmtList(); - CG_outputRepr *overflow_initialization = ocg->StmtListAppend(NULL, NULL); - for (std::map<int, std::vector<Free_Var_Decl *> >::const_iterator i = - overflow.begin(); i != overflow.end(); i++) - for (std::vector<Free_Var_Decl *>::const_iterator j = i->second.begin(); - j != i->second.end(); j++) - //overflow_initialization = ocg->StmtListAppend(overflow_initialization, ocg->CreateStmtList(ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0)))); - overflow_initialization = ocg->StmtListAppend( - overflow_initialization, - ocg->StmtListAppend( - ocg->CreateAssignment(0, - ocg->CreateIdent((*j)->base_name()), - ocg->CreateInt(0)), NULL)); - - repr = ocg->StmtListAppend(overflow_initialization, repr); - SgNode *tnl = static_cast<CG_roseRepr *>(repr)->GetCode(); - SgStatementPtrList *list = static_cast<CG_roseRepr *>(repr)->GetList(); - - if (tnl != NULL) - return tnl; - else if (tnl == NULL && list != NULL) { - SgBasicBlock* bb2 = buildBasicBlock(); - - for (SgStatementPtrList::iterator it = (*list).begin(); - it != (*list).end(); it++) - bb2->append_statement(*it); - - tnl = isSgNode(bb2); - } else - throw loop_error("codegen failed"); - - delete repr; - /* - for (int i = 1; i <= m; i++) - delete ni[i]; - */ - return tnl; - -} - -//protonu--adding constructors for the new derived class -LoopCuda::LoopCuda() : - Loop(), code_gen_flags(GenInit) { -} - -LoopCuda::LoopCuda(IR_Control *irc, int loop_num) : - Loop(irc) { - setup_code = NULL; - teardown_code = NULL; - code_gen_flags = 0; - cu_bx = cu_by = cu_tx = cu_ty = cu_tz = 1; - cu_bx_repr = NULL; - cu_tx_repr = NULL; - cu_by_repr = NULL; - cu_ty_repr = NULL; - cu_tz_repr = NULL; - - cu_num_reduce = 0; - cu_mode = GlobalMem; - texture = NULL; - constant_mem = NULL; - - int m = stmt.size(); - //printf("\n the size of stmt(initially) is: %d\n", stmt.size()); - for (int i = 0; i < m; i++) - stmt_nonSplitLevels.push_back(std::vector<int>()); - - globals = ((IR_cudaroseCode *) ir)->gsym_; - globalScope = ((IR_cudaroseCode *) ir)->first_scope; - parameter_symtab = ((IR_cudaroseCode *) ir)->parameter; - body_symtab = ((IR_cudaroseCode *) ir)->body; - func_body = ((IR_cudaroseCode *) ir)->defn; - func_definition = ((IR_cudaroseCode *) ir)->func_defn; - std::vector<SgForStatement *> tf = ((IR_cudaroseCode *) ir)->get_loops(); - - symtab = tf[loop_num]->get_symbol_table(); - - std::vector<SgForStatement *> deepest = find_deepest_loops( - isSgNode(tf[loop_num])); - - for (int i = 0; i < deepest.size(); i++) { - SgVariableSymbol* vs; - SgForInitStatement* list = deepest[i]->get_for_init_stmt(); - SgStatementPtrList& initStatements = list->get_init_stmt(); - SgStatementPtrList::const_iterator j = initStatements.begin(); - if (SgExprStatement *expr = isSgExprStatement(*j)) - if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) - if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) - vs = var_ref->get_symbol(); - - index.push_back(vs->get_name().getString().c_str()); //reflects original code index names - } - - for (int i = 0; i < stmt.size(); i++) - idxNames.push_back(index); //refects prefered index names (used as handles in cudaize v2) - useIdxNames = false; - -} - -void LoopCuda::printIS() { - if (!cudaDebug) return; - int k = stmt.size(); - for (int i = 0; i < k; i++) { - printf(" printing statement:%d\n", i); - stmt[i].IS.print(); - } -} - |