/***************************************************************************** Copyright (C) 2009 University of Utah All Rights Reserved. Purpose: Cudaize methods Notes: History: 1/7/10 Created by Gabe Rudy by migrating code from loop.cc 31/1/11 Modified by Protonu Basu *****************************************************************************/ #define TRANSFORMATION_FILE_INFO Sg_File_Info::generateDefaultFileInfoForTransformationNode() #include #include #include #include #include "loop_cuda_rose.hh" #include "loop.hh" #include //#include #include "omegatools.hh" #include "ir_cudarose.hh" #include "ir_rose.hh" #include "ir_rose_utils.hh" #include "chill_error.hh" #include #include "Outliner.hh" //#define DEBUG using namespace omega; using namespace SageBuilder; using namespace SageInterface; //using namespace Outliner; //using namespace ASTtools; char *k_cuda_texture_memory; //protonu--added to track texture memory type //extern char *omega::k_cuda_texture_memory; //protonu--added to track texture memory type extern char *omega::k_ocg_comment; static int cudaDebug; class CudaStaticInit { public: CudaStaticInit() { cudaDebug = 0; //Change this to 1 for debug } }; static CudaStaticInit junkInitInstance__; std::string& upcase(std::string& s) { for (int i = 0; i < s.size(); i++) s[i] = toupper(s[i]); return s; } void printVs(const std::vector& curOrder) { if (!cudaDebug) return; for (int i = 0; i < curOrder.size(); i++) { if (i > 0) printf(","); printf("%s", curOrder[i].c_str()); } printf("\n"); } void printVS(const std::vector& curOrder) { if(!cudaDebug) return; for (int i = 0; i < curOrder.size(); i++) { if (i > 0) printf(","); printf("%s", curOrder[i].c_str()); } printf("\n"); } LoopCuda::~LoopCuda() { const int m = stmt.size(); for (int i = 0; i < m; i++) stmt[i].code->clear(); } bool LoopCuda::symbolExists(std::string s) { if (body_symtab->find_variable(SgName(s.c_str())) || parameter_symtab->find_variable(SgName(s.c_str()))) return true; if (globals->lookup_variable_symbol(SgName(s.c_str()))) return true; for (int i = 0; i < idxNames.size(); i++) for (int j = 0; j < idxNames[i].size(); j++) if (strcmp(idxNames[i][j].c_str(), s.c_str()) == 0) return true; return false; } void LoopCuda::addSync(int stmt_num, std::string idxName) { //we store these and code-gen inserts sync to omega comments where stmt //in loop that has idxName being generated syncs.push_back(make_pair(stmt_num, idxName)); } void LoopCuda::renameIndex(int stmt_num, std::string idx, std::string newName) { int level = findCurLevel(stmt_num, idx); if (idxNames.size() <= stmt_num || idxNames[stmt_num].size() < level) throw std::runtime_error("Invalid statment number of index"); idxNames[stmt_num][level - 1] = newName.c_str(); } enum Type { Int }; SgNode* wrapInIfFromMinBound(SgNode* then_part, SgForStatement* loop, SgScopeStatement* symtab, SgVariableSymbol* bound_sym) { // CG_roseBuilder *ocg = new CG_roseBuilder( SgBinaryOp* test_expr = isSgBinaryOp(loop->get_test_expr()); SgExpression* upperBound; SgExpression* conditional; upperBound = test_expr->get_rhs_operand(); CG_outputRepr *ifstmt; SgCallExpression *call; if (call = isSgCallExpression(upperBound)) if (isSgVarRefExp(call->get_function())->get_symbol()->get_name().getString() == "__rose_lt") { SgExprListExp* arg_list = call->get_args(); SgExpression *if_bound = *(arg_list->get_expressions().begin()); /*This relies on the minimum expression being the rhs operand of * the min instruction. */ SgIfStmt *ifstmt = buildIfStmt( buildLessOrEqualOp(buildVarRefExp(bound_sym), if_bound), isSgStatement(then_part), NULL); return isSgNode(ifstmt); } /* if (isSgConditionalExp(upperBound)) { conditional = isSgConditionalExp(upperBound)->get_conditional_exp(); if (isSgBinaryOp(conditional)) { SgBinaryOp* binop = isSgBinaryOp(conditional); if (isSgLessThanOp(binop) || isSgLessOrEqualOp(binop)) { SgIfStmt *ifstmt = buildIfStmt( buildLessOrEqualOp(buildVarRefExp(bound_sym), test_expr), isSgStatement(then_part), NULL); return isSgNode(ifstmt); } } } */ return then_part; } /** * This would be better if it was done by a CHiLL xformation instead of at codegen * * state: * for(...) * for(...) * cur_body * stmt1 * * stm1 is in-between two loops that are going to be reduced. The * solution is to put stmt1 at the end of cur_body but conditionally run * in on the last step of the for loop. * * A CHiLL command that would work better: * * for(...) * stmt0 * for(for i=0; i * for(...) * for(for i=0; i findCommentedFors(const char* index, SgNode* tnl) { std::vector result; bool next_loop_ok = false; if (isSgBasicBlock(tnl)) { SgStatementPtrList& list = isSgBasicBlock(tnl)->get_statements(); for (SgStatementPtrList::iterator it = list.begin(); it != list.end(); it++) { std::vector t = findCommentedFors(index, isSgNode(*it)); std::copy(t.begin(), t.end(), back_inserter(result)); } } else if (isSgForStatement(tnl)) { AstTextAttribute* att = (AstTextAttribute*) (isSgNode(tnl)->getAttribute( "omega_comment")); std::string comment = att->toString(); if (comment.find("~cuda~") != std::string::npos && comment.find("preferredIdx: ") != std::string::npos) { std::string idx = comment.substr( comment.find("preferredIdx: ") + 14, std::string::npos); if (idx.find(" ") != std::string::npos) idx = idx.substr(0, idx.find(" ")); if (strcmp(idx.c_str(), index) == 0) next_loop_ok = true; } if (next_loop_ok) { //printf("found loop %s\n", static_cast(tn)->index()->name()); result.push_back(isSgForStatement(tnl)); } else { //printf("looking down for loop %s\n", static_cast(tn)->index()->name()); std::vector t = findCommentedFors(index, isSgForStatement(tnl)->get_loop_body()); std::copy(t.begin(), t.end(), back_inserter(result)); } next_loop_ok = false; } else if (isSgIfStmt(tnl)) { //printf("looking down if\n"); SgIfStmt *tni = isSgIfStmt(tnl); std::vector t = findCommentedFors(index, tni->get_true_body()); std::copy(t.begin(), t.end(), back_inserter(result)); } return result; } SgNode* forReduce(SgForStatement* loop, SgVariableSymbol* reduceIndex, SgScopeStatement* body_syms) { //We did the replacements all at once with recursiveFindPreferedIdxs //replacements r; //r.oldsyms.append(loop->index()); //r.newsyms.append(reduceIndex); //tree_for* new_loop = (tree_for*)loop->clone_helper(&r, true); SgForStatement* new_loop = loop; //return body one loops in SgNode* tnl = loop_body_at_level(new_loop, 1); //wrap in conditional if necessary tnl = wrapInIfFromMinBound(tnl, new_loop, body_syms, reduceIndex); return tnl; } void recursiveFindRefs(SgNode* code, std::set& syms, SgFunctionDefinition* def) { SgStatement* s = isSgStatement(code); // L = {symbols defined within 's'}, local variables declared within 's' ASTtools::VarSymSet_t L; ASTtools::collectDefdVarSyms(s, L); //dump (L, "L = "); // U = {symbols used within 's'} ASTtools::VarSymSet_t U; ASTtools::collectRefdVarSyms(s, U); //dump (U, "U = "); // U - L = {symbols used within 's' but not defined in 's'} // variable references to non-local-declared variables ASTtools::VarSymSet_t diff_U_L; set_difference(U.begin(), U.end(), L.begin(), L.end(), inserter(diff_U_L, diff_U_L.begin())); //dump (diff_U_L, "U - L = "); // Q = {symbols defined within the function surrounding 's' that are // visible at 's'}, including function parameters ASTtools::VarSymSet_t Q; ASTtools::collectLocalVisibleVarSyms(def->get_declaration(), s, Q); // dump (Q, "Q = "); // (U - L) \cap Q = {variables that need to be passed as parameters // to the outlined function} // a sub set of variables that are not globally visible (no need to pass at all) // It excludes the variables with a scope between global and the enclosing function set_intersection(diff_U_L.begin(), diff_U_L.end(), Q.begin(), Q.end(), inserter(syms, syms.begin())); /* std::vector scalars; //SgNode *tnl = static_cast(repr)->GetCode(); SgStatement* stmt; SgExpression* exp; if (tnl != NULL) { if(stmt = isSgStatement(tnl)){ if(isSgBasicBlock(stmt)){ SgStatementPtrList& stmts = isSgBasicBlock(stmt)->get_statements(); for(int i =0; i < stmts.size(); i++){ //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(stmts[i])); std::vector a = recursiveFindRefs(isSgNode(stmts[i])); //delete r; std::copy(a.begin(), a.end(), back_inserter(scalars)); } } else if(isSgForStatement(stmt)){ SgForStatement *tnf = isSgForStatement(stmt); //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tnf->get_loop_body())); std::vector a = recursiveFindRefs(isSgNode(tnf->get_loop_body())); //delete r; std::copy(a.begin(), a.end(), back_inserter(scalars)); } else if(isSgFortranDo(stmt)){ SgFortranDo *tfortran = isSgFortranDo(stmt); omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tfortran->get_body())); std::vector a = recursiveFindRefs(r); delete r; std::copy(a.begin(), a.end(), back_inserter(scalars)); } else if(isSgIfStmt(stmt) ){ SgIfStmt* tni = isSgIfStmt(stmt); //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(tni->get_conditional())); std::vector a = recursiveFindRefs(isSgNode(tni->get_conditional())); //delete r; std::copy(a.begin(), a.end(), back_inserter(scalars)); //r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); a = recursiveFindRefs(isSgNode(tni->get_true_body())); //delete r; std::copy(a.begin(), a.end(), back_inserter(scalars)); //r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); a = recursiveFindRefs(isSgNode(tni->get_false_body())); //delete r; std::copy(a.begin(), a.end(), back_inserter(scalars)); } else if(isSgExprStatement(stmt)) { //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgExpression(isSgExprStatement(stmt)->get_expression())); std::vector a = recursiveFindRefs(isSgNode(isSgExprStatement(stmt)->get_expression())); //delete r; std::copy(a.begin(), a.end(), back_inserter(scalars)); } } } else{ SgExpression* op = isSgExpression(tnl); if(isSgVarRefExp(op)){ scalars.push_back(isSgVarRefExp(op)->get_symbol()); } else if( isSgAssignOp(op)){ //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgAssignOp(op)->get_lhs_operand()); std::vector a1 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_lhs_operand())); //delete r1; std::copy(a1.begin(), a1.end(), back_inserter(scalars)); //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgAssignOp(op)->get_rhs_operand()); std::vector a2 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_rhs_operand())); //delete r2; std::copy(a2.begin(), a2.end(), back_inserter(scalars)); } else if(isSgBinaryOp(op)){ // omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_lhs_operand()); std::vector a1 = recursiveFindRefs(isSgNode(isSgBinaryOp(op)->get_lhs_operand())); //delete r1; std::copy(a1.begin(), a1.end(), back_inserter(scalars)); //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_rhs_operand()); std::vector a2 = recursiveFindRefs((isSgBinaryOp(op)->get_rhs_operand())); //delete r2; std::copy(a2.begin(), a2.end(), back_inserter(scalars)); } else if(isSgUnaryOp(op)){ //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgUnaryOp(op)->get_operand()); std::vector a1 = recursiveFindRefs(isSgNode(isSgUnaryOp(op)->get_operand())); //delete r1; std::copy(a1.begin(), a1.end(), back_inserter(scalars)); } } return scalars; */ } SgNode* recursiveFindReplacePreferedIdxs(SgNode* code, SgSymbolTable* body_syms, SgSymbolTable* param_syms, SgScopeStatement* body, std::map& loop_idxs, SgGlobal* globalscope, bool sync = false) { //tree_node_list* tnl = new tree_node_list; //tree_node_list_iter tnli(code); SgVariableSymbol* idxSym = 0; std::vector r1; std::vector r2; SgNode* tnli; SgNode* tnli1; SgNode* tnli2; SgBasicBlock * clone; if (isSgForStatement(code)) { AstTextAttribute* att = (AstTextAttribute*) (isSgNode(code)->getAttribute( "omega_comment")); std::string comment; if (att != NULL) comment = att->toString(); if (comment.find("~cuda~") != std::string::npos && comment.find("preferredIdx: ") != std::string::npos) { std::string idx = comment.substr( comment.find("preferredIdx: ") + 14, std::string::npos); if (idx.find(" ") != std::string::npos) idx = idx.substr(0, idx.find(" ")); if (loop_idxs.find(idx) != loop_idxs.end()) idxSym = loop_idxs.find(idx)->second; //Get the proc variable sybol for this preferred index if (idxSym == 0) { idxSym = body_syms->find_variable(idx.c_str()); if (!idxSym) idxSym = param_syms->find_variable(idx.c_str()); //printf("idx not found: lookup %p\n", idxSym); if (!idxSym) { SgVariableDeclaration* defn = buildVariableDeclaration( SgName((char*) idx.c_str()), buildIntType()); //idxSym = new var_sym(type_s32, (char*)idx.c_str()); SgInitializedNamePtrList& variables = defn->get_variables(); SgInitializedNamePtrList::const_iterator i = variables.begin(); SgInitializedName* initializedName = *i; SgVariableSymbol* vs = new SgVariableSymbol( initializedName); prependStatement(defn, body); vs->set_parent(body_syms); body_syms->insert(SgName((char*) idx.c_str()), vs); idxSym = vs; //printf("idx created and inserted\n"); } //Now insert into our map for future if (cudaDebug) std::cout << idx << "\n\n"; loop_idxs.insert(make_pair(idx, idxSym)); } //See if we have a sync as well if (comment.find("sync") != std::string::npos) { //printf("Inserting sync after current block\n"); sync = true; } } if (idxSym) { SgForInitStatement* list = isSgForStatement(code)->get_for_init_stmt(); SgStatementPtrList& initStatements = list->get_init_stmt(); SgStatementPtrList::const_iterator j = initStatements.begin(); const SgVariableSymbol* index; if (SgExprStatement *expr = isSgExprStatement(*j)) if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) if (SgVarRefExp* var_ref = isSgVarRefExp( op->get_lhs_operand())) index = var_ref->get_symbol(); std::vector array = substitute(code, index, NULL, isSgNode(body_syms)); for (int j = 0; j < array.size(); j++) array[j]->set_symbol(idxSym); } SgStatement* body_ = isSgStatement( recursiveFindReplacePreferedIdxs( isSgNode((isSgForStatement(code)->get_loop_body())), body_syms, param_syms, body, loop_idxs, globalscope)); omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); omega::CG_outputRepr* block = tnl->clone(); tnli = static_cast(block)->GetCode(); isSgForStatement(tnli)->set_loop_body(body_); body_->set_parent(tnli); if (idxSym) { SgForInitStatement* list = isSgForStatement(tnli)->get_for_init_stmt(); SgStatementPtrList& initStatements = list->get_init_stmt(); SgStatementPtrList::const_iterator j = initStatements.begin(); const SgVariableSymbol* index; if (SgExprStatement *expr = isSgExprStatement(*j)) if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) if (SgVarRefExp* var_ref = isSgVarRefExp( op->get_lhs_operand())) index = var_ref->get_symbol(); std::vector array = substitute(tnli, index, NULL, isSgNode(body_syms)); for (int j = 0; j < array.size(); j++) array[j]->set_symbol(idxSym); } // std::cout << isSgNode(body_)->unparseToString() << "\n\n"; if (att != NULL) tnli->setAttribute("omega_comment", att); if (sync) { SgName name_syncthreads("__syncthreads"); SgFunctionSymbol * syncthreads_symbol = globalscope->lookup_function_symbol(name_syncthreads); // Create a call to __syncthreads(): SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( syncthreads_symbol, buildExprListExp()); SgExprStatement* stmt = buildExprStatement(syncthreads_call); /* if (SgBasicBlock* bb = isSgBasicBlock( isSgForStatement(code)->get_loop_body())) appendStatement(isSgStatement(stmt), bb); else if (SgStatement* ss = isSgStatement( isSgForStatement(code)->get_loop_body())) { SgBasicBlock* bb2 = buildBasicBlock(); isSgNode(ss)->set_parent(bb2); appendStatement(ss, bb2); appendStatement(isSgStatement(stmt), bb2); isSgNode(stmt)->set_parent(bb2); isSgForStatement(code)->set_loop_body(bb2); isSgNode(bb2)->set_parent(code); } */ SgBasicBlock* bb2 = buildBasicBlock(); bb2->append_statement(isSgStatement(tnli)); bb2->append_statement(stmt); /* SgNode* parent = code->get_parent(); if(!isSgStatement(parent)) throw loop_error("Parent not a statement"); if(isSgForStatement(parent)){ if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss); omega::CG_outputRepr* block= tnl->clone(); SgNode *new_ss = static_cast(block)->GetCode(); SgBasicBlock* bb2 = buildBasicBlock(); isSgNode(new_ss)->set_parent(bb2); appendStatement(isSgStatement(new_ss), bb2); appendStatement(isSgStatement(stmt), bb2); isSgNode(stmt)->set_parent(bb2); isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2)); }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body())) isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false); else throw loop_error("parent statement type undefined!!"); } else if(isSgBasicBlock(parent)) isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false); else throw loop_error("parent statement type undefined!!"); //tnl->print(); * * */ sync = true; return isSgNode(bb2); } else return tnli; } else if (isSgIfStmt(code)) { SgStatement* body_ = isSgStatement( recursiveFindReplacePreferedIdxs( isSgNode((isSgIfStmt(code)->get_true_body())), body_syms, param_syms, body, loop_idxs, globalscope)); omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); omega::CG_outputRepr* block = tnl->clone(); tnli = static_cast(block)->GetCode(); isSgIfStmt(tnli)->set_true_body(body_); if ((isSgIfStmt(code)->get_false_body())) isSgIfStmt(tnli)->set_false_body( isSgStatement( recursiveFindReplacePreferedIdxs( isSgNode( (isSgIfStmt(code)->get_false_body())), body_syms, param_syms, body, loop_idxs, globalscope))); return tnli; } else if (isSgStatement(code) && !isSgBasicBlock(code)) { omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); omega::CG_outputRepr* block = tnl->clone(); tnli = static_cast(block)->GetCode(); return tnli; } else if (isSgBasicBlock(code)) { SgStatementPtrList& tnl = isSgBasicBlock(code)->get_statements(); SgStatementPtrList::iterator temp; clone = buildBasicBlock(); bool sync_found = false; for (SgStatementPtrList::const_iterator it = tnl.begin(); it != tnl.end(); it++) { if (isSgForStatement(*it)) { AstTextAttribute* att = (AstTextAttribute*) (isSgNode(*it)->getAttribute( "omega_comment")); std::string comment; if (att != NULL) comment = att->toString(); if (comment.find("~cuda~") != std::string::npos && comment.find("preferredIdx: ") != std::string::npos) { std::string idx = comment.substr( comment.find("preferredIdx: ") + 14, std::string::npos); if (idx.find(" ") != std::string::npos) idx = idx.substr(0, idx.find(" ")); //printf("sym_tab preferred index: %s\n", idx.c_str()); if (loop_idxs.find(idx) != loop_idxs.end()) idxSym = loop_idxs.find(idx)->second; //Get the proc variable sybol for this preferred index if (idxSym == 0) { idxSym = body_syms->find_variable(idx.c_str()); if (!idxSym) idxSym = param_syms->find_variable(idx.c_str()); //printf("idx not found: lookup %p\n", idxSym); if (!idxSym) { SgVariableDeclaration* defn = buildVariableDeclaration( SgName((char*) idx.c_str()), buildIntType()); //idxSym = new var_sym(type_s32, (char*)idx.c_str()); SgInitializedNamePtrList& variables = defn->get_variables(); SgInitializedNamePtrList::const_iterator i = variables.begin(); SgInitializedName* initializedName = *i; SgVariableSymbol* vs = new SgVariableSymbol( initializedName); prependStatement(defn, body); vs->set_parent(body_syms); body_syms->insert(SgName((char*) idx.c_str()), vs); //printf("idx created and inserted\n"); idxSym = vs; } //Now insert into our map for future if (cudaDebug) std::cout << idx << "\n\n"; loop_idxs.insert(make_pair(idx, idxSym)); } //See if we have a sync as well if (comment.find("sync") != std::string::npos) { //printf("Inserting sync after current block\n"); sync = true; } } if (idxSym) { SgForInitStatement* list = isSgForStatement(*it)->get_for_init_stmt(); SgStatementPtrList& initStatements = list->get_init_stmt(); SgStatementPtrList::const_iterator j = initStatements.begin(); const SgVariableSymbol* index; if (SgExprStatement *expr = isSgExprStatement(*j)) if (SgAssignOp* op = isSgAssignOp( expr->get_expression())) if (SgVarRefExp* var_ref = isSgVarRefExp( op->get_lhs_operand())) index = var_ref->get_symbol(); std::vector array = substitute(*it, index, NULL, isSgNode(body_syms)); for (int j = 0; j < array.size(); j++) array[j]->set_symbol(idxSym); } SgStatement* body_ = isSgStatement( recursiveFindReplacePreferedIdxs( isSgNode( (isSgForStatement(*it)->get_loop_body())), body_syms, param_syms, body, loop_idxs, globalscope)); omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); omega::CG_outputRepr* block = tnl->clone(); tnli = static_cast(block)->GetCode(); isSgForStatement(tnli)->set_loop_body(body_); body_->set_parent(tnli); if (idxSym) { SgForInitStatement* list = isSgForStatement(tnli)->get_for_init_stmt(); SgStatementPtrList& initStatements = list->get_init_stmt(); SgStatementPtrList::const_iterator j = initStatements.begin(); const SgVariableSymbol* index; if (SgExprStatement *expr = isSgExprStatement(*j)) if (SgAssignOp* op = isSgAssignOp( expr->get_expression())) if (SgVarRefExp* var_ref = isSgVarRefExp( op->get_lhs_operand())) index = var_ref->get_symbol(); std::vector array = substitute(tnli, index, NULL, isSgNode(body_syms)); for (int j = 0; j < array.size(); j++) array[j]->set_symbol(idxSym); } idxSym = 0; // std::cout << isSgNode(body_)->unparseToString() << "\n\n"; if (att != NULL) tnli->setAttribute("omega_comment", att); clone->append_statement(isSgStatement(tnli)); if (sync) { SgName name_syncthreads("__syncthreads"); SgFunctionSymbol * syncthreads_symbol = globalscope->lookup_function_symbol( name_syncthreads); // Create a call to __syncthreads(): SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( syncthreads_symbol, buildExprListExp()); SgExprStatement* stmt = buildExprStatement( syncthreads_call); /* if (SgBasicBlock* bb = isSgBasicBlock( isSgForStatement(code)->get_loop_body())) appendStatement(isSgStatement(stmt), bb); else if (SgStatement* ss = isSgStatement( isSgForStatement(code)->get_loop_body())) { SgBasicBlock* bb2 = buildBasicBlock(); isSgNode(ss)->set_parent(bb2); appendStatement(ss, bb2); appendStatement(isSgStatement(stmt), bb2); isSgNode(stmt)->set_parent(bb2); isSgForStatement(code)->set_loop_body(bb2); isSgNode(bb2)->set_parent(code); } */ //SgBasicBlock* bb2 = buildBasicBlock(); clone->append_statement(stmt); /* SgNode* parent = code->get_parent(); if(!isSgStatement(parent)) throw loop_error("Parent not a statement"); if(isSgForStatement(parent)){ if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){ omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss); omega::CG_outputRepr* block= tnl->clone(); SgNode *new_ss = static_cast(block)->GetCode(); SgBasicBlock* bb2 = buildBasicBlock(); isSgNode(new_ss)->set_parent(bb2); appendStatement(isSgStatement(new_ss), bb2); appendStatement(isSgStatement(stmt), bb2); isSgNode(stmt)->set_parent(bb2); isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2)); }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body())) isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false); else throw loop_error("parent statement type undefined!!"); } else if(isSgBasicBlock(parent)) isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false); else throw loop_error("parent statement type undefined!!"); //tnl->print(); * * */ sync = true; // return isSgNode(bb2); } // return tnli; } else if (isSgIfStmt(*it)) { SgStatement* body_ = isSgStatement( recursiveFindReplacePreferedIdxs( isSgNode((isSgIfStmt(*it)->get_true_body())), body_syms, param_syms, body, loop_idxs, globalscope)); omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); omega::CG_outputRepr* block = tnl->clone(); tnli1 = static_cast(block)->GetCode(); isSgIfStmt(tnli1)->set_true_body(body_); if ((isSgIfStmt(*it)->get_false_body())) isSgIfStmt(tnli1)->set_false_body( isSgStatement( recursiveFindReplacePreferedIdxs( isSgNode( (isSgIfStmt(*it)->get_false_body())), body_syms, param_syms, body, loop_idxs, globalscope))); clone->append_statement(isSgStatement(tnli1)); //return tnli; } else if (isSgStatement(*it)) { omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); omega::CG_outputRepr* block = tnl->clone(); tnli2 = static_cast(block)->GetCode(); clone->append_statement(isSgStatement(tnli2)); //return tnli; } } return isSgNode(clone); } /* if (!isSgBasicBlock( recursiveFindReplacePreferedIdxs(isSgNode(*it), body_syms, param_syms, body, loop_idxs, globalscope))) { SgStatement *to_push = isSgStatement( recursiveFindReplacePreferedIdxs(isSgNode(*it), body_syms, param_syms, body, loop_idxs, globalscope, sync)); clone->append_statement(to_push); if ((sync_found) && isSgForStatement(to_push)) { SgName name_syncthreads("__syncthreads"); SgFunctionSymbol * syncthreads_symbol = globalscope->lookup_function_symbol( name_syncthreads); // Create a call to __syncthreads(): SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( syncthreads_symbol, buildExprListExp()); SgExprStatement* stmt = buildExprStatement( syncthreads_call); clone->append_statement(isSgStatement(stmt)); } // std::cout<unparseToString()<<"\n\n"; } else { SgStatementPtrList& tnl2 = isSgBasicBlock( recursiveFindReplacePreferedIdxs(isSgNode(*it), body_syms, param_syms, body, loop_idxs, globalscope))->get_statements(); for (SgStatementPtrList::const_iterator it2 = tnl2.begin(); it2 != tnl2.end(); it2++) { clone->append_statement(*it2); sync_found = true; // std::cout<unparseToString()<<"\n\n"; } } } return isSgNode(clone); } */ // return tnl; } // loop_vars -> array references // loop_idxs -> map for when we encounter a loop with a different preferredIndex // dim_vars -> out param, fills with var_sym pair for 2D array dimentions (messy stuff) SgNode* swapVarReferences(SgNode* code, std::set& syms, SgSymbolTable* param, SgSymbolTable* body, SgScopeStatement* body_stmt) { //Iterate over every expression, looking up each variable and type //reference used and possibly replacing it or adding it to our symbol //table // //We use the built-in cloning helper methods to seriously help us with this! //Need to do a recursive mark std::set::iterator myIterator; for (myIterator = syms.begin(); myIterator != syms.end(); myIterator++) { SgName var_name = (*myIterator)->get_name(); std::string x = var_name.getString(); if ((param->find_variable(var_name) == NULL) && (body->find_variable(var_name) == NULL)) { SgInitializedName* decl = (*myIterator)->get_declaration(); SgVariableSymbol* dvs = new SgVariableSymbol(decl); SgVariableDeclaration* var_decl = buildVariableDeclaration( dvs->get_name(), dvs->get_type()); AstTextAttribute* att = (AstTextAttribute*) (isSgNode( decl->get_declaration())->getAttribute("__shared__")); if (isSgNode(decl->get_declaration())->attributeExists( "__shared__")) var_decl->get_declarationModifier().get_storageModifier().setCudaShared(); appendStatement(var_decl, body_stmt); dvs->set_parent(body); body->insert(var_name, dvs); } std::vector array = substitute(code, *myIterator, NULL, isSgNode(body)); SgVariableSymbol* var = (SgVariableSymbol*) (*myIterator); for (int j = 0; j < array.size(); j++) array[j]->set_symbol(var); } return code; } bool LoopCuda::validIndexes(int stmt, const std::vector& idxs) { for (int i = 0; i < idxs.size(); i++) { bool found = false; for (int j = 0; j < idxNames[stmt].size(); j++) { if (strcmp(idxNames[stmt][j].c_str(), idxs[i].c_str()) == 0) { found = true; } } if (!found) { return false; } } return true; } bool LoopCuda::cudaize_v2(std::string kernel_name, std::map array_dims, std::vector blockIdxs, std::vector threadIdxs) { CG_outputBuilder *ocg = ir->builder(); int stmt_num = 0; if (cudaDebug) { printf("cudaize_v2(%s, {", kernel_name.c_str()); //for( printf("}, blocks={"); printVs(blockIdxs); printf("}, thread={"); printVs(threadIdxs); printf("})\n"); } this->array_dims = array_dims; if (!validIndexes(stmt_num, blockIdxs)) { throw std::runtime_error("One of the indexes in the block list was not " "found in the current set of indexes."); } if (!validIndexes(stmt_num, threadIdxs)) { throw std::runtime_error( "One of the indexes in the thread list was not " "found in the current set of indexes."); } if (blockIdxs.size() == 0) throw std::runtime_error("Cudaize: Need at least one block dimention"); int block_level = 0; //Now, we will determine the actual size (if possible, otherwise //complain) for the block dimentions and thread dimentions based on our //indexes and the relations for our stmt; for (int i = 0; i < blockIdxs.size(); i++) { int level = findCurLevel(stmt_num, blockIdxs[i]); int ub, lb; CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb); if (lb != 0) { //attempt to "normalize" the loop with an in-place tile and then re-check our bounds if (cudaDebug) printf( "Cudaize: doing tile at level %d to try and normalize lower bounds\n", level); tile(stmt_num, level, 1, level, CountedTile); idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); //TODO: possibly handle this for all sibling stmts ubrepr = extractCudaUB(stmt_num, level, ub, lb); } if (lb != 0) { char buf[1024]; sprintf(buf, "Cudaize: Loop at level %d does not have 0 as it's lower bound", level); throw std::runtime_error(buf); } if (ub < 0) { char buf[1024]; sprintf(buf, "Cudaize: Loop at level %d does not have a hard upper bound", level); //Anand: Commenting out error indication for lack of constant upper bound //throw std::runtime_error(buf); } if (cudaDebug) printf("block idx %s level %d lb: %d ub %d\n", blockIdxs[i].c_str(), level, lb, ub); if (i == 0) { block_level = level; if (ubrepr == NULL) { cu_bx = ub + 1; cu_bx_repr = NULL; } else { cu_bx = 0; cu_bx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); } idxNames[stmt_num][level - 1] = "bx"; } else if (i == 1) { if (ubrepr == NULL) { cu_by = ub + 1; cu_by_repr = NULL; } else { cu_by = 0; cu_by_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); } idxNames[stmt_num][level - 1] = "by"; } } if (!cu_by && !cu_by_repr) block_level = 0; int thread_level1 = 0; int thread_level2 = 0; for (int i = 0; i < threadIdxs.size(); i++) { int level = findCurLevel(stmt_num, threadIdxs[i]); int ub, lb; CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb); if (lb != 0) { //attempt to "normalize" the loop with an in-place tile and then re-check our bounds if (cudaDebug) printf( "Cudaize: doing tile at level %d to try and normalize lower bounds\n", level); tile(stmt_num, level, 1, level, CountedTile); idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); ubrepr = extractCudaUB(stmt_num, level, ub, lb); } if (lb != 0) { char buf[1024]; sprintf(buf, "Cudaize: Loop at level %d does not have 0 as it's lower bound", level); throw std::runtime_error(buf); } if (ub < 0) { char buf[1024]; sprintf(buf, "Cudaize: Loop at level %d does not have a hard upper bound", level); //Anand: Commenting out error indication for lack of constant upper bound //throw std::runtime_error(buf); } if (cudaDebug) printf("thread idx %s level %d lb: %d ub %d\n", threadIdxs[i].c_str(), level, lb, ub); if (i == 0) { thread_level1 = level; if (ubrepr == NULL) { cu_tx = ub + 1; cu_tx_repr = NULL; } else { cu_tx = 0; cu_tx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); } idxNames[stmt_num][level - 1] = "tx"; } else if (i == 1) { thread_level2 = level; if (ubrepr == NULL) { cu_ty = ub + 1; cu_ty_repr = NULL; } else { cu_ty = 0; cu_ty_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); } idxNames[stmt_num][level - 1] = "ty"; } else if (i == 2) { if (ubrepr == NULL) { cu_tz = ub + 1; cu_tz_repr = NULL; } else { cu_tz = 0; cu_tz_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); } idxNames[stmt_num][level - 1] = "tz"; } } if (!cu_ty && !cu_ty_repr) thread_level1 = 0; if (!cu_tz && !cu_tz_repr) thread_level2 = 0; //Make changes to nonsplitlevels const int m = stmt.size(); for (int i = 0; i < m; i++) { if (block_level) { //stmt[i].nonSplitLevels.append((block_level)*2); stmt_nonSplitLevels[i].push_back((block_level) * 2); } if (thread_level1) { //stmt[i].nonSplitLevels.append((thread_level1)*2); stmt_nonSplitLevels[i].push_back((thread_level1) * 2); } if (thread_level2) { //stmt[i].nonSplitLevels.append((thread_level1)*2); stmt_nonSplitLevels[i].push_back((thread_level1) * 2); } } if (cudaDebug) { printf("Codegen: current names: "); printVS(idxNames[stmt_num]); } //Set codegen flag code_gen_flags |= GenCudaizeV2; //Save array dimention sizes this->array_dims = array_dims; cu_kernel_name = kernel_name.c_str(); } /* * setupConstantVar * handles constant variable declaration * and adds a global constant variable * parameters: * constant - the constant_memory_mapping object for this loop * arr_def - the VarDefs object for the mapped variable * globals - Rose Global variables * i - an index to keep new variable names unique * symtab - global symbol table */ static void setupConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, int i, SgSymbolTable* symtab) { char* buf1 = new char[32]; snprintf(buf1, 32, "cs%dRef", i+1); arr_def->secondName = buf1; char buf2[64]; snprintf(buf2, 64, "__device__ __constant__ float"); SgVariableDeclaration* consvar_decl = buildVariableDeclaration( SgName(std::string(buf1)), buildArrayType( buildOpaqueType(SgName(buf2),globals), arr_def->size_expr)); SgInitializedNamePtrList& variables = consvar_decl->get_variables(); SgInitializedNamePtrList::const_iterator j = variables.begin(); SgInitializedName* initializedName = *j; SgVariableSymbol* consvar_sym = new SgVariableSymbol(initializedName); prependStatement(consvar_decl, globals); consvar_sym->set_parent(symtab); symtab->insert(SgName(std::string(buf1)), consvar_sym); constant->set_mapped_symbol(arr_def->original_name.c_str(), consvar_sym); constant->set_vardef(arr_def->original_name.c_str(), arr_def); } /* * cudaBindConstantVar * allocs a variable to constant memory * constant - the constant mapping object * arr_def - the VarDefs abject * globals - global symbol table * stmt_list - the GPU functions' statement list */ static void cudaBindConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) { SgName cudaMemcpyToSymbol_name("cudaMemcpyToSymbol"); SgFunctionDeclaration* cudaMemcpyToSymbol_decl = buildNondefiningFunctionDeclaration( cudaMemcpyToSymbol_name, buildVoidType(), buildFunctionParameterList(), globals); SgExprListExp* args = buildExprListExp(); args->append_expression(buildCastExp(constant->get_mapped_symbol_exp(arr_def->original_name.c_str()), buildPointerType(buildVoidType()))); args->append_expression(buildVarRefExp(arr_def->in_data)); args->append_expression(arr_def->size_expr); stmt_list->push_back(buildExprStatement( buildFunctionCallExp(buildFunctionRefExp(cudaMemcpyToSymbol_decl), args))); } static void consmapArrayRefs(constant_memory_mapping* constant, std::vector* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder* ocg) { // if constant mapping is not being used, ignore this function if(constant == NULL) return; for(int i = 0; i < refs->size(); i++) { IR_ArrayRef* aref = (*refs)[i]; if(constant->is_array_mapped(aref->name().c_str())) { // get array reference dimensions int dims = aref->symbol()->n_dim(); if(dims > 2) { printf(" \n CHiLL does not handle constant memory mapping for more than 2D arrays.\n"); return; } SgExpression* varexp = constant->get_mapped_symbol_exp(aref->name().c_str()); SgExpression* index_exp; // build index expression if(dims == 1) { index_exp = static_cast(aref->index(0)->clone())->GetExpression(); } if(dims == 2) { VarDefs* arr_def = constant->get_vardef(aref->name().c_str()); CG_outputRepr* i0 = aref->index(0)->clone(); CG_outputRepr* i1 = aref->index(1)->clone(); CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0])); CG_outputRepr* exp = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1); index_exp = static_cast(exp->clone())->GetExpression(); } ir->ReplaceExpression(aref, new CG_roseRepr(buildPntrArrRefExp(varexp, index_exp))); } } } /* * setupTexmappingVar * handles texture variable declaration * and adds a global texture object * parameters: * texture - the texture_memory_mapping object * arr_def - the VarDefs object for the mapped variable * globals - Rose Global variables * i - an index to keep the new variable names unique * devptr_sym - the devptr that the original variable is associated with * symtab - GPU function symbol table */ static void setupTexmappingVar(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, int i, SgVariableSymbol* devptr_sym, SgSymbolTable* symtab) { char* buf1 = new char[32]; snprintf(buf1, 32, "tex%dRef", i+1); arr_def->secondName = buf1; char buf2[64]; // single-dimensional snprintf(buf2, 64, "texture", 1); // multi-dimensional // snprintf(buf2, 64, "texture", (int)(arr_def->size_multi_dim.size())); //*/ SgVariableDeclaration* texvar_decl = buildVariableDeclaration(SgName(std::string(buf1)), buildOpaqueType(buf2, globals)); SgInitializedNamePtrList& variables = texvar_decl->get_variables(); SgInitializedNamePtrList::const_iterator j = variables.begin(); SgInitializedName* initializedName = *j; SgVariableSymbol* texvar_sym = new SgVariableSymbol(initializedName); prependStatement(texvar_decl, globals); texvar_sym->set_parent(symtab); symtab->insert(SgName(buf1), texvar_sym); texture->set_mapped_symbol(arr_def->original_name.c_str(), texvar_sym); texture->set_devptr_symbol(arr_def->original_name.c_str(), devptr_sym); texture->set_vardef(arr_def->original_name.c_str(), arr_def); } /* * One dimensional version of cudaBindTexture * see cudaBindTexture for details */ static SgFunctionCallExp* cudaBindTexture1D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) { SgName cudaBindTexture_name("cudaBindTexture"); SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration( cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals); SgExprListExp* args = buildExprListExp(); args->append_expression(buildIntVal(0)); args->append_expression(texture->get_mapped_symbol_exp(arr_def->original_name.c_str())); args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str())); args->append_expression(arr_def->size_expr); return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args); } /* * Two dimensional version of cudaBindTexture * see cudaBindTexture for details */ //static SgFunctionCallExp* cudaBindTexture2D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) { // SgName cudaBindTexture_name("cudaBindTexture2D"); // SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration( // cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals); // // SgExprListExp* args = buildExprListExp(); // args->append_expression(buildIntVal(0)); // args->append_expression(texture->get_tex_mapped_symbol_exp(arr_def->original_name.c_str())); // args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str())); // args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 0))); // args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 1))); // args->append_expression(arr_def->size_expr); // return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args); //} /* * cudaBindTexture * binds a variable to a texture * parameters: * texture - the texture mapping object * arr_def - the VarDefs object * globals - global symbol table * stmt_list - the GPU functions' statement list * notes: * only supports binding 1D textures, may need to consider cudaBindTexture2D for 2D textures */ static void cudaBindTexture(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) { //int dims = (int)(arr_def->size_multi_dim.size()); //int dims = texture->get_dims(arr_def->original_name.c_str()); //if(dims == 1) stmt_list->push_back( buildExprStatement(cudaBindTexture1D(texture, arr_def, globals))); //if(dims == 2) // stmt_list->push_back( // buildExprStatement(cudaBindTexture2D(texture, arr_def, globals))); } /* * texmapArrayRefs * maps array reference expresions of texture mapped variables to the tex1D function * parameters: * texture - the texture mapping object * refs - a list of all array read operations * globals - global symbol table * ir - handles IR_Code operations * ocg - handles CG_roseBuilder operations **/ static void texmapArrayRefs(texture_memory_mapping* texture, std::vector* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder *ocg) { // if texture mapping is not being used, ignore this function if(texture == NULL) return; for(int i = 0; i < refs->size(); i++) { IR_ArrayRef* aref = (*refs)[i]; if(texture->is_array_mapped(aref->name().c_str())) { // get array dimensions VarDefs* arr_def = texture->get_vardef(aref->name().c_str()); int dims = aref->symbol()->n_dim(); if(dims > 2) { printf(" \n CHiLL does not handle texture mapping for more than 2D arrays.\n"); // TODO throw some sort of error. or handle in texture_copy function return; } // build texture lookup function declaration char texNDfetch_strName[16]; sprintf(texNDfetch_strName, "tex%dDfetch", 1); // for now, only support tex1Dfetch //sprintf(texNDfetch_strName, "tex%dDfetch", dims); SgFunctionDeclaration* fetch_decl = buildNondefiningFunctionDeclaration( SgName(texNDfetch_strName), buildFloatType(), buildFunctionParameterList(), globals); // build args SgExprListExp* args = buildExprListExp(); args->append_expression(texture->get_mapped_symbol_exp(aref->name().c_str())); // set indexing args //for(int i = 0; i < dims; i++) { // args->append_expression((static_cast(aref->index(i)->clone()))->GetExpression()); //} if(dims == 1) { args->append_expression(static_cast(aref->index(0)->clone())->GetExpression()); } else if(dims == 2) { CG_outputRepr* i0 = aref->index(0)->clone(); CG_outputRepr* i1 = aref->index(1)->clone(); CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0])); CG_outputRepr* expr = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1); args->append_expression(static_cast(expr->clone())->GetExpression()); } // build function call and replace original array ref SgFunctionCallExp* fetch_call = buildFunctionCallExp(buildFunctionRefExp(fetch_decl), args); ir->ReplaceExpression(aref, new CG_roseRepr(fetch_call)); } } } SgNode* LoopCuda::cudaize_codegen_v2() { if(cudaDebug) printf("cudaize codegen V2\n"); CG_roseBuilder *ocg = dynamic_cast(ir->builder()); if (!ocg) return false; //protonu--adding an annote to track texture memory type //ANNOTE(k_cuda_texture_memory, "cuda texture memory", TRUE); //ANNOTE(k_cuda_constant_memory, "cuda constant memory", TRUE); int tex_mem_on = 0; int cons_mem_on = 0; CG_outputRepr* repr; std::vector arrayVars; std::vector localScopedVars; std::vector ro_refs; std::vector wo_refs; std::set uniqueRefs; std::set uniqueWoRefs; std::set syms; std::set psyms; std::set pdSyms; SgStatementPtrList* replacement_list = new SgStatementPtrList; for (int j = 0; j < stmt.size(); j++) { std::vector refs = ir->FindArrayRef(stmt[j].code); for (int i = 0; i < refs.size(); i++) { //printf("ref %s wo %d\n", static_cast(refs[i]->name()), refs[i]->is_write()); SgVariableSymbol* var = body_symtab->find_variable( SgName((char*) refs[i]->name().c_str())); SgVariableSymbol* var2 = parameter_symtab->find_variable( SgName((char*) refs[i]->name().c_str())); //If the array is not a parameter, then it's a local array and we //want to recreate it as a stack variable in the kernel as opposed to //passing it in. if (var != NULL) { //anand-- needs modification, if variable is parameter it wont be part of the // block's symbol table but the functiond definition's symbol table continue; } if (uniqueRefs.find(refs[i]->name()) == uniqueRefs.end()) { uniqueRefs.insert(refs[i]->name()); if (refs[i]->is_write()) { uniqueWoRefs.insert(refs[i]->name()); wo_refs.push_back(refs[i]); } else ro_refs.push_back(refs[i]); } if (refs[i]->is_write() && uniqueWoRefs.find(refs[i]->name()) == uniqueWoRefs.end()) { uniqueWoRefs.insert(refs[i]->name()); wo_refs.push_back(refs[i]); //printf("adding %s to wo\n", static_cast(refs[i]->name())); } pdSyms.insert((const SgVariableSymbol*) var2); } } if (cudaDebug) { printf("reading from array "); for (int i = 0; i < ro_refs.size(); i++) printf("'%s' ", ro_refs[i]->name().c_str()); printf("and writing to array "); for (int i = 0; i < wo_refs.size(); i++) printf("'%s' ", wo_refs[i]->name().c_str()); printf("\n"); } const char* gridName = "dimGrid"; const char* blockName = "dimBlock"; //TODO: Could allow for array_dims_vars to be a mapping from array //references to to variable names that define their length. SgVariableSymbol* dim1 = 0; SgVariableSymbol* dim2 = 0; for (int i = 0; i < wo_refs.size(); i++) { //TODO: Currently assume all arrays are floats of one or two dimentions SgVariableSymbol* outArray = 0; std::string name = wo_refs[i]->name(); outArray = body_symtab->find_variable(SgName((char*) name.c_str())); int size_n_d; if (outArray == NULL) outArray = parameter_symtab->find_variable( SgName((char*) name.c_str())); VarDefs v; v.size_multi_dim = std::vector(); char buf[32]; snprintf(buf, 32, "devO%dPtr", i + 1); v.name = buf; if (isSgPointerType(outArray->get_type())) { if (isSgArrayType( isSgNode( isSgPointerType(outArray->get_type())->get_base_type()))) { // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type(); SgType* t = isSgPointerType(outArray->get_type())->get_base_type(); /* SgExprListExp* dimList = t->get_dim_info(); SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); SgExpression* expr=NULL; for (; j != dimList->get_expressions().end(); j++) expr = *j; */ while (isSgArrayType(t)) t = isSgArrayType(t)->get_base_type(); if (!isSgType(t)) { char buf[1024]; sprintf(buf, "CudaizeCodeGen: Array type undetected!"); throw std::runtime_error(buf); } v.type = t; } else v.type = isSgPointerType(outArray->get_type())->get_base_type(); } else if (isSgArrayType(outArray->get_type())) { if (isSgArrayType( isSgNode( isSgArrayType(outArray->get_type())->get_base_type()))) { // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type(); SgType* t = isSgArrayType(outArray->get_type())->get_base_type(); /* SgExprListExp* dimList = t->get_dim_info(); SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); SgExpression* expr=NULL; for (; j != dimList->get_expressions().end(); j++) expr = *j; */ while (isSgArrayType(t)) t = isSgArrayType(t)->get_base_type(); if (!isSgType(t)) { char buf[1024]; sprintf(buf, "CudaizeCodeGen: Array type undetected!"); throw std::runtime_error(buf); } v.type = t; } else v.type = isSgArrayType(outArray->get_type())->get_base_type(); } else v.type = buildFloatType(); v.tex_mapped = false; v.cons_mapped = false; v.original_name = wo_refs[i]->name(); //Size of the array = dim1 * dim2 * num bytes of our array type //If our input array is 2D (non-linearized), we want the actual //dimentions of the array CG_outputRepr* size; //Lookup in array_dims std::map::iterator it = array_dims.find(name.c_str()); if (isSgPointerType(outArray->get_type()) && isSgArrayType( isSgNode( isSgPointerType(outArray->get_type())->get_base_type()))) { SgType* t = isSgPointerType(outArray->get_type())->get_base_type(); /* SgExprListExp* dimList = t->get_dim_info(); SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); SgExpression* expr=NULL; for (; j != dimList->get_expressions().end(); j++) expr = *j; */ if (isSgIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } t = isSgArrayType(t)->get_base_type(); while (isSgArrayType(t)) { int dim; if (isSgIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) dim = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) dim = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) dim = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) dim = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } size_n_d *= dim; v.size_multi_dim.push_back(dim); t = isSgArrayType(t)->get_base_type(); } //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value()); if (cudaDebug) printf("Detected Multi-dimensional array sized of %d for %s\n", size_n_d, (char*) wo_refs[i]->name().c_str()); size = ocg->CreateInt(size_n_d); } else if (isSgArrayType(outArray->get_type()) && isSgArrayType( isSgNode( isSgArrayType(outArray->get_type())->get_base_type()))) { SgType* t = outArray->get_type(); /* SgExprListExp* dimList = t->get_dim_info(); SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); SgExpression* expr=NULL; for (; j != dimList->get_expressions().end(); j++) expr = *j; */ if (isSgIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } t = isSgArrayType(t)->get_base_type(); while (isSgArrayType(t)) { int dim; if (isSgIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) dim = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) dim = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) dim = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) dim = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } size_n_d *= dim; v.size_multi_dim.push_back(dim); t = isSgArrayType(t)->get_base_type(); } //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value()); if (cudaDebug) printf("Detected Multi-Dimensional array sized of %d for %s\n", size_n_d, (char*) wo_refs[i]->name().c_str()); size = ocg->CreateInt(size_n_d); } else if (it != array_dims.end()) { int ref_size = it->second; //size = // ocg->CreateInt( // isSgIntVal( // isSgArrayType(outArray->get_type())->get_index())->get_value()); //v.size_2d = isSgArrayType(outArray->get_type())->get_rank(); //v.var_ref_size = ref_size; size = ocg->CreateInt(ref_size); } else { if (dim1) { size = ocg->CreateTimes( new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))), new CG_roseRepr(isSgExpression(buildVarRefExp(dim2)))); } else { char buf[1024]; sprintf(buf, "CudaizeCodeGen: Array reference %s does not have a " "detectable size or specififed dimentions", name.c_str()); throw std::runtime_error(buf); } } v.size_expr = static_cast(ocg->CreateTimes(size, new omega::CG_roseRepr( isSgExpression(buildSizeOfOp(v.type)))))->GetExpression(); v.in_data = 0; v.out_data = outArray; //Check for in ro_refs and remove it at this point std::vector::iterator it_; for (it_ = ro_refs.begin(); it_ != ro_refs.end(); it_++) { if ((*it_)->name() == wo_refs[i]->name()) { break; } } if (it_ != ro_refs.end()) { v.in_data = outArray; ro_refs.erase(it_); } arrayVars.push_back(v); } //protonu-- assuming that all texture mapped memories were originally read only mems //there should be safety checks for that, will implement those later for (int i = 0; i < ro_refs.size(); i++) { SgVariableSymbol* inArray = 0; std::string name = ro_refs[i]->name(); inArray = body_symtab->find_variable(SgName((char*) name.c_str())); if (inArray == NULL) inArray = parameter_symtab->find_variable( SgName((char*) name.c_str())); VarDefs v; v.size_multi_dim = std::vector(); char buf[32]; snprintf(buf, 32, "devI%dPtr", i + 1); v.name = buf; int size_n_d; if (isSgPointerType(inArray->get_type())) { if (isSgArrayType( isSgNode( isSgPointerType(inArray->get_type())->get_base_type()))) { SgType* t = isSgPointerType(inArray->get_type())->get_base_type(); while (isSgArrayType(t)) t = isSgArrayType(t)->get_base_type(); if (!isSgType(t)) { char buf[1024]; sprintf(buf, "CudaizeCodeGen: Array type undetected!"); throw std::runtime_error(buf); } v.type = t; } else v.type = isSgPointerType(inArray->get_type())->get_base_type(); } else if (isSgArrayType(inArray->get_type())) { if (isSgArrayType( isSgNode( isSgArrayType(inArray->get_type())->get_base_type()))) { SgType* t = inArray->get_type(); while (isSgArrayType(t)) t = isSgArrayType(t)->get_base_type(); if (!isSgType(t)) { char buf[1024]; sprintf(buf, "CudaizeCodeGen: Array type undetected!"); throw std::runtime_error(buf); } v.type = t; } else v.type = isSgArrayType(inArray->get_type())->get_base_type(); } else v.type = buildFloatType(); v.tex_mapped = false; v.cons_mapped = false; v.original_name = ro_refs[i]->name(); //derick -- adding texture and constant mapping if ( texture != NULL) v.tex_mapped = (texture->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars if (v.tex_mapped){ printf("this variable %s is mapped to texture memory", name.c_str()); } //derick -- this is commented out until constant memory is implemeted if ( constant_mem != NULL) v.cons_mapped = (constant_mem->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars if (v.cons_mapped){ printf("this variable %s is mapped to constant memory", name.c_str()); } //Size of the array = dim1 * dim2 * num bytes of our array type //If our input array is 2D (non-linearized), we want the actual //dimentions of the array (as it might be less than cu_n CG_outputRepr* size; //Lookup in array_dims std::map::iterator it = array_dims.find(name.c_str()); if (isSgPointerType(inArray->get_type()) && isSgArrayType( isSgPointerType(inArray->get_type())->get_base_type())) { //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type()); //v.size_2d = t->get_rank(); SgType* t = isSgPointerType(inArray->get_type())->get_base_type(); /* SgExprListExp* dimList = t->get_dim_info(); SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); SgExpression* expr=NULL; for (; j != dimList->get_expressions().end(); j++) expr = *j; */ //v.size_2d = 1; if (isSgIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } t = isSgArrayType(t)->get_base_type(); while (isSgArrayType(t)) { int dim; if (isSgIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) dim = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) dim = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) dim = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) dim = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } size_n_d *= dim; v.size_multi_dim.push_back(dim); t = isSgArrayType(t)->get_base_type(); } if (cudaDebug) printf("Detected Multi-dimensional array sized of %d for %s\n", size_n_d, (char*) ro_refs[i]->name().c_str()); size = ocg->CreateInt(size_n_d); } else if (isSgArrayType(inArray->get_type()) && isSgArrayType( isSgArrayType(inArray->get_type())->get_base_type())) { //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type()); //v.size_2d = t->get_rank(); SgType* t = inArray->get_type(); /* SgExprListExp* dimList = t->get_dim_info(); SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); SgExpression* expr=NULL; for (; j != dimList->get_expressions().end(); j++) expr = *j; */ if (isSgIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) size_n_d = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) size_n_d = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } t = isSgArrayType(t)->get_base_type(); while (isSgArrayType(t)) { int dim; if (isSgIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgLongIntVal(isSgArrayType(t)->get_index())) dim = (int) (isSgLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())) dim = (int) (isSgUnsignedLongLongIntVal( isSgArrayType(t)->get_index())->get_value()); else if (isSgAddOp(isSgArrayType(t)->get_index())) { SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); SgExpression *lhs = op_add->get_lhs_operand(); SgExpression *rhs = op_add->get_rhs_operand(); if (isSgIntVal(lhs)) dim = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); else if (isSgUnsignedIntVal(lhs)) dim = (int) isSgUnsignedIntVal(lhs)->get_value() + (int) isSgUnsignedIntVal(rhs)->get_value(); else if (isSgUnsignedLongVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongLongIntVal(lhs)) dim = (int) (isSgLongLongIntVal(lhs)->get_value() + isSgUnsignedLongVal(rhs)->get_value()); else if (isSgLongIntVal(lhs)) dim = (int) (isSgLongIntVal(lhs)->get_value() + isSgLongIntVal(rhs)->get_value()); else if (isSgUnsignedLongLongIntVal(lhs)) dim = (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + isSgUnsignedLongLongIntVal(rhs)->get_value()); } size_n_d *= dim; v.size_multi_dim.push_back(dim); t = isSgArrayType(t)->get_base_type(); } if (cudaDebug) printf("Detected Multi-Dimensional array sized of %d for %s\n", size_n_d, (char*) ro_refs[i]->name().c_str()); size = ocg->CreateInt(size_n_d); } else if (it != array_dims.end()) { int ref_size = it->second; // v.var_ref_size = ref_size; size = ocg->CreateInt(ref_size); } else { if (dim1) { size = ocg->CreateTimes( new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))), new CG_roseRepr(isSgExpression(buildVarRefExp(dim2)))); } else { char buf[1024]; sprintf(buf, "CudaizeCodeGen: Array reference %s does not have a " "detectable size or specififed dimentions", name.c_str()); throw std::runtime_error(buf); } } v.size_expr = static_cast(ocg->CreateTimes(size, new omega::CG_roseRepr( isSgExpression(buildSizeOfOp(v.type)))))->GetExpression(); v.in_data = inArray; v.out_data = 0; arrayVars.push_back(v); } if (arrayVars.size() < 2) { fprintf(stderr, "cudaize error: Did not find two arrays being accessed\n"); return false; } //protonu--debugging tool--the printf statement //tex_mem_on signals use of tex mem /* derick -- texmapping near malloc mcopy for(int i=0; iStmtListAppend(setup_code, new CG_roseRepr(tnl)); } else { SgVariableDeclaration* defn = buildVariableDeclaration( SgName(arrayVars[i].name.c_str()), buildPointerType(arrayVars[i].type)); SgInitializedNamePtrList& variables = defn->get_variables(); SgInitializedNamePtrList::const_iterator j = variables.begin(); SgInitializedName* initializedName = *j; SgVariableSymbol* dvs = new SgVariableSymbol(initializedName); prependStatement(defn, func_body); dvs->set_parent(body_symtab); body_symtab->insert(SgName(arrayVars[i].name.c_str()), dvs); // SgVariableSymbol* dvs = body_symtab->find_variable(SgName(arrayVars[i].name.c_str())); // if(dvs == NULL) // dvs = parameter_symtab->find_variable(SgName(arrayVars[i].name.c_str())); //cudaMalloc args // SgBasicBlock* block = buildBasicBlock(); SgName name_cuda_malloc("cudaMalloc"); SgFunctionDeclaration * decl_cuda_malloc = buildNondefiningFunctionDeclaration(name_cuda_malloc, buildVoidType(), buildFunctionParameterList(), globals); SgName name_cuda_copy("cudaMemcpy"); SgFunctionDeclaration * decl_cuda_copy = buildNondefiningFunctionDeclaration(name_cuda_copy, buildVoidType(), buildFunctionParameterList(), globals); SgExprListExp* args = buildExprListExp(); args->append_expression( buildCastExp(buildAddressOfOp(buildVarRefExp(dvs)), buildPointerType(buildPointerType(buildVoidType())))); args->append_expression(arrayVars[i].size_expr); // decl_cuda_malloc->get_parameterList()->append_arg SgFunctionCallExp *the_call = buildFunctionCallExp( buildFunctionRefExp(decl_cuda_malloc), args); SgExprStatement* stmt = buildExprStatement(the_call); // (*replacement_list).push_back (stmt); SgStatementPtrList* tnl = new SgStatementPtrList; (*tnl).push_back(stmt); setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); if (arrayVars[i].in_data) { SgExprListExp * cuda_copy_in_args = buildExprListExp(); cuda_copy_in_args->append_expression( isSgExpression(buildVarRefExp(dvs))); cuda_copy_in_args->append_expression( isSgExpression(buildVarRefExp(arrayVars[i].in_data))); CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr); cuda_copy_in_args->append_expression( static_cast(size_exp->clone())->GetExpression()); cuda_copy_in_args->append_expression( buildOpaqueVarRefExp("cudaMemcpyHostToDevice", globals)); // cuda_copy_in_args->append_expression( // new SgVarRefExp(sourceLocation, ) // ); SgFunctionCallExp * cuda_copy_in_func_call = buildFunctionCallExp( buildFunctionRefExp(decl_cuda_copy), cuda_copy_in_args); SgExprStatement* stmt = buildExprStatement(cuda_copy_in_func_call); SgStatementPtrList *tnl = new SgStatementPtrList; (*tnl).push_back(stmt); setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); if(arrayVars[i].tex_mapped) { setupTexmappingVar(texture, &arrayVars[i], globals, i, dvs, symtab); SgStatementPtrList *tnl = new SgStatementPtrList; cudaBindTexture(texture, &arrayVars[i], globals, tnl); setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); } } } } //Build dimGrid dim3 variables based on loop dimentions and ti/tj char blockD1[120]; char blockD2[120]; if (dim1) { snprintf(blockD1, 120, "%s/%d", dim1->get_declaration()->get_name().getString().c_str(), cu_tx); snprintf(blockD2, 120, "%s/%d", dim2->get_declaration()->get_name().getString().c_str(), cu_ty); } else { snprintf(blockD1, 120, "%d", cu_bx); snprintf(blockD2, 120, "%d", cu_by); //snprintf(blockD1, 120, "%d/%d", cu_nx, cu_tx); //snprintf(blockD2, 120, "%d/%d", cu_ny, cu_ty); } SgInitializedName* arg1 = buildInitializedName("i", buildIntType()); SgInitializedName* arg2 = buildInitializedName("j", buildIntType()); SgInitializedName* arg3 = buildInitializedName("k", buildIntType()); SgName type_name("dim3"); //SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(type_name); //ROSE_ASSERT(type_symbol != NULL); //SgClassDeclaration * dim3classdecl = isSgClassDeclaration( // type_symbol->get_declaration()); SgFunctionDeclaration * funcdecl = buildNondefiningFunctionDeclaration( SgName("dim3"), buildOpaqueType("dim3", globalScope), //isSgType(dim3classdecl->get_type()), buildFunctionParameterList(arg1, arg2, arg3), globalScope); if (cu_bx && cu_by) repr = ocg->CreateDim3((const char*) gridName, ocg->CreateInt(cu_bx), ocg->CreateInt(cu_by)); else if (cu_bx_repr && cu_by_repr) repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, cu_by_repr); else if (cu_bx_repr) repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, ocg->CreateInt(1)); setup_code = ocg->StmtListAppend(setup_code, repr); //SgStatementPtrList* dimList = static_cast(repr)->GetList(); //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) // (*replacement_list).push_back (*it); // repr = ocg->CreateDim3((const char*)blockName, cu_tx,cu_ty); if (cu_tz > 1 || cu_tz_repr) { if (cu_tx && cu_ty && cu_tz) repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx), ocg->CreateInt(cu_ty), ocg->CreateInt(cu_tz)); else if (cu_tx_repr && cu_ty_repr && cu_tz_repr) repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr, cu_tz_repr); // SgStatementPtrList* dimList = static_cast(repr)->GetList(); // for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) // (*replacement_list).push_back (*it); } else { if (cu_tx && cu_ty) repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx), ocg->CreateInt(cu_ty)); else if (cu_tx_repr && cu_ty_repr) repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr); //SgStatementPtrList* dimList = static_cast(repr)->GetList(); //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) // (*replacement_list).push_back (*it); } setup_code = ocg->StmtListAppend(setup_code, repr); SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig( buildVarRefExp(gridName), buildVarRefExp(blockName), NULL, NULL); //SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig(buildIntVal(cu_bx), , NULL, NULL); SgExprListExp* iml = new SgExprListExp(); SgCastExp* dim_s; //Creating Kernel function SgBasicBlock* bb = new SgBasicBlock(TRANSFORMATION_FILE_INFO); SgFunctionDefinition* kernel_defn = new SgFunctionDefinition( TRANSFORMATION_FILE_INFO, bb); SgFunctionDeclaration* kernel_decl_ = new SgFunctionDeclaration( TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn); SgFunctionDeclaration* kernel_decl = new SgFunctionDeclaration( TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn); //((kernel_decl->get_declarationModifier()).get_storageModifier()).setStatic(); kernel_decl->set_definingDeclaration(kernel_decl); kernel_defn->set_parent(kernel_decl); bb->set_parent(kernel_defn); bb->set_endOfConstruct(TRANSFORMATION_FILE_INFO); bb->get_endOfConstruct()->set_parent(bb); //SgFunctionSymbol* functionSymbol = new SgFunctionSymbol(kernel_decl_); //globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()), // functionSymbol); SgFunctionSymbol* functionSymbol2 = new SgFunctionSymbol(kernel_decl); globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()), functionSymbol2); kernel_decl_->set_parent(globals); kernel_decl_->set_scope(globals); kernel_decl_->setForward(); globals->prepend_declaration(kernel_decl_); kernel_decl->set_endOfConstruct(TRANSFORMATION_FILE_INFO); kernel_decl->get_endOfConstruct()->set_parent(kernel_decl); kernel_decl->set_parent(globals); kernel_decl->set_scope(globals); kernel_decl->get_definition()->set_endOfConstruct(TRANSFORMATION_FILE_INFO); kernel_decl->get_definition()->get_endOfConstruct()->set_parent( kernel_decl->get_definition()); globals->append_statement(kernel_decl); //printf("%s %s\n", static_cast(cu_kernel_name), dims); //--derick - kernel function parameters for (int i = 0; i < arrayVars.size(); i++) //Throw in a type cast if our kernel takes 2D array notation //like (float(*) [1024]) { //protonu--throwing in another hack to stop the caller from passing tex mapped //vars to the kernel. if (arrayVars[i].tex_mapped == true || arrayVars[i].cons_mapped) continue; if (!(arrayVars[i].size_multi_dim.empty())) { //snprintf(dims,120,"(float(*) [%d])%s", arrayVars[i].size_2d, // const_cast(arrayVars[i].name.c_str())); SgType* t = arrayVars[i].type; for (int k = arrayVars[i].size_multi_dim.size() - 1; k >= 0; k--) { t = buildArrayType(t, buildIntVal(arrayVars[i].size_multi_dim[k])); } SgVariableSymbol* temp = body_symtab->find_variable( SgName((char*) arrayVars[i].name.c_str())); if (temp == NULL) temp = parameter_symtab->find_variable( SgName((char*) arrayVars[i].name.c_str())); dim_s = buildCastExp(buildVarRefExp(temp), buildPointerType(t), SgCastExp::e_C_style_cast); //printf("%d %s\n", i, dims); iml->append_expression(dim_s); SgInitializedName* id = buildInitializedName( (char*) arrayVars[i].original_name.c_str(), buildPointerType(t)); kernel_decl->get_parameterList()->append_arg(id); kernel_decl_->get_parameterList()->append_arg(id); id->set_file_info(TRANSFORMATION_FILE_INFO); // DQ (9/8/2007): We now test this, so it has to be set explicitly. id->set_scope(kernel_decl->get_definition()); // DQ (9/8/2007): Need to add variable symbol to global scope! //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n",globalScope,globalScope->class_name().c_str(),var1_init_name,var1_init_name->get_name().str()); SgVariableSymbol *var_symbol = new SgVariableSymbol(id); kernel_decl->get_definition()->insert_symbol(id->get_name(), var_symbol); // if(kernel_decl->get_definition()->get_symbol_table()->find((const) id) == NULL) } else { //printf("%d %s\n", i, static_cast(arrayVars[i].name)); SgVariableSymbol* temp = body_symtab->find_variable( SgName((char*) arrayVars[i].name.c_str())); if (temp == NULL) temp = parameter_symtab->find_variable( SgName((char*) arrayVars[i].name.c_str())); iml->append_expression(buildVarRefExp(temp)); SgInitializedName* id = buildInitializedName( (char*) arrayVars[i].original_name.c_str(), buildPointerType(arrayVars[i].type)); kernel_decl->get_parameterList()->append_arg(id); kernel_decl_->get_parameterList()->append_arg(id); id->set_file_info(TRANSFORMATION_FILE_INFO); // DQ (9/8/2007): We now test this, so it has to be set explicitly. id->set_scope(kernel_decl->get_definition()); // DQ (9/8/2007): Need to add variable symbol to global scope! //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n"$ SgVariableSymbol *var_symbol = new SgVariableSymbol(id); kernel_decl->get_definition()->insert_symbol(id->get_name(), var_symbol); } } if (dim1) { iml->append_expression(buildVarRefExp(dim1)); SgInitializedName* id = buildInitializedName( dim1->get_name().getString().c_str(), dim1->get_type()); kernel_decl->get_parameterList()->append_arg(id); iml->append_expression(buildVarRefExp(dim2)); SgInitializedName* id2 = buildInitializedName( dim2->get_name().getString().c_str(), dim2->get_type()); kernel_decl->get_parameterList()->append_arg(id); kernel_decl_->get_parameterList()->append_arg(id); } kernel_decl->get_functionModifier().setCudaKernel(); kernel_decl_->get_functionModifier().setCudaKernel(); SgCudaKernelCallExp * cuda_call_site = new SgCudaKernelCallExp( TRANSFORMATION_FILE_INFO, buildFunctionRefExp(kernel_decl), iml,config); // SgStatementPtrList *tnl2 = new SgStatementPtrList; (*replacement_list).push_back(buildExprStatement(cuda_call_site)); setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(replacement_list)); //cuda free variables for (int i = 0; i < arrayVars.size(); i++) { if (arrayVars[i].out_data) { SgName name_cuda_copy("cudaMemcpy"); SgFunctionDeclaration * decl_cuda_copyout = buildNondefiningFunctionDeclaration(name_cuda_copy, buildVoidType(), buildFunctionParameterList(), globals); SgExprListExp* args = buildExprListExp(); SgExprListExp * cuda_copy_out_args = buildExprListExp(); cuda_copy_out_args->append_expression( isSgExpression(buildVarRefExp(arrayVars[i].out_data))); cuda_copy_out_args->append_expression( isSgExpression(buildVarRefExp(arrayVars[i].name))); CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr); cuda_copy_out_args->append_expression( static_cast(size_exp->clone())->GetExpression()); cuda_copy_out_args->append_expression( buildOpaqueVarRefExp("cudaMemcpyDeviceToHost", globals)); // cuda_copy_in_args->append_expression( // new SgVarRefExp(sourceLocation, ) // ); SgFunctionCallExp * cuda_copy_out_func_call = buildFunctionCallExp( buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args); SgFunctionCallExp *the_call = buildFunctionCallExp( buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args); SgExprStatement* stmt = buildExprStatement(the_call); SgStatementPtrList* tnl3 = new SgStatementPtrList; (*tnl3).push_back(stmt); // tree_node_list* tnl = new tree_node_list; // tnl->append(new tree_instr(the_call)); setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl3)); } if(!arrayVars[i].cons_mapped) { SgName name_cuda_free("cudaFree"); SgFunctionDeclaration * decl_cuda_free = buildNondefiningFunctionDeclaration(name_cuda_free, buildVoidType(), buildFunctionParameterList(), globals); SgExprListExp* args3 = buildExprListExp(); SgVariableSymbol* tmp = body_symtab->find_variable( SgName(arrayVars[i].name.c_str())); if (tmp == NULL) tmp = parameter_symtab->find_variable( SgName(arrayVars[i].name.c_str())); args3->append_expression(buildVarRefExp(tmp)); SgFunctionCallExp *the_call2 = buildFunctionCallExp( buildFunctionRefExp(decl_cuda_free), args3); SgExprStatement* stmt2 = buildExprStatement(the_call2); SgStatementPtrList* tnl4 = new SgStatementPtrList; (*tnl4).push_back(stmt2); //(*replacement_list).push_back (stmt2); setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl4)); } } // --------------- // BUILD THE KERNEL // --------------- //Extract out kernel body SgNode* code = getCode(); //Create kernel function body //Add Params std::map loop_vars; //In-Out arrays for (int i = 0; i < arrayVars.size(); i++) { /* if(arrayVars[i].in_data) fptr = arrayVars[i].in_data->type()->clone(); else fptr = arrayVars[i].out_data->type()->clone(); */ // fptr = new_proc_syms->install_type(fptr); std::string name = arrayVars[i].in_data ? arrayVars[i].in_data->get_declaration()->get_name().getString() : arrayVars[i].out_data->get_declaration()->get_name().getString(); //SgVariableSymbol* sym = new var_sym(fptr, arrayVars[i].in_data ? arrayVars[i].in_data->name() : arrayVars[i].out_data->name()); SgVariableSymbol* sym = kernel_decl->get_definition()->get_symbol_table()->find_variable( (const char*) name.c_str()); /* SgVariableDeclaration* defn = buildVariableDeclaration(SgName(name.c_str()), buildFloatType()); SgInitializedNamePtrList& variables = defn->get_variables(); SgInitializedNamePtrList::const_iterator i = variables.begin(); SgInitializedName* initializedName = *i; SgVariableSymbol* sym = new SgVariableSymbol(initializedName); prependStatement(defn, isSgScopeStatement(root_)); vs->set_parent(symtab2_); symtab2_->insert(SgName(_s.c_str()), vs); */ if (sym != NULL) loop_vars.insert( std::pair(std::string(name), sym)); } //Figure out which loop variables will be our thread and block dimention variables std::vector loop_syms; //Get our indexes std::vector indexes; // = get_loop_indexes(code,cu_num_reduce); int threadsPos = 0; CG_outputRepr *body = NULL; SgFunctionDefinition* func_d = func_definition; //std::vector symbols = recursiveFindRefs(code); SgName name_sync("__syncthreads"); SgFunctionDeclaration * decl_sync = buildNondefiningFunctionDeclaration( name_sync, buildVoidType(), buildFunctionParameterList(), globalScope); recursiveFindRefs(code, syms, func_d); //SgFunctionDeclaration* func = Outliner::generateFunction (code, (char*)cu_kernel_name.c_str(), syms, pdSyms, psyms, NULL, globalScope); if (cu_bx > 1 || cu_bx_repr) { indexes.push_back("bx"); SgName type_name("blockIdx.x"); SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( type_name); SgVariableDeclaration * var_decl = buildVariableDeclaration("bx", buildIntType(), NULL, isSgScopeStatement(kernel_decl->get_definition()->get_body())); SgStatementPtrList *tnl = new SgStatementPtrList; // (*tnl).push_back(isSgStatement(var_decl)); appendStatement(var_decl, kernel_decl->get_definition()->get_body()); SgVariableSymbol* bx = kernel_decl->get_definition()->get_body()->lookup_variable_symbol( SgName("bx")); SgStatement* assign = isSgStatement( buildAssignStatement(buildVarRefExp(bx), buildOpaqueVarRefExp("blockIdx.x", kernel_decl->get_definition()->get_body()))); (*tnl).push_back(assign); // body = ocg->StmtListAppend(body, // new CG_roseRepr(tnl)); appendStatement(assign, kernel_decl->get_definition()->get_body()); } if (cu_by > 1 || cu_by_repr) { indexes.push_back("by"); SgName type_name("blockIdx.y"); SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( type_name); SgVariableDeclaration * var_decl = buildVariableDeclaration("by", buildIntType(), NULL, isSgScopeStatement(kernel_decl->get_definition()->get_body())); // SgStatementPtrList *tnl = new SgStatementPtrList; // (*tnl).push_back(isSgStatement(var_decl)); appendStatement(var_decl, kernel_decl->get_definition()->get_body()); SgVariableSymbol* by = kernel_decl->get_definition()->get_body()->lookup_variable_symbol( SgName("by")); SgStatement* assign = isSgStatement( buildAssignStatement(buildVarRefExp(by), buildOpaqueVarRefExp("blockIdx.y", kernel_decl->get_definition()->get_body()))); //(*tnl).push_back(assign); // body = ocg->StmtListAppend(body, // new CG_roseRepr(tnl)); appendStatement(assign, kernel_decl->get_definition()->get_body()); } if (cu_tx_repr || cu_tx > 1) { threadsPos = indexes.size(); indexes.push_back("tx"); SgName type_name("threadIdx.x"); SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( type_name); SgVariableDeclaration * var_decl = buildVariableDeclaration("tx", buildIntType(), NULL, isSgScopeStatement(kernel_decl->get_definition()->get_body())); // SgStatementPtrList *tnl = new SgStatementPtrList; // (*tnl).push_back(isSgStatement(var_decl)); appendStatement(var_decl, kernel_decl->get_definition()->get_body()); SgVariableSymbol* tx = kernel_decl->get_definition()->get_body()->lookup_variable_symbol( SgName("tx")); SgStatement* assign = isSgStatement( buildAssignStatement(buildVarRefExp(tx), buildOpaqueVarRefExp("threadIdx.x", kernel_decl->get_definition()->get_body()))); //(*tnl).push_back(assign); // body = ocg->StmtListAppend(body, // new CG_roseRepr(tnl)); appendStatement(assign, kernel_decl->get_definition()->get_body()); } if (cu_ty_repr || cu_ty > 1) { indexes.push_back("ty"); SgName type_name("threadIdx.y"); SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( type_name); SgVariableDeclaration * var_decl = buildVariableDeclaration("ty", buildIntType(), NULL, isSgScopeStatement(kernel_decl->get_definition()->get_body())); appendStatement(var_decl, kernel_decl->get_definition()->get_body()); // SgStatementPtrList *tnl = new SgStatementPtrList; // (*tnl).push_back(isSgStatement(var_decl)); SgVariableSymbol* ty = kernel_decl->get_definition()->get_body()->lookup_variable_symbol( SgName("ty")); SgStatement* assign = isSgStatement( buildAssignStatement(buildVarRefExp(ty), buildOpaqueVarRefExp("threadIdx.y", kernel_decl->get_definition()->get_body()))); // (*tnl).push_back(assign); // body = ocg->StmtListAppend(body, // new CG_roseRepr(tnl)); appendStatement(assign, kernel_decl->get_definition()->get_body()); } if (cu_tz_repr || cu_tz > 1) { indexes.push_back("tz"); SgName type_name("threadIdx.z"); SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( type_name); SgVariableDeclaration * var_decl = buildVariableDeclaration("tz", buildIntType(), NULL, isSgScopeStatement(kernel_decl->get_definition()->get_body())); // SgStatementPtrList *tnl = new SgStatementPtrList; // (*tnl).push_back(isSgStatement(var_decl)); appendStatement(var_decl, kernel_decl->get_definition()->get_body()); SgVariableSymbol* tz = kernel_decl->get_definition()->get_body()->lookup_variable_symbol( "tz"); SgStatement* assign = isSgStatement( buildAssignStatement(buildVarRefExp(tz), buildOpaqueVarRefExp("threadIdx.z", kernel_decl->get_definition()->get_body()))); // (*tnl).push_back(assign); // body = ocg->StmtListAppend(body, // new CG_roseRepr(tnl)); appendStatement(assign, kernel_decl->get_definition()->get_body()); } std::map loop_idxs; //map from idx names to their new syms SgNode* swapped_ = swapVarReferences(code, syms, kernel_decl->get_definition()->get_symbol_table(), kernel_decl->get_definition()->get_body()->get_symbol_table(), kernel_decl->get_definition()->get_body()); //std::cout << swapped_->unparseToString() << std::endl << std::endl; SgNode *swapped = recursiveFindReplacePreferedIdxs(swapped_, kernel_decl->get_definition()->get_body()->get_symbol_table(), kernel_decl->get_definition()->get_symbol_table(), kernel_decl->get_definition()->get_body(), loop_idxs, globalScope); //in-place swapping //swapped->print(); if (!isSgBasicBlock(swapped)) { appendStatement(isSgStatement(swapped), kernel_decl->get_definition()->get_body()); swapped->set_parent( isSgNode(kernel_decl->get_definition()->get_body())); } else { for (SgStatementPtrList::iterator it = isSgBasicBlock(swapped)->get_statements().begin(); it != isSgBasicBlock(swapped)->get_statements().end(); it++) { appendStatement(*it, kernel_decl->get_definition()->get_body()); (*it)->set_parent( isSgNode(kernel_decl->get_definition()->get_body())); } } for (int i = 0; i < indexes.size(); i++) { std::vector tfs = findCommentedFors(indexes[i], swapped); for (int k = 0; k < tfs.size(); k++) { //printf("replacing %p tfs for index %s\n", tfs[k], indexes[i]); SgNode* newBlock = forReduce(tfs[k], loop_idxs[indexes[i]], kernel_decl->get_definition()); //newBlock->print(); swap_node_for_node_list(tfs[k], newBlock); //printf("AFTER SWAP\n"); newBlock->print(); } } //--derick replace array refs of texture mapped vars here body = new CG_roseRepr(kernel_decl->get_definition()->get_body()); std::vector refs = ir->FindArrayRef(body); texmapArrayRefs(texture, &refs, globals, ir, ocg); // do the same for constant mapped vars consmapArrayRefs(constant_mem, &refs, globals, ir, ocg); return swapped; } //Order taking out dummy variables std::vector cleanOrder(std::vector idxNames) { std::vector results; for (int j = 0; j < idxNames.size(); j++) { if (idxNames[j].length() != 0) results.push_back(idxNames[j]); } return results; } //First non-dummy level in ascending order int LoopCuda::nonDummyLevel(int stmt, int level) { //level comes in 1-basd and should leave 1-based for (int j = level - 1; j < idxNames[stmt].size(); j++) { if (idxNames[stmt][j].length() != 0) { //printf("found non dummy level of %d with idx: %s when searching for %d\n", j+1, (const char*) idxNames[stmt][j], level); return j + 1; } } char buf[128]; sprintf(buf, "%d", level); throw std::runtime_error( std::string("Unable to find a non-dummy level starting from ") + std::string(buf)); } int LoopCuda::findCurLevel(int stmt, std::string idx) { for (int j = 0; j < idxNames[stmt].size(); j++) { if (strcmp(idxNames[stmt][j].c_str(), idx.c_str()) == 0) return j + 1; } throw std::runtime_error( std::string("Unable to find index ") + idx + std::string(" in current list of indexes")); } void LoopCuda::permute_cuda(int stmt, const std::vector& curOrder) { //printf("curOrder: "); //printVs(curOrder); //printf("idxNames: "); //printVS(idxNames[stmt]); std::vector cIdxNames = cleanOrder(idxNames[stmt]); bool same = true; std::vector pi; for (int i = 0; i < curOrder.size(); i++) { bool found = false; for (int j = 0; j < cIdxNames.size(); j++) { if (strcmp(cIdxNames[j].c_str(), curOrder[i].c_str()) == 0) { pi.push_back(j + 1); found = true; if (j != i) same = false; } } if (!found) { throw std::runtime_error( "One of the indexes in the permute order were not " "found in the current set of indexes."); } } for (int i = curOrder.size(); i < cIdxNames.size(); i++) { pi.push_back(i); } if (same) return; permute(stmt, pi); //Set old indexe names as new for (int i = 0; i < curOrder.size(); i++) { idxNames[stmt][i] = curOrder[i].c_str(); //what about sibling stmts? } } bool LoopCuda::permute(int stmt_num, const std::vector &pi) { // check for sanity of parameters if (stmt_num >= stmt.size() || stmt_num < 0) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); const int n = stmt[stmt_num].xform.n_out(); if (pi.size() > (n - 1) / 2) throw std::invalid_argument( "iteration space dimensionality does not match permute dimensionality"); int first_level = 0; int last_level = 0; for (int i = 0; i < pi.size(); i++) { if (pi[i] > (n - 1) / 2 || pi[i] <= 0) throw std::invalid_argument( "invalid loop level " + to_string(pi[i]) + " in permuation"); if (pi[i] != i + 1) { if (first_level == 0) first_level = i + 1; last_level = i + 1; } } if (first_level == 0) return true; std::vector lex = getLexicalOrder(stmt_num); std::set active = getStatements(lex, 2 * first_level - 2); Loop::permute(active, pi); } void LoopCuda::tile_cuda(int stmt, int level, int outer_level) { tile_cuda(stmt, level, 1, outer_level, "", "", CountedTile); } void LoopCuda::tile_cuda(int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method) { tile_cuda(0, level, tile_size, outer_level, idxName, ctrlName, method); } void LoopCuda::tile_cuda(int stmt, int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method) { //Do regular tile but then update the index and control loop variable //names as well as the idxName to reflect the current state of things. //printf("tile(%d,%d,%d,%d)\n", stmt, level, tile_size, outer_level); //printf("idxNames before: "); //printVS(idxNames[stmt]); tile(stmt, level, tile_size, outer_level, method); if (idxName.size()) idxNames[stmt][level - 1] = idxName.c_str(); if (tile_size == 1) { //potentially rearrange loops if (outer_level < level) { std::string tmp = idxNames[stmt][level - 1]; for (int i = level - 1; i > outer_level - 1; i--) { if (i - 1 >= 0) idxNames[stmt][i] = idxNames[stmt][i - 1]; } idxNames[stmt][outer_level - 1] = tmp; } //TODO: even with a tile size of one, you need a insert (of a dummy loop) idxNames[stmt].insert(idxNames[stmt].begin() + (level), ""); } else { if (!ctrlName.size()) throw std::runtime_error("No ctrl loop name for tile"); //insert idxNames[stmt].insert(idxNames[stmt].begin() + (outer_level - 1), ctrlName.c_str()); } //printf("idxNames after: "); //printVS(idxNames[stmt]); } bool LoopCuda::datacopy_privatized_cuda(int stmt_num, int level, const std::string &array_name, const std::vector &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, bool cuda_shared) { int old_stmts = stmt.size(); // printf("before datacopy_privatized:\n"); printIS(); //datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, cuda_shared); if (cuda_shared) datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, 1); else datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, 0); // printf("after datacopy_privatized:\n"); printIS(); //Adjust idxNames to reflect updated state std::vector cIdxNames = cleanOrder(idxNames[stmt_num]); int new_stmts = stmt.size(); for (int i = old_stmts; i < new_stmts; i++) { //printf("fixing up statement %d\n", i); std::vector idxs; //protonu-making sure the vector of nonSplitLevels grows along with //the statement structure stmt_nonSplitLevels.push_back(std::vector()); //Indexes up to level will be the same for (int j = 0; j < level - 1; j++) idxs.push_back(cIdxNames[j]); //Expect privatized_levels to match for (int j = 0; j < privatized_levels.size(); j++) idxs.push_back(cIdxNames[privatized_levels[j] - 1]);//level is one-based //all further levels should match order they are in originally if (privatized_levels.size()) { int last_privatized = privatized_levels.back(); int top_level = last_privatized + (stmt[i].IS.n_set() - idxs.size()); //printf("last privatized_levels: %d top_level: %d\n", last_privatized, top_level); for (int j = last_privatized; j < top_level; j++) { idxs.push_back(cIdxNames[j]); //printf("pushing back: %s\n", (const char*)cIdxNames[j]); } } idxNames.push_back(idxs); } } bool LoopCuda::datacopy_cuda(int stmt_num, int level, const std::string &array_name, const std::vector new_idxs, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, bool cuda_shared) { int old_stmts = stmt.size(); //datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment,cuda_shared); // printf("before datacopy:\n"); // printIS(); if (cuda_shared) datacopy(stmt_num, level, array_name, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, 1); else datacopy(stmt_num, level, array_name, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, 0); // printf("after datacopy:\n"); printIS(); //Adjust idxNames to reflect updated state std::vector cIdxNames = cleanOrder(idxNames[stmt_num]); int new_stmts = stmt.size(); for (int i = old_stmts; i < new_stmts; i++) { //printf("fixing up statement %d\n", i); std::vector idxs; //protonu-making sure the vector of nonSplitLevels grows along with //the statement structure stmt_nonSplitLevels.push_back(std::vector()); //Indexes up to level will be the same for (int j = 0; j < level - 1; j++) idxs.push_back(cIdxNames[j]); //all further levels should get names from new_idxs int top_level = stmt[i].IS.n_set(); //printf("top_level: %d level: %d\n", top_level, level); if (new_idxs.size() < top_level - level + 1) throw std::runtime_error( "Need more new index names for new datacopy loop levels"); for (int j = level - 1; j < top_level; j++) { idxs.push_back(new_idxs[j - level + 1].c_str()); //printf("pushing back: %s\n", new_idxs[j-level+1].c_str()); } idxNames.push_back(idxs); } } bool LoopCuda::unroll_cuda(int stmt_num, int level, int unroll_amount) { int old_stmts = stmt.size(); //bool b= unroll(stmt_num, , unroll_amount); int dim = 2 * level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_loop = getStatements(lex, dim - 1); level = nonDummyLevel(stmt_num, level); //printf("unrolling %d at level %d\n", stmt_num,level); //protonu--using the new version of unroll, which returns //a set of ints instead of a bool. To keep Gabe's logic //I'll check the size of the set, if it's 0 return true //bool b= unroll(stmt_num, level, unroll_amount); std::set b_set = unroll(stmt_num, level, unroll_amount, idxNames); bool b = false; if (b_set.size() == 0) b = true; //end--protonu //Adjust idxNames to reflect updated state std::vector cIdxNames = cleanOrder(idxNames[stmt_num]); std::vector origSource = idxNames[stmt_num]; ; //Drop index names at level if (unroll_amount == 0) { //For all statements that were in this unroll together, drop index name for unrolled level idxNames[stmt_num][level - 1] = ""; for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { //printf("in same loop as %d is %d\n", stmt_num, (*i)); //idxNames[(*i)][level-1] = ""; idxNames[(*i)] = idxNames[stmt_num]; } } lex = getLexicalOrder(stmt_num); same_loop = getStatements(lex, dim - 1); bool same_as_source = false; int new_stmts = stmt.size(); for (int i = old_stmts; i < new_stmts; i++) { //Check whether we had a sync for the statement we are unrolling, if //so, propogate that to newly created statements so that if they are //in a different loop structure, they will also get a syncthreads int size = syncs.size(); for (int j = 0; j < size; j++) { if (syncs[j].first == stmt_num) syncs.push_back(make_pair(i, syncs[j].second)); } //protonu-making sure the vector of nonSplitLevels grows along with //the statement structure stmt_nonSplitLevels.push_back(std::vector()); //We expect that new statements have a constant for the variable in //stmt[i].IS at level (as seen with print_with_subs), otherwise there //will be a for loop at level and idxNames should match stmt's //idxNames pre-unrolled Relation IS = stmt[i].IS; //Ok, if you know how the hell to get anything out of a Relation, you //should probably be able to do this more elegantly. But for now, I'm //hacking it. std::string s = IS.print_with_subs_to_string(); //s looks looks like //{[_t49,8,_t51,_t52,128]: 0 <= _t52 <= 3 && 0 <= _t51 <= 15 && 0 <= _t49 && 64_t49+16_t52+_t51 <= 128} //where level == 5, you see a integer in the input set //If that's not an integer and this is the first new statement, then //we think codegen will have a loop at that level. It's not perfect, //not sure if it can be determined without round-tripping to codegen. int sIdx = 0; int eIdx = 0; for (int j = 0; j < level - 1; j++) { sIdx = s.find(",", sIdx + 1); if (sIdx < 0) break; } if (sIdx > 0) { eIdx = s.find("]"); int tmp = s.find(",", sIdx + 1); if (tmp > 0 && tmp < eIdx) eIdx = tmp; //", before ]" if (eIdx > 0) { sIdx++; std::string var = s.substr(sIdx, eIdx - sIdx); //printf("%s\n", s.c_str()); //printf("set var for stmt %d at level %d is %s\n", i, level, var.c_str()); if (atoi(var.c_str()) == 0 && i == old_stmts) { //TODO:Maybe do see if this new statement would be in the same //group as the original and if it would, don't say //same_as_source if (same_loop.find(i) == same_loop.end()) { printf( "stmt %d level %d, newly created unroll statement should have same level indexes as source\n", i, level); same_as_source = true; } } } } //printf("fixing up statement %d n_set %d with %d levels\n", i, stmt[i].IS.n_set(), level-1); if (same_as_source) idxNames.push_back(origSource); else idxNames.push_back(idxNames[stmt_num]); } return b; } void LoopCuda::copy_to_texture(const char *array_name) { //protonu--placeholder for now //set the bool for using cuda memory as true //in a vector of strings, put the names of arrays to tex mapped if (!texture) texture = new texture_memory_mapping(true, array_name); else texture->add(array_name); } //void LoopCuda::copy_to_texture_2d(const char *array_name, int width, int height) { // if (!texture) // texture = new texture_memory_mapping(true, array_name, width, height); // else // texture->add(array_name, width, height); //} void LoopCuda::copy_to_constant(const char *array_name) { if(!constant_mem) constant_mem = new constant_memory_mapping(true, array_name); else constant_mem->add(array_name); } //protonu--moving this from Loop SgNode* LoopCuda::codegen() { if (code_gen_flags & GenCudaizeV2) return cudaize_codegen_v2(); //Do other flagged codegen methods, return plain vanilla generated code return getCode(); } //These three are in Omega code_gen.cc and are used as a massive hack to //get out some info from MMGenerateCode. Yea for nasty side-effects. namespace omega { extern int checkLoopLevel; extern int stmtForLoopCheck; extern int upperBoundForLevel; extern int lowerBoundForLevel; } CG_outputRepr* LoopCuda::extractCudaUB(int stmt_num, int level, int &outUpperBound, int &outLowerBound) { // check for sanity of parameters const int m = stmt.size(); if (stmt_num >= m || stmt_num < 0) throw std::invalid_argument("invalid statement " + to_string(stmt_num)); const int n = stmt[stmt_num].xform.n_out(); if (level > (n - 1) / 2 || level <= 0) throw std::invalid_argument("invalid loop level " + to_string(level)); int dim = 2 * level - 1; std::vector lex = getLexicalOrder(stmt_num); std::set same_loop = getStatements(lex, dim - 1); // extract the intersection of the iteration space to be considered Relation hull; { hull = Relation::True(n); for (std::set::iterator i = same_loop.begin(); i != same_loop.end(); i++) { Relation r = getNewIS(*i); for (int j = dim + 2; j <= r.n_set(); j++) r = Project(r, r.set_var(j)); hull = Intersection(hull, r); hull.simplify(2, 4); } for (int i = 2; i <= dim + 1; i += 2) { //std::string name = std::string("_t") + to_string(t_counter++); std::string name = std::string("_t") + to_string(tmp_loop_var_name_counter++); hull.name_set_var(i, name); } hull.setup_names(); } // extract the exact loop bound of the dimension to be unrolled if (is_single_iteration(hull, dim)) { throw std::runtime_error( "No loop availabe at level to extract upper bound."); } Relation bound = get_loop_bound(hull, dim); if (!bound.has_single_conjunct() || !bound.is_satisfiable() || bound.is_tautology()) throw loop_error( "loop error: unable to extract loop bound for cudaize"); // extract the loop stride EQ_Handle stride_eq; /*int stride = 1; { bool simple_stride = true; int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(dim + 1), stride_eq, simple_stride); if (strides > 1) throw loop_error("loop error: too many strides"); else if (strides == 1) { int sign = stride_eq.get_coef(bound.set_var(dim + 1)); // assert(sign == 1 || sign == -1); Constr_Vars_Iter it(stride_eq, true); stride = abs((*it).coef / sign); } } */ int stride = 1; { coef_t stride; std::pair result = find_simplest_stride(bound, bound.set_var(dim + 1)); if (result.second == NULL) stride = 1; else stride = abs(result.first.get_coef(result.second)) / gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(dim + 1)))); if (stride > 1) throw loop_error("loop error: too many strides"); /*else if (stride == 1) { int sign = result.first.get_coef(bound.set_var(dim+1)); assert(sign == 1 || sign == -1); } */ } if (stride != 1) { char buf[1024]; sprintf(buf, "Cudaize: Loop at level %d has non-one stride of %d", level, stride); throw std::runtime_error(buf); } //Use code generation system to build tell us our bound information. We //need a hard upper bound a 0 lower bound. checkLoopLevel = level * 2; stmtForLoopCheck = stmt_num; upperBoundForLevel = -1; lowerBoundForLevel = -1; printCode(1, false); checkLoopLevel = 0; outUpperBound = upperBoundForLevel; outLowerBound = lowerBoundForLevel; if (outUpperBound == -1) { CG_result* temp = last_compute_cgr_; while (temp) { CG_loop * loop; if (loop = dynamic_cast(temp)) { if (loop->level_ == 2 * level) { Relation bound = copy(loop->bounds_); Variable_ID v = bound.set_var(2 * level); for (GEQ_Iterator e( const_cast(bound).single_conjunct()->GEQs()); e; e++) { if ((*e).get_coef(v) < 0 && (*e).is_const_except_for_global(v)) return output_upper_bound_repr(ir->builder(), *e, v, bound, std::vector >( bound.n_set(), std::make_pair( static_cast(NULL), 0))); } } if (loop->level_ > 2 * level) break; else temp = loop->body_; } else break; } } return NULL; } void LoopCuda::printCode(int effort, bool actuallyPrint) const { const int m = stmt.size(); if (m == 0) return; const int n = stmt[0].xform.n_out(); /*or (int i = 0; i < m; i++) { IS[i + 1] = stmt[i].IS; xform[i + 1] = stmt[i].xform; //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; } */ // invalidate saved codegen computation if (last_compute_cgr_ != NULL) { delete last_compute_cgr_; last_compute_cgr_ = NULL; } if (last_compute_cg_ != NULL) { delete last_compute_cg_; last_compute_cg_ = NULL; } //Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); /*CG_stringBuilder *ocg = new CG_stringBuilder(); Tuple nameInfo; for (int i = 1; i <= m; i++) nameInfo.append(new CG_stringRepr("s" + to_string(i))); */ // -- replacing MMGenerateCode // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort); // -- in the future, these if statements need to be cleaned up. // -- something like check_lastComputeCG might be a decent protected member function // -- and/or something that returns a std::vector that also checks last_compute_cg_ //if (last_compute_cg_ == NULL) { std::vector IS(m); std::vector xforms(m); std::vector > nonSplitLevels(m); /* std::vector < std::vector > idxTupleNames; if (useIdxNames) { for (int i = 0; i < idxNames.size(); i++) { Tuple idxs; for (int j = 0; j < idxNames[i].size(); j++) idxs.append(idxNames[i][j]); idxTupleNames.append(idxs); } } */ for (int i = 0; i < m; i++) { IS[i] = stmt[i].IS; xforms[i] = stmt[i].xform; nonSplitLevels[i] = stmt_nonSplitLevels[i]; } Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames, syncs); delete last_compute_cgr_; // this was just done above? last_compute_cgr_ = NULL; //} if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { delete last_compute_cgr_; last_compute_cgr_ = last_compute_cg_->buildAST(effort); last_compute_effort_ = effort; } //std::vector stmts(m); //for (int i = 0; i < m; i++) // stmts[i] = stmt[i].code; //CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts); // -- end replacing MMGenerateCode std::string repr = last_compute_cgr_->printString(); if (actuallyPrint) std::cout << repr << std::endl; //std::cout << static_cast(repr)->GetString(); /* for (int i = 1; i <= m; i++) delete nameInfo[i]; */ //delete ocg; } void LoopCuda::printRuntimeInfo() const { for (int i = 0; i < stmt.size(); i++) { Relation IS = stmt[i].IS; Relation xform = stmt[i].xform; printf("stmt[%d]\n", i); printf("IS\n"); IS.print_with_subs(); printf("xform[%d]\n", i); xform.print_with_subs(); } } void LoopCuda::printIndexes() const { for (int i = 0; i < stmt.size(); i++) { printf("stmt %d nset %d ", i, stmt[i].IS.n_set()); for (int j = 0; j < idxNames[i].size(); j++) { if (j > 0) printf(","); printf("%s", idxNames[i][j].c_str()); } printf("\n"); } } SgNode* LoopCuda::getCode(int effort) const { const int m = stmt.size(); if (m == 0) return new SgNode; const int n = stmt[0].xform.n_out(); /* Tuple ni(m); Tuple < Relation > IS(m); Tuple < Relation > xform(m); vector < vector > nonSplitLevels(m); for (int i = 0; i < m; i++) { ni[i + 1] = stmt[i].code; IS[i + 1] = stmt[i].IS; xform[i + 1] = stmt[i].xform; nonSplitLevels[i + 1] = stmt_nonSplitLevels[i]; //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; } */ //Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); //#ifdef DEBUG //#endif //std::cout << GetString(MMGenerateCode(new CG_stringBuilder(), xform, IS, ni, known, // nonSplitLevels, syncs, idxTupleNames, effort)); if (last_compute_cgr_ != NULL) { delete last_compute_cgr_; last_compute_cgr_ = NULL; } if (last_compute_cg_ != NULL) { delete last_compute_cg_; last_compute_cg_ = NULL; } CG_outputBuilder *ocg = ir->builder(); // -- replacing MMGenerateCode // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort); // -- in the future, these if statements need to be cleaned up. // -- something like check_lastComputeCG might be a decent protected member function // -- and/or something that returns a std::vector that also checks last_compute_cg_ //if (last_compute_cg_ == NULL) { std::vector IS(m); std::vector xforms(m); std::vector > nonSplitLevels(m); for (int i = 0; i < m; i++) { IS[i] = stmt[i].IS; xforms[i] = stmt[i].xform; nonSplitLevels[i] = stmt_nonSplitLevels[i]; } /*std::vector < std::vector > idxTupleNames; if (useIdxNames) { for (int i = 0; i < idxNames.size(); i++) { std::vector idxs; for (int j = 0; j < idxNames[i].size(); j++) idxs.push_back(idxNames[i][j]); idxTupleNames.push_back(idxs); } } */ Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames, syncs); delete last_compute_cgr_; last_compute_cgr_ = NULL; //} if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { delete last_compute_cgr_; last_compute_cgr_ = last_compute_cg_->buildAST(effort); last_compute_effort_ = effort; } std::vector stmts(m); for (int i = 0; i < m; i++) stmts[i] = stmt[i].code; CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts); // -- end replacing MMGenerateCode //CG_outputRepr *overflow_initialization = ocg->CreateStmtList(); CG_outputRepr *overflow_initialization = ocg->StmtListAppend(NULL, NULL); for (std::map >::const_iterator i = overflow.begin(); i != overflow.end(); i++) for (std::vector::const_iterator j = i->second.begin(); j != i->second.end(); j++) //overflow_initialization = ocg->StmtListAppend(overflow_initialization, ocg->CreateStmtList(ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0)))); overflow_initialization = ocg->StmtListAppend( overflow_initialization, ocg->StmtListAppend( ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0)), NULL)); repr = ocg->StmtListAppend(overflow_initialization, repr); SgNode *tnl = static_cast(repr)->GetCode(); SgStatementPtrList *list = static_cast(repr)->GetList(); if (tnl != NULL) return tnl; else if (tnl == NULL && list != NULL) { SgBasicBlock* bb2 = buildBasicBlock(); for (SgStatementPtrList::iterator it = (*list).begin(); it != (*list).end(); it++) bb2->append_statement(*it); tnl = isSgNode(bb2); } else throw loop_error("codegen failed"); delete repr; /* for (int i = 1; i <= m; i++) delete ni[i]; */ return tnl; } //protonu--adding constructors for the new derived class LoopCuda::LoopCuda() : Loop(), code_gen_flags(GenInit) { } LoopCuda::LoopCuda(IR_Control *irc, int loop_num) : Loop(irc) { setup_code = NULL; teardown_code = NULL; code_gen_flags = 0; cu_bx = cu_by = cu_tx = cu_ty = cu_tz = 1; cu_bx_repr = NULL; cu_tx_repr = NULL; cu_by_repr = NULL; cu_ty_repr = NULL; cu_tz_repr = NULL; cu_num_reduce = 0; cu_mode = GlobalMem; texture = NULL; constant_mem = NULL; int m = stmt.size(); //printf("\n the size of stmt(initially) is: %d\n", stmt.size()); for (int i = 0; i < m; i++) stmt_nonSplitLevels.push_back(std::vector()); globals = ((IR_cudaroseCode *) ir)->gsym_; globalScope = ((IR_cudaroseCode *) ir)->first_scope; parameter_symtab = ((IR_cudaroseCode *) ir)->parameter; body_symtab = ((IR_cudaroseCode *) ir)->body; func_body = ((IR_cudaroseCode *) ir)->defn; func_definition = ((IR_cudaroseCode *) ir)->func_defn; std::vector tf = ((IR_cudaroseCode *) ir)->get_loops(); symtab = tf[loop_num]->get_symbol_table(); std::vector deepest = find_deepest_loops( isSgNode(tf[loop_num])); for (int i = 0; i < deepest.size(); i++) { SgVariableSymbol* vs; SgForInitStatement* list = deepest[i]->get_for_init_stmt(); SgStatementPtrList& initStatements = list->get_init_stmt(); SgStatementPtrList::const_iterator j = initStatements.begin(); if (SgExprStatement *expr = isSgExprStatement(*j)) if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) vs = var_ref->get_symbol(); index.push_back(vs->get_name().getString().c_str()); //reflects original code index names } for (int i = 0; i < stmt.size(); i++) idxNames.push_back(index); //refects prefered index names (used as handles in cudaize v2) useIdxNames = false; } void LoopCuda::printIS() { if (!cudaDebug) return; int k = stmt.size(); for (int i = 0; i < k; i++) { printf(" printing statement:%d\n", i); stmt[i].IS.print(); } }