summaryrefslogtreecommitdiff
path: root/loop_cuda_rose.cc
diff options
context:
space:
mode:
authorDerick Huth <derickhuth@gmail.com>2015-09-24 12:22:41 -0600
committerDerick Huth <derickhuth@gmail.com>2015-09-24 12:22:41 -0600
commit4631ad76927d433da5d55c3c373a1dfd0f74c9d4 (patch)
treef8dcba88576ec95e403f0c14efd80e970f30a260 /loop_cuda_rose.cc
parent6eb2b89896da66a77d0dcdf2d72b98c122826949 (diff)
parent0cff3f9a3c4ccd434900162ebef4bd814850f481 (diff)
downloadchill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.gz
chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.tar.bz2
chill-4631ad76927d433da5d55c3c373a1dfd0f74c9d4.zip
Merge pull request #7 from dhuth/master
V0.2.1
Diffstat (limited to 'loop_cuda_rose.cc')
-rw-r--r--loop_cuda_rose.cc3734
1 files changed, 0 insertions, 3734 deletions
diff --git a/loop_cuda_rose.cc b/loop_cuda_rose.cc
deleted file mode 100644
index c5633ee..0000000
--- a/loop_cuda_rose.cc
+++ /dev/null
@@ -1,3734 +0,0 @@
-/*****************************************************************************
- Copyright (C) 2009 University of Utah
- All Rights Reserved.
-
- Purpose:
- Cudaize methods
-
- Notes:
-
- History:
- 1/7/10 Created by Gabe Rudy by migrating code from loop.cc
- 31/1/11 Modified by Protonu Basu
-*****************************************************************************/
-#define TRANSFORMATION_FILE_INFO Sg_File_Info::generateDefaultFileInfoForTransformationNode()
-#include <code_gen/CG_stringBuilder.h>
-#include <codegen.h>
-#include <code_gen/CG_utils.h>
-#include <code_gen/CG_outputRepr.h>
-#include "loop_cuda_rose.hh"
-#include "loop.hh"
-#include <math.h>
-//#include <useful.h>
-#include "omegatools.hh"
-#include "ir_cudarose.hh"
-#include "ir_rose.hh"
-#include "ir_rose_utils.hh"
-#include "chill_error.hh"
-#include <vector>
-#include "Outliner.hh"
-//#define DEBUG
-using namespace omega;
-using namespace SageBuilder;
-using namespace SageInterface;
-//using namespace Outliner;
-//using namespace ASTtools;
-char *k_cuda_texture_memory; //protonu--added to track texture memory type
-//extern char *omega::k_cuda_texture_memory; //protonu--added to track texture memory type
-extern char *omega::k_ocg_comment;
-
-static int cudaDebug;
-class CudaStaticInit {
-public:
- CudaStaticInit() {
- cudaDebug = 0; //Change this to 1 for debug
- }
-};
-static CudaStaticInit junkInitInstance__;
-
-std::string& upcase(std::string& s) {
- for (int i = 0; i < s.size(); i++)
- s[i] = toupper(s[i]);
- return s;
-}
-
-void printVs(const std::vector<std::string>& curOrder) {
- if (!cudaDebug) return;
- for (int i = 0; i < curOrder.size(); i++) {
- if (i > 0)
- printf(",");
- printf("%s", curOrder[i].c_str());
- }
- printf("\n");
-}
-
-void printVS(const std::vector<std::string>& curOrder) {
- if(!cudaDebug) return;
- for (int i = 0; i < curOrder.size(); i++) {
- if (i > 0)
- printf(",");
- printf("%s", curOrder[i].c_str());
- }
- printf("\n");
-}
-
-LoopCuda::~LoopCuda() {
- const int m = stmt.size();
- for (int i = 0; i < m; i++)
- stmt[i].code->clear();
-}
-
-bool LoopCuda::symbolExists(std::string s) {
-
- if (body_symtab->find_variable(SgName(s.c_str()))
- || parameter_symtab->find_variable(SgName(s.c_str())))
- return true;
- if (globals->lookup_variable_symbol(SgName(s.c_str())))
- return true;
- for (int i = 0; i < idxNames.size(); i++)
- for (int j = 0; j < idxNames[i].size(); j++)
- if (strcmp(idxNames[i][j].c_str(), s.c_str()) == 0)
- return true;
- return false;
-}
-
-void LoopCuda::addSync(int stmt_num, std::string idxName) {
- //we store these and code-gen inserts sync to omega comments where stmt
- //in loop that has idxName being generated
- syncs.push_back(make_pair(stmt_num, idxName));
-}
-
-void LoopCuda::renameIndex(int stmt_num, std::string idx, std::string newName) {
- int level = findCurLevel(stmt_num, idx);
- if (idxNames.size() <= stmt_num || idxNames[stmt_num].size() < level)
- throw std::runtime_error("Invalid statment number of index");
- idxNames[stmt_num][level - 1] = newName.c_str();
-}
-
-enum Type {
- Int
-};
-
-SgNode* wrapInIfFromMinBound(SgNode* then_part, SgForStatement* loop,
- SgScopeStatement* symtab, SgVariableSymbol* bound_sym) {
- // CG_roseBuilder *ocg = new CG_roseBuilder(
-
- SgBinaryOp* test_expr = isSgBinaryOp(loop->get_test_expr());
- SgExpression* upperBound;
- SgExpression* conditional;
- upperBound = test_expr->get_rhs_operand();
- CG_outputRepr *ifstmt;
-
- SgCallExpression *call;
- if (call = isSgCallExpression(upperBound))
- if (isSgVarRefExp(call->get_function())->get_symbol()->get_name().getString()
- == "__rose_lt") {
- SgExprListExp* arg_list = call->get_args();
- SgExpression *if_bound = *(arg_list->get_expressions().begin());
- /*This relies on the minimum expression being the rhs operand of
- * the min instruction.
- */
- SgIfStmt *ifstmt = buildIfStmt(
- buildLessOrEqualOp(buildVarRefExp(bound_sym), if_bound),
- isSgStatement(then_part), NULL);
- return isSgNode(ifstmt);
-
- }
-
-/* if (isSgConditionalExp(upperBound)) {
- conditional = isSgConditionalExp(upperBound)->get_conditional_exp();
-
- if (isSgBinaryOp(conditional)) {
- SgBinaryOp* binop = isSgBinaryOp(conditional);
-
- if (isSgLessThanOp(binop) || isSgLessOrEqualOp(binop)) {
- SgIfStmt *ifstmt = buildIfStmt(
- buildLessOrEqualOp(buildVarRefExp(bound_sym),
- test_expr), isSgStatement(then_part), NULL);
- return isSgNode(ifstmt);
- }
-
- }
-
- }
-*/
- return then_part;
-}
-
-/**
- * This would be better if it was done by a CHiLL xformation instead of at codegen
- *
- * state:
- * for(...)
- * for(...)
- * cur_body
- * stmt1
- *
- * stm1 is in-between two loops that are going to be reduced. The
- * solution is to put stmt1 at the end of cur_body but conditionally run
- * in on the last step of the for loop.
- *
- * A CHiLL command that would work better:
- *
- * for(...)
- * stmt0
- * for(for i=0; i<n; i++)
- * cur_body
- * stmt1
- * =>
- * for(...)
- * for(for i=0; i<n; i++)
- * if(i==0) stmt0
- * cur_body
- * if(i==n-1) stmt1
- */
-
-std::vector<SgForStatement*> findCommentedFors(const char* index, SgNode* tnl) {
- std::vector<SgForStatement *> result;
- bool next_loop_ok = false;
-
- if (isSgBasicBlock(tnl)) {
-
- SgStatementPtrList& list = isSgBasicBlock(tnl)->get_statements();
-
- for (SgStatementPtrList::iterator it = list.begin(); it != list.end();
- it++) {
- std::vector<SgForStatement*> t = findCommentedFors(index,
- isSgNode(*it));
- std::copy(t.begin(), t.end(), back_inserter(result));
- }
- } else if (isSgForStatement(tnl)) {
-
- AstTextAttribute* att =
- (AstTextAttribute*) (isSgNode(tnl)->getAttribute(
- "omega_comment"));
- std::string comment = att->toString();
-
- if (comment.find("~cuda~") != std::string::npos
- && comment.find("preferredIdx: ") != std::string::npos) {
- std::string idx = comment.substr(
- comment.find("preferredIdx: ") + 14, std::string::npos);
- if (idx.find(" ") != std::string::npos)
- idx = idx.substr(0, idx.find(" "));
- if (strcmp(idx.c_str(), index) == 0)
- next_loop_ok = true;
- }
-
- if (next_loop_ok) {
- //printf("found loop %s\n", static_cast<tree_for *>(tn)->index()->name());
- result.push_back(isSgForStatement(tnl));
- } else {
- //printf("looking down for loop %s\n", static_cast<tree_for *>(tn)->index()->name());
- std::vector<SgForStatement*> t = findCommentedFors(index,
- isSgForStatement(tnl)->get_loop_body());
- std::copy(t.begin(), t.end(), back_inserter(result));
- }
- next_loop_ok = false;
- } else if (isSgIfStmt(tnl)) {
- //printf("looking down if\n");
- SgIfStmt *tni = isSgIfStmt(tnl);
- std::vector<SgForStatement*> t = findCommentedFors(index,
- tni->get_true_body());
- std::copy(t.begin(), t.end(), back_inserter(result));
- }
-
- return result;
-}
-
-SgNode* forReduce(SgForStatement* loop, SgVariableSymbol* reduceIndex,
- SgScopeStatement* body_syms) {
- //We did the replacements all at once with recursiveFindPreferedIdxs
- //replacements r;
- //r.oldsyms.append(loop->index());
- //r.newsyms.append(reduceIndex);
- //tree_for* new_loop = (tree_for*)loop->clone_helper(&r, true);
- SgForStatement* new_loop = loop;
-
- //return body one loops in
- SgNode* tnl = loop_body_at_level(new_loop, 1);
- //wrap in conditional if necessary
- tnl = wrapInIfFromMinBound(tnl, new_loop, body_syms, reduceIndex);
- return tnl;
-}
-
-void recursiveFindRefs(SgNode* code, std::set<const SgVariableSymbol *>& syms,
- SgFunctionDefinition* def) {
-
- SgStatement* s = isSgStatement(code);
- // L = {symbols defined within 's'}, local variables declared within 's'
- ASTtools::VarSymSet_t L;
- ASTtools::collectDefdVarSyms(s, L);
- //dump (L, "L = ");
-
- // U = {symbols used within 's'}
- ASTtools::VarSymSet_t U;
- ASTtools::collectRefdVarSyms(s, U);
- //dump (U, "U = ");
-
- // U - L = {symbols used within 's' but not defined in 's'}
- // variable references to non-local-declared variables
- ASTtools::VarSymSet_t diff_U_L;
- set_difference(U.begin(), U.end(), L.begin(), L.end(),
- inserter(diff_U_L, diff_U_L.begin()));
- //dump (diff_U_L, "U - L = ");
-
- // Q = {symbols defined within the function surrounding 's' that are
- // visible at 's'}, including function parameters
- ASTtools::VarSymSet_t Q;
- ASTtools::collectLocalVisibleVarSyms(def->get_declaration(), s, Q);
-// dump (Q, "Q = ");
-
- // (U - L) \cap Q = {variables that need to be passed as parameters
- // to the outlined function}
- // a sub set of variables that are not globally visible (no need to pass at all)
- // It excludes the variables with a scope between global and the enclosing function
- set_intersection(diff_U_L.begin(), diff_U_L.end(), Q.begin(), Q.end(),
- inserter(syms, syms.begin()));
-
- /* std::vector<SgVariableSymbol *> scalars;
- //SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode();
- SgStatement* stmt;
- SgExpression* exp;
- if (tnl != NULL) {
- if(stmt = isSgStatement(tnl)){
- if(isSgBasicBlock(stmt)){
- SgStatementPtrList& stmts = isSgBasicBlock(stmt)->get_statements();
- for(int i =0; i < stmts.size(); i++){
- //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(stmts[i]));
- std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(stmts[i]));
- //delete r;
- std::copy(a.begin(), a.end(), back_inserter(scalars));
- }
-
- }
- else if(isSgForStatement(stmt)){
-
- SgForStatement *tnf = isSgForStatement(stmt);
- //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tnf->get_loop_body()));
- std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tnf->get_loop_body()));
- //delete r;
- std::copy(a.begin(), a.end(), back_inserter(scalars));
- }
- else if(isSgFortranDo(stmt)){
- SgFortranDo *tfortran = isSgFortranDo(stmt);
- omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tfortran->get_body()));
- std::vector<SgVariableSymbol *> a = recursiveFindRefs(r);
- delete r;
- std::copy(a.begin(), a.end(), back_inserter(scalars));
- }
-
- else if(isSgIfStmt(stmt) ){
- SgIfStmt* tni = isSgIfStmt(stmt);
- //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(tni->get_conditional()));
- std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tni->get_conditional()));
- //delete r;
- std::copy(a.begin(), a.end(), back_inserter(scalars));
- //r = new omega::CG_roseRepr(isSgNode(tni->get_true_body()));
- a = recursiveFindRefs(isSgNode(tni->get_true_body()));
- //delete r;
- std::copy(a.begin(), a.end(), back_inserter(scalars));
- //r = new omega::CG_roseRepr(isSgNode(tni->get_false_body()));
- a = recursiveFindRefs(isSgNode(tni->get_false_body()));
- //delete r;
- std::copy(a.begin(), a.end(), back_inserter(scalars));
- }
- else if(isSgExprStatement(stmt)) {
- //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgExpression(isSgExprStatement(stmt)->get_expression()));
- std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(isSgExprStatement(stmt)->get_expression()));
- //delete r;
- std::copy(a.begin(), a.end(), back_inserter(scalars));
-
- }
- }
- }
- else{
- SgExpression* op = isSgExpression(tnl);
- if(isSgVarRefExp(op)){
-
- scalars.push_back(isSgVarRefExp(op)->get_symbol());
-
- }
- else if( isSgAssignOp(op)){
- //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgAssignOp(op)->get_lhs_operand());
- std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_lhs_operand()));
- //delete r1;
- std::copy(a1.begin(), a1.end(), back_inserter(scalars));
- //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgAssignOp(op)->get_rhs_operand());
- std::vector<SgVariableSymbol *> a2 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_rhs_operand()));
- //delete r2;
- std::copy(a2.begin(), a2.end(), back_inserter(scalars));
-
- }
- else if(isSgBinaryOp(op)){
- // omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_lhs_operand());
- std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgBinaryOp(op)->get_lhs_operand()));
- //delete r1;
- std::copy(a1.begin(), a1.end(), back_inserter(scalars));
- //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_rhs_operand());
- std::vector<SgVariableSymbol *> a2 = recursiveFindRefs((isSgBinaryOp(op)->get_rhs_operand()));
- //delete r2;
- std::copy(a2.begin(), a2.end(), back_inserter(scalars));
- }
- else if(isSgUnaryOp(op)){
- //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgUnaryOp(op)->get_operand());
- std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgUnaryOp(op)->get_operand()));
- //delete r1;
- std::copy(a1.begin(), a1.end(), back_inserter(scalars));
- }
-
- }
- return scalars;
-
-
- */
-
-}
-
-SgNode* recursiveFindReplacePreferedIdxs(SgNode* code, SgSymbolTable* body_syms,
- SgSymbolTable* param_syms, SgScopeStatement* body,
- std::map<std::string, SgVariableSymbol*>& loop_idxs,
- SgGlobal* globalscope, bool sync = false) {
- //tree_node_list* tnl = new tree_node_list;
- //tree_node_list_iter tnli(code);
- SgVariableSymbol* idxSym = 0;
- std::vector<SgStatement*> r1;
- std::vector<SgNode*> r2;
- SgNode* tnli;
- SgNode* tnli1;
- SgNode* tnli2;
- SgBasicBlock * clone;
-
- if (isSgForStatement(code)) {
- AstTextAttribute* att =
- (AstTextAttribute*) (isSgNode(code)->getAttribute(
- "omega_comment"));
-
- std::string comment;
- if (att != NULL)
- comment = att->toString();
-
- if (comment.find("~cuda~") != std::string::npos
- && comment.find("preferredIdx: ") != std::string::npos) {
- std::string idx = comment.substr(
- comment.find("preferredIdx: ") + 14, std::string::npos);
- if (idx.find(" ") != std::string::npos)
- idx = idx.substr(0, idx.find(" "));
- if (loop_idxs.find(idx) != loop_idxs.end())
- idxSym = loop_idxs.find(idx)->second;
- //Get the proc variable sybol for this preferred index
- if (idxSym == 0) {
- idxSym = body_syms->find_variable(idx.c_str());
- if (!idxSym)
- idxSym = param_syms->find_variable(idx.c_str());
- //printf("idx not found: lookup %p\n", idxSym);
- if (!idxSym) {
- SgVariableDeclaration* defn = buildVariableDeclaration(
- SgName((char*) idx.c_str()), buildIntType());
- //idxSym = new var_sym(type_s32, (char*)idx.c_str());
- SgInitializedNamePtrList& variables = defn->get_variables();
- SgInitializedNamePtrList::const_iterator i =
- variables.begin();
- SgInitializedName* initializedName = *i;
- SgVariableSymbol* vs = new SgVariableSymbol(
- initializedName);
- prependStatement(defn, body);
- vs->set_parent(body_syms);
- body_syms->insert(SgName((char*) idx.c_str()), vs);
- idxSym = vs;
- //printf("idx created and inserted\n");
- }
- //Now insert into our map for future
- if (cudaDebug)
- std::cout << idx << "\n\n";
- loop_idxs.insert(make_pair(idx, idxSym));
- }
- //See if we have a sync as well
- if (comment.find("sync") != std::string::npos) {
- //printf("Inserting sync after current block\n");
- sync = true;
- }
-
- }
- if (idxSym) {
- SgForInitStatement* list =
- isSgForStatement(code)->get_for_init_stmt();
- SgStatementPtrList& initStatements = list->get_init_stmt();
- SgStatementPtrList::const_iterator j = initStatements.begin();
- const SgVariableSymbol* index;
-
- if (SgExprStatement *expr = isSgExprStatement(*j))
- if (SgAssignOp* op = isSgAssignOp(expr->get_expression()))
- if (SgVarRefExp* var_ref = isSgVarRefExp(
- op->get_lhs_operand()))
- index = var_ref->get_symbol();
-
- std::vector<SgVarRefExp *> array = substitute(code, index, NULL,
- isSgNode(body_syms));
-
- for (int j = 0; j < array.size(); j++)
- array[j]->set_symbol(idxSym);
- }
-
- SgStatement* body_ = isSgStatement(
- recursiveFindReplacePreferedIdxs(
- isSgNode((isSgForStatement(code)->get_loop_body())),
- body_syms, param_syms, body, loop_idxs, globalscope));
-
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code);
- omega::CG_outputRepr* block = tnl->clone();
- tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
-
- isSgForStatement(tnli)->set_loop_body(body_);
- body_->set_parent(tnli);
-
- if (idxSym) {
- SgForInitStatement* list =
- isSgForStatement(tnli)->get_for_init_stmt();
- SgStatementPtrList& initStatements = list->get_init_stmt();
- SgStatementPtrList::const_iterator j = initStatements.begin();
- const SgVariableSymbol* index;
-
- if (SgExprStatement *expr = isSgExprStatement(*j))
- if (SgAssignOp* op = isSgAssignOp(expr->get_expression()))
- if (SgVarRefExp* var_ref = isSgVarRefExp(
- op->get_lhs_operand()))
- index = var_ref->get_symbol();
-
- std::vector<SgVarRefExp *> array = substitute(tnli, index, NULL,
- isSgNode(body_syms));
-
- for (int j = 0; j < array.size(); j++)
- array[j]->set_symbol(idxSym);
- }
- // std::cout << isSgNode(body_)->unparseToString() << "\n\n";
- if (att != NULL)
- tnli->setAttribute("omega_comment", att);
-
- if (sync) {
- SgName name_syncthreads("__syncthreads");
- SgFunctionSymbol * syncthreads_symbol =
- globalscope->lookup_function_symbol(name_syncthreads);
-
- // Create a call to __syncthreads():
- SgFunctionCallExp * syncthreads_call = buildFunctionCallExp(
- syncthreads_symbol, buildExprListExp());
-
- SgExprStatement* stmt = buildExprStatement(syncthreads_call);
-
- /* if (SgBasicBlock* bb = isSgBasicBlock(
- isSgForStatement(code)->get_loop_body()))
- appendStatement(isSgStatement(stmt), bb);
-
- else if (SgStatement* ss = isSgStatement(
- isSgForStatement(code)->get_loop_body())) {
- SgBasicBlock* bb2 = buildBasicBlock();
-
- isSgNode(ss)->set_parent(bb2);
- appendStatement(ss, bb2);
-
- appendStatement(isSgStatement(stmt), bb2);
- isSgNode(stmt)->set_parent(bb2);
- isSgForStatement(code)->set_loop_body(bb2);
- isSgNode(bb2)->set_parent(code);
- }
- */
-
- SgBasicBlock* bb2 = buildBasicBlock();
-
- bb2->append_statement(isSgStatement(tnli));
- bb2->append_statement(stmt);
- /* SgNode* parent = code->get_parent();
- if(!isSgStatement(parent))
- throw loop_error("Parent not a statement");
-
- if(isSgForStatement(parent)){
- if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss);
- omega::CG_outputRepr* block= tnl->clone();
-
- SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
- SgBasicBlock* bb2 = buildBasicBlock();
-
- isSgNode(new_ss)->set_parent(bb2);
- appendStatement(isSgStatement(new_ss), bb2);
- appendStatement(isSgStatement(stmt), bb2);
- isSgNode(stmt)->set_parent(bb2);
-
- isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2));
-
- }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body()))
- isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false);
- else
- throw loop_error("parent statement type undefined!!");
-
- }
- else if(isSgBasicBlock(parent))
- isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false);
- else
- throw loop_error("parent statement type undefined!!");
-
- //tnl->print();
- *
- *
- */
- sync = true;
- return isSgNode(bb2);
-
- } else
- return tnli;
- } else if (isSgIfStmt(code)) {
- SgStatement* body_ = isSgStatement(
- recursiveFindReplacePreferedIdxs(
- isSgNode((isSgIfStmt(code)->get_true_body())),
- body_syms, param_syms, body, loop_idxs, globalscope));
-
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code);
- omega::CG_outputRepr* block = tnl->clone();
- tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
-
- isSgIfStmt(tnli)->set_true_body(body_);
-
- if ((isSgIfStmt(code)->get_false_body()))
- isSgIfStmt(tnli)->set_false_body(
- isSgStatement(
- recursiveFindReplacePreferedIdxs(
- isSgNode(
- (isSgIfStmt(code)->get_false_body())),
- body_syms, param_syms, body, loop_idxs,
- globalscope)));
-
- return tnli;
- } else if (isSgStatement(code) && !isSgBasicBlock(code)) {
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code);
- omega::CG_outputRepr* block = tnl->clone();
- tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
-
- return tnli;
-
- } else if (isSgBasicBlock(code)) {
- SgStatementPtrList& tnl = isSgBasicBlock(code)->get_statements();
-
- SgStatementPtrList::iterator temp;
- clone = buildBasicBlock();
- bool sync_found = false;
- for (SgStatementPtrList::const_iterator it = tnl.begin();
- it != tnl.end(); it++) {
-
- if (isSgForStatement(*it)) {
- AstTextAttribute* att =
- (AstTextAttribute*) (isSgNode(*it)->getAttribute(
- "omega_comment"));
-
- std::string comment;
- if (att != NULL)
- comment = att->toString();
-
- if (comment.find("~cuda~") != std::string::npos
- && comment.find("preferredIdx: ")
- != std::string::npos) {
- std::string idx = comment.substr(
- comment.find("preferredIdx: ") + 14,
- std::string::npos);
- if (idx.find(" ") != std::string::npos)
- idx = idx.substr(0, idx.find(" "));
- //printf("sym_tab preferred index: %s\n", idx.c_str());
- if (loop_idxs.find(idx) != loop_idxs.end())
- idxSym = loop_idxs.find(idx)->second;
- //Get the proc variable sybol for this preferred index
- if (idxSym == 0) {
- idxSym = body_syms->find_variable(idx.c_str());
- if (!idxSym)
- idxSym = param_syms->find_variable(idx.c_str());
- //printf("idx not found: lookup %p\n", idxSym);
- if (!idxSym) {
- SgVariableDeclaration* defn =
- buildVariableDeclaration(
- SgName((char*) idx.c_str()),
- buildIntType());
- //idxSym = new var_sym(type_s32, (char*)idx.c_str());
- SgInitializedNamePtrList& variables =
- defn->get_variables();
- SgInitializedNamePtrList::const_iterator i =
- variables.begin();
- SgInitializedName* initializedName = *i;
- SgVariableSymbol* vs = new SgVariableSymbol(
- initializedName);
- prependStatement(defn, body);
- vs->set_parent(body_syms);
- body_syms->insert(SgName((char*) idx.c_str()), vs);
- //printf("idx created and inserted\n");
- idxSym = vs;
- }
- //Now insert into our map for future
- if (cudaDebug)
- std::cout << idx << "\n\n";
- loop_idxs.insert(make_pair(idx, idxSym));
-
- }
- //See if we have a sync as well
- if (comment.find("sync") != std::string::npos) {
- //printf("Inserting sync after current block\n");
- sync = true;
- }
-
- }
- if (idxSym) {
- SgForInitStatement* list =
- isSgForStatement(*it)->get_for_init_stmt();
- SgStatementPtrList& initStatements = list->get_init_stmt();
- SgStatementPtrList::const_iterator j =
- initStatements.begin();
- const SgVariableSymbol* index;
-
- if (SgExprStatement *expr = isSgExprStatement(*j))
- if (SgAssignOp* op = isSgAssignOp(
- expr->get_expression()))
- if (SgVarRefExp* var_ref = isSgVarRefExp(
- op->get_lhs_operand()))
- index = var_ref->get_symbol();
-
- std::vector<SgVarRefExp *> array = substitute(*it, index,
- NULL, isSgNode(body_syms));
-
- for (int j = 0; j < array.size(); j++)
- array[j]->set_symbol(idxSym);
-
- }
-
- SgStatement* body_ =
- isSgStatement(
- recursiveFindReplacePreferedIdxs(
- isSgNode(
- (isSgForStatement(*it)->get_loop_body())),
- body_syms, param_syms, body, loop_idxs,
- globalscope));
-
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it);
- omega::CG_outputRepr* block = tnl->clone();
- tnli =
- static_cast<const omega::CG_roseRepr *>(block)->GetCode();
-
- isSgForStatement(tnli)->set_loop_body(body_);
- body_->set_parent(tnli);
- if (idxSym) {
- SgForInitStatement* list =
- isSgForStatement(tnli)->get_for_init_stmt();
- SgStatementPtrList& initStatements = list->get_init_stmt();
- SgStatementPtrList::const_iterator j =
- initStatements.begin();
- const SgVariableSymbol* index;
-
- if (SgExprStatement *expr = isSgExprStatement(*j))
- if (SgAssignOp* op = isSgAssignOp(
- expr->get_expression()))
- if (SgVarRefExp* var_ref = isSgVarRefExp(
- op->get_lhs_operand()))
- index = var_ref->get_symbol();
-
- std::vector<SgVarRefExp *> array = substitute(tnli, index,
- NULL, isSgNode(body_syms));
-
- for (int j = 0; j < array.size(); j++)
- array[j]->set_symbol(idxSym);
- }
- idxSym = 0;
- // std::cout << isSgNode(body_)->unparseToString() << "\n\n";
- if (att != NULL)
- tnli->setAttribute("omega_comment", att);
- clone->append_statement(isSgStatement(tnli));
- if (sync) {
- SgName name_syncthreads("__syncthreads");
- SgFunctionSymbol * syncthreads_symbol =
- globalscope->lookup_function_symbol(
- name_syncthreads);
-
- // Create a call to __syncthreads():
- SgFunctionCallExp * syncthreads_call = buildFunctionCallExp(
- syncthreads_symbol, buildExprListExp());
-
- SgExprStatement* stmt = buildExprStatement(
- syncthreads_call);
-
- /* if (SgBasicBlock* bb = isSgBasicBlock(
- isSgForStatement(code)->get_loop_body()))
- appendStatement(isSgStatement(stmt), bb);
-
- else if (SgStatement* ss = isSgStatement(
- isSgForStatement(code)->get_loop_body())) {
- SgBasicBlock* bb2 = buildBasicBlock();
-
- isSgNode(ss)->set_parent(bb2);
- appendStatement(ss, bb2);
-
- appendStatement(isSgStatement(stmt), bb2);
- isSgNode(stmt)->set_parent(bb2);
- isSgForStatement(code)->set_loop_body(bb2);
- isSgNode(bb2)->set_parent(code);
- }
- */
-
- //SgBasicBlock* bb2 = buildBasicBlock();
- clone->append_statement(stmt);
- /* SgNode* parent = code->get_parent();
- if(!isSgStatement(parent))
- throw loop_error("Parent not a statement");
-
- if(isSgForStatement(parent)){
- if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss);
- omega::CG_outputRepr* block= tnl->clone();
-
- SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode();
- SgBasicBlock* bb2 = buildBasicBlock();
-
- isSgNode(new_ss)->set_parent(bb2);
- appendStatement(isSgStatement(new_ss), bb2);
- appendStatement(isSgStatement(stmt), bb2);
- isSgNode(stmt)->set_parent(bb2);
-
- isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2));
-
- }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body()))
- isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false);
- else
- throw loop_error("parent statement type undefined!!");
-
- }
- else if(isSgBasicBlock(parent))
- isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false);
- else
- throw loop_error("parent statement type undefined!!");
-
- //tnl->print();
- *
- *
- */
- sync = true;
- // return isSgNode(bb2);
-
- }
-
- // return tnli;
- } else if (isSgIfStmt(*it)) {
- SgStatement* body_ = isSgStatement(
- recursiveFindReplacePreferedIdxs(
- isSgNode((isSgIfStmt(*it)->get_true_body())),
- body_syms, param_syms, body, loop_idxs,
- globalscope));
-
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it);
- omega::CG_outputRepr* block = tnl->clone();
- tnli1 =
- static_cast<const omega::CG_roseRepr *>(block)->GetCode();
-
- isSgIfStmt(tnli1)->set_true_body(body_);
-
- if ((isSgIfStmt(*it)->get_false_body()))
- isSgIfStmt(tnli1)->set_false_body(
- isSgStatement(
- recursiveFindReplacePreferedIdxs(
- isSgNode(
- (isSgIfStmt(*it)->get_false_body())),
- body_syms, param_syms, body,
- loop_idxs, globalscope)));
-
- clone->append_statement(isSgStatement(tnli1));
- //return tnli;
- } else if (isSgStatement(*it)) {
- omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it);
- omega::CG_outputRepr* block = tnl->clone();
- tnli2 =
- static_cast<const omega::CG_roseRepr *>(block)->GetCode();
-
- clone->append_statement(isSgStatement(tnli2));
- //return tnli;
-
- }
- }
-
- return isSgNode(clone);
-
- }
-
- /* if (!isSgBasicBlock(
- recursiveFindReplacePreferedIdxs(isSgNode(*it), body_syms,
- param_syms, body, loop_idxs, globalscope))) {
- SgStatement *to_push = isSgStatement(
- recursiveFindReplacePreferedIdxs(isSgNode(*it),
- body_syms, param_syms, body, loop_idxs,
- globalscope, sync));
- clone->append_statement(to_push);
-
- if ((sync_found) && isSgForStatement(to_push)) {
- SgName name_syncthreads("__syncthreads");
- SgFunctionSymbol * syncthreads_symbol =
- globalscope->lookup_function_symbol(
- name_syncthreads);
-
- // Create a call to __syncthreads():
- SgFunctionCallExp * syncthreads_call = buildFunctionCallExp(
- syncthreads_symbol, buildExprListExp());
-
- SgExprStatement* stmt = buildExprStatement(
- syncthreads_call);
-
- clone->append_statement(isSgStatement(stmt));
- }
- // std::cout<<isSgNode(*it)->unparseToString()<<"\n\n";
- } else {
-
- SgStatementPtrList& tnl2 = isSgBasicBlock(
- recursiveFindReplacePreferedIdxs(isSgNode(*it),
- body_syms, param_syms, body, loop_idxs,
- globalscope))->get_statements();
- for (SgStatementPtrList::const_iterator it2 = tnl2.begin();
- it2 != tnl2.end(); it2++) {
- clone->append_statement(*it2);
-
- sync_found = true;
- // std::cout<<isSgNode(*it2)->unparseToString()<<"\n\n";
- }
- }
-
- }
- return isSgNode(clone);
- }
- */
-// return tnl;
-}
-
-// loop_vars -> array references
-// loop_idxs -> <idx_name,idx_sym> map for when we encounter a loop with a different preferredIndex
-// dim_vars -> out param, fills with <old,new> var_sym pair for 2D array dimentions (messy stuff)
-SgNode* swapVarReferences(SgNode* code,
- std::set<const SgVariableSymbol *>& syms, SgSymbolTable* param,
- SgSymbolTable* body, SgScopeStatement* body_stmt) {
- //Iterate over every expression, looking up each variable and type
- //reference used and possibly replacing it or adding it to our symbol
- //table
- //
- //We use the built-in cloning helper methods to seriously help us with this!
-
- //Need to do a recursive mark
-
- std::set<const SgVariableSymbol *>::iterator myIterator;
- for (myIterator = syms.begin(); myIterator != syms.end(); myIterator++) {
- SgName var_name = (*myIterator)->get_name();
- std::string x = var_name.getString();
-
- if ((param->find_variable(var_name) == NULL)
- && (body->find_variable(var_name) == NULL)) {
- SgInitializedName* decl = (*myIterator)->get_declaration();
-
- SgVariableSymbol* dvs = new SgVariableSymbol(decl);
- SgVariableDeclaration* var_decl = buildVariableDeclaration(
- dvs->get_name(), dvs->get_type());
-
- AstTextAttribute* att = (AstTextAttribute*) (isSgNode(
- decl->get_declaration())->getAttribute("__shared__"));
- if (isSgNode(decl->get_declaration())->attributeExists(
- "__shared__"))
- var_decl->get_declarationModifier().get_storageModifier().setCudaShared();
-
- appendStatement(var_decl, body_stmt);
-
- dvs->set_parent(body);
- body->insert(var_name, dvs);
- }
-
- std::vector<SgVarRefExp *> array = substitute(code, *myIterator, NULL,
- isSgNode(body));
-
- SgVariableSymbol* var = (SgVariableSymbol*) (*myIterator);
- for (int j = 0; j < array.size(); j++)
- array[j]->set_symbol(var);
- }
-
- return code;
-}
-
-bool LoopCuda::validIndexes(int stmt, const std::vector<std::string>& idxs) {
- for (int i = 0; i < idxs.size(); i++) {
- bool found = false;
- for (int j = 0; j < idxNames[stmt].size(); j++) {
- if (strcmp(idxNames[stmt][j].c_str(), idxs[i].c_str()) == 0) {
- found = true;
- }
- }
- if (!found) {
- return false;
- }
- }
- return true;
-}
-
-bool LoopCuda::cudaize_v2(std::string kernel_name,
- std::map<std::string, int> array_dims,
- std::vector<std::string> blockIdxs,
- std::vector<std::string> threadIdxs) {
- CG_outputBuilder *ocg = ir->builder();
- int stmt_num = 0;
- if (cudaDebug) {
- printf("cudaize_v2(%s, {", kernel_name.c_str());
- //for(
- printf("}, blocks={");
- printVs(blockIdxs);
- printf("}, thread={");
- printVs(threadIdxs);
- printf("})\n");
- }
-
- this->array_dims = array_dims;
- if (!validIndexes(stmt_num, blockIdxs)) {
- throw std::runtime_error("One of the indexes in the block list was not "
- "found in the current set of indexes.");
- }
- if (!validIndexes(stmt_num, threadIdxs)) {
- throw std::runtime_error(
- "One of the indexes in the thread list was not "
- "found in the current set of indexes.");
- }
- if (blockIdxs.size() == 0)
- throw std::runtime_error("Cudaize: Need at least one block dimention");
- int block_level = 0;
- //Now, we will determine the actual size (if possible, otherwise
- //complain) for the block dimentions and thread dimentions based on our
- //indexes and the relations for our stmt;
- for (int i = 0; i < blockIdxs.size(); i++) {
- int level = findCurLevel(stmt_num, blockIdxs[i]);
- int ub, lb;
- CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb);
- if (lb != 0) {
- //attempt to "normalize" the loop with an in-place tile and then re-check our bounds
- if (cudaDebug)
- printf(
- "Cudaize: doing tile at level %d to try and normalize lower bounds\n",
- level);
- tile(stmt_num, level, 1, level, CountedTile);
- idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); //TODO: possibly handle this for all sibling stmts
- ubrepr = extractCudaUB(stmt_num, level, ub, lb);
- }
- if (lb != 0) {
- char buf[1024];
- sprintf(buf,
- "Cudaize: Loop at level %d does not have 0 as it's lower bound",
- level);
- throw std::runtime_error(buf);
- }
- if (ub < 0) {
- char buf[1024];
- sprintf(buf,
- "Cudaize: Loop at level %d does not have a hard upper bound",
- level);
- //Anand: Commenting out error indication for lack of constant upper bound
- //throw std::runtime_error(buf);
- }
- if (cudaDebug)
- printf("block idx %s level %d lb: %d ub %d\n", blockIdxs[i].c_str(),
- level, lb, ub);
- if (i == 0) {
- block_level = level;
- if (ubrepr == NULL) {
- cu_bx = ub + 1;
- cu_bx_repr = NULL;
- } else {
- cu_bx = 0;
- cu_bx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
- }
- idxNames[stmt_num][level - 1] = "bx";
- } else if (i == 1) {
- if (ubrepr == NULL) {
- cu_by = ub + 1;
- cu_by_repr = NULL;
- } else {
- cu_by = 0;
- cu_by_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
- }
- idxNames[stmt_num][level - 1] = "by";
- }
- }
- if (!cu_by && !cu_by_repr)
- block_level = 0;
- int thread_level1 = 0;
- int thread_level2 = 0;
- for (int i = 0; i < threadIdxs.size(); i++) {
- int level = findCurLevel(stmt_num, threadIdxs[i]);
- int ub, lb;
- CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb);
- if (lb != 0) {
- //attempt to "normalize" the loop with an in-place tile and then re-check our bounds
- if (cudaDebug)
- printf(
- "Cudaize: doing tile at level %d to try and normalize lower bounds\n",
- level);
- tile(stmt_num, level, 1, level, CountedTile);
- idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), "");
- ubrepr = extractCudaUB(stmt_num, level, ub, lb);
- }
- if (lb != 0) {
- char buf[1024];
- sprintf(buf,
- "Cudaize: Loop at level %d does not have 0 as it's lower bound",
- level);
- throw std::runtime_error(buf);
- }
- if (ub < 0) {
- char buf[1024];
- sprintf(buf,
- "Cudaize: Loop at level %d does not have a hard upper bound",
- level);
- //Anand: Commenting out error indication for lack of constant upper bound
- //throw std::runtime_error(buf);
- }
-
- if (cudaDebug)
- printf("thread idx %s level %d lb: %d ub %d\n",
- threadIdxs[i].c_str(), level, lb, ub);
- if (i == 0) {
- thread_level1 = level;
- if (ubrepr == NULL) {
- cu_tx = ub + 1;
- cu_tx_repr = NULL;
- } else {
- cu_tx = 0;
- cu_tx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
- }
- idxNames[stmt_num][level - 1] = "tx";
- } else if (i == 1) {
- thread_level2 = level;
- if (ubrepr == NULL) {
- cu_ty = ub + 1;
- cu_ty_repr = NULL;
- } else {
- cu_ty = 0;
- cu_ty_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
- }
- idxNames[stmt_num][level - 1] = "ty";
- } else if (i == 2) {
- if (ubrepr == NULL) {
- cu_tz = ub + 1;
- cu_tz_repr = NULL;
- } else {
- cu_tz = 0;
- cu_tz_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1));
- }
- idxNames[stmt_num][level - 1] = "tz";
- }
- }
- if (!cu_ty && !cu_ty_repr)
- thread_level1 = 0;
- if (!cu_tz && !cu_tz_repr)
- thread_level2 = 0;
-
- //Make changes to nonsplitlevels
- const int m = stmt.size();
- for (int i = 0; i < m; i++) {
- if (block_level) {
- //stmt[i].nonSplitLevels.append((block_level)*2);
- stmt_nonSplitLevels[i].push_back((block_level) * 2);
- }
- if (thread_level1) {
- //stmt[i].nonSplitLevels.append((thread_level1)*2);
- stmt_nonSplitLevels[i].push_back((thread_level1) * 2);
- }
- if (thread_level2) {
- //stmt[i].nonSplitLevels.append((thread_level1)*2);
- stmt_nonSplitLevels[i].push_back((thread_level1) * 2);
- }
- }
-
- if (cudaDebug) {
- printf("Codegen: current names: ");
- printVS(idxNames[stmt_num]);
- }
- //Set codegen flag
- code_gen_flags |= GenCudaizeV2;
-
- //Save array dimention sizes
- this->array_dims = array_dims;
- cu_kernel_name = kernel_name.c_str();
-
-}
-
-/*
- * setupConstantVar
- * handles constant variable declaration
- * and adds a global constant variable
- * parameters:
- * constant - the constant_memory_mapping object for this loop
- * arr_def - the VarDefs object for the mapped variable
- * globals - Rose Global variables
- * i - an index to keep new variable names unique
- * symtab - global symbol table
- */
-static void setupConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, int i, SgSymbolTable* symtab) {
- char* buf1 = new char[32];
- snprintf(buf1, 32, "cs%dRef", i+1);
- arr_def->secondName = buf1;
-
- char buf2[64];
- snprintf(buf2, 64, "__device__ __constant__ float");
-
- SgVariableDeclaration* consvar_decl = buildVariableDeclaration(
- SgName(std::string(buf1)), buildArrayType(
- buildOpaqueType(SgName(buf2),globals),
- arr_def->size_expr));
- SgInitializedNamePtrList& variables = consvar_decl->get_variables();
- SgInitializedNamePtrList::const_iterator j = variables.begin();
- SgInitializedName* initializedName = *j;
- SgVariableSymbol* consvar_sym = new SgVariableSymbol(initializedName);
- prependStatement(consvar_decl, globals);
-
- consvar_sym->set_parent(symtab);
- symtab->insert(SgName(std::string(buf1)), consvar_sym);
-
- constant->set_mapped_symbol(arr_def->original_name.c_str(), consvar_sym);
- constant->set_vardef(arr_def->original_name.c_str(), arr_def);
-}
-
-/*
- * cudaBindConstantVar
- * allocs a variable to constant memory
- * constant - the constant mapping object
- * arr_def - the VarDefs abject
- * globals - global symbol table
- * stmt_list - the GPU functions' statement list
- */
-static void cudaBindConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) {
- SgName cudaMemcpyToSymbol_name("cudaMemcpyToSymbol");
- SgFunctionDeclaration* cudaMemcpyToSymbol_decl = buildNondefiningFunctionDeclaration(
- cudaMemcpyToSymbol_name, buildVoidType(), buildFunctionParameterList(), globals);
- SgExprListExp* args = buildExprListExp();
- args->append_expression(buildCastExp(constant->get_mapped_symbol_exp(arr_def->original_name.c_str()),
- buildPointerType(buildVoidType())));
- args->append_expression(buildVarRefExp(arr_def->in_data));
- args->append_expression(arr_def->size_expr);
- stmt_list->push_back(buildExprStatement(
- buildFunctionCallExp(buildFunctionRefExp(cudaMemcpyToSymbol_decl), args)));
-}
-
-static void consmapArrayRefs(constant_memory_mapping* constant, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder* ocg) {
- // if constant mapping is not being used, ignore this function
- if(constant == NULL) return;
- for(int i = 0; i < refs->size(); i++) {
- IR_ArrayRef* aref = (*refs)[i];
- if(constant->is_array_mapped(aref->name().c_str())) {
- // get array reference dimensions
- int dims = aref->symbol()->n_dim();
- if(dims > 2) {
- printf(" \n CHiLL does not handle constant memory mapping for more than 2D arrays.\n");
- return;
- }
-
- SgExpression* varexp = constant->get_mapped_symbol_exp(aref->name().c_str());
- SgExpression* index_exp;
- // build index expression
- if(dims == 1) {
- index_exp = static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression();
- }
- if(dims == 2) {
- VarDefs* arr_def = constant->get_vardef(aref->name().c_str());
- CG_outputRepr* i0 = aref->index(0)->clone();
- CG_outputRepr* i1 = aref->index(1)->clone();
- CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0]));
- CG_outputRepr* exp = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1);
- index_exp = static_cast<omega::CG_roseRepr*>(exp->clone())->GetExpression();
- }
- ir->ReplaceExpression(aref, new CG_roseRepr(buildPntrArrRefExp(varexp, index_exp)));
- }
- }
-}
-
-/*
- * setupTexmappingVar
- * handles texture variable declaration
- * and adds a global texture object
- * parameters:
- * texture - the texture_memory_mapping object
- * arr_def - the VarDefs object for the mapped variable
- * globals - Rose Global variables
- * i - an index to keep the new variable names unique
- * devptr_sym - the devptr that the original variable is associated with
- * symtab - GPU function symbol table
- */
-static void setupTexmappingVar(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, int i, SgVariableSymbol* devptr_sym, SgSymbolTable* symtab) {
- char* buf1 = new char[32];
- snprintf(buf1, 32, "tex%dRef", i+1);
- arr_def->secondName = buf1;
-
- char buf2[64];
- // single-dimensional
- snprintf(buf2, 64, "texture<float, %d, cudaReadModeElementType>", 1);
- // multi-dimensional
- // snprintf(buf2, 64, "texture<float, %d, cudaReadModeElemetType>", (int)(arr_def->size_multi_dim.size())); //*/
-
- SgVariableDeclaration* texvar_decl = buildVariableDeclaration(SgName(std::string(buf1)), buildOpaqueType(buf2, globals));
-
- SgInitializedNamePtrList& variables = texvar_decl->get_variables();
- SgInitializedNamePtrList::const_iterator j = variables.begin();
- SgInitializedName* initializedName = *j;
- SgVariableSymbol* texvar_sym = new SgVariableSymbol(initializedName);
- prependStatement(texvar_decl, globals);
-
- texvar_sym->set_parent(symtab);
- symtab->insert(SgName(buf1), texvar_sym);
-
- texture->set_mapped_symbol(arr_def->original_name.c_str(), texvar_sym);
- texture->set_devptr_symbol(arr_def->original_name.c_str(), devptr_sym);
- texture->set_vardef(arr_def->original_name.c_str(), arr_def);
-}
-
-
-/*
- * One dimensional version of cudaBindTexture
- * see cudaBindTexture for details
- */
-static SgFunctionCallExp* cudaBindTexture1D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) {
- SgName cudaBindTexture_name("cudaBindTexture");
- SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration(
- cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals);
-
- SgExprListExp* args = buildExprListExp();
- args->append_expression(buildIntVal(0));
- args->append_expression(texture->get_mapped_symbol_exp(arr_def->original_name.c_str()));
- args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str()));
- args->append_expression(arr_def->size_expr);
- return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args);
-}
-
-/*
- * Two dimensional version of cudaBindTexture
- * see cudaBindTexture for details
- */
-//static SgFunctionCallExp* cudaBindTexture2D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) {
-// SgName cudaBindTexture_name("cudaBindTexture2D");
-// SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration(
-// cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals);
-//
-// SgExprListExp* args = buildExprListExp();
-// args->append_expression(buildIntVal(0));
-// args->append_expression(texture->get_tex_mapped_symbol_exp(arr_def->original_name.c_str()));
-// args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str()));
-// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 0)));
-// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 1)));
-// args->append_expression(arr_def->size_expr);
-// return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args);
-//}
-
-/*
- * cudaBindTexture
- * binds a variable to a texture
- * parameters:
- * texture - the texture mapping object
- * arr_def - the VarDefs object
- * globals - global symbol table
- * stmt_list - the GPU functions' statement list
- * notes:
- * only supports binding 1D textures, may need to consider cudaBindTexture2D for 2D textures
- */
-static void cudaBindTexture(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) {
- //int dims = (int)(arr_def->size_multi_dim.size());
- //int dims = texture->get_dims(arr_def->original_name.c_str());
- //if(dims == 1)
- stmt_list->push_back(
- buildExprStatement(cudaBindTexture1D(texture, arr_def, globals)));
- //if(dims == 2)
- // stmt_list->push_back(
- // buildExprStatement(cudaBindTexture2D(texture, arr_def, globals)));
-}
-
-/*
- * texmapArrayRefs
- * maps array reference expresions of texture mapped variables to the tex1D function
- * parameters:
- * texture - the texture mapping object
- * refs - a list of all array read operations
- * globals - global symbol table
- * ir - handles IR_Code operations
- * ocg - handles CG_roseBuilder operations
-**/
-static void texmapArrayRefs(texture_memory_mapping* texture, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder *ocg) {
- // if texture mapping is not being used, ignore this function
- if(texture == NULL) return;
- for(int i = 0; i < refs->size(); i++) {
- IR_ArrayRef* aref = (*refs)[i];
- if(texture->is_array_mapped(aref->name().c_str())) {
-
- // get array dimensions
- VarDefs* arr_def = texture->get_vardef(aref->name().c_str());
- int dims = aref->symbol()->n_dim();
- if(dims > 2) {
- printf(" \n CHiLL does not handle texture mapping for more than 2D arrays.\n");
- // TODO throw some sort of error. or handle in texture_copy function
- return;
- }
-
- // build texture lookup function declaration
- char texNDfetch_strName[16];
- sprintf(texNDfetch_strName, "tex%dDfetch", 1); // for now, only support tex1Dfetch
- //sprintf(texNDfetch_strName, "tex%dDfetch", dims);
- SgFunctionDeclaration* fetch_decl = buildNondefiningFunctionDeclaration(
- SgName(texNDfetch_strName), buildFloatType(), buildFunctionParameterList(), globals);
-
- // build args
- SgExprListExp* args = buildExprListExp();
- args->append_expression(texture->get_mapped_symbol_exp(aref->name().c_str()));
-
- // set indexing args
- //for(int i = 0; i < dims; i++) {
- // args->append_expression((static_cast<omega::CG_roseRepr*>(aref->index(i)->clone()))->GetExpression());
- //}
- if(dims == 1) {
- args->append_expression(static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression());
- }
- else if(dims == 2) {
- CG_outputRepr* i0 = aref->index(0)->clone();
- CG_outputRepr* i1 = aref->index(1)->clone();
- CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0]));
- CG_outputRepr* expr = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1);
- args->append_expression(static_cast<omega::CG_roseRepr*>(expr->clone())->GetExpression());
- }
-
- // build function call and replace original array ref
- SgFunctionCallExp* fetch_call = buildFunctionCallExp(buildFunctionRefExp(fetch_decl), args);
- ir->ReplaceExpression(aref, new CG_roseRepr(fetch_call));
- }
- }
-}
-
-SgNode* LoopCuda::cudaize_codegen_v2() {
- if(cudaDebug)
- printf("cudaize codegen V2\n");
- CG_roseBuilder *ocg = dynamic_cast<CG_roseBuilder*>(ir->builder());
- if (!ocg)
- return false;
-
- //protonu--adding an annote to track texture memory type
- //ANNOTE(k_cuda_texture_memory, "cuda texture memory", TRUE);
- //ANNOTE(k_cuda_constant_memory, "cuda constant memory", TRUE);
- int tex_mem_on = 0;
- int cons_mem_on = 0;
-
-
-
- CG_outputRepr* repr;
- std::vector<VarDefs> arrayVars;
- std::vector<VarDefs> localScopedVars;
-
- std::vector<IR_ArrayRef *> ro_refs;
- std::vector<IR_ArrayRef *> wo_refs;
- std::set<std::string> uniqueRefs;
- std::set<std::string> uniqueWoRefs;
- std::set<const SgVariableSymbol *> syms;
- std::set<const SgVariableSymbol *> psyms;
- std::set<const SgVariableSymbol *> pdSyms;
- SgStatementPtrList* replacement_list = new SgStatementPtrList;
-
- for (int j = 0; j < stmt.size(); j++) {
- std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[j].code);
- for (int i = 0; i < refs.size(); i++) {
- //printf("ref %s wo %d\n", static_cast<const char*>(refs[i]->name()), refs[i]->is_write());
- SgVariableSymbol* var = body_symtab->find_variable(
- SgName((char*) refs[i]->name().c_str()));
- SgVariableSymbol* var2 = parameter_symtab->find_variable(
- SgName((char*) refs[i]->name().c_str()));
-
- //If the array is not a parameter, then it's a local array and we
- //want to recreate it as a stack variable in the kernel as opposed to
- //passing it in.
- if (var != NULL) {
- //anand-- needs modification, if variable is parameter it wont be part of the
- // block's symbol table but the functiond definition's symbol table
-
- continue;
- }
- if (uniqueRefs.find(refs[i]->name()) == uniqueRefs.end()) {
-
- uniqueRefs.insert(refs[i]->name());
- if (refs[i]->is_write()) {
- uniqueWoRefs.insert(refs[i]->name());
- wo_refs.push_back(refs[i]);
- } else
- ro_refs.push_back(refs[i]);
- }
- if (refs[i]->is_write()
- && uniqueWoRefs.find(refs[i]->name())
- == uniqueWoRefs.end()) {
- uniqueWoRefs.insert(refs[i]->name());
- wo_refs.push_back(refs[i]);
- //printf("adding %s to wo\n", static_cast<const char*>(refs[i]->name()));
- }
- pdSyms.insert((const SgVariableSymbol*) var2);
- }
- }
-
- if (cudaDebug) {
- printf("reading from array ");
- for (int i = 0; i < ro_refs.size(); i++)
- printf("'%s' ", ro_refs[i]->name().c_str());
- printf("and writing to array ");
- for (int i = 0; i < wo_refs.size(); i++)
- printf("'%s' ", wo_refs[i]->name().c_str());
- printf("\n");
- }
- const char* gridName = "dimGrid";
- const char* blockName = "dimBlock";
-
- //TODO: Could allow for array_dims_vars to be a mapping from array
- //references to to variable names that define their length.
- SgVariableSymbol* dim1 = 0;
- SgVariableSymbol* dim2 = 0;
-
- for (int i = 0; i < wo_refs.size(); i++) {
- //TODO: Currently assume all arrays are floats of one or two dimentions
- SgVariableSymbol* outArray = 0;
- std::string name = wo_refs[i]->name();
- outArray = body_symtab->find_variable(SgName((char*) name.c_str()));
- int size_n_d;
- if (outArray == NULL)
- outArray = parameter_symtab->find_variable(
- SgName((char*) name.c_str()));
-
- VarDefs v;
- v.size_multi_dim = std::vector<int>();
- char buf[32];
- snprintf(buf, 32, "devO%dPtr", i + 1);
- v.name = buf;
- if (isSgPointerType(outArray->get_type())) {
- if (isSgArrayType(
- isSgNode(
- isSgPointerType(outArray->get_type())->get_base_type()))) {
- // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type();
- SgType* t =
- isSgPointerType(outArray->get_type())->get_base_type();
- /* SgExprListExp* dimList = t->get_dim_info();
- SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
- SgExpression* expr=NULL;
- for (; j != dimList->get_expressions().end(); j++)
- expr = *j;
- */
- while (isSgArrayType(t))
- t = isSgArrayType(t)->get_base_type();
-
- if (!isSgType(t)) {
- char buf[1024];
- sprintf(buf, "CudaizeCodeGen: Array type undetected!");
- throw std::runtime_error(buf);
-
- }
-
- v.type = t;
- } else
- v.type = isSgPointerType(outArray->get_type())->get_base_type();
- } else if (isSgArrayType(outArray->get_type())) {
- if (isSgArrayType(
- isSgNode(
- isSgArrayType(outArray->get_type())->get_base_type()))) {
- // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type();
- SgType* t =
- isSgArrayType(outArray->get_type())->get_base_type();
- /* SgExprListExp* dimList = t->get_dim_info();
- SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
- SgExpression* expr=NULL;
- for (; j != dimList->get_expressions().end(); j++)
- expr = *j;
- */
- while (isSgArrayType(t))
- t = isSgArrayType(t)->get_base_type();
-
- if (!isSgType(t)) {
- char buf[1024];
- sprintf(buf, "CudaizeCodeGen: Array type undetected!");
- throw std::runtime_error(buf);
-
- }
-
- v.type = t;
- } else
- v.type = isSgArrayType(outArray->get_type())->get_base_type();
- } else
- v.type = buildFloatType();
- v.tex_mapped = false;
- v.cons_mapped = false;
- v.original_name = wo_refs[i]->name();
- //Size of the array = dim1 * dim2 * num bytes of our array type
-
- //If our input array is 2D (non-linearized), we want the actual
- //dimentions of the array
- CG_outputRepr* size;
- //Lookup in array_dims
- std::map<std::string, int>::iterator it = array_dims.find(name.c_str());
- if (isSgPointerType(outArray->get_type())
- && isSgArrayType(
- isSgNode(
- isSgPointerType(outArray->get_type())->get_base_type()))) {
- SgType* t = isSgPointerType(outArray->get_type())->get_base_type();
- /* SgExprListExp* dimList = t->get_dim_info();
- SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
- SgExpression* expr=NULL;
- for (; j != dimList->get_expressions().end(); j++)
- expr = *j;
- */
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- size_n_d =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- t = isSgArrayType(t)->get_base_type();
- while (isSgArrayType(t)) {
- int dim;
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- dim =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- dim = (int) isSgIntVal(lhs)->get_value()
- + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- dim = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- dim = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- dim =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- size_n_d *= dim;
- v.size_multi_dim.push_back(dim);
- t = isSgArrayType(t)->get_base_type();
- }
- //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value());
-
- if (cudaDebug)
- printf("Detected Multi-dimensional array sized of %d for %s\n",
- size_n_d, (char*) wo_refs[i]->name().c_str());
- size = ocg->CreateInt(size_n_d);
- } else if (isSgArrayType(outArray->get_type())
- && isSgArrayType(
- isSgNode(
- isSgArrayType(outArray->get_type())->get_base_type()))) {
- SgType* t = outArray->get_type();
- /* SgExprListExp* dimList = t->get_dim_info();
- SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
- SgExpression* expr=NULL;
- for (; j != dimList->get_expressions().end(); j++)
- expr = *j;
- */
-
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- size_n_d =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- t = isSgArrayType(t)->get_base_type();
- while (isSgArrayType(t)) {
- int dim;
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- dim =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- dim = (int) isSgIntVal(lhs)->get_value()
- + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- dim = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- dim = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- dim =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- size_n_d *= dim;
- v.size_multi_dim.push_back(dim);
- t = isSgArrayType(t)->get_base_type();
- }
-
- //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value());
-
- if (cudaDebug)
- printf("Detected Multi-Dimensional array sized of %d for %s\n",
- size_n_d, (char*) wo_refs[i]->name().c_str());
- size = ocg->CreateInt(size_n_d);
- } else if (it != array_dims.end()) {
- int ref_size = it->second;
- //size =
- // ocg->CreateInt(
- // isSgIntVal(
- // isSgArrayType(outArray->get_type())->get_index())->get_value());
- //v.size_2d = isSgArrayType(outArray->get_type())->get_rank();
- //v.var_ref_size = ref_size;
- size = ocg->CreateInt(ref_size);
-
- } else {
- if (dim1) {
- size = ocg->CreateTimes(
- new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))),
- new CG_roseRepr(isSgExpression(buildVarRefExp(dim2))));
- } else {
- char buf[1024];
- sprintf(buf,
- "CudaizeCodeGen: Array reference %s does not have a "
- "detectable size or specififed dimentions",
- name.c_str());
- throw std::runtime_error(buf);
- }
- }
-
- v.size_expr =
- static_cast<CG_roseRepr*>(ocg->CreateTimes(size,
- new omega::CG_roseRepr(
- isSgExpression(buildSizeOfOp(v.type)))))->GetExpression();
-
- v.in_data = 0;
- v.out_data = outArray;
- //Check for in ro_refs and remove it at this point
- std::vector<IR_ArrayRef *>::iterator it_;
- for (it_ = ro_refs.begin(); it_ != ro_refs.end(); it_++) {
- if ((*it_)->name() == wo_refs[i]->name()) {
- break;
- }
- }
- if (it_ != ro_refs.end()) {
- v.in_data = outArray;
- ro_refs.erase(it_);
- }
-
- arrayVars.push_back(v);
-
- }
-
- //protonu-- assuming that all texture mapped memories were originally read only mems
- //there should be safety checks for that, will implement those later
-
- for (int i = 0; i < ro_refs.size(); i++) {
- SgVariableSymbol* inArray = 0;
- std::string name = ro_refs[i]->name();
- inArray = body_symtab->find_variable(SgName((char*) name.c_str()));
- if (inArray == NULL)
- inArray = parameter_symtab->find_variable(
- SgName((char*) name.c_str()));
-
- VarDefs v;
- v.size_multi_dim = std::vector<int>();
- char buf[32];
- snprintf(buf, 32, "devI%dPtr", i + 1);
- v.name = buf;
- int size_n_d;
- if (isSgPointerType(inArray->get_type())) {
- if (isSgArrayType(
- isSgNode(
- isSgPointerType(inArray->get_type())->get_base_type()))) {
-
- SgType* t =
- isSgPointerType(inArray->get_type())->get_base_type();
-
- while (isSgArrayType(t))
- t = isSgArrayType(t)->get_base_type();
-
- if (!isSgType(t)) {
- char buf[1024];
- sprintf(buf, "CudaizeCodeGen: Array type undetected!");
- throw std::runtime_error(buf);
-
- }
- v.type = t;
- } else
- v.type = isSgPointerType(inArray->get_type())->get_base_type();
- } else if (isSgArrayType(inArray->get_type())) {
- if (isSgArrayType(
- isSgNode(
- isSgArrayType(inArray->get_type())->get_base_type()))) {
-
- SgType* t = inArray->get_type();
- while (isSgArrayType(t))
- t = isSgArrayType(t)->get_base_type();
-
- if (!isSgType(t)) {
- char buf[1024];
- sprintf(buf, "CudaizeCodeGen: Array type undetected!");
- throw std::runtime_error(buf);
-
- }
- v.type = t;
- } else
- v.type = isSgArrayType(inArray->get_type())->get_base_type();
- }
-
- else
- v.type = buildFloatType();
-
- v.tex_mapped = false;
- v.cons_mapped = false;
- v.original_name = ro_refs[i]->name();
-
- //derick -- adding texture and constant mapping
- if ( texture != NULL)
- v.tex_mapped = (texture->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars
- if (v.tex_mapped){
- printf("this variable %s is mapped to texture memory", name.c_str());
- }
- //derick -- this is commented out until constant memory is implemeted
- if ( constant_mem != NULL)
- v.cons_mapped = (constant_mem->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars
- if (v.cons_mapped){
- printf("this variable %s is mapped to constant memory", name.c_str());
- }
-
- //Size of the array = dim1 * dim2 * num bytes of our array type
- //If our input array is 2D (non-linearized), we want the actual
- //dimentions of the array (as it might be less than cu_n
- CG_outputRepr* size;
- //Lookup in array_dims
- std::map<std::string, int>::iterator it = array_dims.find(name.c_str());
- if (isSgPointerType(inArray->get_type())
- && isSgArrayType(
- isSgPointerType(inArray->get_type())->get_base_type())) {
- //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type());
- //v.size_2d = t->get_rank();
- SgType* t = isSgPointerType(inArray->get_type())->get_base_type();
- /* SgExprListExp* dimList = t->get_dim_info();
- SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
- SgExpression* expr=NULL;
- for (; j != dimList->get_expressions().end(); j++)
- expr = *j;
- */
- //v.size_2d = 1;
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- size_n_d =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- t = isSgArrayType(t)->get_base_type();
- while (isSgArrayType(t)) {
- int dim;
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- dim =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- dim = (int) isSgIntVal(lhs)->get_value()
- + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- dim = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- dim = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- dim =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- size_n_d *= dim;
- v.size_multi_dim.push_back(dim);
- t = isSgArrayType(t)->get_base_type();
- }
- if (cudaDebug)
- printf("Detected Multi-dimensional array sized of %d for %s\n",
- size_n_d, (char*) ro_refs[i]->name().c_str());
- size = ocg->CreateInt(size_n_d);
- } else if (isSgArrayType(inArray->get_type())
- && isSgArrayType(
- isSgArrayType(inArray->get_type())->get_base_type())) {
- //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type());
- //v.size_2d = t->get_rank();
- SgType* t = inArray->get_type();
- /* SgExprListExp* dimList = t->get_dim_info();
- SgExpressionPtrList::iterator j= dimList->get_expressions().begin();
- SgExpression* expr=NULL;
- for (; j != dimList->get_expressions().end(); j++)
- expr = *j;
- */
-
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d =
- (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index()))
- size_n_d = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- size_n_d = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- size_n_d =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- t = isSgArrayType(t)->get_base_type();
- while (isSgArrayType(t)) {
- int dim;
- if (isSgIntVal(isSgArrayType(t)->get_index()))
- dim =
- (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgLongIntVal(isSgArrayType(t)->get_index()))
- dim = (int) (isSgLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index()))
- dim = (int) (isSgUnsignedLongLongIntVal(
- isSgArrayType(t)->get_index())->get_value());
- else if (isSgAddOp(isSgArrayType(t)->get_index())) {
- SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index());
-
- SgExpression *lhs = op_add->get_lhs_operand();
- SgExpression *rhs = op_add->get_rhs_operand();
-
- if (isSgIntVal(lhs))
- dim = (int) isSgIntVal(lhs)->get_value()
- + (int) (isSgIntVal(rhs)->get_value());
- else if (isSgUnsignedIntVal(lhs))
- dim = (int) isSgUnsignedIntVal(lhs)->get_value()
- + (int) isSgUnsignedIntVal(rhs)->get_value();
- else if (isSgUnsignedLongVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgUnsignedLongVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongLongIntVal(lhs))
- dim = (int) (isSgLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongVal(rhs)->get_value());
- else if (isSgLongIntVal(lhs))
- dim = (int) (isSgLongIntVal(lhs)->get_value()
- + isSgLongIntVal(rhs)->get_value());
- else if (isSgUnsignedLongLongIntVal(lhs))
- dim =
- (int) (isSgUnsignedLongLongIntVal(lhs)->get_value()
- + isSgUnsignedLongLongIntVal(rhs)->get_value());
-
- }
- size_n_d *= dim;
- v.size_multi_dim.push_back(dim);
- t = isSgArrayType(t)->get_base_type();
- }
- if (cudaDebug)
- printf("Detected Multi-Dimensional array sized of %d for %s\n",
- size_n_d, (char*) ro_refs[i]->name().c_str());
- size = ocg->CreateInt(size_n_d);
- }
-
- else if (it != array_dims.end()) {
- int ref_size = it->second;
- // v.var_ref_size = ref_size;
- size = ocg->CreateInt(ref_size);
- } else {
- if (dim1) {
- size = ocg->CreateTimes(
- new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))),
- new CG_roseRepr(isSgExpression(buildVarRefExp(dim2))));
- } else {
- char buf[1024];
- sprintf(buf,
- "CudaizeCodeGen: Array reference %s does not have a "
- "detectable size or specififed dimentions",
- name.c_str());
- throw std::runtime_error(buf);
- }
- }
- v.size_expr =
- static_cast<CG_roseRepr*>(ocg->CreateTimes(size,
- new omega::CG_roseRepr(
- isSgExpression(buildSizeOfOp(v.type)))))->GetExpression();
-
- v.in_data = inArray;
- v.out_data = 0;
- arrayVars.push_back(v);
- }
-
- if (arrayVars.size() < 2) {
- fprintf(stderr,
- "cudaize error: Did not find two arrays being accessed\n");
- return false;
- }
-
- //protonu--debugging tool--the printf statement
- //tex_mem_on signals use of tex mem
- /* derick -- texmapping near malloc mcopy
- for(int i=0; i<arrayVars.size(); i++)
- {
- //printf("var name %s, tex_mem used %s\n", arrayVars[i].name.c_str(), (arrayVars[i].tex_mapped)?"true":"false");
- if (arrayVars[i].tex_mapped ) tex_mem_on ++;
- //if (arrayVars[i].cons_mapped ) cons_mem_on ++;
- }
- */
-
- //Add our mallocs (and input array memcpys)
- for (int i = 0; i < arrayVars.size(); i++) {
- if(arrayVars[i].cons_mapped) {
- setupConstantVar(constant_mem, &arrayVars[i], globals, i, symtab);
- SgStatementPtrList *tnl = new SgStatementPtrList;
- cudaBindConstantVar(constant_mem, &arrayVars[i], globals, tnl);
- setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
- }
- else {
- SgVariableDeclaration* defn = buildVariableDeclaration(
- SgName(arrayVars[i].name.c_str()),
- buildPointerType(arrayVars[i].type));
- SgInitializedNamePtrList& variables = defn->get_variables();
- SgInitializedNamePtrList::const_iterator j = variables.begin();
- SgInitializedName* initializedName = *j;
- SgVariableSymbol* dvs = new SgVariableSymbol(initializedName);
- prependStatement(defn, func_body);
-
- dvs->set_parent(body_symtab);
- body_symtab->insert(SgName(arrayVars[i].name.c_str()), dvs);
-
-// SgVariableSymbol* dvs = body_symtab->find_variable(SgName(arrayVars[i].name.c_str()));
-
- // if(dvs == NULL)
- // dvs = parameter_symtab->find_variable(SgName(arrayVars[i].name.c_str()));
-
- //cudaMalloc args
- // SgBasicBlock* block = buildBasicBlock();
- SgName name_cuda_malloc("cudaMalloc");
- SgFunctionDeclaration * decl_cuda_malloc =
- buildNondefiningFunctionDeclaration(name_cuda_malloc,
- buildVoidType(), buildFunctionParameterList(), globals);
-
- SgName name_cuda_copy("cudaMemcpy");
- SgFunctionDeclaration * decl_cuda_copy =
- buildNondefiningFunctionDeclaration(name_cuda_copy,
- buildVoidType(), buildFunctionParameterList(), globals);
-
- SgExprListExp* args = buildExprListExp();
- args->append_expression(
- buildCastExp(buildAddressOfOp(buildVarRefExp(dvs)),
- buildPointerType(buildPointerType(buildVoidType()))));
- args->append_expression(arrayVars[i].size_expr);
-
-// decl_cuda_malloc->get_parameterList()->append_arg
- SgFunctionCallExp *the_call = buildFunctionCallExp(
- buildFunctionRefExp(decl_cuda_malloc), args);
-
- SgExprStatement* stmt = buildExprStatement(the_call);
-
- // (*replacement_list).push_back (stmt);
-
- SgStatementPtrList* tnl = new SgStatementPtrList;
- (*tnl).push_back(stmt);
- setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
- if (arrayVars[i].in_data) {
-
- SgExprListExp * cuda_copy_in_args = buildExprListExp();
- cuda_copy_in_args->append_expression(
- isSgExpression(buildVarRefExp(dvs)));
- cuda_copy_in_args->append_expression(
- isSgExpression(buildVarRefExp(arrayVars[i].in_data)));
- CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr);
- cuda_copy_in_args->append_expression(
- static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression());
- cuda_copy_in_args->append_expression(
- buildOpaqueVarRefExp("cudaMemcpyHostToDevice", globals));
-
-// cuda_copy_in_args->append_expression(
-// new SgVarRefExp(sourceLocation, )
-// );
- SgFunctionCallExp * cuda_copy_in_func_call = buildFunctionCallExp(
- buildFunctionRefExp(decl_cuda_copy), cuda_copy_in_args);
-
- SgExprStatement* stmt = buildExprStatement(cuda_copy_in_func_call);
-
- SgStatementPtrList *tnl = new SgStatementPtrList;
- (*tnl).push_back(stmt);
- setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
-
- if(arrayVars[i].tex_mapped) {
- setupTexmappingVar(texture, &arrayVars[i], globals, i, dvs, symtab);
- SgStatementPtrList *tnl = new SgStatementPtrList;
- cudaBindTexture(texture, &arrayVars[i], globals, tnl);
- setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl));
- }
- }
- }
- }
-
- //Build dimGrid dim3 variables based on loop dimentions and ti/tj
- char blockD1[120];
- char blockD2[120];
- if (dim1) {
- snprintf(blockD1, 120, "%s/%d",
- dim1->get_declaration()->get_name().getString().c_str(), cu_tx);
- snprintf(blockD2, 120, "%s/%d",
- dim2->get_declaration()->get_name().getString().c_str(), cu_ty);
- } else {
- snprintf(blockD1, 120, "%d", cu_bx);
- snprintf(blockD2, 120, "%d", cu_by);
- //snprintf(blockD1, 120, "%d/%d", cu_nx, cu_tx);
- //snprintf(blockD2, 120, "%d/%d", cu_ny, cu_ty);
- }
-
- SgInitializedName* arg1 = buildInitializedName("i", buildIntType());
- SgInitializedName* arg2 = buildInitializedName("j", buildIntType());
- SgInitializedName* arg3 = buildInitializedName("k", buildIntType());
- SgName type_name("dim3");
- //SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(type_name);
-
- //ROSE_ASSERT(type_symbol != NULL);
-
- //SgClassDeclaration * dim3classdecl = isSgClassDeclaration(
- // type_symbol->get_declaration());
-
- SgFunctionDeclaration * funcdecl = buildNondefiningFunctionDeclaration(
- SgName("dim3"), buildOpaqueType("dim3", globalScope),
- //isSgType(dim3classdecl->get_type()),
- buildFunctionParameterList(arg1, arg2, arg3), globalScope);
-
- if (cu_bx && cu_by)
- repr = ocg->CreateDim3((const char*) gridName, ocg->CreateInt(cu_bx),
- ocg->CreateInt(cu_by));
- else if (cu_bx_repr && cu_by_repr)
- repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, cu_by_repr);
- else if (cu_bx_repr)
- repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr,
- ocg->CreateInt(1));
- setup_code = ocg->StmtListAppend(setup_code, repr);
- //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList();
-
- //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++)
- // (*replacement_list).push_back (*it);
-
- // repr = ocg->CreateDim3((const char*)blockName, cu_tx,cu_ty);
-
- if (cu_tz > 1 || cu_tz_repr) {
-
- if (cu_tx && cu_ty && cu_tz)
- repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx),
- ocg->CreateInt(cu_ty), ocg->CreateInt(cu_tz));
- else if (cu_tx_repr && cu_ty_repr && cu_tz_repr)
- repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr,
- cu_tz_repr);
- // SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList();
-
- // for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++)
- // (*replacement_list).push_back (*it);
-
- } else {
- if (cu_tx && cu_ty)
- repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx),
- ocg->CreateInt(cu_ty));
- else if (cu_tx_repr && cu_ty_repr)
- repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr);
- //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList();
-
- //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++)
- // (*replacement_list).push_back (*it);
-
- }
-
- setup_code = ocg->StmtListAppend(setup_code, repr);
-
- SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig(
- buildVarRefExp(gridName), buildVarRefExp(blockName), NULL, NULL);
- //SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig(buildIntVal(cu_bx), , NULL, NULL);
- SgExprListExp* iml = new SgExprListExp();
- SgCastExp* dim_s;
-
- //Creating Kernel function
- SgBasicBlock* bb = new SgBasicBlock(TRANSFORMATION_FILE_INFO);
- SgFunctionDefinition* kernel_defn = new SgFunctionDefinition(
- TRANSFORMATION_FILE_INFO, bb);
- SgFunctionDeclaration* kernel_decl_ = new SgFunctionDeclaration(
- TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn);
- SgFunctionDeclaration* kernel_decl = new SgFunctionDeclaration(
- TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn);
-
- //((kernel_decl->get_declarationModifier()).get_storageModifier()).setStatic();
-
- kernel_decl->set_definingDeclaration(kernel_decl);
- kernel_defn->set_parent(kernel_decl);
- bb->set_parent(kernel_defn);
- bb->set_endOfConstruct(TRANSFORMATION_FILE_INFO);
- bb->get_endOfConstruct()->set_parent(bb);
-
- //SgFunctionSymbol* functionSymbol = new SgFunctionSymbol(kernel_decl_);
- //globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()),
- // functionSymbol);
- SgFunctionSymbol* functionSymbol2 = new SgFunctionSymbol(kernel_decl);
-
- globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()),
- functionSymbol2);
-
- kernel_decl_->set_parent(globals);
-
- kernel_decl_->set_scope(globals);
-
- kernel_decl_->setForward();
-
- globals->prepend_declaration(kernel_decl_);
-
- kernel_decl->set_endOfConstruct(TRANSFORMATION_FILE_INFO);
- kernel_decl->get_endOfConstruct()->set_parent(kernel_decl);
-
- kernel_decl->set_parent(globals);
- kernel_decl->set_scope(globals);
-
- kernel_decl->get_definition()->set_endOfConstruct(TRANSFORMATION_FILE_INFO);
- kernel_decl->get_definition()->get_endOfConstruct()->set_parent(
- kernel_decl->get_definition());
-
- globals->append_statement(kernel_decl);
-
- //printf("%s %s\n", static_cast<const char*>(cu_kernel_name), dims);
- //--derick - kernel function parameters
- for (int i = 0; i < arrayVars.size(); i++)
- //Throw in a type cast if our kernel takes 2D array notation
- //like (float(*) [1024])
- {
- //protonu--throwing in another hack to stop the caller from passing tex mapped
- //vars to the kernel.
- if (arrayVars[i].tex_mapped == true || arrayVars[i].cons_mapped)
- continue;
- if (!(arrayVars[i].size_multi_dim.empty())) {
- //snprintf(dims,120,"(float(*) [%d])%s", arrayVars[i].size_2d,
- // const_cast<char*>(arrayVars[i].name.c_str()));
-
- SgType* t = arrayVars[i].type;
- for (int k = arrayVars[i].size_multi_dim.size() - 1; k >= 0; k--) {
- t = buildArrayType(t,
- buildIntVal(arrayVars[i].size_multi_dim[k]));
- }
- SgVariableSymbol* temp = body_symtab->find_variable(
- SgName((char*) arrayVars[i].name.c_str()));
- if (temp == NULL)
- temp = parameter_symtab->find_variable(
- SgName((char*) arrayVars[i].name.c_str()));
-
- dim_s = buildCastExp(buildVarRefExp(temp), buildPointerType(t),
- SgCastExp::e_C_style_cast);
-
- //printf("%d %s\n", i, dims);
- iml->append_expression(dim_s);
-
- SgInitializedName* id = buildInitializedName(
- (char*) arrayVars[i].original_name.c_str(),
- buildPointerType(t));
- kernel_decl->get_parameterList()->append_arg(id);
- kernel_decl_->get_parameterList()->append_arg(id);
- id->set_file_info(TRANSFORMATION_FILE_INFO);
-
- // DQ (9/8/2007): We now test this, so it has to be set explicitly.
- id->set_scope(kernel_decl->get_definition());
-
- // DQ (9/8/2007): Need to add variable symbol to global scope!
- //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n",globalScope,globalScope->class_name().c_str(),var1_init_name,var1_init_name->get_name().str());
- SgVariableSymbol *var_symbol = new SgVariableSymbol(id);
- kernel_decl->get_definition()->insert_symbol(id->get_name(),
- var_symbol);
-
- // if(kernel_decl->get_definition()->get_symbol_table()->find((const) id) == NULL)
-
- } else {
- //printf("%d %s\n", i, static_cast<const char*>(arrayVars[i].name));
- SgVariableSymbol* temp = body_symtab->find_variable(
- SgName((char*) arrayVars[i].name.c_str()));
- if (temp == NULL)
- temp = parameter_symtab->find_variable(
- SgName((char*) arrayVars[i].name.c_str()));
- iml->append_expression(buildVarRefExp(temp));
- SgInitializedName* id = buildInitializedName(
- (char*) arrayVars[i].original_name.c_str(),
- buildPointerType(arrayVars[i].type));
- kernel_decl->get_parameterList()->append_arg(id);
- kernel_decl_->get_parameterList()->append_arg(id);
- id->set_file_info(TRANSFORMATION_FILE_INFO);
-
- // DQ (9/8/2007): We now test this, so it has to be set explicitly.
- id->set_scope(kernel_decl->get_definition());
-
- // DQ (9/8/2007): Need to add variable symbol to global scope!
- //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n"$
- SgVariableSymbol *var_symbol = new SgVariableSymbol(id);
- kernel_decl->get_definition()->insert_symbol(id->get_name(),
- var_symbol);
-
- }
-
- }
- if (dim1) {
- iml->append_expression(buildVarRefExp(dim1));
- SgInitializedName* id = buildInitializedName(
- dim1->get_name().getString().c_str(), dim1->get_type());
- kernel_decl->get_parameterList()->append_arg(id);
-
- iml->append_expression(buildVarRefExp(dim2));
- SgInitializedName* id2 = buildInitializedName(
- dim2->get_name().getString().c_str(), dim2->get_type());
-
- kernel_decl->get_parameterList()->append_arg(id);
- kernel_decl_->get_parameterList()->append_arg(id);
- }
-
- kernel_decl->get_functionModifier().setCudaKernel();
- kernel_decl_->get_functionModifier().setCudaKernel();
- SgCudaKernelCallExp * cuda_call_site = new SgCudaKernelCallExp(
- TRANSFORMATION_FILE_INFO, buildFunctionRefExp(kernel_decl), iml,config);
-
- // SgStatementPtrList *tnl2 = new SgStatementPtrList;
-
- (*replacement_list).push_back(buildExprStatement(cuda_call_site));
-
- setup_code = ocg->StmtListAppend(setup_code,
- new CG_roseRepr(replacement_list));
-
- //cuda free variables
- for (int i = 0; i < arrayVars.size(); i++) {
- if (arrayVars[i].out_data) {
-
- SgName name_cuda_copy("cudaMemcpy");
- SgFunctionDeclaration * decl_cuda_copyout =
- buildNondefiningFunctionDeclaration(name_cuda_copy,
- buildVoidType(), buildFunctionParameterList(),
- globals);
-
- SgExprListExp* args = buildExprListExp();
- SgExprListExp * cuda_copy_out_args = buildExprListExp();
- cuda_copy_out_args->append_expression(
- isSgExpression(buildVarRefExp(arrayVars[i].out_data)));
- cuda_copy_out_args->append_expression(
- isSgExpression(buildVarRefExp(arrayVars[i].name)));
- CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr);
- cuda_copy_out_args->append_expression(
- static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression());
- cuda_copy_out_args->append_expression(
- buildOpaqueVarRefExp("cudaMemcpyDeviceToHost", globals));
-
-// cuda_copy_in_args->append_expression(
-// new SgVarRefExp(sourceLocation, )
-// );
- SgFunctionCallExp * cuda_copy_out_func_call = buildFunctionCallExp(
- buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args);
-
- SgFunctionCallExp *the_call = buildFunctionCallExp(
- buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args);
-
- SgExprStatement* stmt = buildExprStatement(the_call);
-
- SgStatementPtrList* tnl3 = new SgStatementPtrList;
-
- (*tnl3).push_back(stmt);
-
- // tree_node_list* tnl = new tree_node_list;
- // tnl->append(new tree_instr(the_call));
- setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl3));
-
- }
- if(!arrayVars[i].cons_mapped) {
- SgName name_cuda_free("cudaFree");
- SgFunctionDeclaration * decl_cuda_free =
- buildNondefiningFunctionDeclaration(name_cuda_free,
- buildVoidType(), buildFunctionParameterList(), globals);
-
- SgExprListExp* args3 = buildExprListExp();
-
- SgVariableSymbol* tmp = body_symtab->find_variable(
- SgName(arrayVars[i].name.c_str()));
- if (tmp == NULL)
- tmp = parameter_symtab->find_variable(
- SgName(arrayVars[i].name.c_str()));
-
- args3->append_expression(buildVarRefExp(tmp));
-
- SgFunctionCallExp *the_call2 = buildFunctionCallExp(
- buildFunctionRefExp(decl_cuda_free), args3);
-
- SgExprStatement* stmt2 = buildExprStatement(the_call2);
-
- SgStatementPtrList* tnl4 = new SgStatementPtrList;
-
- (*tnl4).push_back(stmt2);
- //(*replacement_list).push_back (stmt2);
-
- setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl4));
- }
- }
-
- // ---------------
- // BUILD THE KERNEL
- // ---------------
-
- //Extract out kernel body
- SgNode* code = getCode();
- //Create kernel function body
- //Add Params
- std::map<std::string, SgVariableSymbol*> loop_vars;
- //In-Out arrays
- for (int i = 0; i < arrayVars.size(); i++) {
- /* if(arrayVars[i].in_data)
- fptr = arrayVars[i].in_data->type()->clone();
- else
- fptr = arrayVars[i].out_data->type()->clone();
- */
-
- // fptr = new_proc_syms->install_type(fptr);
- std::string name =
- arrayVars[i].in_data ?
- arrayVars[i].in_data->get_declaration()->get_name().getString() :
- arrayVars[i].out_data->get_declaration()->get_name().getString();
- //SgVariableSymbol* sym = new var_sym(fptr, arrayVars[i].in_data ? arrayVars[i].in_data->name() : arrayVars[i].out_data->name());
-
- SgVariableSymbol* sym =
- kernel_decl->get_definition()->get_symbol_table()->find_variable(
- (const char*) name.c_str());
- /* SgVariableDeclaration* defn = buildVariableDeclaration(SgName(name.c_str()), buildFloatType());
- SgInitializedNamePtrList& variables = defn->get_variables();
- SgInitializedNamePtrList::const_iterator i = variables.begin();
- SgInitializedName* initializedName = *i;
- SgVariableSymbol* sym = new SgVariableSymbol(initializedName);
- prependStatement(defn, isSgScopeStatement(root_));
-
- vs->set_parent(symtab2_);
- symtab2_->insert(SgName(_s.c_str()), vs);
- */
-
- if (sym != NULL)
- loop_vars.insert(
- std::pair<std::string, SgVariableSymbol*>(std::string(name),
- sym));
- }
-
- //Figure out which loop variables will be our thread and block dimention variables
- std::vector<SgVariableSymbol *> loop_syms;
- //Get our indexes
- std::vector<const char*> indexes; // = get_loop_indexes(code,cu_num_reduce);
- int threadsPos = 0;
-
- CG_outputRepr *body = NULL;
- SgFunctionDefinition* func_d = func_definition;
- //std::vector<SgVariableSymbol *> symbols = recursiveFindRefs(code);
-
- SgName name_sync("__syncthreads");
- SgFunctionDeclaration * decl_sync = buildNondefiningFunctionDeclaration(
- name_sync, buildVoidType(), buildFunctionParameterList(),
- globalScope);
-
- recursiveFindRefs(code, syms, func_d);
-
- //SgFunctionDeclaration* func = Outliner::generateFunction (code, (char*)cu_kernel_name.c_str(), syms, pdSyms, psyms, NULL, globalScope);
-
- if (cu_bx > 1 || cu_bx_repr) {
- indexes.push_back("bx");
- SgName type_name("blockIdx.x");
- SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
- type_name);
- SgVariableDeclaration * var_decl = buildVariableDeclaration("bx",
- buildIntType(), NULL,
- isSgScopeStatement(kernel_decl->get_definition()->get_body()));
- SgStatementPtrList *tnl = new SgStatementPtrList;
- // (*tnl).push_back(isSgStatement(var_decl));
- appendStatement(var_decl, kernel_decl->get_definition()->get_body());
-
- SgVariableSymbol* bx =
- kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
- SgName("bx"));
- SgStatement* assign = isSgStatement(
- buildAssignStatement(buildVarRefExp(bx),
- buildOpaqueVarRefExp("blockIdx.x",
- kernel_decl->get_definition()->get_body())));
- (*tnl).push_back(assign);
- // body = ocg->StmtListAppend(body,
- // new CG_roseRepr(tnl));
- appendStatement(assign, kernel_decl->get_definition()->get_body());
-
- }
- if (cu_by > 1 || cu_by_repr) {
- indexes.push_back("by");
- SgName type_name("blockIdx.y");
- SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
- type_name);
- SgVariableDeclaration * var_decl = buildVariableDeclaration("by",
- buildIntType(), NULL,
- isSgScopeStatement(kernel_decl->get_definition()->get_body()));
- // SgStatementPtrList *tnl = new SgStatementPtrList;
- // (*tnl).push_back(isSgStatement(var_decl));
- appendStatement(var_decl, kernel_decl->get_definition()->get_body());
-
- SgVariableSymbol* by =
- kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
- SgName("by"));
- SgStatement* assign = isSgStatement(
- buildAssignStatement(buildVarRefExp(by),
- buildOpaqueVarRefExp("blockIdx.y",
- kernel_decl->get_definition()->get_body())));
- //(*tnl).push_back(assign);
- // body = ocg->StmtListAppend(body,
- // new CG_roseRepr(tnl));
- appendStatement(assign, kernel_decl->get_definition()->get_body());
-
- }
- if (cu_tx_repr || cu_tx > 1) {
- threadsPos = indexes.size();
- indexes.push_back("tx");
- SgName type_name("threadIdx.x");
- SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
- type_name);
- SgVariableDeclaration * var_decl = buildVariableDeclaration("tx",
- buildIntType(), NULL,
- isSgScopeStatement(kernel_decl->get_definition()->get_body()));
- // SgStatementPtrList *tnl = new SgStatementPtrList;
- // (*tnl).push_back(isSgStatement(var_decl));
- appendStatement(var_decl, kernel_decl->get_definition()->get_body());
-
- SgVariableSymbol* tx =
- kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
- SgName("tx"));
- SgStatement* assign = isSgStatement(
- buildAssignStatement(buildVarRefExp(tx),
- buildOpaqueVarRefExp("threadIdx.x",
- kernel_decl->get_definition()->get_body())));
- //(*tnl).push_back(assign);
- // body = ocg->StmtListAppend(body,
- // new CG_roseRepr(tnl));
- appendStatement(assign, kernel_decl->get_definition()->get_body());
-
- }
- if (cu_ty_repr || cu_ty > 1) {
- indexes.push_back("ty");
- SgName type_name("threadIdx.y");
- SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
- type_name);
- SgVariableDeclaration * var_decl = buildVariableDeclaration("ty",
- buildIntType(), NULL,
- isSgScopeStatement(kernel_decl->get_definition()->get_body()));
- appendStatement(var_decl, kernel_decl->get_definition()->get_body());
-
- // SgStatementPtrList *tnl = new SgStatementPtrList;
- // (*tnl).push_back(isSgStatement(var_decl));
- SgVariableSymbol* ty =
- kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
- SgName("ty"));
- SgStatement* assign = isSgStatement(
- buildAssignStatement(buildVarRefExp(ty),
- buildOpaqueVarRefExp("threadIdx.y",
- kernel_decl->get_definition()->get_body())));
- // (*tnl).push_back(assign);
- // body = ocg->StmtListAppend(body,
- // new CG_roseRepr(tnl));
- appendStatement(assign, kernel_decl->get_definition()->get_body());
-
- }
- if (cu_tz_repr || cu_tz > 1) {
- indexes.push_back("tz");
- SgName type_name("threadIdx.z");
- SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(
- type_name);
- SgVariableDeclaration * var_decl = buildVariableDeclaration("tz",
- buildIntType(), NULL,
- isSgScopeStatement(kernel_decl->get_definition()->get_body()));
- // SgStatementPtrList *tnl = new SgStatementPtrList;
- // (*tnl).push_back(isSgStatement(var_decl));
- appendStatement(var_decl, kernel_decl->get_definition()->get_body());
-
- SgVariableSymbol* tz =
- kernel_decl->get_definition()->get_body()->lookup_variable_symbol(
- "tz");
- SgStatement* assign = isSgStatement(
- buildAssignStatement(buildVarRefExp(tz),
- buildOpaqueVarRefExp("threadIdx.z",
- kernel_decl->get_definition()->get_body())));
- // (*tnl).push_back(assign);
- // body = ocg->StmtListAppend(body,
- // new CG_roseRepr(tnl));
- appendStatement(assign, kernel_decl->get_definition()->get_body());
-
- }
-
- std::map<std::string, SgVariableSymbol*> loop_idxs; //map from idx names to their new syms
-
- SgNode* swapped_ = swapVarReferences(code, syms,
- kernel_decl->get_definition()->get_symbol_table(),
- kernel_decl->get_definition()->get_body()->get_symbol_table(),
- kernel_decl->get_definition()->get_body());
-
- //std::cout << swapped_->unparseToString() << std::endl << std::endl;
-
- SgNode *swapped = recursiveFindReplacePreferedIdxs(swapped_,
- kernel_decl->get_definition()->get_body()->get_symbol_table(),
- kernel_decl->get_definition()->get_symbol_table(),
- kernel_decl->get_definition()->get_body(), loop_idxs, globalScope); //in-place swapping
- //swapped->print();
-
- if (!isSgBasicBlock(swapped)) {
- appendStatement(isSgStatement(swapped),
- kernel_decl->get_definition()->get_body());
- swapped->set_parent(
- isSgNode(kernel_decl->get_definition()->get_body()));
- } else {
-
- for (SgStatementPtrList::iterator it =
- isSgBasicBlock(swapped)->get_statements().begin();
- it != isSgBasicBlock(swapped)->get_statements().end(); it++) {
- appendStatement(*it, kernel_decl->get_definition()->get_body());
- (*it)->set_parent(
- isSgNode(kernel_decl->get_definition()->get_body()));
-
- }
-
- }
-
- for (int i = 0; i < indexes.size(); i++) {
- std::vector<SgForStatement*> tfs = findCommentedFors(indexes[i],
- swapped);
- for (int k = 0; k < tfs.size(); k++) {
- //printf("replacing %p tfs for index %s\n", tfs[k], indexes[i]);
- SgNode* newBlock = forReduce(tfs[k], loop_idxs[indexes[i]],
- kernel_decl->get_definition());
- //newBlock->print();
- swap_node_for_node_list(tfs[k], newBlock);
- //printf("AFTER SWAP\n"); newBlock->print();
- }
- }
-
- //--derick replace array refs of texture mapped vars here
- body = new CG_roseRepr(kernel_decl->get_definition()->get_body());
- std::vector<IR_ArrayRef*> refs = ir->FindArrayRef(body);
- texmapArrayRefs(texture, &refs, globals, ir, ocg);
- // do the same for constant mapped vars
- consmapArrayRefs(constant_mem, &refs, globals, ir, ocg);
-
- return swapped;
-}
-
-//Order taking out dummy variables
-std::vector<std::string> cleanOrder(std::vector<std::string> idxNames) {
- std::vector<std::string> results;
- for (int j = 0; j < idxNames.size(); j++) {
- if (idxNames[j].length() != 0)
- results.push_back(idxNames[j]);
- }
- return results;
-}
-
-//First non-dummy level in ascending order
-int LoopCuda::nonDummyLevel(int stmt, int level) {
- //level comes in 1-basd and should leave 1-based
- for (int j = level - 1; j < idxNames[stmt].size(); j++) {
- if (idxNames[stmt][j].length() != 0) {
- //printf("found non dummy level of %d with idx: %s when searching for %d\n", j+1, (const char*) idxNames[stmt][j], level);
- return j + 1;
- }
- }
- char buf[128];
- sprintf(buf, "%d", level);
- throw std::runtime_error(
- std::string("Unable to find a non-dummy level starting from ")
- + std::string(buf));
-}
-
-int LoopCuda::findCurLevel(int stmt, std::string idx) {
- for (int j = 0; j < idxNames[stmt].size(); j++) {
- if (strcmp(idxNames[stmt][j].c_str(), idx.c_str()) == 0)
- return j + 1;
- }
- throw std::runtime_error(
- std::string("Unable to find index ") + idx
- + std::string(" in current list of indexes"));
-}
-
-void LoopCuda::permute_cuda(int stmt,
- const std::vector<std::string>& curOrder) {
- //printf("curOrder: ");
- //printVs(curOrder);
- //printf("idxNames: ");
- //printVS(idxNames[stmt]);
- std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt]);
- bool same = true;
- std::vector<int> pi;
- for (int i = 0; i < curOrder.size(); i++) {
- bool found = false;
- for (int j = 0; j < cIdxNames.size(); j++) {
- if (strcmp(cIdxNames[j].c_str(), curOrder[i].c_str()) == 0) {
- pi.push_back(j + 1);
- found = true;
- if (j != i)
- same = false;
- }
- }
- if (!found) {
- throw std::runtime_error(
- "One of the indexes in the permute order were not "
- "found in the current set of indexes.");
- }
- }
- for (int i = curOrder.size(); i < cIdxNames.size(); i++) {
- pi.push_back(i);
- }
- if (same)
- return;
- permute(stmt, pi);
- //Set old indexe names as new
- for (int i = 0; i < curOrder.size(); i++) {
- idxNames[stmt][i] = curOrder[i].c_str(); //what about sibling stmts?
- }
-}
-
-bool LoopCuda::permute(int stmt_num, const std::vector<int> &pi) {
-// check for sanity of parameters
- if (stmt_num >= stmt.size() || stmt_num < 0)
- throw std::invalid_argument("invalid statement " + to_string(stmt_num));
- const int n = stmt[stmt_num].xform.n_out();
- if (pi.size() > (n - 1) / 2)
- throw std::invalid_argument(
- "iteration space dimensionality does not match permute dimensionality");
- int first_level = 0;
- int last_level = 0;
- for (int i = 0; i < pi.size(); i++) {
- if (pi[i] > (n - 1) / 2 || pi[i] <= 0)
- throw std::invalid_argument(
- "invalid loop level " + to_string(pi[i])
- + " in permuation");
-
- if (pi[i] != i + 1) {
- if (first_level == 0)
- first_level = i + 1;
- last_level = i + 1;
- }
- }
- if (first_level == 0)
- return true;
-
- std::vector<int> lex = getLexicalOrder(stmt_num);
- std::set<int> active = getStatements(lex, 2 * first_level - 2);
- Loop::permute(active, pi);
-}
-
-void LoopCuda::tile_cuda(int stmt, int level, int outer_level) {
- tile_cuda(stmt, level, 1, outer_level, "", "", CountedTile);
-}
-void LoopCuda::tile_cuda(int level, int tile_size, int outer_level,
- std::string idxName, std::string ctrlName, TilingMethodType method) {
- tile_cuda(0, level, tile_size, outer_level, idxName, ctrlName, method);
-}
-
-void LoopCuda::tile_cuda(int stmt, int level, int tile_size, int outer_level,
- std::string idxName, std::string ctrlName, TilingMethodType method) {
- //Do regular tile but then update the index and control loop variable
- //names as well as the idxName to reflect the current state of things.
- //printf("tile(%d,%d,%d,%d)\n", stmt, level, tile_size, outer_level);
- //printf("idxNames before: ");
- //printVS(idxNames[stmt]);
-
- tile(stmt, level, tile_size, outer_level, method);
-
- if (idxName.size())
- idxNames[stmt][level - 1] = idxName.c_str();
- if (tile_size == 1) {
- //potentially rearrange loops
- if (outer_level < level) {
- std::string tmp = idxNames[stmt][level - 1];
- for (int i = level - 1; i > outer_level - 1; i--) {
- if (i - 1 >= 0)
- idxNames[stmt][i] = idxNames[stmt][i - 1];
- }
- idxNames[stmt][outer_level - 1] = tmp;
- }
- //TODO: even with a tile size of one, you need a insert (of a dummy loop)
- idxNames[stmt].insert(idxNames[stmt].begin() + (level), "");
- } else {
- if (!ctrlName.size())
- throw std::runtime_error("No ctrl loop name for tile");
- //insert
- idxNames[stmt].insert(idxNames[stmt].begin() + (outer_level - 1),
- ctrlName.c_str());
- }
-
- //printf("idxNames after: ");
- //printVS(idxNames[stmt]);
-}
-
-bool LoopCuda::datacopy_privatized_cuda(int stmt_num, int level,
- const std::string &array_name,
- const std::vector<int> &privatized_levels, bool allow_extra_read,
- int fastest_changing_dimension, int padding_stride,
- int padding_alignment, bool cuda_shared) {
- int old_stmts = stmt.size();
- // printf("before datacopy_privatized:\n");
- printIS();
- //datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, cuda_shared);
- if (cuda_shared)
- datacopy_privatized(stmt_num, level, array_name, privatized_levels,
- allow_extra_read, fastest_changing_dimension, padding_stride,
- padding_alignment, 1);
- else
- datacopy_privatized(stmt_num, level, array_name, privatized_levels,
- allow_extra_read, fastest_changing_dimension, padding_stride,
- padding_alignment, 0);
- // printf("after datacopy_privatized:\n");
- printIS();
-
- //Adjust idxNames to reflect updated state
- std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]);
- int new_stmts = stmt.size();
- for (int i = old_stmts; i < new_stmts; i++) {
- //printf("fixing up statement %d\n", i);
- std::vector<std::string> idxs;
-
- //protonu-making sure the vector of nonSplitLevels grows along with
- //the statement structure
- stmt_nonSplitLevels.push_back(std::vector<int>());
-
- //Indexes up to level will be the same
- for (int j = 0; j < level - 1; j++)
- idxs.push_back(cIdxNames[j]);
-
- //Expect privatized_levels to match
- for (int j = 0; j < privatized_levels.size(); j++)
- idxs.push_back(cIdxNames[privatized_levels[j] - 1]);//level is one-based
-
- //all further levels should match order they are in originally
- if (privatized_levels.size()) {
- int last_privatized = privatized_levels.back();
- int top_level = last_privatized
- + (stmt[i].IS.n_set() - idxs.size());
- //printf("last privatized_levels: %d top_level: %d\n", last_privatized, top_level);
- for (int j = last_privatized; j < top_level; j++) {
- idxs.push_back(cIdxNames[j]);
- //printf("pushing back: %s\n", (const char*)cIdxNames[j]);
- }
- }
- idxNames.push_back(idxs);
- }
-}
-
-bool LoopCuda::datacopy_cuda(int stmt_num, int level,
- const std::string &array_name,
- const std::vector<std::string> new_idxs,
- bool allow_extra_read, int fastest_changing_dimension,
- int padding_stride, int padding_alignment, bool cuda_shared) {
-
- int old_stmts = stmt.size();
- //datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment,cuda_shared);
- // printf("before datacopy:\n");
- // printIS();
- if (cuda_shared)
- datacopy(stmt_num, level, array_name, allow_extra_read,
- fastest_changing_dimension, padding_stride, padding_alignment,
- 1);
- else
- datacopy(stmt_num, level, array_name, allow_extra_read,
- fastest_changing_dimension, padding_stride, padding_alignment,
- 0);
- // printf("after datacopy:\n");
- printIS();
-
- //Adjust idxNames to reflect updated state
- std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]);
- int new_stmts = stmt.size();
- for (int i = old_stmts; i < new_stmts; i++) {
- //printf("fixing up statement %d\n", i);
- std::vector<std::string> idxs;
-
- //protonu-making sure the vector of nonSplitLevels grows along with
- //the statement structure
- stmt_nonSplitLevels.push_back(std::vector<int>());
-
- //Indexes up to level will be the same
- for (int j = 0; j < level - 1; j++)
- idxs.push_back(cIdxNames[j]);
-
- //all further levels should get names from new_idxs
- int top_level = stmt[i].IS.n_set();
- //printf("top_level: %d level: %d\n", top_level, level);
- if (new_idxs.size() < top_level - level + 1)
- throw std::runtime_error(
- "Need more new index names for new datacopy loop levels");
-
- for (int j = level - 1; j < top_level; j++) {
- idxs.push_back(new_idxs[j - level + 1].c_str());
- //printf("pushing back: %s\n", new_idxs[j-level+1].c_str());
- }
- idxNames.push_back(idxs);
- }
-}
-
-bool LoopCuda::unroll_cuda(int stmt_num, int level, int unroll_amount) {
- int old_stmts = stmt.size();
- //bool b= unroll(stmt_num, , unroll_amount);
-
- int dim = 2 * level - 1;
- std::vector<int> lex = getLexicalOrder(stmt_num);
- std::set<int> same_loop = getStatements(lex, dim - 1);
-
- level = nonDummyLevel(stmt_num, level);
- //printf("unrolling %d at level %d\n", stmt_num,level);
-
- //protonu--using the new version of unroll, which returns
- //a set of ints instead of a bool. To keep Gabe's logic
- //I'll check the size of the set, if it's 0 return true
- //bool b= unroll(stmt_num, level, unroll_amount);
- std::set<int> b_set = unroll(stmt_num, level, unroll_amount, idxNames);
- bool b = false;
- if (b_set.size() == 0)
- b = true;
- //end--protonu
-
- //Adjust idxNames to reflect updated state
- std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]);
- std::vector<std::string> origSource = idxNames[stmt_num];
- ;
- //Drop index names at level
- if (unroll_amount == 0) {
- //For all statements that were in this unroll together, drop index name for unrolled level
- idxNames[stmt_num][level - 1] = "";
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++) {
- //printf("in same loop as %d is %d\n", stmt_num, (*i));
- //idxNames[(*i)][level-1] = "";
- idxNames[(*i)] = idxNames[stmt_num];
- }
- }
-
- lex = getLexicalOrder(stmt_num);
- same_loop = getStatements(lex, dim - 1);
-
- bool same_as_source = false;
- int new_stmts = stmt.size();
- for (int i = old_stmts; i < new_stmts; i++) {
- //Check whether we had a sync for the statement we are unrolling, if
- //so, propogate that to newly created statements so that if they are
- //in a different loop structure, they will also get a syncthreads
- int size = syncs.size();
- for (int j = 0; j < size; j++) {
- if (syncs[j].first == stmt_num)
- syncs.push_back(make_pair(i, syncs[j].second));
- }
-
- //protonu-making sure the vector of nonSplitLevels grows along with
- //the statement structure
- stmt_nonSplitLevels.push_back(std::vector<int>());
-
- //We expect that new statements have a constant for the variable in
- //stmt[i].IS at level (as seen with print_with_subs), otherwise there
- //will be a for loop at level and idxNames should match stmt's
- //idxNames pre-unrolled
- Relation IS = stmt[i].IS;
- //Ok, if you know how the hell to get anything out of a Relation, you
- //should probably be able to do this more elegantly. But for now, I'm
- //hacking it.
- std::string s = IS.print_with_subs_to_string();
- //s looks looks like
- //{[_t49,8,_t51,_t52,128]: 0 <= _t52 <= 3 && 0 <= _t51 <= 15 && 0 <= _t49 && 64_t49+16_t52+_t51 <= 128}
- //where level == 5, you see a integer in the input set
-
- //If that's not an integer and this is the first new statement, then
- //we think codegen will have a loop at that level. It's not perfect,
- //not sure if it can be determined without round-tripping to codegen.
- int sIdx = 0;
- int eIdx = 0;
- for (int j = 0; j < level - 1; j++) {
- sIdx = s.find(",", sIdx + 1);
- if (sIdx < 0)
- break;
- }
- if (sIdx > 0) {
- eIdx = s.find("]");
- int tmp = s.find(",", sIdx + 1);
- if (tmp > 0 && tmp < eIdx)
- eIdx = tmp; //", before ]"
- if (eIdx > 0) {
- sIdx++;
- std::string var = s.substr(sIdx, eIdx - sIdx);
- //printf("%s\n", s.c_str());
- //printf("set var for stmt %d at level %d is %s\n", i, level, var.c_str());
- if (atoi(var.c_str()) == 0 && i == old_stmts) {
- //TODO:Maybe do see if this new statement would be in the same
- //group as the original and if it would, don't say
- //same_as_source
- if (same_loop.find(i) == same_loop.end()) {
- printf(
- "stmt %d level %d, newly created unroll statement should have same level indexes as source\n",
- i, level);
- same_as_source = true;
- }
- }
- }
- }
-
- //printf("fixing up statement %d n_set %d with %d levels\n", i, stmt[i].IS.n_set(), level-1);
- if (same_as_source)
- idxNames.push_back(origSource);
- else
- idxNames.push_back(idxNames[stmt_num]);
- }
-
- return b;
-}
-
-void LoopCuda::copy_to_texture(const char *array_name) {
- //protonu--placeholder for now
- //set the bool for using cuda memory as true
- //in a vector of strings, put the names of arrays to tex mapped
- if (!texture)
- texture = new texture_memory_mapping(true, array_name);
- else
- texture->add(array_name);
-
-}
-
-//void LoopCuda::copy_to_texture_2d(const char *array_name, int width, int height) {
-// if (!texture)
-// texture = new texture_memory_mapping(true, array_name, width, height);
-// else
-// texture->add(array_name, width, height);
-//}
-
-void LoopCuda::copy_to_constant(const char *array_name) {
- if(!constant_mem)
- constant_mem = new constant_memory_mapping(true, array_name);
- else
- constant_mem->add(array_name);
-}
-
-//protonu--moving this from Loop
-SgNode* LoopCuda::codegen() {
- if (code_gen_flags & GenCudaizeV2)
- return cudaize_codegen_v2();
- //Do other flagged codegen methods, return plain vanilla generated code
- return getCode();
-}
-
-//These three are in Omega code_gen.cc and are used as a massive hack to
-//get out some info from MMGenerateCode. Yea for nasty side-effects.
-namespace omega {
- extern int checkLoopLevel;
- extern int stmtForLoopCheck;
- extern int upperBoundForLevel;
- extern int lowerBoundForLevel;
-}
-
-CG_outputRepr* LoopCuda::extractCudaUB(int stmt_num, int level,
- int &outUpperBound, int &outLowerBound) {
- // check for sanity of parameters
- const int m = stmt.size();
- if (stmt_num >= m || stmt_num < 0)
- throw std::invalid_argument("invalid statement " + to_string(stmt_num));
- const int n = stmt[stmt_num].xform.n_out();
- if (level > (n - 1) / 2 || level <= 0)
- throw std::invalid_argument("invalid loop level " + to_string(level));
-
- int dim = 2 * level - 1;
-
- std::vector<int> lex = getLexicalOrder(stmt_num);
- std::set<int> same_loop = getStatements(lex, dim - 1);
-
- // extract the intersection of the iteration space to be considered
- Relation hull;
- {
- hull = Relation::True(n);
- for (std::set<int>::iterator i = same_loop.begin();
- i != same_loop.end(); i++) {
- Relation r = getNewIS(*i);
- for (int j = dim + 2; j <= r.n_set(); j++)
- r = Project(r, r.set_var(j));
- hull = Intersection(hull, r);
- hull.simplify(2, 4);
- }
-
- for (int i = 2; i <= dim + 1; i += 2) {
- //std::string name = std::string("_t") + to_string(t_counter++);
- std::string name = std::string("_t")
- + to_string(tmp_loop_var_name_counter++);
- hull.name_set_var(i, name);
- }
- hull.setup_names();
- }
-
- // extract the exact loop bound of the dimension to be unrolled
- if (is_single_iteration(hull, dim)) {
- throw std::runtime_error(
- "No loop availabe at level to extract upper bound.");
- }
- Relation bound = get_loop_bound(hull, dim);
- if (!bound.has_single_conjunct() || !bound.is_satisfiable()
- || bound.is_tautology())
- throw loop_error(
- "loop error: unable to extract loop bound for cudaize");
-
- // extract the loop stride
- EQ_Handle stride_eq;
- /*int stride = 1;
- {
- bool simple_stride = true;
- int strides = countStrides(bound.query_DNF()->single_conjunct(),
- bound.set_var(dim + 1), stride_eq, simple_stride);
- if (strides > 1)
- throw loop_error("loop error: too many strides");
- else if (strides == 1) {
- int sign = stride_eq.get_coef(bound.set_var(dim + 1));
- // assert(sign == 1 || sign == -1);
- Constr_Vars_Iter it(stride_eq, true);
- stride = abs((*it).coef / sign);
- }
- }
- */
- int stride = 1;
- {
-
- coef_t stride;
- std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound,
- bound.set_var(dim + 1));
- if (result.second == NULL)
- stride = 1;
- else
- stride = abs(result.first.get_coef(result.second))
- / gcd(abs(result.first.get_coef(result.second)),
- abs(result.first.get_coef(bound.set_var(dim + 1))));
-
- if (stride > 1)
- throw loop_error("loop error: too many strides");
- /*else if (stride == 1) {
- int sign = result.first.get_coef(bound.set_var(dim+1));
- assert(sign == 1 || sign == -1);
- } */
- }
-
- if (stride != 1) {
- char buf[1024];
- sprintf(buf, "Cudaize: Loop at level %d has non-one stride of %d",
- level, stride);
- throw std::runtime_error(buf);
- }
-
- //Use code generation system to build tell us our bound information. We
- //need a hard upper bound a 0 lower bound.
-
- checkLoopLevel = level * 2;
- stmtForLoopCheck = stmt_num;
- upperBoundForLevel = -1;
- lowerBoundForLevel = -1;
- printCode(1, false);
- checkLoopLevel = 0;
-
- outUpperBound = upperBoundForLevel;
- outLowerBound = lowerBoundForLevel;
-
- if (outUpperBound == -1) {
-
- CG_result* temp = last_compute_cgr_;
-
- while (temp) {
- CG_loop * loop;
- if (loop = dynamic_cast<CG_loop*>(temp)) {
- if (loop->level_ == 2 * level) {
- Relation bound = copy(loop->bounds_);
- Variable_ID v = bound.set_var(2 * level);
- for (GEQ_Iterator e(
- const_cast<Relation &>(bound).single_conjunct()->GEQs());
- e; e++) {
- if ((*e).get_coef(v) < 0
- && (*e).is_const_except_for_global(v))
- return output_upper_bound_repr(ir->builder(), *e, v,
- bound,
- std::vector<std::pair<CG_outputRepr *, int> >(
- bound.n_set(),
- std::make_pair(
- static_cast<CG_outputRepr *>(NULL),
- 0)));
- }
- }
- if (loop->level_ > 2 * level)
- break;
- else
- temp = loop->body_;
- } else
- break;
- }
- }
-
- return NULL;
-}
-
-void LoopCuda::printCode(int effort, bool actuallyPrint) const {
- const int m = stmt.size();
- if (m == 0)
- return;
- const int n = stmt[0].xform.n_out();
-
- /*or (int i = 0; i < m; i++) {
- IS[i + 1] = stmt[i].IS;
- xform[i + 1] = stmt[i].xform;
-
- //nonSplitLevels[i+1] = stmt[i].nonSplitLevels;
- }
- */
-
- // invalidate saved codegen computation
- if (last_compute_cgr_ != NULL) {
- delete last_compute_cgr_;
- last_compute_cgr_ = NULL;
- }
-
- if (last_compute_cg_ != NULL) {
- delete last_compute_cg_;
- last_compute_cg_ = NULL;
- }
-
- //Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
- /*CG_stringBuilder *ocg = new CG_stringBuilder();
- Tuple<CG_outputRepr *> nameInfo;
- for (int i = 1; i <= m; i++)
- nameInfo.append(new CG_stringRepr("s" + to_string(i)));
- */
-
- // -- replacing MMGenerateCode
- // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort);
- // -- in the future, these if statements need to be cleaned up.
- // -- something like check_lastComputeCG might be a decent protected member function
- // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_
- //if (last_compute_cg_ == NULL) {
- std::vector<Relation> IS(m);
- std::vector<Relation> xforms(m);
- std::vector<std::vector<int> > nonSplitLevels(m);
-
- /* std::vector < std::vector <std::string> > idxTupleNames;
- if (useIdxNames) {
- for (int i = 0; i < idxNames.size(); i++) {
- Tuple<std::string> idxs;
- for (int j = 0; j < idxNames[i].size(); j++)
- idxs.append(idxNames[i][j]);
- idxTupleNames.append(idxs);
- }
- }
- */
- for (int i = 0; i < m; i++) {
- IS[i] = stmt[i].IS;
- xforms[i] = stmt[i].xform;
- nonSplitLevels[i] = stmt_nonSplitLevels[i];
- }
- Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
-
- last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames,
- syncs);
-
- delete last_compute_cgr_; // this was just done above?
- last_compute_cgr_ = NULL;
- //}
-
- if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
- delete last_compute_cgr_;
- last_compute_cgr_ = last_compute_cg_->buildAST(effort);
- last_compute_effort_ = effort;
- }
-
- //std::vector<CG_outputRepr *> stmts(m);
- //for (int i = 0; i < m; i++)
- // stmts[i] = stmt[i].code;
- //CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts);
- // -- end replacing MMGenerateCode
- std::string repr = last_compute_cgr_->printString();
-
- if (actuallyPrint)
- std::cout << repr << std::endl;
- //std::cout << static_cast<CG_stringRepr*>(repr)->GetString();
- /*
- for (int i = 1; i <= m; i++)
- delete nameInfo[i];
- */
-
- //delete ocg;
-}
-
-void LoopCuda::printRuntimeInfo() const {
- for (int i = 0; i < stmt.size(); i++) {
- Relation IS = stmt[i].IS;
- Relation xform = stmt[i].xform;
- printf("stmt[%d]\n", i);
- printf("IS\n");
- IS.print_with_subs();
-
- printf("xform[%d]\n", i);
- xform.print_with_subs();
-
- }
-}
-
-void LoopCuda::printIndexes() const {
- for (int i = 0; i < stmt.size(); i++) {
- printf("stmt %d nset %d ", i, stmt[i].IS.n_set());
-
- for (int j = 0; j < idxNames[i].size(); j++) {
- if (j > 0)
- printf(",");
- printf("%s", idxNames[i][j].c_str());
- }
- printf("\n");
- }
-}
-
-SgNode* LoopCuda::getCode(int effort) const {
- const int m = stmt.size();
- if (m == 0)
- return new SgNode;
- const int n = stmt[0].xform.n_out();
- /*
- Tuple<CG_outputRepr *> ni(m);
- Tuple < Relation > IS(m);
- Tuple < Relation > xform(m);
- vector < vector <int> > nonSplitLevels(m);
- for (int i = 0; i < m; i++) {
- ni[i + 1] = stmt[i].code;
- IS[i + 1] = stmt[i].IS;
- xform[i + 1] = stmt[i].xform;
- nonSplitLevels[i + 1] = stmt_nonSplitLevels[i];
-
- //nonSplitLevels[i+1] = stmt[i].nonSplitLevels;
- }
- */
- //Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
-//#ifdef DEBUG
-//#endif
- //std::cout << GetString(MMGenerateCode(new CG_stringBuilder(), xform, IS, ni, known,
- // nonSplitLevels, syncs, idxTupleNames, effort));
- if (last_compute_cgr_ != NULL) {
- delete last_compute_cgr_;
- last_compute_cgr_ = NULL;
- }
-
- if (last_compute_cg_ != NULL) {
- delete last_compute_cg_;
- last_compute_cg_ = NULL;
- }
-
- CG_outputBuilder *ocg = ir->builder();
- // -- replacing MMGenerateCode
- // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort);
- // -- in the future, these if statements need to be cleaned up.
- // -- something like check_lastComputeCG might be a decent protected member function
- // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_
- //if (last_compute_cg_ == NULL) {
- std::vector<Relation> IS(m);
- std::vector<Relation> xforms(m);
- std::vector<std::vector<int> > nonSplitLevels(m);
- for (int i = 0; i < m; i++) {
- IS[i] = stmt[i].IS;
- xforms[i] = stmt[i].xform;
- nonSplitLevels[i] = stmt_nonSplitLevels[i];
- }
-
- /*std::vector < std::vector<std::string> > idxTupleNames;
- if (useIdxNames) {
- for (int i = 0; i < idxNames.size(); i++) {
- std::vector<std::string> idxs;
- for (int j = 0; j < idxNames[i].size(); j++)
- idxs.push_back(idxNames[i][j]);
- idxTupleNames.push_back(idxs);
- }
- }
- */
- Relation known = Extend_Set(copy(this->known), n - this->known.n_set());
-
- last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames,
- syncs);
- delete last_compute_cgr_;
- last_compute_cgr_ = NULL;
- //}
-
- if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) {
- delete last_compute_cgr_;
- last_compute_cgr_ = last_compute_cg_->buildAST(effort);
- last_compute_effort_ = effort;
- }
-
- std::vector<CG_outputRepr *> stmts(m);
- for (int i = 0; i < m; i++)
- stmts[i] = stmt[i].code;
- CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts);
- // -- end replacing MMGenerateCode
-
- //CG_outputRepr *overflow_initialization = ocg->CreateStmtList();
- CG_outputRepr *overflow_initialization = ocg->StmtListAppend(NULL, NULL);
- for (std::map<int, std::vector<Free_Var_Decl *> >::const_iterator i =
- overflow.begin(); i != overflow.end(); i++)
- for (std::vector<Free_Var_Decl *>::const_iterator j = i->second.begin();
- j != i->second.end(); j++)
- //overflow_initialization = ocg->StmtListAppend(overflow_initialization, ocg->CreateStmtList(ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0))));
- overflow_initialization = ocg->StmtListAppend(
- overflow_initialization,
- ocg->StmtListAppend(
- ocg->CreateAssignment(0,
- ocg->CreateIdent((*j)->base_name()),
- ocg->CreateInt(0)), NULL));
-
- repr = ocg->StmtListAppend(overflow_initialization, repr);
- SgNode *tnl = static_cast<CG_roseRepr *>(repr)->GetCode();
- SgStatementPtrList *list = static_cast<CG_roseRepr *>(repr)->GetList();
-
- if (tnl != NULL)
- return tnl;
- else if (tnl == NULL && list != NULL) {
- SgBasicBlock* bb2 = buildBasicBlock();
-
- for (SgStatementPtrList::iterator it = (*list).begin();
- it != (*list).end(); it++)
- bb2->append_statement(*it);
-
- tnl = isSgNode(bb2);
- } else
- throw loop_error("codegen failed");
-
- delete repr;
- /*
- for (int i = 1; i <= m; i++)
- delete ni[i];
- */
- return tnl;
-
-}
-
-//protonu--adding constructors for the new derived class
-LoopCuda::LoopCuda() :
- Loop(), code_gen_flags(GenInit) {
-}
-
-LoopCuda::LoopCuda(IR_Control *irc, int loop_num) :
- Loop(irc) {
- setup_code = NULL;
- teardown_code = NULL;
- code_gen_flags = 0;
- cu_bx = cu_by = cu_tx = cu_ty = cu_tz = 1;
- cu_bx_repr = NULL;
- cu_tx_repr = NULL;
- cu_by_repr = NULL;
- cu_ty_repr = NULL;
- cu_tz_repr = NULL;
-
- cu_num_reduce = 0;
- cu_mode = GlobalMem;
- texture = NULL;
- constant_mem = NULL;
-
- int m = stmt.size();
- //printf("\n the size of stmt(initially) is: %d\n", stmt.size());
- for (int i = 0; i < m; i++)
- stmt_nonSplitLevels.push_back(std::vector<int>());
-
- globals = ((IR_cudaroseCode *) ir)->gsym_;
- globalScope = ((IR_cudaroseCode *) ir)->first_scope;
- parameter_symtab = ((IR_cudaroseCode *) ir)->parameter;
- body_symtab = ((IR_cudaroseCode *) ir)->body;
- func_body = ((IR_cudaroseCode *) ir)->defn;
- func_definition = ((IR_cudaroseCode *) ir)->func_defn;
- std::vector<SgForStatement *> tf = ((IR_cudaroseCode *) ir)->get_loops();
-
- symtab = tf[loop_num]->get_symbol_table();
-
- std::vector<SgForStatement *> deepest = find_deepest_loops(
- isSgNode(tf[loop_num]));
-
- for (int i = 0; i < deepest.size(); i++) {
- SgVariableSymbol* vs;
- SgForInitStatement* list = deepest[i]->get_for_init_stmt();
- SgStatementPtrList& initStatements = list->get_init_stmt();
- SgStatementPtrList::const_iterator j = initStatements.begin();
- if (SgExprStatement *expr = isSgExprStatement(*j))
- if (SgAssignOp* op = isSgAssignOp(expr->get_expression()))
- if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand()))
- vs = var_ref->get_symbol();
-
- index.push_back(vs->get_name().getString().c_str()); //reflects original code index names
- }
-
- for (int i = 0; i < stmt.size(); i++)
- idxNames.push_back(index); //refects prefered index names (used as handles in cudaize v2)
- useIdxNames = false;
-
-}
-
-void LoopCuda::printIS() {
- if (!cudaDebug) return;
- int k = stmt.size();
- for (int i = 0; i < k; i++) {
- printf(" printing statement:%d\n", i);
- stmt[i].IS.print();
- }
-}
-