path: root/loop_cuda.hh
author	Derick Huth <derickhuth@gmail.com>	2015-09-24 12:22:41 -0600
committer	Derick Huth <derickhuth@gmail.com>	2015-09-24 12:22:41 -0600
commit	4631ad76927d433da5d55c3c373a1dfd0f74c9d4 (patch)
tree	f8dcba88576ec95e403f0c14efd80e970f30a260 /loop_cuda.hh
parent	6eb2b89896da66a77d0dcdf2d72b98c122826949 (diff)
parent	0cff3f9a3c4ccd434900162ebef4bd814850f481 (diff)
Merge pull request #7 from dhuth/master
V0.2.1
Diffstat (limited to 'loop_cuda.hh')
-rw-r--r--	loop_cuda.hh	163
1 file changed, 0 insertions, 163 deletions
diff --git a/loop_cuda.hh b/loop_cuda.hh
deleted file mode 100644
index 15726c0..0000000
--- a/loop_cuda.hh
+++ /dev/null
@@ -1,163 +0,0 @@
-#ifndef LOOP_CUDA_HH
-#define LOOP_CUDA_HH
-
-#include "loop.hh"
-#include <string.h>
-#include <suif1.h>
-
-
-enum MemoryMode { GlobalMem, SharedMem, TexMem };
-
-//protonu -- class introduced to hold texture memory information in a single place;
-//this might help get past the weird memory issues I am having with the Loop class,
-//where someone/something corrupts my memory
-class texture_memory_mapping{
-private:
- bool tex_mem_used;
- std::vector< std::string > tex_mapped_array_name;
-public:
- texture_memory_mapping ( bool used, const char * array_name){
- tex_mem_used = used;
- tex_mapped_array_name.push_back(std::string(array_name));
- }
-
- void add(const char * array_name) {
- tex_mapped_array_name.push_back(std::string(array_name));
- }
-
- bool is_tex_mem_used() {return tex_mem_used;}
- bool is_array_tex_mapped(const char * array_name){
-
-    for (size_t i = 0; i < tex_mapped_array_name.size(); i++) {
-      if (strcmp(array_name, tex_mapped_array_name[i].c_str()) == 0)
-        return true;
- }
- return false;
- }
- texture_memory_mapping() {tex_mem_used = false;}
-};
-
-//protonu -- class introduced to hold constant memory information in a single place;
-//this might help get past the weird memory issues I am having with the Loop class,
-//where someone/something corrupts my memory
-class constant_memory_mapping{
-private:
- bool cons_mem_used;
- std::vector< std::string > cons_mapped_array_name;
-public:
- constant_memory_mapping ( bool used, const char * array_name){
- cons_mem_used = used;
- cons_mapped_array_name.push_back(std::string(array_name));
- }
-
- void add(const char * array_name) {
- cons_mapped_array_name.push_back(std::string(array_name));
- }
-
- bool is_cons_mem_used() {return cons_mem_used;}
- bool is_array_cons_mapped(const char * array_name){
-
-    for (size_t i = 0; i < cons_mapped_array_name.size(); i++) {
-      if (strcmp(array_name, cons_mapped_array_name[i].c_str()) == 0)
-        return true;
- }
- return false;
- }
- constant_memory_mapping() {cons_mem_used = false;}
-};
-
-
-class LoopCuda: public Loop{
-
-public:
-  std::vector<proc_sym*> new_procs; //need to be added to an fse (SUIF file_set_entry)
- std::vector< std::vector<std::string> > idxNames;
- std::vector< std::pair<int, std::string> > syncs;
- bool useIdxNames;
- std::vector<std::string> index;
- proc_symtab *symtab;
- global_symtab *globals;
-
-  //protonu--inserting this here; Gabe's implementation had it
-  //in the statement struct as nonSplitLevels
- std::vector<omega::Tuple<int> > stmt_nonSplitLevels;
-
- texture_memory_mapping *texture; //protonu
- constant_memory_mapping *constant_mem; //protonu
- std::map<std::string, int> array_dims;
- omega::CG_outputRepr *setup_code;
- omega::CG_outputRepr *teardown_code;
-
- unsigned int code_gen_flags;
- enum CodeGenFlags {
- GenInit = 0x00,
- GenCudaizeV2 = 0x02,
- };
-
-
-  //variables used by cudaize_codegen
-  //block x, y sizes, N, and num_reduce
- int cu_bx, cu_by, cu_n, cu_num_reduce;
- //block statement and level
- int cu_block_stmt, cu_block_level;
- //thread x, y, z
- int cu_tx, cu_ty, cu_tz;
-  //tile statements and loop levels (cudaize v1)
- std::vector< std::vector<int> > cu_thread_loop;
- std::vector<int> cu_thread_sync;
- MemoryMode cu_mode;
-
- std::string cu_nx_name, cu_ny_name, cu_kernel_name;
- int nonDummyLevel(int stmt, int level);
- bool symbolExists(std::string s);
- void addSync(int stmt, std::string idx);
- void renameIndex(int stmt, std::string idx, std::string newName);
- bool validIndexes(int stmt, const std::vector<std::string>& idxs);
- void extractCudaUB(int stmt_num, int level, int &outUpperBound, int &outLowerBound);
-
- void printCode(int effort=1, bool actuallyPrint=true) const;
- void printRuntimeInfo() const;
- void printIndexes() const;
- tree_node_list* getCode(int effort = 1) const;
-
-
- void permute_cuda(int stmt, const std::vector<std::string>& curOrder);
-  //protonu--writing a wrapper for Chun's new permute function
- bool permute(int stmt_num, const std::vector<int> &pi);
- //end--protonu.
- void tile_cuda(int stmt, int level, int outer_level);
- void tile_cuda(int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method=StridedTile);
- void tile_cuda(int stmt, int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method=StridedTile);
- bool datacopy_privatized_cuda(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, bool cuda_shared=false);
- bool datacopy_cuda(int stmt_num, int level, const std::string &array_name, std::vector<std::string> new_idxs, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, bool cuda_shared=false);
- bool unroll_cuda(int stmt_num, int level, int unroll_amount);
- //protonu--using texture memory
- void copy_to_texture(const char *array_name);
- //protonu--using constant memory
- void copy_to_constant(const char *array_name);
- int findCurLevel(int stmt, std::string idx);
-  /**
-   * Map the loop nest onto a CUDA kernel (v2 interface).
-   *
-   * @param kernel_name Name of the GPU kernel to generate
-   * @param array_dims Sizes of the arrays referenced by the kernel
-   * @param blockIdxs Loop index names mapped to the CUDA block dimensions
-   * @param threadIdxs Loop index names mapped to the CUDA thread dimensions
-   *
-   * Statement numbers are referenced from inside the cudaized block loops.
-   */
- bool cudaize_v2(std::string kernel_name, std::map<std::string, int> array_dims,
- std::vector<std::string> blockIdxs, std::vector<std::string> threadIdxs);
- tree_node_list* cudaize_codegen_v2();
- tree_node_list* codegen();
-
- //protonu--have to add the constructors for the new class
- //and maybe destructors (?)
- LoopCuda();
- //LoopCuda(IR_Code *ir, tree_for *tf, global_symtab* gsym);
- LoopCuda(IR_Control *ir_c, int loop_num);//protonu-added so as to not change ir_suif
- ~LoopCuda();
-
-};
-
-#endif
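
For readers tracking where this functionality went, below is a minimal standalone sketch of how the deleted texture_memory_mapping class was used: LoopCuda::copy_to_texture recorded array names in the mapping, and code generation later queried is_array_tex_mapped for each array reference. The main() driver and the trimmed class copy are illustrative only, not part of the CHILL sources; constant_memory_mapping behaved identically for constant memory.

// Standalone illustration of the deleted texture_memory_mapping class.
// The class body is a trimmed copy from the removed header; main() is a
// hypothetical driver, not CHILL code.
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

class texture_memory_mapping {
private:
  bool tex_mem_used;
  std::vector<std::string> tex_mapped_array_name;
public:
  texture_memory_mapping() : tex_mem_used(false) {}
  texture_memory_mapping(bool used, const char *array_name)
      : tex_mem_used(used) {
    tex_mapped_array_name.push_back(std::string(array_name));
  }
  // Record another array as texture-mapped.
  void add(const char *array_name) {
    tex_mapped_array_name.push_back(std::string(array_name));
  }
  bool is_tex_mem_used() { return tex_mem_used; }
  // Linear scan over the recorded names, matching by C-string comparison.
  bool is_array_tex_mapped(const char *array_name) {
    for (size_t i = 0; i < tex_mapped_array_name.size(); i++)
      if (std::strcmp(array_name, tex_mapped_array_name[i].c_str()) == 0)
        return true;
    return false;
  }
};

int main() {
  // copy_to_texture("A") followed by copy_to_texture("B") would in effect do:
  texture_memory_mapping texture(true, "A");
  texture.add("B");

  // During cudaize_codegen, each array reference is checked against the map:
  std::cout << texture.is_array_tex_mapped("B") << "\n"; // prints 1
  std::cout << texture.is_array_tex_mapped("C") << "\n"; // prints 0
  return 0;
}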