#ifndef LOOP_HH #define LOOP_HH #include #include #include #include #include #include #include "dep.hh" #include "ir_code.hh" #include "irtools.hh" #include #include "stencil.hh" class IR_Code; enum TilingMethodType { StridedTile, CountedTile }; enum LoopLevelType { LoopLevelOriginal, LoopLevelTile, LoopLevelUnknown }; // Describes properties of each loop level of a statement. "payload" // for LoopLevelOriginal means iteration space dimension, for // LoopLevelTile means tiled loop level. Special value -1 for // LoopLevelTile means purely derived loop. For dependence dimension // payloads, the values must be in an increasing order. // "parallel_level" will be used by code generation to support // multi-level parallelization (default 0 means sequential loop under // the current parallelization level). struct LoopLevel { LoopLevelType type; int payload; int parallel_level; bool segreducible; std::string segment_descriptor; }; struct Statement { omega::CG_outputRepr *code; omega::Relation IS; omega::Relation xform; std::vector loop_level; ir_tree_node *ir_stmt_node; bool has_inspector; int reduction; // Manu:: 0 == reduction not possible, 1 == reduction possible, 2 == reduction with some processing IR_OPERATION_TYPE reductionOp; // Manu class stencilInfo *statementStencil; //protonu--temporarily putting this back here //omega::Tuple nonSplitLevels; //end--protonu. }; class Loop { protected: int tmp_loop_var_name_counter; static const std::string tmp_loop_var_name_prefix; int overflow_var_name_counter; static const std::string overflow_var_name_prefix; std::vector stmt_nesting_level_; // UNDERLINE std::vector index; std::map replace; std::map > reduced_statements; public: void debugRelations() const; IR_Code *ir; std::vector ptr_variables; std::vector freevar; std::vector stmt; std::vector actual_code; // ????? std::vector ir_stmt; std::vector ir_tree; std::set reduced_write_refs; std::map array_dims; DependenceGraph dep; int num_dep_dim; omega::Relation known; omega::CG_outputRepr *init_code; omega::CG_outputRepr *cleanup_code; std::map > overflow; std::vector > > uninterpreted_symbols; std::vector > >uninterpreted_symbols_stringrepr; protected: mutable omega::CodeGen *last_compute_cg_; mutable omega::CG_result *last_compute_cgr_; mutable int last_compute_effort_; protected: bool init_loop(std::vector &ir_tree, std::vector &ir_stmt); int get_dep_dim_of(int stmt, int level) const; int get_last_dep_dim_before(int stmt, int level) const; std::vector getNewIS() const; omega::Relation getNewIS(int stmt_num) const; std::vector getLexicalOrder(int stmt_num) const; int getLexicalOrder(int stmt_num, int level) const; std::set getStatements(const std::vector &lex, int dim) const; void shiftLexicalOrder(const std::vector &lex, int dim, int amount); void setLexicalOrder(int dim, const std::set &active, int starting_order = 0, std::vector< std::vector >idxNames= std::vector< std::vector >()); void apply_xform(int stmt_num); void apply_xform(std::set &active); void apply_xform(); std::set getSubLoopNest(int stmt_num, int level) const; int getMinLexValue(std::set stmts, int level); public: Loop() { ir = NULL; tmp_loop_var_name_counter = 1; init_code = NULL; } Loop(const IR_Control *control); ~Loop(); omega::CG_outputRepr *getCode(int effort = 3) const; // TODO was 1 //chillAST_Node* getCode(int effort, std::set stmts) const; void stencilASEPadded(int stmt_num); void printCode(int effort = 3) const; void addKnown(const omega::Relation &cond); void print_internal_loop_structure() const; bool isInitialized() const; int num_statement() const { return stmt.size(); } void printIterationSpace() const; void printDependenceGraph() const; void removeDependence(int stmt_num_from, int stmt_num_to); void dump() const; std::vector > sort_by_same_loops(std::set active, int level); // // legacy unimodular transformations for perfectly nested loops // e.g. M*(i,j)^T = (i',j')^T or M*(i,j,1)^T = (i',j')^T // bool nonsingular(const std::vector > &M); // // high-level loop transformations // void permute(const std::set &active, const std::vector &pi); void permute(int stmt_num, int level, const std::vector &pi); void permute(const std::vector &pi); void original(); void tile(int stmt_num, int level, int tile_size, int outer_level = 1, TilingMethodType method = StridedTile, int alignment_offset = 0, int alignment_multiple = 1); std::set split(int stmt_num, int level, const omega::Relation &cond); std::set unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector >idxNames= std::vector< std::vector >(), int cleanup_split_level = 0); bool datacopy(const std::vector > > &array_ref_nums, int level, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); bool datacopy_privatized(const std::vector > > &array_ref_nums, int level, const std::vector &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); bool datacopy_privatized(const std::vector > > &stmt_refs, int level, const std::vector &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type = 0); //std::set scalar_replacement_inner(int stmt_num); bool find_stencil_shape( int stmt_num ); Graph, bool> construct_induced_graph_at_level(std::vector > s, DependenceGraph dep, int dep_dim); std::vector > typed_fusion(Graph, bool> g, std::vector &types); void fuse(const std::set &stmt_nums, int level); void distribute(const std::set &stmt_nums, int level); void skew(const std::set &stmt_nums, int level, const std::vector &skew_amount); void shift(const std::set &stmt_nums, int level, int shift_amount); void scale(const std::set &stmt_nums, int level, int scale_amount); void reverse(const std::set &stmt_nums, int level); void peel(int stmt_num, int level, int peel_amount = 1); // // more fancy loop transformations // void modular_shift(int stmt_num, int level, int shift_amount) {} void diagonal_map(int stmt_num, const std::pair &levels, int offset) {} void modular_partition(int stmt_num, int level, int stride) {} void flatten(int stmt_num, std::string index_name, std::vector &loop_levels, std::string inspector_name); void normalize(int stmt_num, int loop_level); void generate_ghostcells_v2(std::vector stmt, int loop_num, int ghost_value, int hold_inner_loop_constant=0 ); // // derived loop transformations // void shift_to(int stmt_num, int level, int absolute_position); std::set unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level = 0); bool is_dependence_valid_based_on_lex_order(int i, int j, const DependenceVector &dv, bool before); void split_with_alignment(int stmt_num, int level, int alignment, int direction=0); // Manu:: reduction operation void reduce(int stmt_num, std::vector &level, int param, std::string func_name, std::vector &seq_levels, std::vector cudaized_levels = std::vector(), int bound_level = -1); void scalar_expand(int stmt_num, const std::vector &levels, std::string arrName, int memory_type =0, int padding_alignment=0, int assign_then_accumulate = 1, int padding_stride = 0); void ELLify(int stmt_num, std::vector arrays_to_pad, int pad_to, bool dense_pad = false, std::string dense_pad_pos_array = ""); void compact(int stmt_num, int level, std::string new_array, int zero, std::string data_array); void make_dense(int stmt_num, int loop_level, std::string new_loop_index); void set_array_size(std::string name, int size ); omega::CG_outputRepr * iegen_parser(std::string &str, std::vector &index_names); // // other public operations // void pragma(int stmt_num, int level, const std::string &pragmaText); void prefetch(int stmt_num, int level, const std::string &arrName, int hint); //void prefetch(int stmt_num, int level, const std::string &arrName, const std::string &indexName, int offset, int hint); }; #endif