//===- TensorOps.h - Linalg dialect TensorOps operation definition -------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

#ifndef LINALG2_TENSOROPS_H_
#define LINALG2_TENSOROPS_H_

#include "mlir/IR/OpDefinition.h"
#include "mlir/Support/LLVM.h"

namespace mlir {
class AffineForOp;
} // namespace mlir

namespace linalg {

/// A generic TensorContraction base class which captures the generic behavior
/// of tensor contraction operations (with broadcast).
template <class ConcreteOp> class TensorContractionBase {
protected:
  using TensorContractionBaseType = TensorContractionBase<ConcreteOp>;

  //////////////////////////////////////////////////////////////////////////
  // Hooks to customize the behavior of this op.
  //////////////////////////////////////////////////////////////////////////
  /// Generic implementation of the hooks that should be called from the
  /// `ConcreteOp`-specific implementations.
  mlir::LogicalResult verify();
  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
  void print(mlir::OpAsmPrinter *p);

public:
  //////////////////////////////////////////////////////////////////////////
  // Op-specific functionality.
  //////////////////////////////////////////////////////////////////////////
  TensorContractionBase() = default;
  mlir::Operation::operand_range getInputs();
  mlir::Operation::operand_range getOutputs();
  mlir::Operation::operand_range getInputsAndOutputs();

  /// These are better as methods calling into the ConcreteOp instead of
  /// template parameters because methods allow more generic behavior and avoid
  /// specializing for the number of arguments. All derived classes have
  /// `VariadicOperands` and a `build` method from both an
  /// ArrayRef<mlir::Value *> and the proper number of mlir::Value *.
  unsigned getNumInputs() {
    return static_cast<ConcreteOp *>(this)->numInputs;
  }
  unsigned getNumOutputs() {
    return static_cast<ConcreteOp *>(this)->numOutputs;
  }
  unsigned getNumParallelDims() {
    return static_cast<ConcreteOp *>(this)->numParallelDims;
  }
  unsigned getNumReductionDims() {
    return static_cast<ConcreteOp *>(this)->numReductionDims;
  }

  //////////////////////////////////////////////////////////////////////////
  // Used in Linalg3 and later.
  //////////////////////////////////////////////////////////////////////////
  mlir::Value *getInputView(unsigned viewIndex);
  mlir::Value *getOutputView(unsigned viewIndex);
  mlir::Value *getView(unsigned viewIndex) {
    return viewIndex < getNumInputs()
               ? getInputView(viewIndex)
               : getOutputView(viewIndex - getNumInputs());
  }

  /// Each op is responsible for declaring how it lowers itself to scalar form,
  /// given the enclosing parallel and reduction induction variables.
  /// `emitScalarImplementation` emits the scalar IR for the op in the nesting
  /// context of the innermost enclosing loop (i.e. `reductionIvs.back()` or
  /// `parallelIvs.back()`).
  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
                                llvm::ArrayRef<mlir::Value *> reductionIvs);

  /// Represents a mapping from the loops to all the ranges of the operands.
  /// The operands and their ranges are in the order defined by the particular
  /// ConcreteOp implementation; the resulting map must match that order.
  /// In favorable cases, this can be computed by an analysis, but specifying
  /// it explicitly is not expensive and generalizes to cases where an analysis
  /// is not available. For details, see the description of
  /// `loopsToOperandRangeMaps` in each ConcreteOp.
  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
};
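
// For intuition on `loopsToOperandRangeMaps`, consider the matvec case
// documented on MatvecOp below. With enclosing loops (%m, %k), the map
//   (d0, d1) -> (d0, d1, d1, d0)
// produces the operand ranges (%m, %k, %k, %m), i.e. grouped per operand:
//   A(M, K) gets (%m, %k), B(K) gets (%k), and C(M) gets (%m).
// This is only an illustrative regrouping of the per-op documentation below,
// not additional functionality.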

/// Implements c = A * B where c is a scalar and A and B are 1-D vectors.
class DotOp : public TensorContractionBase<DotOp>,
              public mlir::Op<DotOp, mlir::OpTrait::VariadicOperands,
                              mlir::OpTrait::ZeroResult> {
public:
  using Op::Op;
  using TensorContractionBaseType =
      TensorContractionBase::TensorContractionBaseType;

  //////////////////////////////////////////////////////////////////////////
  // Hooks to customize the behavior of this op.
  //////////////////////////////////////////////////////////////////////////
  static llvm::StringRef getOperationName() { return "linalg.dot"; }
  static void build(mlir::Builder *b, mlir::OperationState *result,
                    llvm::ArrayRef<mlir::Value *> operands);
  static void build(mlir::Builder *b, mlir::OperationState *result,
                    mlir::Value *A, mlir::Value *B, mlir::Value *C) {
    return build(b, result, {A, B, C});
  }
  mlir::LogicalResult verify();
  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
  void print(mlir::OpAsmPrinter *p);

  //////////////////////////////////////////////////////////////////////////
  // Op-specific functionality.
  //////////////////////////////////////////////////////////////////////////
  static constexpr unsigned numInputs = 2;
  static constexpr unsigned numOutputs = 1;
  static constexpr unsigned numParallelDims = 0;
  static constexpr unsigned numReductionDims = 1;

  //////////////////////////////////////////////////////////////////////////
  // Used in Linalg3 and later.
  //////////////////////////////////////////////////////////////////////////
  /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
  /// loop over matvec). Does nothing by default.
  void writeAsFinerGrainTensorContraction();

  /// Inputs to this map will be (%k) coming from enclosing loops.
  /// Therefore, the mapping to get back to A(K), B(K), C() is:
  ///   (d0) -> (d0, d0)(%k)
  /// And the operand ranges are:
  ///   (%k, %k)
  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();

  /// Given an enclosing reduction loop with iv `r_i`, emits MLIR corresponding
  /// to:
  ///   1. conditionally assign scalarC to 0.0f on the first iteration or load
  ///      C[] from memory (0-D tensor)
  ///   2. multiply A[r_i] by B[r_i] and add the result to scalarC
  ///   3. store scalarC back to C[]
  ///
  /// In some compact index notation this could be written:
  ///   cond = (r_i == zero)
  ///   scalarC = select(cond, zerof, C[]);
  ///   C[] = scalarC + A[r_i] * B[r_i];
  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
                                llvm::ArrayRef<mlir::Value *> reductionIvs);
};
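
// For illustration only: given a builder `b`, an mlir::OperationState `state`
// being populated for a new operation, and three values `vA`, `vB` and `vC` of
// the appropriate view types, the convenience overload declared above could be
// invoked as
//
//   DotOp::build(b, &state, vA, vB, vC);
//
// All of these names are hypothetical. How the state is created and turned
// into an actual operation is the builder's responsibility and is covered in
// the tutorial's .cpp files; treat this as a sketch of the arguments only.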

/// Implements C = A * B where A is a 2-D matrix and B and C are 1-D vectors.
class MatvecOp : public TensorContractionBase<MatvecOp>,
                 public mlir::Op<MatvecOp, mlir::OpTrait::VariadicOperands,
                                 mlir::OpTrait::ZeroResult> {
public:
  using Op::Op;
  using TensorContractionBaseType =
      TensorContractionBase::TensorContractionBaseType;

  //////////////////////////////////////////////////////////////////////////
  // Hooks to customize the behavior of this op.
  //////////////////////////////////////////////////////////////////////////
  static llvm::StringRef getOperationName() { return "linalg.matvec"; }
  static void build(mlir::Builder *b, mlir::OperationState *result,
                    llvm::ArrayRef<mlir::Value *> operands);
  static void build(mlir::Builder *b, mlir::OperationState *result,
                    mlir::Value *A, mlir::Value *B, mlir::Value *C) {
    return build(b, result, {A, B, C});
  }
  mlir::LogicalResult verify();
  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
  void print(mlir::OpAsmPrinter *p);

  //////////////////////////////////////////////////////////////////////////
  // Op-specific functionality.
  //////////////////////////////////////////////////////////////////////////
  static constexpr unsigned numInputs = 2;
  static constexpr unsigned numOutputs = 1;
  static constexpr unsigned numParallelDims = 1;
  static constexpr unsigned numReductionDims = 1;

  //////////////////////////////////////////////////////////////////////////
  // Used in Linalg3 and later.
  //////////////////////////////////////////////////////////////////////////
  /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
  /// loop over matvec). Does nothing by default.
  void writeAsFinerGrainTensorContraction();

  /// Inputs to this map will be (%m, %k) coming from enclosing loops.
  /// Therefore, the mapping to get back to A(M, K), B(K), C(M) is:
  ///   (d0, d1) -> (d0, d1, d1, d0)(%m, %k)
  /// And the operand ranges are:
  ///   (%m, %k, %k, %m)
  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();

  /// Given an enclosing parallel loop with iv `i` and an enclosing reduction
  /// loop with iv `r_j`, emits MLIR corresponding to:
  ///   1. conditionally assign scalarC to 0.0f on the first iteration or load
  ///      C[i]
  ///   2. multiply A[i, r_j] by B[r_j] and add the result to scalarC
  ///   3. store scalarC back to C[i]
  ///
  /// In some compact index notation this could be written:
  ///   cond = (r_j == zero)
  ///   scalarC = select(cond, zerof, C(i));
  ///   C(i) = scalarC + A(i, r_j) * B(r_j);
  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
                                llvm::ArrayRef<mlir::Value *> reductionIvs);
};
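
// For intuition on `writeAsFinerGrainTensorContraction`: a coarser contraction
// can be rewritten as a loop over a finer one. For the matmul defined below,
//
//   C(m, n) = sum_k A(m, k) * B(k, n)
//
// can be expressed as a loop over the columns of B and C, each iteration
// performing one matvec:
//
//   for each n: C(:, n) = A * B(:, n)
//
// This is only a sketch of the idea; the actual rewriting belongs to later
// tutorial steps.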

/// Implements C = A * B on 2-D matrices.
class MatmulOp : public TensorContractionBase<MatmulOp>,
                 public mlir::Op<MatmulOp, mlir::OpTrait::VariadicOperands,
                                 mlir::OpTrait::ZeroResult> {
public:
  using Op::Op;
  using TensorContractionBaseType =
      TensorContractionBase::TensorContractionBaseType;

  //////////////////////////////////////////////////////////////////////////
  // Hooks to customize the behavior of this op.
  //////////////////////////////////////////////////////////////////////////
  static llvm::StringRef getOperationName() { return "linalg.matmul"; }
  static void build(mlir::Builder *b, mlir::OperationState *result,
                    llvm::ArrayRef<mlir::Value *> operands);
  static void build(mlir::Builder *b, mlir::OperationState *result,
                    mlir::Value *A, mlir::Value *B, mlir::Value *C) {
    return build(b, result, {A, B, C});
  }
  mlir::LogicalResult verify();
  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
  void print(mlir::OpAsmPrinter *p);

  //////////////////////////////////////////////////////////////////////////
  // Op-specific functionality.
  //////////////////////////////////////////////////////////////////////////
  static constexpr unsigned numInputs = 2;
  static constexpr unsigned numOutputs = 1;
  static constexpr unsigned numParallelDims = 2;
  static constexpr unsigned numReductionDims = 1;

  //////////////////////////////////////////////////////////////////////////
  // Used in Linalg3 and later.
  //////////////////////////////////////////////////////////////////////////
  /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
  /// loop over matvec). Does nothing by default.
  void writeAsFinerGrainTensorContraction();

  /// Inputs to this map will be (%m, %n, %k) coming from enclosing loops.
  /// Therefore, the mapping to get back to A(M, K), B(K, N), C(M, N) is:
  ///   (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k)
  /// And the operand ranges are:
  ///   (%m, %k, %k, %n, %m, %n)
  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();

  /// Given enclosing parallel loops with ivs `i` and `j`, and an enclosing
  /// reduction loop with iv `r_k`, emits MLIR corresponding to:
  ///   1. conditionally assign scalarC to 0.0f on the first iteration or load
  ///      C[i, j]
  ///   2. multiply A[i, r_k] by B[r_k, j] and add the result to scalarC
  ///   3. store scalarC back to C[i, j]
  ///
  /// In some compact index notation this could be written:
  ///   cond = (r_k == zero)
  ///   scalarC = select(cond, zerof, C[i, j]);
  ///   C[i, j] = scalarC + A[i, r_k] * B[r_k, j];
  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
                                llvm::ArrayRef<mlir::Value *> reductionIvs);
};

} // namespace linalg

/// The TensorOps-inl.h inclusion pattern is chosen to allow gradual extension
/// of TensorOps by adding implementations as they are needed in the
/// appropriate step of the tutorial.
#include "linalg2/TensorOps-inl.h"

#endif // LINALG2_TENSOROPS_H_