summaryrefslogtreecommitdiff
path: root/include/linalg2/TensorOps.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linalg2/TensorOps.h')
-rw-r--r--include/linalg2/TensorOps.h287
1 files changed, 287 insertions, 0 deletions
diff --git a/include/linalg2/TensorOps.h b/include/linalg2/TensorOps.h
new file mode 100644
index 0000000..39e51f0
--- /dev/null
+++ b/include/linalg2/TensorOps.h
@@ -0,0 +1,287 @@
+//===- TensorOps.h - Linalg dialect TensorOps operation definition --------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#ifndef LINALG2_TENSOROPS_H_
+#define LINALG2_TENSOROPS_H_
+
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+class AffineForOp;
+} // namespace mlir
+
+namespace linalg {
+
+/// A generic TensorContraction base class which captures the generic behavior
+/// of tensor contraction operations (with broadcast).
+template <class ConcreteOp> class TensorContractionBase {
+protected:
+ using TensorContractionBaseType = TensorContractionBase<ConcreteOp>;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Hooks to customize the behavior of this op.
+ //////////////////////////////////////////////////////////////////////////////
+ /// Generic implementation of hooks that should be called from `ConcreteType`s
+ mlir::LogicalResult verify();
+ static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+ void print(mlir::OpAsmPrinter *p);
+
+public:
+ //////////////////////////////////////////////////////////////////////////////
+ // Op-specific functionality.
+ //////////////////////////////////////////////////////////////////////////////
+ TensorContractionBase() = default;
+ mlir::Operation::operand_range getInputs();
+ mlir::Operation::operand_range getOutputs();
+ mlir::Operation::operand_range getInputsAndOutputs();
+
+ /// These are better as methods calling into the ConcreteOp instead of
+ /// template parameters because methods allow more generic behavior and avoid
+ /// specializing for number of arguments. All derived classes have
+ /// `VariadicOperands` and a build method from both an ArrayRef<mlirValue*>
+ /// and the proper number of mlir::Value*.
+ unsigned getNumInputs() {
+ return static_cast<ConcreteOp *>(this)->numInputs;
+ };
+ unsigned getNumOutputs() {
+ return static_cast<ConcreteOp *>(this)->numOutputs;
+ };
+ unsigned getNumParallelDims() {
+ return static_cast<ConcreteOp *>(this)->numParallelDims;
+ };
+ unsigned getNumReductionDims() {
+ return static_cast<ConcreteOp *>(this)->numReductionDims;
+ };
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Used in Linalg3 and later.
+ //////////////////////////////////////////////////////////////////////////////
+ mlir::Value *getInputView(unsigned viewIndex);
+ mlir::Value *getOutputView(unsigned viewIndex);
+ mlir::Value *getView(unsigned viewIndex) {
+ return viewIndex < getNumInputs()
+ ? getInputView(viewIndex)
+ : getOutputView(viewIndex - getNumInputs());
+ }
+
+ /// Each op is responsible for declaring how it lowers itself to scalar form,
+ /// given the enclosing parallel and reduction induction variables.
+ /// `emitScalarImplementation` emits the scalar IR for the op in the nesting
+ /// context of the innermost enclosing loop(i.e. `reductionIvs.back()` or
+ /// `parallel.back()`).
+ void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+ llvm::ArrayRef<mlir::Value *> reductionIvs);
+
+ /// Represents a mapping from the loops to all the ranges of the operands.
+ /// The operands and their ranges are in the order defined by the particular
+ /// ConcreteOp implementation, the resulting map must match those.
+ /// In favorable cases, this can be calculated by an analysis but specifying
+ /// it explicitly is not expensive and generalizes to cases where an analysis
+ /// is not available. For details, see the description of
+ /// loopsToOperandRangeMaps in each ConcreteOp.
+ llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+};
+
+/// Implements c = A * B where c is a scalar and A and B are 1-D vectors.
+class DotOp : public TensorContractionBase<DotOp>,
+ public mlir::Op<DotOp, mlir::OpTrait::VariadicOperands,
+ mlir::OpTrait::ZeroResult> {
+public:
+ using Op::Op;
+ using TensorContractionBaseType =
+ TensorContractionBase::TensorContractionBaseType;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Hooks to customize the behavior of this op.
+ //////////////////////////////////////////////////////////////////////////////
+ static llvm::StringRef getOperationName() { return "linalg.dot"; }
+ static void build(mlir::Builder *b, mlir::OperationState *result,
+ llvm::ArrayRef<mlir::Value *> operands);
+ static void build(mlir::Builder *b, mlir::OperationState *result,
+ mlir::Value *A, mlir::Value *B, mlir::Value *C) {
+ return build(b, result, {A, B, C});
+ }
+ mlir::LogicalResult verify();
+ static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+ void print(mlir::OpAsmPrinter *p);
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Op-specific functionality.
+ //////////////////////////////////////////////////////////////////////////////
+ static constexpr unsigned numInputs = 2;
+ static constexpr unsigned numOutputs = 1;
+ static constexpr unsigned numParallelDims = 0;
+ static constexpr unsigned numReductionDims = 1;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Used in Linalg3 and later.
+ //////////////////////////////////////////////////////////////////////////////
+ /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
+ /// loop over matvec). Does nothing by default.
+ void writeAsFinerGrainTensorContraction();
+
+ /// Inputs to this map will be (%k) coming from enclosing loops.
+ /// Therefore, the mapping to get back to A(K), B(K), C() is:
+ /// (d0) -> (d0, d0)(%k)
+ /// And the operands ranges are:
+ /// (%k, %k)
+ llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+
+ /// Given an enclosing reduction loop with iv `r_i`, emits MLIR corresponding
+ /// to:
+ /// 1. conditionally assign scalarC to 0.0f on the first iteration or load
+ /// C[] from memory (0-D tensor)
+ /// 2. multiply A[r_i] by B[r_i] and add to scalarC
+ /// 3. store back scalarC at C[]
+ ///
+ /// In some compact index notation this could be written:
+ /// cond = (r_i == zero)
+ /// scalarC = select(cond, zerof, C[]);
+ /// C[] = scalarC + A[r_i] * B[r_i];
+ void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+ llvm::ArrayRef<mlir::Value *> reductionIvs);
+};
+
+/// Implements C = A * B where A is a 2-D matrix and X and Y are 1-D vectors.
+class MatvecOp : public TensorContractionBase<MatvecOp>,
+ public mlir::Op<MatvecOp, mlir::OpTrait::VariadicOperands,
+ mlir::OpTrait::ZeroResult> {
+public:
+ using Op::Op;
+ using TensorContractionBaseType =
+ TensorContractionBase::TensorContractionBaseType;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Hooks to customize the behavior of this op.
+ //////////////////////////////////////////////////////////////////////////////
+ static llvm::StringRef getOperationName() { return "linalg.matvec"; }
+ static void build(mlir::Builder *b, mlir::OperationState *result,
+ llvm::ArrayRef<mlir::Value *> operands);
+ static void build(mlir::Builder *b, mlir::OperationState *result,
+ mlir::Value *A, mlir::Value *B, mlir::Value *C) {
+ return build(b, result, {A, B, C});
+ }
+ mlir::LogicalResult verify();
+ static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+ void print(mlir::OpAsmPrinter *p);
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Op-specific functionality.
+ //////////////////////////////////////////////////////////////////////////////
+ static constexpr unsigned numInputs = 2;
+ static constexpr unsigned numOutputs = 1;
+ static constexpr unsigned numParallelDims = 1;
+ static constexpr unsigned numReductionDims = 1;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Used in Linalg3 and later.
+ //////////////////////////////////////////////////////////////////////////////
+ /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
+ /// loop over matvec). Does nothing by default.
+ void writeAsFinerGrainTensorContraction();
+
+ /// Inputs to this map will be (%m, %k) coming from enclosing loops.
+ /// Therefore, the mapping to get back to A(M, K), B(K), C(M) is:
+ /// (d0, d1) -> (d0, d1, d1, d0)(%m, %k)
+ /// And the operands ranges are:
+ /// (%m, %k, %k, %m)
+ llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+
+ /// Given an enclosing parallel loop with iv `i` and an enclosing parallel
+ /// loop with iv `r_j`, emits MLIR corresponding to:
+ /// 1. conditionally assign scalarC to 0.0f on the first iteration or load
+ /// C[i]
+ /// 2. multiply A[i, r_j] by B[r_j] and add to scalarC
+ /// 3. store back scalarC at C[i]
+ ///
+ /// In some compact index notation this could be written:
+ /// cond = (r_j == zero)
+ /// scalarC = select(cond, zerof, C(i));
+ /// C(i) = scalarC + A(i, r_j) * B(r_j);
+ void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+ llvm::ArrayRef<mlir::Value *> reductionIvs);
+};
+
+/// Implements C = A * B on 2-D matrices.
+class MatmulOp : public TensorContractionBase<MatmulOp>,
+ public mlir::Op<MatmulOp, mlir::OpTrait::VariadicOperands,
+ mlir::OpTrait::ZeroResult> {
+public:
+ using Op::Op;
+ using TensorContractionBaseType =
+ TensorContractionBase::TensorContractionBaseType;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Hooks to customize the behavior of this op.
+ //////////////////////////////////////////////////////////////////////////////
+ static llvm::StringRef getOperationName() { return "linalg.matmul"; }
+ static void build(mlir::Builder *b, mlir::OperationState *result,
+ llvm::ArrayRef<mlir::Value *> operands);
+ static void build(mlir::Builder *b, mlir::OperationState *result,
+ mlir::Value *A, mlir::Value *B, mlir::Value *C) {
+ return build(b, result, {A, B, C});
+ }
+ mlir::LogicalResult verify();
+ static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+ void print(mlir::OpAsmPrinter *p);
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Op-specific functionality.
+ //////////////////////////////////////////////////////////////////////////////
+ static constexpr unsigned numInputs = 2;
+ static constexpr unsigned numOutputs = 1;
+ static constexpr unsigned numParallelDims = 2;
+ static constexpr unsigned numReductionDims = 1;
+
+ //////////////////////////////////////////////////////////////////////////////
+ // Used in Linalg3 and later.
+ //////////////////////////////////////////////////////////////////////////////
+ /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
+ /// loop over matvec). Does nothing by default.
+ void writeAsFinerGrainTensorContraction();
+
+ /// Inputs to this map will be (%m, %n, %k) coming from enclosing loops.
+ /// Therefore, the mapping to get back to A(M, K), B(K, N), C(M, N) is:
+ /// (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k)
+ /// And the operands ranges are:
+ /// (%m, %k, %k, %n, %m, %n)
+ llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+
+ /// Given a enclosing parallel loops with ivs `i` and `j`, and an enclosing
+ /// reduction loop with iv `r_k`, emits MLIR corresponding to:
+ /// 1. conditionally assign scalarC to 0.0f on the first iteration or load
+ /// C[i, j]
+ /// 2. multiply A[i, r_k] by B[r_k, j] and add to scalarC
+ /// 3. store back scalarC at C[i, j]
+ ///
+ /// In some compact index notation this could be written:
+ /// cond = (r_k == zero)
+ /// scalarC = select(cond, zerof, C[i, j]);
+ /// C[i, j] = scalarC + A[i, r_k] * B[r_k, j];
+ void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+ llvm::ArrayRef<mlir::Value *> reductionIvs);
+};
+
+} // namespace linalg
+
+/// The TensorOp-inl.h inclusion pattern is chosen to allow gradual extension of
+/// TensorOps by adding implementations as they are needed in the appropriate
+/// step in the tutorial.
+#include "linalg2/TensorOps-inl.h"
+
+#endif // LINALG2_TENSOROPS_H_