1 files changed, 287 insertions, 0 deletions
diff --git a/include/linalg2/TensorOps.h b/include/linalg2/TensorOps.h
new file mode 100644
index 0000000..39e51f0
--- /dev/null
+++ b/include/linalg2/TensorOps.h
@@ -0,0 +1,287 @@
+//===- TensorOps.h - Linalg dialect TensorOps operation definition --------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#ifndef LINALG2_TENSOROPS_H_
+#define LINALG2_TENSOROPS_H_
+
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+class AffineForOp; // Forward declaration; unused by the declarations below.
+} // namespace mlir
+
+namespace linalg {
+
+/// CRTP base class capturing the behavior shared by tensor contraction
+/// operations (with broadcast); `ConcreteOp` is the deriving op class.
+template <class ConcreteOp> class TensorContractionBase {
+protected:
+  using TensorContractionBaseType = TensorContractionBase<ConcreteOp>;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Hooks to customize the behavior of this op.
+  //////////////////////////////////////////////////////////////////////////////
+  /// Generic hook implementations, to be called from each `ConcreteOp`.
+  mlir::LogicalResult verify();
+  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+  void print(mlir::OpAsmPrinter *p);
+
+public:
+  //////////////////////////////////////////////////////////////////////////////
+  // Op-specific functionality.
+  //////////////////////////////////////////////////////////////////////////////
+  TensorContractionBase() = default;
+  mlir::Operation::operand_range getInputs();
+  mlir::Operation::operand_range getOutputs();
+  mlir::Operation::operand_range getInputsAndOutputs();
+
+  /// These are better as methods calling into the ConcreteOp instead of
+  /// template parameters because methods allow more generic behavior and avoid
+  /// specializing for number of arguments. All derived classes have
+  /// `VariadicOperands` and a build method from both an ArrayRef<mlir::Value*>
+  /// and the proper number of mlir::Value*.
+  unsigned getNumInputs() {
+    return static_cast<ConcreteOp *>(this)->numInputs;
+  }
+  unsigned getNumOutputs() {
+    return static_cast<ConcreteOp *>(this)->numOutputs;
+  }
+  unsigned getNumParallelDims() {
+    return static_cast<ConcreteOp *>(this)->numParallelDims;
+  }
+  unsigned getNumReductionDims() {
+    return static_cast<ConcreteOp *>(this)->numReductionDims;
+  }
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Used in Linalg3 and later.
+  //////////////////////////////////////////////////////////////////////////////
+  mlir::Value *getInputView(unsigned viewIndex);
+  mlir::Value *getOutputView(unsigned viewIndex);
+  /// Returns the view at `viewIndex`, counting inputs first and then outputs.
+  mlir::Value *getView(unsigned viewIndex) {
+    return viewIndex < getNumInputs()
+               ? getInputView(viewIndex)
+               : getOutputView(viewIndex - getNumInputs());
+  }
+
+  /// Each op declares how it lowers itself to scalar form, given the enclosing
+  /// parallel and reduction induction variables. The scalar IR is emitted in
+  /// the nesting context of the innermost enclosing loop (i.e.
+  /// `reductionIvs.back()` or `parallelIvs.back()`).
+  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+                                llvm::ArrayRef<mlir::Value *> reductionIvs);
+
+  /// Represents a mapping from the loops to all the ranges of the operands.
+  /// The operands and their ranges are in the order defined by the particular
+  /// ConcreteOp implementation; the resulting map must match those.
+  /// In favorable cases, this can be calculated by an analysis but specifying
+  /// it explicitly is not expensive and generalizes to cases where an analysis
+  /// is not available. For details, see the description of
+  /// loopsToOperandRangeMaps in each ConcreteOp.
+  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+};
+
+/// Implements C = A * B where C is a scalar (0-D) and A and B are 1-D vectors.
+class DotOp : public TensorContractionBase<DotOp>,
+              public mlir::Op<DotOp, mlir::OpTrait::VariadicOperands,
+                              mlir::OpTrait::ZeroResult> {
+public:
+  using Op::Op;
+  using TensorContractionBaseType =
+      TensorContractionBase::TensorContractionBaseType;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Hooks to customize the behavior of this op.
+  //////////////////////////////////////////////////////////////////////////////
+  static llvm::StringRef getOperationName() { return "linalg.dot"; }
+  static void build(mlir::Builder *b, mlir::OperationState *result,
+                    llvm::ArrayRef<mlir::Value *> operands);
+  static void build(mlir::Builder *b, mlir::OperationState *result,
+                    mlir::Value *A, mlir::Value *B, mlir::Value *C) {
+    return build(b, result, {A, B, C});
+  }
+  mlir::LogicalResult verify();
+  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+  void print(mlir::OpAsmPrinter *p);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Op-specific functionality.
+  //////////////////////////////////////////////////////////////////////////////
+  static constexpr unsigned numInputs = 2;
+  static constexpr unsigned numOutputs = 1;
+  static constexpr unsigned numParallelDims = 0;
+  static constexpr unsigned numReductionDims = 1;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Used in Linalg3 and later.
+  //////////////////////////////////////////////////////////////////////////////
+  /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
+  /// loop over matvec). Does nothing by default.
+  void writeAsFinerGrainTensorContraction();
+
+  /// Inputs to this map will be (%k) coming from enclosing loops.
+  /// Therefore, the mapping to get back to A(K), B(K), C() is:
+  ///   (d0) -> (d0, d0)(%k)
+  /// And the operand ranges are:
+  ///   (%k, %k)
+  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+
+  /// Given an enclosing reduction loop with iv `r_i`, emits MLIR corresponding
+  /// to:
+  ///   1. conditionally assign scalarC to 0.0f on the first iteration or load
+  ///      C[] from memory (0-D tensor)
+  ///   2. multiply A[r_i] by B[r_i] and add to scalarC
+  ///   3. store back scalarC at C[]
+  ///
+  /// In some compact index notation this could be written:
+  ///   cond = (r_i == zero)
+  ///   scalarC = select(cond, zerof, C[]);
+  ///   C[] = scalarC + A[r_i] * B[r_i];
+  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+                                llvm::ArrayRef<mlir::Value *> reductionIvs);
+};
+
+/// Implements C = A * B where A is a 2-D matrix and B and C are 1-D vectors.
+class MatvecOp : public TensorContractionBase<MatvecOp>,
+                 public mlir::Op<MatvecOp, mlir::OpTrait::VariadicOperands,
+                                 mlir::OpTrait::ZeroResult> {
+public:
+  using Op::Op;
+  using TensorContractionBaseType =
+      TensorContractionBase::TensorContractionBaseType;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Hooks to customize the behavior of this op.
+  //////////////////////////////////////////////////////////////////////////////
+  static llvm::StringRef getOperationName() { return "linalg.matvec"; }
+  static void build(mlir::Builder *b, mlir::OperationState *result,
+                    llvm::ArrayRef<mlir::Value *> operands);
+  static void build(mlir::Builder *b, mlir::OperationState *result,
+                    mlir::Value *A, mlir::Value *B, mlir::Value *C) {
+    return build(b, result, {A, B, C});
+  }
+  mlir::LogicalResult verify();
+  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+  void print(mlir::OpAsmPrinter *p);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Op-specific functionality.
+  //////////////////////////////////////////////////////////////////////////////
+  static constexpr unsigned numInputs = 2;
+  static constexpr unsigned numOutputs = 1;
+  static constexpr unsigned numParallelDims = 1;
+  static constexpr unsigned numReductionDims = 1;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Used in Linalg3 and later.
+  //////////////////////////////////////////////////////////////////////////////
+  /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
+  /// loop over matvec). Does nothing by default.
+  void writeAsFinerGrainTensorContraction();
+
+  /// Inputs to this map will be (%m, %k) coming from enclosing loops.
+  /// Therefore, the mapping to get back to A(M, K), B(K), C(M) is:
+  ///   (d0, d1) -> (d0, d1, d1, d0)(%m, %k)
+  /// And the operand ranges are:
+  ///   (%m, %k, %k, %m)
+  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+
+  /// Given an enclosing parallel loop with iv `i` and an enclosing reduction
+  /// loop with iv `r_j`, emits MLIR corresponding to:
+  ///   1. conditionally assign scalarC to 0.0f on the first iteration or load
+  ///      C[i]
+  ///   2. multiply A[i, r_j] by B[r_j] and add to scalarC
+  ///   3. store back scalarC at C[i]
+  ///
+  /// In some compact index notation this could be written:
+  ///   cond = (r_j == zero)
+  ///   scalarC = select(cond, zerof, C[i]);
+  ///   C[i] = scalarC + A[i, r_j] * B[r_j];
+  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+                                llvm::ArrayRef<mlir::Value *> reductionIvs);
+};
+
+/// Implements C = A * B on 2-D matrices.
+class MatmulOp : public TensorContractionBase<MatmulOp>,
+                 public mlir::Op<MatmulOp, mlir::OpTrait::VariadicOperands,
+                                 mlir::OpTrait::ZeroResult> {
+public:
+  using Op::Op;
+  using TensorContractionBaseType =
+      TensorContractionBase::TensorContractionBaseType;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Hooks to customize the behavior of this op.
+  //////////////////////////////////////////////////////////////////////////////
+  static llvm::StringRef getOperationName() { return "linalg.matmul"; }
+  static void build(mlir::Builder *b, mlir::OperationState *result,
+                    llvm::ArrayRef<mlir::Value *> operands);
+  static void build(mlir::Builder *b, mlir::OperationState *result,
+                    mlir::Value *A, mlir::Value *B, mlir::Value *C) {
+    return build(b, result, {A, B, C});
+  }
+  mlir::LogicalResult verify();
+  static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result);
+  void print(mlir::OpAsmPrinter *p);
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Op-specific functionality.
+  //////////////////////////////////////////////////////////////////////////////
+  static constexpr unsigned numInputs = 2;
+  static constexpr unsigned numOutputs = 1;
+  static constexpr unsigned numParallelDims = 2;
+  static constexpr unsigned numReductionDims = 1;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Used in Linalg3 and later.
+  //////////////////////////////////////////////////////////////////////////////
+  /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a
+  /// loop over matvec). Does nothing by default.
+  void writeAsFinerGrainTensorContraction();
+
+  /// Inputs to this map will be (%m, %n, %k) coming from enclosing loops.
+  /// Therefore, the mapping to get back to A(M, K), B(K, N), C(M, N) is:
+  ///   (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k)
+  /// And the operand ranges are:
+  ///   (%m, %k, %k, %n, %m, %n)
+  llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps();
+
+  /// Given enclosing parallel loops with ivs `i` and `j`, and an enclosing
+  /// reduction loop with iv `r_k`, emits MLIR corresponding to:
+  ///   1. conditionally assign scalarC to 0.0f on the first iteration or load
+  ///      C[i, j]
+  ///   2. multiply A[i, r_k] by B[r_k, j] and add to scalarC
+  ///   3. store back scalarC at C[i, j]
+  ///
+  /// In some compact index notation this could be written:
+  ///   cond = (r_k == zero)
+  ///   scalarC = select(cond, zerof, C[i, j]);
+  ///   C[i, j] = scalarC + A[i, r_k] * B[r_k, j];
+  void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs,
+                                llvm::ArrayRef<mlir::Value *> reductionIvs);
+};
+
+} // namespace linalg
+
+/// The TensorOps-inl.h inclusion pattern is chosen to allow gradual extension of
+/// TensorOps by adding implementations as they are needed in the appropriate
+/// step in the tutorial.
+#include "linalg2/TensorOps-inl.h"
+
+#endif // LINALG2_TENSOROPS_H_