diff options
Diffstat (limited to 'include/linalg2')
-rw-r--r-- | include/linalg2/Analysis.h | 23 | ||||
-rw-r--r-- | include/linalg2/Intrinsics.h | 32 | ||||
-rw-r--r-- | include/linalg2/Ops.h | 24 | ||||
-rw-r--r-- | include/linalg2/TensorOps-inl.h | 120 | ||||
-rw-r--r-- | include/linalg2/TensorOps.h | 287 | ||||
-rw-r--r-- | include/linalg2/Transforms.h | 36 |
6 files changed, 522 insertions, 0 deletions
diff --git a/include/linalg2/Analysis.h b/include/linalg2/Analysis.h new file mode 100644 index 0000000..43acd95 --- /dev/null +++ b/include/linalg2/Analysis.h @@ -0,0 +1,23 @@ +//===- Analysis.h - Linalg dialect Analysis function definitions ----------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#ifndef LINALG2_ANALYSIS_H_ +#define LINALG2_ANALYSIS_H_ + +#include "linalg1/Analysis.h" + +#endif // LINALG2_ANALYSIS_H_ diff --git a/include/linalg2/Intrinsics.h b/include/linalg2/Intrinsics.h new file mode 100644 index 0000000..e74e059 --- /dev/null +++ b/include/linalg2/Intrinsics.h @@ -0,0 +1,32 @@ +//===- Intrinsics.h - Linalg intrinsics definitions -----------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// ============================================================================= + +#ifndef LINALG2_INTRINSICS_H_ +#define LINALG2_INTRINSICS_H_ + +#include "linalg1/Intrinsics.h" +#include "linalg2/Ops.h" + +namespace linalg { +namespace intrinsics { +using dot = mlir::edsc::intrinsics::OperationBuilder<DotOp>; +using matmul = mlir::edsc::intrinsics::OperationBuilder<MatmulOp>; +using matvec = mlir::edsc::intrinsics::OperationBuilder<MatvecOp>; +} // namespace intrinsics +} // namespace linalg + +#endif // LINALG2_INTRINSICS_H_ diff --git a/include/linalg2/Ops.h b/include/linalg2/Ops.h new file mode 100644 index 0000000..141b1d0 --- /dev/null +++ b/include/linalg2/Ops.h @@ -0,0 +1,24 @@ +//===- Ops.h - Linalg Ops single entry point ------------------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#ifndef LINALG2_OPS_H_ +#define LINALG2_OPS_H_ + +#include "linalg1/Ops.h" +#include "linalg2/TensorOps.h" + +#endif // LINALG2_OPS_H_ diff --git a/include/linalg2/TensorOps-inl.h b/include/linalg2/TensorOps-inl.h new file mode 100644 index 0000000..940f8d7 --- /dev/null +++ b/include/linalg2/TensorOps-inl.h @@ -0,0 +1,120 @@ +//===- TensorOps-inl.h - Linalg dialect TensorOps operation implementation ===// +// +// Copyright 2019 The MLIR Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +/// The TensorOp-inl.h inclusion pattern is chosen to allow gradual extension of +/// TensorOps by adding implementations as they are needed in the appropriate +/// step in the tutorial. +#ifndef LINALG2_TENSOROPS_INL_H_ +#define LINALG2_TENSOROPS_INL_H_ + +#include "linalg2/Ops.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/IR/StandardTypes.h" + +namespace linalg { + +template <class ConcreteOp> +mlir::Operation::operand_range +linalg::TensorContractionBase<ConcreteOp>::getInputs() { + auto *op = static_cast<ConcreteOp *>(this)->getOperation(); + return {op->operand_begin(), op->operand_begin() + getNumInputs()}; +} + +template <class ConcreteOp> +mlir::Operation::operand_range +linalg::TensorContractionBase<ConcreteOp>::getOutputs() { + auto *op = static_cast<ConcreteOp *>(this)->getOperation(); + return {op->operand_begin() + getNumInputs(), + op->operand_begin() + getNumInputs() + getNumOutputs()}; +} + +template <class ConcreteOp> +mlir::Operation::operand_range +linalg::TensorContractionBase<ConcreteOp>::getInputsAndOutputs() { + return {getInputs().begin(), getOutputs().end()}; +} + +template <class ConcreteOp> +mlir::LogicalResult linalg::TensorContractionBase<ConcreteOp>::verify() { + auto *concreteOp = static_cast<ConcreteOp 
*>(this)->getOperation(); + if (getNumInputs() <= 0) + concreteOp->emitOpError("expected at least one input"); + if (getNumOutputs() <= 0) + concreteOp->emitOpError("expected at least one output"); + if (concreteOp->getNumOperands() != getNumInputs() + getNumOutputs()) { + concreteOp->emitOpError("expected " + + llvm::Twine(getNumInputs() + getNumOutputs()) + + " operands"); + } + for (unsigned i = 0, e = getNumInputs(); i < e; ++i) { + if (!concreteOp->getOperand(i)->getType().template isa<ViewType>()) + return concreteOp->emitOpError("operand " + llvm::Twine(i) + + " not a ViewType"); + } + for (unsigned i = getNumInputs(), e = getNumInputs() + getNumOutputs(); i < e; + ++i) { + auto viewType = + concreteOp->getOperand(i)->getType().template dyn_cast<ViewType>(); + if (!viewType) + return concreteOp->emitOpError("operand " + llvm::Twine(i) + + " not a ViewType"); + if (viewType.getRank() != getNumParallelDims()) + return concreteOp->emitOpError("operand " + llvm::Twine(i) + + " must be of rank " + + llvm::Twine(getNumParallelDims())); + } + return mlir::success(); +} + +template <class ConcreteOp> +bool linalg::TensorContractionBase<ConcreteOp>::parse( + mlir::OpAsmParser *parser, mlir::OperationState *result) { + llvm_unreachable("Parsing linalg dialect is not supported in this tutorial"); +} + +// A TensorContraction prints as: +// +// ```{.mlir} +// concrete_op_name (ssa-inputs, ssa-outputs) : output-view-types +// ``` +// +// for example: +// +// ``` +// linalg.matmul(%0, %1, %2) : view<?x?xf32> +// ``` +// +// Where %0, %1 and %2 are ssa-values of type ViewType. +template <class ConcreteOp> +void linalg::TensorContractionBase<ConcreteOp>::print(mlir::OpAsmPrinter *p) { + *p << static_cast<ConcreteOp *>(this)->getOperationName() << "("; + auto *last = *std::prev(getInputsAndOutputs().end()); + for (auto *i : getInputsAndOutputs()) { + *p << *i << ((i == last) ? 
"" : ", "); + } + *p << ") : "; + auto *lastOutput = *std::prev(getOutputs().end()); + for (auto *o : getOutputs()) { + *p << o->getType() << ((o == lastOutput) ? "" : ","); + } +} + +} // namespace linalg + +#endif // LINALG2_TENSOROPS_INL_H_ diff --git a/include/linalg2/TensorOps.h b/include/linalg2/TensorOps.h new file mode 100644 index 0000000..39e51f0 --- /dev/null +++ b/include/linalg2/TensorOps.h @@ -0,0 +1,287 @@ +//===- TensorOps.h - Linalg dialect TensorOps operation definition --------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#ifndef LINALG2_TENSOROPS_H_ +#define LINALG2_TENSOROPS_H_ + +#include "mlir/IR/OpDefinition.h" +#include "mlir/Support/LLVM.h" + +namespace mlir { +class AffineForOp; +} // namespace mlir + +namespace linalg { + +/// A generic TensorContraction base class which captures the generic behavior +/// of tensor contraction operations (with broadcast). +template <class ConcreteOp> class TensorContractionBase { +protected: + using TensorContractionBaseType = TensorContractionBase<ConcreteOp>; + + ////////////////////////////////////////////////////////////////////////////// + // Hooks to customize the behavior of this op. 
+ ////////////////////////////////////////////////////////////////////////////// + /// Generic implementation of hooks that should be called from `ConcreteType`s + mlir::LogicalResult verify(); + static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result); + void print(mlir::OpAsmPrinter *p); + +public: + ////////////////////////////////////////////////////////////////////////////// + // Op-specific functionality. + ////////////////////////////////////////////////////////////////////////////// + TensorContractionBase() = default; + mlir::Operation::operand_range getInputs(); + mlir::Operation::operand_range getOutputs(); + mlir::Operation::operand_range getInputsAndOutputs(); + + /// These are better as methods calling into the ConcreteOp instead of + /// template parameters because methods allow more generic behavior and avoid + /// specializing for number of arguments. All derived classes have + /// `VariadicOperands` and a build method from both an ArrayRef<mlir::Value *> + /// and the proper number of mlir::Value*. + unsigned getNumInputs() { + return static_cast<ConcreteOp *>(this)->numInputs; + }; + unsigned getNumOutputs() { + return static_cast<ConcreteOp *>(this)->numOutputs; + }; + unsigned getNumParallelDims() { + return static_cast<ConcreteOp *>(this)->numParallelDims; + }; + unsigned getNumReductionDims() { + return static_cast<ConcreteOp *>(this)->numReductionDims; + }; + + ////////////////////////////////////////////////////////////////////////////// + // Used in Linalg3 and later. + ////////////////////////////////////////////////////////////////////////////// + mlir::Value *getInputView(unsigned viewIndex); + mlir::Value *getOutputView(unsigned viewIndex); + mlir::Value *getView(unsigned viewIndex) { + return viewIndex < getNumInputs() + ? 
getInputView(viewIndex) + : getOutputView(viewIndex - getNumInputs()); + } + + /// Each op is responsible for declaring how it lowers itself to scalar form, + /// given the enclosing parallel and reduction induction variables. + /// `emitScalarImplementation` emits the scalar IR for the op in the nesting + /// context of the innermost enclosing loop (i.e. `reductionIvs.back()` or + /// `parallelIvs.back()`). + void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs, + llvm::ArrayRef<mlir::Value *> reductionIvs); + + /// Represents a mapping from the loops to all the ranges of the operands. + /// The operands and their ranges are in the order defined by the particular + /// ConcreteOp implementation, the resulting map must match those. + /// In favorable cases, this can be calculated by an analysis but specifying + /// it explicitly is not expensive and generalizes to cases where an analysis + /// is not available. For details, see the description of + /// loopsToOperandRangeMaps in each ConcreteOp. + llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps(); +}; + +/// Implements c = A * B where c is a scalar and A and B are 1-D vectors. +class DotOp : public TensorContractionBase<DotOp>, + public mlir::Op<DotOp, mlir::OpTrait::VariadicOperands, + mlir::OpTrait::ZeroResult> { +public: + using Op::Op; + using TensorContractionBaseType = + TensorContractionBase::TensorContractionBaseType; + + ////////////////////////////////////////////////////////////////////////////// + // Hooks to customize the behavior of this op. 
+ ////////////////////////////////////////////////////////////////////////////// + static llvm::StringRef getOperationName() { return "linalg.dot"; } + static void build(mlir::Builder *b, mlir::OperationState *result, + llvm::ArrayRef<mlir::Value *> operands); + static void build(mlir::Builder *b, mlir::OperationState *result, + mlir::Value *A, mlir::Value *B, mlir::Value *C) { + return build(b, result, {A, B, C}); + } + mlir::LogicalResult verify(); + static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result); + void print(mlir::OpAsmPrinter *p); + + ////////////////////////////////////////////////////////////////////////////// + // Op-specific functionality. + ////////////////////////////////////////////////////////////////////////////// + static constexpr unsigned numInputs = 2; + static constexpr unsigned numOutputs = 1; + static constexpr unsigned numParallelDims = 0; + static constexpr unsigned numReductionDims = 1; + + ////////////////////////////////////////////////////////////////////////////// + // Used in Linalg3 and later. + ////////////////////////////////////////////////////////////////////////////// + /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a + /// loop over matvec). Does nothing by default. + void writeAsFinerGrainTensorContraction(); + + /// Inputs to this map will be (%k) coming from enclosing loops. + /// Therefore, the mapping to get back to A(K), B(K), C() is: + /// (d0) -> (d0, d0)(%k) + /// And the operands ranges are: + /// (%k, %k) + llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps(); + + /// Given an enclosing reduction loop with iv `r_i`, emits MLIR corresponding + /// to: + /// 1. conditionally assign scalarC to 0.0f on the first iteration or load + /// C[] from memory (0-D tensor) + /// 2. multiply A[r_i] by B[r_i] and add to scalarC + /// 3. 
store back scalarC at C[] + /// + /// In some compact index notation this could be written: + /// cond = (r_i == zero) + /// scalarC = select(cond, zerof, C[]); + /// C[] = scalarC + A[r_i] * B[r_i]; + void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs, + llvm::ArrayRef<mlir::Value *> reductionIvs); +}; + +/// Implements C = A * B where A is a 2-D matrix and B and C are 1-D vectors. +class MatvecOp : public TensorContractionBase<MatvecOp>, + public mlir::Op<MatvecOp, mlir::OpTrait::VariadicOperands, + mlir::OpTrait::ZeroResult> { +public: + using Op::Op; + using TensorContractionBaseType = + TensorContractionBase::TensorContractionBaseType; + + ////////////////////////////////////////////////////////////////////////////// + // Hooks to customize the behavior of this op. + ////////////////////////////////////////////////////////////////////////////// + static llvm::StringRef getOperationName() { return "linalg.matvec"; } + static void build(mlir::Builder *b, mlir::OperationState *result, + llvm::ArrayRef<mlir::Value *> operands); + static void build(mlir::Builder *b, mlir::OperationState *result, + mlir::Value *A, mlir::Value *B, mlir::Value *C) { + return build(b, result, {A, B, C}); + } + mlir::LogicalResult verify(); + static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result); + void print(mlir::OpAsmPrinter *p); + + ////////////////////////////////////////////////////////////////////////////// + // Op-specific functionality. + ////////////////////////////////////////////////////////////////////////////// + static constexpr unsigned numInputs = 2; + static constexpr unsigned numOutputs = 1; + static constexpr unsigned numParallelDims = 1; + static constexpr unsigned numReductionDims = 1; + + ////////////////////////////////////////////////////////////////////////////// + // Used in Linalg3 and later. 
+ ////////////////////////////////////////////////////////////////////////////// + /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a + /// loop over matvec). Does nothing by default. + void writeAsFinerGrainTensorContraction(); + + /// Inputs to this map will be (%m, %k) coming from enclosing loops. + /// Therefore, the mapping to get back to A(M, K), B(K), C(M) is: + /// (d0, d1) -> (d0, d1, d1, d0)(%m, %k) + /// And the operands ranges are: + /// (%m, %k, %k, %m) + llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps(); + + /// Given an enclosing parallel loop with iv `i` and an enclosing reduction + /// loop with iv `r_j`, emits MLIR corresponding to: + /// 1. conditionally assign scalarC to 0.0f on the first iteration or load + /// C[i] + /// 2. multiply A[i, r_j] by B[r_j] and add to scalarC + /// 3. store back scalarC at C[i] + /// + /// In some compact index notation this could be written: + /// cond = (r_j == zero) + /// scalarC = select(cond, zerof, C(i)); + /// C(i) = scalarC + A(i, r_j) * B(r_j); + void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs, + llvm::ArrayRef<mlir::Value *> reductionIvs); +}; + +/// Implements C = A * B on 2-D matrices. +class MatmulOp : public TensorContractionBase<MatmulOp>, + public mlir::Op<MatmulOp, mlir::OpTrait::VariadicOperands, + mlir::OpTrait::ZeroResult> { +public: + using Op::Op; + using TensorContractionBaseType = + TensorContractionBase::TensorContractionBaseType; + + ////////////////////////////////////////////////////////////////////////////// + // Hooks to customize the behavior of this op. 
+ ////////////////////////////////////////////////////////////////////////////// + static llvm::StringRef getOperationName() { return "linalg.matmul"; } + static void build(mlir::Builder *b, mlir::OperationState *result, + llvm::ArrayRef<mlir::Value *> operands); + static void build(mlir::Builder *b, mlir::OperationState *result, + mlir::Value *A, mlir::Value *B, mlir::Value *C) { + return build(b, result, {A, B, C}); + } + mlir::LogicalResult verify(); + static bool parse(mlir::OpAsmParser *parser, mlir::OperationState *result); + void print(mlir::OpAsmPrinter *p); + + ////////////////////////////////////////////////////////////////////////////// + // Op-specific functionality. + ////////////////////////////////////////////////////////////////////////////// + static constexpr unsigned numInputs = 2; + static constexpr unsigned numOutputs = 1; + static constexpr unsigned numParallelDims = 2; + static constexpr unsigned numReductionDims = 1; + + ////////////////////////////////////////////////////////////////////////////// + // Used in Linalg3 and later. + ////////////////////////////////////////////////////////////////////////////// + /// Rewrites this op as a finer-grained tensor contraction (e.g. matmul is a + /// loop over matvec). Does nothing by default. + void writeAsFinerGrainTensorContraction(); + + /// Inputs to this map will be (%m, %n, %k) coming from enclosing loops. + /// Therefore, the mapping to get back to A(M, K), B(K, N), C(M, N) is: + /// (d0, d1, d2) -> (d0, d2, d2, d1, d0, d1)(%m, %n, %k) + /// And the operands ranges are: + /// (%m, %k, %k, %n, %m, %n) + llvm::SmallVector<mlir::AffineMap, 8> loopsToOperandRangeMaps(); + + /// Given enclosing parallel loops with ivs `i` and `j`, and an enclosing + /// reduction loop with iv `r_k`, emits MLIR corresponding to: + /// 1. conditionally assign scalarC to 0.0f on the first iteration or load + /// C[i, j] + /// 2. multiply A[i, r_k] by B[r_k, j] and add to scalarC + /// 3. 
store back scalarC at C[i, j] + /// + /// In some compact index notation this could be written: + /// cond = (r_k == zero) + /// scalarC = select(cond, zerof, C[i, j]); + /// C[i, j] = scalarC + A[i, r_k] * B[r_k, j]; + void emitScalarImplementation(llvm::ArrayRef<mlir::Value *> parallelIvs, + llvm::ArrayRef<mlir::Value *> reductionIvs); +}; + +} // namespace linalg + +/// The TensorOp-inl.h inclusion pattern is chosen to allow gradual extension of +/// TensorOps by adding implementations as they are needed in the appropriate +/// step in the tutorial. +#include "linalg2/TensorOps-inl.h" + +#endif // LINALG2_TENSOROPS_H_ diff --git a/include/linalg2/Transforms.h b/include/linalg2/Transforms.h new file mode 100644 index 0000000..c55f863 --- /dev/null +++ b/include/linalg2/Transforms.h @@ -0,0 +1,36 @@ +//===- Transforms.h - Linalg dialect Transformations definition -----------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#ifndef LINALG2_TRANSFORMS_H_ +#define LINALG2_TRANSFORMS_H_ + +namespace mlir { +class Value; +} // namespace mlir + +namespace linalg { + +class ViewOp; + +/// Takes a `view` of type ViewType (i.e. either a ViewOp or a SliceOp) and +/// composes away all the SliceOp to return a single ViewOp. +/// Inserts the required operations after `view`. 
+ViewOp emitAndReturnFullyComposedView(mlir::Value *v); + +} // namespace linalg + +#endif // LINALG2_TRANSFORMS_H_ |