Author: Wen-Heng (Jack) Chung Date: 2020-06-05T22:18:19-05:00 New Revision: f0500d18ee6f0072487526e53fc9a7bd1c1d7235
URL: https://github.com/llvm/llvm-project/commit/f0500d18ee6f0072487526e53fc9a7bd1c1d7235 DIFF: https://github.com/llvm/llvm-project/commit/f0500d18ee6f0072487526e53fc9a7bd1c1d7235.diff LOG: Add Op traversing logic into MIOpen dialect -> C++ header translator. Added: mlir/test/Dialect/MIOpen/translate.mlir Modified: mlir/lib/Dialect/MIOpenOps/CppOutput/ConvertToMIOpenCPP.cpp Removed: mlir/test/Dialect/MIOpen/CppOutput/transformed.mlir ################################################################################ diff --git a/mlir/lib/Dialect/MIOpenOps/CppOutput/ConvertToMIOpenCPP.cpp b/mlir/lib/Dialect/MIOpenOps/CppOutput/ConvertToMIOpenCPP.cpp index b071565c2b51..4eb8d7de7181 100644 --- a/mlir/lib/Dialect/MIOpenOps/CppOutput/ConvertToMIOpenCPP.cpp +++ b/mlir/lib/Dialect/MIOpenOps/CppOutput/ConvertToMIOpenCPP.cpp @@ -15,8 +15,10 @@ #include "mlir/Dialect/StandardOps/Ops.h" #include "mlir/IR/Function.h" #include "mlir/IR/Module.h" +#include "mlir/Support/STLExtras.h" #include "mlir/Translation.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" @@ -222,6 +224,160 @@ void EmitCppEpilogue(llvm::raw_ostream &output, llvm::StringRef layoutStr, llvm: output << kCppEpiloguePart2; } +static constexpr StringLiteral kHeaderPreamblePart1 = R"( +#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R4_HPP +#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4R4_HPP + +#include "common_header.hpp" +#include "tensor_descriptor.hpp" +#include "tensor_descriptor_helper.hpp" +#include "gridwise_gemm.hpp" + +namespace ck { + +// GemmM = K +// GemmN = N * Ho * Wo +// GemmK = C * Y * X +template <index_t GridSize, + index_t BlockSize, + typename Float, + typename AccFloat, + typename InGlobalDesc, + typename WeiGlobalDesc, + typename OutGlobalDesc, + typename ConvStrides, + typename ConvDilations, + typename InLeftPads, + typename InRightPads, + index_t GemmMPerBlock, + index_t GemmNPerBlock, + index_t GemmKPerBlock, + index_t GemmMPerThreadSubC, + index_t GemmNPerThreadSubC, + index_t GemmMLevel0Cluster, + index_t GemmNLevel0Cluster, + index_t GemmMLevel1Cluster, + index_t GemmNLevel1Cluster, + index_t GemmKPerThreadLoop, + index_t GemmThreadGemmDataPerReadM, + index_t GemmThreadGemmDataPerReadN, + typename GemmABlockCopyThreadSliceLengths_GemmK_GemmM, + typename GemmABlockCopyThreadClusterLengths_GemmK_GemmM, + index_t GemmABlockCopySrcDataPerRead_GemmK, + index_t GemmABlockCopyDstDataPerWrite_GemmM, + typename GemmBBlockCopyThreadSliceLengths_GemmK_GemmN, + typename GemmBBlockCopyThreadClusterLengths_GemmK_GemmN, + index_t GemmBBlockCopySrcDataPerRead_GemmN, + index_t GemmBBlockCopyDstDataPerWrite_GemmN, + index_t GemmCThreadCopyDstDataPerWrite_GemmN1> +)"; + +static constexpr StringLiteral kHeaderPreamblePart2 = R"( +{ + __device__ void Run(const Float* const __restrict__ p_in_global, + const Float* const __restrict__ p_wei_global, + Float* const __restrict__ p_out_global) const + { +)"; + +static constexpr StringLiteral kHeaderPreamblePart3 = R"( + constexpr auto I0 = Number<0>{}; + constexpr auto I1 = Number<1>{}; + constexpr auto I2 = Number<2>{}; + constexpr auto I3 = Number<3>{}; + + constexpr index_t ConvStrideH = ConvStrides{}[0]; + constexpr index_t ConvStrideW = ConvStrides{}[1]; + + constexpr index_t ConvDilationH = ConvDilations{}[0]; + constexpr index_t ConvDilationW = ConvDilations{}[1]; +)"; + +static constexpr StringLiteral kHeaderEpiloguePart1 = R"( + // GEMM + constexpr auto gridwise_gemm = + GridwiseGemmTransposedANormalBNormalC_v1<GridSize, + BlockSize, + Float, + AccFloat, +)"; + +static constexpr StringLiteral kHeaderEpiloguePart2 = R"( + InMemoryDataOperation::none, + GemmMPerBlock, + GemmNPerBlock, + GemmKPerBlock, + GemmMPerThreadSubC, + GemmNPerThreadSubC, + GemmMLevel0Cluster, + GemmNLevel0Cluster, + GemmMLevel1Cluster, + GemmNLevel1Cluster, + GemmKPerThreadLoop, + GemmThreadGemmDataPerReadM, + GemmThreadGemmDataPerReadN, + GemmABlockCopyThreadSliceLengths_GemmK_GemmM, + GemmABlockCopyThreadClusterLengths_GemmK_GemmM, + Sequence<1, 0>, + Sequence<1, 0>, + 0, + GemmABlockCopySrcDataPerRead_GemmK, + GemmABlockCopyDstDataPerWrite_GemmM, + GemmBBlockCopyThreadSliceLengths_GemmK_GemmN, + GemmBBlockCopyThreadClusterLengths_GemmK_GemmN, + Sequence<0, 1>, + Sequence<0, 1>, + 1, + GemmBBlockCopySrcDataPerRead_GemmN, + GemmBBlockCopyDstDataPerWrite_GemmN, + Sequence<0, 1, 2, 3>, + 3, + GemmCThreadCopyDstDataPerWrite_GemmN1>{}; + + gridwise_gemm.Run(p_wei_global, p_in_global, p_out_global); + } +}; + +} // namespace ck +#endif +)"; + +void EmitHeaderPreamble(llvm::raw_ostream &output, llvm::StringRef layoutStr, llvm::SmallVector<std::string, 3> &tensorDescs) { + output << kHeaderPreamblePart1; + + output << R"( +struct GridwiseConvolutionImplicitGemm_v4r4_)"; + output << layoutStr; + + output << kHeaderPreamblePart2; + + output << kHeaderPreamblePart3; + + output << '\n'; + + output << R"( + constexpr auto )" << tensorDescs[0] << " = InGlobalDesc{};"; + output << R"( + constexpr auto )" << tensorDescs[1] << " = WeiGlobalDesc{};"; + output << R"( + constexpr auto )" << tensorDescs[2] << " = OutGlobalDesc{};"; +} + +void EmitHeaderEpilogue(llvm::raw_ostream &output, llvm::SmallDenseMap<int64_t, std::string> &args) { + output << kHeaderEpiloguePart1; + +// Between Part1 and Part2 emit: +// decltype(wei_e_k_global_desc), +// decltype(in_e_b_global_desc), +// decltype(out_k_b_global_desc), + for (int i = 0; i < 3; ++i) { + output << R"( + decltype()" << args[i] << "),"; + } + + output << kHeaderEpiloguePart2; +} + void EmitLayoutString(llvm::raw_ostream &output, llvm::ArrayRef<mlir::Attribute> &layoutArrayAttr, llvm::StringRef prefix, llvm::StringRef suffix, llvm::StringRef delimiter = "") { for (int i = 0; i < kConv2DTensorDimension; ++i) { auto attr = layoutArrayAttr[i]; @@ -234,11 +390,20 @@ void EmitLayoutString(llvm::raw_ostream &output, llvm::ArrayRef<mlir::Attribute> } } +void EmitHeaderDimensionLengths(llvm::raw_ostream &output, llvm::ArrayRef<mlir::Attribute> &layoutArrayAttr, llvm::StringRef tensorDesc) { + for (int i = 0; i < kConv2DTensorDimension; ++i) { + auto attr = layoutArrayAttr[i]; + if (auto strAttr = attr.dyn_cast<StringAttr>()) { + output << " constexpr index_t " << strAttr.getValue() << " = " << tensorDesc << ".GetLengths()[" << i << "];\n"; + } + } +} + void EmitDimensionVariables(llvm::raw_ostream &output, llvm::ArrayRef<mlir::Attribute> &layoutArrayAttr) { for (int i = 0; i < kConv2DTensorDimension; ++i) { auto attr = layoutArrayAttr[i]; if (auto strAttr = attr.dyn_cast<StringAttr>()) { - output << " const index_t " << strAttr.getValue() << " = CK_PARAM_PROBLEM_"; + output << " constexpr index_t " << strAttr.getValue() << " = CK_PARAM_PROBLEM_"; switch (llvm::toUpper(strAttr.getValue()[0])) { case 'H': @@ -258,7 +423,7 @@ void EmitStrideVariables(llvm::raw_ostream &output, llvm::ArrayRef<mlir::Attribu for (int i = 0; i < kConv2DTensorDimension; ++i) { auto attr = layoutArrayAttr[i]; if (auto strAttr = attr.dyn_cast<StringAttr>()) { - output << " const index_t stride_" << strAttr.getValue() << " = "; + output << " constexpr index_t stride_" << strAttr.getValue() << " = "; if (i == 0) { output << "1;\n"; @@ -272,6 +437,19 @@ void EmitStrideVariables(llvm::raw_ostream &output, llvm::ArrayRef<mlir::Attribu } } +void EmitInterleaveArrayAttrOfStringAttrWithSeparator(llvm::raw_ostream &os, mlir::ArrayAttr &arrayAttr, const StringRef &separator) { + if (arrayAttr) { + interleave(arrayAttr, os, [&](Attribute attr) { + if (auto strAttr = attr.dyn_cast<StringAttr>()) + os << strAttr.getValue(); + }, separator); + } +} + +void EmitInterleaveCommaArrayAttrOfStringAttr(llvm::raw_ostream &os, mlir::ArrayAttr &arrayAttr) { + EmitInterleaveArrayAttrOfStringAttrWithSeparator(os, arrayAttr, ", "); +} + void ObtainModuleInfo(ModuleOp &m, std::string &layoutStr, llvm::SmallVector<std::string, 3> &tensorDescs) { // (TBD verifiying logic) The Module could contain multiple FuncOp, and inside each FuncOp there // should be exactly: @@ -315,7 +493,7 @@ void ObtainModuleInfo(ModuleOp &m, std::string &layoutStr, llvm::SmallVector<std } -std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenCpp(ModuleOp m) { +std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenHeader(ModuleOp m) { std::string resultStr; llvm::raw_string_ostream output(resultStr); @@ -323,6 +501,7 @@ std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenCpp(ModuleOp m) { for (auto f : m.getOps<FuncOp>()) { std::string layoutStr; llvm::SmallVector<std::string, 3> tensorDescs; + llvm::SmallDenseMap<int64_t, std::string> gridwiseGemmArguments; // Obtain critical information from ModuleOp. ObtainModuleInfo(m, layoutStr, tensorDescs); @@ -330,17 +509,154 @@ std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenCpp(ModuleOp m) { int srcLayoutAttrCtr = 0; // Start emitting. + EmitHeaderPreamble(output, layoutStr, tensorDescs); + + f.walk([&output, &srcLayoutAttrCtr, &tensorDescs, &gridwiseGemmArguments](miopen::TransformOp op) { + // get source_layout attribute. + auto srcLayoutAttr = op.getAttrOfType<ArrayAttr>("source_layout"); + if (srcLayoutAttr) { + auto srcLayout = srcLayoutAttr.getValue(); + output << "\n // "; + EmitLayoutString(output, srcLayout, "", "", ", "); + output << '\n'; + + EmitHeaderDimensionLengths(output, srcLayout, tensorDescs[srcLayoutAttrCtr]); + } + output << '\n'; + + // get layout attribute. + auto layoutAttr = op.getAttrOfType<ArrayAttr>("layout"); + std::string inputTensorName; + std::string outputTensorName; + std::string operationSpec; + std::string srcDimSpec; + std::string dstDimSpec; + llvm::raw_string_ostream ins(inputTensorName); + llvm::raw_string_ostream outs(outputTensorName); + llvm::raw_string_ostream ops(operationSpec); + llvm::raw_string_ostream srcs(srcDimSpec); + llvm::raw_string_ostream dsts(dstDimSpec); + + // determine input and output tensor name. + auto immLayoutAttr = op.getAttrOfType<ArrayAttr>("intermediate_layout"); + auto outputLayoutAttr = op.getAttrOfType<ArrayAttr>("output_layout"); + if (srcLayoutAttr) { + inputTensorName = tensorDescs[srcLayoutAttrCtr]; + outs << kVarName[srcLayoutAttrCtr] << "_"; + + srcLayoutAttrCtr++; + } else { + // get intermediate_layout attribute. + if (immLayoutAttr) { + ins << kVarName[srcLayoutAttrCtr - 1] << "_"; + EmitInterleaveArrayAttrOfStringAttrWithSeparator(ins, immLayoutAttr, "_"); + ins << "_desc"; + ins.flush(); + + outs << kVarName[srcLayoutAttrCtr - 1] << "_"; + } + } + EmitInterleaveArrayAttrOfStringAttrWithSeparator(outs, outputLayoutAttr, "_"); + outs << "_desc"; + outs.flush(); + + // determine gridwise GEMM arguments. + auto gridwiseGemmArgPosAttr = op.getAttrOfType<IntegerAttr>("gridwise_gemm_argument_position"); + if (gridwiseGemmArgPosAttr) { + llvm::errs() << "gridwise gemm argument pos: " << gridwiseGemmArgPosAttr.getValue() << "\n"; + llvm::errs() << "tensor: " << outputTensorName << "\n"; + gridwiseGemmArguments[gridwiseGemmArgPosAttr.getInt()] = outputTensorName; + } + + ops << " make_tuple("; + srcs << " make_tuple("; + dsts << " make_tuple("; + + for (auto layoutSpec = layoutAttr.begin(); layoutSpec != layoutAttr.end(); ) { + if (auto layoutSpecDict = layoutSpec->dyn_cast<DictionaryAttr>()) { + auto srcNames = layoutSpecDict.get("source_names").dyn_cast<ArrayAttr>(); + auto dstNames = layoutSpecDict.get("names").dyn_cast<ArrayAttr>(); + + if (auto transform = layoutSpecDict.get("transformation").dyn_cast<StringAttr>()) { + if (transform.getValue() == "PassThrough" || + transform.getValue() == "Merge") { + ops << transform.getValue() << "<"; + EmitInterleaveCommaArrayAttrOfStringAttr(ops, srcNames); + ops << ">{}"; + } else if (transform.getValue() == "Pad") { + ops << transform.getValue() << "<" + << "Sequence<"; + EmitInterleaveCommaArrayAttrOfStringAttr(ops, srcNames); + ops << ">, InLeftPads, InRightPads" << ">{}"; + } else if (transform.getValue() == "Embed") { + ops << transform.getValue() << "<" + << "Sequence<"; + EmitInterleaveCommaArrayAttrOfStringAttr(ops, dstNames); + ops << ">, Sequence<ConvDilationTBD, ConvDilationTBD, 0>>{}"; + } + srcs << "Sequence<" << layoutSpecDict.get("source_dimensions") << ">{}"; + dsts << "Sequence<" << layoutSpecDict.get("dimensions") << ">{}"; + } + } + + ++layoutSpec; + if (layoutSpec != layoutAttr.end()) { + ops << ", "; + srcs << ", "; + dsts << ", "; + } + } + ops << "),\n"; + ops.flush(); + srcs << "),\n"; + srcs.flush(); + dsts << ")"; + dsts.flush(); + + output << " constexpr auto " << outputTensorName << " = transform_tensor_descriptor(\n"; + output << " " << inputTensorName << ",\n"; + output << operationSpec << srcDimSpec << dstDimSpec; + output << ");\n"; + }); + + // TBD get tuning parameters. + //f.walk([&output](miopen::GridwiseGemmOp op) { + // // get op name. + // //output << "op name: " << op.getOperationName() << "\n"; + // //op.dump(); + //}); + + EmitHeaderEpilogue(output, gridwiseGemmArguments); + } + + output.flush(); + return std::make_unique<llvm::StringRef>(resultStr); +} +std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenCpp(ModuleOp m) { + std::string resultStr; + llvm::raw_string_ostream output(resultStr); + + // Enumerate FuncOp instances inside the ModuleOp. + for (auto f : m.getOps<FuncOp>()) { + std::string layoutStr; + llvm::SmallVector<std::string, 3> tensorDescs; + + // Obtain critical information from ModuleOp. + ObtainModuleInfo(m, layoutStr, tensorDescs); + + int srcLayoutAttrCtr = 0; + + // Start emitting. EmitCppPreamble(output, layoutStr); f.walk([&output, &srcLayoutAttrCtr, &tensorDescs](miopen::TransformOp op) { - // get source_layout attribute. auto srcLayoutAttr = op.getAttrOfType<ArrayAttr>("source_layout"); if (srcLayoutAttr) { auto srcLayout = srcLayoutAttr.getValue(); output << " // "; - EmitLayoutString(output, srcLayout, "", "", ", "); + EmitLayoutString(output, srcLayout, "", "", ","); output << '\n'; EmitDimensionVariables(output, srcLayout); @@ -354,20 +670,6 @@ std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenCpp(ModuleOp m) { EmitLayoutString(output, srcLayout, "stride_", "", ", "); output << ">{});\n\n"; } - - //// get layout attribute. - // TBD not used in emitting C++ source wrapper. - // would be used in emitting C++ header. - //auto layoutAttr = op.getAttrOfType<ArrayAttr>("layout"); - //for (auto layoutSpec : layoutAttr) { - // if (auto layoutSpecDict = layoutSpec.dyn_cast<DictionaryAttr>()) { - // //output << "dimensions: " << layoutSpecDict.get("dimensions") << "\n"; - // //output << "names: " << layoutSpecDict.get("names") << "\n"; - // //output << "source_dimensions: " << layoutSpecDict.get("source_dimensions") << "\n"; - // //output << "source_names: " << layoutSpecDict.get("source_names") << "\n"; - // //output << "transformation: " << layoutSpecDict.get("transformation") << "\n"; - // } - //} }); EmitCppInterlude(output); @@ -396,13 +698,13 @@ static TranslateFromMLIRRegistration return success(); }); -//static TranslateFromMLIRRegistration -// toHeader("mlir-to-miopen-h", [](ModuleOp module, llvm::raw_ostream &output) { -// auto sourceCode = mlir::translateModuleToMIOpenHeader(module); -// if (!sourceCode) -// return failure(); -// -// output << *sourceCode; -// return success(); -// }); +static TranslateFromMLIRRegistration + toHeader("mlir-to-miopen-hpp", [](ModuleOp module, llvm::raw_ostream &output) { + auto sourceCode = mlir::translateModuleToMIOpenHeader(module); + if (!sourceCode) + return failure(); + + output << *sourceCode; + return success(); + }); diff --git a/mlir/test/Dialect/MIOpen/CppOutput/transformed.mlir b/mlir/test/Dialect/MIOpen/CppOutput/transformed.mlir deleted file mode 100644 index ca9751191859..000000000000 --- a/mlir/test/Dialect/MIOpen/CppOutput/transformed.mlir +++ /dev/null @@ -1,157 +0,0 @@ -// RUN: mlir-translate -mlir-to-miopen-cpp %s | FileCheck %s - -// CHECK: __launch_bounds__(CK_PARAM_TUNABLE_BLOCK_SIZE, 2) void gridwise_convolution_implicit_gemm_v4r4_kcyx_niciwihi_nokohowo -func @miopen_transformed_conv2d(%filter : memref<?x?x?x?xf32>, %input : memref<?x?x?x?xf32>, %output : memref<?x?x?x?xf32>) { - // filter tensor - %filter_gemmK_gemmM = miopen.transform(%filter) { - layout = [ - { - dimensions = [0], - names = ["gemmK"], - transformation = "merge", - source_dimensions = [1, 2, 3], - source_names = ["c", "y", "x"] - }, - { - dimensions = [1], - names = ["gemmM"], - transformation = "passthrough", - source_dimensions = [0], - source_names = ["n"] - } - ], - source_layout = ["k", "c", "y", "x"] - } : memref<?x?x?x?xf32> to memref<?x?xf32> - - // input tensor - %input_n_c_hipad_wipad = miopen.transform(%input) { - layout = [ - { - dimensions = [0], - names = ["n"], - transformation = "passthorugh", - source_dimensions = [0], - source_names = ["ni"] - }, - { - dimensions = [1], - names = ["c"], - transformation = "passthorugh", - source_dimensions = [1], - source_names = ["ci"] - }, - { - dimensions = [2], - names = ["hipad"], - transformation = "pad", - parameters = [0, 0], - source_dimensions = [2], - source_names = ["hi"] - }, - { - dimensions = [3], - names = ["wipad"], - transformation = "pad", - parameters = [0, 0], - source_dimensions = [3], - source_names = ["wi"] - } - ], - source_layout = ["ni", "ci", "wi", "hi"] - } : memref<?x?x?x?xf32> to memref<?x?x?x?xf32> - - %input_n_c_y_ho_x_wo = miopen.transform(%input_n_c_hipad_wipad) { - layout = [ - { - dimensions = [0], - names = ["n"], - transformation = "passthrough", - source_dimensions = [0], - source_names = ["n"] - }, - { - dimensions = [1], - names = ["c"], - transformation = "passthrough", - source_dimensions = [1], - source_names = ["c"] - }, - { - dimensions = [2, 3], - names = ["y", "ho"], - transformation = "embed", - parameters = [2, [1, 1, 0]], - source_dimensions = [2], - source_names = ["hipad"] - }, - { - dimensions = [4, 5], - names = ["x", "wo"], - transformation = "embed", - parameters = [2, [1, 1, 0]], - source_dimensions = [2], - source_names = ["wipad"] - } - ], - intermediate_layout = ["n", "c", "hipad", "wipad"] - } : memref<?x?x?x?xf32> to memref<?x?x?x?x?x?x?xf32> - - %input_gemmK_gemmN = miopen.transform(%input_n_c_y_ho_x_wo) { - layout = [ - { - dimensions = [0], - names = ["gemmK"], - transformation = "merge", - source_dimensions = [1, 2, 4], - source_names = ["c", "y", "x"] - }, - { - dimensions = [1], - names = ["gemmN"], - transformation = "merge", - source_dimensions = [0, 3, 5], - source_names = ["n", "hipad", "wipad"] - } - ], - intermediate_layout = ["n", "c", "y", "hipad", "x", "wipad"] - } : memref<?x?x?x?x?x?x?xf32> to memref<?x?xf32> - - // output tensor - %output_gemmM_gemmN = miopen.transform(%output) { - layout = [ - { - dimensions = [0], - names = ["gemmM"], - transformation = "passthrough", - source_dimensions = [1], - source_names = ["ko"] - }, - { - dimensions = [1], - names = ["gemmN"], - transformation = "merge", - source_dimensions = [0, 2, 3], - source_names = ["no", "ho", "wo"] - } - ], - source_layout = ["no", "ko", "ho", "wo"] - } : memref<?x?x?x?xf32> to memref<?x?xf32> - - // apply gridwise GEMM - miopen.gridwise_gemm(%filter_gemmK_gemmM, %input_gemmK_gemmN, %output_gemmM_gemmN) { - parameters = [ - // tuning parameters - ] - } : memref<?x?xf32>, - memref<?x?xf32>, - memref<?x?xf32> - - return -} -// CHECK: constexpr auto weight_k_c_y_x_desc = make_native_tensor_descriptor(Sequence<k, c, y, x>{}, Sequence<stride_k, stride_c, stride_y, stride_x>{}); -// CHECK: constexpr auto input_ni_ci_wi_hi_desc = make_native_tensor_descriptor(Sequence<ni, ci, wi, hi>{}, Sequence<stride_ni, stride_ci, stride_wi, stride_hi>{}); -// CHECK: constexpr auto output_no_ko_ho_wo_desc = make_native_tensor_descriptor(Sequence<no, ko, ho, wo>{}, Sequence<stride_no, stride_ko, stride_ho, stride_wo>{}); -// CHECK: constexpr auto gridwise_conv = GridwiseConvolutionImplicitGemm_v4r4_kcyx_niciwihi_nokohowo -// CHECK: decltype(weight_k_c_y_x_desc), -// CHECK: decltype(input_ni_ci_wi_hi_desc), -// CHECK: decltype(output_no_ko_ho_wo_desc), diff --git a/mlir/test/Dialect/MIOpen/translate.mlir b/mlir/test/Dialect/MIOpen/translate.mlir new file mode 100644 index 000000000000..1139100262e2 --- /dev/null +++ b/mlir/test/Dialect/MIOpen/translate.mlir @@ -0,0 +1,159 @@ +// RUN: mlir-translate -mlir-to-miopen-cpp %s | FileCheck -check-prefix=MIOPEN-CPP %s +// RUN: mlir-translate -mlir-to-miopen-hpp %s | FileCheck -check-prefix=MIOPEN-HPP %s + +// MIOPEN-CPP: __launch_bounds__(CK_PARAM_TUNABLE_BLOCK_SIZE, 2) void gridwise_convolution_implicit_gemm_v4r4_kcyx_nicihiwi_nokohowo +// MIOPEN-HPP: struct GridwiseConvolutionImplicitGemm_v4r4_kcyx_nicihiwi_nokohowo +func @miopen_transformed_conv2d(%filter : memref<?x?x?x?xf32>, %input : memref<?x?x?x?xf32>, %output : memref<?x?x?x?xf32>) { + // filter tensor + %filter_gemmK_gemmM = miopen.transform(%filter) { + layout = [ + { + dimensions = [0], + names = ["gemmK"], + transformation = "Merge", + source_dimensions = [1, 2, 3], + source_names = ["c", "y", "x"] + }, + { + dimensions = [1], + names = ["gemmM"], + transformation = "PassThrough", + source_dimensions = [0], + source_names = ["k"] + } + ], + source_layout = ["k", "c", "y", "x"], + output_layout = ["gemmK", "gemmM"], + gridwise_gemm_argument_position = 0 + } : memref<?x?x?x?xf32> to memref<?x?xf32> + + // input tensor + %input_ni_ci_hipad_wipad = miopen.transform(%input) { + layout = [ + { + dimensions = [0], + names = ["ni"], + transformation = "PassThrough", + source_dimensions = [0], + source_names = ["ni"] + }, + { + dimensions = [1], + names = ["ci"], + transformation = "PassThrough", + source_dimensions = [1], + source_names = ["ci"] + }, + { + dimensions = [2, 3], + names = ["hipad", "wipad"], + transformation = "Pad", + parameters = [0, 0], + source_dimensions = [2, 3], + source_names = ["hi", "wi"] + } + ], + source_layout = ["ni", "ci", "hi", "wi"], + output_layout = ["ni", "ci", "hipad", "wipad"] + } : memref<?x?x?x?xf32> to memref<?x?x?x?xf32> + + %input_ni_ci_y_ho_x_wo = miopen.transform(%input_ni_ci_hipad_wipad) { + layout = [ + { + dimensions = [0], + names = ["ni"], + transformation = "PassThrough", + source_dimensions = [0], + source_names = ["ni"] + }, + { + dimensions = [1], + names = ["ci"], + transformation = "PassThrough", + source_dimensions = [1], + source_names = ["ci"] + }, + { + dimensions = [2, 3], + names = ["y", "ho"], + transformation = "Embed", + parameters = [2, [1, 1, 0]], + source_dimensions = [2], + source_names = ["hipad"] + }, + { + dimensions = [4, 5], + names = ["x", "wo"], + transformation = "Embed", + parameters = [2, [1, 1, 0]], + source_dimensions = [3], + source_names = ["wipad"] + } + ], + intermediate_layout = ["ni", "ci", "hipad", "wipad"], + output_layout = ["ni", "ci", "y", "ho", "x", "wo"] + } : memref<?x?x?x?xf32> to memref<?x?x?x?x?x?x?xf32> + + %input_gemmK_gemmN = miopen.transform(%input_ni_ci_y_ho_x_wo) { + layout = [ + { + dimensions = [0], + names = ["gemmK"], + transformation = "Merge", + source_dimensions = [1, 2, 4], + source_names = ["ci", "y", "x"] + }, + { + dimensions = [1], + names = ["gemmN"], + transformation = "Merge", + source_dimensions = [0, 3, 5], + source_names = ["ni", "ho", "wo"] + } + ], + intermediate_layout = ["ni", "ci", "y", "ho", "x", "wo"], + output_layout = ["gemmK", "gemmN"], + gridwise_gemm_argument_position = 1 + } : memref<?x?x?x?x?x?x?xf32> to memref<?x?xf32> + + // output tensor + %output_gemmM_gemmN = miopen.transform(%output) { + layout = [ + { + dimensions = [0], + names = ["gemmM"], + transformation = "PassThrough", + source_dimensions = [1], + source_names = ["ko"] + }, + { + dimensions = [1], + names = ["gemmN"], + transformation = "Merge", + source_dimensions = [0, 2, 3], + source_names = ["no", "ho", "wo"] + } + ], + source_layout = ["no", "ko", "ho", "wo"], + output_layout = ["gemmM", "gemmN"], + gridwise_gemm_argument_position = 2 + } : memref<?x?x?x?xf32> to memref<?x?xf32> + + // apply gridwise GEMM + miopen.gridwise_gemm(%filter_gemmK_gemmM, %input_gemmK_gemmN, %output_gemmM_gemmN) { + parameters = [ + // tuning parameters + ] + } : memref<?x?xf32>, + memref<?x?xf32>, + memref<?x?xf32> + + return +} +// MIOPEN-CPP: constexpr auto weight_k_c_y_x_desc = make_native_tensor_descriptor(Sequence<k, c, y, x>{}, Sequence<stride_k, stride_c, stride_y, stride_x>{}); +// MIOPEN-CPP: constexpr auto input_ni_ci_hi_wi_desc = make_native_tensor_descriptor(Sequence<ni, ci, hi, wi>{}, Sequence<stride_ni, stride_ci, stride_hi, stride_wi>{}); +// MIOPEN-CPP: constexpr auto output_no_ko_ho_wo_desc = make_native_tensor_descriptor(Sequence<no, ko, ho, wo>{}, Sequence<stride_no, stride_ko, stride_ho, stride_wo>{}); +// MIOPEN-CPP: constexpr auto gridwise_conv = GridwiseConvolutionImplicitGemm_v4r4_kcyx_nicihiwi_nokohowo +// MIOPEN-CPP: decltype(weight_k_c_y_x_desc), +// MIOPEN-CPP: decltype(input_ni_ci_hi_wi_desc), +// MIOPEN-CPP: decltype(output_no_ko_ho_wo_desc), _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits