https://github.com/silee2 updated https://github.com/llvm/llvm-project/pull/71430
>From c76403cf8629b8f7d8a5b7a3ee5da2881713a7f8 Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" <sang.ik....@intel.com> Date: Mon, 6 Nov 2023 18:47:23 +0000 Subject: [PATCH 1/5] [MLIR] Enable GPU Dialect to SYCL runtime integration GPU Dialect lowering to the SYCL runtime is driven by the spirv.target_env attribute attached to gpu.module. As a result, spirv.target_env remains an input to LLVM IR translation. A SPIRVToLLVMIRTranslation that performs no actual translation is added to avoid an 'unregistered dialect' error in mlir-cpu-runner. SelectObjectAttr.cpp is updated to 1) pass the binary size argument to getModuleLoadFn and 2) pass the kernel parameter count to getKernelLaunchFn. This change does not impact CUDA and ROCm usage, since both mlir_cuda_runtime and mlir_rocm_runtime already accept and ignore the extra arguments. --- mlir/include/mlir/Target/LLVMIR/Dialect/All.h | 3 ++ .../Dialect/SPIRV/SPIRVToLLVMIRTranslation.h | 31 +++++++++++ mlir/lib/Target/LLVMIR/CMakeLists.txt | 1 + mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt | 1 + .../LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 50 +++++++++++++---- .../LLVMIR/Dialect/SPIRV/CMakeLists.txt | 13 +++++ .../SPIRV/SPIRVToLLVMIRTranslation.cpp | 31 +++++++++++ mlir/test/CMakeLists.txt | 4 ++ .../Integration/GPU/SYCL/gpu-to-spirv.mlir | 54 +++++++++++++++++++ mlir/test/Integration/GPU/SYCL/lit.local.cfg | 2 + mlir/test/Target/LLVMIR/gpu.mlir | 9 ++-- mlir/test/lit.cfg.py | 3 ++ mlir/test/lit.site.cfg.py.in | 1 + 13 files changed, 188 insertions(+), 15 deletions(-) create mode 100644 mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h create mode 100644 mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt create mode 100644 mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp create mode 100644 mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir create mode 100644 mlir/test/Integration/GPU/SYCL/lit.local.cfg diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h index 0563b9bf3d475a4..5dfc15afb75931a 100644 --- a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h @@ -26,6 +26,7 @@ #include "mlir/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/X86Vector/X86VectorToLLVMIRTranslation.h" namespace mlir { @@ -45,6 +46,7 @@ static inline void registerAllToLLVMIRTranslations(DialectRegistry &registry) { registerOpenACCDialectTranslation(registry); registerOpenMPDialectTranslation(registry); registerROCDLDialectTranslation(registry); + registerSPIRVDialectTranslation(registry); registerX86VectorDialectTranslation(registry); // Extension required for translating GPU offloading Ops. @@ -61,6 +63,7 @@ registerAllGPUToLLVMIRTranslations(DialectRegistry &registry) { registerLLVMDialectTranslation(registry); registerNVVMDialectTranslation(registry); registerROCDLDialectTranslation(registry); + registerSPIRVDialectTranslation(registry); // Extension required for translating GPU offloading Ops.
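// registerSPIRVDialectTranslation (added above) only registers the SPIR-V
// dialect and installs no op translations; it exists so that modules whose
// gpu.module carries a spirv.target_env attribute reach LLVM IR translation
// in mlir-cpu-runner without an 'unregistered dialect' error.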
gpu::registerOffloadingLLVMTranslationInterfaceExternalModels(registry); diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h new file mode 100644 index 000000000000000..e9580a10b4ca780 --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h @@ -0,0 +1,31 @@ +//===- SPIRVToLLVMIRTranslation.h - SPIRV to LLVM IR ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides registration calls for SPIRV dialect to LLVM IR translation. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_SPIRV_SPIRVTOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_SPIRV_SPIRVTOLLVMIRTRANSLATION_H + +namespace mlir { + +class DialectRegistry; +class MLIRContext; + +/// Register the SPIRV dialect and the translation from it to the LLVM IR in the +/// given registry; +void registerSPIRVDialectTranslation(DialectRegistry &registry); + +/// Register the SPIRV dialect and the translation from it in the registry +/// associated with the given context. +void registerSPIRVDialectTranslation(MLIRContext &context); + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_SPIRV_SPIRVTOLLVMIRTRANSLATION_H diff --git a/mlir/lib/Target/LLVMIR/CMakeLists.txt b/mlir/lib/Target/LLVMIR/CMakeLists.txt index 5db0885d70d6e7a..531c15a8703e948 100644 --- a/mlir/lib/Target/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/CMakeLists.txt @@ -58,6 +58,7 @@ add_mlir_translation_library(MLIRToLLVMIRTranslationRegistration MLIROpenACCToLLVMIRTranslation MLIROpenMPToLLVMIRTranslation MLIRROCDLToLLVMIRTranslation + MLIRSPIRVToLLVMIRTranslation ) add_mlir_translation_library(MLIRTargetLLVMIRImport diff --git a/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt index fb0e5cd0649f636..c9d916d8a5d82d1 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt @@ -9,4 +9,5 @@ add_subdirectory(NVVM) add_subdirectory(OpenACC) add_subdirectory(OpenMP) add_subdirectory(ROCDL) +add_subdirectory(SPIRV) add_subdirectory(X86Vector) diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp index 47fe6973778cd7f..6ea0dac89a42c18 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp @@ -52,6 +52,10 @@ class SelectObjectAttrImpl std::string getBinaryIdentifier(StringRef binaryName) { return binaryName.str() + "_bin_cst"; } +// Returns an identifier for the global int64 holding the binary size. +std::string getBinarySizeIdentifier(StringRef binaryName) { + return binaryName.str() + "_bin_size_cst"; +} } // namespace void mlir::gpu::registerOffloadingLLVMTranslationInterfaceExternalModels( @@ -124,6 +128,17 @@ LogicalResult SelectObjectAttrImpl::embedBinary( serializedObj->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage); serializedObj->setAlignment(llvm::MaybeAlign(8)); serializedObj->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); + + // Embed the object size as a global constant.
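// For a gpu.binary named @kernel_module, the lines below emit
//   @kernel_module_bin_size_cst = internal constant i64 4, align 8
// next to the blob itself (see the CHECK lines in
// mlir/test/Target/LLVMIR/gpu.mlir further down), so a loader gets an
// explicit length instead of relying on a terminator. PATCH 2 later drops
// this global and derives the size from the binary's initializer instead.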
+ llvm::Constant *binarySize = + llvm::ConstantInt::get(builder.getInt64Ty(), object.getObject().size()); + llvm::GlobalVariable *serializedSize = new llvm::GlobalVariable( + *module, binarySize->getType(), true, + llvm::GlobalValue::LinkageTypes::InternalLinkage, binarySize, + getBinarySizeIdentifier(op.getName())); + serializedSize->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage); + serializedSize->setAlignment(llvm::MaybeAlign(8)); + serializedSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); return success(); } @@ -172,6 +187,7 @@ class LaunchKernel { IRBuilderBase &builder; mlir::LLVM::ModuleTranslation &moduleTranslation; Type *i32Ty{}; + Type *i64Ty{}; Type *voidTy{}; Type *intPtrTy{}; PointerType *ptrTy{}; @@ -213,6 +229,7 @@ llvm::LaunchKernel::LaunchKernel( mlir::LLVM::ModuleTranslation &moduleTranslation) : module(module), builder(builder), moduleTranslation(moduleTranslation) { i32Ty = builder.getInt32Ty(); + i64Ty = builder.getInt64Ty(); ptrTy = builder.getPtrTy(0); voidTy = builder.getVoidTy(); intPtrTy = builder.getIntPtrTy(module.getDataLayout()); @@ -221,11 +238,11 @@ llvm::LaunchKernel::LaunchKernel( llvm::FunctionCallee llvm::LaunchKernel::getKernelLaunchFn() { return module.getOrInsertFunction( "mgpuLaunchKernel", - FunctionType::get( - voidTy, - ArrayRef<Type *>({ptrTy, intPtrTy, intPtrTy, intPtrTy, intPtrTy, - intPtrTy, intPtrTy, i32Ty, ptrTy, ptrTy, ptrTy}), - false)); + FunctionType::get(voidTy, + ArrayRef<Type *>({ptrTy, intPtrTy, intPtrTy, intPtrTy, + intPtrTy, intPtrTy, intPtrTy, i32Ty, + ptrTy, ptrTy, ptrTy, i64Ty}), + false)); } llvm::FunctionCallee llvm::LaunchKernel::getModuleFunctionFn() { @@ -237,7 +254,7 @@ llvm::FunctionCallee llvm::LaunchKernel::getModuleFunctionFn() { llvm::FunctionCallee llvm::LaunchKernel::getModuleLoadFn() { return module.getOrInsertFunction( "mgpuModuleLoad", - FunctionType::get(ptrTy, ArrayRef<Type *>({ptrTy}), false)); + FunctionType::get(ptrTy, ArrayRef<Type *>({ptrTy, i64Ty}), false)); } llvm::FunctionCallee llvm::LaunchKernel::getModuleLoadJITFn() { @@ -377,10 +394,21 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, if (!binary) return op.emitError() << "Couldn't find the binary: " << binaryIdentifier; + llvm::Constant *paramsCount = + llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands()); + + std::string binarySizeIdentifier = getBinarySizeIdentifier(moduleName); + Value *binarySizeVar = module.getGlobalVariable(binarySizeIdentifier, true); + if (!binarySizeVar) + return op.emitError() << "Couldn't find the binary size: " + << binarySizeIdentifier; + Value *binarySize = + dyn_cast<llvm::GlobalVariable>(binarySizeVar)->getInitializer(); + Value *moduleObject = object.getFormat() == gpu::CompilationTarget::Assembly ? builder.CreateCall(getModuleLoadJITFn(), {binary, optV}) - : builder.CreateCall(getModuleLoadFn(), {binary}); + : builder.CreateCall(getModuleLoadFn(), {binary, binarySize}); // Load the kernel function. Value *moduleFunction = builder.CreateCall( @@ -401,10 +429,10 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, // Create the launch call. 
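// With the i64 parameter count appended, the callee built by
// getKernelLaunchFn corresponds to a host wrapper of the form
// (parameter names here are illustrative):
//   void mgpuLaunchKernel(void *function, intptr_t gridX, intptr_t gridY,
//                         intptr_t gridZ, intptr_t blockX, intptr_t blockY,
//                         intptr_t blockZ, int32_t dynamicMemorySize,
//                         void *stream, void **params, void **extra,
//                         int64_t paramCount);
// paramCount lets runtimes that must bind kernel arguments one at a time
// (as a SYCL runtime does) know how many entries argArray holds; the CUDA
// and ROCm wrappers accept and ignore it.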
Value *nullPtr = ConstantPointerNull::get(ptrTy); - builder.CreateCall( - getKernelLaunchFn(), - ArrayRef<Value *>({moduleFunction, gx, gy, gz, bx, by, bz, - dynamicMemorySize, stream, argArray, nullPtr})); + builder.CreateCall(getKernelLaunchFn(), + ArrayRef<Value *>({moduleFunction, gx, gy, gz, bx, by, bz, + dynamicMemorySize, stream, argArray, + nullPtr, paramsCount})); // Sync & destroy the stream, for synchronous launches. if (handleStream) { diff --git a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt new file mode 100644 index 000000000000000..850b95b8ddc77a0 --- /dev/null +++ b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt @@ -0,0 +1,13 @@ +add_mlir_translation_library(MLIRSPIRVToLLVMIRTranslation + SPIRVToLLVMIRTranslation.cpp + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRIR + MLIRLLVMDialect + MLIRSPIRVDialect + MLIRSupport + MLIRTargetLLVMIRExport + ) diff --git a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp new file mode 100644 index 000000000000000..06038a17f2ef666 --- /dev/null +++ b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp @@ -0,0 +1,31 @@ +//===- SPIRVToLLVMIRTranslation.cpp - Translate SPIRV to LLVM IR ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a translation between the MLIR SPIRV dialect and +// LLVM IR. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h" +#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/Target/LLVMIR/ModuleTranslation.h" + +using namespace mlir; +using namespace mlir::LLVM; + +void mlir::registerSPIRVDialectTranslation(DialectRegistry &registry) { + registry.insert<spirv::SPIRVDialect>(); +} + +void mlir::registerSPIRVDialectTranslation(MLIRContext &context) { + DialectRegistry registry; + registerSPIRVDialectTranslation(registry); + context.appendDialectRegistry(registry); +} diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index d81f3c4b1e20c5a..c26826d1b2c62fe 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -139,6 +139,10 @@ if(MLIR_ENABLE_ROCM_RUNNER) list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime) endif() +if(MLIR_ENABLE_SYCL_RUNNER) + list(APPEND MLIR_TEST_DEPENDS mlir_sycl_runtime) +endif() + list(APPEND MLIR_TEST_DEPENDS MLIRUnitTests) if(LLVM_BUILD_EXAMPLES) diff --git a/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir new file mode 100644 index 000000000000000..bc6f3cea080df20 --- /dev/null +++ b/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir @@ -0,0 +1,54 @@ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0
caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \ +// RUN: | mlir-cpu-runner \ +// RUN: --shared-libs=%mlir_sycl_runtime \ +// RUN: --shared-libs=%mlir_runner_utils \ +// RUN: --entry-point-result=void \ +// RUN: | FileCheck %s + +module @add attributes {gpu.container_module} { + memref.global "private" constant @__constant_3x3xi64_0 : memref<3x3xi64> = dense<[[1, 4098, 3], [16777220, 5, 4294967302], [7, 1099511627784, 9]]> + memref.global "private" constant @__constant_3x3xi64 : memref<3x3xi64> = dense<[[1, 2, 3], [4, 5, 4102], [16777223, 4294967304, 1099511627785]]> + func.func @main() { + %0 = memref.get_global @__constant_3x3xi64 : memref<3x3xi64> + %1 = memref.get_global @__constant_3x3xi64_0 : memref<3x3xi64> + %2 = call @test(%0, %1) : (memref<3x3xi64>, memref<3x3xi64>) -> memref<3x3xi64> + %cast = memref.cast %2 : memref<3x3xi64> to memref<*xi64> + call @printMemrefI64(%cast) : (memref<*xi64>) -> () + return + } + func.func private @printMemrefI64(memref<*xi64>) + func.func @test(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>) -> memref<3x3xi64> { + %c3 = arith.constant 3 : index + %c1 = arith.constant 1 : index + %mem = gpu.alloc host_shared () : memref<3x3xi64> + memref.copy %arg1, %mem : memref<3x3xi64> to memref<3x3xi64> + %memref_0 = gpu.alloc host_shared () : memref<3x3xi64> + memref.copy %arg0, %memref_0 : memref<3x3xi64> to memref<3x3xi64> + %memref_2 = gpu.alloc host_shared () : memref<3x3xi64> + %2 = gpu.wait async + %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xi64>, %mem : memref<3x3xi64>, %memref_2 : memref<3x3xi64>) + gpu.wait [%3] + %alloc = memref.alloc() : memref<3x3xi64> + memref.copy %memref_2, %alloc : memref<3x3xi64> to memref<3x3xi64> + %4 = gpu.wait async + %5 = gpu.dealloc async [%4] %memref_2 : memref<3x3xi64> + %6 = gpu.dealloc async [%5] %memref_0 : memref<3x3xi64> + %7 = gpu.dealloc async [%6] %mem : memref<3x3xi64> + gpu.wait [%7] + return %alloc : memref<3x3xi64> + } + gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} { + gpu.func @test_kernel(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>, %arg2: memref<3x3xi64>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 3, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} { + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = memref.load %arg0[%0, %1] : memref<3x3xi64> + %3 = memref.load %arg1[%0, %1] : memref<3x3xi64> + %4 = arith.addi %2, %3 : i64 + memref.store %4, %arg2[%0, %1] : memref<3x3xi64> + gpu.return + } + } + // CHECK: [2, 4100, 6], + // CHECK: [16777224, 10, 4294971404], + // CHECK: [16777230, 1103806595088, 1099511627794] +} diff --git a/mlir/test/Integration/GPU/SYCL/lit.local.cfg b/mlir/test/Integration/GPU/SYCL/lit.local.cfg new file mode 100644 index 000000000000000..75bac1882eed5c9 --- /dev/null +++ b/mlir/test/Integration/GPU/SYCL/lit.local.cfg @@ -0,0 +1,2 @@ +if not config.enable_sycl_runner: + config.unsupported = True diff --git 
a/mlir/test/Target/LLVMIR/gpu.mlir b/mlir/test/Target/LLVMIR/gpu.mlir index fddbbee962c1aee..8a3fc13e0b9af71 100644 --- a/mlir/test/Target/LLVMIR/gpu.mlir +++ b/mlir/test/Target/LLVMIR/gpu.mlir @@ -4,6 +4,7 @@ module attributes {gpu.container_module} { // CHECK: [[ARGS_TY:%.*]] = type { i32, i32 } // CHECK: @kernel_module_bin_cst = internal constant [4 x i8] c"BLOB", align 8 + // CHECK: @kernel_module_bin_size_cst = internal constant i64 4, align 8 // CHECK: @kernel_module_kernel_kernel_name = private unnamed_addr constant [7 x i8] c"kernel\00", align 1 gpu.binary @kernel_module [#gpu.object<#nvvm.target, "BLOB">] llvm.func @foo() { @@ -17,10 +18,10 @@ module attributes {gpu.container_module} { // CHECK: store i32 32, ptr [[ARG1]], align 4 // CHECK: %{{.*}} = getelementptr ptr, ptr [[ARGS_ARRAY]], i32 1 // CHECK: store ptr [[ARG1]], ptr %{{.*}}, align 8 - // CHECK: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst) + // CHECK: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst, i64 4) // CHECK: [[FUNC:%.*]] = call ptr @mgpuModuleGetFunction(ptr [[MODULE]], ptr @kernel_module_kernel_kernel_name) // CHECK: [[STREAM:%.*]] = call ptr @mgpuStreamCreate() - // CHECK: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 256, ptr [[STREAM]], ptr [[ARGS_ARRAY]], ptr null) + // CHECK: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 256, ptr [[STREAM]], ptr [[ARGS_ARRAY]], ptr null, i64 2) // CHECK: call void @mgpuStreamSynchronize(ptr [[STREAM]]) // CHECK: call void @mgpuStreamDestroy(ptr [[STREAM]]) // CHECK: call void @mgpuModuleUnload(ptr [[MODULE]]) @@ -59,9 +60,9 @@ module attributes {gpu.container_module} { // CHECK: = call ptr @mgpuStreamCreate() // CHECK-NEXT: = alloca {{.*}}, align 8 // CHECK-NEXT: [[ARGS:%.*]] = alloca ptr, i64 0, align 8 - // CHECK-NEXT: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst) + // CHECK-NEXT: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst, i64 4) // CHECK-NEXT: [[FUNC:%.*]] = call ptr @mgpuModuleGetFunction(ptr [[MODULE]], ptr @kernel_module_kernel_kernel_name) - // CHECK-NEXT: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 0, ptr {{.*}}, ptr [[ARGS]], ptr null) + // CHECK-NEXT: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 0, ptr {{.*}}, ptr [[ARGS]], ptr null, i64 0) // CHECK-NEXT: call void @mgpuModuleUnload(ptr [[MODULE]]) // CHECK-NEXT: call void @mgpuStreamSynchronize(ptr %{{.*}}) // CHECK-NEXT: call void @mgpuStreamDestroy(ptr %{{.*}}) diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index da8488373862c36..cb8e1ab9d8a4ca8 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -126,6 +126,9 @@ def add_runtime(name): if config.enable_cuda_runner: tools.extend([add_runtime("mlir_cuda_runtime")]) +if config.enable_sycl_runner: + tools.extend([add_runtime("mlir_sycl_runtime")]) + # The following tools are optional tools.extend( [ diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in index 2de40ba5e8e57e6..c994de0d3d16b7e 100644 --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -31,6 +31,7 @@ config.run_rocm_tests = @MLIR_ENABLE_ROCM_CONVERSIONS@ config.enable_rocm_runner = @MLIR_ENABLE_ROCM_RUNNER@ config.gpu_compilation_format = "@MLIR_GPU_COMPILATION_TEST_FORMAT@" config.rocm_test_chipset = "@ROCM_TEST_CHIPSET@" +config.enable_sycl_runner = @MLIR_ENABLE_SYCL_RUNNER@ 
config.enable_spirv_cpu_runner = @MLIR_ENABLE_SPIRV_CPU_RUNNER@ config.enable_vulkan_runner = @MLIR_ENABLE_VULKAN_RUNNER@ config.enable_bindings_python = @MLIR_ENABLE_BINDINGS_PYTHON@ >From 50c621ebb8c18b131bf2d124337e008ffede80bc Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" <sang.ik....@intel.com> Date: Mon, 13 Nov 2023 19:49:55 +0000 Subject: [PATCH 2/5] Address reviewer comments. --- .../Dialect/SPIRV/SPIRVToLLVMIRTranslation.h | 10 +++---- .../LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 28 ++++--------------- .../SPIRV/SPIRVToLLVMIRTranslation.cpp | 4 +-- 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h index e9580a10b4ca780..2b066a528deb58f 100644 --- a/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h @@ -1,4 +1,4 @@ -//===- SPIRVToLLVMIRTranslation.h - SPIRV to LLVM IR ------------*- C++ -*-===// +//===- SPIRVToLLVMIRTranslation.h - SPIR-V to LLVM IR -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This provides registration calls for SPIRV dialect to LLVM IR translation. +// This provides registration calls for SPIR-V dialect to LLVM IR translation. // //===----------------------------------------------------------------------===// @@ -18,11 +18,11 @@ namespace mlir { class DialectRegistry; class MLIRContext; -/// Register the SPIRV dialect and the translation from it to the LLVM IR in the -/// given registry; +/// Register the SPIR-V dialect and the translation from it to the LLVM IR in +/// the given registry; void registerSPIRVDialectTranslation(DialectRegistry &registry); -/// Register the SPIRV dialect and the translation from it in the registry +/// Register the SPIR-V dialect and the translation from it in the registry /// associated with the given context. void registerSPIRVDialectTranslation(MLIRContext &context); diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp index 6ea0dac89a42c18..54947c16f5c561f 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp @@ -52,10 +52,6 @@ class SelectObjectAttrImpl std::string getBinaryIdentifier(StringRef binaryName) { return binaryName.str() + "_bin_cst"; } -// Returns an identifier for the global int64 holding the binary size. -std::string getBinarySizeIdentifier(StringRef binaryName) { - return binaryName.str() + "_bin_size_cst"; -} } // namespace void mlir::gpu::registerOffloadingLLVMTranslationInterfaceExternalModels( @@ -128,17 +124,6 @@ LogicalResult SelectObjectAttrImpl::embedBinary( serializedObj->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage); serializedObj->setAlignment(llvm::MaybeAlign(8)); serializedObj->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); - - // Embed the object size as a global constant.
- llvm::Constant *binarySize = - llvm::ConstantInt::get(builder.getInt64Ty(), object.getObject().size()); - llvm::GlobalVariable *serializedSize = new llvm::GlobalVariable( - *module, binarySize->getType(), true, - llvm::GlobalValue::LinkageTypes::InternalLinkage, binarySize, - getBinarySizeIdentifier(op.getName())); - serializedSize->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage); - serializedSize->setAlignment(llvm::MaybeAlign(8)); - serializedSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); return success(); } @@ -397,13 +382,12 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, llvm::Constant *paramsCount = llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands()); - std::string binarySizeIdentifier = getBinarySizeIdentifier(moduleName); - Value *binarySizeVar = module.getGlobalVariable(binarySizeIdentifier, true); - if (!binarySizeVar) - return op.emitError() << "Couldn't find the binary size: " - << binarySizeIdentifier; - Value *binarySize = - dyn_cast<llvm::GlobalVariable>(binarySizeVar)->getInitializer(); + auto binaryVar = dyn_cast<llvm::GlobalVariable>(binary); + llvm::Constant *binaryInit = binaryVar->getInitializer(); + auto binaryDataSeq = dyn_cast<llvm::ConstantDataSequential>(binaryInit); + llvm::Constant *binarySize = + llvm::ConstantInt::get(i64Ty, binaryDataSeq->getNumElements() * + binaryDataSeq->getElementByteSize()); Value *moduleObject = object.getFormat() == gpu::CompilationTarget::Assembly diff --git a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp index 06038a17f2ef666..638edca5efde86f 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp @@ -1,4 +1,4 @@ -//===- SPIRVToLLVMIRTranslation.cpp - Translate SPIRV to LLVM IR ----------===// +//===- SPIRVToLLVMIRTranslation.cpp - Translate SPIR-V to LLVM IR ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements a translation between the MLIR SPIRV dialect and +// This file implements a translation between the MLIR SPIR-V dialect and // LLVM IR. // //===----------------------------------------------------------------------===// >From b4068d5eb4cf874879182eba78b9bfa2854e4ec4 Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" <sang.ik....@intel.com> Date: Mon, 13 Nov 2023 23:08:03 +0000 Subject: [PATCH 3/5] Add more integration tests. 
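The series now carries three SYCL integration tests: the original i64 matrix-add test (renamed below to gpu-addi64-to-spirv.mlir), an f32 add launched over a 2x2x2 grid, and an f32 ReLU split across two kernels that stage an i1 mask between them.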
--- .../GPU/SYCL/gpu-addf32-to-spirv.mlir | 56 +++++++++++++ ...to-spirv.mlir => gpu-addi64-to-spirv.mlir} | 0 .../GPU/SYCL/gpu-reluf32-to-spirv.mlir | 79 +++++++++++++++++++ 3 files changed, 135 insertions(+) create mode 100644 mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir rename mlir/test/Integration/GPU/SYCL/{gpu-to-spirv.mlir => gpu-addi64-to-spirv.mlir} (100%) create mode 100644 mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir diff --git a/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir new file mode 100644 index 000000000000000..113a49425de5445 --- /dev/null +++ b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir @@ -0,0 +1,56 @@ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \ +// RUN: | mlir-cpu-runner \ +// RUN: --shared-libs=%mlir_sycl_runtime \ +// RUN: --shared-libs=%mlir_runner_utils \ +// RUN: --entry-point-result=void \ +// RUN: | FileCheck %s + +module @add attributes {gpu.container_module} { + memref.global "private" constant @__constant_2x2x2xf32_0 : memref<2x2x2xf32> = dense<[[[1.1, 2.2], [3.3, 4.4]], [[5.5, 6.6], [7.7, 8.8 ]]]> + memref.global "private" constant @__constant_2x2x2xf32 : memref<2x2x2xf32> = dense<[[[1.2, 2.3], [4.5, 5.8]], [[7.2, 8.3], [10.5, 11.8]]]> + func.func @main() { + %0 = memref.get_global @__constant_2x2x2xf32 : memref<2x2x2xf32> + %1 = memref.get_global @__constant_2x2x2xf32_0 : memref<2x2x2xf32> + %2 = call @test(%0, %1) : (memref<2x2x2xf32>, memref<2x2x2xf32>) -> memref<2x2x2xf32> + %cast = memref.cast %2 : memref<2x2x2xf32> to memref<*xf32> + call @printMemrefF32(%cast) : (memref<*xf32>) -> () + return + } + func.func private @printMemrefF32(memref<*xf32>) + func.func @test(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>) -> memref<2x2x2xf32> { + %c2 = arith.constant 2 : index + %c1 = arith.constant 1 : index + %mem = gpu.alloc host_shared () : memref<2x2x2xf32> + memref.copy %arg1, %mem : memref<2x2x2xf32> to memref<2x2x2xf32> + %memref_0 = gpu.alloc host_shared () : memref<2x2x2xf32> + memref.copy %arg0, %memref_0 : memref<2x2x2xf32> to memref<2x2x2xf32> + %memref_2 = gpu.alloc host_shared () : memref<2x2x2xf32> + %2 = gpu.wait async + %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref_0 : memref<2x2x2xf32>, %mem : memref<2x2x2xf32>, %memref_2 : memref<2x2x2xf32>) + gpu.wait [%3] + %alloc = memref.alloc() : memref<2x2x2xf32> + memref.copy %memref_2, %alloc : memref<2x2x2xf32> to memref<2x2x2xf32> + %4 = gpu.wait async + %5 = gpu.dealloc async [%4] %memref_2 : memref<2x2x2xf32> + %6 = gpu.dealloc async [%5] %memref_0 : memref<2x2x2xf32> + %7 = gpu.dealloc async [%6] %mem : memref<2x2x2xf32> + gpu.wait [%7] + return %alloc : memref<2x2x2xf32> + } + gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} { + gpu.func @test_kernel(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>, %arg2: memref<2x2x2xf32>) kernel 
attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 2>, spirv.entry_point_abi = #spirv.entry_point_abi<>} { + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = gpu.block_id z + %3 = memref.load %arg0[%0, %1, %2] : memref<2x2x2xf32> + %4 = memref.load %arg1[%0, %1, %2] : memref<2x2x2xf32> + %5 = arith.addf %3, %4 : f32 + memref.store %5, %arg2[%0, %1, %2] : memref<2x2x2xf32> + gpu.return + } + } + // CHECK: [2.3, 4.5] + // CHECK: [7.8, 10.2] + // CHECK: [12.7, 14.9] + // CHECK: [18.2, 20.6] +} diff --git a/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir similarity index 100% rename from mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir rename to mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir diff --git a/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir new file mode 100644 index 000000000000000..162a793305e9725 --- /dev/null +++ b/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir @@ -0,0 +1,79 @@ +// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \ +// RUN: | mlir-cpu-runner \ +// RUN: --shared-libs=%mlir_sycl_runtime \ +// RUN: --shared-libs=%mlir_runner_utils \ +// RUN: --entry-point-result=void \ +// RUN: | FileCheck %s + +module @relu attributes {gpu.container_module} { + memref.global "private" constant @__constant_4x5xf32 : memref<4x5xf32> = dense<[ + [-1.000000e-01, -2.000000e-01, -3.000000e-01, 4.000000e-01, 5.000000e-01], + [1.000000e-01, -2.000000e-01, 3.000000e-01, -4.000000e-01, 5.000000e-01], + [1.000000e-01, 2.000000e-01, 3.000000e-01, -4.000000e-01, -5.000000e-01], + [1.000000e-01, 2.000000e-01, 3.000000e-01, 4.000000e-01, 5.000000e-01] + ]> + + func.func @main() { + %c1 = arith.constant 1 : index + %c100 = arith.constant 100 : index + %c0 = arith.constant 0 : index + %0 = memref.get_global @__constant_4x5xf32 : memref<4x5xf32> + + scf.for %arg0 = %c0 to %c100 step %c1 { + %1 = func.call @test(%0) : (memref<4x5xf32>) -> memref<4x5xf32> + %cast = memref.cast %1 : memref<4x5xf32> to memref<*xf32> + func.call @printMemrefF32(%cast) : (memref<*xf32>) -> () + // CHECK: [0, 0, 0, 0.4, 0.5], + // CHECK: [0.1, 0, 0.3, 0, 0.5], + // CHECK: [0.1, 0.2, 0.3, 0, 0], + // CHECK: [0.1, 0.2, 0.3, 0.4, 0.5] + } + return + } + + func.func private @printMemrefF32(memref<*xf32>) + func.func @test(%arg0: memref<4x5xf32>) -> memref<4x5xf32> { + %c5 = arith.constant 5 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.000000e+00 : f32 + %c1 = arith.constant 1 : index + %memref = gpu.alloc host_shared () : memref<4x5xf32> + memref.copy %arg0, %memref : memref<4x5xf32> to memref<4x5xf32> + %memref_0 = gpu.alloc host_shared () : memref<4x5xi1> + %2 = gpu.wait async + %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>) + gpu.wait [%3] + %memref_1 = gpu.alloc host_shared () : memref<4x5xf32> + %4 = gpu.wait async + %5 = 
gpu.launch_func async [%4] @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32, %memref_1 : memref<4x5xf32>) + gpu.wait [%5] + %alloc = memref.alloc() : memref<4x5xf32> + memref.copy %memref_1, %alloc : memref<4x5xf32> to memref<4x5xf32> + %6 = gpu.wait async + %7 = gpu.dealloc async [%6] %memref_1 : memref<4x5xf32> + %8 = gpu.dealloc async [%7] %memref_0 : memref<4x5xi1> + %9 = gpu.dealloc async [%8] %memref : memref<4x5xf32> + return %alloc : memref<4x5xf32> + } + gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} { + gpu.func @test_kernel(%arg0: memref<4x5xf32>, %arg1: f32, %arg2: memref<4x5xi1>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} { + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = memref.load %arg0[%0, %1] : memref<4x5xf32> + %3 = arith.cmpf olt, %2, %arg1 : f32 + memref.store %3, %arg2[%0, %1] : memref<4x5xi1> + gpu.return + } + } + gpu.module @test_kernel_0 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} { + gpu.func @test_kernel(%arg0: memref<4x5xi1>, %arg1: memref<4x5xf32>, %arg2: f32, %arg3: memref<4x5xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} { + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = memref.load %arg0[%0, %1] : memref<4x5xi1> + %3 = memref.load %arg1[%0, %1] : memref<4x5xf32> + %4 = arith.select %2, %arg2, %3 : f32 + memref.store %4, %arg3[%0, %1] : memref<4x5xf32> + gpu.return + } + } +} >From 95559efd242d3b4b98939fcae818bb1b7b36af28 Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" <sang.ik....@intel.com> Date: Mon, 13 Nov 2023 23:11:11 +0000 Subject: [PATCH 4/5] Reorder code. --- mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp index 54947c16f5c561f..80f3d725b55db5f 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp @@ -379,9 +379,6 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, if (!binary) return op.emitError() << "Couldn't find the binary: " << binaryIdentifier; - llvm::Constant *paramsCount = - llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands()); - auto binaryVar = dyn_cast<llvm::GlobalVariable>(binary); llvm::Constant *binaryInit = binaryVar->getInitializer(); auto binaryDataSeq = dyn_cast<llvm::ConstantDataSequential>(binaryInit); @@ -411,6 +408,9 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, stream = builder.CreateCall(getStreamCreateFn(), {}); } + llvm::Constant *paramsCount = + llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands()); + // Create the launch call. Value *nullPtr = ConstantPointerNull::get(ptrTy); builder.CreateCall(getKernelLaunchFn(), >From 2783bc1245f9acf6e1e510ad66ec4564ac2abb23 Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" <sang.ik....@intel.com> Date: Tue, 14 Nov 2023 19:27:56 +0000 Subject: [PATCH 5/5] Address reviewer comments. 
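PATCH 4 above only moves the paramsCount computation down next to the launch call that consumes it; PATCH 5 below hardens the binary-size computation by checking both dyn_casts, so a gpu.binary whose object is not a constant data array now fails with a diagnostic instead of dereferencing null.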
--- .../LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 9 ++++- .../GPU/SYCL/gpu-addf32-to-spirv.mlir | 36 +++++++++---------- .../GPU/SYCL/gpu-addi64-to-spirv.mlir | 36 +++++++++---------- mlir/test/Target/LLVMIR/gpu.mlir | 1 - 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp index 80f3d725b55db5f..270daea0a0737ec 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp @@ -380,8 +380,15 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op, return op.emitError() << "Couldn't find the binary: " << binaryIdentifier; auto binaryVar = dyn_cast<llvm::GlobalVariable>(binary); + if (!binaryVar) + return op.emitError() << "Binary is not a global variable: " + << binaryIdentifier; llvm::Constant *binaryInit = binaryVar->getInitializer(); - auto binaryDataSeq = dyn_cast<llvm::ConstantDataSequential>(binaryInit); + auto binaryDataSeq = + dyn_cast_if_present<llvm::ConstantDataSequential>(binaryInit); + if (!binaryDataSeq) + return op.emitError() << "Couldn't find binary data array: " + << binaryIdentifier; llvm::Constant *binarySize = llvm::ConstantInt::get(i64Ty, binaryDataSeq->getNumElements() * binaryDataSeq->getElementByteSize()); diff --git a/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir index 113a49425de5445..c0e2903aee2d125 100644 --- a/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir +++ b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir @@ -18,24 +18,24 @@ module @add attributes {gpu.container_module} { } func.func private @printMemrefF32(memref<*xf32>) func.func @test(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>) -> memref<2x2x2xf32> { - %c2 = arith.constant 2 : index - %c1 = arith.constant 1 : index - %mem = gpu.alloc host_shared () : memref<2x2x2xf32> - memref.copy %arg1, %mem : memref<2x2x2xf32> to memref<2x2x2xf32> - %memref_0 = gpu.alloc host_shared () : memref<2x2x2xf32> - memref.copy %arg0, %memref_0 : memref<2x2x2xf32> to memref<2x2x2xf32> - %memref_2 = gpu.alloc host_shared () : memref<2x2x2xf32> - %2 = gpu.wait async - %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref_0 : memref<2x2x2xf32>, %mem : memref<2x2x2xf32>, %memref_2 : memref<2x2x2xf32>) - gpu.wait [%3] - %alloc = memref.alloc() : memref<2x2x2xf32> - memref.copy %memref_2, %alloc : memref<2x2x2xf32> to memref<2x2x2xf32> - %4 = gpu.wait async - %5 = gpu.dealloc async [%4] %memref_2 : memref<2x2x2xf32> - %6 = gpu.dealloc async [%5] %memref_0 : memref<2x2x2xf32> - %7 = gpu.dealloc async [%6] %mem : memref<2x2x2xf32> - gpu.wait [%7] - return %alloc : memref<2x2x2xf32> + %c2 = arith.constant 2 : index + %c1 = arith.constant 1 : index + %mem = gpu.alloc host_shared () : memref<2x2x2xf32> + memref.copy %arg1, %mem : memref<2x2x2xf32> to memref<2x2x2xf32> + %memref_0 = gpu.alloc host_shared () : memref<2x2x2xf32> + memref.copy %arg0, %memref_0 : memref<2x2x2xf32> to memref<2x2x2xf32> + %memref_2 = gpu.alloc host_shared () : memref<2x2x2xf32> + %2 = gpu.wait async + %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref_0 : memref<2x2x2xf32>, %mem : memref<2x2x2xf32>, %memref_2 : memref<2x2x2xf32>) + gpu.wait [%3] + %alloc = memref.alloc() : memref<2x2x2xf32> + memref.copy %memref_2, %alloc : 
memref<2x2x2xf32> to memref<2x2x2xf32> + %4 = gpu.wait async + %5 = gpu.dealloc async [%4] %memref_2 : memref<2x2x2xf32> + %6 = gpu.dealloc async [%5] %memref_0 : memref<2x2x2xf32> + %7 = gpu.dealloc async [%6] %mem : memref<2x2x2xf32> + gpu.wait [%7] + return %alloc : memref<2x2x2xf32> } gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} { gpu.func @test_kernel(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>, %arg2: memref<2x2x2xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 2>, spirv.entry_point_abi = #spirv.entry_point_abi<>} { diff --git a/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir index bc6f3cea080df20..4ac1533b75d2034 100644 --- a/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir +++ b/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir @@ -18,24 +18,24 @@ module @add attributes {gpu.container_module} { } func.func private @printMemrefI64(memref<*xi64>) func.func @test(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>) -> memref<3x3xi64> { - %c3 = arith.constant 3 : index - %c1 = arith.constant 1 : index - %mem = gpu.alloc host_shared () : memref<3x3xi64> - memref.copy %arg1, %mem : memref<3x3xi64> to memref<3x3xi64> - %memref_0 = gpu.alloc host_shared () : memref<3x3xi64> - memref.copy %arg0, %memref_0 : memref<3x3xi64> to memref<3x3xi64> - %memref_2 = gpu.alloc host_shared () : memref<3x3xi64> - %2 = gpu.wait async - %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xi64>, %mem : memref<3x3xi64>, %memref_2 : memref<3x3xi64>) - gpu.wait [%3] - %alloc = memref.alloc() : memref<3x3xi64> - memref.copy %memref_2, %alloc : memref<3x3xi64> to memref<3x3xi64> - %4 = gpu.wait async - %5 = gpu.dealloc async [%4] %memref_2 : memref<3x3xi64> - %6 = gpu.dealloc async [%5] %memref_0 : memref<3x3xi64> - %7 = gpu.dealloc async [%6] %mem : memref<3x3xi64> - gpu.wait [%7] - return %alloc : memref<3x3xi64> + %c3 = arith.constant 3 : index + %c1 = arith.constant 1 : index + %mem = gpu.alloc host_shared () : memref<3x3xi64> + memref.copy %arg1, %mem : memref<3x3xi64> to memref<3x3xi64> + %memref_0 = gpu.alloc host_shared () : memref<3x3xi64> + memref.copy %arg0, %memref_0 : memref<3x3xi64> to memref<3x3xi64> + %memref_2 = gpu.alloc host_shared () : memref<3x3xi64> + %2 = gpu.wait async + %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xi64>, %mem : memref<3x3xi64>, %memref_2 : memref<3x3xi64>) + gpu.wait [%3] + %alloc = memref.alloc() : memref<3x3xi64> + memref.copy %memref_2, %alloc : memref<3x3xi64> to memref<3x3xi64> + %4 = gpu.wait async + %5 = gpu.dealloc async [%4] %memref_2 : memref<3x3xi64> + %6 = gpu.dealloc async [%5] %memref_0 : memref<3x3xi64> + %7 = gpu.dealloc async [%6] %mem : memref<3x3xi64> + gpu.wait [%7] + return %alloc : memref<3x3xi64> } gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} { gpu.func @test_kernel(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>, %arg2: memref<3x3xi64>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 3, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} { diff --git 
a/mlir/test/Target/LLVMIR/gpu.mlir b/mlir/test/Target/LLVMIR/gpu.mlir index 8a3fc13e0b9af71..7afc8a4dc7e87f0 100644 --- a/mlir/test/Target/LLVMIR/gpu.mlir +++ b/mlir/test/Target/LLVMIR/gpu.mlir @@ -4,7 +4,6 @@ module attributes {gpu.container_module} { // CHECK: [[ARGS_TY:%.*]] = type { i32, i32 } // CHECK: @kernel_module_bin_cst = internal constant [4 x i8] c"BLOB", align 8 - // CHECK: @kernel_module_bin_size_cst = internal constant i64 4, align 8 // CHECK: @kernel_module_kernel_kernel_name = private unnamed_addr constant [7 x i8] c"kernel\00", align 1 gpu.binary @kernel_module [#gpu.object<#nvvm.target, "BLOB">] llvm.func @foo() { _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits