llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Alex Voicu (AlexVlx) <details> <summary>Changes</summary> Albeit not currently enabled, the InferAddressSpaces pass is desirable / profitable for SPIR-V, as it can leverage info that might subsequently be lost as transforms are applied to the IR/resulting SPIR-V. This patch enables the pass for all SPIR-V targets, and is modelled after the AMDGPU implementation. --- Patch is 93.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110897.diff 20 Files Affected: - (modified) clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu (+27-35) - (modified) llvm/lib/Target/SPIRV/CMakeLists.txt (+2) - (modified) llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp (+92) - (modified) llvm/lib/Target/SPIRV/SPIRVTargetMachine.h (+7) - (modified) llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h (+4) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll (+31) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll (+236) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll (+211) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll (+65) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll (+108) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll (+158) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll (+57) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg (+2) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll (+145) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll (+70) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll (+60) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll (+48) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll (+28) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll (+29) - (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll (+187) ``````````diff diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index b295bbbdaaf955..15c8b46d278ea1 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -58,13 +58,11 @@ // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi( @@ -126,13 +124,11 @@ __global__ void kernel1(int *x) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri( @@ -195,7 +191,7 @@ __global__ void kernel2(int &x) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( -// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 // OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 @@ -261,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x, // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi( -// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 @@ -343,7 +339,7 @@ struct S { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( -// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 @@ -446,19 +442,17 @@ __global__ void kernel4(struct S s) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8 -// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4 -// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8 -// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 -// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4 -// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 -// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8 +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8 +// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S( @@ -551,7 +545,7 @@ struct T { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( -// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 // OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0 @@ -631,13 +625,11 @@ __global__ void kernel6(struct T t) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi( @@ -700,7 +692,7 @@ struct SS { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( -// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt index 326343ae278148..0ae292498e4636 100644 --- a/llvm/lib/Target/SPIRV/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/CMakeLists.txt @@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen Core Demangle GlobalISel + Passes + Scalar SPIRVAnalysis MC SPIRVDesc diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index e5384b2eb2c2c1..91bcd68813fc55 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -26,9 +26,15 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" +#include "llvm/Passes/OptimizationLevel.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Utils.h" #include <optional> @@ -91,6 +97,89 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, setRequiresStructuredCFG(false); } +namespace { + enum AddressSpace { + Function = storageClassToAddressSpace(SPIRV::StorageClass::Function), + CrossWorkgroup = + storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup), + UniformConstant = + storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant), + Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup), + Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic) + }; +} + +unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { + const auto *LD = dyn_cast<LoadInst>(V); + if (!LD) + return UINT32_MAX; + + // It must be a load from a pointer to Generic. + assert(V->getType()->isPointerTy() && + V->getType()->getPointerAddressSpace() == AddressSpace::Generic); + + const auto *Ptr = LD->getPointerOperand(); + if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant) + return UINT32_MAX; + // For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup + // storage, as this could only have been legally initialised with a + // CrossWorkgroup (aka device) constant pointer. + return AddressSpace::CrossWorkgroup; +} + +std::pair<const Value *, unsigned> +SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { + using namespace PatternMatch; + + if (auto *II = dyn_cast<IntrinsicInst>(V)) { + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_is_shared: + return std::pair(II->getArgOperand(0), AddressSpace::Workgroup); + case Intrinsic::amdgcn_is_private: + return std::pair(II->getArgOperand(0), AddressSpace::Function); + default: + break; + } + return std::pair(nullptr, UINT32_MAX); + } + // Check the global pointer predication based on + // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and + // the order of 'is_shared' and 'is_private' is not significant. + Value *Ptr; + if (getTargetTriple().getVendor() == Triple::VendorType::AMD && + match( + const_cast<Value *>(V), + m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))), + m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(m_Deferred(Ptr)))))) + return std::pair(Ptr, AddressSpace::CrossWorkgroup); + + return std::pair(nullptr, UINT32_MAX); +} + +bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup) + return false; + return DestAS == AddressSpace::Generic || + DestAS == AddressSpace::CrossWorkgroup; +} + +void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { + PB.registerCGSCCOptimizerLateEPCallback([](CGSCCPassManager &PM, + OptimizationLevel Level) { + if (Level == OptimizationLevel::O0) + return; + + FunctionPassManager FPM; + + // Add infer address spaces pass to the opt pipeline after inlining + // but before SROA to increase SROA opportunities. + FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic)); + + PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + }); +} + namespace { // SPIR-V Code Generator Pass Configuration Options. class SPIRVPassConfig : public TargetPassConfig { @@ -178,6 +267,9 @@ void SPIRVPassConfig::addIRPasses() { addPass(createSPIRVStructurizerPass()); } + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(createInferAddressSpacesPass(AddressSpace::Generic)); + addPass(createSPIRVRegularizerPass()); addPass(createSPIRVPrepareFunctionsPass(TM)); addPass(createSPIRVStripConvergenceIntrinsicsPass()); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h index a1a9f26846153b..24b09febb9d184 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h @@ -43,6 +43,13 @@ class SPIRVTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + unsigned getAssumedAddrSpace(const Value *V) const override; + std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const override; + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override; + + void registerPassBuilderCallbacks(PassBuilder &PB) override; }; } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h index 24047f31fab290..295c0ceeade839 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h @@ -39,6 +39,10 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> { : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + unsigned getFlatAddressSpace() const { + return storageClassToAddressSpace(SPIRV::StorageClass::Generic); + } + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { // SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it // is reasonable to assume the Op is fast / preferable to the expanded loop. diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll new file mode 100644 index 00000000000000..9b65ff44f288f2 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s + +@c0 = addrspace(2) global ptr undef + +; CHECK-LABEL: @generic_ptr_from_constant +; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1) +; CHECK-NEXT: load float, ptr addrspace(1) +define spir_func float @generic_ptr_from_constant() { + %p = load ptr addrspace(4), ptr addrspace(2) @c0 + %v = load float, ptr addrspace(4) %p + ret float %v +} + +%struct.S = type { ptr addrspace(4), ptr addrspace(4) } + +; CHECK-LABEL: @generic_ptr_from_aggregate_argument +; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1) +; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1) +; CHECK: load i32, ptr addrspace(1) +; CHECK: store float %v1, ptr addrspace(1) +; CHECK: ret +define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) { + %p0 = load ptr addrspace(4), ptr addrspace(2) %0 + %f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1 + %p1 = load ptr addrspace(4), ptr addrspace(2) %f1 + %v0 = load i32, ptr addrspace(4) %p0 + %v1 = sitofp i32 %v0 to float + store float %v1, ptr addrspace(4) %p1 + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll new file mode 100644 index 00000000000000..75b23aa30349af --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll @@ -0,0 +1,236 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Trivial optimization of generic addressing + +define float @load_global_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_global_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) + %tmp1 = load float, ptr addrspace(1) %tmp0 + ret float %tmp1 +} + +define float @load_group_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_group_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) + %tmp1 = load float, ptr addrspace(3) %tmp0 + ret float %tmp1 +} + +define float @load_private_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_private_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr + %tmp1 = load float, ptr %tmp0 + ret float %tmp1 +} + +define spir_kernel void @store_global_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_global_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) + store float 0.0, ptr addrspace(1) %tmp0 + ret void +} + +define spir_kernel void @store_group_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_group_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) + store float 0.0, ptr addrspace(3) %tmp0 + ret void +} + +define spir_kernel void @store_private_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: de... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/110897 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits