https://github.com/jmmartinez updated https://github.com/llvm/llvm-project/pull/132048
From 5f039ef32105132a7edf6203eb0b7825d4b8b503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juama...@amd.com> Date: Thu, 20 Mar 2025 13:23:25 +0100 Subject: [PATCH 1/6] [AMDGPU] Add "lds-buffer-load-insts" attribute for all targets < gfx11 This are used to restrict the availability of buffer_load_lds intrinsics to targets that actually have this instructions. --- clang/lib/Basic/Targets/AMDGPU.cpp | 2 +- clang/test/CodeGen/link-builtin-bitcode.c | 6 +++--- .../CodeGenCXX/dynamic-cast-address-space.cpp | 4 ++-- flang/test/Lower/OpenMP/target_cpu_features.f90 | 4 ++-- llvm/lib/Target/AMDGPU/AMDGPU.td | 16 +++++++++++----- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 3 ++- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 ++- llvm/lib/TargetParser/TargetParser.cpp | 5 +++++ 9 files changed, 31 insertions(+), 15 deletions(-) diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index a42b4589fb5ac..ed578890fc71f 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -260,7 +260,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); - for (auto F : {"image-insts", "gws"}) + for (auto F : {"image-insts", "gws", "lds-buffer-load-insts"}) ReadOnlyFeatures.insert(F); HalfArgsAndReturns = true; } diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c index 470180efa4247..efce9d0af5d49 100644 --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -44,6 +44,6 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] { // CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" } +// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" } diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 8d50c71feb990..dbed7d3ea9197 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -111,9 +111,9 @@ const B& f(A *a) { // CHECK: attributes #[[ATTR3]] = { nounwind } // CHECK: attributes #[[ATTR4]] = { noreturn } //. -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 index ea8efcf5d256b..87079b30860f6 100644 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -11,8 +11,8 @@ !AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", !AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts", !AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", -!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts", -!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]> +!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lds-buffer-load-insts", +!AMDGCN-SAME: "+mai-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]> !NVPTX: module attributes { !NVPTX-SAME: fir.target_cpu = "sm_80" diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6963b24dd8a5e..ac44356f7b722 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1273,6 +1273,12 @@ def FeatureLshlAddU64Inst : SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true", "Has v_lshl_add_u64 instruction">; +def FeatureLDSBufferLoad : SubtargetFeature<"lds-buffer-load-insts", + "HasLDSBufferLoad", + "true", + "The platform has buffer_load lds instructions" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -1294,7 +1300,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad ] >; @@ -1308,7 +1314,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad ] >; @@ -1324,7 +1330,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, - FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder + FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad ] >; @@ -1343,7 +1349,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, - FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder + FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad ] >; @@ -1367,7 +1373,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder + FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad ] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 984b09ff163b3..e7cae77b95584 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3369,7 +3369,8 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT( } bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { - assert(!AMDGPU::isGFX12Plus(STI)); + if (!Subtarget->hasLDSBufferLoad()) + return false; unsigned Opc; unsigned Size = MI.getOperand(3).getImm(); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 301e4c0275ad4..27f1cfc82b2c4 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -193,6 +193,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool SupportsSRAMECC = false; bool DynamicVGPR = false; bool DynamicVGPRBlockSize32 = false; + bool HasLDSBufferLoad = false; // This should not be used directly. 'TargetID' tracks the dynamic settings // for SRAMECC. @@ -1319,6 +1320,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return hasGFX950Insts(); } + bool hasLDSBufferLoad() const { return HasLDSBufferLoad; } + bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 96c113cc5d24c..4cca089cb3be4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10104,7 +10104,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: case Intrinsic::amdgcn_struct_buffer_load_lds: case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: { - assert(!AMDGPU::isGFX12Plus(*Subtarget)); + if (!Subtarget->hasLDSBufferLoad()) + return SDValue(); unsigned Opc; bool HasVIndex = IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds || diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 8731a16b88a5c..9ff70ebad1cdc 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -374,6 +374,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["prng-inst"] = true; Features["wavefrontsize32"] = true; Features["wavefrontsize64"] = true; + Features["lds-buffer-load-insts"] = true; } else if (T.isAMDGCN()) { AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); switch (Kind) { @@ -459,6 +460,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; + Features["lds-buffer-load-insts"] = true; break; case GK_GFX1012: case GK_GFX1011: @@ -483,6 +485,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; + Features["lds-buffer-load-insts"] = true; break; case GK_GFX950: Features["bitop3-insts"] = true; @@ -533,6 +536,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["ci-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; + Features["lds-buffer-load-insts"] = true; break; case GK_GFX90A: Features["gfx90a-insts"] = true; @@ -585,6 +589,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["image-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; + Features["lds-buffer-load-insts"] = true; break; case GK_NONE: break; From 4bed1193f58d4eb012cf7b57674e711c16684a30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juama...@amd.com> Date: Wed, 26 Mar 2025 12:10:12 +0100 Subject: [PATCH 2/6] [Review] Add failure test --- .../AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll new file mode 100644 index 0000000000000..c0552df2a13c3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll @@ -0,0 +1,45 @@ +; RUN: split-file %s %t +; +; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 %t/struct.ll 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 %t/struct.ll 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 %t/struct.ptr.ll 2>&1 | FileCheck --ignore-case --check-prefix=LEGALIZER-FAIL %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 %t/struct.ptr.ll 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 %t/raw.ll 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 %t/raw.ll 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 %t/raw.ptr.ll 2>&1 | FileCheck --ignore-case --check-prefix=LEGALIZER-FAIL %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 %t/raw.ptr.ll 2>&1 | FileCheck --ignore-case %s +; +; CHECK: LLVM ERROR: Cannot select +; LEGALIZER-FAIL: Do not know how to expand this operator's operand! + +;--- struct.ll +declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) + +define amdgpu_ps void @buffer_load_lds(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) { + call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0) + ret void +} + +;--- struct.ptr.ll +declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) + +define amdgpu_ps void @buffer_load_lds(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { + call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0) + ret void +} + +;--- raw.ll +declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) + +define amdgpu_ps void @buffer_load_lds(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) { + call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0) + ret void +} + +;--- raw.ptr.ll +declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) + +define amdgpu_ps void @buffer_load_lds(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { + call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0) + ret void +} From b03c1187227d674c6f99af8dbb4406e05b9f5a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juama...@amd.com> Date: Wed, 26 Mar 2025 16:50:08 +0100 Subject: [PATCH 3/6] [AMDGPU] Use a target feature to enable __builtin_amdgcn_global_load_lds on gfx9 and gfx10 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +- clang/lib/Basic/Targets/AMDGPU.cpp | 2 +- clang/test/CodeGen/link-builtin-bitcode.c | 6 +++--- .../CodeGenCXX/dynamic-cast-address-space.cpp | 4 ++-- ...942.cl => builtins-amdgcn-global-load-lds.cl} | 2 ++ ...cl => builtins-amdgcn-global-load-lds-err.cl} | 16 +++++++++------- flang/test/Lower/OpenMP/target_cpu_features.f90 | 4 ++-- llvm/lib/Target/AMDGPU/AMDGPU.td | 16 ++++++++-------- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 5 ++++- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 ++++- llvm/lib/TargetParser/TargetParser.cpp | 10 +++++----- .../AMDGPU/llvm.amdgcn.global.load.lds.err.ll | 13 +++++++++++++ 13 files changed, 56 insertions(+), 33 deletions(-) rename clang/test/CodeGenOpenCL/{builtins-amdgcn-gfx942.cl => builtins-amdgcn-global-load-lds.cl} (93%) rename clang/test/SemaOpenCL/{builtins-amdgcn-gfx942-err.cl => builtins-amdgcn-global-load-lds-err.cl} (60%) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.err.ll diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 44ef404aee72f..10e2146882795 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -254,7 +254,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "at TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "mem-to-lds-load-insts") //===----------------------------------------------------------------------===// // Deep learning builtins. diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index ed578890fc71f..7757189517e40 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -260,7 +260,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); - for (auto F : {"image-insts", "gws", "lds-buffer-load-insts"}) + for (auto F : {"image-insts", "gws", "mem-to-lds-load-insts"}) ReadOnlyFeatures.insert(F); HalfArgsAndReturns = true; } diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c index efce9d0af5d49..630735dece808 100644 --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -44,6 +44,6 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] { // CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" } +// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" } diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index dbed7d3ea9197..5c16fe2d79738 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -111,9 +111,9 @@ const B& f(A *a) { // CHECK: attributes #[[ATTR3]] = { nounwind } // CHECK: attributes #[[ATTR4]] = { noreturn } //. -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-lds.cl similarity index 93% rename from clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl rename to clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-lds.cl index 789f6e07240d7..62c8deb6e4a89 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-lds.cl @@ -1,5 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s // REQUIRES: amdgpu-registered-target typedef unsigned int u32; diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-lds-err.cl similarity index 60% rename from clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl rename to clang/test/SemaOpenCL/builtins-amdgcn-global-load-lds-err.cl index 0b3f692f33998..d5185a069b5c4 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-global-load-lds-err.cl @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -S -verify=gfx942,expected -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -verify=gfx,expected -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -S -verify=gfx,expected -o - %s +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -verify=gfx,expected -o - %s // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s // REQUIRES: amdgpu-registered-target @@ -8,12 +10,12 @@ void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}} __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}} __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx942-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx942-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} - __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} + __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}} } __attribute__((target("gfx950-insts"))) diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 index 87079b30860f6..adbf7bc497414 100644 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -11,8 +11,8 @@ !AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", !AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts", !AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", -!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lds-buffer-load-insts", -!AMDGCN-SAME: "+mai-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]> +!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts", +!AMDGCN-SAME: "+mem-to-lds-load-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]> !NVPTX: module attributes { !NVPTX-SAME: fir.target_cpu = "sm_80" diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ac44356f7b722..3bf5d33a021d1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1273,10 +1273,10 @@ def FeatureLshlAddU64Inst : SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true", "Has v_lshl_add_u64 instruction">; -def FeatureLDSBufferLoad : SubtargetFeature<"lds-buffer-load-insts", - "HasLDSBufferLoad", +def FeatureMemToLDSLoad : SubtargetFeature<"mem-to-lds-load-insts", + "HasMemToLDSLoad", "true", - "The platform has buffer_load lds instructions" + "The platform has memory to lds instructions (global_load w/lds bit set, buffer_load w/lds bit set or global_load_lds. This does not include scratch_load_lds." >; // Dummy feature used to disable assembler instructions. @@ -1300,7 +1300,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, - FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad + FeatureVmemWriteVgprInOrder ] >; @@ -1314,7 +1314,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad + FeatureVmemWriteVgprInOrder ] >; @@ -1330,7 +1330,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, - FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad + FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder ] >; @@ -1349,7 +1349,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, - FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad + FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad ] >; @@ -1373,7 +1373,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, - FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad + FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad ] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e7cae77b95584..8a081077aa7f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3369,7 +3369,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT( } bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { - if (!Subtarget->hasLDSBufferLoad()) + if (!Subtarget->hasMemToLDSLoad()) return false; unsigned Opc; unsigned Size = MI.getOperand(3).getImm(); @@ -3506,6 +3506,9 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { } bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ + if (!Subtarget->hasMemToLDSLoad()) + return false; + unsigned Opc; unsigned Size = MI.getOperand(3).getImm(); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 27f1cfc82b2c4..0d385614ca586 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -193,7 +193,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool SupportsSRAMECC = false; bool DynamicVGPR = false; bool DynamicVGPRBlockSize32 = false; - bool HasLDSBufferLoad = false; + bool HasMemToLDSLoad = false; // This should not be used directly. 'TargetID' tracks the dynamic settings // for SRAMECC. @@ -1320,7 +1320,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return hasGFX950Insts(); } - bool hasLDSBufferLoad() const { return HasLDSBufferLoad; } + bool hasMemToLDSLoad() const { return HasMemToLDSLoad; } bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4cca089cb3be4..5299098474098 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10104,7 +10104,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: case Intrinsic::amdgcn_struct_buffer_load_lds: case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: { - if (!Subtarget->hasLDSBufferLoad()) + if (!Subtarget->hasMemToLDSLoad()) return SDValue(); unsigned Opc; bool HasVIndex = @@ -10212,6 +10212,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, return SDValue(Load, 0); } case Intrinsic::amdgcn_global_load_lds: { + if (!Subtarget->hasMemToLDSLoad()) + return SDValue(); + unsigned Opc; unsigned Size = Op->getConstantOperandVal(4); switch (Size) { diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 9ff70ebad1cdc..ab02a519f66db 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -374,7 +374,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["prng-inst"] = true; Features["wavefrontsize32"] = true; Features["wavefrontsize64"] = true; - Features["lds-buffer-load-insts"] = true; + Features["mem-to-lds-load-insts"] = true; } else if (T.isAMDGCN()) { AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); switch (Kind) { @@ -460,7 +460,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["lds-buffer-load-insts"] = true; + Features["mem-to-lds-load-insts"] = true; break; case GK_GFX1012: case GK_GFX1011: @@ -485,7 +485,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["lds-buffer-load-insts"] = true; + Features["mem-to-lds-load-insts"] = true; break; case GK_GFX950: Features["bitop3-insts"] = true; @@ -536,7 +536,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["ci-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["lds-buffer-load-insts"] = true; + Features["mem-to-lds-load-insts"] = true; break; case GK_GFX90A: Features["gfx90a-insts"] = true; @@ -589,7 +589,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["image-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["lds-buffer-load-insts"] = true; + Features["mem-to-lds-load-insts"] = true; break; case GK_NONE: break; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.err.ll new file mode 100644 index 0000000000000..383f6c1288d13 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.err.ll @@ -0,0 +1,13 @@ +; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --ignore-case %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --ignore-case %s +; +; CHECK: LLVM ERROR: Cannot select + +declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux) + +define amdgpu_ps void @global_load_lds_dword(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) { + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0) + ret void +} From ee2123ffe216cc1e8697111c740f7f93feee6794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juama...@amd.com> Date: Mon, 31 Mar 2025 09:25:18 +0200 Subject: [PATCH 4/6] [Review] remove declare --- .../CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll | 8 -------- 1 file changed, 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll index c0552df2a13c3..7679db8d113ea 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.lds.err.ll @@ -13,32 +13,24 @@ ; LEGALIZER-FAIL: Do not know how to expand this operator's operand! ;--- struct.ll -declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) - define amdgpu_ps void @buffer_load_lds(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) { call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } ;--- struct.ptr.ll -declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) - define amdgpu_ps void @buffer_load_lds(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } ;--- raw.ll -declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) - define amdgpu_ps void @buffer_load_lds(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) { call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0) ret void } ;--- raw.ptr.ll -declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) - define amdgpu_ps void @buffer_load_lds(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0) ret void From 96d7a93109b6fa1f792b524a24edcaacbd5c7026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juama...@amd.com> Date: Mon, 31 Mar 2025 12:07:04 +0200 Subject: [PATCH 5/6] [Review] mem-to-lds -> vmem-to-lds --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +- clang/lib/Basic/Targets/AMDGPU.cpp | 2 +- clang/test/CodeGen/link-builtin-bitcode.c | 6 +++--- clang/test/CodeGenCXX/dynamic-cast-address-space.cpp | 4 ++-- flang/test/Lower/OpenMP/target_cpu_features.f90 | 2 +- llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ++-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 4 ++-- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++-- llvm/lib/TargetParser/TargetParser.cpp | 10 +++++----- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 10e2146882795..044f7b9f1c2f9 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -254,7 +254,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "at TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "mem-to-lds-load-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "vmem-to-lds-load-insts") //===----------------------------------------------------------------------===// // Deep learning builtins. diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 7757189517e40..c57a70bf6e587 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -260,7 +260,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); - for (auto F : {"image-insts", "gws", "mem-to-lds-load-insts"}) + for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) ReadOnlyFeatures.insert(F); HalfArgsAndReturns = true; } diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c index 630735dece808..963a3956ff808 100644 --- a/clang/test/CodeGen/link-builtin-bitcode.c +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -44,6 +44,6 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] { // CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" } +// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" } +// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" } diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 5c16fe2d79738..5557dbb754ee3 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -111,9 +111,9 @@ const B& f(A *a) { // CHECK: attributes #[[ATTR3]] = { nounwind } // CHECK: attributes #[[ATTR4]] = { noreturn } //. -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 index adbf7bc497414..4532593156eab 100644 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -12,7 +12,7 @@ !AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts", !AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", !AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts", -!AMDGCN-SAME: "+mem-to-lds-load-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]> +!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+vmem-to-lds-load-insts", "+wavefrontsize64"]> !NVPTX: module attributes { !NVPTX-SAME: fir.target_cpu = "sm_80" diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 3bf5d33a021d1..b3f70f2153061 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1273,8 +1273,8 @@ def FeatureLshlAddU64Inst : SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true", "Has v_lshl_add_u64 instruction">; -def FeatureMemToLDSLoad : SubtargetFeature<"mem-to-lds-load-insts", - "HasMemToLDSLoad", +def FeatureMemToLDSLoad : SubtargetFeature<"vmem-to-lds-load-insts", + "HasVMemToLDSLoad", "true", "The platform has memory to lds instructions (global_load w/lds bit set, buffer_load w/lds bit set or global_load_lds. This does not include scratch_load_lds." >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 8a081077aa7f6..6ef7505ec6f62 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3369,7 +3369,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT( } bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { - if (!Subtarget->hasMemToLDSLoad()) + if (!Subtarget->hasVMemToLDSLoad()) return false; unsigned Opc; unsigned Size = MI.getOperand(3).getImm(); @@ -3506,7 +3506,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { } bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ - if (!Subtarget->hasMemToLDSLoad()) + if (!Subtarget->hasVMemToLDSLoad()) return false; unsigned Opc; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 0d385614ca586..e03566ee81f91 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -193,7 +193,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool SupportsSRAMECC = false; bool DynamicVGPR = false; bool DynamicVGPRBlockSize32 = false; - bool HasMemToLDSLoad = false; + bool HasVMemToLDSLoad = false; // This should not be used directly. 'TargetID' tracks the dynamic settings // for SRAMECC. @@ -1320,7 +1320,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return hasGFX950Insts(); } - bool hasMemToLDSLoad() const { return HasMemToLDSLoad; } + bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; } bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5299098474098..a583a5cb990e7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10104,7 +10104,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: case Intrinsic::amdgcn_struct_buffer_load_lds: case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: { - if (!Subtarget->hasMemToLDSLoad()) + if (!Subtarget->hasVMemToLDSLoad()) return SDValue(); unsigned Opc; bool HasVIndex = @@ -10212,7 +10212,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, return SDValue(Load, 0); } case Intrinsic::amdgcn_global_load_lds: { - if (!Subtarget->hasMemToLDSLoad()) + if (!Subtarget->hasVMemToLDSLoad()) return SDValue(); unsigned Opc; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index ab02a519f66db..f7d99ccb57507 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -374,7 +374,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["prng-inst"] = true; Features["wavefrontsize32"] = true; Features["wavefrontsize64"] = true; - Features["mem-to-lds-load-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; } else if (T.isAMDGCN()) { AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); switch (Kind) { @@ -460,7 +460,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["mem-to-lds-load-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; break; case GK_GFX1012: case GK_GFX1011: @@ -485,7 +485,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["mem-to-lds-load-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; break; case GK_GFX950: Features["bitop3-insts"] = true; @@ -536,7 +536,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["ci-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["mem-to-lds-load-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; break; case GK_GFX90A: Features["gfx90a-insts"] = true; @@ -589,7 +589,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["image-insts"] = true; Features["s-memtime-inst"] = true; Features["gws"] = true; - Features["mem-to-lds-load-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; break; case GK_NONE: break; From b625c81b8bd6461e40623e2c7d3424e92bcd7bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juama...@amd.com> Date: Thu, 13 Mar 2025 10:00:28 +0100 Subject: [PATCH 6/6] [Clang][AMDGPU] Expose buffer load lds as a clang builtin CK is using either inline assembly or inline llvm-ir builtins to generate buffer_load_dword lds instructions. This patch exposes this instruction as a builtin. Related to SWDEV-519702 and SWDEV-518861 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 ++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 ++-- clang/lib/Sema/SemaAMDGPU.cpp | 7 +++---- .../CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl | 9 +++++++++ .../builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl | 10 ++++++++++ ...tins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl | 6 ++++++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +++- 7 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 044f7b9f1c2f9..4b347e0631e22 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -162,6 +162,8 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n") BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n") +TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts") + //===----------------------------------------------------------------------===// // Ballot builtins. //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2306cb4d7cac4..e4d80012381d5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13052,6 +13052,6 @@ def err_acc_decl_for_routine : Error<"expected function or lambda declaration for 'routine' construct">; // AMDGCN builtins diagnostics -def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">; -def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">; +def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">; +def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">; } // end of sema component. diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index a4d075dfd0768..7fec099374152 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -35,6 +35,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap); switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds: case AMDGPU::BI__builtin_amdgcn_global_load_lds: { constexpr const int SizeIdx = 2; llvm::APSInt Size; @@ -54,11 +55,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, [[fallthrough]]; } default: - Diag(ArgExpr->getExprLoc(), - diag::err_amdgcn_global_load_lds_size_invalid_value) + Diag(ArgExpr->getExprLoc(), diag::err_amdgcn_load_lds_size_invalid_value) << ArgExpr->getSourceRange(); - Diag(ArgExpr->getExprLoc(), - diag::note_amdgcn_global_load_lds_size_valid_value) + Diag(ArgExpr->getExprLoc(), diag::note_amdgcn_load_lds_size_valid_value) << HasGFX950Insts << ArgExpr->getSourceRange(); return true; } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl index 3403b69e07e4b..5e3ed9027c17a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl @@ -170,3 +170,12 @@ v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) { return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0); } + +// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3) +// CHECK-NEXT: ret void +// +void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) { + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl new file mode 100644 index 0000000000000..5915393ae7f56 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s +// REQUIRES: amdgpu-registered-target + +void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) { + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}} + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl new file mode 100644 index 0000000000000..768f894e9180d --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-target-error.cl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s +// REQUIRES: amdgpu-registered-target + +void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) { + __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f53016f62abbe..7e87858249d7e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1863,7 +1863,9 @@ class AMDGPURawBufferLoadLDS : Intrinsic < ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS; -class AMDGPURawPtrBufferLoadLDS : Intrinsic < +class AMDGPURawPtrBufferLoadLDS : + ClangBuiltin<"__builtin_amdgcn_raw_ptr_buffer_load_lds">, + Intrinsic < [], [AMDGPUBufferRsrcTy, // rsrc(SGPR) LLVMQualPointerType<3>, // LDS base offset _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits