llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mc Author: Jun Wang (jwanggit86) <details> <summary>Changes</summary> Three instructions, namely, buffer_store_lds_dword, buffer_wbinvl1, and buffer_wbinvl1_vol are obsolete in GFX940 and should not be supported. --- Patch is 211.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109223.diff 35 Files Affected: - (modified) clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributes.def (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+76) - (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+9-3) - (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+9-9) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (+27-26) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+16-15) - (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+9-9) - (added) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-globalisel.ll (+567) - (added) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+606) - (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/flat-address-space.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll (+1) - (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+15-15) - (modified) llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+1) - (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+10-10) - (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+23-23) - (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+2-2) - (modified) llvm/test/MC/AMDGPU/gfx10_unsupported.s (+3) - (modified) llvm/test/MC/AMDGPU/gfx11_unsupported.s (+3) - (modified) llvm/test/MC/AMDGPU/gfx12_unsupported.s (+6) - (added) llvm/test/MC/AMDGPU/gfx940_unsupported.s (+11) ``````````diff diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index b295bbbdaaf955..7f016180629e4b 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -432,7 +432,7 @@ __global__ void kernel4(struct S s) { // CHECK-SPIRV-NEXT: ret void // // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S( -// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] { +// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // OPT-NEXT: [[ENTRY:.*:]] // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8 // OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def index bacc8e4e821e5d..8c1c8219690ba5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def @@ -30,5 +30,6 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z") AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id") AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue") AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action") +AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init") #undef AMDGPU_ATTRIBUTE diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index d65e0ae92308e6..b3bcc979e23a5e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -434,6 +434,26 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { indicatePessimisticFixpoint(); return; } + + SmallPtrSet<const Constant *, 8> VisitedConsts; + + for (Instruction &I : instructions(F)) { + if (isa<AddrSpaceCastInst>(I) && + cast<AddrSpaceCastInst>(I).getSrcAddressSpace() == + AMDGPUAS::PRIVATE_ADDRESS) { + removeAssumedBits(FLAT_SCRATCH_INIT); + return; + } + // check for addrSpaceCast in constant expressions + for (const Use &U : I.operands()) { + if (const auto *C = dyn_cast<Constant>(U)) { + if (constHasASCast(C, VisitedConsts)) { + removeAssumedBits(FLAT_SCRATCH_INIT); + return; + } + } + } + } } ChangeStatus updateImpl(Attributor &A) override { @@ -520,6 +540,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV)) removeAssumedBits(COMPLETION_ACTION); + if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A)) + removeAssumedBits(FLAT_SCRATCH_INIT); + return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } @@ -678,6 +701,59 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this, UsedAssumedInformation); } + + // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is + // not to be set. + bool needFlatScratchInit(Attributor &A) { + assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set + + // This is called on each callee; false means callee shouldn't have + // no-flat-scratch-init. + auto CheckForNoFlatScratchInit = [&](Instruction &I) { + const auto &CB = cast<CallBase>(I); + const Function *Callee = CB.getCalledFunction(); + + // Callee == 0 for inline asm or indirect call with known callees. + // In the latter case, updateImpl() already checked the callees and we + // know their FLAT_SCRATCH_INIT bit is set. + // If function has indirect call with unknown callees, the bit is + // already removed in updateImpl() and execution won't reach here. + if (!Callee) + return true; + + return Callee->getIntrinsicID() != + Intrinsic::amdgcn_addrspacecast_nonnull; + }; + + bool UsedAssumedInformation = false; + // If any callee is false (i.e. need FlatScratchInit), + // checkForAllCallLikeInstructions returns false, in which case this + // function returns true. + return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this, + UsedAssumedInformation); + } + + bool constHasASCast(const Constant *C, + SmallPtrSetImpl<const Constant *> &Visited) { + if (!Visited.insert(C).second) + return false; + + if (const auto *CE = dyn_cast<ConstantExpr>(C)) + if (CE->getOpcode() == Instruction::AddrSpaceCast && + CE->getOperand(0)->getType()->getPointerAddressSpace() == + AMDGPUAS::PRIVATE_ADDRESS) + return true; + + for (const Use &U : C->operands()) { + const auto *OpC = dyn_cast<Constant>(U); + if (!OpC || !Visited.insert(OpC).second) + continue; + + if (constHasASCast(OpC, Visited)) + return true; + } + return false; + } }; AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index c6668b24f4ef67..1bc5cd140f14ec 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -3277,12 +3277,18 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_vi <0x6a>; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_vi <0x6b>; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_vi <0x6c>; -defm BUFFER_STORE_LDS_DWORD : MUBUF_Real_vi_gfx90a <0x3d>; +let AssemblerPredicate = isGFX8GFX9NotGFX90A in { +defm BUFFER_STORE_LDS_DWORD : MUBUF_Real_vi <0x3d>; +} + +let AssemblerPredicate = isGFX90AOnly in { +defm BUFFER_STORE_LDS_DWORD : MUBUF_Real_gfx90a <0x3d>; +} -let AssemblerPredicate = isGFX8GFX9 in { +let AssemblerPredicate = isGFX8GFX9NotGFX940 in { defm BUFFER_WBINVL1 : MUBUF_Real_vi <0x3e>; defm BUFFER_WBINVL1_VOL : MUBUF_Real_vi <0x3f>; -} // End AssemblerPredicate = isGFX8GFX9 +} // End AssemblerPredicate = isGFX8GFX9NotGFX940 defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index cff9ce05066793..96bbcb7ed2149a 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -233,9 +233,9 @@ attributes #1 = { nounwind } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index 33b1cc65dc5699..c96c6d39a20b51 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -116,7 +116,7 @@ define amdgpu_kernel void @kernel_calls_extern() { define amdgpu_kernel void @kernel_calls_extern_marked_callsite() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite( ; CHECK-SAME: ) #[[ATTR4]] { -; CHECK-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] +; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]] ; CHECK-NEXT: ret void ; call void @unknown() #0 @@ -136,7 +136,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite( ; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR9]] +; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]] ; CHECK-NEXT: ret void ; call void %indirect() #0 @@ -242,14 +242,14 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) { attributes #0 = { "amdgpu-no-agpr" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" } +; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index 3d4ae84d9c698e..ed136c58379cb3 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -956,7 +956,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR30:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -969,7 +969,7 @@ define void @unused_enqueue_block() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -980,7 +980,7 @@ define internal void @known_func() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -994,7 +994,7 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR30]] { ; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR29:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -1024,34 +1024,35 @@ attributes #6 = { "enqueued-block" } ; AKF_HSA: attributes #[[ATTR7]] = { "enqueued-block" } ; AKF_HSA: attributes #[[ATTR8]] = { "amdgpu-calls" } ;. + ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/109223 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits