https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/75647
>From bb15eebae9645e5383f26066093c0734ea76442d Mon Sep 17 00:00:00 2001 From: Jun Wang <jun.wa...@amd.com> Date: Fri, 15 Dec 2023 13:53:54 -0600 Subject: [PATCH 1/2] [AMDGPU] Adding the amdgpu-num-work-groups function attribute A new function attribute named amdgpu-num-work-groups is added. This attribute allows programmers to let the compiler know the number of workgroups to be launched and do optimizations based on that information. --- clang/include/clang/Basic/Attr.td | 7 ++ clang/include/clang/Basic/AttrDocs.td | 23 ++++++ clang/lib/CodeGen/Targets/AMDGPU.cpp | 7 ++ clang/lib/Sema/SemaDeclAttr.cpp | 13 +++ ...a-attribute-supported-attributes-list.test | 1 + .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 4 + llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 6 ++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 3 + .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 1 + .../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 9 ++ .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 15 ++++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 8 ++ .../AMDGPU/attr-amdgpu-num-work-groups.ll | 82 +++++++++++++++++++ 13 files changed, 179 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 5943583d92773a..605fcbbff027b9 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2011,6 +2011,13 @@ def AMDGPUNumVGPR : InheritableAttr { let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } +def AMDGPUNumWorkGroups : InheritableAttr { + let Spellings = [Clang<"amdgpu_num_work_groups", 0>]; + let Args = [UnsignedArgument<"NumWorkGroups">]; + let Documentation = [AMDGPUNumWorkGroupsDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; +} + def AMDGPUKernelCall : DeclOrTypeAttr { let Spellings = [Clang<"amdgpu_kernel">]; let Documentation = [Undocumented]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 77950ab6d877ea..0bf3ccf367284c 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2693,6 +2693,29 @@ An error will be given if: }]; } +def AMDGPUNumWorkGroupsDocs : Documentation { + let Category = DocCatAMDGPUAttributes; + let Content = [{ +The number of work groups specifies the number of work groups when the kernel +is dispatched. + +Clang supports the +``__attribute__((amdgpu_num_work_groups(<num>)))`` attribute for the +AMDGPU target. This attribute may be attached to a kernel function definition +and is an optimization hint. + +``<num>`` parameter specifies the number of work groups. + +If specified, the AMDGPU target backend might be able to produce better machine +code. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes. + }]; +} + def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> { let Content = [{ Clang supports several different calling conventions, depending on the target diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 03ac6b78598fc8..11a0835f37f4a9 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -356,6 +356,13 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (const auto *Attr = FD->getAttr<AMDGPUNumWorkGroupsAttr>()) { + uint32_t NumWG = Attr->getNumWorkGroups(); + + if (NumWG != 0) + F->addFnAttr("amdgpu-num-work-groups", llvm::utostr(NumWG)); + } } /// Emits control constants used to change per-architecture behaviour in the diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 5b29b05dee54b3..3737dd256aff02 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8051,6 +8051,16 @@ static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(::new (S.Context) AMDGPUNumVGPRAttr(S.Context, AL, NumVGPR)); } +static void handleAMDGPUNumWorkGroupsAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + uint32_t NumWG = 0; + Expr *NumWGExpr = AL.getArgAsExpr(0); + if (!checkUInt32Argument(S, AL, NumWGExpr, NumWG)) + return; + + D->addAttr(::new (S.Context) AMDGPUNumWorkGroupsAttr(S.Context, AL, NumWG)); +} + static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // If we try to apply it to a function pointer, don't warn, but don't @@ -9058,6 +9068,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AMDGPUNumVGPR: handleAMDGPUNumVGPRAttr(S, D, AL); break; + case ParsedAttr::AT_AMDGPUNumWorkGroups: + handleAMDGPUNumWorkGroupsAttr(S, D, AL); + break; case ParsedAttr::AT_AVRSignal: handleAVRSignalAttr(S, D, AL); break; diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index bdfda430eea86c..d42bb52cc8bcfa 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -6,6 +6,7 @@ // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function) +// CHECK-NEXT: AMDGPUNumWorkGroups (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUWavesPerEU (SubjectMatchRule_function) // CHECK-NEXT: AVRSignal (SubjectMatchRule_function) // CHECK-NEXT: AbiTag (SubjectMatchRule_record_not_is_union, SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_namespace) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index b51a876750b58b..b9ede45e174a7d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -494,6 +494,10 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, Kern[".max_flat_workgroup_size"] = Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); + unsigned NumWG = MFI.getNumWorkGroups(); + if (NumWG != 0) { + Kern[".num_work_groups"] = Kern.getDocument()->getNode(NumWG); + } Kern[".sgpr_spill_count"] = Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); Kern[".vgpr_spill_count"] = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index f19c5766856408..d7f5c456706ecd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -1108,3 +1108,9 @@ void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) { unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() { return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs; } + +unsigned AMDGPUSubtarget::getNumWorkGroups(const Function &F) const { + const unsigned Default = 0; + return AMDGPU::getUnsignedIntegerAttribute(F, "amdgpu-num-work-groups", Default); +} + diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index b72697973be7a1..b791399c38dff8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -288,6 +288,9 @@ class AMDGPUSubtarget { /// 2) dimension. unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; + /// Return the number of work groups for the function. + unsigned getNumWorkGroups(const Function &F) const; + /// Return true if only a single workitem can be active in a wave. bool isSingleLaneExecution(const Function &Kernel) const; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 48c341917ddec7..2f483e18544a78 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -46,6 +46,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); + NumWorkGroups = ST.getNumWorkGroups(F); Occupancy = ST.computeOccupancy(F, getLDSSize()); CallingConv::ID CC = F.getCallingConv(); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 7ff50c80081d30..fc244552f40da8 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -426,6 +426,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV; + // Default/requested number of work groups for the function. + unsigned NumWorkGroups = 0; + private: unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; @@ -1094,6 +1097,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // \returns true if a function needs or may need AGPRs. bool usesAGPRs(const MachineFunction &MF) const; + + /// \returns Default/requested number of work groups for this function. + unsigned getNumWorkGroups() const { + return NumWorkGroups; + } + }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 4edd7960bd8c40..82e3bca7ab73b1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1221,6 +1221,21 @@ getIntegerPairAttribute(const Function &F, StringRef Name, return Ints; } +unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default) { + Attribute A = F.getFnAttribute(Name); + if (!A.isStringAttribute()) + return Default; + + LLVMContext &Ctx = F.getContext(); + unsigned IntVal = Default; + StringRef Str = A.getValueAsString(); + if (Str.trim().getAsInteger(0, IntVal)) { + Ctx.emitError("can't parse integer attribute " + Name); + return Default; + } + return IntVal; +} + unsigned getVmcntBitMask(const IsaVersion &Version) { return (1 << (getVmcntBitWidthLo(Version.Major) + getVmcntBitWidthHi(Version.Major))) - diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 3c9f330cbcded9..c54c1638fa97a1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -818,6 +818,14 @@ bool shouldEmitConstantsToTextSection(const Triple &TT); /// to integer. int getIntegerAttribute(const Function &F, StringRef Name, int Default); +/// \returns Unsigned Integer value requested using \p F's \p Name attribute. +/// +/// \returns \p Default if attribute is not present. +/// +/// \returns \p Default and emits error if requested value cannot be converted +/// to integer. +unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default); + /// \returns A pair of integer values requested using \p F's \p Name attribute /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired /// is false). diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll new file mode 100644 index 00000000000000..315cd7dc0c0d91 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll @@ -0,0 +1,82 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +; Attribute not specified. +; CHECK-LABEL: {{^}}empty_no_attribute: +define amdgpu_kernel void @empty_no_attribute() { +entry: + ret void +} + +; Ignore if number of work groups is 0. +; CHECK-LABEL: {{^}}empty_num_work_groups_0: +define amdgpu_kernel void @empty_num_work_groups_0() #0 { +entry: + ret void +} +attributes #0 = {"amdgpu-num-work-groups"="0"} + +; Exactly 1 work group. +; CHECK-LABEL: {{^}}empty_num_work_groups_1: +define amdgpu_kernel void @empty_num_work_groups_1() #1 { +entry: + ret void +} +attributes #1 = {"amdgpu-num-work-groups"="1"} + +; Exactly 5 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_5: +define amdgpu_kernel void @empty_num_work_groups_5() #2 { +entry: + ret void +} +attributes #2 = {"amdgpu-num-work-groups"="5"} + +; Exactly 32 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_32: +define amdgpu_kernel void @empty_num_work_groups_32() #3 { +entry: + ret void +} +attributes #3 = {"amdgpu-num-work-groups"="32"} + +; Exactly 50 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_50: +define amdgpu_kernel void @empty_num_work_groups_50() #4 { +entry: + ret void +} +attributes #4 = {"amdgpu-num-work-groups"="50"} + +; Exactly 256 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_256: +define amdgpu_kernel void @empty_num_work_groups_256() #5 { +entry: + ret void +} +attributes #5 = {"amdgpu-num-work-groups"="256"} + +; Exactly 1024 work groups. +; CHECK-LABEL: {{^}}empty_num_work_groups_1024: +define amdgpu_kernel void @empty_num_work_groups_1024() #6 { +entry: + ret void +} +attributes #6 = {"amdgpu-num-work-groups"="1024"} + +; CHECK: .amdgpu_metadata +; CHECK: .name: empty_no_attribute +; CHECK-NEXT: .private_segment_fixed_size: 0 +; CHECK: .name: empty_num_work_groups_0 +; CHECK-NEXT: .private_segment_fixed_size: 0 +; CHECK: .name: empty_num_work_groups_1 +; CHECK-NEXT: .num_work_groups: 1 +; CHECK: .name: empty_num_work_groups_5 +; CHECK-NEXT: .num_work_groups: 5 +; CHECK: .name: empty_num_work_groups_32 +; CHECK-NEXT: .num_work_groups: 32 +; CHECK: .name: empty_num_work_groups_50 +; CHECK-NEXT: .num_work_groups: 50 +; CHECK: .name: empty_num_work_groups_256 +; CHECK-NEXT: .num_work_groups: 256 +; CHECK: .name: empty_num_work_groups_1024 +; CHECK-NEXT: .num_work_groups: 1024 >From b5bceb99a4ab70509cf17ea223494bd718b5f62f Mon Sep 17 00:00:00 2001 From: Jun Wang <jun.wa...@amd.com> Date: Fri, 15 Dec 2023 14:13:20 -0600 Subject: [PATCH 2/2] Fix formatting. --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 4 ++-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 5 +---- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 3 ++- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 ++- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index d7f5c456706ecd..d69a78d3664bcd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -1111,6 +1111,6 @@ unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() { unsigned AMDGPUSubtarget::getNumWorkGroups(const Function &F) const { const unsigned Default = 0; - return AMDGPU::getUnsignedIntegerAttribute(F, "amdgpu-num-work-groups", Default); + return AMDGPU::getUnsignedIntegerAttribute(F, "amdgpu-num-work-groups", + Default); } - diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index fc244552f40da8..1ab6d2bca902ae 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -1099,10 +1099,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, bool usesAGPRs(const MachineFunction &MF) const; /// \returns Default/requested number of work groups for this function. - unsigned getNumWorkGroups() const { - return NumWorkGroups; - } - + unsigned getNumWorkGroups() const { return NumWorkGroups; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 82e3bca7ab73b1..1a763120e04bf1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1221,7 +1221,8 @@ getIntegerPairAttribute(const Function &F, StringRef Name, return Ints; } -unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default) { +unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, + unsigned Default) { Attribute A = F.getFnAttribute(Name); if (!A.isStringAttribute()) return Default; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index c54c1638fa97a1..f395384a2e0899 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -824,7 +824,8 @@ int getIntegerAttribute(const Function &F, StringRef Name, int Default); /// /// \returns \p Default and emits error if requested value cannot be converted /// to integer. -unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, unsigned Default); +unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name, + unsigned Default); /// \returns A pair of integer values requested using \p F's \p Name attribute /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits