Author: Corbin Robeck Date: 2023-07-25T12:20:13-07:00 New Revision: 7a4968b5a378d1f06e638c99d0e983c35045fb34
URL: https://github.com/llvm/llvm-project/commit/7a4968b5a378d1f06e638c99d0e983c35045fb34 DIFF: https://github.com/llvm/llvm-project/commit/7a4968b5a378d1f06e638c99d0e983c35045fb34.diff LOG: [AMDGPU] Add dynamic stack bit info to kernel-resource-usage Rpass output In code object 5 (https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata), the AMDGPU backend added the .uses_dynamic_stack bit to the kernel metadata to identify kernels which have compile time indeterminable stack usage (indirect function calls and recursion mainly). This patch adds this information to the output of the kernel-resource-usage remarks. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D156040 Author: Corbin Robeck <corbin.rob...@amd.com> Added: Modified: clang/test/Frontend/amdgcn-machine-analysis-remarks.cl llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll Removed: ################################################################################ diff --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl index 9403d12afa05a7..a05e21b37b9127 100644 --- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl +++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl @@ -1,11 +1,12 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null -// expected-remark@+9 {{Function Name: foo}} -// expected-remark@+8 {{ SGPRs: 13}} -// expected-remark@+7 {{ VGPRs: 10}} -// expected-remark@+6 {{ AGPRs: 12}} -// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}} +// expected-remark@+10 {{Function Name: foo}} +// expected-remark@+9 {{ SGPRs: 13}} +// expected-remark@+8 {{ VGPRs: 10}} +// expected-remark@+7 {{ AGPRs: 12}} +// expected-remark@+6 {{ ScratchSize [bytes/lane]: 0}} +// expected-remark@+5 {{ Dynamic Stack: False}} // expected-remark@+4 
{{ Occupancy [waves/SIMD]: 10}} // expected-remark@+3 {{ SGPRs Spill: 0}} // expected-remark@+2 {{ VGPRs Spill: 0}} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 7cd8e53e65215f..4b9c699879e349 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1293,6 +1293,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks( EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR); EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]", CurrentProgramInfo.ScratchSize); + StringRef DynamicStackStr = + CurrentProgramInfo.DynamicCallStack ? "True" : "False"; + EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr); EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]", CurrentProgramInfo.Occupancy); EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill", diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll index 2616b043324191..7252aa6120cab4 100644 --- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s ; RUN: FileCheck -check-prefix=REMARK %s < %t ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel @@ -6,6 +6,7 @@ ; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9 ; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43 ; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:27:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:27:0: Occupancy 
[waves/SIMD]: 5 ; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0 @@ -55,7 +56,16 @@ ; REMARK-NEXT: Args: ; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: ' ; REMARK-NEXT: - ScratchSize: '0' -; REMARK-NEXT: ... +; REMARK-NEXT: .. +; REMARK-NEXT: --- !Analysis +; REMARK-NEXT: Pass: kernel-resource-usage +; REMARK-NEXT: Name: DynamicStack +; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } +; REMARK-NEXT: Function: test_kernel +; REMARK-NEXT: Args: +; REMARK-NEXT: - String: ' Dynamic Stack: +; REMARK-NEXT: - DynamicStack: 'False' +; REMARK-NEXT: .. ; REMARK-NEXT: --- !Analysis ; REMARK-NEXT: Pass: kernel-resource-usage ; REMARK-NEXT: Name: Occupancy @@ -108,6 +118,7 @@ define amdgpu_kernel void @test_kernel() !dbg !3 { ; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0 ; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0 ; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:42:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0 ; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0 @@ -124,6 +135,7 @@ define void @test_func() !dbg !6 { ; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0 ; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0 ; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:8:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 8 ; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0 @@ -137,6 +149,7 @@ define amdgpu_kernel void @empty_kernel() !dbg !7 { ; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0 ; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0 ; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:52:0: Dynamic Stack: False ; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0 ; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0 ; 
STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0 @@ -144,8 +157,48 @@ define void @empty_func() !dbg !8 { ret void } +; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call +; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39 +; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32 +; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10 +; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 +; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True +; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 +; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 +@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 + +define amdgpu_kernel void @test_indirect_call() !dbg !9 { + %fptr = load ptr, ptr addrspace(4) @gv.fptr0 + call void %fptr() + ret void +} + +; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack +; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39 +; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32 +; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10 +; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 64 +; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True +; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 +; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 +; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 + +declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg) + +define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 { + %alloca = alloca <10 x i64>, align 16, addrspace(5) + call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false) + %fptr = load ptr, ptr addrspace(4) @gv.fptr0 + call void %fptr() + ret void +} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} +!llvm.module.flags = !{!11} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, 
isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) !1 = !DIFile(filename: "foo.cl", directory: "/tmp") @@ -156,3 +209,6 @@ define void @empty_func() !dbg !8 { !6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) !7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) !8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!11 = !{i32 1, !"amdgpu_code_object_version", i32 500} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits