Author: Corbin Robeck
Date: 2023-07-25T12:20:13-07:00
New Revision: 7a4968b5a378d1f06e638c99d0e983c35045fb34

URL: 
https://github.com/llvm/llvm-project/commit/7a4968b5a378d1f06e638c99d0e983c35045fb34
DIFF: 
https://github.com/llvm/llvm-project/commit/7a4968b5a378d1f06e638c99d0e983c35045fb34.diff

LOG: [AMDGPU] Add dynamic stack bit info to kernel-resource-usage Rpass output

In code object v5 
(https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata) the AMDGPU 
backend added the .uses_dynamic_stack bit to the kernel metadata to identify 
kernels whose stack usage cannot be determined at compile time (mainly because 
of indirect function calls and recursion). This patch adds this information to 
the output of the kernel-resource-usage remarks.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D156040

Author:    Corbin Robeck <corbin.rob...@amd.com>

Added: 
    

Modified: 
    clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl 
b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
index 9403d12afa05a7..a05e21b37b9127 100644
--- a/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
+++ b/clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -1,11 +1,12 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 
-Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
 
-// expected-remark@+9 {{Function Name: foo}}
-// expected-remark@+8 {{    SGPRs: 13}}
-// expected-remark@+7 {{    VGPRs: 10}}
-// expected-remark@+6 {{    AGPRs: 12}}
-// expected-remark@+5 {{    ScratchSize [bytes/lane]: 0}}
+// expected-remark@+10 {{Function Name: foo}}
+// expected-remark@+9 {{    SGPRs: 13}}
+// expected-remark@+8 {{    VGPRs: 10}}
+// expected-remark@+7 {{    AGPRs: 12}}
+// expected-remark@+6 {{    ScratchSize [bytes/lane]: 0}}
+// expected-remark@+5 {{    Dynamic Stack: False}}
 // expected-remark@+4 {{    Occupancy [waves/SIMD]: 10}}
 // expected-remark@+3 {{    SGPRs Spill: 0}}
 // expected-remark@+2 {{    VGPRs Spill: 0}}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 7cd8e53e65215f..4b9c699879e349 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1293,6 +1293,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks(
     EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
   EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
                           CurrentProgramInfo.ScratchSize);
+  StringRef DynamicStackStr =
+      CurrentProgramInfo.DynamicCallStack ? "True" : "False";
+  EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
   EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
                           CurrentProgramInfo.Occupancy);
   EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",

diff  --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll 
b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index 2616b043324191..7252aa6120cab4 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t 
-pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 
| FileCheck -check-prefix=STDERR %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t 
-pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck 
-check-prefix=STDERR %s
 ; RUN: FileCheck -check-prefix=REMARK %s < %t
 
 ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
@@ -6,6 +6,7 @@
 ; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs: 9
 ; STDERR-NEXT: remark: foo.cl:27:0:     AGPRs: 43
 ; STDERR-NEXT: remark: foo.cl:27:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:27:0:     Occupancy [waves/SIMD]: 5
 ; STDERR-NEXT: remark: foo.cl:27:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs Spill: 0
@@ -55,7 +56,16 @@
 ; REMARK-NEXT: Args:
 ; REMARK-NEXT:   - String:          '    ScratchSize [bytes/lane]: '
 ; REMARK-NEXT:   - ScratchSize:     '0'
-; REMARK-NEXT: ...
+; REMARK-NEXT: ..
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            DynamicStack
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String: ' Dynamic Stack: 
+; REMARK-NEXT:   - DynamicStack: 'False' 
+; REMARK-NEXT: ..
 ; REMARK-NEXT: --- !Analysis
 ; REMARK-NEXT: Pass:            kernel-resource-usage
 ; REMARK-NEXT: Name:            Occupancy
@@ -108,6 +118,7 @@ define amdgpu_kernel void @test_kernel() !dbg !3 {
 ; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     AGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:42:0:     Occupancy [waves/SIMD]: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs Spill: 0
@@ -124,6 +135,7 @@ define void @test_func() !dbg !6 {
 ; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:8:0:     AGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:8:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:8:0:     Occupancy [waves/SIMD]: 8
 ; STDERR-NEXT: remark: foo.cl:8:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs Spill: 0
@@ -137,6 +149,7 @@ define amdgpu_kernel void @empty_kernel() !dbg !7 {
 ; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     AGPRs: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     Dynamic Stack: False
 ; STDERR-NEXT: remark: foo.cl:52:0:     Occupancy [waves/SIMD]: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     SGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs Spill: 0
@@ -144,8 +157,48 @@ define void @empty_func() !dbg !8 {
   ret void
 }
 
+; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs: 39
+; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs: 32
+; STDERR-NEXT: remark: foo.cl:64:0:     AGPRs: 10
+; STDERR-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
+; STDERR-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
+
+define amdgpu_kernel void @test_indirect_call() !dbg !9 {
+  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
+  call void %fptr()
+  ret void
+}
+
+; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs: 39
+; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs: 32
+; STDERR-NEXT: remark: foo.cl:74:0:     AGPRs: 10
+; STDERR-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 64
+; STDERR-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
+; STDERR-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
+
+declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, 
i1 immarg)
+ 
+define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
+  %alloca = alloca <10 x i64>, align 16, addrspace(5)
+  call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 
false)
+  %fptr = load ptr, ptr addrspace(4) @gv.fptr0
+  call void %fptr()
+  ret void
+}
+
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2}
+!llvm.module.flags = !{!11}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: 
true, runtimeVersion: 0, emissionKind: FullDebug)
 !1 = !DIFile(filename: "foo.cl", directory: "/tmp")
@@ -156,3 +209,6 @@ define void @empty_func() !dbg !8 {
 !6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, 
scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
 !7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: 
!4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: 
!0)
 !8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, 
scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, 
type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, 
unit: !0)
+!10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, 
file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: 
DISPFlagDefinition, unit: !0)
+!11 = !{i32 1, !"amdgpu_code_object_version", i32 500}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to