vangthao updated this revision to Diff 424344.
vangthao added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Move remarks into its own function. Skip if !ORE. Add clang frontend test. 
Remove LDSSpillSize.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123878/new/

https://reviews.llvm.org/D123878

Files:
  clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
  llvm/lib/Target/AMDGPU/SIProgramInfo.h
  llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll

Index: llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -0,0 +1,169 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
+; RUN: FileCheck -check-prefix=REMARK %s < %t
+
+; STDERR: remark: foo.cl:27:0: Kernel Name: test_kernel
+; STDERR-NEXT: remark: foo.cl:27:0: SGPRs: 24
+; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
+; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
+; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/thread]: 0
+; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5
+; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:27:0: LDS Size [bytes/block]: 512
+; STDERR-NEXT: remark: foo.cl:27:0: ------------------------------
+
+; REMARK-LABEL: --- !Analysis
+; REMARK: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            KernelName
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'Kernel Name: '
+; REMARK-NEXT:   - KernelName:      test_kernel
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumSGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'SGPRs: '
+; REMARK-NEXT:   - NumSGPR:         '24'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumVGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'VGPRs: '
+; REMARK-NEXT:   - NumVGPR:         '9'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumAGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'AGPRs: '
+; REMARK-NEXT:   - NumAGPR:         '43'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            ScratchSize
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'ScratchSize [bytes/thread]: '
+; REMARK-NEXT:   - ScratchSize:     '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            Occupancy
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'Occupancy [waves/SIMD]: '
+; REMARK-NEXT:   - Occupancy:       '5'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            SGPRSpill
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'SGPRs Spill: '
+; REMARK-NEXT:   - SGPRSpill:       '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            VGPRSpill
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'VGPRs Spill: '
+; REMARK-NEXT:   - VGPRSpill:       '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            BytesLDS
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'LDS Size [bytes/block]: '
+; REMARK-NEXT:   - BytesLDS:        '512'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            KernelEnd
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '------------------------------'
+; REMARK-NEXT: ...
+@lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
+
+define amdgpu_kernel void @test_kernel() !dbg !3 {
+  call void asm sideeffect "; clobber v8", "~{v8}"()
+  call void asm sideeffect "; clobber s23", "~{s23}"()
+  call void asm sideeffect "; clobber a42", "~{a42}"()
+  call void asm sideeffect "; use $0", "v"([128 x i32] addrspace(3)* @lds)
+  ret void
+}
+
+; STDERR: remark: foo.cl:42:0: Kernel Name: test_func
+; STDERR-NEXT: remark: foo.cl:42:0: SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/thread]: 0
+; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0
+; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0
+; STDERR-NOT: LDS Size
+; STDERR-NEXT: remark: foo.cl:42:0: ------------------------------
+define void @test_func() !dbg !6 {
+  call void asm sideeffect "; clobber v17", "~{v17}"()
+  call void asm sideeffect "; clobber s11", "~{s11}"()
+  call void asm sideeffect "; clobber a9", "~{a9}"()
+  ret void
+}
+
+; STDERR: remark: foo.cl:8:0: Kernel Name: empty_kernel
+; STDERR-NEXT: remark: foo.cl:8:0: SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/thread]: 0
+; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 10
+; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:8:0: LDS Size [bytes/block]: 0
+; STDERR-NEXT: remark: foo.cl:8:0: ------------------------------
+define amdgpu_kernel void @empty_kernel() !dbg !7 {
+  ret void
+}
+
+; STDERR: remark: foo.cl:52:0: Kernel Name: empty_func
+; STDERR-NEXT: remark: foo.cl:52:0: SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/thread]: 0
+; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0
+; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:52:0: ------------------------------
+define void @empty_func() !dbg !8 {
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!4 = !DISubroutineType(types: !5)
+!5 = !{null}
+!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
Index: llvm/lib/Target/AMDGPU/SIProgramInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -49,6 +49,8 @@
     uint32_t AccumOffset = 0;
     uint32_t TgSplit = 0;
     uint32_t NumSGPR = 0;
+    unsigned SGPRSpill = 0;
+    unsigned VGPRSpill = 0;
     uint32_t LDSSize = 0;
     bool FlatUsed = false;
 
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -69,6 +69,9 @@
                                   uint64_t ScratchSize,
                                   uint64_t CodeSize,
                                   const AMDGPUMachineFunction* MFI);
+  void emitResourceUsageRemarks(const MachineFunction &MF,
+                                const SIProgramInfo &CurrentProgramInfo,
+                                bool isModuleEntryFunction, bool hasMAIInsts);
 
   uint16_t getAmdhsaKernelCodeProperties(
       const MachineFunction &MF) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -27,8 +27,10 @@
 #include "SIMachineFunctionInfo.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -506,6 +508,9 @@
 
   emitFunctionBody();
 
+  emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
+                           STM.hasMAIInsts());
+
   if (isVerbose()) {
     MCSectionELF *CommentSection =
         Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
@@ -876,7 +881,10 @@
   }
 
   unsigned LDSSpillSize =
-    MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
+      MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
+
+  ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
+  ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
 
   ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
   ProgInfo.LDSBlocks =
@@ -1167,3 +1175,92 @@
   AU.addPreserved<AMDGPUResourceUsageAnalysis>();
   AsmPrinter::getAnalysisUsage(AU);
 }
+
+void AMDGPUAsmPrinter::emitResourceUsageRemarks(
+    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
+    bool isModuleEntryFunction, bool hasMAIInsts) {
+  if (!ORE)
+    return;
+
+  const char *Name = "kernel-resource-usage";
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "KernelName",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "Kernel Name: "
+           << ore::NV("KernelName", MF.getFunction().getName());
+  });
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "NumSGPR",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "SGPRs: " << ore::NV("NumSGPR", CurrentProgramInfo.NumSGPR);
+  });
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "NumVGPR",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "VGPRs: " << ore::NV("NumVGPR", CurrentProgramInfo.NumArchVGPR);
+  });
+
+  if (hasMAIInsts) {
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(Name, "NumAGPR",
+                                               MF.getFunction().getSubprogram(),
+                                               &MF.front())
+             << "AGPRs: " << ore::NV("NumAGPR", CurrentProgramInfo.NumAccVGPR);
+    });
+  }
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "ScratchSize",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "ScratchSize [bytes/thread]: "
+           << ore::NV("ScratchSize", CurrentProgramInfo.ScratchSize);
+  });
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "Occupancy",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "Occupancy [waves/SIMD]: "
+           << ore::NV("Occupancy", CurrentProgramInfo.Occupancy);
+  });
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "SGPRSpill",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "SGPRs Spill: "
+           << ore::NV("SGPRSpill", CurrentProgramInfo.SGPRSpill);
+  });
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "VGPRSpill",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "VGPRs Spill: "
+           << ore::NV("VGPRSpill", CurrentProgramInfo.VGPRSpill);
+  });
+
+  if (isModuleEntryFunction) {
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(Name, "BytesLDS",
+                                               MF.getFunction().getSubprogram(),
+                                               &MF.front())
+             << "LDS Size [bytes/block]: "
+             << ore::NV("BytesLDS", CurrentProgramInfo.LDSSize);
+    });
+  }
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(Name, "KernelEnd",
+                                             MF.getFunction().getSubprogram(),
+                                             &MF.front())
+           << "------------------------------";
+  });
+}
Index: clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
===================================================================
--- /dev/null
+++ clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -0,0 +1,18 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
+
+// expected-remark@+10 {{Kernel Name: foo}}
+// expected-remark@+9 {{SGPRs: 9}}
+// expected-remark@+8 {{VGPRs: 10}}
+// expected-remark@+7 {{AGPRs: 12}}
+// expected-remark@+6 {{ScratchSize [bytes/thread]: 0}}
+// expected-remark@+5 {{Occupancy [waves/SIMD]: 10}}
+// expected-remark@+4 {{SGPRs Spill: 0}}
+// expected-remark@+3 {{VGPRs Spill: 0}}
+// expected-remark@+2 {{LDS Size [bytes/block]: 0}}
+// expected-remark@+1 {{------------------------------}}
+__kernel void foo() {
+  __asm volatile ("; clobber s8" :::"s8");
+  __asm volatile ("; clobber v9" :::"v9");
+  __asm volatile ("; clobber a11" :::"a11");
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to