vangthao updated this revision to Diff 440447.
vangthao added a comment.

Remove "--------" delimiter. Change ScratchSize [bytes/thread] to ScratchSize 
[bytes/lane]. Use lambda expression to emit remarks. Do not output yaml if 
specific remark is not enabled. Add indentation to make it easier to tell which 
resource usage remark belong to which kernel.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123878/new/

https://reviews.llvm.org/D123878

Files:
  clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
  llvm/lib/Target/AMDGPU/SIProgramInfo.h
  llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll

Index: llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -0,0 +1,158 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
+; RUN: FileCheck -check-prefix=REMARK %s < %t
+
+; STDERR: remark: foo.cl:27:0: Kernel Name: test_kernel
+; STDERR-NEXT: remark: foo.cl:27:0:     SGPRs: 24
+; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs: 9
+; STDERR-NEXT: remark: foo.cl:27:0:     AGPRs: 43
+; STDERR-NEXT: remark: foo.cl:27:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     Occupancy [waves/SIMD]: 5
+; STDERR-NEXT: remark: foo.cl:27:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:27:0:     LDS Size [bytes/block]: 512
+
+; REMARK-LABEL: --- !Analysis
+; REMARK: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            KernelName
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          'Kernel Name: '
+; REMARK-NEXT:   - KernelName:      test_kernel
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumSGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    SGPRs: '
+; REMARK-NEXT:   - NumSGPR:         '24'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumVGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    VGPRs: '
+; REMARK-NEXT:   - NumVGPR:         '9'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            NumAGPR
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    AGPRs: '
+; REMARK-NEXT:   - NumAGPR:         '43'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            ScratchSize
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    ScratchSize [bytes/lane]: '
+; REMARK-NEXT:   - ScratchSize:     '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            Occupancy
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    Occupancy [waves/SIMD]: '
+; REMARK-NEXT:   - Occupancy:       '5'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            SGPRSpill
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    SGPRs Spill: '
+; REMARK-NEXT:   - SGPRSpill:       '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            VGPRSpill
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    VGPRs Spill: '
+; REMARK-NEXT:   - VGPRSpill:       '0'
+; REMARK-NEXT: ...
+; REMARK-NEXT: --- !Analysis
+; REMARK-NEXT: Pass:            kernel-resource-usage
+; REMARK-NEXT: Name:            BytesLDS
+; REMARK-NEXT: DebugLoc:        { File: foo.cl, Line: 27, Column: 0 }
+; REMARK-NEXT: Function:        test_kernel
+; REMARK-NEXT: Args:
+; REMARK-NEXT:   - String:          '    LDS Size [bytes/block]: '
+; REMARK-NEXT:   - BytesLDS:        '512'
+; REMARK-NEXT: ...
+
+@lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
+
+define amdgpu_kernel void @test_kernel() !dbg !3 {
+  call void asm sideeffect "; clobber v8", "~{v8}"()
+  call void asm sideeffect "; clobber s23", "~{s23}"()
+  call void asm sideeffect "; clobber a42", "~{a42}"()
+  call void asm sideeffect "; use $0", "v"([128 x i32] addrspace(3)* @lds)
+  ret void
+}
+
+; STDERR: remark: foo.cl:42:0: Kernel Name: test_func
+; STDERR-NEXT: remark: foo.cl:42:0:     SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     Occupancy [waves/SIMD]: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:42:0:     VGPRs Spill: 0
+; STDERR-NOT: LDS Size
+define void @test_func() !dbg !6 {
+  call void asm sideeffect "; clobber v17", "~{v17}"()
+  call void asm sideeffect "; clobber s11", "~{s11}"()
+  call void asm sideeffect "; clobber a9", "~{a9}"()
+  ret void
+}
+
+; STDERR: remark: foo.cl:8:0: Kernel Name: empty_kernel
+; STDERR-NEXT: remark: foo.cl:8:0:     SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     Occupancy [waves/SIMD]: 10
+; STDERR-NEXT: remark: foo.cl:8:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     VGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:8:0:     LDS Size [bytes/block]: 0
+define amdgpu_kernel void @empty_kernel() !dbg !7 {
+  ret void
+}
+
+; STDERR: remark: foo.cl:52:0: Kernel Name: empty_func
+; STDERR-NEXT: remark: foo.cl:52:0:     SGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     AGPRs: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     ScratchSize [bytes/lane]: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     Occupancy [waves/SIMD]: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     SGPRs Spill: 0
+; STDERR-NEXT: remark: foo.cl:52:0:     VGPRs Spill: 0
+define void @empty_func() !dbg !8 {
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!4 = !DISubroutineType(types: !5)
+!5 = !{null}
+!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
Index: llvm/lib/Target/AMDGPU/SIProgramInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -49,6 +49,8 @@
     uint32_t AccumOffset = 0;
     uint32_t TgSplit = 0;
     uint32_t NumSGPR = 0;
+    unsigned SGPRSpill = 0;
+    unsigned VGPRSpill = 0;
     uint32_t LDSSize = 0;
     bool FlatUsed = false;
 
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -69,6 +69,9 @@
                                   uint64_t ScratchSize,
                                   uint64_t CodeSize,
                                   const AMDGPUMachineFunction* MFI);
+  void emitResourceUsageRemarks(const MachineFunction &MF,
+                                const SIProgramInfo &CurrentProgramInfo,
+                                bool isModuleEntryFunction, bool hasMAIInsts);
 
   uint16_t getAmdhsaKernelCodeProperties(
       const MachineFunction &MF) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -27,8 +27,10 @@
 #include "SIMachineFunctionInfo.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -506,6 +508,9 @@
 
   emitFunctionBody();
 
+  emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
+                           STM.hasMAIInsts());
+
   if (isVerbose()) {
     MCSectionELF *CommentSection =
         Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
@@ -875,6 +880,9 @@
     LDSAlignShift = 9;
   }
 
+  ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
+  ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
+
   ProgInfo.LDSSize = MFI->getLDSSize();
   ProgInfo.LDSBlocks =
       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
@@ -1174,3 +1182,58 @@
   AU.addPreserved<AMDGPUResourceUsageAnalysis>();
   AsmPrinter::getAnalysisUsage(AU);
 }
+
+void AMDGPUAsmPrinter::emitResourceUsageRemarks(
+    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
+    bool isModuleEntryFunction, bool hasMAIInsts) {
+  if (!ORE)
+    return;
+
+  const char *Name = "kernel-resource-usage";
+  const char *Indent = "    ";
+
+  // If the remark is not specifically enabled, do not output to yaml
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
+    return;
+
+  auto EmitResourceUsageRemark = [&](StringRef RemarkName,
+                                     StringRef RemarkLabel, auto &&Argument) {
+    // Add an indent for every line besides the line with the kernel name. This
+    // makes it easier to tell which resource usage go with which kernel since
+    // the kernel name will always be displayed first.
+    std::string LabelStr = RemarkLabel.str() + ": ";
+    if (!RemarkName.equals("KernelName"))
+      LabelStr = Indent + LabelStr;
+
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(Name, RemarkName,
+                                               MF.getFunction().getSubprogram(),
+                                               &MF.front())
+             << LabelStr << ore::NV(RemarkName, Argument);
+    });
+  };
+
+  // FIXME: Formatting here is pretty nasty because clang does not accept
+  // newlines from diagnostics. This forces us to emit multiple diagnostic
+  // remarks to simulate newlines. If and when clang does accept newlines, this
+  // formatting should be aggregated into one remark with newlines to avoid
+  // printing multiple diagnostic location and diag opts.
+  EmitResourceUsageRemark("KernelName", "Kernel Name",
+                          MF.getFunction().getName());
+  EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
+  EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
+  if (hasMAIInsts)
+    EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
+  EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
+                          CurrentProgramInfo.ScratchSize);
+  EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
+                          CurrentProgramInfo.Occupancy);
+  EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
+                          CurrentProgramInfo.SGPRSpill);
+  EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
+                          CurrentProgramInfo.VGPRSpill);
+  if (isModuleEntryFunction)
+    EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
+                            CurrentProgramInfo.LDSSize);
+}
Index: clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
===================================================================
--- /dev/null
+++ clang/test/Frontend/amdgcn-machine-analysis-remarks.cl
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
+
+// expected-remark@+9 {{Kernel Name: foo}}
+// expected-remark@+8 {{    SGPRs: 9}}
+// expected-remark@+7 {{    VGPRs: 10}}
+// expected-remark@+6 {{    AGPRs: 12}}
+// expected-remark@+5 {{    ScratchSize [bytes/lane]: 0}}
+// expected-remark@+4 {{    Occupancy [waves/SIMD]: 10}}
+// expected-remark@+3 {{    SGPRs Spill: 0}}
+// expected-remark@+2 {{    VGPRs Spill: 0}}
+// expected-remark@+1 {{    LDS Size [bytes/block]: 0}}
+__kernel void foo() {
+  __asm volatile ("; clobber s8" :::"s8");
+  __asm volatile ("; clobber v9" :::"v9");
+  __asm volatile ("; clobber a11" :::"a11");
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to