iamarchit123 created this revision.
Herald added a subscriber: wenlei.
Herald added a project: All.
iamarchit123 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Test Plan:


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D132188

Files:
  clang/docs/MisNoInline.rst
  clang/docs/ReleaseNotes.rst
  clang/test/Misc/Inputs/MisNoInline.proftext
  clang/test/Misc/MisNoInline.cpp
  clang/test/Misc/MisNoInline_LowThreshold.cpp
  clang/test/Misc/MisNoInline_PragmaIgnore.cpp
  llvm/docs/MisNoInline.rst
  llvm/test/Transforms/PGOProfile/Inputs/MisNoInline.proftext
  llvm/test/Transforms/PGOProfile/MisNoInline.ll

Index: llvm/test/Transforms/PGOProfile/MisNoInline.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PGOProfile/MisNoInline.ll
@@ -0,0 +1,216 @@
+; RUN: opt < %s -passes=sample-profile,inline -sample-profile-file=%S/Inputs/MisNoInline.proftext -pgo-warn-misnoinline -S  2>&1 | FileCheck %s -check-prefix=WARNING
+; RUN: opt < %s -passes=sample-profile,inline -sample-profile-file=%S/Inputs/MisNoInline.proftext -pgo-warn-misnoinline -misnoinline-percent=900000 -S  2>&1 | FileCheck %s -check-prefix=WARNING_LOW
+; RUN: opt < %s -passes=sample-profile,inline -sample-profile-file=%S/Inputs/MisNoInline.proftext -pass-remarks=misnoinline -S 2>&1 | FileCheck %s --check-prefix=REMARK
+
+; WARNING-DAG: warning: Module: {{.*}} Marking _Z4foo1ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+; WARNING-DAG: warning: Module: {{.*}} Marking _Z4foo2ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+
+; WARNING_LOW-DAG: warning: Module: {{.*}} Marking _Z4foo1ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+; WARNING_LOW-NOT: warning: Module: {{.*}} Marking _Z4foo2ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+
+; REMARK-NOT: warning: Module: {{.*}} Marking _Z4foo1ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+; REMARK-NOT: warning: Module: {{.*}} Marking _Z4foo2ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+; REMARK-DAG: remark: MisNoInline.cpp:28:8: Marking _Z4foo1ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+; REMARK-DAG: remark: MisNoInline.cpp:29:8: Marking _Z4foo2ii noinline while calling in main may hurt performance as per inline cost/hotness analysis
+
+; The source code used for the LLVM IR that follows.
+; void printf();
+; __attribute__((noinline)) long foo1(int x, int y) {
+;   while (x != y) {
+;     printf();
+;     y++;
+;   }
+;   return y;
+; }
+; __attribute__((noinline)) long foo2(int x, int y) {
+;   while (x != y) {
+;     printf();
+;     y++;
+;   }
+;   return y;
+; }
+; int main() {
+;   int x = 5678;
+;   int y = 1234;
+;   x += foo1(x, y);
+;   x += foo2(x, y);
+;
+;   return x;
+; }
+
+; Function Attrs: mustprogress noinline uwtable
+define dso_local noundef i64 @_Z4foo1ii(i32 noundef %x, i32 noundef %y) #0 !dbg !8 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  call void @llvm.dbg.declare(metadata ptr %x.addr, metadata !14, metadata !DIExpression()), !dbg !15
+  store i32 %y, ptr %y.addr, align 4
+  call void @llvm.dbg.declare(metadata ptr %y.addr, metadata !16, metadata !DIExpression()), !dbg !17
+  br label %while.cond, !dbg !18
+
+while.cond:                                       ; preds = %while.body, %entry
+  %0 = load i32, ptr %x.addr, align 4, !dbg !19
+  %1 = load i32, ptr %y.addr, align 4, !dbg !21
+  %cmp = icmp ne i32 %0, %1, !dbg !22
+  br i1 %cmp, label %while.body, label %while.end, !dbg !23
+
+while.body:                                       ; preds = %while.cond
+  call void @_Z6printfv(), !dbg !24
+  %2 = load i32, ptr %y.addr, align 4, !dbg !26
+  %inc = add nsw i32 %2, 1, !dbg !26
+  store i32 %inc, ptr %y.addr, align 4, !dbg !26
+  br label %while.cond, !dbg !27, !llvm.loop !29
+
+while.end:                                        ; preds = %while.cond
+  %3 = load i32, ptr %y.addr, align 4, !dbg !32
+  %conv = sext i32 %3 to i64, !dbg !32
+  ret i64 %conv, !dbg !33
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare dso_local void @_Z6printfv() #2
+
+; Function Attrs: mustprogress noinline uwtable
+define dso_local noundef i64 @_Z4foo2ii(i32 noundef %x, i32 noundef %y) #0 !dbg !34 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  call void @llvm.dbg.declare(metadata ptr %x.addr, metadata !35, metadata !DIExpression()), !dbg !36
+  store i32 %y, ptr %y.addr, align 4
+  call void @llvm.dbg.declare(metadata ptr %y.addr, metadata !37, metadata !DIExpression()), !dbg !38
+  br label %while.cond, !dbg !39
+
+while.cond:                                       ; preds = %while.body, %entry
+  %0 = load i32, ptr %x.addr, align 4, !dbg !40
+  %1 = load i32, ptr %y.addr, align 4, !dbg !42
+  %cmp = icmp ne i32 %0, %1, !dbg !43
+  br i1 %cmp, label %while.body, label %while.end, !dbg !44
+
+while.body:                                       ; preds = %while.cond
+  call void @_Z6printfv(), !dbg !45
+  %2 = load i32, ptr %y.addr, align 4, !dbg !47
+  %inc = add nsw i32 %2, 1, !dbg !47
+  store i32 %inc, ptr %y.addr, align 4, !dbg !47
+  br label %while.cond, !dbg !48, !llvm.loop !50
+
+while.end:                                        ; preds = %while.cond
+  %3 = load i32, ptr %y.addr, align 4, !dbg !52
+  %conv = sext i32 %3 to i64, !dbg !52
+  ret i64 %conv, !dbg !53
+}
+
+; Function Attrs: mustprogress noinline norecurse uwtable
+define dso_local noundef i32 @main() #3 !dbg !54 {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  store i32 0, ptr %retval, align 4
+  call void @llvm.dbg.declare(metadata ptr %x, metadata !57, metadata !DIExpression()), !dbg !58
+  store i32 5678, ptr %x, align 4, !dbg !58
+  call void @llvm.dbg.declare(metadata ptr %y, metadata !59, metadata !DIExpression()), !dbg !60
+  store i32 1234, ptr %y, align 4, !dbg !60
+  %0 = load i32, ptr %x, align 4, !dbg !61
+  %1 = load i32, ptr %y, align 4, !dbg !62
+  %call = call noundef i64 @_Z4foo1ii(i32 noundef %0, i32 noundef %1), !dbg !63
+  %2 = load i32, ptr %x, align 4, !dbg !64
+  %conv = sext i32 %2 to i64, !dbg !64
+  %add = add nsw i64 %conv, %call, !dbg !64
+  %conv1 = trunc i64 %add to i32, !dbg !64
+  store i32 %conv1, ptr %x, align 4, !dbg !64
+  %3 = load i32, ptr %x, align 4, !dbg !65
+  %4 = load i32, ptr %y, align 4, !dbg !66
+  %call2 = call noundef i64 @_Z4foo2ii(i32 noundef %3, i32 noundef %4), !dbg !67
+  %5 = load i32, ptr %x, align 4, !dbg !68
+  %conv3 = sext i32 %5 to i64, !dbg !68
+  %add4 = add nsw i64 %conv3, %call2, !dbg !68
+  %conv5 = trunc i64 %add4 to i32, !dbg !68
+  store i32 %conv5, ptr %x, align 4, !dbg !68
+  %6 = load i32, ptr %x, align 4, !dbg !69
+  ret i32 %6, !dbg !70
+}
+
+attributes #0 = { mustprogress noinline uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile"}
+attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { mustprogress noinline norecurse uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile"}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "MisNoInline.cpp", directory: ".")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"uwtable", i32 2}
+!6 = !{i32 7, !"frame-pointer", i32 2}
+!7 = !{!"clang version 15.0.0"}
+!8 = distinct !DISubprogram(name: "foo1", linkageName: "_Z4foo1ii", scope: !1, file: !1, line: 11, type: !9, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !13)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !12, !12}
+!11 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed)
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !{}
+!14 = !DILocalVariable(name: "x", arg: 1, scope: !8, file: !1, line: 11, type: !12)
+!15 = !DILocation(line: 11, column: 41, scope: !8)
+!16 = !DILocalVariable(name: "y", arg: 2, scope: !8, file: !1, line: 11, type: !12)
+!17 = !DILocation(line: 11, column: 48, scope: !8)
+!18 = !DILocation(line: 12, column: 3, scope: !8)
+!19 = !DILocation(line: 12, column: 10, scope: !20)
+!20 = !DILexicalBlockFile(scope: !8, file: !1, discriminator: 2)
+!21 = !DILocation(line: 12, column: 15, scope: !20)
+!22 = !DILocation(line: 12, column: 12, scope: !20)
+!23 = !DILocation(line: 12, column: 3, scope: !20)
+!24 = !DILocation(line: 13, column: 5, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !8, file: !1, line: 12, column: 18)
+!26 = !DILocation(line: 14, column: 6, scope: !25)
+!27 = !DILocation(line: 12, column: 3, scope: !28)
+!28 = !DILexicalBlockFile(scope: !8, file: !1, discriminator: 4)
+!29 = distinct !{!29, !18, !30, !31}
+!30 = !DILocation(line: 15, column: 3, scope: !8)
+!31 = !{!"llvm.loop.mustprogress"}
+!32 = !DILocation(line: 16, column: 10, scope: !8)
+!33 = !DILocation(line: 16, column: 3, scope: !8)
+!34 = distinct !DISubprogram(name: "foo2", linkageName: "_Z4foo2ii", scope: !1, file: !1, line: 18, type: !9, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !13)
+!35 = !DILocalVariable(name: "x", arg: 1, scope: !34, file: !1, line: 18, type: !12)
+!36 = !DILocation(line: 18, column: 41, scope: !34)
+!37 = !DILocalVariable(name: "y", arg: 2, scope: !34, file: !1, line: 18, type: !12)
+!38 = !DILocation(line: 18, column: 48, scope: !34)
+!39 = !DILocation(line: 19, column: 3, scope: !34)
+!40 = !DILocation(line: 19, column: 10, scope: !41)
+!41 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 2)
+!42 = !DILocation(line: 19, column: 15, scope: !41)
+!43 = !DILocation(line: 19, column: 12, scope: !41)
+!44 = !DILocation(line: 19, column: 3, scope: !41)
+!45 = !DILocation(line: 20, column: 5, scope: !46)
+!46 = distinct !DILexicalBlock(scope: !34, file: !1, line: 19, column: 18)
+!47 = !DILocation(line: 21, column: 6, scope: !46)
+!48 = !DILocation(line: 19, column: 3, scope: !49)
+!49 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 4)
+!50 = distinct !{!50, !39, !51, !31}
+!51 = !DILocation(line: 22, column: 3, scope: !34)
+!52 = !DILocation(line: 23, column: 10, scope: !34)
+!53 = !DILocation(line: 23, column: 3, scope: !34)
+!54 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 25, type: !55, scopeLine: 25, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !13)
+!55 = !DISubroutineType(types: !56)
+!56 = !{!12}
+!57 = !DILocalVariable(name: "x", scope: !54, file: !1, line: 26, type: !12)
+!58 = !DILocation(line: 26, column: 7, scope: !54)
+!59 = !DILocalVariable(name: "y", scope: !54, file: !1, line: 27, type: !12)
+!60 = !DILocation(line: 27, column: 7, scope: !54)
+!61 = !DILocation(line: 28, column: 13, scope: !54)
+!62 = !DILocation(line: 28, column: 16, scope: !54)
+!63 = !DILocation(line: 28, column: 8, scope: !54)
+!64 = !DILocation(line: 28, column: 5, scope: !54)
+!65 = !DILocation(line: 29, column: 13, scope: !54)
+!66 = !DILocation(line: 29, column: 16, scope: !54)
+!67 = !DILocation(line: 29, column: 8, scope: !54)
+!68 = !DILocation(line: 29, column: 5, scope: !54)
+!69 = !DILocation(line: 31, column: 10, scope: !54)
+!70 = !DILocation(line: 31, column: 3, scope: !54)
Index: llvm/test/Transforms/PGOProfile/Inputs/MisNoInline.proftext
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PGOProfile/Inputs/MisNoInline.proftext
@@ -0,0 +1,21 @@
+_Z4foo1ii:2000:2000
+ 0: 2000
+ 1.1: 2000
+ 1.2: 2000
+ 2: 2000
+ 3: 2000
+ 5: 2000
+_Z4foo2ii:200:200
+ 0: 200
+ 1.1: 200
+ 1.2: 200
+ 2: 200
+ 3: 200
+ 5: 200
+main:1:1
+ 0: 1
+ 1: 1
+ 2: 1
+ 3: 1
+ 4: 1
+ 6: 1
Index: llvm/docs/MisNoInline.rst
===================================================================
--- /dev/null
+++ llvm/docs/MisNoInline.rst
@@ -0,0 +1,69 @@
+===================
+Misnoinline
+===================
+.. contents::
+
+.. toctree::
+   :maxdepth: 1
+
+When developers use the noinline attribute on a function, i.e., through use of
+``__attribute__((noinline))``, they are trying to communicate that the
+function should not be inlined by the compiler. There can be a number of
+reasons to mark a function noinline. If a function is small,
+not critical to the performance of your code and called infrequently,
+especially in cases of error handling, it makes sense to mark it noinline.
+These annotations, however, can be incorrect for a variety of reasons:
+changes to the code base may make these functions hot, the developer
+mis-annotated them, or perhaps they assumed something incorrectly when they
+wrote the annotation. Regardless of why, it is useful to detect these
+situations so that the user can rethink and remove the attribute as necessary.
+
+The MisNoInline checks in the LLVM backend follow a simple procedure: if the function
+entry count exceeds a certain percentile threshold and the cost analysis for inlining the
+noinline-marked function indicates success, the MisNoInline flags will emit a diagnostic asking the
+user to reconsider their stance on the function marking.
+
+The most natural place to perform these checks is in the CGSCC Inliner pass, specifically
+where we check the cost of inlining the function. The profile pass has also already been run
+by this point, so information about BFI/PSI is available for further analysis.
+
+
+Instead of stopping the analysis of whether a function should be inlined
+in InlinerCost when we hit a noinline-marked function, we continue and check whether, by cost, it is
+feasible to inline the function. If so, we do one more check: whether the function is percentile-wise
+hotter than the user-provided value (if the user doesn't provide a value, it is 99% by default), and
+if it is, we go ahead and emit the warning.
+
+The diagnostics are also available in the form of optimization remarks,
+which can be serialized and processed through the ``opt-viewer.py``
+scripts in LLVM.
+
+.. option:: -pass-remarks=misnoinline
+
+  Enables optimization remarks for misnoinline when profiling data conflicts with
+  use of ``noinline`` function attribute.
+
+
+.. option:: -pgo-warn-misnoinline
+
+  Enables misnoinline warnings when profiling data conflicts with use of
+  ``noinline`` function attribute.
+
+.. option:: -misnoinline-percent
+
+  Relaxes misnoinline checking to not emit warnings for functions colder than the Nth percentile.
+
+LLVM supports 4 types of profile formats: Frontend, IR, CS-IR, and
+Sampling. MisNoInline Diagnostics are compatible with all profiling formats.
+
++----------------+--------------------------------------------------------------------------------------+
+| Profile Type   | Description                                                                          |
++================+======================================================================================+
+| Frontend       | Profiling instrumentation added during compilation by the frontend, i.e. ``clang``   |
++----------------+--------------------------------------------------------------------------------------+
+| IR             | Profiling instrumentation added during compilation by the LLVM backend               |
++----------------+--------------------------------------------------------------------------------------+
+| CS-IR          | Context Sensitive IR based profiles                                                  |
++----------------+--------------------------------------------------------------------------------------+
+| Sampling       | Profiles collected through sampling with external tools, such as ``perf`` on Linux   |
++----------------+--------------------------------------------------------------------------------------+
Index: clang/test/Misc/MisNoInline_PragmaIgnore.cpp
===================================================================
--- /dev/null
+++ clang/test/Misc/MisNoInline_PragmaIgnore.cpp
@@ -0,0 +1,34 @@
+// Test that misnoinline doesn't emit a warning when it is pragma ignored
+
+//  RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-sample-use=%S/Inputs/MisNoInline.proftext -verify=noemit -Wmisnoinline
+
+//  RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-sample-use=%S/Inputs/MisNoInline.proftext -verify=noemit -Wmisnoinline -fdiagnostics-misnoinline-percentile-threshold=900000
+
+//noemit-no-diagnostics
+void printf();
+__attribute__((noinline)) long foo1(int x, int y) {
+  while (x != y) {
+    printf();
+    y++;
+  }
+  return y;
+}
+__attribute__((noinline)) long foo2(int x, int y) {
+  while (x != y) {
+    printf();
+    y++;
+  }
+  return y;
+}
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmisnoinline"
+int main() {
+  int x = 5678;
+  int y = 1234;
+  x += foo1(x, y);
+  x += foo2(x, y);
+
+  return x;
+}
+#pragma clang diagnostic pop
Index: clang/test/Misc/MisNoInline_LowThreshold.cpp
===================================================================
--- /dev/null
+++ clang/test/Misc/MisNoInline_LowThreshold.cpp
@@ -0,0 +1,28 @@
+//Test that misnoinline emits correct warning
+
+// Test that with -fdiagnostics-misnoinline-percentile-threshold set to 90% only foo1 emits warning as per profiling data
+//  RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-sample-use=%S/Inputs/MisNoInline.proftext  -verify=emit -Wmisnoinline -fdiagnostics-misnoinline-percentile-threshold=900000
+
+void printf();
+__attribute__((noinline)) long foo1(int x, int y) {
+  while (x != y) {
+    printf();
+    y++;
+  }
+  return y;
+}
+__attribute__((noinline)) long foo2(int x, int y) {
+  while (x != y) {
+    printf();
+    y++;
+  }
+  return y;
+}
+int main() {
+  int x = 5678;
+  int y = 1234;
+  x += foo1(x, y); // emit-warning-re {{noinline attribute marking for {{.*}}MisNoInline_LowThreshold.cpp:foo1(int, int) may hurt performance.}}
+  x += foo2(x, y);
+
+  return x;
+}
Index: clang/test/Misc/MisNoInline.cpp
===================================================================
--- /dev/null
+++ clang/test/Misc/MisNoInline.cpp
@@ -0,0 +1,29 @@
+//Test that misnoinline emits correct warning
+
+// Test that without passing the -fdiagnostics-misnoinline-percentile-threshold flag, warnings are emitted with the percentile cutoff at 99%
+// This would mean both foo1 and foo2 would emit warning
+//  RUN: %clang_cc1 %s -O2 -o - -emit-llvm -fprofile-sample-use=%S/Inputs/MisNoInline.proftext -verify=emit -Wmisnoinline
+
+void printf();
+__attribute__((noinline)) long foo1(int x, int y) {
+  while (x != y) {
+    printf();
+    y++;
+  }
+  return y;
+}
+__attribute__((noinline)) long foo2(int x, int y) {
+  while (x != y) {
+    printf();
+    y++;
+  }
+  return y;
+}
+int main() {
+  int x = 5678;
+  int y = 1234;
+  x += foo1(x, y); // emit-warning-re {{noinline attribute marking for {{.*}}MisNoInline.cpp:foo1(int, int) may hurt performance.}}
+  x += foo2(x, y); // emit-warning-re {{noinline attribute marking for {{.*}}MisNoInline.cpp:foo2(int, int) may hurt performance.}}
+
+  return x;
+}
Index: clang/test/Misc/Inputs/MisNoInline.proftext
===================================================================
--- /dev/null
+++ clang/test/Misc/Inputs/MisNoInline.proftext
@@ -0,0 +1,21 @@
+_Z4foo1ii:2000:2000
+ 0: 2000
+ 1.1: 2000
+ 1.2: 2000
+ 2: 2000
+ 3: 2000
+ 5: 2000
+_Z4foo2ii:200:200
+ 0: 200
+ 1.1: 200
+ 1.2: 200
+ 2: 200
+ 3: 200
+ 5: 200
+main:1:1
+ 0: 1
+ 1: 1
+ 2: 1
+ 3: 1
+ 4: 1
+ 6: 1
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -101,6 +101,8 @@
 - Clang will now print more information about failed static assertions. In
   particular, simple static assertion expressions are evaluated to their
   compile-time value and printed out if the assertion fails.
+- ``-Wmisnoinline`` warns when the ``noinline`` attribute attached to a function
+  appears to conflict with profiling data.
 
 Non-comprehensive list of changes in this release
 -------------------------------------------------
Index: clang/docs/MisNoInline.rst
===================================================================
--- /dev/null
+++ clang/docs/MisNoInline.rst
@@ -0,0 +1,67 @@
+===================
+Misnoinline
+===================
+.. contents::
+
+.. toctree::
+   :maxdepth: 1
+
+When developers use the noinline attribute on a function, i.e., through use of
+``__attribute__((noinline))``, they are trying to communicate that the
+function should not be inlined by the compiler. There can be a number of
+reasons to mark a function noinline. If a function is small,
+not critical to the performance of your code and called infrequently,
+especially in cases of error handling, it makes sense to mark it noinline.
+These annotations, however, can be incorrect for a variety of reasons:
+changes to the code base may make these functions hot, the developer
+mis-annotated them, or perhaps they assumed something incorrectly when they
+wrote the annotation. Regardless of why, it is useful to detect these
+situations so that the user can rethink and remove the attribute as necessary.
+
+MisNoInline diagnostics are intended to help developers identify and address
+these situations, by comparing the hotness of noinline functions and
+checking whether they would indeed have been candidates for inlining. Whenever the percentile
+threshold is breached, a warning is emitted from the backend. Details on how the flags operate
+in the LLVM backend can be found in LLVM's documentation.
+
+By default, MisNoInline checking is quite lenient. It checks against a default threshold percentile
+of 99%. Because this may emit a lot of warnings, MisNoInline diagnostics are not enabled by
+default, and support an additional flag to decrease the noinline percentile threshold and filter
+hotter functions. The ``-fdiagnostics-misnoinline-percentile-threshold=N`` flag accepts
+function entry percentile coldness up to N before emitting a warning. So passing
+``-fdiagnostics-misnoinline-percentile-threshold=50`` will not report diagnostic messages
+if the noinline function's hotness is greater than 50% as per PSI analysis.
+
+MisNoInline diagnostics are also available in the form of optimization remarks,
+which can be serialized and processed through the ``opt-viewer.py``
+scripts in LLVM.
+
+.. option:: -Rpass=misnoinline
+
+  Enables optimization remarks for misnoinline when profiling data conflicts with
+  use of ``noinline`` attribute.
+
+
+.. option:: -Wmisnoinline
+
+  Enables misnoinline warnings when profiling data conflicts with use of
+  ``noinline`` attribute.
+
+.. option:: -fdiagnostics-misnoinline-percentile-threshold=N
+
+  Relaxes misnoinline checking to not emit warnings for functions colder than N%.
+
+LLVM supports 4 types of profile formats: Frontend, IR, CS-IR, and
+Sampling. MisNoInline diagnostics are compatible with all profiling formats.
+
++----------------+--------------------------------------------------------------------------------------+
+| Profile Type   | Description                                                                          |
++================+======================================================================================+
+| Frontend       | Profiling instrumentation added during compilation by the frontend, i.e. ``clang``   |
++----------------+--------------------------------------------------------------------------------------+
+| IR             | Profiling instrumentation added during compilation by the LLVM backend               |
++----------------+--------------------------------------------------------------------------------------+
+| CS-IR          | Context Sensitive IR based profiles                                                  |
++----------------+--------------------------------------------------------------------------------------+
+| Sampling       | Profiles collected through sampling with external tools, such as ``perf`` on Linux   |
++----------------+--------------------------------------------------------------------------------------+
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to