Author: Hassnaa Hamdi
Date: 2025-12-07T14:24:30Z
New Revision: 9e7ce77573b2fad126a367475cbe807c10826691

URL: 
https://github.com/llvm/llvm-project/commit/9e7ce77573b2fad126a367475cbe807c10826691
DIFF: 
https://github.com/llvm/llvm-project/commit/9e7ce77573b2fad126a367475cbe807c10826691.diff

LOG: [Clang]: Support opt-in speculative devirtualization (#159685)

This patch adds Clang support for speculative devirtualization and
integrates the related pass into the pass pipeline.
It's building on the LLVM backend implementation from PR #159048.
Speculative devirtualization transforms an indirect call (the virtual
function) to a guarded direct call.
It is guarded by a comparison of the virtual function pointer to the
expected target.
This optimization is still safe without LTO because it doesn't do direct
calls, it's conditional according to the function ptr.
This optimization:
- Opt-in: Disabled by default, enabled via `-fdevirtualize-speculatively`
- Works in non-LTO mode
- Handles publicly-visible objects.
- Uses guarded devirtualization with fallback to indirect calls when the
speculation is incorrect.

For this C++ example:
```
class Base {
public:
    __attribute__((noinline))
    virtual void virtual_function1() { asm volatile("NOP"); }
    virtual void virtual_function2() { asm volatile("NOP"); }
};
class Derived : public Base {
public:
    void virtual_function2() override { asm volatile("NOP"); }
};
__attribute__((noinline))
void foo(Base *BV) {
    BV->virtual_function1();
}
void bar() {
    Base *b = new Derived();
    foo(b);
}
```
Here is the IR without enabling speculative devirtualization:
```
define dso_local void @_Z3fooP4Base(ptr noundef %BV) local_unnamed_addr #0 {
entry:
  %vtable = load ptr, ptr %BV, align 8, !tbaa !6
  %0 = load ptr, ptr %vtable, align 8
  tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV)
  ret void
}
```
IR after enabling speculative devirtualization:
```
define dso_local void @_Z3fooP4Base(ptr noundef %BV) local_unnamed_addr #0 {
entry:
  %vtable = load ptr, ptr %BV, align 8, !tbaa !12
  %0 = load ptr, ptr %vtable, align 8
  %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev
  br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !15

if.true.direct_targ:                              ; preds = %entry
  tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 8 
dereferenceable(8) %BV)
  br label %if.end.icp

if.false.orig_indirect:                           ; preds = %entry
  tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV)
  br label %if.end.icp

if.end.icp:                                       ; preds = 
%if.false.orig_indirect, %if.true.direct_targ
  ret void
}
```

Added: 
    clang/test/CodeGenCXX/speculative-devirt-metadata.cpp
    llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll
    llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/docs/UsersManual.rst
    clang/include/clang/Basic/CodeGenOptions.def
    clang/include/clang/Options/Options.td
    clang/lib/CodeGen/BackendUtil.cpp
    clang/lib/CodeGen/CGClass.cpp
    clang/lib/CodeGen/CGVTables.cpp
    clang/lib/CodeGen/ItaniumCXXABI.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/test/Driver/clang_f_opts.c
    llvm/include/llvm/Passes/PassBuilder.h
    llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f31e1c343e8aa..acca997e0ff64 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -343,6 +343,7 @@ Modified Compiler Flags
 -----------------------
 - The `-gkey-instructions` compiler flag is now enabled by default when DWARF 
is emitted for plain C/C++ and optimizations are enabled. (#GH149509)
 - The `-fconstexpr-steps` compiler flag now accepts value `0` to opt out of 
this limit. (#GH160440)
+- The `-fdevirtualize-speculatively` compiler flag is now supported to enable 
speculative devirtualization of virtual function calls, it's disabled by 
default. (#GH159685)
 
 Removed Compiler Flags
 -------------------------

diff  --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 42665da413660..c624efb26f67d 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2352,6 +2352,56 @@ are listed below.
    pure ThinLTO, as all split regular LTO modules are merged and LTO linked
    with regular LTO.
 
+.. option:: -fdevirtualize-speculatively
+
+   Enable speculative devirtualization optimization where a virtual call
+   can be transformed into a direct call under the assumption that its
+   object is of a particular type. A runtime check is inserted to validate
+   the assumption before making the direct call, and if the check fails,
+   the original virtual call is made instead. This optimization can enable
+   more inlining opportunities and better optimization of the direct call.
+   This is 
diff erent from whole program devirtualization optimization
+   that rely on global analysis and hidden visibility of the objects to prove
+   that the object is always of a particular type at a virtual call site.
+   This optimization doesn't require global analysis or hidden visibility.
+   This optimization doesn't devirtualize all virtual calls, but only
+   when there's a single implementation of the virtual function in the module.
+   There could be a single implementation of the virtual function
+   either because the function is not overridden in any derived class,
+   or because all objects are instances of the same class/type.
+
+   Ex of IR before the optimization:
+
+   .. code-block:: llvm
+
+     %vtable = load ptr, ptr %BV, align 8, !tbaa !6
+     %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata 
!"_ZTS4Base")
+     tail call void @llvm.assume(i1 %0)
+     %0 = load ptr, ptr %vtable, align 8
+     tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV)
+     ret void
+
+   IR after the optimization:
+
+   .. code-block:: llvm
+
+     %vtable = load ptr, ptr %BV, align 8, !tbaa !12
+     %0 = load ptr, ptr %vtable, align 8
+     %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev
+     br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, 
!prof !15
+     if.true.direct_targ:                              ; preds = %entry
+       tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 
8 dereferenceable(8) %BV)
+       br label %if.end.icp
+     if.false.orig_indirect:                           ; preds = %entry
+       tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV)
+       br label %if.end.icp
+     if.end.icp:                                       ; preds = 
%if.false.orig_indirect, %if.true.direct_targ
+       ret void
+
+   This feature is temporarily ignored at the LLVM side when LTO is enabled.
+   TODO: Update the comment when the LLVM side supports this feature for LTO.
+   This feature is turned off by default.
+
 .. option:: -f[no-]unique-source-file-names
 
    When enabled, allows the compiler to assume that each object file
@@ -5216,6 +5266,8 @@ Execute ``clang-cl /?`` to see a list of supported 
options:
       -fstandalone-debug      Emit full debug info for all types used by the 
program
       -fstrict-aliasing              Enable optimizations based on strict 
aliasing rules
       -fsyntax-only           Run the preprocessor, parser and semantic 
analysis stages
+      -fdevirtualize-speculatively
+                              Enables speculative devirtualization 
optimization.
       -fwhole-program-vtables Enables whole-program vtable optimization. 
Requires -flto
       -gcodeview-ghash        Emit type record hashes in a .debug$H section
       -gcodeview              Generate CodeView debug information

diff  --git a/clang/include/clang/Basic/CodeGenOptions.def 
b/clang/include/clang/Basic/CodeGenOptions.def
index 76a6463881c6f..a059803c433e3 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -364,6 +364,8 @@ VALUE_CODEGENOPT(WarnStackSize     , 32, UINT_MAX, Benign) 
///< Set via -fwarn-s
 CODEGENOPT(NoStackArgProbe, 1, 0, Benign) ///< Set when -mno-stack-arg-probe 
is used
 CODEGENOPT(EmitLLVMUseLists, 1, 0, Benign) ///< Control whether to serialize 
use-lists.
 
+CODEGENOPT(DevirtualizeSpeculatively, 1, 0, Benign) ///< Whether to apply the 
speculative
+                                                    /// devirtualization 
optimization.
 CODEGENOPT(WholeProgramVTables, 1, 0, Benign) ///< Whether to apply 
whole-program
                                               ///  vtable optimization.
 

diff  --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index c6841937c8d39..1a2cf1410e9ed 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -4522,6 +4522,13 @@ defm new_infallible : BoolFOption<"new-infallible",
   BothFlags<[], [ClangOption, CC1Option],
           " treating throwing global C++ operator new as always returning 
valid memory "
   "(annotates with __attribute__((returns_nonnull)) and throw()). This is 
detectable in source.">>;
+defm devirtualize_speculatively
+    : BoolFOption<"devirtualize-speculatively",
+                  CodeGenOpts<"DevirtualizeSpeculatively">, DefaultFalse,
+                  PosFlag<SetTrue, [], [],
+                          "Enables speculative devirtualization 
optimization.">,
+                  NegFlag<SetFalse>,
+                  BothFlags<[], [ClangOption, CLOption, CC1Option]>>;
 defm whole_program_vtables : BoolFOption<"whole-program-vtables",
   CodeGenOpts<"WholeProgramVTables">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
@@ -7132,9 +7139,8 @@ defm variable_expansion_in_unroller : 
BooleanFFlag<"variable-expansion-in-unroll
     Group<clang_ignored_gcc_optimization_f_Group>;
 defm web : BooleanFFlag<"web">, Group<clang_ignored_gcc_optimization_f_Group>;
 defm whole_program : BooleanFFlag<"whole-program">, 
Group<clang_ignored_gcc_optimization_f_Group>;
-defm devirtualize : BooleanFFlag<"devirtualize">, 
Group<clang_ignored_gcc_optimization_f_Group>;
-defm devirtualize_speculatively : BooleanFFlag<"devirtualize-speculatively">,
-    Group<clang_ignored_gcc_optimization_f_Group>;
+defm devirtualize : BooleanFFlag<"devirtualize">,
+                    Group<clang_ignored_gcc_optimization_f_Group>;
 
 // Generic gfortran options.
 def A_DASH : Joined<["-"], "A-">, Group<gfortran_Group>;

diff  --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index af3480d5755f1..126005a5d93c2 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -946,6 +946,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   // non-integrated assemblers don't recognize .cgprofile section.
   PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
   PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
+  PTO.DevirtualizeSpeculatively = CodeGenOpts.DevirtualizeSpeculatively;
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;

diff  --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index f782b0cd17da4..96fde10f24f32 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -2827,10 +2827,15 @@ void 
CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
                                                    SourceLocation Loc) {
   if (SanOpts.has(SanitizerKind::CFIVCall))
     EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc);
-  else if (CGM.getCodeGenOpts().WholeProgramVTables &&
-           // Don't insert type test assumes if we are forcing public
-           // visibility.
-           !CGM.AlwaysHasLTOVisibilityPublic(RD)) {
+  // Emit the intrinsics of (type_test and assume) for the features of WPD and
+  // speculative devirtualization. For WPD, emit the intrinsics only for the
+  // case of non_public LTO visibility.
+  // TODO: refactor this condition and similar ones into a function (e.g.,
+  // ShouldEmitDevirtualizationMD) to encapsulate the details of the 
diff erent
+  // types of devirtualization.
+  else if ((CGM.getCodeGenOpts().WholeProgramVTables &&
+            !CGM.AlwaysHasLTOVisibilityPublic(RD)) ||
+           CGM.getCodeGenOpts().DevirtualizeSpeculatively) {
     CanQualType Ty = CGM.getContext().getCanonicalTagType(RD);
     llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(Ty);
     llvm::Value *TypeId =
@@ -2988,8 +2993,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const 
CXXRecordDecl *RD,
 }
 
 bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) 
{
-  if (!CGM.getCodeGenOpts().WholeProgramVTables ||
-      !CGM.HasHiddenLTOVisibility(RD))
+  if ((!CGM.getCodeGenOpts().WholeProgramVTables ||
+       !CGM.HasHiddenLTOVisibility(RD)) &&
+      !CGM.getCodeGenOpts().DevirtualizeSpeculatively)
     return false;
 
   if (CGM.getCodeGenOpts().VirtualFunctionElimination)

diff  --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 3fbac308a9178..91550d0d31d83 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -1363,10 +1363,12 @@ llvm::GlobalObject::VCallVisibility 
CodeGenModule::GetVCallVisibilityLevel(
 void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
                                            llvm::GlobalVariable *VTable,
                                            const VTableLayout &VTLayout) {
-  // Emit type metadata on vtables with LTO or IR instrumentation.
+  // Emit type metadata on vtables with LTO or IR instrumentation or
+  // speculative devirtualization.
   // In IR instrumentation, the type metadata is used to find out vtable
   // definitions (for type profiling) among all global variables.
-  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr())
+  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() &&
+      !getCodeGenOpts().DevirtualizeSpeculatively)
     return;
 
   CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());

diff  --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp 
b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 3042f37859e5a..09472edaba7d3 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -716,10 +716,14 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
 
   bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
                            CGM.HasHiddenLTOVisibility(RD);
+  // TODO: Update this name not to be restricted to WPD only
+  // as we now emit the vtable info info for speculative devirtualization as
+  // well.
   bool ShouldEmitWPDInfo =
-      CGM.getCodeGenOpts().WholeProgramVTables &&
-      // Don't insert type tests if we are forcing public visibility.
-      !CGM.AlwaysHasLTOVisibilityPublic(RD);
+      (CGM.getCodeGenOpts().WholeProgramVTables &&
+       // Don't insert type tests if we are forcing public visibility.
+       !CGM.AlwaysHasLTOVisibilityPublic(RD)) ||
+      CGM.getCodeGenOpts().DevirtualizeSpeculatively;
   llvm::Value *VirtualFn = nullptr;
 
   {
@@ -2110,17 +2114,20 @@ void 
ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
 
   // Always emit type metadata on non-available_externally definitions, and on
   // available_externally definitions if we are performing whole program
-  // devirtualization. For WPD we need the type metadata on all vtable
-  // definitions to ensure we associate derived classes with base classes
-  // defined in headers but with a strong definition only in a shared library.
+  // devirtualization or speculative devirtualization. We need the type 
metadata
+  // on all vtable definitions to ensure we associate derived classes with base
+  // classes defined in headers but with a strong definition only in a shared
+  // library.
   if (!VTable->isDeclarationForLinker() ||
-      CGM.getCodeGenOpts().WholeProgramVTables) {
+      CGM.getCodeGenOpts().WholeProgramVTables ||
+      CGM.getCodeGenOpts().DevirtualizeSpeculatively) {
     CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout);
     // For available_externally definitions, add the vtable to
     // @llvm.compiler.used so that it isn't deleted before whole program
     // analysis.
     if (VTable->isDeclarationForLinker()) {
-      assert(CGM.getCodeGenOpts().WholeProgramVTables);
+      assert(CGM.getCodeGenOpts().WholeProgramVTables ||
+             CGM.getCodeGenOpts().DevirtualizeSpeculatively);
       CGM.addCompilerUsedGlobal(VTable);
     }
   }

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 0380568412e62..7187d1a158e01 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7745,6 +7745,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
 
   addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs);
 
+  if (Args.hasFlag(options::OPT_fdevirtualize_speculatively,
+                   options::OPT_fno_devirtualize_speculatively,
+                   /*Default value*/ false))
+    CmdArgs.push_back("-fdevirtualize-speculatively");
+
   bool VirtualFunctionElimination =
       Args.hasFlag(options::OPT_fvirtual_function_elimination,
                    options::OPT_fno_virtual_function_elimination, false);

diff  --git a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp 
b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp
new file mode 100644
index 0000000000000..20d2ab9f46fe5
--- /dev/null
+++ b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp
@@ -0,0 +1,78 @@
+// Test that Clang emits vtable metadata when speculative devirtualization is 
enabled.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively 
-emit-llvm -o - %s | FileCheck --check-prefix=CHECK  %s
+
+struct A {
+  A();
+  virtual void f();
+};
+
+struct B : virtual A {
+  B();
+  virtual void g();
+  virtual void h();
+};
+
+namespace {
+
+struct D : B {
+  D();
+  virtual void f();
+  virtual void h();
+};
+
+}
+
+A::A() {}
+B::B() {}
+D::D() {}
+
+void A::f() {
+}
+
+void B::g() {
+}
+
+void D::f() {
+}
+
+void D::h() {
+}
+
+void af(A *a) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], 
metadata !"_ZTS1A")
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  a->f();
+}
+
+void dg1(D *d) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], 
metadata !"_ZTS1B")
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  d->g();
+}
+
+void df1(D *d) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata 
!11)
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  d->f();
+}
+
+void dh1(D *d) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata 
!11)
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  d->h();
+}
+
+
+D d;
+
+void foo() {
+  dg1(&d);
+  df1(&d);
+  dh1(&d);
+
+
+  struct FA : A {
+    void f() {}
+  } fa;
+  af(&fa);
+}

diff  --git a/clang/test/Driver/clang_f_opts.c 
b/clang/test/Driver/clang_f_opts.c
index 765f9d6ae3212..e5a23270ea732 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -377,7 +377,6 @@
 // RUN: -ftree-ter                                                            \
 // RUN: -ftree-vrp                                                            \
 // RUN: -fno-devirtualize                                                     \
-// RUN: -fno-devirtualize-speculatively                                       \
 // RUN: -fslp-vectorize-aggressive                                            \
 // RUN: -fno-slp-vectorize-aggressive                                         \
 // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-WARNING %s
@@ -436,7 +435,6 @@
 // CHECK-WARNING-DAG: optimization flag '-ftree-ter' is not supported
 // CHECK-WARNING-DAG: optimization flag '-ftree-vrp' is not supported
 // CHECK-WARNING-DAG: optimization flag '-fno-devirtualize' is not supported
-// CHECK-WARNING-DAG: optimization flag '-fno-devirtualize-speculatively' is 
not supported
 // CHECK-WARNING-DAG: the flag '-fslp-vectorize-aggressive' has been 
deprecated and will be ignored
 // CHECK-WARNING-DAG: the flag '-fno-slp-vectorize-aggressive' has been 
deprecated and will be ignored
 

diff  --git a/llvm/include/llvm/Passes/PassBuilder.h 
b/llvm/include/llvm/Passes/PassBuilder.h
index 8fa21f2cb2dd6..00d4874d5109b 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -99,6 +99,10 @@ class PipelineTuningOptions {
   // analyses after various module->function or cgscc->function adaptors in the
   // default pipelines.
   bool EagerlyInvalidateAnalyses;
+
+  // Tuning option to enable/disable speculative devirtualization.
+  // Its default value is false.
+  bool DevirtualizeSpeculatively;
 };
 
 /// This class provides access to building LLVM's passes.

diff  --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h 
b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 7a03405b4f462..2e33a4098be1b 100644
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -226,11 +226,14 @@ struct WholeProgramDevirtPass : public 
PassInfoMixin<WholeProgramDevirtPass> {
   ModuleSummaryIndex *ExportSummary;
   const ModuleSummaryIndex *ImportSummary;
   bool UseCommandLine = false;
+  bool DevirtSpeculatively = false;
   WholeProgramDevirtPass()
       : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
   WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
-                         const ModuleSummaryIndex *ImportSummary)
-      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+                         const ModuleSummaryIndex *ImportSummary,
+                         bool DevirtSpeculatively = false)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+        DevirtSpeculatively(DevirtSpeculatively) {
     assert(!(ExportSummary && ImportSummary));
   }
   LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp
index c6beb3fdf09bd..4de527d9ef85e 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -305,6 +305,13 @@ static cl::opt<std::string> InstrumentColdFuncOnlyPath(
              "with --pgo-instrument-cold-function-only)"),
     cl::Hidden);
 
+// TODO: There is a similar flag in WPD pass, we should consolidate them by
+// parsing the option only once in PassBuilder and share it across both places.
+static cl::opt<bool> EnableDevirtualizeSpeculatively(
+    "enable-devirtualize-speculatively",
+    cl::desc("Enable speculative devirtualization optimization"),
+    cl::init(false));
+
 extern cl::opt<std::string> UseCtxProfile;
 extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
 
@@ -326,6 +333,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   MergeFunctions = EnableMergeFunctions;
   InlinerThreshold = -1;
   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
+  DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively;
 }
 
 namespace llvm {
@@ -1655,6 +1663,34 @@ 
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (!LTOPreLink)
     MPM.addPass(RelLookupTableConverterPass());
 
+  // Add devirtualization pass only when LTO is not enabled, as otherwise
+  // the pass is already enabled in the LTO pipeline.
+  if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
+    // TODO: explore a better pipeline configuration that can improve
+    // compilation time overhead.
+    MPM.addPass(WholeProgramDevirtPass(
+        /*ExportSummary*/ nullptr,
+        /*ImportSummary*/ nullptr,
+        /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
+                                   lowertypetests::DropTestKind::Assume));
+    // Given that the devirtualization creates more opportunities for inlining,
+    // we run the Inliner again here to maximize the optimization gain we
+    // get from devirtualization.
+    // Also, we can't run devirtualization before inlining because the
+    // devirtualization depends on the passes optimizing/eliminating vtable GVs
+    // and those passes are only effective after inlining.
+    if (EnableModuleInliner) {
+      MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
+                                    UseInlineAdvisor,
+                                    ThinOrFullLTOPhase::None));
+    } else {
+      MPM.addPass(ModuleInlinerWrapperPass(
+          getInlineParamsFromOptLevel(Level),
+          /* MandatoryFirst */ true,
+          InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner}));
+    }
+  }
   return MPM;
 }
 

diff  --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp 
b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 4642da0abdc13..7aa90eefd0d96 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -628,9 +628,11 @@ struct DevirtModule {
   std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
   PatternList FunctionsToSkip;
 
+  const bool DevirtSpeculatively;
   DevirtModule(Module &M, ModuleAnalysisManager &MAM,
                ModuleSummaryIndex *ExportSummary,
-               const ModuleSummaryIndex *ImportSummary)
+               const ModuleSummaryIndex *ImportSummary,
+               bool DevirtSpeculatively)
       : M(M), MAM(MAM),
         FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
         ExportSummary(ExportSummary), ImportSummary(ImportSummary),
@@ -643,7 +645,8 @@ struct DevirtModule {
         RemarksEnabled(areRemarksEnabled()),
         OREGetter([&](Function &F) -> OptimizationRemarkEmitter & {
           return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-        }) {
+        }),
+        DevirtSpeculatively(DevirtSpeculatively) {
     assert(!(ExportSummary && ImportSummary));
     FunctionsToSkip.init(SkipFunctionNames);
   }
@@ -757,7 +760,8 @@ struct DevirtModule {
 
   // Lower the module using the action and summary passed as command line
   // arguments. For testing purposes only.
-  static bool runForTesting(Module &M, ModuleAnalysisManager &MAM);
+  static bool runForTesting(Module &M, ModuleAnalysisManager &MAM,
+                            bool DevirtSpeculatively);
 };
 
 struct DevirtIndex {
@@ -800,11 +804,22 @@ struct DevirtIndex {
 PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
                                               ModuleAnalysisManager &MAM) {
   if (UseCommandLine) {
-    if (!DevirtModule::runForTesting(M, MAM))
+    if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively))
       return PreservedAnalyses::all();
     return PreservedAnalyses::none();
   }
-  if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run())
+
+  std::optional<ModuleSummaryIndex> Index;
+  if (!ExportSummary && !ImportSummary && DevirtSpeculatively) {
+    // Build the ExportSummary from the module.
+    assert(!ExportSummary &&
+           "ExportSummary is expected to be empty in non-LTO mode");
+    ProfileSummaryInfo PSI(M);
+    Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
+    ExportSummary = Index.has_value() ? &Index.value() : nullptr;
+  }
+  if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively)
+           .run())
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
@@ -1002,7 +1017,8 @@ static Error 
checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {
   return ErrorSuccess();
 }
 
-bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) {
+bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM,
+                                 bool DevirtSpeculatively) {
   std::unique_ptr<ModuleSummaryIndex> Summary =
       std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
 
@@ -1031,7 +1047,8 @@ bool DevirtModule::runForTesting(Module &M, 
ModuleAnalysisManager &MAM) {
                    ClSummaryAction == PassSummaryAction::Export ? Summary.get()
                                                                 : nullptr,
                    ClSummaryAction == PassSummaryAction::Import ? Summary.get()
-                                                                : nullptr)
+                                                                : nullptr,
+                   DevirtSpeculatively)
           .run();
 
   if (!ClWriteSummary.empty()) {
@@ -1095,10 +1112,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
     if (!TM.Bits->GV->isConstant())
       return false;
 
-    // Without ClDevirtualizeSpeculatively, we cannot perform whole program
+    // Without DevirtSpeculatively, we cannot perform whole program
     // devirtualization analysis on a vtable with public LTO visibility.
-    if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
-                                            
GlobalObject::VCallVisibilityPublic)
+    if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() ==
+                                    GlobalObject::VCallVisibilityPublic)
       return false;
 
     Function *Fn = nullptr;
@@ -1119,7 +1136,7 @@ bool DevirtModule::tryFindVirtualCallTargets(
 
     // In most cases empty functions will be overridden by the
     // implementation of the derived class, so we can skip them.
-    if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
+    if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() &&
         Fn->getInstructionCount() <= 1)
       continue;
 
@@ -1240,8 +1257,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo 
&SlotInfo,
       // add support to compare the virtual function pointer to the
       // devirtualized target. In case of a mismatch, fall back to indirect
       // call.
-      if (DevirtCheckMode == WPDCheckMode::Fallback ||
-          ClDevirtualizeSpeculatively) {
+      if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) {
         MDNode *Weights = 
MDBuilder(M.getContext()).createLikelyBranchWeights();
         // Version the indirect call site. If the called value is equal to the
         // given callee, 'NewInst' will be executed, otherwise the original 
call
@@ -2365,7 +2381,7 @@ bool DevirtModule::run() {
   Function *PublicTypeTestFunc = nullptr;
   // If we are in speculative devirtualization mode, we can work on the public
   // type test intrinsics.
-  if (ClDevirtualizeSpeculatively)
+  if (DevirtSpeculatively)
     PublicTypeTestFunc =
         Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
   Function *TypeTestFunc =
@@ -2501,7 +2517,7 @@ bool DevirtModule::run() {
       // Out of speculative devirtualization mode, Try to apply virtual 
constant
       // propagation or branch funneling.
       // TODO: This should eventually be enabled for non-public type tests.
-      if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
+      if (!SingleImplDevirt && !DevirtSpeculatively) {
         DidVirtualConstProp |=
             tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
 

diff  --git 
a/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll 
b/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll
new file mode 100644
index 0000000000000..98df729696de9
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -O3 -enable-devirtualize-speculatively %s 2>&1 | FileCheck %s
+
+; Test that the devirtualized calls are inlined.
+
+@vt1 = constant [1 x ptr] [ptr @vf], !type !0
+@vt2 = constant [1 x ptr] [ptr @vf2], !type !1
+
+
+define i1 @vf(ptr %this) {
+  ret i1 true
+}
+
+define i1 @vf2(ptr %this) {
+  ret i1 false
+}
+
+; CHECK: define i1 @call
+define i1 @call(ptr %obj) #1 {
+  %vtable = load ptr, ptr %obj
+  %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid")
+  call void @llvm.assume(i1 %p)
+  %fptr = load ptr, ptr %vtable
+  ; if.true.direct_targ:                              ; preds = %0
+  ;   br label %if.end.icp
+  ; if.false.orig_indirect:                           ; preds = %0
+  ;   %res = tail call i1 %fptr(ptr nonnull %obj)
+  ;   br label %if.end.icp
+  ; if.end.icp:                                       ; preds = 
%if.false.orig_indirect, %if.true.direct_targ
+  ;   %2 = phi i1 [ %res, %if.false.orig_indirect ], [ true, 
%if.true.direct_targ ]
+  ;   ret i1 %2
+  %res = call i1 %fptr(ptr %obj)
+  ret i1 %res
+}
+
+
+; CHECK: define i1 @call1
+define i1 @call1(ptr %obj) #1 {
+  %vtable = load ptr, ptr %obj
+  %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
+  call void @llvm.assume(i1 %p)
+  %fptr = load ptr, ptr %vtable, align 8
+  ; if.true.direct_targ:                              ; preds = %0
+  ;   br label %if.end.icp
+  ; if.false.orig_indirect:                           ; preds = %0
+  ;   %res = tail call i1 %fptr(ptr nonnull %obj)
+  ;   br label %if.end.icp
+  ; if.end.icp:                                       ; preds = 
%if.false.orig_indirect, %if.true.direct_targ
+  ;   %2 = phi i1 [ %res, %if.false.orig_indirect ], [ false, 
%if.true.direct_targ ]
+  ;   ret i1 %2
+  %res = call i1 %fptr(ptr %obj)
+  ret i1 %res
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare i1 @llvm.public.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
+!1 = !{i32 0, !"typeid1"}

diff  --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll 
b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll
new file mode 100644
index 0000000000000..d8781d5686b53
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll
@@ -0,0 +1,64 @@
+; Test that the needed intrinsics for devirtualization are preserved and not 
dropped by other
+; optimizations.
+
+; RUN: opt -S -O3 %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x ptr] [ptr @vf], !type !8
+@vt2 = constant [1 x ptr] [ptr @vf2], !type !12
+
+define i1 @vf(ptr %this) #0 !dbg !7 {
+  ret i1 true
+}
+
+define i1 @vf2(ptr %this) !dbg !11 {
+  ret i1 false
+}
+
+define void @call(ptr %obj) #1 !dbg !5 {
+  %vtable = load ptr, ptr %obj
+  ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ 
]*]], metadata !"typeid")
+  ; CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid")
+  call void @llvm.assume(i1 %p)
+  %fptr = load ptr, ptr %vtable
+  call i1 %fptr(ptr %obj), !dbg !6
+  ret void
+}
+
+define void @call1(ptr %obj) #1 !dbg !9 {
+  %vtable = load ptr, ptr %obj
+  ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], 
metadata !"typeid1")
+  ; CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
+  call void @llvm.assume(i1 %p)
+  %fptr = load ptr, ptr %vtable, align 8
+  %1 = call i1 %fptr(ptr %obj), !dbg !10
+  ret void
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare i1 @llvm.public.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, 
producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, 
runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "devirt-single.cc", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 4.0.0 (trunk 278098)"}
+!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, 
file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: 
DIFlagPrototyped, isOptimized: false, unit: !0)
+!6 = !DILocation(line: 30, column: 32, scope: !5)
+!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEb", scope: 
!1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, 
flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!8 = !{i32 0, !"typeid"}
+
+!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: 
!1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, 
flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!10 = !DILocation(line: 35, column: 32, scope: !9)
+!11 = distinct !DISubprogram(name: "vf2", linkageName: "_ZN3vt13vf2Eb", scope: 
!1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, 
flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!12 = !{i32 0, !"typeid1"}
+


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to