Index: llvm/test/Transforms/Internalize/vcall-visibility.ll
--- /dev/null
+++ llvm/test/Transforms/Internalize/vcall-visibility.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s -internalize -S | FileCheck %s
+%struct.A = type { i32 (...)** }
+%struct.B = type { i32 (...)** }
+%struct.C = type { i32 (...)** }
+; Class A has default visibility, so has no !vcall_visibility metadata before
+; or after LTO.
+; CHECK-NOT: @_ZTV1A = {{.*}}!vcall_visibility
+@_ZTV1A = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1
+; Class B has hidden visibility but public LTO visibility, so has no
+; !vcall_visibility metadata before or after LTO.
+; CHECK-NOT: @_ZTV1B = {{.*}}!vcall_visibility
+@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !2, !type !3
+; Class C has hidden visibility, so the !vcall_visibility metadata is set to 1
+; (linkage unit) before LTO, and 2 (translation unit) after LTO.
+; CHECK: @_ZTV1C ={{.*}}!vcall_visibility [[MD_TU_VIS:![0-9]+]]
+@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !4, !type !5, !vcall_visibility !6
+; Class D has translation unit visibility before LTO, and this is not changed
+; by LTO. It must use the same metadata node as class C after internalization.
+; CHECK: @_ZTVN12_GLOBAL__N_11DE = {{.*}}!vcall_visibility [[MD_TU_VIS]]
+@_ZTVN12_GLOBAL__N_11DE = internal unnamed_addr constant { [3 x i8*] } zeroinitializer, align 8, !type !7, !type !9, !vcall_visibility !11
+define dso_local void @_ZN1A3fooEv(%struct.A* nocapture %this) {
+  ret void
+define hidden void @_ZN1B3fooEv(%struct.B* nocapture %this) {
+  ret void
+define hidden void @_ZN1C3fooEv(%struct.C* nocapture %this) {
+  ret void
+define hidden noalias nonnull i8* @_Z6make_dv() {
+  %call = tail call i8* @_Znwm(i64 8) #3
+  %0 = bitcast i8* %call to i32 (...)***
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_11DE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret i8* %call
+declare dso_local noalias nonnull i8* @_Znwm(i64)
+; CHECK: [[MD_TU_VIS]] = !{i64 2}
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"}
+!2 = !{i64 16, !"_ZTS1B"}
+!3 = !{i64 16, !"_ZTSM1BFvvE.virtual"}
+!4 = !{i64 16, !"_ZTS1C"}
+!5 = !{i64 16, !"_ZTSM1CFvvE.virtual"}
+!6 = !{i64 1}
+!7 = !{i64 16, !8}
+!8 = distinct !{}
+!9 = !{i64 16, !10}
+!10 = distinct !{}
+!11 = !{i64 2}
Index: llvm/test/Transforms/GlobalDCE/vtable-rtti.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/vtable-rtti.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+; We currently only use llvm.type.checked.load for virtual function pointers,
+; not any other part of the vtable, so we can't remove the RTTI pointer even if
+; it's never going to be loaded from.
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+%struct.A = type { i32 (...)** }
+; CHECK: @_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* null] }, align 8, !type !0, !type !1, !vcall_visibility !2
+@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2
+@_ZTS1A = hidden constant [3 x i8] c"1A\00", align 1
+@_ZTI1A = hidden constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }, align 8
+define internal void @_ZN1AC2Ev(%struct.A* %this) {
+  %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+; CHECK-NOT: define {{.*}} @_ZN1A3fooEv(
+define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) {
+  ret void
+define dso_local i8* @_Z6make_Av() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC2Ev(%struct.A* %0)
+  ret i8* %call
+declare dso_local noalias nonnull i8* @_Znwm(i64)
+@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8*
+!llvm.module.flags = !{!3}
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"}
+!2 = !{i64 2} ; translation-unit vcall visibility
+!3 = !{i32 1, !"LTOPostLink", i32 1}
Index: llvm/test/Transforms/GlobalDCE/virtual-functions.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/virtual-functions.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+declare dso_local noalias nonnull i8* @_Znwm(i64)
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+; %struct.A is a C++ struct with two virtual functions, A::foo and A::bar. The
+; !vcall_visibility metadata is set on the vtable, so we know that all virtual
+; calls through this vtable are visible and use the @llvm.type.checked.load
+; intrinsic. Function test_A makes a call to A::foo, but there is no call to
+; A::bar anywhere, so A::bar can be deleted, and its vtable slot replaced with
+; null.
+%struct.A = type { i32 (...)** }
+; The pointer to A::bar in the vtable can be removed, because it will never be
+; loaded. We replace it with null to keep the layout the same. Because it is at
+; the end of the vtable we could potentially shrink the vtable, but don't
+; currently do that.
+; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* null] }
+@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3
+; A::foo is called, so must be retained.
+; CHECK: define internal i32 @_ZN1A3fooEv(
+define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) {
+  ret i32 42
+; A::bar is not used, so can be deleted.
+; CHECK-NOT: define internal i32 @_ZN1A3barEv(
+define internal i32 @_ZN1A3barEv(%struct.A* nocapture readnone %this) {
+  ret i32 1337
+define dso_local i32 @test_A() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.A*
+  %1 = bitcast i8* %call to i32 (...)***
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8
+  %2 = tail call { i8*, i1 } @llvm.type.checked.load(i8* bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i8*), i32 0, metadata !"_ZTS1A"), !nosanitize !9
+  %3 = extractvalue { i8*, i1 } %2, 0, !nosanitize !9
+  %4 = bitcast i8* %3 to i32 (%struct.A*)*, !nosanitize !9
+  %call1 = tail call i32 %4(%struct.A* nonnull %0)
+  ret i32 %call1
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFivE.virtual"}
+!2 = !{i64 24, !"_ZTSM1AFivE.virtual"}
+!3 = !{i64 2}
+!9 = !{}
Index: llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+; Structs A, B and C have public, linkage-unit and translation-unit
+; vcall_visibility respectively. This test runs before LTO linking (no
+; LTOPostLink module flag), so virtual function elimination only applies to C.
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+%struct.A = type { i32 (...)** }
+@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2
+define internal void @_ZN1AC2Ev(%struct.A* %this) {
+  %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+; CHECK: define {{.*}} @_ZN1A3fooEv(
+define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) {
+  ret void
+define dso_local i8* @_Z6make_Av() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC2Ev(%struct.A* %0)
+  ret i8* %call
+%struct.B = type { i32 (...)** }
+@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !3
+define internal void @_ZN1BC2Ev(%struct.B* %this) {
+  %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+; CHECK: define {{.*}} @_ZN1B3fooEv(
+define internal void @_ZN1B3fooEv(%struct.B* nocapture %this) {
+  ret void
+define dso_local i8* @_Z6make_Bv() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.B*
+  tail call void @_ZN1BC2Ev(%struct.B* %0)
+  ret i8* %call
+%struct.C = type { i32 (...)** }
+@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !4
+define internal void @_ZN1CC2Ev(%struct.C* %this) {
+  %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1C, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+; CHECK-NOT: define {{.*}} @_ZN1C3fooEv(
+define internal void @_ZN1C3fooEv(%struct.C* nocapture %this) {
+  ret void
+define dso_local i8* @_Z6make_Cv() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.C*
+  tail call void @_ZN1CC2Ev(%struct.C* %0)
+  ret i8* %call
+declare dso_local noalias nonnull i8* @_Znwm(i64)
+!llvm.module.flags = !{}
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"}
+!2 = !{i64 0} ; public vcall visibility
+!3 = !{i64 1} ; linkage-unit vcall visibility
+!4 = !{i64 2} ; translation-unit vcall visibility
Index: llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll
@@ -0,0 +1,95 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+; Structs A, B and C have public, linkage-unit and translation-unit
+; vcall_visibility respectively. This test runs after LTO linking (LTOPostLink
+; module flag set), so virtual function elimination applies to B and C.
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+%struct.A = type { i32 (...)** }
+@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2
+define internal void @_ZN1AC2Ev(%struct.A* %this) {
+  %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+; CHECK: define {{.*}} @_ZN1A3fooEv(
+define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) {
+  ret void
+define dso_local i8* @_Z6make_Av() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC2Ev(%struct.A* %0)
+  ret i8* %call
+%struct.B = type { i32 (...)** }
+@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !3
+define internal void @_ZN1BC2Ev(%struct.B* %this) {
+  %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+; CHECK-NOT: define {{.*}} @_ZN1B3fooEv(
+define internal void @_ZN1B3fooEv(%struct.B* nocapture %this) {
+  ret void
+define dso_local i8* @_Z6make_Bv() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.B*
+  tail call void @_ZN1BC2Ev(%struct.B* %0)
+  ret i8* %call
+%struct.C = type { i32 (...)** }
+@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !4
+define internal void @_ZN1CC2Ev(%struct.C* %this) {
+  %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1C, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+; CHECK-NOT: define {{.*}} @_ZN1C3fooEv(
+define internal void @_ZN1C3fooEv(%struct.C* nocapture %this) {
+  ret void
+define dso_local i8* @_Z6make_Cv() {
+  %call = tail call i8* @_Znwm(i64 8)
+  %0 = bitcast i8* %call to %struct.C*
+  tail call void @_ZN1CC2Ev(%struct.C* %0)
+  ret i8* %call
+declare dso_local noalias nonnull i8* @_Znwm(i64)
+!llvm.module.flags = !{!5}
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"}
+!2 = !{i64 0} ; public vcall visibility
+!3 = !{i64 1} ; linkage-unit vcall visibility
+!4 = !{i64 2} ; translation-unit vcall visibility
+!5 = !{i32 1, !"LTOPostLink", i32 1}
Index: llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll
@@ -0,0 +1,120 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; struct A {
+;   A();
+;   virtual int foo(int);
+;   virtual int bar(float);
+; };
+; struct B : A {
+;   B();
+;   virtual int foo(int);
+;   virtual int bar(float);
+; };
+; A::A() {}
+; B::B() {}
+; int A::foo(int)   { return 1; }
+; int A::bar(float) { return 2; }
+; int B::foo(int)   { return 3; }
+; int B::bar(float) { return 4; }
+; extern "C" int test(B *p, int (B::*q)(int)) { return (p->*q)(42); }
+; Member function pointers are tracked by the combination of their object type
+; and function type, which must both be compatible. Here, the call is through a
+; pointer of type "int (B::*q)(int)", so the call could only be dispatched to
+; B::foo. It can't be dispatched to A::bar or B::bar as the function pointer
+; does not match, and it can't be dispatched to A::foo as the object type
+; doesn't match, so those can be removed.
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } zeroinitializer
+@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* bitcast (i32 (%struct.A*, float)* @_ZN1A3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3
+; CHECK: @_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* null] }
+@_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* bitcast (i32 (%struct.B*, float)* @_ZN1B3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !type !4, !type !5, !type !6, !vcall_visibility !3
+; CHECK-NOT: define internal i32 @_ZN1A3fooEi(
+define internal i32 @_ZN1A3fooEi(%struct.A* nocapture readnone %this, i32) unnamed_addr #1 align 2 {
+  ret i32 1
+; CHECK-NOT: define internal i32 @_ZN1A3barEf(
+define internal i32 @_ZN1A3barEf(%struct.A* nocapture readnone %this, float) unnamed_addr #1 align 2 {
+  ret i32 2
+; CHECK: define internal i32 @_ZN1B3fooEi(
+define internal i32 @_ZN1B3fooEi(%struct.B* nocapture readnone %this, i32) unnamed_addr #1 align 2 {
+  ret i32 3
+; CHECK-NOT: define internal i32 @_ZN1B3barEf(
+define internal i32 @_ZN1B3barEf(%struct.B* nocapture readnone %this, float) unnamed_addr #1 align 2 {
+  ret i32 4
+define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) {
+  %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) {
+  %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden i32 @test(%struct.B* %p, i64 %q.coerce0, i64 %q.coerce1) {
+  %0 = bitcast %struct.B* %p to i8*
+  %1 = getelementptr inbounds i8, i8* %0, i64 %q.coerce1
+  %this.adjusted = bitcast i8* %1 to %struct.B*
+  %2 = and i64 %q.coerce0, 1
+  %memptr.isvirtual = icmp eq i64 %2, 0
+  br i1 %memptr.isvirtual, label %memptr.nonvirtual, label %memptr.virtual
+memptr.virtual:                                   ; preds = %entry
+  %3 = bitcast i8* %1 to i8**
+  %vtable = load i8*, i8** %3, align 8
+  %4 = add i64 %q.coerce0, -1
+  %5 = getelementptr i8, i8* %vtable, i64 %4, !nosanitize !12
+  %6 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %5, i32 0, metadata !"_ZTSM1BFiiE.virtual"), !nosanitize !12
+  %7 = extractvalue { i8*, i1 } %6, 0, !nosanitize !12
+  %memptr.virtualfn = bitcast i8* %7 to i32 (%struct.B*, i32)*, !nosanitize !12
+  br label %memptr.end
+memptr.nonvirtual:                                ; preds = %entry
+  %memptr.nonvirtualfn = inttoptr i64 %q.coerce0 to i32 (%struct.B*, i32)*
+  br label %memptr.end
+memptr.end:                                       ; preds = %memptr.nonvirtual, %memptr.virtual
+  %8 = phi i32 (%struct.B*, i32)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ]
+  %call = tail call i32 %8(%struct.B* %this.adjusted, i32 42)
+  ret i32 %call
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFiiE.virtual"}
+!2 = !{i64 24, !"_ZTSM1AFifE.virtual"}
+!3 = !{i64 2}
+!4 = !{i64 16, !"_ZTS1B"}
+!5 = !{i64 16, !"_ZTSM1BFiiE.virtual"}
+!6 = !{i64 24, !"_ZTSM1BFifE.virtual"}
+!12 = !{}
Index: llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; struct A {
+;   A();
+;   virtual int foo();
+; };
+; struct B : A {
+;   B();
+;   virtual int foo();
+; };
+; A::A() {}
+; B::B() {}
+; int A::foo() { return 42; }
+; int B::foo() { return 1337; }
+; extern "C" int test(B *p) { return p->foo(); }
+; The virtual call in test can only be dispatched to B::foo (or a more-derived
+; class, if there were one), so A::foo can be removed.
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+; CHECK: @_ZTV1A = internal unnamed_addr constant { [3 x i8*] } zeroinitializer
+@_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2
+; CHECK: @_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }
+@_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !type !3, !type !4, !vcall_visibility !2
+; CHECK-NOT: define internal i32 @_ZN1A3fooEv(
+define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) {
+  ret i32 42
+; CHECK: define internal i32 @_ZN1B3fooEv(
+define internal i32 @_ZN1B3fooEv(%struct.B* nocapture readnone %this) {
+  ret i32 1337
+define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) {
+  %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) {
+  %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden i32 @test(%struct.B* %p) {
+  %0 = bitcast %struct.B* %p to i8**
+  %vtable1 = load i8*, i8** %0, align 8
+  %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable1, i32 0, metadata !"_ZTS1B"), !nosanitize !10
+  %2 = extractvalue { i8*, i1 } %1, 0, !nosanitize !10
+  %3 = bitcast i8* %2 to i32 (%struct.B*)*, !nosanitize !10
+  %call = tail call i32 %3(%struct.B* %p)
+  ret i32 %call
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) #2
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFivE.virtual"}
+!2 = !{i64 2}
+!3 = !{i64 16, !"_ZTS1B"}
+!4 = !{i64 16, !"_ZTSM1BFivE.virtual"}
+!10 = !{}
Index: llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll
@@ -0,0 +1,118 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; struct A {
+;   A();
+;   virtual int foo(int);
+;   virtual int bar(float);
+; };
+; struct B : A {
+;   B();
+;   virtual int foo(int);
+;   virtual int bar(float);
+; };
+; A::A() {}
+; B::B() {}
+; int A::foo(int)   { return 1; }
+; int A::bar(float) { return 2; }
+; int B::foo(int)   { return 3; }
+; int B::bar(float) { return 4; }
+; extern "C" int test(A *p, int (A::*q)(int)) { return (p->*q)(42); }
+; Member function pointers are tracked by the combination of their object type
+; and function type, which must both be compatible. Here, the call is through a
+; pointer of type "int (A::*q)(int)", so the call could be dispatched to A::foo
+; or B::foo. It can't be dispatched to A::bar or B::bar as the function pointer
+; does not match, so those can be removed.
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* null] }
+@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* bitcast (i32 (%struct.A*, float)* @_ZN1A3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3
+; CHECK: @_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* null] }
+@_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* bitcast (i32 (%struct.B*, float)* @_ZN1B3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !type !4, !type !5, !type !6, !vcall_visibility !3
+; CHECK: define internal i32 @_ZN1A3fooEi(
+define internal i32 @_ZN1A3fooEi(%struct.A* nocapture readnone %this, i32) unnamed_addr #1 align 2 {
+  ret i32 1
+; CHECK-NOT: define internal i32 @_ZN1A3barEf(
+define internal i32 @_ZN1A3barEf(%struct.A* nocapture readnone %this, float) unnamed_addr #1 align 2 {
+  ret i32 2
+; CHECK: define internal i32 @_ZN1B3fooEi(
+define internal i32 @_ZN1B3fooEi(%struct.B* nocapture readnone %this, i32) unnamed_addr #1 align 2 {
+  ret i32 3
+; CHECK-NOT: define internal i32 @_ZN1B3barEf(
+define internal i32 @_ZN1B3barEf(%struct.B* nocapture readnone %this, float) unnamed_addr #1 align 2 {
+  ret i32 4
+define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) {
+  %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) {
+  %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden i32 @test(%struct.A* %p, i64 %q.coerce0, i64 %q.coerce1) {
+  %0 = bitcast %struct.A* %p to i8*
+  %1 = getelementptr inbounds i8, i8* %0, i64 %q.coerce1
+  %this.adjusted = bitcast i8* %1 to %struct.A*
+  %2 = and i64 %q.coerce0, 1
+  %memptr.isvirtual = icmp eq i64 %2, 0
+  br i1 %memptr.isvirtual, label %memptr.nonvirtual, label %memptr.virtual
+memptr.virtual:                                   ; preds = %entry
+  %3 = bitcast i8* %1 to i8**
+  %vtable = load i8*, i8** %3, align 8
+  %4 = add i64 %q.coerce0, -1
+  %5 = getelementptr i8, i8* %vtable, i64 %4, !nosanitize !12
+  %6 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %5, i32 0, metadata !"_ZTSM1AFiiE.virtual"), !nosanitize !12
+  %7 = extractvalue { i8*, i1 } %6, 0, !nosanitize !12
+  %memptr.virtualfn = bitcast i8* %7 to i32 (%struct.A*, i32)*, !nosanitize !12
+  br label %memptr.end
+memptr.nonvirtual:                                ; preds = %entry
+  %memptr.nonvirtualfn = inttoptr i64 %q.coerce0 to i32 (%struct.A*, i32)*
+  br label %memptr.end
+memptr.end:                                       ; preds = %memptr.nonvirtual, %memptr.virtual
+  %8 = phi i32 (%struct.A*, i32)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ]
+  %call = tail call i32 %8(%struct.A* %this.adjusted, i32 42)
+  ret i32 %call
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFiiE.virtual"}
+!2 = !{i64 24, !"_ZTSM1AFifE.virtual"}
+!3 = !{i64 2}
+!4 = !{i64 16, !"_ZTS1B"}
+!5 = !{i64 16, !"_ZTSM1BFiiE.virtual"}
+!6 = !{i64 24, !"_ZTSM1BFifE.virtual"}
+!12 = !{}
Index: llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll
--- /dev/null
+++ llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; struct A {
+;   A();
+;   virtual int foo();
+; };
+; struct B : A {
+;   B();
+;   virtual int foo();
+; };
+; A::A() {}
+; B::B() {}
+; int A::foo() { return 42; }
+; int B::foo() { return 1337; }
+; extern "C" int test(A *p) { return p->foo(); }
+; The virtual call in test could be dispatched to either A::foo or B::foo, so
+; both must be retained.
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+; CHECK: @_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] }
+@_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2
+; CHECK: @_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }
+@_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !type !3, !type !4, !vcall_visibility !2
+; CHECK: define internal i32 @_ZN1A3fooEv(
+define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) {
+  ret i32 42
+; CHECK: define internal i32 @_ZN1B3fooEv(
+define internal i32 @_ZN1B3fooEv(%struct.B* nocapture readnone %this) {
+  ret i32 1337
+define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) {
+  %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) {
+  %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+define hidden i32 @test(%struct.A* %p) {
+  %0 = bitcast %struct.A* %p to i8**
+  %vtable1 = load i8*, i8** %0, align 8
+  %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable1, i32 0, metadata !"_ZTS1A"), !nosanitize !10
+  %2 = extractvalue { i8*, i1 } %1, 0, !nosanitize !10
+  %3 = bitcast i8* %2 to i32 (%struct.A*)*, !nosanitize !10
+  %call = tail call i32 %3(%struct.A* %p)
+  ret i32 %call
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) #2
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AFivE.virtual"}
+!2 = !{i64 2}
+!3 = !{i64 16, !"_ZTS1B"}
+!4 = !{i64 16, !"_ZTSM1BFivE.virtual"}
+!10 = !{}
Index: llvm/test/ThinLTO/X86/lazyload_metadata.ll
--- llvm/test/ThinLTO/X86/lazyload_metadata.ll
+++ llvm/test/ThinLTO/X86/lazyload_metadata.ll
@@ -10,13 +10,13 @@
 ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \
 ; RUN:          -o /dev/null -stats \
 ; RUN:  2>&1 | FileCheck %s -check-prefix=LAZY
-; LAZY: 61 bitcode-reader  - Number of Metadata records loaded
+; LAZY: 63 bitcode-reader  - Number of Metadata records loaded
 ; LAZY: 2 bitcode-reader  - Number of MDStrings loaded
 ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \
 ; RUN:          -o /dev/null -disable-ondemand-mds-loading -stats \
 ; RUN:  2>&1 | FileCheck %s -check-prefix=NOTLAZY
-; NOTLAZY: 70 bitcode-reader  - Number of Metadata records loaded
+; NOTLAZY: 72 bitcode-reader  - Number of Metadata records loaded
 ; NOTLAZY: 7 bitcode-reader  - Number of MDStrings loaded
Index: llvm/test/LTO/ARM/lto-linking-metadata.ll
--- /dev/null
+++ llvm/test/LTO/ARM/lto-linking-metadata.ll
@@ -0,0 +1,19 @@
+; RUN: opt %s -o %t1.bc
+; RUN: llvm-lto %t1.bc -o %t1.save.opt -save-merged-module -O1 --exported-symbol=foo
+; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s
+; RUN: llvm-lto2 run %t1.bc -o %t.out.o -save-temps \
+; RUN:     -r=%t1.bc,foo,pxl
+; RUN: llvm-dis < %t.out.o.0.2.internalize.bc | FileCheck  %s
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7a-unknown-linux"
+define void @foo() {
+  ret void
+; CHECK: !llvm.module.flags = !{[[MD_NUM:![0-9]+]]}
+; CHECK: [[MD_NUM]] = !{i32 1, !"LTOPostLink", i32 1}
Index: llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
--- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -496,7 +496,6 @@
   void buildTypeIdentifierMap(
       std::vector<VTableBits> &Bits,
       DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
-  Constant *getPointerAtOffset(Constant *I, uint64_t Offset);
   tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
                             const std::set<TypeMemberInfo> &TypeMemberInfos,
@@ -815,38 +814,6 @@
-Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) {
-  if (I->getType()->isPointerTy()) {
-    if (Offset == 0)
-      return I;
-    return nullptr;
-  }
-  const DataLayout &DL = M.getDataLayout();
-  if (auto *C = dyn_cast<ConstantStruct>(I)) {
-    const StructLayout *SL = DL.getStructLayout(C->getType());
-    if (Offset >= SL->getSizeInBytes())
-      return nullptr;
-    unsigned Op = SL->getElementContainingOffset(Offset);
-    return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
-                              Offset - SL->getElementOffset(Op));
-  }
-  if (auto *C = dyn_cast<ConstantArray>(I)) {
-    ArrayType *VTableTy = C->getType();
-    uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
-    unsigned Op = Offset / ElemSize;
-    if (Op >= C->getNumOperands())
-      return nullptr;
-    return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
-                              Offset % ElemSize);
-  }
-  return nullptr;
 bool DevirtModule::tryFindVirtualCallTargets(
     std::vector<VirtualCallTarget> &TargetsForSlot,
     const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
@@ -855,7 +822,7 @@
       return false;
     Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
-                                       TM.Offset + ByteOffset);
+                                       TM.Offset + ByteOffset, M);
     if (!Ptr)
       return false;
@@ -1943,6 +1910,12 @@
     for (VTableBits &B : Bits)
+  // We have lowered or deleted the type checked load intrinsics, so we no
+  // longer have enough information to reason about the liveness of virtual
+  // function pointers in GlobalDCE.
+  for (GlobalVariable &GV : M.globals())
+    GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
   return true;
Index: llvm/lib/Transforms/IPO/GlobalDCE.cpp
--- llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -17,9 +17,11 @@
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/CtorUtils.h"
@@ -29,10 +31,15 @@
 #define DEBUG_TYPE "globaldce"
+static cl::opt<bool>
+    ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+                cl::desc("Enable virtual function elimination"));
 STATISTIC(NumAliases  , "Number of global aliases removed");
 STATISTIC(NumFunctions, "Number of functions removed");
 STATISTIC(NumIFuncs,    "Number of indirect functions removed");
 STATISTIC(NumVariables, "Number of global variables removed");
+STATISTIC(NumVFuncs,    "Number of virtual functions removed");
 namespace {
   class GlobalDCELegacyPass : public ModulePass {
@@ -118,6 +125,15 @@
     ComputeDependencies(User, Deps);
   Deps.erase(&GV); // Remove self-reference.
   for (GlobalValue *GVU : Deps) {
+    // If this is a dep from a vtable to a virtual function, and we have
+    // complete information about all virtual call sites which could call
+    // through this vtable, then skip it, because the call site information
+    // will be more precise.
+    if (VFESafeVTables.count(GVU) && isa<Function>(&GV)) {
+      LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> "
+                        << GV.getName() << "\n");
+      continue;
+    }
@@ -132,12 +148,133 @@
   if (Updates)
   if (Comdat *C = GV.getComdat()) {
-    for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+    for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
       MarkLive(*CM.second, Updates); // Recursion depth is only two because only
                                      // globals in the same comdat are visited.
+    }
+  }
+// Walk every defined global variable carrying !type metadata, record its
+// (vtable, offset) pairs in TypeIdMap keyed by type id, and add it to
+// VFESafeVTables when its !vcall_visibility metadata says that all virtual
+// calls through it are visible in this module.
+void GlobalDCEPass::ScanVTables(Module &M) {
+  SmallVector<MDNode *, 2> Types;
+  LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n");
+  // A non-zero "LTOPostLink" module flag means linkage-unit visibility can be
+  // treated like translation-unit visibility below.
+  auto *LTOPostLinkMD =
+      cast_or_null<ConstantAsMetadata>(M.getModuleFlag("LTOPostLink"));
+  bool LTOPostLink =
+      LTOPostLinkMD &&
+      (cast<ConstantInt>(LTOPostLinkMD->getValue())->getZExtValue() != 0);
+  for (GlobalVariable &GV : M.globals()) {
+    Types.clear();
+    GV.getMetadata(LLVMContext::MD_type, Types);
+    if (GV.isDeclaration() || Types.empty())
+      continue;
+    // Use the typeid metadata on the vtable to build a mapping from typeids to
+    // the list of (GV, offset) pairs which are the possible vtables for that
+    // typeid.
+    for (MDNode *Type : Types) {
+      Metadata *TypeID = Type->getOperand(1).get();
+      uint64_t Offset =
+          cast<ConstantInt>(
+              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+              ->getZExtValue();
+      TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset));
+    }
+    // If the type corresponding to the vtable is private to this translation
+    // unit (or to the linkage unit, once LTO linking has happened), we know
+    // that we can see all virtual functions which might use it, so VFE is
+    // safe. GV is a GlobalVariable and therefore already a GlobalObject, so
+    // the visibility is queried directly without a cast.
+    GlobalObject::VCallVisibility TypeVis = GV.getVCallVisibility();
+    if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit ||
+        (LTOPostLink &&
+         TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) {
+      LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n");
+      VFESafeVTables.insert(&GV);
+    }
+  }
+// For a type.checked.load made from Caller with type id TypeId and constant
+// offset CallOffset, add a dependency from Caller to the function stored in
+// that slot of every vtable registered for TypeId. A vtable whose slot cannot
+// be resolved to a function is removed from VFESafeVTables.
+void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
+                                   uint64_t CallOffset) {
+  for (auto &VTableInfo : TypeIdMap[TypeId]) {
+    GlobalVariable *VTable = VTableInfo.first;
+    uint64_t VTableOffset = VTableInfo.second;
+    Constant *Ptr =
+        getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset,
+                           *Caller->getParent());
+    if (!Ptr) {
+      LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
+      VFESafeVTables.erase(VTable);
+      // Keep scanning: the remaining vtables for this type id still need
+      // their dependency edges from Caller, otherwise their virtual functions
+      // could be treated as dead.
+      continue;
+    }
+    auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts());
+    if (!Callee) {
+      LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n");
+      VFESafeVTables.erase(VTable);
+      // As above, only this vtable is disqualified; carry on with the rest.
+      continue;
+    }
+    LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> "
+                      << Callee->getName() << "\n");
+    GVDependencies[Caller].insert(Callee);
+// Visit every call to the llvm.type.checked.load intrinsic in M. Calls with a
+// constant offset are forwarded to ScanVTableLoad; calls with a non-constant
+// offset remove every vtable matching the type id from VFESafeVTables.
+void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) {
+  LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n");
+  Function *CheckedLoadDecl =
+      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+  // Nothing to do when the module never references the intrinsic.
+  if (!CheckedLoadDecl)
+    return;
+  for (auto IntrinsicUser : CheckedLoadDecl->users()) {
+    auto Call = dyn_cast<CallInst>(IntrinsicUser);
+    if (!Call)
+      continue;
+    // Operand 1 is the offset, operand 2 is the type id wrapped as a value.
+    auto *ConstOffset = dyn_cast<ConstantInt>(Call->getArgOperand(1));
+    auto *TypeId =
+        cast<MetadataAsValue>(Call->getArgOperand(2))->getMetadata();
+    if (ConstOffset) {
+      ScanVTableLoad(Call->getFunction(), TypeId, ConstOffset->getZExtValue());
+      continue;
+    }
+    // type.checked.load with a non-constant offset, so assume every entry in
+    // every matching vtable is used.
+    for (auto &Entry : TypeIdMap[TypeId])
+      VFESafeVTables.erase(Entry.first);
+  }
+// Entry point for dead virtual function elimination: when enabled via
+// -enable-vfe, collect the vtables that are safe for VFE and then scan the
+// type.checked.load call sites that refer to them.
+void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) {
+  if (!ClEnableVFE)
+    return;
+  ScanVTables(M);
+  // No vtable qualified, so scanning the call sites would be wasted work.
+  if (VFESafeVTables.empty())
+    return;
+  ScanTypeCheckedLoadIntrinsics(M);
+  // Dump the surviving set only in debug builds / with -debug-only=globaldce.
+  LLVM_DEBUG(
+    dbgs() << "VFE safe vtables:\n";
+    for (auto *VTable : VFESafeVTables)
+      dbgs() << "  " << VTable->getName() << "\n";
+  );
 PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
   bool Changed = false;
@@ -163,6 +300,10 @@
     if (Comdat *C = GA.getComdat())
       ComdatMembers.insert(std::make_pair(C, &GA));
+  // Add dependencies between virtual call sites and the virtual functions they
+  // might call, if we have that information.
+  AddVirtualFunctionDependencies(M);
   // Loop over the module, adding globals which are obviously necessary.
   for (GlobalObject &GO : M.global_objects()) {
     Changed |= RemoveUnusedGlobalValue(GO);
@@ -257,8 +398,17 @@
   NumFunctions += DeadFunctions.size();
-  for (Function *F : DeadFunctions)
+  for (Function *F : DeadFunctions) {
+    if (!F->use_empty()) {
+      // Virtual functions might still be referenced by one or more vtables,
+      // but if we've proven them to be unused then it's safe to replace the
+      // virtual function pointers with null, allowing us to remove the
+      // function itself.
+      ++NumVFuncs;
+      F->replaceAllUsesWith(ConstantPointerNull::get(F->getType()));
+    }
+  }
   NumVariables += DeadGlobalVars.size();
   for (GlobalVariable *GV : DeadGlobalVars)
@@ -277,6 +427,8 @@
+  TypeIdMap.clear();
+  VFESafeVTables.clear();
   if (Changed)
     return PreservedAnalyses::none();
Index: llvm/lib/LTO/LTOCodeGenerator.cpp
--- llvm/lib/LTO/LTOCodeGenerator.cpp
+++ llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -462,6 +462,8 @@
   internalizeModule(*MergedModule, mustPreserveGV);
+  MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
   ScopeRestrictionsDone = true;
Index: llvm/lib/LTO/LTO.cpp
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -1004,6 +1004,8 @@
+    RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
     if (Conf.PostInternalizeModuleHook &&
         !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
       return Error::success();
Index: llvm/lib/IR/Metadata.cpp
--- llvm/lib/IR/Metadata.cpp
+++ llvm/lib/IR/Metadata.cpp
@@ -1497,6 +1497,24 @@
+// Attach !vcall_visibility metadata to this object: a single-operand node
+// holding Visibility as an i64 constant.
+void GlobalObject::addVCallVisibilityMetadata(VCallVisibility Visibility) {
+  LLVMContext &Ctx = getContext();
+  auto *VisConst = ConstantInt::get(Type::getInt64Ty(Ctx), Visibility);
+  MDNode *VisNode = MDNode::get(Ctx, {ConstantAsMetadata::get(VisConst)});
+  addMetadata(LLVMContext::MD_vcall_visibility, *VisNode);
+// Read the !vcall_visibility metadata attached to this object. Returns
+// VCallVisibilityPublic when no such metadata is present.
+GlobalObject::VCallVisibility GlobalObject::getVCallVisibility() const {
+  if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) {
+    uint64_t Val = cast<ConstantInt>(
+                       cast<ConstantAsMetadata>(MD->getOperand(0))->getValue())
+                       ->getZExtValue();
+    // Val is unsigned, so only the upper bound needs checking.
+    assert(Val <= 2 && "unknown vcall visibility!");
+    return (VCallVisibility)Val;
+  }
+  return VCallVisibility::VCallVisibilityPublic;
 void Function::setSubprogram(DISubprogram *SP) {
   setMetadata(LLVMContext::MD_dbg, SP);
Index: llvm/lib/Analysis/TypeMetadataUtils.cpp
--- llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -127,3 +127,35 @@
     findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr,
                               Offset->getZExtValue(), CI, DT);
+// Return the pointer-typed constant located exactly Offset bytes into the
+// constant initializer I, descending through nested ConstantStruct and
+// ConstantArray values using M's data layout. Returns nullptr when Offset is
+// out of range, does not land on the start of a pointer, or I is any other
+// kind of constant.
+Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
+  if (I->getType()->isPointerTy()) {
+    // A pointer only matches when the remaining offset is exactly zero.
+    if (Offset == 0)
+      return I;
+    return nullptr;
+  }
+  const DataLayout &DL = M.getDataLayout();
+  if (auto *C = dyn_cast<ConstantStruct>(I)) {
+    const StructLayout *SL = DL.getStructLayout(C->getType());
+    if (Offset >= SL->getSizeInBytes())
+      return nullptr;
+    unsigned Op = SL->getElementContainingOffset(Offset);
+    return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+                              Offset - SL->getElementOffset(Op), M);
+  }
+  if (auto *C = dyn_cast<ConstantArray>(I)) {
+    ArrayType *VTableTy = C->getType();
+    uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
+    // Keep the index 64 bits wide: narrowing it before the bounds check would
+    // let a very large offset wrap around to an in-range operand index.
+    uint64_t Op = Offset / ElemSize;
+    if (Op >= C->getNumOperands())
+      return nullptr;
+    return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+                              Offset % ElemSize, M);
+  }
+  return nullptr;
Index: llvm/include/llvm/Transforms/IPO/GlobalDCE.h
--- llvm/include/llvm/Transforms/IPO/GlobalDCE.h
+++ llvm/include/llvm/Transforms/IPO/GlobalDCE.h
@@ -43,11 +43,25 @@
   /// Comdat -> Globals in that Comdat section.
   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+  /// !type metadata -> set of (vtable, offset) pairs
+  DenseMap<Metadata *, SmallSet<std::pair<GlobalVariable *, uint64_t>, 4>>
+      TypeIdMap;
+  // Global variables which are vtables, and which we have enough information
+  // about to safely do dead virtual function elimination.
+  SmallPtrSet<GlobalValue *, 32> VFESafeVTables;
   void UpdateGVDependencies(GlobalValue &GV);
   void MarkLive(GlobalValue &GV,
                 SmallVectorImpl<GlobalValue *> *Updates = nullptr);
   bool RemoveUnusedGlobalValue(GlobalValue &GV);
+  // Dead virtual function elimination.
+  void AddVirtualFunctionDependencies(Module &M);
+  void ScanVTables(Module &M);
+  void ScanTypeCheckedLoadIntrinsics(Module &M);
+  void ScanVTableLoad(Function *Caller, Metadata *TypeId, uint64_t CallOffset);
   void ComputeDependencies(Value *V, SmallPtrSetImpl<GlobalValue *> &U);
Index: llvm/include/llvm/IR/GlobalObject.h
--- llvm/include/llvm/IR/GlobalObject.h
+++ llvm/include/llvm/IR/GlobalObject.h
@@ -28,6 +28,20 @@
 class Metadata;
 class GlobalObject : public GlobalValue {
+  // VCallVisibility - values for visibility metadata attached to vtables. This
+  // describes the scope in which a virtual call could end up being dispatched
+  // through this vtable.
+  enum VCallVisibility {
+    // Type is potentially visible to external code.
+    VCallVisibilityPublic = 0,
+    // Type is only visible to code which will be in the current Module after
+    // LTO internalization.
+    VCallVisibilityLinkageUnit = 1,
+    // Type is only visible to code in the current Module.
+    VCallVisibilityTranslationUnit = 2,
+  };
   GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
                LinkageTypes Linkage, const Twine &Name,
@@ -163,6 +177,8 @@
   void copyMetadata(const GlobalObject *Src, unsigned Offset);
   void addTypeMetadata(unsigned Offset, Metadata *TypeID);
+  void addVCallVisibilityMetadata(VCallVisibility Visibility);
+  VCallVisibility getVCallVisibility() const;
   void copyAttributesFrom(const GlobalObject *Src);
Index: llvm/include/llvm/IR/FixedMetadataKinds.def
--- llvm/include/llvm/IR/FixedMetadataKinds.def
+++ llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -39,3 +39,4 @@
 LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25)
 LLVM_FIXED_MD_KIND(MD_callback, "callback", 26)
 LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27)
+LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28)
Index: llvm/include/llvm/Analysis/TypeMetadataUtils.h
--- llvm/include/llvm/Analysis/TypeMetadataUtils.h
+++ llvm/include/llvm/Analysis/TypeMetadataUtils.h
@@ -50,6 +50,8 @@
     SmallVectorImpl<Instruction *> &LoadedPtrs,
     SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
     const CallInst *CI, DominatorTree &DT);
+Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M);
Index: llvm/docs/TypeMetadata.rst
--- llvm/docs/TypeMetadata.rst
+++ llvm/docs/TypeMetadata.rst
@@ -224,3 +224,67 @@
 .. _GlobalLayoutBuilder: https://github.com/llvm/llvm-project/blob/master/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
+``!vcall_visibility`` Metadata
+In order to allow removing unused function pointers from vtables, we need to
+know whether every virtual call which could use it is known to the compiler, or
+whether another translation unit could introduce more calls through the vtable.
+This is not the same as the linkage of the vtable, because call sites could be
+using a pointer of a more widely-visible base class. For example, consider this
+code:
+.. code-block:: c++
+  __attribute__((visibility("default")))
+  struct A {
+    virtual void f();
+  };
+  __attribute__((visibility("hidden")))
+  struct B : A {
+    virtual void f();
+  };
+With LTO, we know that all code which can see the declaration of ``B`` is
+visible to us. However, a pointer to a ``B`` could be cast to ``A*`` and passed
+to another linkage unit, which could then call ``f`` on it. This call would
+load from the vtable for ``B`` (using the object pointer), and then call
+``B::f``. This means we can't remove the function pointer from ``B``'s vtable,
+or the implementation of ``B::f``. However, if we can see all code which knows
+about any dynamic base class (which would be the case if ``B`` only inherited
+from classes with hidden visibility), then this optimisation would be valid.
+This concept is represented in IR by the ``!vcall_visibility`` metadata
+attached to vtable objects, with the following values:
+.. list-table::
+   :header-rows: 1
+   :widths: 10 90
+   * - Value
+     - Behavior
+   * - 0 (or omitted)
+     - **Public**
+           Virtual function calls using this vtable could be made from external
+           code.
+   * - 1
+     - **Linkage Unit**
+           All virtual function calls which might use this vtable are in the
+           current LTO unit, meaning they will be in the current module once
+           LTO linking has been performed.
+   * - 2
+     - **Translation Unit**
+           All virtual function calls which might use this vtable are in the
+           current module.
+In addition, all function pointer loads from a vtable marked with the
+``!vcall_visibility`` metadata (with a non-zero value) must be done using the
+:ref:`llvm.type.checked.load <type.checked.load>` intrinsic, so that virtual
+call sites can be correlated with the vtables which they might load from.
+Other parts of the vtable (RTTI, offset-to-top, ...) can still be accessed with
+normal loads.
Index: llvm/docs/LangRef.rst
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -6241,6 +6241,13 @@
     !0 = !{i32 1, !"short_wchar", i32 1}
     !1 = !{i32 1, !"short_enum", i32 0}
+LTO Post-Link Module Flags Metadata
+Some optimisations are only valid when the entire LTO unit is present in the current
+module. This is represented by the ``LTOPostLink`` module flags metadata, which
+will be created with a value of ``1`` when LTO linking occurs.
 Automatic Linker Flags Named Metadata
@@ -16520,6 +16527,8 @@
 The ``llvm.type.test`` intrinsic tests whether the given pointer is associated
 with the given type identifier.
+.. _type.checked.load:
 '``llvm.type.checked.load``' Intrinsic
Index: clang/test/Driver/virtual-function-elimination.cpp
--- /dev/null
+++ clang/test/Driver/virtual-function-elimination.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -### %s 2>&1 | FileCheck --check-prefix=BAD-LTO %s
+// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto=thin -### %s 2>&1 | FileCheck --check-prefix=BAD-LTO %s
+// BAD-LTO: invalid argument '-fvirtual-function-elimination' only allowed with '-flto=full'
+// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto -### %s 2>&1 | FileCheck --check-prefix=GOOD %s
+// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto=full -### %s 2>&1 | FileCheck --check-prefix=GOOD %s
+// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=GOOD %s
+// GOOD: "-fvirtual-function-elimination" "-fwhole-program-vtables"
+// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=NO-WHOLE-PROGRAM-VTABLES %s
+// NO-WHOLE-PROGRAM-VTABLES: invalid argument '-fno-whole-program-vtables' not allowed with '-fvirtual-function-elimination'
Index: clang/test/CodeGenCXX/virtual-function-elimination.cpp
--- /dev/null
+++ clang/test/CodeGenCXX/virtual-function-elimination.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -flto -flto-unit -fvirtual-function-elimination -fwhole-program-vtables -emit-llvm -o - %s | FileCheck %s
+struct __attribute__((visibility("default"))) A {
+  virtual void foo();
+void test_1(A *p) {
+  // A has default visibility, so no need for type.checked.load.
+// CHECK-LABEL: define void @_Z6test_1P1A
+// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** {{%.+}}, i64 0
+// CHECK: [[FN_PTR:%.+]] = load void (%struct.A*)*, void (%struct.A*)** [[FN_PTR_ADDR]]
+// CHECK: call void [[FN_PTR]](
+  p->foo();
+struct __attribute__((visibility("hidden"))) [[clang::lto_visibility_public]] B {
+  virtual void foo();
+void test_2(B *p) {
+  // B has public LTO visibility, so no need for type.checked.load.
+// CHECK-LABEL: define void @_Z6test_2P1B
+// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** {{%.+}}, i64 0
+// CHECK: [[FN_PTR:%.+]] = load void (%struct.B*)*, void (%struct.B*)** [[FN_PTR_ADDR]]
+// CHECK: call void [[FN_PTR]](
+  p->foo();
+struct __attribute__((visibility("hidden"))) C {
+  virtual void foo();
+  virtual void bar();
+void test_3(C *p) {
+  // C has hidden visibility, so we generate type.checked.load to allow VFE.
+// CHECK-LABEL: define void @_Z6test_3P1C
+// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 0, metadata !"_ZTS1C")
+// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0
+// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)*
+// CHECK: call void [[FN_PTR]](
+  p->foo();
+void test_4(C *p) {
+  // When using type.checked.load, we pass the vtable offset to the intrinsic,
+  // rather than adding it to the pointer with a GEP.
+// CHECK-LABEL: define void @_Z6test_4P1C
+// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 8, metadata !"_ZTS1C")
+// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0
+// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)*
+// CHECK: call void [[FN_PTR]](
+  p->bar();
+void test_5(C *p, void (C::*q)(void)) {
+  // We also use type.checked.load for the virtual side of member function
+  // pointer calls. We use a GEP to calculate the address to load from and pass
+  // 0 as the offset to the intrinsic, because we know that the load must be
+  // from exactly the point marked by one of the function-type metadatas (in
+  // this case "_ZTSM1CFvvE.virtual"). If we passed the offset from the member
+  // function pointer to the intrinsic, this information would be lost. No
+  // codegen changes on the non-virtual side.
+// CHECK-LABEL: define void @_Z6test_5P1CMS_FvvE(
+// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr i8, i8* %vtable, i64 {{%.+}}
+// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* [[FN_PTR_ADDR]], i32 0, metadata !"_ZTSM1CFvvE.virtual")
+// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0
+// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)*
+// CHECK: [[PHI:%.+]] = phi void (%struct.C*)* {{.*}}[ [[FN_PTR]], {{.*}} ]
+// CHECK: call void [[PHI]](
+  (p->*q)();
Index: clang/test/CodeGenCXX/vcall-visibility-metadata.cpp
--- /dev/null
+++ clang/test/CodeGenCXX/vcall-visibility-metadata.cpp
@@ -0,0 +1,88 @@
+// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -emit-llvm -fvirtual-function-elimination -fwhole-program-vtables -o - %s | FileCheck %s
+// Anonymous namespace.
+namespace {
+// CHECK: @_ZTVN12_GLOBAL__N_11AE = {{.*}} !vcall_visibility [[VIS_TU:![0-9]+]]
+struct A {
+  A() {}
+  virtual int f() { return 1; }
+void *construct_A() {
+  return new A();
+// Hidden visibility.
+// CHECK: @_ZTV1B = {{.*}} !vcall_visibility [[VIS_DSO:![0-9]+]]
+struct __attribute__((visibility("hidden"))) B {
+  B() {}
+  virtual int f() { return 1; }
+B *construct_B() {
+  return new B();
+// Default visibility.
+// CHECK-NOT: @_ZTV1C = {{.*}} !vcall_visibility
+struct __attribute__((visibility("default"))) C {
+  C() {}
+  virtual int f() { return 1; }
+C *construct_C() {
+  return new C();
+// Hidden visibility, public LTO visibility.
+// CHECK-NOT: @_ZTV1D = {{.*}} !vcall_visibility
+struct __attribute__((visibility("hidden"))) [[clang::lto_visibility_public]] D {
+  D() {}
+  virtual int f() { return 1; }
+D *construct_D() {
+  return new D();
+// Hidden visibility, but inherits from class with default visibility.
+// CHECK-NOT: @_ZTV1E = {{.*}} !vcall_visibility
+struct __attribute__((visibility("hidden"))) E : C {
+  E() {}
+  virtual int f() { return 1; }
+E *construct_E() {
+  return new E();
+// Anonymous namespace, but inherits from class with default visibility.
+// CHECK-NOT: @_ZTVN12_GLOBAL__N_11FE = {{.*}} !vcall_visibility
+namespace {
+struct __attribute__((visibility("hidden"))) F : C {
+  F() {}
+  virtual int f() { return 1; }
+void *construct_F() {
+  return new F();
+// Anonymous namespace, but inherits from class with hidden visibility.
+// CHECK: @_ZTVN12_GLOBAL__N_11GE = {{.*}} !vcall_visibility [[VIS_DSO:![0-9]+]]
+namespace {
+struct __attribute__((visibility("hidden"))) G : B {
+  G() {}
+  virtual int f() { return 1; }
+void *construct_G() {
+  return new G();
+// CHECK-DAG: [[VIS_DSO]] = !{i64 1}
+// CHECK-DAG: [[VIS_TU]] = !{i64 2}
Index: clang/lib/Frontend/CompilerInvocation.cpp
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -758,6 +758,8 @@
   Opts.CodeViewGHash = Args.hasArg(OPT_gcodeview_ghash);
   Opts.MacroDebugInfo = Args.hasArg(OPT_debug_info_macro);
   Opts.WholeProgramVTables = Args.hasArg(OPT_fwhole_program_vtables);
+  Opts.VirtualFunctionElimination =
+      Args.hasArg(OPT_fvirtual_function_elimination);
   Opts.LTOVisibilityPublicStd = Args.hasArg(OPT_flto_visibility_public_std);
   Opts.SplitDwarfFile = Args.getLastArgValue(OPT_split_dwarf_file);
   Opts.SplitDwarfOutput = Args.getLastArgValue(OPT_split_dwarf_output);
Index: clang/lib/Driver/ToolChains/Clang.cpp
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5432,9 +5432,30 @@
-  bool WholeProgramVTables =
-      Args.hasFlag(options::OPT_fwhole_program_vtables,
-                   options::OPT_fno_whole_program_vtables, false);
+  bool VirtualFunctionElimination =
+      Args.hasFlag(options::OPT_fvirtual_function_elimination,
+                   options::OPT_fno_virtual_function_elimination, false);
+  if (VirtualFunctionElimination) {
+    // VFE requires full LTO (currently, this might be relaxed to allow ThinLTO
+    // in the future).
+    if (D.getLTOMode() != LTOK_Full)
+      D.Diag(diag::err_drv_argument_only_allowed_with)
+          << "-fvirtual-function-elimination"
+          << "-flto=full";
+    CmdArgs.push_back("-fvirtual-function-elimination");
+  }
+  // VFE requires whole-program-vtables, and enables it by default.
+  bool WholeProgramVTables = Args.hasFlag(
+      options::OPT_fwhole_program_vtables,
+      options::OPT_fno_whole_program_vtables, VirtualFunctionElimination);
+  if (VirtualFunctionElimination && !WholeProgramVTables) {
+    D.Diag(diag::err_drv_argument_not_allowed_with)
+        << "-fno-whole-program-vtables"
+        << "-fvirtual-function-elimination";
+  }
   if (WholeProgramVTables) {
     if (!D.isUsingLTO())
Index: clang/lib/CodeGen/ItaniumCXXABI.cpp
--- clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -604,8 +604,6 @@
     VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty);
     VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy);
-  // Compute the address of the virtual function pointer.
-  llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
   // Check the address of the function pointer if CFI on member function
   // pointers is enabled.
@@ -613,44 +611,81 @@
   llvm::Constant *CheckTypeDesc;
   bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) &&
-  if (ShouldEmitCFICheck) {
-    CodeGenFunction::SanitizerScope SanScope(&CGF);
-    CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc());
-    CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
-    llvm::Constant *StaticData[] = {
-        llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
-        CheckSourceLocation,
-        CheckTypeDesc,
-    };
-    llvm::Metadata *MD =
-        CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
-    llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+  bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
+                           CGM.HasHiddenLTOVisibility(RD);
+  llvm::Value *VirtualFn = nullptr;
-    llvm::Value *TypeTest = Builder.CreateCall(
-        CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId});
+  {
+    CodeGenFunction::SanitizerScope SanScope(&CGF);
+    llvm::Value *TypeId = nullptr;
+    llvm::Value *CheckResult = nullptr;
+    if (ShouldEmitCFICheck || ShouldEmitVFEInfo) {
+      // If doing CFI or VFE, we will need the metadata node to check against.
+      llvm::Metadata *MD =
+          CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
+      TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
+    }
-    if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) {
-      CGF.EmitTrapCheck(TypeTest);
+    llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
+    if (ShouldEmitVFEInfo) {
+      // If doing VFE, load from the vtable with a type.checked.load intrinsic
+      // call. Note that we use the GEP to calculate the address to load from
+      // and pass 0 as the offset to the intrinsic. This is because every
+      // vtable slot of the correct type is marked with matching metadata, and
+      // we know that the load must be from one of these slots.
+      llvm::Value *CheckedLoad = Builder.CreateCall(
+          CGM.getIntrinsic(llvm::Intrinsic::type_checked_load),
+          {VFPAddr, llvm::ConstantInt::get(CGM.Int32Ty, 0), TypeId});
+      CheckResult = Builder.CreateExtractValue(CheckedLoad, 1);
+      VirtualFn = Builder.CreateExtractValue(CheckedLoad, 0);
+      VirtualFn = Builder.CreateBitCast(VirtualFn, FTy->getPointerTo(),
+                                        "memptr.virtualfn");
     } else {
-      llvm::Value *AllVtables = llvm::MetadataAsValue::get(
-          CGM.getLLVMContext(),
-          llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
-      llvm::Value *ValidVtable = Builder.CreateCall(
-          CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
-      CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall),
-                    SanitizerHandler::CFICheckFail, StaticData,
-                    {VTable, ValidVtable});
+      // When not doing VFE, emit a normal load, as it allows more
+      // optimisations than type.checked.load.
+      if (ShouldEmitCFICheck) {
+        CheckResult = Builder.CreateCall(
+            CGM.getIntrinsic(llvm::Intrinsic::type_test),
+            {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId});
+      }
+      VFPAddr =
+          Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
+      VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(),
+                                            "memptr.virtualfn");
+    assert(VirtualFn && "Virtual fuction pointer not created!");
+    assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) &&
+           "Check result required but not created!");
+    if (ShouldEmitCFICheck) {
+      // If doing CFI, emit the check.
+      CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc());
+      CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
+      llvm::Constant *StaticData[] = {
+          llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
+          CheckSourceLocation,
+          CheckTypeDesc,
+      };
-    FnVirtual = Builder.GetInsertBlock();
-  }
+      if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) {
+        CGF.EmitTrapCheck(CheckResult);
+      } else {
+        llvm::Value *AllVtables = llvm::MetadataAsValue::get(
+            CGM.getLLVMContext(),
+            llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
+        llvm::Value *ValidVtable = Builder.CreateCall(
+            CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
+        CGF.EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIMFCall),
+                      SanitizerHandler::CFICheckFail, StaticData,
+                      {VTable, ValidVtable});
+      }
+      FnVirtual = Builder.GetInsertBlock();
+    }
+  } // End of sanitizer scope
-  // Load the virtual function to call.
-  VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
-  llvm::Value *VirtualFn = Builder.CreateAlignedLoad(
-      VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn");
   // In the non-virtual path, the function pointer is actually a
@@ -1594,7 +1629,7 @@
   if (!VTable->isDeclarationForLinker())
-    CGM.EmitVTableTypeMetadata(VTable, VTLayout);
+    CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout);
 bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField(
Index: clang/lib/CodeGen/CodeGenModule.h
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1275,8 +1275,16 @@
   /// optimization.
   bool HasHiddenLTOVisibility(const CXXRecordDecl *RD);
+  /// Returns the vcall visibility of the given type. This is the scope in which
+  /// a virtual function call could be made which ends up being dispatched to a
+  /// member function of this class. This scope can be wider than the visibility
+  /// of the class itself when the class has a more-visible dynamic base class.
+  llvm::GlobalObject::VCallVisibility
+  GetVCallVisibilityLevel(const CXXRecordDecl *RD);
   /// Emit type metadata for the given vtable using the given layout.
-  void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
+  void EmitVTableTypeMetadata(const CXXRecordDecl *RD,
+                              llvm::GlobalVariable *VTable,
                               const VTableLayout &VTLayout);
   /// Generate a cross-DSO type identifier for MD.
Index: clang/lib/CodeGen/CGVTables.cpp
--- clang/lib/CodeGen/CGVTables.cpp
+++ clang/lib/CodeGen/CGVTables.cpp
@@ -779,7 +779,7 @@
   assert(!VTable->isDeclaration() && "Shouldn't set properties on declaration");
   CGM.setGVProperties(VTable, RD);
-  CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get());
+  CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get());
   return VTable;
@@ -1010,7 +1010,32 @@
   return true;
-void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
+CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) {
+  // Start from the level implied by RD itself: a type that is not externally
+  // visible is limited to the translation unit, a type with hidden LTO
+  // visibility to the linkage unit, and anything else is public.
+  LinkageInfo LV = RD->getLinkageAndVisibility();
+  llvm::GlobalObject::VCallVisibility TypeVis;
+  if (!isExternallyVisible(LV.getLinkage()))
+    TypeVis = llvm::GlobalObject::VCallVisibilityTranslationUnit;
+  else if (HasHiddenLTOVisibility(RD))
+    TypeVis = llvm::GlobalObject::VCallVisibilityLinkageUnit;
+  else
+    TypeVis = llvm::GlobalObject::VCallVisibilityPublic;
+  // A more-visible dynamic base class widens the scope in which calls may be
+  // dispatched to this class, so fold in the minimum level over all dynamic
+  // bases. Bind each base specifier by reference (no per-iteration copy) and
+  // look the record up once per base.
+  // NOTE(review): there is no memoization across the recursion; confirm this
+  // is acceptable for deep hierarchies with shared bases.
+  for (const auto &B : RD->bases()) {
+    const CXXRecordDecl *BaseRD = B.getType()->getAsCXXRecordDecl();
+    if (BaseRD->isDynamicClass())
+      TypeVis = std::min(TypeVis, GetVCallVisibilityLevel(BaseRD));
+  }
+  for (const auto &B : RD->vbases()) {
+    const CXXRecordDecl *BaseRD = B.getType()->getAsCXXRecordDecl();
+    if (BaseRD->isDynamicClass())
+      TypeVis = std::min(TypeVis, GetVCallVisibilityLevel(BaseRD));
+  }
+  return TypeVis;
+void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
+                                           llvm::GlobalVariable *VTable,
                                            const VTableLayout &VTLayout) {
   if (!getCodeGenOpts().LTOUnit)
@@ -1070,4 +1095,10 @@
       VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD);
+  // When virtual function elimination is enabled, record the vtable's vcall
+  // visibility as metadata. Nothing is attached for public visibility; only
+  // the non-public levels are written.
+  if (getCodeGenOpts().VirtualFunctionElimination) {
+    llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD);
+    if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic)
+      VTable->addVCallVisibilityMetadata(TypeVis);
+  }
Index: clang/lib/CodeGen/CGClass.cpp
--- clang/lib/CodeGen/CGClass.cpp
+++ clang/lib/CodeGen/CGClass.cpp
@@ -2784,11 +2784,16 @@
 bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) {
   if (!CGM.getCodeGenOpts().WholeProgramVTables ||
-      !SanOpts.has(SanitizerKind::CFIVCall) ||
-      !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) ||
     return false;
+  if (CGM.getCodeGenOpts().VirtualFunctionElimination)
+    return true;
+  if (!SanOpts.has(SanitizerKind::CFIVCall) ||
+      !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall))
+    return false;
   std::string TypeName = RD->getQualifiedNameAsString();
   return !getContext().getSanitizerBlacklist().isBlacklistedType(
       SanitizerKind::CFIVCall, TypeName);
@@ -2811,8 +2816,13 @@
   llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1);
-  EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall),
-            SanitizerHandler::CFICheckFail, nullptr, nullptr);
+  // Only turn the checked load's result into a CFI failure check when
+  // CFI-vcall is enabled and the type is not blacklisted. The qualified name
+  // is built inside the condition so the string is only constructed when the
+  // blacklist lookup actually runs.
+  if (SanOpts.has(SanitizerKind::CFIVCall) &&
+      !getContext().getSanitizerBlacklist().isBlacklistedType(
+          SanitizerKind::CFIVCall, RD->getQualifiedNameAsString())) {
+    EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall),
+              SanitizerHandler::CFICheckFail, {}, {});
+  }
   return Builder.CreateBitCast(
       Builder.CreateExtractValue(CheckedLoad, 0),
Index: clang/include/clang/Driver/Options.td
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1843,6 +1843,13 @@
     HelpText<"Emits more virtual tables to improve devirtualization">;
 def fno_force_emit_vtables : Flag<["-"], "fno-force-emit-vtables">, Group<f_Group>,
+def fvirtual_function_elimination : Flag<["-"], "fvirtual-function-elimination">, Group<f_Group>,
+  Flags<[CoreOption, CC1Option]>,
+  HelpText<"Enables dead virtual function elimination optimization. Requires -flto=full">;
+// Negative form of -fvirtual-function-elimination. The spelling must use
+// hyphens throughout so it pairs with the positive flag.
+def fno_virtual_function_elimination : Flag<["-"], "fno-virtual-function-elimination">, Group<f_Group>,
+  Flags<[CoreOption]>;
 def fwrapv : Flag<["-"], "fwrapv">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Treat signed integer overflow as two's complement">;
 def fwritable_strings : Flag<["-"], "fwritable-strings">, Group<f_Group>, Flags<[CC1Option]>,
Index: clang/include/clang/Basic/CodeGenOptions.def
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -278,6 +278,10 @@
 CODEGENOPT(WholeProgramVTables, 1, 0) ///< Whether to apply whole-program
                                       ///  vtable optimization.
+CODEGENOPT(VirtualFunctionElimination, 1, 0) ///< Whether to apply the dead
+                                             /// virtual function elimination
+                                             /// optimization.
 /// Whether to use public LTO visibility for entities in std and stdext
 /// namespaces. This is enabled by clang-cl's /MT and /MTd flags.
 CODEGENOPT(LTOVisibilityPublicStd, 1, 0)
