Fix two intertwined bugs in NX huge page zapping that were introduced
by the incorporation of the TDP MMU.  Because there is a unified list of
NX huge pages, zapping can encounter both TDP MMU and legacy MMU pages,
and the two MMUs track the need for a TLB flush separately.  If one
flavor needs a flush, but the code for the other flavor yields, KVM will
fail to flush before yielding.

First, honor the "flush needed" return from kvm_tdp_mmu_zap_gfn_range(),
which does the flush itself if and only if it yields, and otherwise
expects the caller to do the flush.  This requires feeding the result
into kvm_mmu_remote_flush_or_zap(), and so also fixes the case where the
TDP MMU needs a flush, the legacy MMU does not, and the main loop yields.

Second, tell the TDP MMU a flush is pending if the list of zapped pages
from legacy MMUs is not empty, i.e. the legacy MMU needs a flush.  This
fixes the case where the TDP MMU yields, but does not itself require a
flush.

Fixes: 29cf0f5007a2 ("kvm: x86/mmu: NX largepage recovery for TDP MMU")
Cc: sta...@vger.kernel.org
Cc: Ben Gardon <bgar...@google.com>
Signed-off-by: Sean Christopherson <sea...@google.com>
---
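Note for reviewers (not part of the commit message): the sketch below is
not new code, just a condensed view of how the flush tracking in
kvm_recover_nx_lpages() looks with both changes applied, mirroring the
hunks below; loop setup, the to_zap accounting, and locking are elided.

	bool flush = false;

	... for each NX-disallowed huge page 'sp' ...

		if (is_tdp_mmu_page(sp)) {
			/*
			 * Tell the TDP MMU to flush on behalf of the legacy
			 * MMU as well if the latter has pending zaps, in case
			 * the TDP MMU zap yields.
			 */
			gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
			flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end,
							  flush || !list_empty(&invalid_list));
		} else {
			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
		}

		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
			/*
			 * Flush and/or commit all pending zaps before
			 * dropping mmu_lock.
			 */
			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
			cond_resched_rwlock_write(&kvm->mmu_lock);
			flush = false;
		}

	/* Catch anything still pending when the loop exits. */
	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
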
 arch/x86/kvm/mmu/mmu.c     | 15 ++++++++++-----
 arch/x86/kvm/mmu/tdp_mmu.c |  6 +++---
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c6ed633594a2..413d6259340e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5517,7 +5517,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
        }
 
        if (is_tdp_mmu_enabled(kvm)) {
-               flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end);
+               flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end,
+                                                 false);
                if (flush)
                        kvm_flush_remote_tlbs(kvm);
        }
@@ -5939,6 +5940,8 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
        struct kvm_mmu_page *sp;
        unsigned int ratio;
        LIST_HEAD(invalid_list);
+       bool flush = false;
+       gfn_t gfn_end;
        ulong to_zap;
 
        rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5960,19 +5963,21 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
                                      lpage_disallowed_link);
                WARN_ON_ONCE(!sp->lpage_disallowed);
                if (is_tdp_mmu_page(sp)) {
-                       kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
-                               sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+                       gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+                       flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end,
+                                                         flush || !list_empty(&invalid_list));
                } else {
                        kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
                        WARN_ON_ONCE(sp->lpage_disallowed);
                }
 
                if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-                       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+                       kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
                        cond_resched_rwlock_write(&kvm->mmu_lock);
+                       flush = false;
                }
        }
-       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+       kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 
        write_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, rcu_idx);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6cf08c3c537f..367f12bf1026 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -709,10 +709,10 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                              bool flush)
 {
        struct kvm_mmu_page *root;
-       bool flush = false;
 
        for_each_tdp_mmu_root_yield_safe(kvm, root)
                flush = zap_gfn_range(kvm, root, start, end, true, flush);
@@ -725,7 +725,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
        gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
        bool flush;
 
-       flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
+       flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn, false);
        if (flush)
                kvm_flush_remote_tlbs(kvm);
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 3b761c111bff..e39bee52d49e 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -8,7 +8,8 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                              bool flush);
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
-- 
2.31.0.rc2.261.g7f71774620-goog
