In order to allow multiple TDP MMU operations to proceed in parallel,
there must be an option to modify SPTEs atomically so that changes are
not lost. Add that option as a new tdp_mmu_set_spte_atomic helper and
plumb an atomic flag through handle_changed_spte,
__handle_changed_spte and handle_disconnected_tdp_mmu_page.

Reviewed-by: Peter Feiner <pfei...@google.com>

Signed-off-by: Ben Gardon <bgar...@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 67 ++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 264594947c3b..1380ed313476 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -7,6 +7,7 @@
 #include "tdp_mmu.h"
 #include "spte.h"
 
+#include <asm/cmpxchg.h>
 #include <trace/events/kvm.h>
 
 #ifdef CONFIG_X86_64
@@ -226,7 +227,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
 }
 
 static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
-                               u64 old_spte, u64 new_spte, int level);
+                               u64 old_spte, u64 new_spte, int level,
+                               bool atomic);
 
 static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
 {
@@ -320,15 +322,19 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
  *
  * @kvm: kvm instance
  * @pt: the page removed from the paging structure
+ * @atomic: Use atomic operations to clear the SPTEs in any disconnected
+ *         pages of memory.
  *
  * Given a page table that has been removed from the TDP paging structure,
  * iterates through the page table to clear SPTEs and free child page tables.
  */
-static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt)
+static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,
+                                            bool atomic)
 {
        struct kvm_mmu_page *sp;
        gfn_t gfn;
        int level;
+       u64 *sptep;
        u64 old_child_spte;
        int i;
 
@@ -341,11 +347,17 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt)
        tdp_mmu_unlink_page(kvm, sp, atomic);
 
        for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-               old_child_spte = READ_ONCE(*(pt + i));
-               WRITE_ONCE(*(pt + i), 0);
+               sptep = pt + i;
+
+               if (atomic) {
+                       old_child_spte = xchg(sptep, 0);
+               } else {
+                       old_child_spte = READ_ONCE(*sptep);
+                       WRITE_ONCE(*sptep, 0);
+               }
                handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
                        gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
-                       old_child_spte, 0, level - 1);
+                       old_child_spte, 0, level - 1, atomic);
        }
 
        kvm_flush_remote_tlbs_with_address(kvm, gfn,
@@ -362,12 +374,15 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt)
  * @old_spte: The value of the SPTE before the change
  * @new_spte: The value of the SPTE after the change
  * @level: the level of the PT the SPTE is part of in the paging structure
+ * @atomic: Use atomic operations to clear the SPTEs in any disconnected
+ *         pages of memory.
  *
  * Handle bookkeeping that might result from the modification of a SPTE.
  * This function must be called for all TDP SPTE modifications.
  */
 static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
-                               u64 old_spte, u64 new_spte, int level)
+                                 u64 old_spte, u64 new_spte, int level,
+                                 bool atomic)
 {
        bool was_present = is_shadow_present_pte(old_spte);
        bool is_present = is_shadow_present_pte(new_spte);
@@ -439,18 +454,50 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
         */
        if (was_present && !was_leaf && (pfn_changed || !is_present))
                handle_disconnected_tdp_mmu_page(kvm,
-                               spte_to_child_pt(old_spte, level));
+                               spte_to_child_pt(old_spte, level), atomic);
 }
 
 static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
-                               u64 old_spte, u64 new_spte, int level)
+                               u64 old_spte, u64 new_spte, int level,
+                               bool atomic)
 {
-       __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
+       __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level,
+                             atomic);
        handle_changed_spte_acc_track(old_spte, new_spte, level);
        handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
                                      new_spte, level);
 }
 
+/*
+ * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
+ * associated bookkeeping
+ *
+ * @kvm: kvm instance
+ * @iter: a tdp_iter instance currently on the SPTE that should be set
+ * @new_spte: The value the SPTE should be set to
+ * Returns: true if the SPTE was set, false if it was not. If false is returned,
+ *         this function will have no side-effects.
+ */
+static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
+                                          struct tdp_iter *iter,
+                                          u64 new_spte)
+{
+       u64 *root_pt = tdp_iter_root_pt(iter);
+       struct kvm_mmu_page *root = sptep_to_sp(root_pt);
+       int as_id = kvm_mmu_page_as_id(root);
+
+       kvm_mmu_lock_assert_held_shared(kvm);
+
+       if (cmpxchg64(iter->sptep, iter->old_spte, new_spte) != iter->old_spte)
+               return false;
+
+       handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
+                           iter->level, true);
+
+       return true;
+}
+
+
 /*
  * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
  * @kvm: kvm instance
@@ -480,7 +527,7 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
        WRITE_ONCE(*iter->sptep, new_spte);
 
        __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
-                             iter->level);
+                             iter->level, false);
        if (record_acc_track)
                handle_changed_spte_acc_track(iter->old_spte, new_spte,
                                              iter->level);
-- 
2.30.0.284.gd98b1dd5eaa7-goog

Reply via email to