Request KVM_REQ_MMU_RELOAD when deleting pinned sptes from the MMU
notifier handlers, so that any vcpu relying on those translations
reloads its page tables before the pages are unmapped.

Keep pinned sptes intact during page aging: when kvm_age_rmapp() falls
back to unmapping (no shadow_accessed_mask), report the page as young
instead of dropping the pinned spte.
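For reference, the reload request raised here is serviced on the next
guest entry; vcpu_enter_guest() already handles it along these lines
(existing upstream pattern, quoted for context only, not part of this
patch):

	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
		kvm_mmu_unload(vcpu);	/* zap the current roots */
	...
	r = kvm_mmu_reload(vcpu);	/* rebuild roots before entry;
					 * this series re-pins pages on
					 * the reload path */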

Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com>

---
 arch/x86/kvm/mmu.c |   71 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 62 insertions(+), 9 deletions(-)
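
mmu_reload_pinned_vcpus() below follows the make_all_cpus_request()
pattern from virt/kvm/kvm_main.c: set the request bit on each vcpu
with pinned pages, then IPI those still in guest mode so they exit
and service the request. Its smp_mb() pairs with the barrier on the
entry path, which looks roughly like this (existing upstream pattern,
for context only):

	vcpu->mode = IN_GUEST_MODE;
	/* set ->mode before reading ->requests; pairs with the smp_mb()
	 * after kvm_make_request() on the requester side */
	smp_mb();
	if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests) {
		vcpu->mode = OUTSIDE_GUEST_MODE;
		/* bail out and service pending requests */
	}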

Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c    2014-06-18 17:28:24.339435654 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.c 2014-06-18 17:29:32.510225755 -0300
@@ -1184,6 +1184,42 @@
                kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
+static void ack_flush(void *_completed)
+{
+}
+
+static void mmu_reload_pinned_vcpus(struct kvm *kvm)
+{
+       int i, cpu, me;
+       cpumask_var_t cpus;
+       struct kvm_vcpu *vcpu;
+       unsigned int req = KVM_REQ_MMU_RELOAD;
+
+       zalloc_cpumask_var(&cpus, GFP_ATOMIC);
+
+       me = get_cpu();
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (list_empty(&vcpu->arch.pinned_mmu_pages))
+                       continue;
+               kvm_make_request(req, vcpu);
+               cpu = vcpu->cpu;
+
+               /* Set ->requests bit before we read ->mode */
+               smp_mb();
+
+               if (cpus != NULL && cpu != -1 && cpu != me &&
+                     kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
+                       cpumask_set_cpu(cpu, cpus);
+       }
+       if (unlikely(cpus == NULL))
+               smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
+       else if (!cpumask_empty(cpus))
+               smp_call_function_many(cpus, ack_flush, NULL, 1);
+       put_cpu();
+       free_cpumask_var(cpus);
+       return;
+}
+
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
  * spte write-protection is caused by protecting shadow page table.
@@ -1276,7 +1312,8 @@
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                          struct kvm_memory_slot *slot, unsigned long data)
+                          struct kvm_memory_slot *slot, unsigned long data,
+                          bool age)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -1286,6 +1323,14 @@
                BUG_ON(!(*sptep & PT_PRESENT_MASK));
                rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
 
+               if (is_pinned_spte(*sptep)) {
+                       /* don't nuke pinned sptes if page aging: return
+                        * young=yes instead.
+                        */
+                       if (age)
+                               return 1;
+                       mmu_reload_pinned_vcpus(kvm);
+               }
                drop_spte(kvm, sptep);
                need_tlb_flush = 1;
        }
@@ -1294,7 +1339,8 @@
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                            struct kvm_memory_slot *slot, unsigned long data)
+                            struct kvm_memory_slot *slot, unsigned long data,
+                            bool age)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -1312,6 +1358,9 @@
 
                need_flush = 1;
 
+               if (is_pinned_spte(*sptep))
+                       mmu_reload_pinned_vcpus(kvm);
+
                if (pte_write(*ptep)) {
                        drop_spte(kvm, sptep);
                        sptep = rmap_get_first(*rmapp, &iter);
@@ -1342,7 +1391,8 @@
                                int (*handler)(struct kvm *kvm,
                                               unsigned long *rmapp,
                                               struct kvm_memory_slot *slot,
-                                              unsigned long data))
+                                              unsigned long data,
+                                              bool age))
 {
        int j;
        int ret = 0;
@@ -1382,7 +1432,7 @@
                        rmapp = __gfn_to_rmap(gfn_start, j, memslot);
 
                        for (; idx <= idx_end; ++idx)
-                               ret |= handler(kvm, rmapp++, memslot, data);
+                               ret |= handler(kvm, rmapp++, memslot, data, false);
                }
        }
 
@@ -1393,7 +1443,8 @@
                          unsigned long data,
                          int (*handler)(struct kvm *kvm, unsigned long *rmapp,
                                         struct kvm_memory_slot *slot,
-                                        unsigned long data))
+                                        unsigned long data,
+                                        bool age))
 {
        return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
@@ -1414,7 +1465,8 @@
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                        struct kvm_memory_slot *slot, unsigned long data)
+                        struct kvm_memory_slot *slot, unsigned long data,
+                        bool age)
 {
        u64 *sptep;
        struct rmap_iterator uninitialized_var(iter);
@@ -1429,7 +1481,7 @@
         * out actively used pages or breaking up actively used hugepages.
         */
        if (!shadow_accessed_mask) {
-               young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+               young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true);
                goto out;
        }
 
@@ -1450,7 +1502,8 @@
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                             struct kvm_memory_slot *slot, unsigned long data)
+                             struct kvm_memory_slot *slot, unsigned long data,
+                             bool age)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -1488,7 +1541,7 @@
 
        rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-       kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+       kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false);
        kvm_flush_remote_tlbs(vcpu->kvm);
 }
 


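Note: is_pinned_spte() is introduced by an earlier patch in this
series and is not shown here. Conceptually it tests a
software-available bit in the spte; a minimal sketch of the idea,
with the bit name (SPTE_PINNED_BIT) invented for illustration:

	/* Hypothetical sketch -- the real definition comes from the
	 * earlier pinned-sptes patch in this series. */
	static bool is_pinned_spte(u64 spte)
	{
		return is_shadow_present_pte(spte) && (spte & SPTE_PINNED_BIT);
	}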