Request KVM_REQ_MMU_RELOAD when deleting sptes from the MMU notifiers. Keep pinned sptes intact when the notifier callback is invoked for page aging, returning "young" for them instead.
Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com> --- arch/x86/kvm/mmu.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 9 deletions(-) Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c =================================================================== --- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c 2014-06-18 17:28:24.339435654 -0300 +++ kvm.pinned-sptes/arch/x86/kvm/mmu.c 2014-06-18 17:29:32.510225755 -0300 @@ -1184,6 +1184,42 @@ kvm_flush_remote_tlbs(vcpu->kvm); } +static void ack_flush(void *_completed) +{ +} + +static void mmu_reload_pinned_vcpus(struct kvm *kvm) +{ + int i, cpu, me; + cpumask_var_t cpus; + struct kvm_vcpu *vcpu; + unsigned int req = KVM_REQ_MMU_RELOAD; + + zalloc_cpumask_var(&cpus, GFP_ATOMIC); + + me = get_cpu(); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (list_empty(&vcpu->arch.pinned_mmu_pages)) + continue; + kvm_make_request(req, vcpu); + cpu = vcpu->cpu; + + /* Set ->requests bit before we read ->mode */ + smp_mb(); + + if (cpus != NULL && cpu != -1 && cpu != me && + kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE) + cpumask_set_cpu(cpu, cpus); + } + if (unlikely(cpus == NULL)) + smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); + else if (!cpumask_empty(cpus)) + smp_call_function_many(cpus, ack_flush, NULL, 1); + put_cpu(); + free_cpumask_var(cpus); + return; +} + /* * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. @@ -1276,7 +1312,8 @@ } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, unsigned long data, + bool age) { u64 *sptep; struct rmap_iterator iter; @@ -1286,6 +1323,14 @@ BUG_ON(!(*sptep & PT_PRESENT_MASK)); rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); + if (is_pinned_spte(*sptep)) { + /* don't nuke pinned sptes if page aging: return + * young=yes instead. 
+ */ + if (age) + return 1; + mmu_reload_pinned_vcpus(kvm); + } drop_spte(kvm, sptep); need_tlb_flush = 1; } @@ -1294,7 +1339,8 @@ } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, unsigned long data, + bool age) { u64 *sptep; struct rmap_iterator iter; @@ -1312,6 +1358,9 @@ need_flush = 1; + if (is_pinned_spte(*sptep)) + mmu_reload_pinned_vcpus(kvm); + if (pte_write(*ptep)) { drop_spte(kvm, sptep); sptep = rmap_get_first(*rmapp, &iter); @@ -1342,7 +1391,8 @@ int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, - unsigned long data)) + unsigned long data, + bool age)) { int j; int ret = 0; @@ -1382,7 +1432,7 @@ rmapp = __gfn_to_rmap(gfn_start, j, memslot); for (; idx <= idx_end; ++idx) - ret |= handler(kvm, rmapp++, memslot, data); + ret |= handler(kvm, rmapp++, memslot, data, false); } } @@ -1393,7 +1443,8 @@ unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, - unsigned long data)) + unsigned long data, + bool age)) { return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); } @@ -1414,7 +1465,8 @@ } static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, unsigned long data, + bool age) { u64 *sptep; struct rmap_iterator uninitialized_var(iter); @@ -1429,7 +1481,7 @@ * out actively used pages or breaking up actively used hugepages. 
*/ if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, data); + young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true); goto out; } @@ -1450,7 +1502,8 @@ } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, unsigned long data, + bool age) { u64 *sptep; struct rmap_iterator iter; @@ -1488,7 +1541,7 @@ rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false); kvm_flush_remote_tlbs(vcpu->kvm); } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html