The current code only holds the rmap lock to prevent parallel page table
updates. That is not sufficient: the kernel should also check whether an
mmu_notifier callback was running in parallel.

Cc: Alexey Kardashevskiy <a...@ozlabs.ru>
Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.ibm.com>
---
 arch/powerpc/kvm/book3s_64_vio_hv.c | 30 +++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)
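
Note: the page table walk below relies on find_kvm_host_pte(), added earlier
in this series. A minimal sketch of such a helper, assuming it simply
re-checks the mmu_notifier sequence number while kvm->mmu_lock is held before
doing the usual __find_linux_pte() walk (illustrative only, not the exact
helper from the series):

	/*
	 * Sketch only: walk the host (Qemu) page table for 'ea', but bail
	 * out if an mmu_notifier invalidation ran since mmu_seq was sampled.
	 */
	static pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq,
					unsigned long ea, unsigned *hshift)
	{
		/* The caller must hold kvm->mmu_lock. */
		VM_WARN(!spin_is_locked(&kvm->mmu_lock),
			"%s called with kvm mmu_lock not held\n", __func__);

		/* Retry if an invalidation raced with this walk. */
		if (mmu_notifier_retry(kvm, mmu_seq))
			return NULL;

		return __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift);
	}

The caller samples kvm->mmu_notifier_seq (with an smp_rmb() pairing against
the notifier side) before taking the lock, as done in
kvmppc_rm_h_put_tce_indirect() below.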

diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 6fcaf1fa8e02..acc3ce570be7 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -437,8 +437,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        return H_SUCCESS;
 }
 
-static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
-               unsigned long ua, unsigned long *phpa)
+static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
+                               unsigned long ua, unsigned long *phpa)
 {
        pte_t *ptep, pte;
        unsigned shift = 0;
@@ -452,10 +452,17 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
         * to exit which will agains result in the below page table walk
         * to finish.
         */
-       ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
-       if (!ptep || !pte_present(*ptep))
+       /* An rmap lock won't make this safe, because it only ensures that
+        * hash page table entries are removed with the rmap lock held. After
+        * that the mmu notifier returns and we go ahead and remove PTEs from the Qemu page table.
+        */
+       ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift);
+       if (!ptep)
+               return -ENXIO;
+
+       pte = READ_ONCE(*ptep);
+       if (!pte_present(pte))
                return -ENXIO;
-       pte = *ptep;
 
        if (!shift)
                shift = PAGE_SHIFT;
@@ -477,10 +484,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                unsigned long liobn, unsigned long ioba,
                unsigned long tce_list, unsigned long npages)
 {
+       struct kvm *kvm = vcpu->kvm;
        struct kvmppc_spapr_tce_table *stt;
        long i, ret = H_SUCCESS;
        unsigned long tces, entry, ua = 0;
        unsigned long *rmap = NULL;
+       unsigned long mmu_seq;
        bool prereg = false;
        struct kvmppc_spapr_tce_iommu_table *stit;
 
@@ -488,6 +497,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        if (kvm_is_radix(vcpu->kvm))
                return H_TOO_HARD;
 
+       /*
+        * used to check for invalidations in progress
+        */
+       mmu_seq = kvm->mmu_notifier_seq;
+       smp_rmb();
+
        stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
@@ -547,7 +562,9 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                 * real page.
                 */
                lock_rmap(rmap);
-               if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+
+               arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+               if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) {
                        ret = H_TOO_HARD;
                        goto unlock_exit;
                }
@@ -593,6 +610,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        if (rmap)
                unlock_rmap(rmap);
 
+       arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
        return ret;
 }
 
-- 
2.25.3
