Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> wrote:

> From: "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com>
> 
> We can find PTEs that are splitting while walking the page tables. Return
> __pte(0) in that case.

Can you expand on this more, please?  There are a lot of details below, like
replacing the ldarx/stdcx busy-wait loop with a cmpxchg-based retry and the new
check for a splitting hugepage, that should be described here.
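Something along these lines, perhaps (wording is up to you):

    Replace the inline ldarx/stdcx. sequence, which locked the PTE by
    setting _PAGE_BUSY, with a cmpxchg-based retry loop that never takes
    the lock, and make kvmppc_read_update_linux_pte() return __pte(0)
    when the entry is not present or is a transparent hugepage in the
    middle of a split.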

> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/kvm_book3s_64.h | 51 ++++++++++++++++++--------------
>  arch/powerpc/kvm/book3s_64_mmu_hv.c      |  7 +++--
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c      |  4 +--
>  3 files changed, 34 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 9c1ff33..ce20f7e 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -162,33 +162,40 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
>   * Lock and read a linux PTE.  If it's present and writable, atomically
>   * set dirty and referenced bits and return the PTE, otherwise return 0.

Is this comment still valid now that the ldarx/stdcx loop is gone?
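If not, maybe something like this (just a suggestion, adjust as you see fit):

 * Read and update a linux PTE without taking the _PAGE_BUSY lock: wait
 * until _PAGE_BUSY is clear, return __pte(0) if the PTE is not present
 * or is a hugepage that is being split, otherwise atomically set the
 * referenced (and, when writing, dirty) bits and return the updated PTE.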

>   */
> -static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
> +static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
> +                                              unsigned int hugepage)
>  {
> -     pte_t pte, tmp;
> -
> -     /* wait until _PAGE_BUSY is clear then set it atomically */
> -     __asm__ __volatile__ (
> -             "1:     ldarx   %0,0,%3\n"
> -             "       andi.   %1,%0,%4\n"
> -             "       bne-    1b\n"
> -             "       ori     %1,%0,%4\n"
> -             "       stdcx.  %1,0,%3\n"
> -             "       bne-    1b"
> -             : "=&r" (pte), "=&r" (tmp), "=m" (*p)
> -             : "r" (p), "i" (_PAGE_BUSY)
> -             : "cc");
> -
> -     if (pte_present(pte)) {
> -             pte = pte_mkyoung(pte);
> -             if (writing && pte_write(pte))
> -                     pte = pte_mkdirty(pte);
> -     }
> +     pte_t old_pte, new_pte = __pte(0);
> +repeat:
> +     do {
> +             old_pte = pte_val(*ptep);
> +             /*
> +              * wait until _PAGE_BUSY is clear then set it atomically
> +              */
> +             if (unlikely(old_pte & _PAGE_BUSY))
> +                     goto repeat;

continue here?  Please don't open-code looping primitives with a label and goto.
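Note that a bare continue in the do { } while as written would fall straight
through to the __cmpxchg_u64() in the loop condition, so the loop probably
wants restructuring anyway.  Untested sketch of one way to write it (the
cpu_relax() in the busy case is optional):

        pte_t old_pte, new_pte = __pte(0);

        while (1) {
                old_pte = pte_val(*ptep);
                /* wait until _PAGE_BUSY is clear, then re-read the PTE */
                if (unlikely(old_pte & _PAGE_BUSY)) {
                        cpu_relax();
                        continue;
                }
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
                /* hugepage in the middle of a THP split: bail out */
                if (unlikely(hugepage &&
                             pmd_trans_splitting(pte_pmd(old_pte))))
                        return __pte(0);
#endif
                /* not present: bail out */
                if (unlikely(!(old_pte & _PAGE_PRESENT)))
                        return __pte(0);

                new_pte = pte_mkyoung(old_pte);
                if (writing && pte_write(old_pte))
                        new_pte = pte_mkdirty(new_pte);

                /* update the PTE only if it did not change under us */
                if (old_pte == __cmpxchg_u64((unsigned long *)ptep,
                                             old_pte, new_pte))
                        break;
        }
        return new_pte;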
  
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +             /* If hugepage and is trans splitting return None */
> +             if (unlikely(hugepage &&
> +                          pmd_trans_splitting(pte_pmd(old_pte))))

Comment looks much like the code... seems redundant.

> +                     return __pte(0);
> +#endif
>  
> -     *p = pte;       /* clears _PAGE_BUSY */
> +             /* If pte is not present return None */
> +             if (unlikely(!(old_pte & _PAGE_PRESENT)))
> +                     return __pte(0);
>  
> -     return pte;
> +             new_pte = pte_mkyoung(old_pte);
> +             if (writing && pte_write(old_pte))
> +                     new_pte = pte_mkdirty(new_pte);
> +
> +     } while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
> +                                       old_pte, new_pte));
> +     return new_pte;
>  }
>  
> +

Whitespace

>  /* Return HPTE cache control bits corresponding to Linux pte bits */
>  static inline unsigned long hpte_cache_bits(unsigned long pte_val)
>  {
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 5880dfb..e1a9415 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>               }
>               /* if the guest wants write access, see if that is OK */
>               if (!writing && hpte_is_writable(r)) {
> +                     unsigned int shift;
>                       pte_t *ptep, pte;
>  
>                       /*
> @@ -683,9 +684,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>                        */
>                       rcu_read_lock_sched();
>                       ptep = find_linux_pte_or_hugepte(current->mm->pgd,
> -                                                      hva, NULL);
> -                     if (ptep && pte_present(*ptep)) {
> -                             pte = kvmppc_read_update_linux_pte(ptep, 1);
> +                                                      hva, &shift);
> +                     if (ptep) {
> +                             pte = kvmppc_read_update_linux_pte(ptep, 1, shift);
>                               if (pte_write(pte))
>                                       write_ok = 1;
>                       }
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index dcf892d..39ae723 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -150,9 +150,7 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
>               *pte_sizep = PAGE_SIZE;
>       if (ps > *pte_sizep)
>               return __pte(0);
> -     if (!pte_present(*ptep))
> -             return __pte(0);
> -     return kvmppc_read_update_linux_pte(ptep, writing);
> +     return kvmppc_read_update_linux_pte(ptep, writing, shift);

'shift' goes into the new 'hugepage' parameter?  That doesn't seem logical.
Can we harmonise the names to make this less confusing?
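e.g. simply renaming the parameter to match what the callers pass in would
already help (untested, just to illustrate):

        static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
                                                         unsigned int shift)
        {
                ...
        #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                /* a non-zero shift means we are looking at a hugepage PTE */
                if (unlikely(shift && pmd_trans_splitting(pte_pmd(old_pte))))
                        return __pte(0);
        #endif
                ...
        }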

Mikey

>  }
>  
>  static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
> -- 
> 1.8.1.2
> 
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev
