"Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com> writes:

> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/kvm_book3s_64.h | 146 ++++++++++++++++++++++++++-----
>  arch/powerpc/kvm/book3s_hv.c             |   7 ++
>  2 files changed, 130 insertions(+), 23 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index 51388befeddb..f03ea8f90576 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -77,34 +77,122 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
>       return old == 0;
>  }
>
> +static inline int __hpte_actual_psize(unsigned int lp, int psize)
> +{
> +     int i, shift;
> +     unsigned int mask;
> +
> +     /* start from 1 ignoring MMU_PAGE_4K */
> +     for (i = 1; i < MMU_PAGE_COUNT; i++) {
> +
> +             /* invalid penc */
> +             if (mmu_psize_defs[psize].penc[i] == -1)
> +                     continue;
> +             /*
> +              * encoding bits per actual page size
> +              *        PTE LP     actual page size
> +              *    rrrr rrrz         >=8KB
> +              *    rrrr rrzz         >=16KB
> +              *    rrrr rzzz         >=32KB
> +              *    rrrr zzzz         >=64KB
> +              * .......
> +              */
> +             shift = mmu_psize_defs[i].shift - LP_SHIFT;
> +             if (shift > LP_BITS)
> +                     shift = LP_BITS;
> +             mask = (1 << shift) - 1;
> +             if ((lp & mask) == mmu_psize_defs[psize].penc[i])
> +                     return i;
> +     }
> +     return -1;
> +}
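
The penc matching above is a bit dense, so here is a minimal userspace
sketch of the same loop with a stub table. The penc values are the ones
implied by the old hard-coded checks this patch removes (1 for 64K,
0 for 16M); the real mmu_psize_defs is filled from the device tree, so
treat the stub contents as assumptions:

#include <stdio.h>

#define LP_SHIFT 12
#define LP_BITS  8

enum { MMU_PAGE_4K, MMU_PAGE_64K, MMU_PAGE_16M, MMU_PAGE_COUNT };

/* stub of mmu_psize_defs: page shift plus penc[] per actual size */
static const struct { int shift; int penc[MMU_PAGE_COUNT]; } defs[] = {
	[MMU_PAGE_4K]  = { 12, { -1, -1, -1 } },	/* no large pages */
	[MMU_PAGE_64K] = { 16, { -1,  1, -1 } },	/* 64K: penc 1 */
	[MMU_PAGE_16M] = { 24, { -1, -1,  0 } },	/* 16M: penc 0 */
};

static int actual_psize(unsigned int lp, int psize)
{
	/* start from 1, ignoring MMU_PAGE_4K, as in the patch */
	for (int i = 1; i < MMU_PAGE_COUNT; i++) {
		int shift, mask;

		if (defs[psize].penc[i] == -1)
			continue;
		/* a 2^n byte actual size leaves n - LP_SHIFT penc bits */
		shift = defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((int)(lp & mask) == defs[psize].penc[i])
			return i;
	}
	return -1;
}

int main(void)
{
	/* LP 0x01 with a 64K base matches the 64K penc, i.e. the case
	 * the old (l & 0xf000) == 0x1000 test caught; LP 0 with a 16M
	 * base matches the old (l & 0xff000) == 0 test. */
	printf("%d %d\n", actual_psize(0x01, MMU_PAGE_64K),
	       actual_psize(0x00, MMU_PAGE_16M));	/* prints "1 2" */
	return 0;
}
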
> +
>  static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
>                                            unsigned long pte_index)
>  {
> -     unsigned long rb, va_low;
> +     int b_size, a_size;
> +     unsigned int penc;
> +     unsigned long rb = 0, va_low, sllp;
> +     unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
> +
> +     if (!(v & HPTE_V_LARGE)) {
> +             /* both base and actual psize are 4k */
> +             b_size = MMU_PAGE_4K;
> +             a_size = MMU_PAGE_4K;
> +     } else {
> +             for (b_size = 0; b_size < MMU_PAGE_COUNT; b_size++) {
> +
> +                     /* valid entries have a shift value */
> +                     if (!mmu_psize_defs[b_size].shift)
> +                             continue;
>
> +                     a_size = __hpte_actual_psize(lp, b_size);
> +                     if (a_size != -1)
> +                             break;
> +             }
> +     }
> +     /*
> +      * Ignore the top 14 bits of va.
> +      * v has its top two bits covering segment size, hence shift
> +      * by 16 bits; also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
> +      * The AVA field in v also has its lower 23 bits dropped.
> +      * For base page size 4K we need bits 14..65 (so we must
> +      * collect 11 extra bits); for others we need bits 14..14+i.
> +      */
> +     /* This covers 14..54 bits of va*/
>       rb = (v & ~0x7fUL) << 16;               /* AVA field */
> +     /*
> +      * The AVA in v has its lower 23 bits cleared; we need to
> +      * derive those from the pteg index.
> +      */
>       va_low = pte_index >> 3;
>       if (v & HPTE_V_SECONDARY)
>               va_low = ~va_low;
> -     /* xor vsid from AVA */
> +     /*
> +      * Get the vpn bits from va_low by reversing the hashing.
> +      * In v we have the va with its lower 23 bits dropped and then
> +      * left shifted by HPTE_V_AVPN_SHIFT (7) bits. To recover the
> +      * vsid we need to right shift by (SID_SHIFT - (23 - 7)).
> +      */
>       if (!(v & HPTE_V_1TB_SEG))
> -             va_low ^= v >> 12;
> +             va_low ^= v >> (SID_SHIFT - 16);
>       else
> -             va_low ^= v >> 24;
> +             va_low ^= v >> (SID_SHIFT_1T - 16);
>       va_low &= 0x7ff;
> -     if (v & HPTE_V_LARGE) {
> -             rb |= 1;                        /* L field */
> -             if (cpu_has_feature(CPU_FTR_ARCH_206) &&
> -                 (r & 0xff000)) {
> -                     /* non-16MB large page, must be 64k */
> -                     /* (masks depend on page size) */
> -                     rb |= 0x1000;           /* page encoding in LP field */
> -                     rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
> -                     rb |= ((va_low << 4) & 0xf0);   /* AVAL field (P7 doesn't seem to care) */
> -             }
> -     } else {
> -             /* 4kB page */
> -             rb |= (va_low & 0x7ff) << 12;   /* remaining 11b of VA */
> +
> +     switch (b_size) {
> +     case MMU_PAGE_4K:
> +             sllp = ((mmu_psize_defs[a_size].sllp & SLB_VSID_L) >> 6) |
> +                     ((mmu_psize_defs[a_size].sllp & SLB_VSID_LP) >> 4);
> +             rb |= sllp << 5;        /*  AP field */
> +             rb |= (va_low & 0x7ff) << 12;   /* remaining 11 bits of AVA */
> +             break;
> +     default:
> +     {
> +             int aval_shift;
> +             /*
> +              * Remaining 7 bits of the AVA/LP field; these also
> +              * contain the rr bits of LP.
> +              */
> +             rb |= (va_low & 0x7f) << 16;
> +             /*
> +              * Now clear the LP bits not needed for the actual psize.
> +              */
> +             rb &= ~((1ul << mmu_psize_defs[a_size].shift) - 1);
> +             /*
> +              * The AVAL field holds bits 58..(77 - base_page_shift)
> +              * of the va; we only have space for bits 58..64, and
> +              * the missing bits should be zero filled. The +1 takes
> +              * care of the L bit shift.
> +              */
> +             aval_shift = 64 - (77 - mmu_psize_defs[b_size].shift) + 1;
> +             rb |= ((va_low << aval_shift) & 0xfe);
> +
> +             rb |= 1;                /* L field */
> +             penc = mmu_psize_defs[b_size].penc[a_size];
> +             rb |= penc << 12;       /* LP field */
> +             break;
> +     }
>       }
>       rb |= (v >> 54) & 0x300;                /* B field */
>       return rb;
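
As a quick sanity check, the generic default case should reproduce what
the old code emitted for the one large-page case it handled (64K base,
64K actual, penc 1). A userspace sketch, ignoring the AVA bits from v
since they sit above bit 15 and are untouched by the mask below:

#include <assert.h>

int main(void)
{
	unsigned long va_low = 0x2b5;	/* arbitrary 11-bit value */
	unsigned long old_rb = 0, new_rb = 0;
	int shift = 16, penc = 1, aval_shift;

	/* old hard-coded 64K path */
	old_rb |= 1;				/* L */
	old_rb |= 0x1000;			/* LP */
	old_rb |= (va_low & 0x7f) << 16;	/* 7b of VA in AVA/LP */
	old_rb |= (va_low << 4) & 0xf0;		/* AVAL */

	/* new generic path with b_size = a_size = MMU_PAGE_64K */
	new_rb |= (va_low & 0x7f) << 16;
	new_rb &= ~((1ul << shift) - 1);	/* drop bits below psize */
	aval_shift = 64 - (77 - shift) + 1;	/* = 4 for 64K */
	new_rb |= (va_low << aval_shift) & 0xfe;
	new_rb |= 1;				/* L */
	new_rb |= (unsigned long)penc << 12;	/* LP */

	assert(old_rb == new_rb);		/* both 0x351051 here */
	return 0;
}
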
> @@ -112,14 +200,26 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
>
>  static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
>  {
> +     int size, a_size;
> +     /* Look at the 8 bit LP value */
> +     unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
> +
>       /* only handle 4k, 64k and 16M pages for now */
>       if (!(h & HPTE_V_LARGE))
> -             return 1ul << 12;               /* 4k page */
> -     if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
> -             return 1ul << 16;               /* 64k page */
> -     if ((l & 0xff000) == 0)
> -             return 1ul << 24;               /* 16M page */
> -     return 0;                               /* error */
> +             return 1ul << 12;
> +     else {
> +             for (size = 0; size < MMU_PAGE_COUNT; size++) {
> +                     /* valid entries have a shift value */
> +                     if (!mmu_psize_defs[size].shift)
> +                             continue;
> +
> +                     a_size = __hpte_actual_psize(lp, size);
> +                     if (a_size != -1)
> +                             return 1ul << mmu_psize_defs[a_size].shift;
> +             }
> +
> +     }
> +     return 0;
>  }
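
For hpte_page_size() the same penc table drives the decode, so the old
64K and 16M special cases fall out of the loop. For reference, a stub
of the old checks that the new code must agree with for those LP values
(HPTE_V_LARGE copied from mmu-hash64.h; double-check the value):

#include <stdio.h>

#define HPTE_V_LARGE 0x4UL

/* old hpte_page_size() logic, kept for comparison */
static unsigned long old_hpte_page_size(unsigned long h, unsigned long l)
{
	if (!(h & HPTE_V_LARGE))
		return 1ul << 12;		/* 4k page */
	if ((l & 0xf000) == 0x1000)
		return 1ul << 16;		/* 64k page */
	if ((l & 0xff000) == 0)
		return 1ul << 24;		/* 16M page */
	return 0;				/* error */
}

int main(void)
{
	/* LP nibble 0x1 -> 64K, LP byte 0x00 -> 16M */
	printf("%lx %lx\n",
	       old_hpte_page_size(HPTE_V_LARGE, 0x1000),
	       old_hpte_page_size(HPTE_V_LARGE, 0x0000));
	return 0;
}
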
>
>  static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 8227dba5af0f..a38d3289320a 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1949,6 +1949,13 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
>        * support pte_enc here
>        */
>       (*sps)->enc[0].pte_enc = def->penc[linux_psize];
> +     /*
> +      * Add 16MB MPSS support
> +      */
> +     if (linux_psize != MMU_PAGE_16M) {
> +             (*sps)->enc[1].page_shift = 24;
> +             (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
> +     }
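
To make the effect concrete: with this hunk, the 64K segment entry
reported through KVM_PPC_GET_SMMU_INFO would carry two encodings,
roughly as in the stub below. The struct mirrors the uapi layout, and
the pte_enc values (1 for 64K, 8 for 16M MPSS in a 64K segment) are the
POWER7 penc numbers as I remember them, so treat them as assumptions:

#include <stdio.h>

/* mirrors struct kvm_ppc_one_{page,seg_page}_size from the uapi */
struct one_page_size { unsigned int page_shift, pte_enc; };
struct one_seg_page_size {
	unsigned int page_shift, slb_enc;
	struct one_page_size enc[8];
};

int main(void)
{
	struct one_seg_page_size sps = {
		.page_shift = 16,	/* 64K base segment */
		.slb_enc = 0x110,	/* SLB_VSID_L | SLB_VSID_LP_01 */
		.enc = {
			{ .page_shift = 16, .pte_enc = 1 },	/* 64K */
			{ .page_shift = 24, .pte_enc = 8 },	/* 16M MPSS */
		},
	};

	printf("base shift %u: enc0 %u/%u, enc1 %u/%u\n", sps.page_shift,
	       sps.enc[0].page_shift, sps.enc[0].pte_enc,
	       sps.enc[1].page_shift, sps.enc[1].pte_enc);
	return 0;
}
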

We ideally want to do this only when the guest memory is backed by
hugetlbfs. I was thinking QEMU should ensure that, but I am not sure
existing QEMU works that way. So we may want to look at how best to
enable MPSS.

-aneesh
