On Sun, Feb 19, 2017 at 03:37:11PM +0530, Aneesh Kumar K.V wrote:
> Inorder to support large effective address range (512TB), we want to increase
> the virtual address bits to 68. But we do have platforms like p4 and p5 that
> can only do 65 bit VA. We support those platforms by limiting context bits on
> them to 16.
>
> The protovsid -> vsid conversion is verified to work with both 65 and 68 bit
> va values. I also documented the restrictions in a table format as part of
> code comments.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
> ---
> /*
>  * This should be computed such that protovosid * vsid_mulitplier
>  * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
> + * We also need to make sure that number of bits in divisor is less
> + * than twice the number of protovsid bits for our modulus optmization to work.
Could you please explain why? I am also beginning to wonder whether we need
the modulus optimization at all: on my compiler, (a * b) % (2^n - 1) generated
reasonable code with just one multiplication and a few shifts and boolean
operations.
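For what it's worth, here is the quick throwaway userspace check I used (my
own code, not part of the patch; the constants simply mirror the 256M
segment, 65-bit VA case), comparing a plain '%' by the Mersenne modulus
against the shift-and-add fold the patch open-codes:

#include <stdio.h>
#include <stdlib.h>

#define MULTIPLIER      12538073UL      /* 24-bit prime, as in the patch */
#define BITS            37              /* 65 - SID_SHIFT for 256M segments */
#define MODULUS         ((1UL << BITS) - 1)

/* the straightforward version: let the compiler handle the modulus */
static unsigned long scramble_mod(unsigned long protovsid)
{
        return (protovsid * MULTIPLIER) % MODULUS;
}

/* the open-coded fold, as done by vsid_scramble() in the patch */
static unsigned long scramble_fold(unsigned long protovsid)
{
        unsigned long x = protovsid * MULTIPLIER;

        /* add the high and low BITS-wide halves, then fold the carry back */
        x = (x >> BITS) + (x & MODULUS);
        return (x + ((x + 1) >> BITS)) & MODULUS;
}

int main(void)
{
        unsigned long i;

        srandom(1);
        for (i = 0; i < 1000000; i++) {
                unsigned long p = (((unsigned long)random() << 31) |
                                   (unsigned long)random()) & MODULUS;

                if (scramble_mod(p) != scramble_fold(p)) {
                        printf("mismatch for protovsid %#lx\n", p);
                        return 1;
                }
        }
        printf("plain %% and fold agree\n");
        return 0;
}

The two versions agree for every proto-VSID tried, and the plain-modulus one
compiled to one multiply plus a handful of shifts and adds here, which is what
prompted the question above.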
> + * The below table shows the current values used.
> + *
> + * |-------+------------+----------------+------------+--------------|
> + * |       | Prime Bits | VSID_BITS_65VA | Total Bits | 2* VSID_BITS |
> + * |-------+------------+----------------+------------+--------------|
> + * | 1T    | 24         | 25             | 49         | 50           |
> + * |-------+------------+----------------+------------+--------------|
> + * | 256MB | 24         | 37             | 61         | 74           |
> + * |-------+------------+----------------+------------+--------------|
> + *
> + * |-------+------------+----------------+------------+--------------|
> + * |       | Prime Bits | VSID_BITS_68VA | Total Bits | 2* VSID_BITS |
> + * |-------+------------+----------------+------------+--------------|
> + * | 1T    | 24         | 28             | 52         | 56           |
> + * |-------+------------+----------------+------------+--------------|
> + * | 256MB | 24         | 40             | 64         | 80           |
> + * |-------+------------+----------------+------------+--------------|
> + *
>  */
> #define VSID_MULTIPLIER_256M    ASM_CONST(12538073)    /* 24-bit prime */
> -#define VSID_BITS_256M          (CONTEXT_BITS + ESID_BITS)
> +#define VSID_BITS_256M          (VA_BITS - SID_SHIFT)
> #define VSID_MODULUS_256M       ((1UL<<VSID_BITS_256M)-1)
> +#define VSID_BITS_65_256M       (65 - SID_SHIFT)
>
> #define VSID_MULTIPLIER_1T      ASM_CONST(12538073)    /* 24-bit prime */
> -#define VSID_BITS_1T            (CONTEXT_BITS + ESID_BITS_1T)
> +#define VSID_BITS_1T            (VA_BITS - SID_SHIFT_1T)
> #define VSID_MODULUS_1T         ((1UL<<VSID_BITS_1T)-1)
> -
> +#define VSID_BITS_65_1T         (65 - SID_SHIFT_1T)
>
> #define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT))
>
> -/*
> - * This macro generates asm code to compute the VSID scramble
> - * function.  Used in slb_allocate() and do_stab_bolted.  The function
> - * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
> - *
> - *      rt = register containing the proto-VSID and into which the
> - *              VSID will be stored
> - *      rx = scratch register (clobbered)
> - *
> - *      - rt and rx must be different registers
> - *      - The answer will end up in the low VSID_BITS bits of rt.  The higher
> - *        bits may contain other garbage, so you may need to mask the
> - *        result.
> - */
> -#define ASM_VSID_SCRAMBLE(rt, rx, size)                                 \
> -        lis     rx,VSID_MULTIPLIER_##size@h;                            \
> -        ori     rx,rx,VSID_MULTIPLIER_##size@l;                         \
> -        mulld   rt,rt,rx;               /* rt = rt * MULTIPLIER */      \
> -                                                                        \
> -        srdi    rx,rt,VSID_BITS_##size;                                 \
> -        clrldi  rt,rt,(64-VSID_BITS_##size);                            \
> -        add     rt,rt,rx;               /* add high and low bits */     \
> -        /* NOTE: explanation based on VSID_BITS_##size = 36             \
> -         * Now, r3 == VSID (mod 2^36-1), and lies between 0 and         \
> -         * 2^36-1+2^28-1.  That in particular means that if r3 >=       \
> -         * 2^36-1, then r3+1 has the 2^36 bit set.  So, if r3+1 has     \
> -         * the bit clear, r3 already has the answer we want, if it      \
> -         * doesn't, the answer is the low 36 bits of r3+1.  So in all   \
> -         * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\

I see that this comment is lost in the new definition; could we please retain
it?

> -        addi    rx,rt,1;                                                \
> -        srdi    rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */   \
> -        add     rt,rt,rx
> -
> /* 4 bits per slice and we have one slice per 1TB */
> #define SLICE_ARRAY_SIZE  (H_PGTABLE_RANGE >> 41)
>
> @@ -629,7 +632,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
> #define vsid_scramble(protovsid, size) \
>         ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
>
> -#else /* 1 */
> +/* simplified form avoiding mod operation */
> #define vsid_scramble(protovsid, size) \
>         ({                                                               \
>                 unsigned long x;                                         \
> @@ -637,6 +640,21 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
>                 x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
>                 (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
>         })
> +
> +static inline unsigned long vsid_scramble(unsigned long protovsid,
> +                        unsigned long vsid_multiplier, int vsid_bits)
> +{
> +        unsigned long vsid;
> +        unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);
> +        /*
> +         * We have same multipler for both 256 and 1T segements now
> +         */
> +        vsid = protovsid * vsid_multiplier;
> +        vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
> +        return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
> +}
> +
> #endif /* 1 */
>
> /* Returns the segment size indicator for a user address */
> @@ -651,17 +669,32 @@ static inline int user_segment_size(unsigned long addr)
> static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
>                                      int ssize)
> {
> +        unsigned long va_bits = VA_BITS;

Is there a reason to convert the constant into unsigned long?

> +        unsigned long vsid_bits;
> +        unsigned long protovsid;
> +
>         /*
>          * Bad address. We return VSID 0 for that
>          */
>         if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
>                 return 0;
>
> -        if (ssize == MMU_SEGSIZE_256M)
> -                return vsid_scramble((context << ESID_BITS)
> -                                     | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M);
> -        return vsid_scramble((context << ESID_BITS_1T)
> -                             | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T);
> +        if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
> +                va_bits = 65;
> +
> +        if (ssize == MMU_SEGSIZE_256M) {
> +                vsid_bits = va_bits - SID_SHIFT;
> +                protovsid = (context << ESID_BITS) |
> +                        ((ea >> SID_SHIFT) & ESID_BITS_MASK);
> +                return vsid_scramble(protovsid,
> +                                     VSID_MULTIPLIER_256M, vsid_bits);
> +        }
> +        /* 1T segment */
> +        vsid_bits = va_bits - SID_SHIFT_1T;
> +        protovsid = (context << ESID_BITS_1T) |
> +                ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
> +        return vsid_scramble(protovsid,
> +                             VSID_MULTIPLIER_1T, vsid_bits);

We seem to be passing an unsigned long to a prototype above that expects an
int.
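To make the two type questions concrete, here is a stripped-down,
compile-it-yourself illustration (my own sketch with made-up names and demo
values, not the kernel code): the caller computes vsid_bits as unsigned long
while the vsid_scramble() parameter is an int, so the argument is implicitly
narrowed at the call.

#include <stdio.h>

/* same shape as the posted prototype: vsid_bits is an int */
static inline unsigned long scramble(unsigned long protovsid,
                                     unsigned long multiplier, int vsid_bits)
{
        unsigned long modulus = (1UL << vsid_bits) - 1;
        unsigned long v = protovsid * multiplier;

        v = (v >> vsid_bits) + (v & modulus);
        return (v + ((v + 1) >> vsid_bits)) & modulus;
}

/* same shape as the posted get_vsid(): vsid_bits is an unsigned long */
static unsigned long demo_get_vsid(unsigned long protovsid, int big_va)
{
        unsigned long va_bits = big_va ? 68 : 65;
        unsigned long vsid_bits = va_bits - 28;  /* SID_SHIFT for 256M */

        /* the unsigned long is implicitly converted to int here */
        return scramble(protovsid, 12538073UL, vsid_bits);
}

int main(void)
{
        printf("%#lx\n", demo_get_vsid(0x12345, 1));
        return 0;
}

Either keeping vsid_bits (and va_bits) as int or widening the parameter to
unsigned long would make the two sides agree.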
> */ > @@ -109,10 +113,10 @@ > #define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2 > #define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA > #define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE > -#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | > MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO > -#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | > MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO > -#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | > MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO > -#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | > MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO > +#define MMU_FTRS_POWER6 MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | > MMU_FTR_68_BIT_VA > +#define MMU_FTRS_POWER7 MMU_FTRS_POWER6 > +#define MMU_FTRS_POWER8 MMU_FTRS_POWER6 > +#define MMU_FTRS_POWER9 MMU_FTRS_POWER6 > #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ > MMU_FTR_CI_LARGE_PAGE > #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ > @@ -136,7 +140,7 @@ enum { > MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | > MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | > MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | > - MMU_FTR_KERNEL_RO | > + MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA | > #ifdef CONFIG_PPC_RADIX_MMU > MMU_FTR_TYPE_RADIX | > #endif > @@ -290,7 +294,10 @@ static inline bool early_radix_enabled(void) > #define MMU_PAGE_16G 14 > #define MMU_PAGE_64G 15 > > -/* N.B. we need to change the type of hpte_page_sizes if this gets to be > > 16 */ > +/* > + * N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 > + * Also we need to change he type of mm_context.low/high_slices_psize. > + */ > #define MMU_PAGE_COUNT 16 > > #ifdef CONFIG_PPC_BOOK3S_64 > diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c > b/arch/powerpc/kvm/book3s_64_mmu_host.c > index d6041101a5e2..bed699ab7654 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_host.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c > @@ -229,6 +229,7 @@ void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct > kvmppc_pte *pte) > > static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 > gvsid) > { > + unsigned long vsid_bits = VSID_BITS_65_256M; > struct kvmppc_sid_map *map; > struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); > u16 sid_map_mask; > @@ -257,7 +258,12 @@ static struct kvmppc_sid_map *create_sid_map(struct > kvm_vcpu *vcpu, u64 gvsid) > kvmppc_mmu_pte_flush(vcpu, 0, 0); > kvmppc_mmu_flush_segments(vcpu); > } > - map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M); > + > + if (mmu_has_feature(MMU_FTR_68_BIT_VA)) > + vsid_bits = VSID_BITS_256M; > + > + map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, > + VSID_MULTIPLIER_256M, vsid_bits); > > map->guest_vsid = gvsid; > map->valid = true; > diff --git a/arch/powerpc/mm/mmu_context_book3s64.c > b/arch/powerpc/mm/mmu_context_book3s64.c > index a6450c7f9cf3..e6a5bcbf8abe 100644 > --- a/arch/powerpc/mm/mmu_context_book3s64.c > +++ b/arch/powerpc/mm/mmu_context_book3s64.c > @@ -33,6 +33,12 @@ static DEFINE_IDA(mmu_context_ida); > int hash__get_new_context(void) > { > int index, err; > + unsigned long max_user_context; > + > + if (mmu_has_feature(MMU_FTR_68_BIT_VA)) > + max_user_context = MAX_USER_CONTEXT; > + else > + max_user_context = MAX_USER_CONTEXT_65BIT_VA; > > again: > if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL)) > @@ -50,7 +56,7 @@ int hash__get_new_context(void) > else if (err) > return err; > > - if (index > MAX_USER_CONTEXT) { > + if (index > max_user_context) { > spin_lock(&mmu_context_lock); > 
>                 ida_remove(&mmu_context_ida, index);
>                 spin_unlock(&mmu_context_lock);
> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
> index 4ce050ea4200..bb75a667483e 100644
> --- a/arch/powerpc/mm/slb_low.S
> +++ b/arch/powerpc/mm/slb_low.S
> @@ -23,6 +23,48 @@
> #include <asm/pgtable.h>
> #include <asm/firmware.h>
>
> +/*
> + * This macro generates asm code to compute the VSID scramble
> + * function.  Used in slb_allocate() and do_stab_bolted.  The function
> + * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
> + *
> + *      rt = register containing the proto-VSID and into which the
> + *              VSID will be stored
> + *      rx = scratch register (clobbered)
> + *      rf = flags
> + *
> + *      - rt and rx must be different registers
> + *      - The answer will end up in the low VSID_BITS bits of rt.  The higher
> + *        bits may contain other garbage, so you may need to mask the
> + *        result.
> + */
> +#define ASM_VSID_SCRAMBLE(rt, rx, rf, size)                             \
> +        lis     rx,VSID_MULTIPLIER_##size@h;                            \
> +        ori     rx,rx,VSID_MULTIPLIER_##size@l;                         \
> +        mulld   rt,rt,rx;               /* rt = rt * MULTIPLIER */      \
> +/*                                                                      \
> + * powermac get slb fault before feature fixup, so make 65 bit part     \
> + * the default part of feature fixup                                    \
> + */                                                                     \
> +BEGIN_MMU_FTR_SECTION                                                   \
> +        srdi    rx,rt,VSID_BITS_65_##size;                              \
> +        clrldi  rt,rt,(64-VSID_BITS_65_##size);                         \
> +        add     rt,rt,rx;                                               \
> +        addi    rx,rt,1;                                                \
> +        srdi    rx,rx,VSID_BITS_65_##size;                              \
> +        add     rt,rt,rx;                                               \
> +        rldimi  rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
> +MMU_FTR_SECTION_ELSE                                                    \
> +        srdi    rx,rt,VSID_BITS_##size;                                 \
> +        clrldi  rt,rt,(64-VSID_BITS_##size);                            \
> +        add     rt,rt,rx;               /* add high and low bits */     \
> +        addi    rx,rt,1;                                                \
> +        srdi    rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */   \
> +        add     rt,rt,rx;                                               \
> +        rldimi  rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \

I don't like this bit: it does more than scramble, it also merges the flags
into the generated scramble bits. Could we please revisit what
ASM_VSID_SCRAMBLE should do? (There is a rough sketch of what I mean in the
P.S. below.)

Otherwise,

Acked-by: Balbir Singh <bsinghar...@gmail.com>

Balbir
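P.S. To make the last point a bit more concrete, here is the rough split I
have in mind, sketched in plain C rather than as the asm macro (made-up names
and demo values, mine only): the scramble produces nothing but the VSID bits,
and the caller owns merging them with the SLB flags.

#include <stdio.h>

#define VSID_MULTIPLIER 12538073UL      /* 24-bit prime, as in the patch */
#define SLB_VSID_SHIFT  12              /* illustrative value only */

/* scramble stays pure: proto-VSID in, VSID bits out */
static unsigned long scramble(unsigned long protovsid, int vsid_bits)
{
        unsigned long modulus = (1UL << vsid_bits) - 1;
        unsigned long v = protovsid * VSID_MULTIPLIER;

        v = (v >> vsid_bits) + (v & modulus);
        return (v + ((v + 1) >> vsid_bits)) & modulus;
}

/* the caller, not the scramble, combines the VSID with the flags */
static unsigned long make_vsid_data(unsigned long protovsid, int vsid_bits,
                                    unsigned long flags)
{
        return (scramble(protovsid, vsid_bits) << SLB_VSID_SHIFT) | flags;
}

int main(void)
{
        /* 37 = 65-bit VA, 256M segments; the flags value is arbitrary */
        printf("%#lx\n", make_vsid_data(0x12345, 37, 0x400));
        return 0;
}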