Paul Mackerras <pau...@samba.org> writes: > On Mon, Jul 30, 2012 at 04:52:12PM +0530, Aneesh Kumar K.V wrote: >> From: "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com> >> >> This patch makes the high psizes mask as an unsigned char array >> so that we can have more than 16TB. Currently we support upto >> 64TB > > Comments below... > >> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S >> index b9ee79ce..c355af6 100644 >> --- a/arch/powerpc/mm/slb_low.S >> +++ b/arch/powerpc/mm/slb_low.S >> @@ -108,17 +108,34 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) >> * between 4k and 64k standard page size >> */ >> #ifdef CONFIG_PPC_MM_SLICES >> + /* r10 have esid */ >> cmpldi r10,16 >> - >> - /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ >> - ld r9,PACALOWSLICESPSIZE(r13) >> - sldi r11,r10,2 >> + /* below SLICE_LOW_TOP */ >> blt 5f >> - ld r9,PACAHIGHSLICEPSIZE(r13) >> - srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) >> - andi. r11,r11,0x3c >> - >> -5: /* Extract the psize and multiply to get an array offset */ >> + /* >> + * Handle hpsizes, >> + * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index >> + * We use r10 here, later we restore it to esid. >> + * Can we use other register instead of r10 ? > > Only r9, r10 and r11 are available here, and you're using them all. > Restoring r10 with one integer instruction is going to be quicker than > saving and restoring another register to/from memory. > >> + */ >> + srdi r10,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT) /* index */ >> + srdi r11,r10,1 /* r11 is array index */ >> + addi r9,r11,PACAHIGHSLICEPSIZE >> + lbzx r9,r9,r13 /* r9 is hpsizes[r11] */ >> + sldi r11,r11,1 >> + subf r11,r11,r10 /* mask_index = index - (array_index << 1) */ >> + srdi r10,r3,28 /* restore r10 with esid */ >> + b 6f > > How about (untested): > > srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */ > addi r9,r11,PACAHIGHSLICEPSIZE > lbzx r9,r13,r9 /* r9 is hpsizes[r11] */ > /* r11 = (r10 >> 12) & 1, i.e. grab lowest bit of 1T ESID */ > rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63 > b 6f >
nice, I missed the assembly part when you asked to update the c code in previous review. This change also bring it closer to the c code. > Note that I swapped the RA and RB arguments for the lbzx. Our recent > processors process indexed mode instructions more quickly if the value > in RB is small. > >> static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int >> psize) >> { >> + unsigned char *hpsizes; >> + int index, mask_index; >> struct slice_mask ret = { 0, 0 }; >> unsigned long i; >> - u64 psizes; >> + u64 lpsizes; >> >> - psizes = mm->context.low_slices_psize; >> + lpsizes = mm->context.low_slices_psize; >> for (i = 0; i < SLICE_NUM_LOW; i++) >> - if (((psizes >> (i * 4)) & 0xf) == psize) >> + if (((lpsizes >> (i * 4)) & 0xf) == psize) >> ret.low_slices |= 1u << i; >> >> - psizes = mm->context.high_slices_psize; >> - for (i = 0; i < SLICE_NUM_HIGH; i++) >> - if (((psizes >> (i * 4)) & 0xf) == psize) >> + hpsizes = mm->context.high_slices_psize; >> + for (i = 0; i < SLICE_NUM_HIGH; i++) { >> + mask_index = i & 0x1; >> + index = i >> 1; >> + if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) >> ret.high_slices |= 1u << i; > > This needs to be 1ul not 1u, since we are creating a 64-bit mask. > >> static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int >> psize) >> { >> + int index, mask_index; >> /* Write the new slice psize bits */ >> - u64 lpsizes, hpsizes; >> + unsigned char *hpsizes; >> + u64 lpsizes; >> unsigned long i, flags; >> >> slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); >> @@ -201,14 +208,18 @@ static void slice_convert(struct mm_struct *mm, struct >> slice_mask mask, int psiz >> lpsizes = (lpsizes & ~(0xful << (i * 4))) | >> (((unsigned long)psize) << (i * 4)); >> >> + /* Assign the value back */ >> + mm->context.low_slices_psize = lpsizes; >> + >> hpsizes = mm->context.high_slices_psize; >> - for (i = 0; i < SLICE_NUM_HIGH; i++) >> + for (i = 0; i < SLICE_NUM_HIGH; i++) { >> + mask_index = i & 0x1; >> + index = i >> 1; >> if (mask.high_slices & (1u << i)) > > Again, 1ul now. Check all the other places where we manipulate a > slice mask to see if there are any other instances of 1u that need to > be changed. I ended up with this. diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 0136040..b4e996a 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -54,7 +54,7 @@ static void slice_print_mask(const char *label, struct slice_mask mask) *(p++) = '-'; *(p++) = ' '; for (i = 0; i < SLICE_NUM_HIGH; i++) - *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0'; + *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0'; *(p++) = 0; printk(KERN_DEBUG "%s:%s\n", label, buf); @@ -84,8 +84,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start, } if ((start + len) > SLICE_LOW_TOP) - ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1)) - - (1u << GET_HIGH_SLICE_INDEX(start)); + ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1)) + - (1ul << GET_HIGH_SLICE_INDEX(start)); return ret; } @@ -135,7 +135,7 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm) for (i = 0; i < SLICE_NUM_HIGH; i++) if (!slice_high_has_vma(mm, i)) - ret.high_slices |= 1u << i; + ret.high_slices |= 1ul << i; return ret; } @@ -158,7 +158,7 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) mask_index = i & 0x1; index = i >> 1; if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) - ret.high_slices |= 1u << i; + ret.high_slices |= 1ul << i; } return ret; @@ -215,7 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz for (i = 0; i < SLICE_NUM_HIGH; i++) { mask_index = i & 0x1; index = i >> 1; - if (mask.high_slices & (1u << i)) + if (mask.high_slices & (1ul << i)) hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) | (((unsigned long)psize) << (mask_index * 4)); -aneesh _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev