Paul Mackerras <pau...@samba.org> writes:

> On Mon, Jul 30, 2012 at 04:52:12PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com>
>> 
>> This patch makes the high psizes mask as an unsigned char array
>> so that we can have more than 16TB. Currently we support upto
>> 64TB
>
> Comments below...
>
>> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
>> index b9ee79ce..c355af6 100644
>> --- a/arch/powerpc/mm/slb_low.S
>> +++ b/arch/powerpc/mm/slb_low.S
>> @@ -108,17 +108,34 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
>>       * between 4k and 64k standard page size
>>       */
>>  #ifdef CONFIG_PPC_MM_SLICES
>> +    /* r10 have esid */
>>      cmpldi  r10,16
>> -
>> -    /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
>> -    ld      r9,PACALOWSLICESPSIZE(r13)
>> -    sldi    r11,r10,2
>> +    /* below SLICE_LOW_TOP */
>>      blt     5f
>> -    ld      r9,PACAHIGHSLICEPSIZE(r13)
>> -    srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
>> -    andi.   r11,r11,0x3c
>> -
>> -5:  /* Extract the psize and multiply to get an array offset */
>> +    /*
>> +     * Handle hpsizes,
>> +     * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
>> +     * We use r10 here, later we restore it to esid.
>> +     * Can we use other register instead of r10 ?
>
> Only r9, r10 and r11 are available here, and you're using them all.
> Restoring r10 with one integer instruction is going to be quicker than
> saving and restoring another register to/from memory.
>
>> +     */
>> +    srdi    r10,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT) /* index */
>> +    srdi    r11,r10,1                       /* r11 is array index */
>> +    addi    r9,r11,PACAHIGHSLICEPSIZE
>> +    lbzx    r9,r9,r13                       /* r9 is hpsizes[r11] */
>> +    sldi    r11,r11,1
>> +    subf    r11,r11,r10     /* mask_index = index - (array_index << 1) */
>> +    srdi    r10,r3,28       /* restore r10 with esid */
>> +    b       6f
>
> How about (untested):
>
>       srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
>       addi    r9,r11,PACAHIGHSLICEPSIZE
>       lbzx    r9,r13,r9                       /* r9 is hpsizes[r11] */
>       /* r11 = (r10 >> 12) & 1, i.e. grab lowest bit of 1T ESID */
>       rldicl  r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
>       b       6f
>

nice, I missed the assembly part when you asked to update the c code
in previous review. This change also bring it closer to the c code.

> Note that I swapped the RA and RB arguments for the lbzx.  Our recent
> processors process indexed mode instructions more quickly if the value
> in RB is small.
>
>>  static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int 
>> psize)
>>  {
>> +    unsigned char *hpsizes;
>> +    int index, mask_index;
>>      struct slice_mask ret = { 0, 0 };
>>      unsigned long i;
>> -    u64 psizes;
>> +    u64 lpsizes;
>>  
>> -    psizes = mm->context.low_slices_psize;
>> +    lpsizes = mm->context.low_slices_psize;
>>      for (i = 0; i < SLICE_NUM_LOW; i++)
>> -            if (((psizes >> (i * 4)) & 0xf) == psize)
>> +            if (((lpsizes >> (i * 4)) & 0xf) == psize)
>>                      ret.low_slices |= 1u << i;
>>  
>> -    psizes = mm->context.high_slices_psize;
>> -    for (i = 0; i < SLICE_NUM_HIGH; i++)
>> -            if (((psizes >> (i * 4)) & 0xf) == psize)
>> +    hpsizes = mm->context.high_slices_psize;
>> +    for (i = 0; i < SLICE_NUM_HIGH; i++) {
>> +            mask_index = i & 0x1;
>> +            index = i >> 1;
>> +            if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
>>                      ret.high_slices |= 1u << i;
>
> This needs to be 1ul not 1u, since we are creating a 64-bit mask.
>
>>  static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int 
>> psize)
>>  {
>> +    int index, mask_index;
>>      /* Write the new slice psize bits */
>> -    u64 lpsizes, hpsizes;
>> +    unsigned char *hpsizes;
>> +    u64 lpsizes;
>>      unsigned long i, flags;
>>  
>>      slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
>> @@ -201,14 +208,18 @@ static void slice_convert(struct mm_struct *mm, struct 
>> slice_mask mask, int psiz
>>                      lpsizes = (lpsizes & ~(0xful << (i * 4))) |
>>                              (((unsigned long)psize) << (i * 4));
>>  
>> +    /* Assign the value back */
>> +    mm->context.low_slices_psize = lpsizes;
>> +
>>      hpsizes = mm->context.high_slices_psize;
>> -    for (i = 0; i < SLICE_NUM_HIGH; i++)
>> +    for (i = 0; i < SLICE_NUM_HIGH; i++) {
>> +            mask_index = i & 0x1;
>> +            index = i >> 1;
>>              if (mask.high_slices & (1u << i))
>
> Again, 1ul now.  Check all the other places where we manipulate a
> slice mask to see if there are any other instances of 1u that need to
> be changed.

I ended up with this. 

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0136040..b4e996a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -54,7 +54,7 @@ static void slice_print_mask(const char *label, struct 
slice_mask mask)
        *(p++) = '-';
        *(p++) = ' ';
        for (i = 0; i < SLICE_NUM_HIGH; i++)
-               *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+               *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
        *(p++) = 0;
 
        printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -84,8 +84,8 @@ static struct slice_mask slice_range_to_mask(unsigned long 
start,
        }
 
        if ((start + len) > SLICE_LOW_TOP)
-               ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
-                       - (1u << GET_HIGH_SLICE_INDEX(start));
+               ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
+                       - (1ul << GET_HIGH_SLICE_INDEX(start));
 
        return ret;
 }
@@ -135,7 +135,7 @@ static struct slice_mask slice_mask_for_free(struct 
mm_struct *mm)
 
        for (i = 0; i < SLICE_NUM_HIGH; i++)
                if (!slice_high_has_vma(mm, i))
-                       ret.high_slices |= 1u << i;
+                       ret.high_slices |= 1ul << i;
 
        return ret;
 }
@@ -158,7 +158,7 @@ static struct slice_mask slice_mask_for_size(struct 
mm_struct *mm, int psize)
                mask_index = i & 0x1;
                index = i >> 1;
                if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-                       ret.high_slices |= 1u << i;
+                       ret.high_slices |= 1ul << i;
        }
 
        return ret;
@@ -215,7 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct 
slice_mask mask, int psiz
        for (i = 0; i < SLICE_NUM_HIGH; i++) {
                mask_index = i & 0x1;
                index = i >> 1;
-               if (mask.high_slices & (1u << i))
+               if (mask.high_slices & (1ul << i))
                        hpsizes[index] = (hpsizes[index] &
                                          ~(0xf << (mask_index * 4))) |
                                (((unsigned long)psize) << (mask_index * 4));


-aneesh

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to