On Mon, 2009-06-15 at 18:30 +0530, Sachin Sant wrote:

> The corresponding C code is :
> 
>  278:   2f 80 00 00     cmpwi   cr7,r0,0
>         } else {
>                 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>                 ssize = mmu_kernel_ssize;
>         }
>         vaddr = hpt_va(addr, vsid, ssize);
>         rpte = __real_pte(__pte(pte), ptep);

That's interesting... __real_pte() is used to reconstruct
a PTE from its two halves, but maybe our huge-page page tables don't
have the second half! In which case we are just going to peek into
la-la-land. I'm also worried by the negative offset, since the second
halves are at +0x8000 iirc, unless it flipped the pointers around, but
that would be strange....
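
Roughly, from memory (so treat this as a sketch rather than the exact
tree contents), the 64K-page definitions behind that load look like
the following, with the hidx half sitting a whole PTE page
(PTRS_PER_PTE = 4096 entries * 8 bytes = 0x8000) past the Linux PTE:

	/* sketch of the CONFIG_PPC_64K_PAGES case, not verbatim */
	typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;

	#define __real_pte(e, p) ((real_pte_t) { \
		(e), pte_val(*((p) + PTRS_PER_PTE)) })

If I read the asm right, r22 is ptep, so the "addis r9,r22,1" /
"ld r28,-32768(r9)" pair works out to ptep + 0x10000 - 0x8000 =
ptep + 0x8000, i.e. the displacement is effectively positive once the
addis is counted; the real question is whether that second half even
exists for huge-page page tables.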

I'll have a look later today.

Cheers,
Ben.

>  27c:   eb 89 80 00     ld      r28,-32768(r9)  
> ^^^^ %pc points to the above line. ^^^^^
>          * Check if we have an active batch on this CPU. If not, just
>          * flush now and return. For now, we don global invalidates
>          * in that case, might be worth testing the mm cpu mask though
>          * and decide to use local invalidates instead...
> 
> I have attached the objdump o/p for tlb_hash64.o.
> 
> I could not recreate this issue with git8 kernel
> (45e3e1935e2857c54783291107d33323b3ef33c8).
> 
> Thanks
> -Sachin
> 
> plain text document attachment (tlb_hash64_objlist)
> arch/powerpc/mm/tlb_hash64.o:     file format elf64-powerpc
> 
> 
> Disassembly of section .text:
> 
> 0000000000000000 <.__flush_tlb_pending>:
>  * in a batch.
>  *
>  * Must be called from within some kind of spinlock/non-preempt region...
>  */
> void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
> {
>    0: fb e1 ff f8     std     r31,-8(r1)
>    4: 7c 08 02 a6     mflr    r0
>    8: f8 01 00 10     std     r0,16(r1)
> extern const unsigned long
>       cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
> 
> static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
> {
>       const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
>    c: e8 02 00 00     ld      r0,0(r2)
>   10: 7c 7f 1b 78     mr      r31,r3
>   14: fb c1 ff f0     std     r30,-16(r1)
>                       const unsigned long *src2, int nbits)
> {
>       if (small_const_nbits(nbits))
>               return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
>       else
>               return __bitmap_equal(src1, src2, nbits);
>   18: 38 a0 04 00     li      r5,1024
>   1c: f8 21 ff 81     stdu    r1,-128(r1)
>       const struct cpumask *tmp;
>       int i, local = 0;
> 
>       i = batch->index;
>       tmp = cpumask_of(smp_processor_id());
>   20: a0 8d 00 0a     lhz     r4,10(r13)
>   24: e8 63 00 10     ld      r3,16(r3)
>   28: 78 89 06 a0     clrldi  r9,r4,58
>   2c: 78 84 d1 82     rldicl  r4,r4,58,6
>   30: 39 29 00 01     addi    r9,r9,1
>   34: 78 84 1f 24     rldicr  r4,r4,3,60
>   38: 79 29 3e 24     rldicr  r9,r9,7,56
>   3c: 38 63 03 10     addi    r3,r3,784
>   40: 7c 00 4a 14     add     r0,r0,r9
> void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
> {
>       const struct cpumask *tmp;
>       int i, local = 0;
> 
>       i = batch->index;
>   44: eb df 00 0e     lwa     r30,12(r31)
>   48: 7c 84 00 50     subf    r4,r4,r0
>   4c: 48 00 00 01     bl      4c <.__flush_tlb_pending+0x4c>
>   50: 60 00 00 00     nop
>   54: 7c 69 fe 70     srawi   r9,r3,31
>       tmp = cpumask_of(smp_processor_id());
>       if (cpumask_equal(mm_cpumask(batch->mm), tmp))
>               local = 1;
>       if (i == 1)
>   58: 2f 9e 00 01     cmpwi   cr7,r30,1
>   5c: 7d 20 1a 78     xor     r0,r9,r3
>   60: 7c 00 48 50     subf    r0,r0,r9
>   64: 54 00 0f fe     rlwinm  r0,r0,1,31,31
>   68: 7c 04 07 b4     extsw   r4,r0
>   6c: 40 9e 00 28     bne-    cr7,94 <.__flush_tlb_pending+0x94>
>               flush_hash_page(batch->vaddr[0], batch->pte[0],
>   70: 7c 88 23 78     mr      r8,r4
>   74: e8 7f 0c 18     ld      r3,3096(r31)
>   78: e8 df 12 1a     lwa     r6,4632(r31)
>   7c: e8 ff 12 1e     lwa     r7,4636(r31)
>   80: e8 9f 00 18     ld      r4,24(r31)
>   84: e8 bf 00 20     ld      r5,32(r31)
>   88: 48 00 00 01     bl      88 <.__flush_tlb_pending+0x88>
>   8c: 60 00 00 00     nop
>   90: 48 00 00 10     b       a0 <.__flush_tlb_pending+0xa0>
>                               batch->psize, batch->ssize, local);
>       else
>               flush_hash_range(i, local);
>   94: 7f c3 f3 78     mr      r3,r30
>   98: 48 00 00 01     bl      98 <.__flush_tlb_pending+0x98>
>   9c: 60 00 00 00     nop
>       batch->index = 0;
> }
>   a0: 38 21 00 80     addi    r1,r1,128
>       if (i == 1)
>               flush_hash_page(batch->vaddr[0], batch->pte[0],
>                               batch->psize, batch->ssize, local);
>       else
>               flush_hash_range(i, local);
>       batch->index = 0;
>   a4: 38 00 00 00     li      r0,0
>   a8: f8 1f 00 08     std     r0,8(r31)
> }
>   ac: e8 01 00 10     ld      r0,16(r1)
>   b0: eb c1 ff f0     ld      r30,-16(r1)
>   b4: 7c 08 03 a6     mtlr    r0
>   b8: eb e1 ff f8     ld      r31,-8(r1)
>   bc: 4e 80 00 20     blr
> 
> 00000000000000c0 <.hpte_need_flush>:
>  *
>  * Must be called from within some kind of spinlock/non-preempt region...
>  */
> void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
>                    pte_t *ptep, unsigned long pte, int huge)
> {
>   c0: fa c1 ff b0     std     r22,-80(r1)
>   c4: 7c 08 02 a6     mflr    r0
>   c8: f8 01 00 10     std     r0,16(r1)
>        * NOTE: when using special 64K mappings in 4K environment like
>        * for SPEs, we obtain the page size from the slice, which thus
>        * must still exist (and thus the VMA not reused) at the time
>        * of this call
>        */
>       if (huge) {
>   cc: 2f a7 00 00     cmpdi   cr7,r7,0
>  *
>  * Must be called from within some kind of spinlock/non-preempt region...
>  */
> void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
>                    pte_t *ptep, unsigned long pte, int huge)
> {
>   d0: fb 21 ff c8     std     r25,-56(r1)
> #else
>               BUG();
>               psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
> #endif
>       } else
>               psize = pte_pagesize_index(mm, addr, pte);
>   d4: 6c c0 10 00     xoris   r0,r6,4096
>  *
>  * Must be called from within some kind of spinlock/non-preempt region...
>  */
> void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
>                    pte_t *ptep, unsigned long pte, int huge)
> {
>   d8: fb 41 ff d0     std     r26,-48(r1)
>   dc: 7c d9 33 78     mr      r25,r6
>   e0: fb 61 ff d8     std     r27,-40(r1)
>   e4: 7c b6 2b 78     mr      r22,r5
>   e8: fb 81 ff e0     std     r28,-32(r1)
>       struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
>   ec: eb 82 00 08     ld      r28,8(r2)
>  *
>  * Must be called from within some kind of spinlock/non-preempt region...
>  */
> void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
>                    pte_t *ptep, unsigned long pte, int huge)
> {
>   f0: fb c1 ff f0     std     r30,-16(r1)
> #else
>               BUG();
>               psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
> #endif
>       } else
>               psize = pte_pagesize_index(mm, addr, pte);
>   f4: 78 1b 27 e2     rldicl  r27,r0,36,63
>  *
>  * Must be called from within some kind of spinlock/non-preempt region...
>  */
> void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
>                    pte_t *ptep, unsigned long pte, int huge)
> {
>   f8: fb e1 ff f8     std     r31,-8(r1)
>   fc: 7c 7a 1b 78     mr      r26,r3
>  100: fa e1 ff b8     std     r23,-72(r1)
>       i = batch->index;
> 
>       /* We mask the address for the base page size. Huge pages will
>        * have applied their own masking already
>        */
>       addr &= PAGE_MASK;
>  104: 78 9f 03 e4     rldicr  r31,r4,0,47
>  *
>  * Must be called from within some kind of spinlock/non-preempt region...
>  */
> void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
>                    pte_t *ptep, unsigned long pte, int huge)
> {
>  108: fb 01 ff c0     std     r24,-64(r1)
>  10c: fb a1 ff e8     std     r29,-24(r1)
>  110: f8 21 ff 41     stdu    r1,-192(r1)
>       struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
>  114: ea ed 00 40     ld      r23,64(r13)
>  118: 7f dc ba 14     add     r30,r28,r23
>       unsigned int psize;
>       int ssize;
>       real_pte_t rpte;
>       int i;
> 
>       i = batch->index;
>  11c: eb 1e 00 08     ld      r24,8(r30)
>        * NOTE: when using special 64K mappings in 4K environment like
>        * for SPEs, we obtain the page size from the slice, which thus
>        * must still exist (and thus the VMA not reused) at the time
>        * of this call
>        */
>       if (huge) {
>  120: 41 9e 00 14     beq-    cr7,134 <.hpte_need_flush+0x74>
> #ifdef CONFIG_HUGETLB_PAGE
>               psize = get_slice_psize(mm, addr);;
>  124: 7f e4 fb 78     mr      r4,r31
>  128: 48 00 00 01     bl      128 <.hpte_need_flush+0x68>
>  12c: 60 00 00 00     nop
>  130: 7c 7b 1b 78     mr      r27,r3
> #endif
>       } else
>               psize = pte_pagesize_index(mm, addr, pte);
> 
>       /* Build full vaddr */
>       if (!is_kernel_addr(addr)) {
>  134: e8 02 00 10     ld      r0,16(r2)
>  138: 7f bf 00 40     cmpld   cr7,r31,r0
>  13c: 41 9d 00 a8     bgt-    cr7,1e4 <.hpte_need_flush+0x124>
> 
> /* Returns the segment size indicator for a user address */
> static inline int user_segment_size(unsigned long addr)
> {
>       /* Use 1T segments if possible for addresses >= 1T */
>       if (addr >= (1UL << SID_SHIFT_1T))
>  140: 38 00 ff ff     li      r0,-1
>  144: 3b a0 00 00     li      r29,0
>  148: 78 00 06 00     clrldi  r0,r0,24
>  14c: 7f bf 00 40     cmpld   cr7,r31,r0
>  150: 40 9d 00 0c     ble-    cr7,15c <.hpte_need_flush+0x9c>
>               return mmu_highuser_ssize;
>  154: e9 22 00 18     ld      r9,24(r2)
>  158: eb a9 00 02     lwa     r29,0(r9)
> 
> /* This is only valid for user addresses (which are below 2^44) */
> static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
>                                    int ssize)
> {
>       if (ssize == MMU_SEGSIZE_256M)
>  15c: 2f bd 00 00     cmpdi   cr7,r29,0
>               ssize = user_segment_size(addr);
>               vsid = get_vsid(mm->context.id, addr, ssize);
>  160: e9 7a 03 90     ld      r11,912(r26)
>  164: 40 9e 00 3c     bne-    cr7,1a0 <.hpte_need_flush+0xe0>
>               return vsid_scramble((context << USER_ESID_BITS)
>  168: 79 6b 83 e4     rldicr  r11,r11,16,47
>  16c: 7b e0 27 02     rldicl  r0,r31,36,28
>  170: 3d 20 0b f6     lis     r9,3062
>  174: 7c 00 5b 78     or      r0,r0,r11
>  178: 61 29 e6 1b     ori     r9,r9,58907
>  17c: 7c 00 49 d2     mulld   r0,r0,r9
>                                    | (ea >> SID_SHIFT), 256M);
>  180: 78 09 07 00     clrldi  r9,r0,28
>  184: 78 00 e1 20     rldicl  r0,r0,28,36
>  188: 7d 29 02 14     add     r9,r9,r0
>  18c: 38 09 00 01     addi    r0,r9,1
>  190: 78 00 e1 20     rldicl  r0,r0,28,36
>  194: 7c 00 4a 14     add     r0,r0,r9
>  198: 78 09 07 00     clrldi  r9,r0,28
>  19c: 48 00 00 38     b       1d4 <.hpte_need_flush+0x114>
>       return vsid_scramble((context << USER_ESID_BITS_1T)
>  1a0: 79 6b 26 e4     rldicr  r11,r11,4,59
>  1a4: 7b e0 c2 20     rldicl  r0,r31,24,40
>  1a8: 3d 20 00 bf     lis     r9,191
>  1ac: 7c 00 5b 78     or      r0,r0,r11
>  1b0: 61 29 50 d9     ori     r9,r9,20697
>  1b4: 7c 00 49 d2     mulld   r0,r0,r9
>                            | (ea >> SID_SHIFT_1T), 1T);
>  1b8: 78 09 02 20     clrldi  r9,r0,40
>  1bc: 78 00 46 02     rldicl  r0,r0,40,24
>  1c0: 7d 29 02 14     add     r9,r9,r0
>  1c4: 38 09 00 01     addi    r0,r9,1
>  1c8: 78 00 46 02     rldicl  r0,r0,40,24
>  1cc: 7c 00 4a 14     add     r0,r0,r9
>  1d0: 78 09 02 20     clrldi  r9,r0,40
>               WARN_ON(vsid == 0);
>  1d4: 7d 20 00 74     cntlzd  r0,r9
>  1d8: 78 00 d1 82     rldicl  r0,r0,58,6
>  1dc: 0b 00 00 00     tdnei   r0,0
>  1e0: 48 00 00 70     b       250 <.hpte_need_flush+0x190>
>       } else {
>               vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>  1e4: e9 22 00 20     ld      r9,32(r2)
>  1e8: eb a9 00 02     lwa     r29,0(r9)
> #endif /* 1 */
> 
> /* This is only valid for addresses >= PAGE_OFFSET */
> static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
> {
>       if (ssize == MMU_SEGSIZE_256M)
>  1ec: 2f bd 00 00     cmpdi   cr7,r29,0
>  1f0: 40 9e 00 34     bne-    cr7,224 <.hpte_need_flush+0x164>
>               return vsid_scramble(ea >> SID_SHIFT, 256M);
>  1f4: 3d 20 0b f6     lis     r9,3062
>  1f8: 7b e0 27 02     rldicl  r0,r31,36,28
>  1fc: 61 29 e6 1b     ori     r9,r9,58907
>  200: 7c 00 49 d2     mulld   r0,r0,r9
>  204: 78 09 07 00     clrldi  r9,r0,28
>  208: 78 00 e1 20     rldicl  r0,r0,28,36
>  20c: 7d 29 02 14     add     r9,r9,r0
>  210: 38 09 00 01     addi    r0,r9,1
>  214: 78 00 e1 20     rldicl  r0,r0,28,36
>  218: 7c 00 4a 14     add     r0,r0,r9
>  21c: 78 09 07 00     clrldi  r9,r0,28
>  220: 48 00 00 38     b       258 <.hpte_need_flush+0x198>
>       return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
>  224: 3d 20 00 bf     lis     r9,191
>  228: 7b e0 c2 20     rldicl  r0,r31,24,40
>  22c: 61 29 50 d9     ori     r9,r9,20697
>  230: 7c 00 49 d2     mulld   r0,r0,r9
>  234: 78 09 02 20     clrldi  r9,r0,40
>  238: 78 00 46 02     rldicl  r0,r0,40,24
>  23c: 7d 29 02 14     add     r9,r9,r0
>  240: 38 09 00 01     addi    r0,r9,1
>  244: 78 00 46 02     rldicl  r0,r0,40,24
>  248: 7c 00 4a 14     add     r0,r0,r9
>  24c: 78 09 02 20     clrldi  r9,r0,40
>  * Build a VA given VSID, EA and segment size
>  */
> static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
>                                  int ssize)
> {
>       if (ssize == MMU_SEGSIZE_256M)
>  250: 2f bd 00 00     cmpdi   cr7,r29,0
>  254: 40 9e 00 10     bne-    cr7,264 <.hpte_need_flush+0x1a4>
>               return (vsid << 28) | (ea & 0xfffffffUL);
>  258: 79 29 e0 e4     rldicr  r9,r9,28,35
>  25c: 7b e0 01 20     clrldi  r0,r31,36
>  260: 48 00 00 0c     b       26c <.hpte_need_flush+0x1ac>
>       return (vsid << 40) | (ea & 0xffffffffffUL);
>  264: 79 29 45 c6     rldicr  r9,r9,40,23
>  268: 7b e0 06 00     clrldi  r0,r31,24
>  26c: 7d 3f 03 78     or      r31,r9,r0
>        * Check if we have an active batch on this CPU. If not, just
>        * flush now and return. For now, we don global invalidates
>        * in that case, might be worth testing the mm cpu mask though
>        * and decide to use local invalidates instead...
>        */
>       if (!batch->active) {
>  270: 7c 1c b8 2e     lwzx    r0,r28,r23
>       } else {
>               vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>               ssize = mmu_kernel_ssize;
>       }
>       vaddr = hpt_va(addr, vsid, ssize);
>       rpte = __real_pte(__pte(pte), ptep);
>  274: 3d 36 00 01     addis   r9,r22,1
>        * Check if we have an active batch on this CPU. If not, just
>        * flush now and return. For now, we don global invalidates
>        * in that case, might be worth testing the mm cpu mask though
>        * and decide to use local invalidates instead...
>        */
>       if (!batch->active) {
>  278: 2f 80 00 00     cmpwi   cr7,r0,0
>       } else {
>               vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>               ssize = mmu_kernel_ssize;
>       }
>       vaddr = hpt_va(addr, vsid, ssize);
>       rpte = __real_pte(__pte(pte), ptep);
>  27c: eb 89 80 00     ld      r28,-32768(r9)
>        * Check if we have an active batch on this CPU. If not, just
>        * flush now and return. For now, we don global invalidates
>        * in that case, might be worth testing the mm cpu mask though
>        * and decide to use local invalidates instead...
>        */
>       if (!batch->active) {
>  280: 40 9e 00 28     bne-    cr7,2a8 <.hpte_need_flush+0x1e8>
>               flush_hash_page(vaddr, rpte, psize, ssize, 0);
>  284: 7f e3 fb 78     mr      r3,r31
>  288: 7f 24 cb 78     mr      r4,r25
>  28c: 7f 85 e3 78     mr      r5,r28
>  290: 7f 66 07 b4     extsw   r6,r27
>  294: 7f a7 eb 78     mr      r7,r29
>  298: 39 00 00 00     li      r8,0
>  29c: 48 00 00 01     bl      29c <.hpte_need_flush+0x1dc>
>  2a0: 60 00 00 00     nop
>  2a4: 48 00 00 bc     b       360 <.hpte_need_flush+0x2a0>
>        * will change mid stream.
>        *
>        * We also need to ensure only one page size is present in a given
>        * batch
>        */
>       if (i != 0 && (mm != batch->mm || batch->psize != psize ||
>  2a8: 7f 0b 07 b5     extsw.  r11,r24
>  2ac: 41 82 00 30     beq-    2dc <.hpte_need_flush+0x21c>
>  2b0: e8 1e 00 10     ld      r0,16(r30)
>  2b4: 7f ba 00 00     cmpd    cr7,r26,r0
>  2b8: 40 9e 00 1c     bne-    cr7,2d4 <.hpte_need_flush+0x214>
>  2bc: 80 1e 12 18     lwz     r0,4632(r30)
>  2c0: 7f 80 d8 00     cmpw    cr7,r0,r27
>  2c4: 40 9e 00 10     bne-    cr7,2d4 <.hpte_need_flush+0x214>
>  2c8: 80 1e 12 1c     lwz     r0,4636(r30)
>  2cc: 7f 80 e8 00     cmpw    cr7,r0,r29
>  2d0: 41 9e 00 1c     beq-    cr7,2ec <.hpte_need_flush+0x22c>
>                      batch->ssize != ssize)) {
>               __flush_tlb_pending(batch);
>  2d4: 7f c3 f3 78     mr      r3,r30
>  2d8: 48 00 00 01     bl      2d8 <.hpte_need_flush+0x218>
>               i = 0;
>       }
>       if (i == 0) {
>               batch->mm = mm;
>               batch->psize = psize;
>               batch->ssize = ssize;
>  2dc: 39 60 00 00     li      r11,0
>                      batch->ssize != ssize)) {
>               __flush_tlb_pending(batch);
>               i = 0;
>       }
>       if (i == 0) {
>               batch->mm = mm;
>  2e0: fb 5e 00 10     std     r26,16(r30)
>               batch->psize = psize;
>  2e4: 93 7e 12 18     stw     r27,4632(r30)
>               batch->ssize = ssize;
>  2e8: 93 be 12 1c     stw     r29,4636(r30)
>       }
>       batch->pte[i] = rpte;
>       batch->vaddr[i] = vaddr;
>       batch->index = ++i;
>  2ec: 38 0b 00 01     addi    r0,r11,1
>       if (i == 0) {
>               batch->mm = mm;
>               batch->psize = psize;
>               batch->ssize = ssize;
>       }
>       batch->pte[i] = rpte;
>  2f0: 39 2b 00 01     addi    r9,r11,1
>       batch->vaddr[i] = vaddr;
>  2f4: 39 6b 01 82     addi    r11,r11,386
>       batch->index = ++i;
>  2f8: 7c 00 07 b4     extsw   r0,r0
>       if (i == 0) {
>               batch->mm = mm;
>               batch->psize = psize;
>               batch->ssize = ssize;
>       }
>       batch->pte[i] = rpte;
>  2fc: 79 29 26 e4     rldicr  r9,r9,4,59
>       batch->vaddr[i] = vaddr;
>  300: 79 6b 1f 24     rldicr  r11,r11,3,60
>       if (i == 0) {
>               batch->mm = mm;
>               batch->psize = psize;
>               batch->ssize = ssize;
>       }
>       batch->pte[i] = rpte;
>  304: 7d 3e 4a 14     add     r9,r30,r9
>       batch->vaddr[i] = vaddr;
>  308: 7d 7e 5a 14     add     r11,r30,r11
>       if (i == 0) {
>               batch->mm = mm;
>               batch->psize = psize;
>               batch->ssize = ssize;
>       }
>       batch->pte[i] = rpte;
>  30c: fb 29 00 08     std     r25,8(r9)
>       batch->vaddr[i] = vaddr;
>       batch->index = ++i;
>       if (i >= PPC64_TLB_BATCH_NR)
>  310: 2f 80 00 bf     cmpwi   cr7,r0,191
>       if (i == 0) {
>               batch->mm = mm;
>               batch->psize = psize;
>               batch->ssize = ssize;
>       }
>       batch->pte[i] = rpte;
>  314: fb 89 00 10     std     r28,16(r9)
>       batch->vaddr[i] = vaddr;
>  318: fb eb 00 08     std     r31,8(r11)
>       batch->index = ++i;
>  31c: f8 1e 00 08     std     r0,8(r30)
>       if (i >= PPC64_TLB_BATCH_NR)
>  320: 40 9d 00 40     ble-    cr7,360 <.hpte_need_flush+0x2a0>
>               __flush_tlb_pending(batch);
> }
>  324: 38 21 00 c0     addi    r1,r1,192
>       }
>       batch->pte[i] = rpte;
>       batch->vaddr[i] = vaddr;
>       batch->index = ++i;
>       if (i >= PPC64_TLB_BATCH_NR)
>               __flush_tlb_pending(batch);
>  328: 7f c3 f3 78     mr      r3,r30
> }
>  32c: e8 01 00 10     ld      r0,16(r1)
>  330: ea c1 ff b0     ld      r22,-80(r1)
>  334: 7c 08 03 a6     mtlr    r0
>  338: ea e1 ff b8     ld      r23,-72(r1)
>  33c: eb 01 ff c0     ld      r24,-64(r1)
>  340: eb 21 ff c8     ld      r25,-56(r1)
>  344: eb 41 ff d0     ld      r26,-48(r1)
>  348: eb 61 ff d8     ld      r27,-40(r1)
>  34c: eb 81 ff e0     ld      r28,-32(r1)
>  350: eb a1 ff e8     ld      r29,-24(r1)
>  354: eb c1 ff f0     ld      r30,-16(r1)
>  358: eb e1 ff f8     ld      r31,-8(r1)
>       }
>       batch->pte[i] = rpte;
>       batch->vaddr[i] = vaddr;
>       batch->index = ++i;
>       if (i >= PPC64_TLB_BATCH_NR)
>               __flush_tlb_pending(batch);
>  35c: 48 00 00 00     b       35c <.hpte_need_flush+0x29c>
> }
>  360: 38 21 00 c0     addi    r1,r1,192
>  364: e8 01 00 10     ld      r0,16(r1)
>  368: ea c1 ff b0     ld      r22,-80(r1)
>  36c: 7c 08 03 a6     mtlr    r0
>  370: ea e1 ff b8     ld      r23,-72(r1)
>  374: eb 01 ff c0     ld      r24,-64(r1)
>  378: eb 21 ff c8     ld      r25,-56(r1)
>  37c: eb 41 ff d0     ld      r26,-48(r1)
>  380: eb 61 ff d8     ld      r27,-40(r1)
>  384: eb 81 ff e0     ld      r28,-32(r1)
>  388: eb a1 ff e8     ld      r29,-24(r1)
>  38c: eb c1 ff f0     ld      r30,-16(r1)
>  390: eb e1 ff f8     ld      r31,-8(r1)
>  394: 4e 80 00 20     blr
> 
> 0000000000000398 <.__flush_hash_table_range>:
>  */
> #ifdef CONFIG_HOTPLUG
> 
> void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
>                             unsigned long end)
> {
>  398: fb 81 ff e0     std     r28,-32(r1)
>  39c: 7c 08 02 a6     mflr    r0
>  3a0: fb c1 ff f0     std     r30,-16(r1)
>       unsigned long flags;
> 
>       start = _ALIGN_DOWN(start, PAGE_SIZE);
>       end = _ALIGN_UP(end, PAGE_SIZE);
>  3a4: 3c a5 00 01     addis   r5,r5,1
>  */
> #ifdef CONFIG_HOTPLUG
> 
> void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
>                             unsigned long end)
> {
>  3a8: f8 01 00 10     std     r0,16(r1)
>  3ac: 7c 7e 1b 78     mr      r30,r3
>  3b0: fb a1 ff e8     std     r29,-24(r1)
>       unsigned long flags;
> 
>       start = _ALIGN_DOWN(start, PAGE_SIZE);
>       end = _ALIGN_UP(end, PAGE_SIZE);
>  3b4: 38 a5 ff ff     addi    r5,r5,-1
>  */
> #ifdef CONFIG_HOTPLUG
> 
> void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
>                             unsigned long end)
> {
>  3b8: fb e1 ff f8     std     r31,-8(r1)
>  3bc: f8 21 ff 71     stdu    r1,-144(r1)
>       unsigned long flags;
> 
>       start = _ALIGN_DOWN(start, PAGE_SIZE);
>       end = _ALIGN_UP(end, PAGE_SIZE);
>  3c0: 78 bc 03 e4     rldicr  r28,r5,0,47
> 
>       BUG_ON(!mm->pgd);
>  3c4: e8 03 00 48     ld      r0,72(r3)
>  3c8: 7c 00 00 74     cntlzd  r0,r0
>  3cc: 78 00 d1 82     rldicl  r0,r0,58,6
>  3d0: 0b 00 00 00     tdnei   r0,0
> void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
>                             unsigned long end)
> {
>       unsigned long flags;
> 
>       start = _ALIGN_DOWN(start, PAGE_SIZE);
>  3d4: 78 9f 03 e4     rldicr  r31,r4,0,47
> 
> static inline unsigned long raw_local_irq_disable(void)
> {
>       unsigned long flags, zero;
> 
>       __asm__ __volatile__("li %1,0; lbz %0,%2(13); stb %1,%2(13)"
>  3d8: 38 00 00 00     li      r0,0
>  3dc: 8b ad 01 da     lbz     r29,474(r13)
>  3e0: 98 0d 01 da     stb     r0,474(r13)
> 
> static inline void arch_enter_lazy_mmu_mode(void)
> {
>       struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
> 
>       batch->active = 1;
>  3e4: 38 00 00 01     li      r0,1
>  3e8: e9 6d 00 40     ld      r11,64(r13)
> 
> #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
> 
> static inline void arch_enter_lazy_mmu_mode(void)
> {
>       struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
>  3ec: e9 22 00 08     ld      r9,8(r2)
> 
>       batch->active = 1;
>  3f0: 7c 09 59 2e     stwx    r0,r9,r11
>  3f4: 48 00 00 58     b       44c <.__flush_hash_table_range+0xb4>
>       pte_t *pt = NULL;
> 
>       pg = pgdir + pgd_index(ea);
>       if (!pgd_none(*pg)) {
>               pu = pud_offset(pg, ea);
>               if (!pud_none(*pu)) {
>  3f8: e9 3e 00 48     ld      r9,72(r30)
>  3fc: 7c 09 00 2a     ldx     r0,r9,r0
>  400: 2f a0 00 00     cmpdi   cr7,r0,0
>                       pm = pmd_offset(pu, ea);
>                       if (pmd_present(*pm))
>  404: 78 09 05 a4     rldicr  r9,r0,0,54
>       pte_t *pt = NULL;
> 
>       pg = pgdir + pgd_index(ea);
>       if (!pgd_none(*pg)) {
>               pu = pud_offset(pg, ea);
>               if (!pud_none(*pu)) {
>  408: 41 9e 00 40     beq-    cr7,448 <.__flush_hash_table_range+0xb0>
>                       pm = pmd_offset(pu, ea);
>                       if (pmd_present(*pm))
>  40c: 7d 2b 48 2a     ldx     r9,r11,r9
>                               pt = pte_offset_kernel(pm, ea);
>  410: 7b e0 85 22     rldicl  r0,r31,48,52
>       pg = pgdir + pgd_index(ea);
>       if (!pgd_none(*pg)) {
>               pu = pud_offset(pg, ea);
>               if (!pud_none(*pu)) {
>                       pm = pmd_offset(pu, ea);
>                       if (pmd_present(*pm))
>  414: 2f a9 00 00     cmpdi   cr7,r9,0
>                               pt = pte_offset_kernel(pm, ea);
>  418: 78 0b 1f 24     rldicr  r11,r0,3,60
>  41c: 79 26 05 a4     rldicr  r6,r9,0,54
>       arch_enter_lazy_mmu_mode();
>       for (; start < end; start += PAGE_SIZE) {
>               pte_t *ptep = find_linux_pte(mm->pgd, start);
>               unsigned long pte;
> 
>               if (ptep == NULL)
>  420: 7c a6 5a 15     add.    r5,r6,r11
>       pg = pgdir + pgd_index(ea);
>       if (!pgd_none(*pg)) {
>               pu = pud_offset(pg, ea);
>               if (!pud_none(*pu)) {
>                       pm = pmd_offset(pu, ea);
>                       if (pmd_present(*pm))
>  424: 41 9e 00 24     beq-    cr7,448 <.__flush_hash_table_range+0xb0>
>  428: 41 c2 00 20     beq-    448 <.__flush_hash_table_range+0xb0>
>                       continue;
>               pte = pte_val(*ptep);
>  42c: 7c c6 58 2a     ldx     r6,r6,r11
>               if (!(pte & _PAGE_HASHPTE))
>  430: 54 c0 01 27     rlwinm. r0,r6,0,4,19
>  434: 41 82 00 14     beq-    448 <.__flush_hash_table_range+0xb0>
>                       continue;
>               hpte_need_flush(mm, start, ptep, pte, 0);
>  438: 7f e4 fb 78     mr      r4,r31
>  43c: 7f c3 f3 78     mr      r3,r30
>  440: 38 e0 00 00     li      r7,0
>  444: 48 00 00 01     bl      444 <.__flush_hash_table_range+0xac>
>        * to being hashed). This is not the most performance oriented
>        * way to do things but is fine for our needs here.
>        */
>       local_irq_save(flags);
>       arch_enter_lazy_mmu_mode();
>       for (; start < end; start += PAGE_SIZE) {
>  448: 3f ff 00 01     addis   r31,r31,1
>  44c: 7f bf e0 40     cmpld   cr7,r31,r28
>       pte_t *pt = NULL;
> 
>       pg = pgdir + pgd_index(ea);
>       if (!pgd_none(*pg)) {
>               pu = pud_offset(pg, ea);
>               if (!pud_none(*pu)) {
>  450: 7b e0 c5 e0     rldicl  r0,r31,24,55
>                       pm = pmd_offset(pu, ea);
>                       if (pmd_present(*pm))
>  454: 7b e9 25 22     rldicl  r9,r31,36,52
>       pte_t *pt = NULL;
> 
>       pg = pgdir + pgd_index(ea);
>       if (!pgd_none(*pg)) {
>               pu = pud_offset(pg, ea);
>               if (!pud_none(*pu)) {
>  458: 78 00 1f 24     rldicr  r0,r0,3,60
>                       pm = pmd_offset(pu, ea);
>                       if (pmd_present(*pm))
>  45c: 79 2b 1f 24     rldicr  r11,r9,3,60
>  460: 41 9c ff 98     blt+    cr7,3f8 <.__flush_hash_table_range+0x60>
> }
> 
> static inline void arch_leave_lazy_mmu_mode(void)
> {
>       struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
>  464: eb cd 00 40     ld      r30,64(r13)
>  468: eb e2 00 08     ld      r31,8(r2)
>  46c: 7c 7f f2 14     add     r3,r31,r30
> 
>       if (batch->index)
>  470: e8 03 00 08     ld      r0,8(r3)
>  474: 2f a0 00 00     cmpdi   cr7,r0,0
>  478: 41 9e 00 08     beq-    cr7,480 <.__flush_hash_table_range+0xe8>
>               __flush_tlb_pending(batch);
>  47c: 48 00 00 01     bl      47c <.__flush_hash_table_range+0xe4>
>               if (!(pte & _PAGE_HASHPTE))
>                       continue;
>               hpte_need_flush(mm, start, ptep, pte, 0);
>       }
>       arch_leave_lazy_mmu_mode();
>       local_irq_restore(flags);
>  480: 2f bd 00 00     cmpdi   cr7,r29,0
>       batch->active = 0;
>  484: 38 00 00 00     li      r0,0
>  488: 38 60 00 00     li      r3,0
>  48c: 7c 1f f1 2e     stwx    r0,r31,r30
>  490: 41 9e 00 08     beq-    cr7,498 <.__flush_hash_table_range+0x100>
>  494: 7f a3 eb 78     mr      r3,r29
>  498: 48 00 00 01     bl      498 <.__flush_hash_table_range+0x100>
>  49c: 60 00 00 00     nop
> }
>  4a0: 38 21 00 90     addi    r1,r1,144
>  4a4: e8 01 00 10     ld      r0,16(r1)
>  4a8: eb 81 ff e0     ld      r28,-32(r1)
>  4ac: 7c 08 03 a6     mtlr    r0
>  4b0: eb a1 ff e8     ld      r29,-24(r1)
>  4b4: eb c1 ff f0     ld      r30,-16(r1)
>  4b8: eb e1 ff f8     ld      r31,-8(r1)
>  4bc: 4e 80 00 20     blr

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev
