Le 28/09/2023 à 21:48, Hari Bathini a écrit :
> patch_instruction() entails setting up pte, patching the instruction,
> clearing the pte and flushing the tlb. If multiple instructions need
> to be patched, every instruction would have to go through the above
> drill unnecessarily. Instead, introduce function patch_instructions()
> that sets up the pte, clears the pte and flushes the tlb only once per
> page range of instructions to be patched. This adds a slight overhead
> to patch_instruction() call while improving the patching time for
> scenarios where more than one instruction needs to be patched.

On my powerpc8xx, this patch leads to an increase of about 8% of the 
time needed to activate ftrace function tracer.

The problem is it complexifies patch_instruction().

Before your patch:

00000234 <patch_instruction>:
  234:  48 00 00 6c     b       2a0 <patch_instruction+0x6c>
  238:  7c e0 00 a6     mfmsr   r7
  23c:  7c 51 13 a6     mtspr   81,r2
  240:  3d 40 00 00     lis     r10,0
                        242: R_PPC_ADDR16_HA    .data
  244:  39 4a 00 00     addi    r10,r10,0
                        246: R_PPC_ADDR16_LO    .data
  248:  7c 69 1b 78     mr      r9,r3
  24c:  3d 29 40 00     addis   r9,r9,16384
  250:  81 0a 00 08     lwz     r8,8(r10)
  254:  55 29 00 26     clrrwi  r9,r9,12
  258:  81 4a 00 04     lwz     r10,4(r10)
  25c:  61 29 01 25     ori     r9,r9,293
  260:  91 28 00 00     stw     r9,0(r8)
  264:  55 49 00 26     clrrwi  r9,r10,12
  268:  50 6a 05 3e     rlwimi  r10,r3,0,20,31
  26c:  90 8a 00 00     stw     r4,0(r10)
  270:  7c 00 50 6c     dcbst   0,r10
  274:  7c 00 04 ac     hwsync
  278:  7c 00 1f ac     icbi    0,r3
  27c:  7c 00 04 ac     hwsync
  280:  4c 00 01 2c     isync
  284:  38 60 00 00     li      r3,0
  288:  39 40 00 00     li      r10,0
  28c:  91 48 00 00     stw     r10,0(r8)
  290:  7c 00 4a 64     tlbie   r9,r0
  294:  7c 00 04 ac     hwsync
  298:  7c e0 01 24     mtmsr   r7
  29c:  4e 80 00 20     blr
  2a0:  90 83 00 00     stw     r4,0(r3)
  2a4:  7c 00 18 6c     dcbst   0,r3
  2a8:  7c 00 04 ac     hwsync
  2ac:  7c 00 1f ac     icbi    0,r3
  2b0:  7c 00 04 ac     hwsync
  2b4:  4c 00 01 2c     isync
  2b8:  38 60 00 00     li      r3,0
  2bc:  4e 80 00 20     blr
  2c0:  38 60 ff ff     li      r3,-1
  2c4:  4b ff ff c4     b       288 <patch_instruction+0x54>
  2c8:  38 60 ff ff     li      r3,-1
  2cc:  4e 80 00 20     blr


After you patch:

0000020c <__do_patch_instructions>:
  20c:  94 21 ff e0     stwu    r1,-32(r1)
  210:  3d 40 00 00     lis     r10,0
                        212: R_PPC_ADDR16_HA    .data
  214:  93 81 00 10     stw     r28,16(r1)
  218:  93 c1 00 18     stw     r30,24(r1)
  21c:  93 a1 00 14     stw     r29,20(r1)
  220:  93 e1 00 1c     stw     r31,28(r1)
  224:  39 4a 00 00     addi    r10,r10,0
                        226: R_PPC_ADDR16_LO    .data
  228:  7c 69 1b 78     mr      r9,r3
  22c:  7c be 2b 79     mr.     r30,r5
  230:  3d 29 40 00     addis   r9,r9,16384
  234:  83 ea 00 04     lwz     r31,4(r10)
  238:  83 aa 00 08     lwz     r29,8(r10)
  23c:  55 29 00 26     clrrwi  r9,r9,12
  240:  61 29 01 25     ori     r9,r9,293
  244:  57 fc 00 26     clrrwi  r28,r31,12
  248:  91 3d 00 00     stw     r9,0(r29)
  24c:  50 7f 05 3e     rlwimi  r31,r3,0,20,31
  250:  40 82 00 4c     bne     29c <__do_patch_instructions+0x90>
  254:  81 24 00 00     lwz     r9,0(r4)
  258:  91 3f 00 00     stw     r9,0(r31)
  25c:  7c 00 f8 6c     dcbst   0,r31
  260:  7c 00 04 ac     hwsync
  264:  7c 00 1f ac     icbi    0,r3
  268:  7c 00 04 ac     hwsync
  26c:  4c 00 01 2c     isync
  270:  38 60 00 00     li      r3,0
  274:  39 20 00 00     li      r9,0
  278:  91 3d 00 00     stw     r9,0(r29)
  27c:  7c 00 e2 64     tlbie   r28,r0
  280:  7c 00 04 ac     hwsync
  284:  83 81 00 10     lwz     r28,16(r1)
  288:  83 a1 00 14     lwz     r29,20(r1)
  28c:  83 c1 00 18     lwz     r30,24(r1)
  290:  83 e1 00 1c     lwz     r31,28(r1)
  294:  38 21 00 20     addi    r1,r1,32
  298:  4e 80 00 20     blr
  29c:  2c 06 00 00     cmpwi   r6,0
  2a0:  7c 08 02 a6     mflr    r0
  2a4:  90 01 00 24     stw     r0,36(r1)
  2a8:  40 82 00 24     bne     2cc <__do_patch_instructions+0xc0>
  2ac:  7f e3 fb 78     mr      r3,r31
  2b0:  48 00 00 01     bl      2b0 <__do_patch_instructions+0xa4>
                        2b0: R_PPC_REL24        memcpy
  2b4:  7c 9f f2 14     add     r4,r31,r30
  2b8:  7f e3 fb 78     mr      r3,r31
  2bc:  48 00 00 01     bl      2bc <__do_patch_instructions+0xb0>
                        2bc: R_PPC_REL24        flush_icache_range
  2c0:  80 01 00 24     lwz     r0,36(r1)
  2c4:  7c 08 03 a6     mtlr    r0
  2c8:  4b ff ff a8     b       270 <__do_patch_instructions+0x64>
  2cc:  80 84 00 00     lwz     r4,0(r4)
  2d0:  57 c5 f0 be     srwi    r5,r30,2
  2d4:  7f e3 fb 78     mr      r3,r31
  2d8:  48 00 00 01     bl      2d8 <__do_patch_instructions+0xcc>
                        2d8: R_PPC_REL24        memset32
  2dc:  4b ff ff d8     b       2b4 <__do_patch_instructions+0xa8>
  2e0:  38 60 ff ff     li      r3,-1
  2e4:  4b ff ff 90     b       274 <__do_patch_instructions+0x68>

...

00000310 <patch_instruction>:
  310:  94 21 ff e0     stwu    r1,-32(r1)
  314:  90 81 00 08     stw     r4,8(r1)
  318:  48 00 00 40     b       358 <patch_instruction+0x48>
  31c:  7c 08 02 a6     mflr    r0
  320:  90 01 00 24     stw     r0,36(r1)
  324:  93 e1 00 1c     stw     r31,28(r1)
  328:  7f e0 00 a6     mfmsr   r31
  32c:  7c 51 13 a6     mtspr   81,r2
  330:  38 c0 00 00     li      r6,0
  334:  38 81 00 08     addi    r4,r1,8
  338:  38 a0 00 00     li      r5,0
  33c:  4b ff fe d1     bl      20c <__do_patch_instructions>
  340:  7f e0 01 24     mtmsr   r31
  344:  80 01 00 24     lwz     r0,36(r1)
  348:  83 e1 00 1c     lwz     r31,28(r1)
  34c:  7c 08 03 a6     mtlr    r0
  350:  38 21 00 20     addi    r1,r1,32
  354:  4e 80 00 20     blr
  358:  81 21 00 08     lwz     r9,8(r1)
  35c:  91 23 00 00     stw     r9,0(r3)
  360:  7c 00 18 6c     dcbst   0,r3
  364:  7c 00 04 ac     hwsync
  368:  7c 00 1f ac     icbi    0,r3
  36c:  7c 00 04 ac     hwsync
  370:  4c 00 01 2c     isync
  374:  38 60 00 00     li      r3,0
  378:  4b ff ff d8     b       350 <patch_instruction+0x40>
  37c:  38 60 ff ff     li      r3,-1
  380:  4b ff ff d0     b       350 <patch_instruction+0x40>


Christophe

> 
> Signed-off-by: Hari Bathini <hbath...@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h |  1 +
>   arch/powerpc/lib/code-patching.c         | 93 +++++++++++++++++++++---
>   2 files changed, 85 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h 
> b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..43a4aedfa703 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>   int patch_branch(u32 *addr, unsigned long target, int flags);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_instructions(void *addr, void *code, size_t len, bool 
> repeat_instr);
>   
>   static inline unsigned long patch_site_addr(s32 *site)
>   {
> diff --git a/arch/powerpc/lib/code-patching.c 
> b/arch/powerpc/lib/code-patching.c
> index b00112d7ad46..4ff002bc41f6 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -278,7 +278,36 @@ static void unmap_patch_area(unsigned long addr)
>       flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
> +static int __patch_instructions(u32 *patch_addr, void *code, size_t len, 
> bool repeat_instr)
> +{
> +     unsigned long start = (unsigned long)patch_addr;
> +
> +     /* Repeat instruction */
> +     if (repeat_instr) {
> +             ppc_inst_t instr = ppc_inst_read(code);
> +
> +             if (ppc_inst_prefixed(instr)) {
> +                     u64 val = ppc_inst_as_ulong(instr);
> +
> +                     memset64((uint64_t *)patch_addr, val, len / 8);
> +             } else {
> +                     u32 val = ppc_inst_val(instr);
> +
> +                     memset32(patch_addr, val, len / 4);
> +             }
> +     } else
> +             memcpy(patch_addr, code, len);
> +
> +     smp_wmb();      /* smp write barrier */
> +     flush_icache_range(start, start + len);
> +     return 0;
> +}
> +
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions_mm(u32 *addr, void *code, size_t len, 
> bool repeat_instr)
>   {
>       int err;
>       u32 *patch_addr;
> @@ -307,11 +336,15 @@ static int __do_patch_instruction_mm(u32 *addr, 
> ppc_inst_t instr)
>   
>       orig_mm = start_using_temp_mm(patching_mm);
>   
> -     err = __patch_instruction(addr, instr, patch_addr);
> +     /* Single instruction case. */
> +     if (len == 0) {
> +             err = __patch_instruction(addr, *(ppc_inst_t *)code, 
> patch_addr);
>   
> -     /* hwsync performed by __patch_instruction (sync) if successful */
> -     if (err)
> -             mb();  /* sync */
> +             /* hwsync performed by __patch_instruction (sync) if successful 
> */
> +             if (err)
> +                     mb();  /* sync */
> +     } else
> +             err = __patch_instructions(patch_addr, code, len, repeat_instr);
>   
>       /* context synchronisation performed by __patch_instruction (isync or 
> exception) */
>       stop_using_temp_mm(patching_mm, orig_mm);
> @@ -328,7 +361,11 @@ static int __do_patch_instruction_mm(u32 *addr, 
> ppc_inst_t instr)
>       return err;
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions(u32 *addr, void *code, size_t len, bool 
> repeat_instr)
>   {
>       int err;
>       u32 *patch_addr;
> @@ -345,7 +382,11 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t 
> instr)
>       if (radix_enabled())
>               asm volatile("ptesync": : :"memory");
>   
> -     err = __patch_instruction(addr, instr, patch_addr);
> +     /* Single instruction case. */
> +     if (len == 0)
> +             err = __patch_instruction(addr, *(ppc_inst_t *)code, 
> patch_addr);
> +     else
> +             err = __patch_instructions(patch_addr, code, len, repeat_instr);
>   
>       pte_clear(&init_mm, text_poke_addr, pte);
>       flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> @@ -369,15 +410,49 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>   
>       local_irq_save(flags);
>       if (mm_patch_enabled())
> -             err = __do_patch_instruction_mm(addr, instr);
> +             err = __do_patch_instructions_mm(addr, &instr, 0, false);
>       else
> -             err = __do_patch_instruction(addr, instr);
> +             err = __do_patch_instructions(addr, &instr, 0, false);
>       local_irq_restore(flags);
>   
>       return err;
>   }
>   NOKPROBE_SYMBOL(patch_instruction);
>   
> +/*
> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
> + *
> + * If repeat_instr is true, the same instruction is filled for
> + * 'len' bytes.
> + */
> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr)
> +{
> +     unsigned long flags;
> +     size_t plen;
> +     int err;
> +
> +     while (len > 0) {
> +             plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
> +
> +             local_irq_save(flags);
> +             if (mm_patch_enabled())
> +                     err = __do_patch_instructions_mm(addr, code, plen, 
> repeat_instr);
> +             else
> +                     err = __do_patch_instructions(addr, code, plen, 
> repeat_instr);
> +             local_irq_restore(flags);
> +             if (err)
> +                     break;
> +
> +             len -= plen;
> +             addr = addr + plen;
> +             if (!repeat_instr)
> +                     code = code + plen;
> +     }
> +
> +     return err;
> +}
> +NOKPROBE_SYMBOL(patch_instructions);
> +
>   int patch_branch(u32 *addr, unsigned long target, int flags)
>   {
>       ppc_inst_t instr;

Reply via email to