Hi,

On 05/10/2022 at 07:32, Benjamin Gray wrote:
> Adds a generic text patching mechanism for patches of size int or long
> bytes.
> 
> The patch_instruction function is reimplemented in terms of this
> more generic function. This generic implementation allows patching of
> arbitrary long data, such as pointers on 64-bit.
> 
> On 32-bit patch_int is marked noinline to prevent a mis-optimisation.
> Without noinline, inside patch_branch the compiler may inline all the
> way to do_patch_memory, preventing the compiler from inlining
> do_patch_memory into patch_int. This would needlessly force patch_int
> to be a branch to do_patch_memory.
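
(For reference, a condensed sketch of the 32-bit call chain this
paragraph describes -- patch_branch() as in the existing
code-patching.c, patch_uint()/patch_instruction() as proposed here,
error handling elided:)

int patch_branch(u32 *addr, unsigned long target, int flags)
{
	ppc_inst_t instr;

	if (create_branch(&instr, addr, target, flags))
		return -ERANGE;

	/* on 32-bit this is a static inline wrapper around patch_uint() */
	return patch_instruction(addr, instr);
}

/* noinline keeps patch_uint() out of line so do_patch_memory() can
 * still be inlined into it instead of being reached through a stub. */
noinline int patch_uint(void *addr, unsigned int val)
{
	return do_patch_memory(addr, val, false);
}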

I'm on a business trip this week so I can't test it on hardware, but
the generated code looks horrid and sub-optimal, with a stack frame and
so many registers saved into it. That's mpc885_ads_defconfig built with
GCC 12, without modules, without stackprotector, with 4k pages.

00000168 <__patch_memory.constprop.0>:
  168:  90 83 00 00     stw     r4,0(r3)
  16c:  7c 00 18 6c     dcbst   0,r3
  170:  7c 00 04 ac     hwsync
  174:  7c 00 2f ac     icbi    0,r5
  178:  7c 00 04 ac     hwsync
  17c:  4c 00 01 2c     isync
  180:  38 60 00 00     li      r3,0
  184:  4e 80 00 20     blr
  188:  38 60 ff ff     li      r3,-1
  18c:  4e 80 00 20     blr

00000190 <raw_patch_instruction>:
  190:  90 83 00 00     stw     r4,0(r3)
  194:  7c 00 18 6c     dcbst   0,r3
  198:  7c 00 04 ac     hwsync
  19c:  7c 00 1f ac     icbi    0,r3
  1a0:  7c 00 04 ac     hwsync
  1a4:  4c 00 01 2c     isync
  1a8:  38 60 00 00     li      r3,0
  1ac:  4e 80 00 20     blr
  1b0:  38 60 ff ff     li      r3,-1
  1b4:  4e 80 00 20     blr

000001b8 <patch_uint>:
  1b8:  7c 65 1b 78     mr      r5,r3
  1bc:  48 00 00 a4     b       260 <patch_uint+0xa8>
  1c0:  94 21 ff e0     stwu    r1,-32(r1)
  1c4:  7c 08 02 a6     mflr    r0
  1c8:  90 01 00 24     stw     r0,36(r1)
  1cc:  93 81 00 10     stw     r28,16(r1)
  1d0:  93 a1 00 14     stw     r29,20(r1)
  1d4:  93 c1 00 18     stw     r30,24(r1)
  1d8:  93 e1 00 1c     stw     r31,28(r1)
  1dc:  7f 80 00 a6     mfmsr   r28
  1e0:  7c 51 13 a6     mtspr   81,r2
  1e4:  3d 20 00 00     lis     r9,0
                        1e6: R_PPC_ADDR16_HA    .data
  1e8:  81 49 00 00     lwz     r10,0(r9)
                        1ea: R_PPC_ADDR16_LO    .data
  1ec:  3d 20 00 00     lis     r9,0
                        1ee: R_PPC_ADDR16_HA    init_mm+0x24
  1f0:  83 ea 00 04     lwz     r31,4(r10)
  1f4:  80 e9 00 00     lwz     r7,0(r9)
                        1f6: R_PPC_ADDR16_LO    init_mm+0x24
  1f8:  57 e8 65 3a     rlwinm  r8,r31,12,20,29
  1fc:  7f a7 40 2e     lwzx    r29,r7,r8
  200:  7c 69 1b 78     mr      r9,r3
  204:  3d 29 40 00     addis   r9,r9,16384
  208:  57 fe b5 3a     rlwinm  r30,r31,22,20,29
  20c:  55 29 00 26     clrrwi  r9,r9,12
  210:  61 29 01 25     ori     r9,r9,293
  214:  57 bd 00 26     clrrwi  r29,r29,12
  218:  3f de c0 00     addis   r30,r30,-16384
  21c:  7d 3d f1 2e     stwx    r9,r29,r30
  220:  53 e3 00 26     rlwimi  r3,r31,0,0,19
  224:  4b ff ff 45     bl      168 <__patch_memory.constprop.0>
  228:  39 20 00 00     li      r9,0
  22c:  7d 3d f1 2e     stwx    r9,r29,r30
  230:  57 ff 00 26     clrrwi  r31,r31,12
  234:  7c 00 fa 64     tlbie   r31,r0
  238:  7c 00 04 ac     hwsync
  23c:  7f 80 01 24     mtmsr   r28
  240:  80 01 00 24     lwz     r0,36(r1)
  244:  83 81 00 10     lwz     r28,16(r1)
  248:  83 a1 00 14     lwz     r29,20(r1)
  24c:  83 c1 00 18     lwz     r30,24(r1)
  250:  83 e1 00 1c     lwz     r31,28(r1)
  254:  7c 08 03 a6     mtlr    r0
  258:  38 21 00 20     addi    r1,r1,32
  25c:  4e 80 00 20     blr
  260:  4b ff ff 08     b       168 <__patch_memory.constprop.0>
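
(Mapping the listing back to C: the framed body of <patch_uint> above
looks like do_patch_memory()/__do_patch_memory() from the patch below
inlined into it -- condensed sketch, config-specific details and error
handling elided:)

noinline int patch_uint(void *addr, unsigned int val)
{
	unsigned long flags;
	int err;

	/* not yet initialised: the "mr r5,r3; b 260" tail call above,
	 * going straight to __patch_memory.constprop.0 */
	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
	    !static_branch_likely(&poking_init_done))
		return __patch_memory(addr, val, addr, false);

	local_irq_save(flags);			/* mfmsr r28 ... */
	/* map addr into the per-cpu text_poke_area, store through the
	 * alias (bl __patch_memory.constprop.0), then pte_clear + tlbie */
	err = __do_patch_memory(addr, val, false);
	local_irq_restore(flags);		/* ... mtmsr r28 */

	return err;
}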


Christophe


> 
> Signed-off-by: Benjamin Gray <bg...@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h | 29 ++++++++++
>   arch/powerpc/lib/code-patching.c         | 73 ++++++++++++++++++------
>   2 files changed, 85 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..170bfa848c7c 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t *instr, const u32 *addr,
>   int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>                      unsigned long target, int flags);
>   int patch_branch(u32 *addr, unsigned long target, int flags);
> +
> +/* patch_uint and patch_ulong must only be called on addresses where the patch
> + * does not cross a cacheline, otherwise it may not be flushed properly and
> + * mixes of new and stale data may be observed.
> + *
> + * patch_instruction and other instruction patchers automatically satisfy this
> + * requirement due to instruction alignment requirements.
> + */
> +
> +int patch_uint(void *addr, unsigned int val);
> +
> +#ifdef CONFIG_PPC64
> +
> +int patch_ulong(void *addr, unsigned long val);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
> +
> +#else
> +
> +static inline int patch_ulong(void *addr, unsigned long val)
> +{
> +     return patch_uint(addr, val);
> +}
> +
> +static inline int patch_instruction(u32 *addr, ppc_inst_t instr)
> +{
> +     return patch_uint(addr, ppc_inst_val(instr));
> +}
> +
> +#endif
> +
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
>   
>   static inline unsigned long patch_site_addr(s32 *site)
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index 125c55e3e148..ecdd2e523d9a 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -15,20 +15,24 @@
>   #include <asm/code-patching.h>
>   #include <asm/inst.h>
>   
> -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
> +static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
> +                        bool is_dword)
>   {
> -     if (!ppc_inst_prefixed(instr)) {
> -             u32 val = ppc_inst_val(instr);
> -
> -             __put_kernel_nofault(patch_addr, &val, u32, failed);
> -     } else {
> -             u64 val = ppc_inst_as_ulong(instr);
> +     /* Prefixed instruction may cross cacheline if cacheline smaller than 64 bytes */
> +     BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64);
>   
> +     if (unlikely(is_dword))
>               __put_kernel_nofault(patch_addr, &val, u64, failed);
> -     }
> +     else
> +             __put_kernel_nofault(patch_addr, &val, u32, failed);
>   
> -     asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
> -                                                         "r" (exec_addr));
> +     /* Assume data is inside a single cacheline */
> +     dcbst(patch_addr);
> +     mb(); /* sync */
> +     /* Flush on the EA that may be executed in case of a non-coherent icache */
> +     icbi(exec_addr);
> +     mb(); /* sync */
> +     isync();
>   
>       return 0;
>   
> @@ -38,7 +42,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr
>   
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
>   {
> -     return __patch_instruction(addr, instr, addr);
> +     if (ppc_inst_prefixed(instr))
> +             return __patch_memory(addr, ppc_inst_as_ulong(instr), addr, true);
> +     else
> +             return __patch_memory(addr, ppc_inst_val(instr), addr, false);
>   }
>   
>   static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
> @@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long addr)
>       flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int __do_patch_memory(void *addr, unsigned long val, bool is_dword)
>   {
>       int err;
>       u32 *patch_addr;
> @@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>       if (radix_enabled())
>               asm volatile("ptesync": : :"memory");
>   
> -     err = __patch_instruction(addr, instr, patch_addr);
> +     err = __patch_memory(patch_addr, val, addr, is_dword);
>   
>       pte_clear(&init_mm, text_poke_addr, pte);
>       flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> @@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>       return err;
>   }
>   
> -int patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int do_patch_memory(void *addr, unsigned long val, bool is_dword)
>   {
>       int err;
>       unsigned long flags;
> @@ -186,15 +193,47 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>        */
>       if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
>           !static_branch_likely(&poking_init_done))
> -             return raw_patch_instruction(addr, instr);
> +             return __patch_memory(addr, val, addr, is_dword);
>   
>       local_irq_save(flags);
> -     err = __do_patch_instruction(addr, instr);
> +     err = __do_patch_memory(addr, val, is_dword);
>       local_irq_restore(flags);
>   
>       return err;
>   }
> -NOKPROBE_SYMBOL(patch_instruction);
> +
> +#ifdef CONFIG_PPC64
> +
> +int patch_uint(void *addr, unsigned int val)
> +{
> +     return do_patch_memory(addr, val, false);
> +}
> +NOKPROBE_SYMBOL(patch_uint)
> +
> +int patch_ulong(void *addr, unsigned long val)
> +{
> +     return do_patch_memory(addr, val, true);
> +}
> +NOKPROBE_SYMBOL(patch_ulong)
> +
> +int patch_instruction(u32 *addr, ppc_inst_t instr)
> +{
> +     if (ppc_inst_prefixed(instr))
> +             return patch_ulong(addr, ppc_inst_as_ulong(instr));
> +     else
> +             return patch_uint(addr, ppc_inst_val(instr));
> +}
> +NOKPROBE_SYMBOL(patch_instruction)
> +
> +#else
> +
> +noinline int patch_uint(void *addr, unsigned int val)
> +{
> +     return do_patch_memory(addr, val, false);
> +}
> +NOKPROBE_SYMBOL(patch_uint)
> +
> +#endif
>   
>   int patch_branch(u32 *addr, unsigned long target, int flags)
>   {
