Hi, Le 05/10/2022 à 07:32, Benjamin Gray a écrit : > Adds a generic text patching mechanism for patches of size int or long > bytes. > > The patch_instruction function is reimplemented in terms of this > more generic function. This generic implementation allows patching of > arbitrary long data, such as pointers on 64-bit. > > On 32-bit patch_int is marked noinline to prevent a mis-optimisation. > Without noinline, inside patch_branch the compiler may inline all the > way to do_patch_memory, preventing the compiler from inlining > do_patch_memory into patch_int. This would needlessly force patch_int > to be a branch to do_patch_memory.
I'm on a business trip this week, so I can't test it on hardware, but the generated code looks horrid and sub-optimal, with a stack frame and so many registers saved into it. That's mpc885_ads_defconfig built with GCC 12, without modules, without stack protector, with 4k pages. 00000168 <__patch_memory.constprop.0>: 168: 90 83 00 00 stw r4,0(r3) 16c: 7c 00 18 6c dcbst 0,r3 170: 7c 00 04 ac hwsync 174: 7c 00 2f ac icbi 0,r5 178: 7c 00 04 ac hwsync 17c: 4c 00 01 2c isync 180: 38 60 00 00 li r3,0 184: 4e 80 00 20 blr 188: 38 60 ff ff li r3,-1 18c: 4e 80 00 20 blr 00000190 <raw_patch_instruction>: 190: 90 83 00 00 stw r4,0(r3) 194: 7c 00 18 6c dcbst 0,r3 198: 7c 00 04 ac hwsync 19c: 7c 00 1f ac icbi 0,r3 1a0: 7c 00 04 ac hwsync 1a4: 4c 00 01 2c isync 1a8: 38 60 00 00 li r3,0 1ac: 4e 80 00 20 blr 1b0: 38 60 ff ff li r3,-1 1b4: 4e 80 00 20 blr 000001b8 <patch_uint>: 1b8: 7c 65 1b 78 mr r5,r3 1bc: 48 00 00 a4 b 260 <patch_uint+0xa8> 1c0: 94 21 ff e0 stwu r1,-32(r1) 1c4: 7c 08 02 a6 mflr r0 1c8: 90 01 00 24 stw r0,36(r1) 1cc: 93 81 00 10 stw r28,16(r1) 1d0: 93 a1 00 14 stw r29,20(r1) 1d4: 93 c1 00 18 stw r30,24(r1) 1d8: 93 e1 00 1c stw r31,28(r1) 1dc: 7f 80 00 a6 mfmsr r28 1e0: 7c 51 13 a6 mtspr 81,r2 1e4: 3d 20 00 00 lis r9,0 1e6: R_PPC_ADDR16_HA .data 1e8: 81 49 00 00 lwz r10,0(r9) 1ea: R_PPC_ADDR16_LO .data 1ec: 3d 20 00 00 lis r9,0 1ee: R_PPC_ADDR16_HA init_mm+0x24 1f0: 83 ea 00 04 lwz r31,4(r10) 1f4: 80 e9 00 00 lwz r7,0(r9) 1f6: R_PPC_ADDR16_LO init_mm+0x24 1f8: 57 e8 65 3a rlwinm r8,r31,12,20,29 1fc: 7f a7 40 2e lwzx r29,r7,r8 200: 7c 69 1b 78 mr r9,r3 204: 3d 29 40 00 addis r9,r9,16384 208: 57 fe b5 3a rlwinm r30,r31,22,20,29 20c: 55 29 00 26 clrrwi r9,r9,12 210: 61 29 01 25 ori r9,r9,293 214: 57 bd 00 26 clrrwi r29,r29,12 218: 3f de c0 00 addis r30,r30,-16384 21c: 7d 3d f1 2e stwx r9,r29,r30 220: 53 e3 00 26 rlwimi r3,r31,0,0,19 224: 4b ff ff 45 bl 168 <__patch_memory.constprop.0> 228: 39 20 00 00 li r9,0 22c: 7d 3d f1 2e stwx r9,r29,r30 230: 57 ff 00 26 clrrwi 
r31,r31,12 234: 7c 00 fa 64 tlbie r31,r0 238: 7c 00 04 ac hwsync 23c: 7f 80 01 24 mtmsr r28 240: 80 01 00 24 lwz r0,36(r1) 244: 83 81 00 10 lwz r28,16(r1) 248: 83 a1 00 14 lwz r29,20(r1) 24c: 83 c1 00 18 lwz r30,24(r1) 250: 83 e1 00 1c lwz r31,28(r1) 254: 7c 08 03 a6 mtlr r0 258: 38 21 00 20 addi r1,r1,32 25c: 4e 80 00 20 blr 260: 4b ff ff 08 b 168 <__patch_memory.constprop.0> Christophe > > Signed-off-by: Benjamin Gray <bg...@linux.ibm.com> > --- > arch/powerpc/include/asm/code-patching.h | 29 ++++++++++ > arch/powerpc/lib/code-patching.c | 73 ++++++++++++++++++------ > 2 files changed, 85 insertions(+), 17 deletions(-) > > diff --git a/arch/powerpc/include/asm/code-patching.h > b/arch/powerpc/include/asm/code-patching.h > index 3f881548fb61..170bfa848c7c 100644 > --- a/arch/powerpc/include/asm/code-patching.h > +++ b/arch/powerpc/include/asm/code-patching.h > @@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t *instr, const > u32 *addr, > int create_cond_branch(ppc_inst_t *instr, const u32 *addr, > unsigned long target, int flags); > int patch_branch(u32 *addr, unsigned long target, int flags); > + > +/* patch_uint and patch_ulong must only be called on addresses where the > patch > + * does not cross a cacheline, otherwise it may not be flushed properly and > + * mixes of new and stale data may be observed. > + * > + * patch_instruction and other instruction patchers automatically satisfy > this > + * requirement due to instruction alignment requirements. 
> + */ > + > +int patch_uint(void *addr, unsigned int val); > + > +#ifdef CONFIG_PPC64 > + > +int patch_ulong(void *addr, unsigned long val); > int patch_instruction(u32 *addr, ppc_inst_t instr); > + > +#else > + > +static inline int patch_ulong(void *addr, unsigned long val) > +{ > + return patch_uint(addr, val); > +} > + > +static inline int patch_instruction(u32 *addr, ppc_inst_t instr) > +{ > + return patch_uint(addr, ppc_inst_val(instr)); > +} > + > +#endif > + > int raw_patch_instruction(u32 *addr, ppc_inst_t instr); > > static inline unsigned long patch_site_addr(s32 *site) > diff --git a/arch/powerpc/lib/code-patching.c > b/arch/powerpc/lib/code-patching.c > index 125c55e3e148..ecdd2e523d9a 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -15,20 +15,24 @@ > #include <asm/code-patching.h> > #include <asm/inst.h> > > -static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 > *patch_addr) > +static int __patch_memory(void *patch_addr, unsigned long val, void > *exec_addr, > + bool is_dword) > { > - if (!ppc_inst_prefixed(instr)) { > - u32 val = ppc_inst_val(instr); > - > - __put_kernel_nofault(patch_addr, &val, u32, failed); > - } else { > - u64 val = ppc_inst_as_ulong(instr); > + /* Prefixed instruction may cross cacheline if cacheline smaller than > 64 bytes */ > + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64); > > + if (unlikely(is_dword)) > __put_kernel_nofault(patch_addr, &val, u64, failed); > - } > + else > + __put_kernel_nofault(patch_addr, &val, u32, failed); > > - asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), > - "r" (exec_addr)); > + /* Assume data is inside a single cacheline */ > + dcbst(patch_addr); > + mb(); /* sync */ > + /* Flush on the EA that may be executed in case of a non-coherent > icache */ > + icbi(exec_addr); > + mb(); /* sync */ > + isync(); > > return 0; > > @@ -38,7 +42,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t > 
instr, u32 *patch_addr > > int raw_patch_instruction(u32 *addr, ppc_inst_t instr) > { > - return __patch_instruction(addr, instr, addr); > + if (ppc_inst_prefixed(instr)) > + return __patch_memory(addr, ppc_inst_as_ulong(instr), addr, > true); > + else > + return __patch_memory(addr, ppc_inst_val(instr), addr, false); > } > > static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); > @@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long addr) > flush_tlb_kernel_range(addr, addr + PAGE_SIZE); > } > > -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) > +static int __do_patch_memory(void *addr, unsigned long val, bool is_dword) > { > int err; > u32 *patch_addr; > @@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t > instr) > if (radix_enabled()) > asm volatile("ptesync": : :"memory"); > > - err = __patch_instruction(addr, instr, patch_addr); > + err = __patch_memory(patch_addr, val, addr, is_dword); > > pte_clear(&init_mm, text_poke_addr, pte); > flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); > @@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t > instr) > return err; > } > > -int patch_instruction(u32 *addr, ppc_inst_t instr) > +static int do_patch_memory(void *addr, unsigned long val, bool is_dword) > { > int err; > unsigned long flags; > @@ -186,15 +193,47 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) > */ > if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || > !static_branch_likely(&poking_init_done)) > - return raw_patch_instruction(addr, instr); > + return __patch_memory(addr, val, addr, is_dword); > > local_irq_save(flags); > - err = __do_patch_instruction(addr, instr); > + err = __do_patch_memory(addr, val, is_dword); > local_irq_restore(flags); > > return err; > } > -NOKPROBE_SYMBOL(patch_instruction); > + > +#ifdef CONFIG_PPC64 > + > +int patch_uint(void *addr, unsigned int val) > +{ > + return do_patch_memory(addr, val, false); > +} > +NOKPROBE_SYMBOL(patch_uint) 
> + > +int patch_ulong(void *addr, unsigned long val) > +{ > + return do_patch_memory(addr, val, true); > +} > +NOKPROBE_SYMBOL(patch_ulong) > + > +int patch_instruction(u32 *addr, ppc_inst_t instr) > +{ > + if (ppc_inst_prefixed(instr)) > + return patch_ulong(addr, ppc_inst_as_ulong(instr)); > + else > + return patch_uint(addr, ppc_inst_val(instr)); > +} > +NOKPROBE_SYMBOL(patch_instruction) > + > +#else > + > +noinline int patch_uint(void *addr, unsigned int val) > +{ > + return do_patch_memory(addr, val, false); > +} > +NOKPROBE_SYMBOL(patch_uint) > + > +#endif > > int patch_branch(u32 *addr, unsigned long target, int flags) > {