Le 28/09/2023 à 21:48, Hari Bathini a écrit :
> patch_instruction() entails setting up pte, patching the instruction,
> clearing the pte and flushing the tlb. If multiple instructions need
> to be patched, every instruction would have to go through the above
> drill unnecessarily. Instead, introduce function patch_instructions()
> that sets up the pte, clears the pte and flushes the tlb only once per
> page range of instructions to be patched. This adds a slight overhead
> to patch_instruction() call while improving the patching time for
> scenarios where more than one instruction needs to be patched.

Not a "slight" but a "significant" overhead on PPC32.

Thinking about it once more I don't think it is a good idea to try and 
merge that into the existing code_patching logic which is really single 
instruction performance oriented.

Anyway, comments below.

> 
> Signed-off-by: Hari Bathini <hbath...@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h |  1 +
>   arch/powerpc/lib/code-patching.c         | 93 +++++++++++++++++++++---
>   2 files changed, 85 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h 
> b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..43a4aedfa703 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>   int patch_branch(u32 *addr, unsigned long target, int flags);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_instructions(void *addr, void *code, size_t len, bool 
> repeat_instr);

I don't like void *, you can do to much nasty things with that.
I think you want u32 *

>   
>   static inline unsigned long patch_site_addr(s32 *site)
>   {
> diff --git a/arch/powerpc/lib/code-patching.c 
> b/arch/powerpc/lib/code-patching.c
> index b00112d7ad46..4ff002bc41f6 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -278,7 +278,36 @@ static void unmap_patch_area(unsigned long addr)
>       flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
> +static int __patch_instructions(u32 *patch_addr, void *code, size_t len, 
> bool repeat_instr)
> +{
> +     unsigned long start = (unsigned long)patch_addr;
> +
> +     /* Repeat instruction */
> +     if (repeat_instr) {
> +             ppc_inst_t instr = ppc_inst_read(code);
> +
> +             if (ppc_inst_prefixed(instr)) {
> +                     u64 val = ppc_inst_as_ulong(instr);
> +
> +                     memset64((uint64_t *)patch_addr, val, len / 8);

Use u64 instead of uint64_t.

> +             } else {
> +                     u32 val = ppc_inst_val(instr);
> +
> +                     memset32(patch_addr, val, len / 4);
> +             }
> +     } else
> +             memcpy(patch_addr, code, len);

Missing braces, see 
https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces

> +
> +     smp_wmb();      /* smp write barrier */
> +     flush_icache_range(start, start + len);
> +     return 0;
> +}
> +
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions_mm(u32 *addr, void *code, size_t len, 
> bool repeat_instr)
>   {
>       int err;
>       u32 *patch_addr;
> @@ -307,11 +336,15 @@ static int __do_patch_instruction_mm(u32 *addr, 
> ppc_inst_t instr)
>   
>       orig_mm = start_using_temp_mm(patching_mm);
>   
> -     err = __patch_instruction(addr, instr, patch_addr);
> +     /* Single instruction case. */
> +     if (len == 0) {
> +             err = __patch_instruction(addr, *(ppc_inst_t *)code, 
> patch_addr);

Take care, you can't convert u32 * to ppc_inst_t that way, you have to 
use ppc_inst_read() otherwise you'll get odd result with prefixed 
instructions depending on endianness.

>   
> -     /* hwsync performed by __patch_instruction (sync) if successful */
> -     if (err)
> -             mb();  /* sync */
> +             /* hwsync performed by __patch_instruction (sync) if successful 
> */
> +             if (err)
> +                     mb();  /* sync */

Get this away, see my patch at 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/e88b154eaf2efd9ff177d472d3411dcdec8ff4f5.1696675567.git.christophe.le...@csgroup.eu/

> +     } else
> +             err = __patch_instructions(patch_addr, code, len, repeat_instr);
>   
>       /* context synchronisation performed by __patch_instruction (isync or 
> exception) */
>       stop_using_temp_mm(patching_mm, orig_mm);
> @@ -328,7 +361,11 @@ static int __do_patch_instruction_mm(u32 *addr, 
> ppc_inst_t instr)
>       return err;
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions(u32 *addr, void *code, size_t len, bool 
> repeat_instr)
>   {
>       int err;
>       u32 *patch_addr;
> @@ -345,7 +382,11 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t 
> instr)
>       if (radix_enabled())
>               asm volatile("ptesync": : :"memory");
>   
> -     err = __patch_instruction(addr, instr, patch_addr);
> +     /* Single instruction case. */
> +     if (len == 0)
> +             err = __patch_instruction(addr, *(ppc_inst_t *)code, 
> patch_addr);

Same, use ppc_inst_read() instead of this nasty casting.

> +     else
> +             err = __patch_instructions(patch_addr, code, len, repeat_instr);
>   
>       pte_clear(&init_mm, text_poke_addr, pte);
>       flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> @@ -369,15 +410,49 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>   
>       local_irq_save(flags);
>       if (mm_patch_enabled())
> -             err = __do_patch_instruction_mm(addr, instr);
> +             err = __do_patch_instructions_mm(addr, &instr, 0, false);
>       else
> -             err = __do_patch_instruction(addr, instr);
> +             err = __do_patch_instructions(addr, &instr, 0, false);
>       local_irq_restore(flags);
>   
>       return err;
>   }
>   NOKPROBE_SYMBOL(patch_instruction);
>   
> +/*
> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
> + *
> + * If repeat_instr is true, the same instruction is filled for
> + * 'len' bytes.
> + */
> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr)

I'd like to see code as a u32 *

> +{
> +     unsigned long flags;
> +     size_t plen;
> +     int err;

Move those three variables inside the only block in which they are used.

> +
> +     while (len > 0) {
> +             plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
> +
> +             local_irq_save(flags);
> +             if (mm_patch_enabled())
> +                     err = __do_patch_instructions_mm(addr, code, plen, 
> repeat_instr);
> +             else
> +                     err = __do_patch_instructions(addr, code, plen, 
> repeat_instr);
> +             local_irq_restore(flags);
> +             if (err)
> +                     break;

replace by 'return err'

> +
> +             len -= plen;
> +             addr = addr + plen;
> +             if (!repeat_instr)
> +                     code = code + plen;
> +     }
> +
> +     return err;

If len is 0 err will be undefined. Is that expected ?

Replace by return 0;

> +}
> +NOKPROBE_SYMBOL(patch_instructions);
> +
>   int patch_branch(u32 *addr, unsigned long target, int flags)
>   {
>       ppc_inst_t instr;

Reply via email to