Hi Peter,

On Tue, 27 Aug 2019 20:06:24 +0200
Peter Zijlstra <pet...@infradead.org> wrote:

> Adding another text_poke_bp_batch() user made me realize the interface
> is all sorts of wrong. The text poke vector should be internal to the
> implementation.
> 
> This then results in a trivial interface:
> 
>   text_poke_queue()  - which has the 'normal' text_poke_bp() interface
>   text_poke_finish() - which takes no arguments and flushes any
>                        pending text_poke()s.

Looks good to me. Maybe it is easy to apply to optprobe too.

Reviewed-by: Masami Hiramatsu <mhira...@kernel.org>

Thank you,

> 
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> Cc: Steven Rostedt <rost...@goodmis.org>
> Cc: Daniel Bristot de Oliveira <bris...@redhat.com>
> Cc: Masami Hiramatsu <mhira...@kernel.org>
> ---
>  arch/x86/include/asm/text-patching.h |   16 ++-----
>  arch/x86/kernel/alternative.c        |   64 +++++++++++++++++++++++++---
>  arch/x86/kernel/jump_label.c         |   80 +++++++++--------------------------
>  3 files changed, 84 insertions(+), 76 deletions(-)
> 
> --- a/arch/x86/include/asm/text-patching.h
> +++ b/arch/x86/include/asm/text-patching.h
> @@ -25,14 +25,6 @@ static inline void apply_paravirt(struct
>   */
>  #define POKE_MAX_OPCODE_SIZE 5
>  
> -struct text_poke_loc {
> -     void *addr;
> -     int len;
> -     s32 rel32;
> -     u8 opcode;
> -     const char text[POKE_MAX_OPCODE_SIZE];
> -};
> -
>  extern void text_poke_early(void *addr, const void *opcode, size_t len);
>  
>  /*
> @@ -53,13 +45,15 @@ extern void *text_poke(void *addr, const
>  extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
>  extern int poke_int3_handler(struct pt_regs *regs);
>  extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
> -extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
> -extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
> -                            const void *opcode, size_t len, const void *emulate);
> +
> +extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
> +extern void text_poke_finish(void);
> +
>  extern int after_bootmem;
>  extern __ro_after_init struct mm_struct *poking_mm;
>  extern __ro_after_init unsigned long poking_addr;
>  
> +
>  #ifndef CONFIG_UML_X86
>  static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
>  {
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -936,6 +936,14 @@ static void do_sync_core(void *info)
>       sync_core();
>  }
>  
> +struct text_poke_loc {
> +     void *addr;
> +     int len;
> +     s32 rel32;
> +     u8 opcode;
> +     const char text[POKE_MAX_OPCODE_SIZE];
> +};
> +
>  static struct bp_patching_desc {
>       struct text_poke_loc *vec;
>       int nr_entries;
> @@ -1017,6 +1025,10 @@ int poke_int3_handler(struct pt_regs *re
>  }
>  NOKPROBE_SYMBOL(poke_int3_handler);
>  
> +#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
> +static struct text_poke_loc tp_vec[TP_VEC_MAX];
> +static int tp_vec_nr;
> +
>  /**
>   * text_poke_bp_batch() -- update instructions on live kernel on SMP
>   * @tp:                      vector of instructions to patch
> @@ -1038,7 +1050,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
>   *             replacing opcode
>   *   - sync cores
>   */
> -void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
> +static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
>  {
>       unsigned char int3 = INT3_INSN_OPCODE;
>       int patched_all_but_first = 0;
> @@ -1105,11 +1117,7 @@ void text_poke_loc_init(struct text_poke
>  {
>       struct insn insn;
>  
> -     if (!opcode)
> -             opcode = (void *)tp->text;
> -     else
> -             memcpy((void *)tp->text, opcode, len);
> -
> +     memcpy((void *)tp->text, opcode, len);
>       if (!emulate)
>               emulate = opcode;
>  
> @@ -1147,6 +1155,50 @@ void text_poke_loc_init(struct text_poke
>       }
>  }
>  
> +/*
> + * We hard rely on the tp_vec being ordered; ensure this is so by flushing
> + * early if needed.
> + */
> +static bool tp_order_fail(void *addr)
> +{
> +     struct text_poke_loc *tp;
> +
> +     if (!tp_vec_nr)
> +             return false;
> +
> +     if (!addr) /* force */
> +             return true;
> +
> +     tp = &tp_vec[tp_vec_nr - 1];
> +     if ((unsigned long)tp->addr > (unsigned long)addr)
> +             return true;
> +
> +     return false;
> +}
> +
> +static void text_poke_flush(void *addr)
> +{
> +     if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
> +             text_poke_bp_batch(tp_vec, tp_vec_nr);
> +             tp_vec_nr = 0;
> +     }
> +}
> +
> +void text_poke_finish(void)
> +{
> +     text_poke_flush(NULL);
> +}
> +
> +void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
> +{
> +     struct text_poke_loc *tp;
> +
> +     text_poke_flush(addr);
> +
> +     tp = &tp_vec[tp_vec_nr++];
> +     text_poke_loc_init(tp, addr, opcode, len, emulate);
> +}
> +
>  /**
>   * text_poke_bp() -- update instructions on live kernel on SMP
>   * @addr:    address to patch
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -35,18 +35,19 @@ static void bug_at(unsigned char *ip, in
>       BUG();
>  }
>  
> -static void __jump_label_set_jump_code(struct jump_entry *entry,
> -                                    enum jump_label_type type,
> -                                    union jump_code_union *code,
> -                                    int init)
> +static const void *
> +__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
>  {
> +     static union jump_code_union code; /* relies on text_mutex */
>       const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
>       const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
>       const void *expect;
>       int line;
>  
> -     code->jump = 0xe9;
> -     code->offset = jump_entry_target(entry) -
> +     lockdep_assert_held(&text_mutex);
> +
> +     code.jump = JMP32_INSN_OPCODE;
> +     code.offset = jump_entry_target(entry) -
>                      (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
>  
>       if (init) {
> @@ -54,23 +55,23 @@ static void __jump_label_set_jump_code(s
>       } else if (type == JUMP_LABEL_JMP) {
>               expect = ideal_nop; line = __LINE__;
>       } else {
> -             expect = code->code; line = __LINE__;
> +             expect = code.code; line = __LINE__;
>       }
>  
>       if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
>               bug_at((void *)jump_entry_code(entry), line);
>  
>       if (type == JUMP_LABEL_NOP)
> -             memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
> +             memcpy(&code, ideal_nop, JUMP_LABEL_NOP_SIZE);
> +
> +     return &code;
>  }
>  
>  static void __ref __jump_label_transform(struct jump_entry *entry,
>                                        enum jump_label_type type,
>                                        int init)
>  {
> -     union jump_code_union code;
> -
> -     __jump_label_set_jump_code(entry, type, &code, init);
> +     const void *opcode = __jump_label_set_jump_code(entry, type, init);
>  
>       /*
>        * As long as only a single processor is running and the code is still
> @@ -84,12 +85,12 @@ static void __ref __jump_label_transform
>        * always nop being the 'currently valid' instruction
>        */
>       if (init || system_state == SYSTEM_BOOTING) {
> -             text_poke_early((void *)jump_entry_code(entry), &code,
> +             text_poke_early((void *)jump_entry_code(entry), opcode,
>                               JUMP_LABEL_NOP_SIZE);
>               return;
>       }
>  
> -     text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
> +     text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
>  }
>  
>  void arch_jump_label_transform(struct jump_entry *entry,
> @@ -100,15 +101,10 @@ void arch_jump_label_transform(struct ju
>       mutex_unlock(&text_mutex);
>  }
>  
> -#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
> -static struct text_poke_loc tp_vec[TP_VEC_MAX];
> -static int tp_vec_nr;
> -
>  bool arch_jump_label_transform_queue(struct jump_entry *entry,
>                                    enum jump_label_type type)
>  {
> -     struct text_poke_loc *tp;
> -     void *entry_code;
> +     const void *opcode;
>  
>       if (system_state == SYSTEM_BOOTING) {
>               /*
> @@ -118,53 +114,19 @@ bool arch_jump_label_transform_queue(str
>               return true;
>       }
>  
> -     /*
> -      * No more space in the vector, tell upper layer to apply
> -      * the queue before continuing.
> -      */
> -     if (tp_vec_nr == TP_VEC_MAX)
> -             return false;
> -
> -     tp = &tp_vec[tp_vec_nr];
> -
> -     entry_code = (void *)jump_entry_code(entry);
> -
> -     /*
> -      * The INT3 handler will do a bsearch in the queue, so we need entries
> -      * to be sorted. We can survive an unsorted list by rejecting the entry,
> -      * forcing the generic jump_label code to apply the queue. Warning once,
> -      * to raise the attention to the case of an unsorted entry that is
> -      * better not happen, because, in the worst case we will perform in the
> -      * same way as we do without batching - with some more overhead.
> -      */
> -     if (tp_vec_nr > 0) {
> -             int prev = tp_vec_nr - 1;
> -             struct text_poke_loc *prev_tp = &tp_vec[prev];
> -
> -             if (WARN_ON_ONCE(prev_tp->addr > entry_code))
> -                     return false;
> -     }
> -
> -     __jump_label_set_jump_code(entry, type,
> -                                (union jump_code_union *)&tp->text, 0);
> -
> -     text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
> -
> -     tp_vec_nr++;
> -
> +     mutex_lock(&text_mutex);
> +     opcode = __jump_label_set_jump_code(entry, type, 0);
> +     text_poke_queue((void *)jump_entry_code(entry),
> +                     opcode, JUMP_LABEL_NOP_SIZE, NULL);
> +     mutex_unlock(&text_mutex);
>       return true;
>  }
>  
>  void arch_jump_label_transform_apply(void)
>  {
> -     if (!tp_vec_nr)
> -             return;
> -
>       mutex_lock(&text_mutex);
> -     text_poke_bp_batch(tp_vec, tp_vec_nr);
> +     text_poke_finish();
>       mutex_unlock(&text_mutex);
> -
> -     tp_vec_nr = 0;
>  }
>  
>  static enum {
> 
> 


-- 
Masami Hiramatsu <mhira...@kernel.org>

Reply via email to