Currently optimize_nops() scans to see if the alternative starts with
NOPs. However, the emit pattern is:

  141:  \oldinstr
  142:  .skip (len-(142b-141b)), 0x90

That is, when oldinstr is short, we pad the tail with NOPs. This case
never gets optimized.

Rewrite optimize_nops() to replace any string of NOPs inside the
alternative to larger NOPs. Also run it irrespective of patching,
replacing NOPs in both the original and replaced code.

A direct consequence is that padlen becomes superfluous, so remove it.

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 arch/x86/include/asm/alternative.h            |   14 ++---
 arch/x86/kernel/alternative.c                 |   63 ++++++++++++++++----------
 tools/objtool/arch/x86/include/arch/special.h |    2 
 3 files changed, 46 insertions(+), 33 deletions(-)

--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,7 +65,6 @@ struct alt_instr {
        u16 cpuid;              /* cpuid bit set for replacement */
        u8  instrlen;           /* length of original instruction */
        u8  replacementlen;     /* length of new instruction */
-       u8  padlen;             /* length of build-time padding */
 } __packed;
 
 /*
@@ -104,7 +103,6 @@ static inline int alternatives_text_rese
 
 #define alt_end_marker         "663"
 #define alt_slen               "662b-661b"
-#define alt_pad_len            alt_end_marker"b-662b"
 #define alt_total_slen         alt_end_marker"b-661b"
 #define alt_rlen(num)          e_replacement(num)"f-"b_replacement(num)"f"
 
@@ -151,8 +149,7 @@ static inline int alternatives_text_rese
        " .long " b_replacement(num)"f - .\n"           /* new instruction */ \
        " .word " __stringify(feature) "\n"             /* feature bit     */ \
        " .byte " alt_total_slen "\n"                   /* source len      */ \
-       " .byte " alt_rlen(num) "\n"                    /* replacement len */ \
-       " .byte " alt_pad_len "\n"                      /* pad len */
+       " .byte " alt_rlen(num) "\n"                    /* replacement len */
 
 #define ALTINSTR_REPLACEMENT(newinstr, num)            /* replacement */       
\
        "# ALT: replacement " #num "\n"                                         
\
@@ -315,13 +312,12 @@ static inline int alternatives_text_rese
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+.macro altinstruction_entry orig alt feature orig_len alt_len
        .long \orig - .
        .long \alt - .
        .word \feature
        .byte \orig_len
        .byte \alt_len
-       .byte \pad_len
 .endm
 
 /*
@@ -338,7 +334,7 @@ static inline int alternatives_text_rese
 142:
 
        .pushsection .altinstructions,"a"
-       altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+       altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
        .popsection
 
        .pushsection .altinstr_replacement,"ax"
@@ -375,8 +371,8 @@ static inline int alternatives_text_rese
 142:
 
        .pushsection .altinstructions,"a"
-       altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-       altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+       altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+       altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
        .popsection
 
        .pushsection .altinstr_replacement,"ax"
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -345,19 +345,39 @@ recompute_jump(struct alt_instr *a, u8 *
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 
*instr)
 {
        unsigned long flags;
-       int i;
+       int nops = 0, i = 0;
+       struct insn insn;
+       u8 *nop = NULL;
+
+       do {
+               kernel_insn_init(&insn, &instr[i], MAX_INSN_SIZE);
+               insn_get_length(&insn);
+
+               if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) {
+                       if (!nop) {
+                               nop = &instr[i];
+                               nops = 1;
+                       } else {
+                               nops++;
+                       }
+               }
+               i += insn.length;
 
-       for (i = 0; i < a->padlen; i++) {
-               if (instr[i] != 0x90)
-                       return;
-       }
+               if ((insn.length != 1 || i == a->instrlen) && nop) {
+
+                       local_irq_save(flags);
+                       add_nops(nop, nops);
+                       local_irq_restore(flags);
+
+                       DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized 
NOPs: ",
+                                  instr, (int)(unsigned long)(nop-instr), 
nops);
+
+                       nop = NULL;
+               }
 
-       local_irq_save(flags);
-       add_nops(instr + (a->instrlen - a->padlen), a->padlen);
-       local_irq_restore(flags);
+       } while (i < a->instrlen);
 
-       DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-                  instr, a->instrlen - a->padlen, a->padlen);
+       WARN_ON_ONCE(nop);
 }
 
 /*
@@ -403,19 +423,15 @@ void __init_or_module noinline apply_alt
                 * - feature not present but ALTINSTR_FLAG_INV is set to mean,
                 *   patch if feature is *NOT* present.
                 */
-               if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
-                       if (a->padlen > 1)
-                               optimize_nops(a, instr);
+               if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
+                       goto next;
 
-                       continue;
-               }
-
-               DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: 
(%px, len: %d), pad: %d",
+               DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: 
(%px, len: %d)",
                        (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
                        feature >> 5,
                        feature & 0x1f,
                        instr, instr, a->instrlen,
-                       replacement, a->replacementlen, a->padlen);
+                       replacement, a->replacementlen);
 
                DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
                DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", 
replacement);
@@ -439,14 +455,15 @@ void __init_or_module noinline apply_alt
                if (a->replacementlen && is_jmp(replacement[0]))
                        recompute_jump(a, instr, replacement, insn_buff);
 
-               if (a->instrlen > a->replacementlen) {
-                       add_nops(insn_buff + a->replacementlen,
-                                a->instrlen - a->replacementlen);
-                       insn_buff_sz += a->instrlen - a->replacementlen;
-               }
+               for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
+                       insn_buff[insn_buff_sz] = 0x90;
+
                DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
 
                text_poke_early(instr, insn_buff, insn_buff_sz);
+
+next:
+               optimize_nops(a, instr);
        }
 }
 
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -10,7 +10,7 @@
 #define JUMP_ORIG_OFFSET       0
 #define JUMP_NEW_OFFSET                4
 
-#define ALT_ENTRY_SIZE         13
+#define ALT_ENTRY_SIZE         12
 #define ALT_ORIG_OFFSET                0
 #define ALT_NEW_OFFSET         4
 #define ALT_FEATURE_OFFSET     8

Reply via email to