Currently optimize_nops() scans to see if the alternative starts with NOPs. However, the emit pattern is:
141: \oldinstr 142: .skip (len-(142b-141b)), 0x90 That is, when oldinstr is short, we pad the tail with NOPs. This case never gets optimized. Rewrite optimize_nops() to replace any string of NOPs inside the alternative to larger NOPs. Also run it irrespective of patching, replacing NOPs in both the original and replaced code. A direct consequence is that padlen becomes superfluous, so remove it. Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> --- arch/x86/include/asm/alternative.h | 14 ++--- arch/x86/kernel/alternative.c | 63 ++++++++++++++++---------- tools/objtool/arch/x86/include/arch/special.h | 2 3 files changed, 46 insertions(+), 33 deletions(-) --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -65,7 +65,6 @@ struct alt_instr { u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction */ - u8 padlen; /* length of build-time padding */ } __packed; /* @@ -104,7 +103,6 @@ static inline int alternatives_text_rese #define alt_end_marker "663" #define alt_slen "662b-661b" -#define alt_pad_len alt_end_marker"b-662b" #define alt_total_slen alt_end_marker"b-661b" #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" @@ -151,8 +149,7 @@ static inline int alternatives_text_rese " .long " b_replacement(num)"f - .\n" /* new instruction */ \ " .word " __stringify(feature) "\n" /* feature bit */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n" /* replacement len */ \ - " .byte " alt_pad_len "\n" /* pad len */ + " .byte " alt_rlen(num) "\n" /* replacement len */ #define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \ "# ALT: replacement " #num "\n" \ @@ -315,13 +312,12 @@ static inline int alternatives_text_rese * enough information for the alternatives patching code to patch an * instruction. See apply_alternatives(). */ -.macro altinstruction_entry orig alt feature orig_len alt_len pad_len +.macro altinstruction_entry orig alt feature orig_len alt_len .long \orig - . .long \alt - . .word \feature .byte \orig_len .byte \alt_len - .byte \pad_len .endm /* @@ -338,7 +334,7 @@ static inline int alternatives_text_rese 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b + altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f .popsection .pushsection .altinstr_replacement,"ax" @@ -375,8 +371,8 @@ static inline int alternatives_text_rese 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f .popsection .pushsection .altinstr_replacement,"ax" --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -345,19 +345,39 @@ recompute_jump(struct alt_instr *a, u8 * static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; - int i; + int nops = 0, i = 0; + struct insn insn; + u8 *nop = NULL; + + do { + kernel_insn_init(&insn, &instr[i], MAX_INSN_SIZE); + insn_get_length(&insn); + + if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) { + if (!nop) { + nop = &instr[i]; + nops = 1; + } else { + nops++; + } + } + i += insn.length; - for (i = 0; i < a->padlen; i++) { - if (instr[i] != 0x90) - return; - } + if ((insn.length != 1 || i == a->instrlen) && nop) { + + local_irq_save(flags); + add_nops(nop, nops); + local_irq_restore(flags); + + DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", + instr, (int)(unsigned long)(nop-instr), nops); + + nop = NULL; + } - local_irq_save(flags); - add_nops(instr + (a->instrlen - a->padlen), a->padlen); - local_irq_restore(flags); + } while (i < a->instrlen); - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", - instr, a->instrlen - a->padlen, a->padlen); + WARN_ON_ONCE(nop); } /* @@ -403,19 +423,15 @@ void __init_or_module noinline apply_alt * - feature not present but ALTINSTR_FLAG_INV is set to mean, * patch if feature is *NOT* present. */ - if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) { - if (a->padlen > 1) - optimize_nops(a, instr); + if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) + goto next; - continue; - } - - DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", + DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "", feature >> 5, feature & 0x1f, instr, instr, a->instrlen, - replacement, a->replacementlen, a->padlen); + replacement, a->replacementlen); DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); @@ -439,14 +455,15 @@ void __init_or_module noinline apply_alt if (a->replacementlen && is_jmp(replacement[0])) recompute_jump(a, instr, replacement, insn_buff); - if (a->instrlen > a->replacementlen) { - add_nops(insn_buff + a->replacementlen, - a->instrlen - a->replacementlen); - insn_buff_sz += a->instrlen - a->replacementlen; - } + for (; insn_buff_sz < a->instrlen; insn_buff_sz++) + insn_buff[insn_buff_sz] = 0x90; + DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); text_poke_early(instr, insn_buff, insn_buff_sz); + +next: + optimize_nops(a, instr); } } --- a/tools/objtool/arch/x86/include/arch/special.h +++ b/tools/objtool/arch/x86/include/arch/special.h @@ -10,7 +10,7 @@ #define JUMP_ORIG_OFFSET 0 #define JUMP_NEW_OFFSET 4 -#define ALT_ENTRY_SIZE 13 +#define ALT_ENTRY_SIZE 12 #define ALT_ORIG_OFFSET 0 #define ALT_NEW_OFFSET 4 #define ALT_FEATURE_OFFSET 8