On Thu, Mar 18, 2021 at 06:11:05PM +0100, Peter Zijlstra wrote: > --- a/arch/x86/kernel/alternative.c > +++ b/arch/x86/kernel/alternative.c > @@ -345,19 +345,39 @@ recompute_jump(struct alt_instr *a, u8 * > static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 > *instr) > { > unsigned long flags; > - int i; > + int nops = 0, i = 0; > + struct insn insn; > + u8 *nop = NULL; > > - for (i = 0; i < a->padlen; i++) { > - if (instr[i] != 0x90) > + do { > + if (insn_decode_kernel(&insn, &instr[i])) > return; > - } > > - local_irq_save(flags); > - add_nops(instr + (a->instrlen - a->padlen), a->padlen); > - local_irq_restore(flags); > + if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) { > + if (!nop) { > + nop = &instr[i]; > + nops = 1; > + } else { > + nops++; > + } > + } > + i += insn.length; > + > + if ((insn.length != 1 || i == a->instrlen) && nop) { > + > + local_irq_save(flags); > + add_nops(nop, nops); > + local_irq_restore(flags); > + > + DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized > NOPs: ", > + instr, (int)(unsigned long)(nop-instr), > nops); > + > + nop = NULL; > + } > > - DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", > - instr, a->instrlen - a->padlen, a->padlen); > + } while (i < a->instrlen); > + > + WARN_ON_ONCE(nop); > }
I think I've made this simpler; pasting the whole function and not the diff because former is easier to read: /* * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification. * * Jump over the non-NOP insns, the remaining bytes must be single-byte NOPs, * optimize them. */ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; struct insn insn; int i = 0, j; /* Skip preceding non-NOP instructions. */ do { if (insn_decode_kernel(&insn, &instr[i])) return; if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) break; i += insn.length; } while (i < a->instrlen); if (i >= a->instrlen - 1) return; /* Verify rest is NOPs - should not fire(tm) */ for (j = i; j < a->instrlen - 1; j++) { if (WARN(instr[j] != 0x90, "Wrong insn byte 0x%hx at 0x%px\n", instr[j], &instr[j])) return; } local_irq_save(flags); add_nops(&instr[i], a->instrlen - i); local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", instr, i, a->instrlen); } -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette