On Thu, Sep 04, 2025 at 11:58:26PM +0200, Peter Zijlstra wrote:
> On Thu, Sep 04, 2025 at 11:56:17PM +0200, Peter Zijlstra wrote:
> 
> > Ooh, that suggests we do something like so:
> 
> N/m, I need to go sleep, that doesn't work right for the 32bit nops that
> use lea instead of nopl. I'll see if I can come up with something more
> sensible.

Something like this. Can someone please look very critical at this fancy
insn_is_nop()?

---
 arch/x86/include/asm/insn-eval.h |  2 +
 arch/x86/kernel/alternative.c    | 20 +--------
 arch/x86/kernel/uprobes.c        | 32 ++------------
 arch/x86/lib/insn-eval.c         | 92 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 48 deletions(-)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 54368a43abf6..4733e9064ee5 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -44,4 +44,6 @@ enum insn_mmio_type {
 
 enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
 
+bool insn_is_nop(struct insn *insn);
+
 #endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7bde68247b5f..e1f98189fe77 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -9,6 +9,7 @@
 
 #include <asm/text-patching.h>
 #include <asm/insn.h>
+#include <asm/insn-eval.h>
 #include <asm/ibt.h>
 #include <asm/set_memory.h>
 #include <asm/nmi.h>
@@ -345,25 +346,6 @@ static void add_nop(u8 *buf, unsigned int len)
                *buf = INT3_INSN_OPCODE;
 }
 
-/*
- * Matches NOP and NOPL, not any of the other possible NOPs.
- */
-static bool insn_is_nop(struct insn *insn)
-{
-       /* Anything NOP, but no REP NOP */
-       if (insn->opcode.bytes[0] == 0x90 &&
-           (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3))
-               return true;
-
-       /* NOPL */
-       if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
-               return true;
-
-       /* TODO: more nops */
-
-       return false;
-}
-
 /*
  * Find the offset of the first non-NOP instruction starting at @offset
  * but no further than @len.
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 0a8c0a4a5423..32bc583e6fd4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -17,6 +17,7 @@
 #include <linux/kdebug.h>
 #include <asm/processor.h>
 #include <asm/insn.h>
+#include <asm/insn-eval.h>
 #include <asm/mmu_context.h>
 #include <asm/nops.h>
 
@@ -1158,35 +1159,12 @@ void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
        mmap_write_unlock(mm);
 }
 
-static bool insn_is_nop(struct insn *insn)
-{
-       return insn->opcode.nbytes == 1 && insn->opcode.bytes[0] == 0x90;
-}
-
-static bool insn_is_nopl(struct insn *insn)
-{
-       if (insn->opcode.nbytes != 2)
-               return false;
-
-       if (insn->opcode.bytes[0] != 0x0f || insn->opcode.bytes[1] != 0x1f)
-               return false;
-
-       if (!insn->modrm.nbytes)
-               return false;
-
-       if (X86_MODRM_REG(insn->modrm.bytes[0]) != 0)
-               return false;
-
-       /* 0f 1f /0 - NOPL */
-       return true;
-}
-
 static bool can_optimize(struct insn *insn, unsigned long vaddr)
 {
        if (!insn->x86_64 || insn->length != 5)
                return false;
 
-       if (!insn_is_nop(insn) && !insn_is_nopl(insn))
+       if (!insn_is_nop(insn))
                return false;
 
        /* We can't do cross page atomic writes yet. */
@@ -1428,17 +1406,13 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
        insn_byte_t p;
        int i;
 
-       /* x86_nops[insn->length]; same as jmp with .offs = 0 */
-       if (insn->length <= ASM_NOP_MAX &&
-           !memcmp(insn->kaddr, x86_nops[insn->length], insn->length))
+       if (insn_is_nop(insn))
                goto setup;
 
        switch (opc1) {
        case 0xeb:      /* jmp 8 */
        case 0xe9:      /* jmp 32 */
                break;
-       case 0x90:      /* prefix* + nop; same as jmp with .offs = 0 */
-               goto setup;
 
        case 0xe8:      /* call relative */
                branch_clear_offset(auprobe, insn);
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 4e385cbfd444..3a67f1c5582c 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1676,3 +1676,106 @@ enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
 
        return type;
 }
+
+/*
+ * Match any single instruction NOP encoding:
+ *
+ *  - NOP (90), any prefix except REP (f3 90 is PAUSE)
+ *  - NOPL (0f 1f /0)
+ *  - MOV %reg, %reg at native operand size
+ *  - LEA 0(%reg), %reg at native operand size (the 32-bit nops)
+ *  - JMP.d8 / JMP.d32 with zero displacement
+ */
+bool insn_is_nop(struct insn *insn)
+{
+       u8 rex, rex_b = 0, rex_x = 0, rex_r = 0, rex_w = 0;
+       u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+       u8 sib, sib_scale = 0, sib_index = 0, sib_base;
+
+       if (insn->rex_prefix.nbytes) {
+               rex = insn->rex_prefix.bytes[0];
+               rex_w = !!X86_REX_W(rex);
+               rex_r = !!X86_REX_R(rex);
+               rex_x = !!X86_REX_X(rex);
+               rex_b = !!X86_REX_B(rex);
+       }
+
+       if (insn->modrm.nbytes) {
+               modrm = insn->modrm.bytes[0];
+               modrm_mod = X86_MODRM_MOD(modrm);
+               modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
+               modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
+       }
+
+       if (insn->sib.nbytes) {
+               sib = insn->sib.bytes[0];
+               sib_scale = X86_SIB_SCALE(sib);
+               sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
+               sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
+
+               /* with a SIB byte, LEA's source register is the SIB base */
+               modrm_rm = sib_base;
+       }
+
+       switch (insn->opcode.bytes[0]) {
+       case 0x0f: /* 2nd byte */
+               break;
+
+       case 0x89: /* MOV */
+               if (modrm_mod != 3) /* register-direct */
+                       return false;
+
+               if (insn->x86_64 && !rex_w) /* native size */
+                       return false;
+
+               for (int i = 0; i < insn->prefixes.nbytes; i++) {
+                       if (insn->prefixes.bytes[i] == 0x66) /* OSP */
+                               return false;
+               }
+
+               return modrm_reg == modrm_rm; /* MOV %reg, %reg */
+
+       case 0x8d: /* LEA */
+               if (modrm_mod == 0 || modrm_mod == 3) /* register-indirect with disp */
+                       return false;
+
+               if (insn->x86_64 && !rex_w) /* native size */
+                       return false;
+
+               if (insn->displacement.value != 0)
+                       return false;
+
+               /* a SIB byte, if present, must encode (%reg, %eiz, 1) */
+               if (insn->sib.nbytes && (sib_scale != 0 || sib_index != 4))
+                       return false;
+
+               for (int i = 0; i < insn->prefixes.nbytes; i++) {
+                       if (insn->prefixes.bytes[i] != 0x3e) /* DS */
+                               return false;
+               }
+
+               return modrm_reg == modrm_rm; /* LEA 0(%reg), %reg */
+
+       case 0x90: /* NOP */
+               for (int i = 0; i < insn->prefixes.nbytes; i++) {
+                       if (insn->prefixes.bytes[i] == 0xf3) /* REP */
+                               return false; /* REP NOP -- PAUSE */
+               }
+               return true;
+
+       case 0xe9: /* JMP.d32 */
+       case 0xeb: /* JMP.d8 */
+               return insn->immediate.value == 0; /* JMP +0 */
+
+       default:
+               return false;
+       }
+
+       switch (insn->opcode.bytes[1]) {
+       case 0x1f:
+               return modrm_reg == 0; /* 0f 1f /0 -- NOPL */
+
+       default:
+               return false;
+       }
+}

Reply via email to