Now with the mentioned patch really attached. :-)

On 12.05.25 13:20, Jürgen Groß wrote:
On 09.05.25 10:18, Xin Li wrote:
On 5/6/2025 2:20 AM, Juergen Gross wrote:
Instead of having callback functions for rdmsr/wrmsr on native, switch
to inline the respective instructions directly in order to avoid
overhead with the call interface.

To me, this is a beneficial addition to the existing pvops MSR code.


This requires using the instruction interfaces for rdmsr/wrmsr
emulation when running as a Xen PV guest.

In order to prepare support for the immediate forms of RDMSR and WRMSR
when not running as a Xen PV guest, use the RDMSR and WRMSR
instructions as the fallback case instead of ALT_CALL_INSTR.

I'm trying to evaluate how to add the immediate form MSR instructions
on top of this patch set, and I'm close to getting it done.

There is something to consider when running as a Xen PV guest, ...



Note that in the Xen PV case the RDMSR/WRMSR patching must not happen
even as an intermediate step, as this would clobber the indirect call
information needed when patching in the direct call for the Xen case.

Good point!

... as this still needs to be true.

There are 2 different ways to deal with this:

1. When running as a Xen PV guest disable X86_FEATURE_WRMSRNS and
    ASM_WRMSRNS_IMM (e.g. in xen_init_capabilities()).

2. Buffer the original instruction before patching in apply_alternatives()
    in order to avoid the sequence limitation above (see attached patch).

Deciding whether to retain the pvops MSR API is the responsibility of
the x86 maintainers, who are the ones experiencing the challenges of maintaining the code.

Well, I'm the PV ops maintainer, so it is basically me who needs to deal
with this. OTOH I do understand that diagnosis of problems with PV ops is
more complicated than without.


tglx said @https://lore.kernel.org/lkml/87y1h81ht4.ffs@tglx/:

 > I fundamentaly hate adding this to the PV infrastructure. We don't
 > want more PV ops, quite the contrary.

That is the reason I took a different direction, i.e., removing the
pvops MSR APIs.  But if your approach is cleaner, they may prefer it.

In the end it isn't adding additional PV ops interfaces. It is modifying
existing ones.


diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a463c747c780..df10b0e4f7b8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -175,24 +175,72 @@ static inline void __write_cr4(unsigned long x)
      PVOP_VCALL1(cpu.write_cr4, x);
  }
-static inline u64 paravirt_read_msr(u32 msr)
+static __always_inline u64 paravirt_read_msr(u32 msr)
  {
-    return PVOP_CALL1(u64, cpu.read_msr, msr);
+    EAX_EDX_DECLARE_ARGS(val, low, high);

This is under CONFIG_PARAVIRT_XXL, thus CONFIG_XEN_PV and CONFIG_X86_64,
therefore we don't need to consider 32-bit at all, no?

Right. OTOH the macros are there, so why not use them?

In the end I'm fine to open code the 64-bit case here.


+
+    PVOP_TEST_NULL(cpu.read_msr);
+    asm volatile("1: "ALTERNATIVE_2(PARAVIRT_CALL,
+                    "rdmsr", ALT_NOT_XEN,
+                    ALT_CALL_INSTR, ALT_XENPV_CALL)
+             "2:\n"
+             _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
+             : EAX_EDX_RET(val, low, high), ASM_CALL_CONSTRAINT
+             : paravirt_ptr(cpu.read_msr), "c" (msr));
+
+    return EAX_EDX_VAL(val, low, high);
  }
-static inline void paravirt_write_msr(u32 msr, u64 val)
+static __always_inline void paravirt_write_msr(u32 msr, u64 val)
  {
-    PVOP_VCALL2(cpu.write_msr, msr, val);
+    PVOP_TEST_NULL(cpu.write_msr);
+    asm volatile("1: "ALTERNATIVE_2(PARAVIRT_CALL,
+                    "wrmsr", ALT_NOT_XEN,
+                    ALT_CALL_INSTR, ALT_XENPV_CALL)
+              "2:\n"
+              _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+              : ASM_CALL_CONSTRAINT
+              : paravirt_ptr(cpu.write_msr),
+              "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32))
+              : "memory");
  }
-static inline int paravirt_read_msr_safe(u32 msr, u64 *val)
+static __always_inline int paravirt_read_msr_safe(u32 msr, u64 *p)
  {
-    return PVOP_CALL2(int, cpu.read_msr_safe, msr, val);
+    int err;
+    EAX_EDX_DECLARE_ARGS(val, low, high);
+
+    PVOP_TEST_NULL(cpu.read_msr_safe);
+    asm volatile("1: "ALTERNATIVE_2(PARAVIRT_CALL,
+                    "rdmsr; xor %[err],%[err]", ALT_NOT_XEN,
+                    ALT_CALL_INSTR, ALT_XENPV_CALL)
+             "2:\n"
+             _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
+             : [err] "=c" (err), EAX_EDX_RET(val, low, high),
+               ASM_CALL_CONSTRAINT
+             : paravirt_ptr(cpu.read_msr_safe), "0" (msr));
+
+    *p = EAX_EDX_VAL(val, low, high);
+
+    return err;
  }
-static inline int paravirt_write_msr_safe(u32 msr, u64 val)
+static __always_inline int paravirt_write_msr_safe(u32 msr, u64 val)
  {
-    return PVOP_CALL2(int, cpu.write_msr_safe, msr, val);
+    int err;
+
+    PVOP_TEST_NULL(cpu.write_msr_safe);
+    asm volatile("1: "ALTERNATIVE_2(PARAVIRT_CALL,
+                    "wrmsr; xor %[err],%[err]", ALT_NOT_XEN,
+                    ALT_CALL_INSTR, ALT_XENPV_CALL)
+             "2:\n"
+             _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %[err])
+             : [err] "=a" (err), ASM_CALL_CONSTRAINT
+             : paravirt_ptr(cpu.write_msr_safe),
+               "c" (msr), "0" ((u32)val), "d" ((u32)(val >> 32))
+             : "memory");
+
+    return err;
  }
  static __always_inline u64 read_msr(u32 msr)
@@ -573,27 +621,43 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
  #define PV_SAVE_ALL_CALLER_REGS        "pushl %ecx;"
  #define PV_RESTORE_ALL_CALLER_REGS    "popl  %ecx;"
  #else
+/* save and restore caller-save registers, except %rax, %rcx and %rdx. */
+#define PV_SAVE_COMMON_CALLER_REGS    \
+    "push %rsi;"            \
+    "push %rdi;"            \
+    "push %r8;"            \
+    "push %r9;"            \
+    "push %r10;"            \
+    "push %r11;"

Add an empty line please, easier to read.

Okay (same below).


+#define PV_RESTORE_COMMON_CALLER_REGS    \
+    "pop %r11;"            \
+    "pop %r10;"            \
+    "pop %r9;"            \
+    "pop %r8;"            \
+    "pop %rdi;"            \
+    "pop %rsi;"
+
+#define PV_PROLOGUE_MSR(func)        \
+    PV_SAVE_COMMON_CALLER_REGS    \
+    PV_PROLOGUE_MSR_##func

Ditto.  And the following similar cases.

+#define PV_EPILOGUE_MSR(func)        \
+    PV_EPILOGUE_MSR_##func        \
+    PV_RESTORE_COMMON_CALLER_REGS
+
  /* save and restore all caller-save registers, except return value */
  #define PV_SAVE_ALL_CALLER_REGS                        \
      "push %rcx;"                            \
      "push %rdx;"                            \
-    "push %rsi;"                            \
-    "push %rdi;"                            \
-    "push %r8;"                            \
-    "push %r9;"                            \
-    "push %r10;"                            \
-    "push %r11;"
+    PV_SAVE_COMMON_CALLER_REGS
  #define PV_RESTORE_ALL_CALLER_REGS                    \
-    "pop %r11;"                            \
-    "pop %r10;"                            \
-    "pop %r9;"                            \
-    "pop %r8;"                            \
-    "pop %rdi;"                            \
-    "pop %rsi;"                            \
+    PV_RESTORE_COMMON_CALLER_REGS                    \
      "pop %rdx;"                            \
      "pop %rcx;"
  #endif
+#define PV_PROLOGUE_ALL(func)    PV_SAVE_ALL_CALLER_REGS
+#define PV_EPILOGUE_ALL(func)    PV_RESTORE_ALL_CALLER_REGS
+
  /*
   * Generate a thunk around a function which saves all caller-save
   * registers except for the return value.  This allows C functions to
@@ -607,7 +671,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
   * functions.
   */
  #define PV_THUNK_NAME(func) "__raw_callee_save_" #func
-#define __PV_CALLEE_SAVE_REGS_THUNK(func, section)            \
+#define __PV_CALLEE_SAVE_REGS_THUNK(func, section, helper)        \
      extern typeof(func) __raw_callee_save_##func;            \
                                      \
      asm(".pushsection " section ", \"ax\";"                \
@@ -617,16 +681,18 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
          PV_THUNK_NAME(func) ":"                    \
          ASM_ENDBR                            \
          FRAME_BEGIN                            \
-        PV_SAVE_ALL_CALLER_REGS                    \
+        PV_PROLOGUE_##helper(func)                    \
          "call " #func ";"                        \
-        PV_RESTORE_ALL_CALLER_REGS                    \
+        PV_EPILOGUE_##helper(func)                    \
          FRAME_END                            \
          ASM_RET                            \
          ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";"    \
          ".popsection")
  #define PV_CALLEE_SAVE_REGS_THUNK(func)            \
-    __PV_CALLEE_SAVE_REGS_THUNK(func, ".text")
+    __PV_CALLEE_SAVE_REGS_THUNK(func, ".text", ALL)
+#define PV_CALLEE_SAVE_REGS_MSR_THUNK(func)        \
+    __PV_CALLEE_SAVE_REGS_THUNK(func, ".text", MSR)
  /* Get a reference to a callee-save function */
  #define PV_CALLEE_SAVE(func)                        \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b08b9d3122d6..f7f879319e90 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -91,15 +91,15 @@ struct pv_cpu_ops {
                unsigned int *ecx, unsigned int *edx);
      /* Unsafe MSR operations.  These will warn or panic on failure. */
-    u64 (*read_msr)(u32 msr);
-    void (*write_msr)(u32 msr, u64 val);
+    struct paravirt_callee_save read_msr;
+    struct paravirt_callee_save write_msr;
      /*
       * Safe MSR operations.
       * Returns 0 or -EIO.
       */
-    int (*read_msr_safe)(u32 msr, u64 *val);
-    int (*write_msr_safe)(u32 msr, u64 val);
+    struct paravirt_callee_save read_msr_safe;
+    struct paravirt_callee_save write_msr_safe;
      u64 (*read_pmc)(int counter);
@@ -520,6 +520,10 @@ unsigned long pv_native_save_fl(void);
  void pv_native_irq_disable(void);
  void pv_native_irq_enable(void);
  unsigned long pv_native_read_cr2(void);
+void pv_native_rdmsr(void);
+void pv_native_wrmsr(void);
+void pv_native_rdmsr_safe(void);
+void pv_native_wrmsr_safe(void);
  #endif
  #define paravirt_nop    ((void *)nop_func)
@@ -527,6 +531,7 @@ unsigned long pv_native_read_cr2(void);
  #endif    /* __ASSEMBLER__ */
  #define ALT_NOT_XEN    ALT_NOT(X86_FEATURE_XENPV)
+#define ALT_XENPV_CALL    ALT_DIRECT_CALL(X86_FEATURE_XENPV)
  #endif  /* CONFIG_PARAVIRT */
  #endif    /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 0a985784be9b..0351acb5a143 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -14,7 +14,8 @@ void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 lock
   */
  #ifdef CONFIG_64BIT
-__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text",
+                ALL);
  #define __pv_queued_spin_unlock    __pv_queued_spin_unlock
  /*
@@ -61,7 +62,7 @@ DEFINE_ASM_FUNC(__raw_callee_save___pv_queued_spin_unlock,
  #else /* CONFIG_64BIT */
  extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock);
-__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text");
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text", ALL);
  #endif /* CONFIG_64BIT */
  #endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 015bf298434f..ff7d7fdae360 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -50,6 +50,24 @@ DEFINE_ASM_FUNC(pv_native_save_fl, "pushf; pop %rax", .noinstr.text);
  DEFINE_ASM_FUNC(pv_native_irq_disable, "cli", .noinstr.text);
  DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text);
  DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text);
+DEFINE_ASM_FUNC(pv_native_rdmsr,
+        "1: rdmsr\n"
+        "2:\n"
+        _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR), .noinstr.text);
+DEFINE_ASM_FUNC(pv_native_wrmsr,
+        "1: wrmsr\n"
+        "2:\n"
+        _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR), .noinstr.text);
+DEFINE_ASM_FUNC(pv_native_rdmsr_safe,
+        "1: rdmsr; xor %ecx, %ecx\n"
+        "2:\n"
+        _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %%ecx),
+        .noinstr.text);
+DEFINE_ASM_FUNC(pv_native_wrmsr_safe,
+        "1: wrmsr; xor %eax, %eax\n"
+        "2:\n"
+        _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_WRMSR_SAFE, %%eax),
+        .noinstr.text);
  #endif
  DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);
@@ -129,10 +147,10 @@ struct paravirt_patch_template pv_ops = {
      .cpu.read_cr0        = native_read_cr0,
      .cpu.write_cr0        = native_write_cr0,
      .cpu.write_cr4        = native_write_cr4,
-    .cpu.read_msr        = native_read_msr,
-    .cpu.write_msr        = native_write_msr,
-    .cpu.read_msr_safe    = native_read_msr_safe,
-    .cpu.write_msr_safe    = native_write_msr_safe,
+    .cpu.read_msr        = __PV_IS_CALLEE_SAVE(pv_native_rdmsr),
+    .cpu.write_msr        = __PV_IS_CALLEE_SAVE(pv_native_wrmsr),
+    .cpu.read_msr_safe    = __PV_IS_CALLEE_SAVE(pv_native_rdmsr_safe),
+    .cpu.write_msr_safe    = __PV_IS_CALLEE_SAVE(pv_native_wrmsr_safe),
      .cpu.read_pmc        = native_read_pmc,
      .cpu.load_tr_desc    = native_load_tr_desc,
      .cpu.set_ldt        = native_set_ldt,
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 3be38350f044..c279b2bef7eb 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1160,36 +1160,66 @@ static void xen_do_write_msr(u32 msr, u64 val, int *err)
      }
  }
-static int xen_read_msr_safe(u32 msr, u64 *val)
-{
+/*
+ * Prototypes for functions called via PV_CALLEE_SAVE_REGS_THUNK() in order
+ * to avoid warnings with "-Wmissing-prototypes".
+ */
+struct xen_rdmsr_safe_ret {
+    u64 val;
      int err;
+};
+struct xen_rdmsr_safe_ret xen_read_msr_safe(u32 msr);
+int xen_write_msr_safe(u32 msr, u32 low, u32 high);
+u64 xen_read_msr(u32 msr);
+void xen_write_msr(u32 msr, u32 low, u32 high);
-    *val = xen_do_read_msr(msr, &err);
-    return err;
+__visible struct xen_rdmsr_safe_ret xen_read_msr_safe(u32 msr)
+{
+    struct xen_rdmsr_safe_ret ret;

struct xen_rdmsr_safe_ret ret = { 0, 0 };

Because the 'err' member may not be set in xen_do_read_msr().

Right.


+
+    ret.val = xen_do_read_msr(msr, &ret.err);
+    return ret;
  }
+#define PV_PROLOGUE_MSR_xen_read_msr_safe    "mov %ecx, %edi;"
+#define PV_EPILOGUE_MSR_xen_read_msr_safe    \
+    "mov %edx, %ecx; mov %rax, %rdx; mov %eax, %eax; shr $0x20, %rdx;"
+PV_CALLEE_SAVE_REGS_MSR_THUNK(xen_read_msr_safe);
-static int xen_write_msr_safe(u32 msr, u64 val)
+__visible int xen_write_msr_safe(u32 msr, u32 low, u32 high)

I think we can avoid splitting this u64 into two u32.

This is related to the native WRMSR interface. The WRMSR needs to be
able to be replaced by the call of the Xen specific function.

I could handle this in the prologue helpers, but I'd prefer to keep
those helpers as small as possible.


  {
      int err = 0;
-    xen_do_write_msr(msr, val, &err);
+    xen_do_write_msr(msr, (u64)high << 32 | low, &err);
      return err;
  }
+#define PV_PROLOGUE_MSR_xen_write_msr_safe    \
+    "mov %ecx, %edi; mov %eax, %esi;"
+#define PV_EPILOGUE_MSR_xen_write_msr_safe
+PV_CALLEE_SAVE_REGS_MSR_THUNK(xen_write_msr_safe);
-static u64 xen_read_msr(u32 msr)
+__visible u64 xen_read_msr(u32 msr)
  {
      int err;
      return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
  }
+#define PV_PROLOGUE_MSR_xen_read_msr    "mov %ecx, %edi;"
+#define PV_EPILOGUE_MSR_xen_read_msr    \
+    "mov %rax, %rdx; mov %eax, %eax; shr $0x20, %rdx;"
+PV_CALLEE_SAVE_REGS_MSR_THUNK(xen_read_msr);
-static void xen_write_msr(u32 msr, u64 val)
+__visible void xen_write_msr(u32 msr, u32 low, u32 high)

Ditto.

See above.


  {
      int err;
-    xen_do_write_msr(msr, val, xen_msr_safe ? &err : NULL);
+    xen_do_write_msr(msr, (u64)high << 32 | low,
+             xen_msr_safe ? &err : NULL);
  }
+#define PV_PROLOGUE_MSR_xen_write_msr    \
+    "mov %ecx, %edi; mov %eax, %esi;"
+#define PV_EPILOGUE_MSR_xen_write_msr
+PV_CALLEE_SAVE_REGS_MSR_THUNK(xen_write_msr);
  /* This is called once we have the cpu_possible_mask */
  void __init xen_setup_vcpu_info_placement(void)


Juergen

From 7db2e9790442d073d25fec220d88fb2f85e4683f Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Wed, 7 May 2025 12:47:22 +0200
Subject: [PATCH] x86/alternative: save original code before replacing it

In case of ALT_FLAG_DIRECT_CALL being set for an alternative
replacement, the patching needs to look at the original instruction to
find the target address of the direct call to be patched in.

In case of nested ALTERNATIVEs this limits the use of
ALT_FLAG_DIRECT_CALL to either the first replacement, or to be
mutually exclusive with all previous replacements. Otherwise the
original code could have been overwritten already resulting in a
BUG(), due to ALT_FLAG_DIRECT_CALL handling not finding the expected
indirect call instruction.

Avoid this problem by saving the original code before replacing it. As
this is the only case where the original code is required to be
analyzed, special case the copy to happen only if the original code has
the length of an indirect call (6 bytes). This minimizes complexity and
stack usage.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 arch/x86/kernel/alternative.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index bf82c6f7d690..8d6d3a4fc4ab 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -387,6 +387,12 @@ EXPORT_SYMBOL(BUG_func);
 
 #define CALL_RIP_REL_OPCODE	0xff
 #define CALL_RIP_REL_MODRM	0x15
+#define CALL_RIP_INSTR_LEN	6
+
+static inline u8 * instr_va(struct alt_instr *i)
+{
+	return (u8 *)&i->instr_offset + i->instr_offset;
+}
 
 /*
  * Rewrite the "call BUG_func" replacement to point to the target of the
@@ -402,7 +408,7 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
 		BUG();
 	}
 
-	if (a->instrlen != 6 ||
+	if (a->instrlen != CALL_RIP_INSTR_LEN ||
 	    instr[0] != CALL_RIP_REL_OPCODE ||
 	    instr[1] != CALL_RIP_REL_MODRM) {
 		pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
@@ -414,7 +420,7 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
 #ifdef CONFIG_X86_64
 	/* ff 15 00 00 00 00   call   *0x0(%rip) */
 	/* target address is stored at "next instruction + disp". */
-	target = *(void **)(instr + a->instrlen + disp);
+	target = *(void **)(instr_va(a) + a->instrlen + disp);
 #else
 	/* ff 15 00 00 00 00   call   *0x0 */
 	/* target address is stored at disp. */
@@ -432,11 +438,6 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
 	return 5;
 }
 
-static inline u8 * instr_va(struct alt_instr *i)
-{
-	return (u8 *)&i->instr_offset + i->instr_offset;
-}
-
 /*
  * Replace instructions with better alternatives for this CPU type. This runs
  * before SMP is initialized to avoid SMP problems with self modifying code.
@@ -451,7 +452,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 						  struct alt_instr *end)
 {
 	u8 insn_buff[MAX_PATCH_LEN];
-	u8 *instr, *replacement;
+	u8 old_insn[CALL_RIP_INSTR_LEN];
+	u8 *instr, *replacement, *old_va = NULL;
 	struct alt_instr *a, *b;
 
 	DPRINTK(ALT, "alt table %px, -> %px", start, end);
@@ -513,11 +515,21 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 			instr, instr, a->instrlen,
 			replacement, a->replacementlen, a->flags);
 
+		/*
+		 * Remember original code if it could be an indirect call.
+		 * This enables ALT_FLAG_DIRECT_CALL handling with nested
+		 * alternatives even if the original code has been modified
+		 * already.
+		 */
+		if (old_va != instr && a->instrlen == CALL_RIP_INSTR_LEN) {
+			old_va = instr;
+			memcpy(old_insn, instr, CALL_RIP_INSTR_LEN);
+		}
 		memcpy(insn_buff, replacement, a->replacementlen);
 		insn_buff_sz = a->replacementlen;
 
 		if (a->flags & ALT_FLAG_DIRECT_CALL) {
-			insn_buff_sz = alt_replace_call(instr, insn_buff, a);
+			insn_buff_sz = alt_replace_call(old_insn, insn_buff, a);
 			if (insn_buff_sz < 0)
 				continue;
 		}
-- 
2.43.0

Attachment: OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key

Attachment: OpenPGP_signature.asc
Description: OpenPGP digital signature

Reply via email to