On Thu, 2025-08-21 at 14:28 +0200, Peter Zijlstra wrote: > The uprobe syscall stores and strips the trampoline stack frame from > the user context, to make it appear similar to an exception at the > original instruction. It then restores the trampoline stack when it > can exit using sysexit. > > Make sure to match the regular stack manipulation with shadow stack > operations such that regular and shadow stack don't get out of sync > and causes trouble. > > This enables using the optimization when shadow stack is in use. > > Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> > --- > arch/x86/include/asm/shstk.h | 4 ++++ > arch/x86/kernel/shstk.c | 40 ++++++++++++++++++++++++++++++++++++++++ > arch/x86/kernel/uprobes.c | 17 ++++++++--------- > 3 files changed, 52 insertions(+), 9 deletions(-) > > --- a/arch/x86/include/asm/shstk.h > +++ b/arch/x86/include/asm/shstk.h > @@ -23,6 +23,8 @@ int setup_signal_shadow_stack(struct ksi > int restore_signal_shadow_stack(void); > int shstk_update_last_frame(unsigned long val); > bool shstk_is_enabled(void); > +int shstk_pop(u64 *val); > +int shstk_push(u64 val); > #else > static inline long shstk_prctl(struct task_struct *task, int option, > unsigned long arg2) { return -EINVAL; } > @@ -35,6 +37,8 @@ static inline int setup_signal_shadow_st > static inline int restore_signal_shadow_stack(void) { return 0; } > static inline int shstk_update_last_frame(unsigned long val) { return 0; } > static inline bool shstk_is_enabled(void) { return false; } > +static inline int shstk_pop(u64 *val) { return -ENOTSUPP; } > +static inline int shstk_push(u64 val) { return -ENOTSUPP; } > #endif /* CONFIG_X86_USER_SHADOW_STACK */ > > #endif /* __ASSEMBLER__ */ > --- a/arch/x86/kernel/shstk.c > +++ b/arch/x86/kernel/shstk.c > @@ -246,6 +246,46 @@ static unsigned long get_user_shstk_addr > return ssp; > } > > +int shstk_pop(u64 *val) > +{ > + int ret = 0; > + u64 ssp; > + > + if (!features_enabled(ARCH_SHSTK_SHSTK)) > + return -ENOTSUPP; > + > + fpregs_lock_and_load(); > + > + rdmsrq(MSR_IA32_PL3_SSP, ssp); > + if (val && get_user(*val, (__user u64 *)ssp))
It makes it so shstk_pop() can incssp without pushing anything to the shadow stack, but nothing uses this. Also, since there is no read_user_shstk_64() it should probably check that the VMA is actually shadow stack, like how it does in shstk_pop_sigframe(). What this actually would expose, I'm not sure. It might be ok. There would just be a fault later during shstk_push(args.retaddr) I guess. Hmm, I guess no strong objections, but I'm still not sure it's worth supporting the optimization. > + ret = -EFAULT; > + else > + wrmsrq(MSR_IA32_PL3_SSP, ssp + SS_FRAME_SIZE); > + fpregs_unlock(); > + > + return ret; > +} > + > +int shstk_push(u64 val) > +{ > + u64 ssp; > + int ret; > + > + if (!features_enabled(ARCH_SHSTK_SHSTK)) > + return -ENOTSUPP; > + > + fpregs_lock_and_load(); > + > + rdmsrq(MSR_IA32_PL3_SSP, ssp); > + ssp -= SS_FRAME_SIZE; > + ret = write_user_shstk_64((__user void *)ssp, val); > + if (!ret) > + wrmsrq(MSR_IA32_PL3_SSP, ssp); > + fpregs_unlock(); > + > + return ret; > +} > + > #define SHSTK_DATA_BIT BIT(63) > > static int put_shstk_data(u64 __user *addr, u64 data) > --- a/arch/x86/kernel/uprobes.c > +++ b/arch/x86/kernel/uprobes.c > @@ -804,7 +804,7 @@ SYSCALL_DEFINE0(uprobe) > { > struct pt_regs *regs = task_pt_regs(current); > struct uprobe_syscall_args args; > - unsigned long ip, sp; > + unsigned long ip, sp, sret; > int err; > > /* Allow execution only from uprobe trampolines. */ > @@ -831,6 +831,10 @@ SYSCALL_DEFINE0(uprobe) > > sp = regs->sp; > > + err = shstk_pop((u64 *)&sret); > + if (err == -EFAULT || (!err && sret != args.retaddr)) > + goto sigill; > + > handle_syscall_uprobe(regs, regs->ip); > > /* > @@ -855,6 +859,9 @@ SYSCALL_DEFINE0(uprobe) > if (args.retaddr - 5 != regs->ip) > args.retaddr = regs->ip; > > + if (shstk_push(args.retaddr) == -EFAULT) > + goto sigill; > + > regs->ip = ip; > > err = copy_to_user((void __user *)regs->sp, &args, sizeof(args)); > @@ -1124,14 +1131,6 @@ void arch_uprobe_optimize(struct arch_up > struct mm_struct *mm = current->mm; > uprobe_opcode_t insn[5]; > > - /* > - * Do not optimize if shadow stack is enabled, the return address hijack > - * code in arch_uretprobe_hijack_return_addr updates wrong frame when > - * the entry uprobe is optimized and the shadow stack crashes the app. > - */ > - if (shstk_is_enabled()) > - return; > - > if (!should_optimize(auprobe)) > return; > > >