On Sat, Nov 18, 2023 at 05:45:03PM -0600, Timothy Pearson wrote: > During floating point and vector save to thread data fr0/vs0 are clobbered > by the FPSCR/VSCR store routine. This leads to userspace register corruption > and application data corruption / crash under the following rare condition: > > * A userspace thread is executing with VSX/FP mode enabled > * The userspace thread is making active use of fr0 and/or vs0 > * An IPI is taken in kernel mode, forcing the userspace thread to reschedule > * The userspace thread is interrupted by the IPI before accessing data it > previously stored in fr0/vs0 > * The thread being switched in by the IPI has a pending signal > > If these exact criteria are met, then the following sequence happens: > > * The existing thread FP storage is still valid before the IPI, due to a > prior call to save_fpu() or store_fp_state(). Note that the current > fr0/vs0 registers have been clobbered, so the FP/VSX state in registers > is now invalid pending a call to restore_fp()/restore_altivec(). > * IPI -- FP/VSX register state remains invalid > * interrupt_exit_user_prepare_main() calls do_notify_resume(), > due to the pending signal > * do_notify_resume() eventually calls save_fpu() via giveup_fpu(), which > merrily reads and saves the invalid FP/VSX state to thread local storage. > * interrupt_exit_user_prepare_main() calls restore_math(), writing the > invalid > FP/VSX state back to registers. > * Execution is released to userspace, and the application crashes or corrupts > data. > > Without the pending signal, do_notify_resume() is never called, therefore the > invalid register state doesn't matter as it is overwritten nearly immediately > by interrupt_exit_user_prepare_main() calling restore_math() before return > to userspace. > > The combination of MariaDB and io_uring is especially good at triggering data > corruption using the above sequence, see MariaDB bug MDEV-30728. 
> > Restore fr0/vs0 after FPSCR/VSCR store has completed for both the fp and > altivec register save paths. > > Tested under QEMU in kvm mode, running on a Talos II workstation with dual > POWER9 DD2.2 CPUs. > > Tested-by: Timothy Pearson <tpear...@raptorengineering.com> > Signed-off-by: Timothy Pearson <tpear...@raptorengineering.com> > --- > arch/powerpc/kernel/fpu.S | 13 +++++++++++++ > arch/powerpc/kernel/vector.S | 4 ++++ > 2 files changed, 17 insertions(+) > > diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S > index 6a9acfb690c9..2f8f3f93cbb6 100644 > --- a/arch/powerpc/kernel/fpu.S > +++ b/arch/powerpc/kernel/fpu.S > @@ -23,6 +23,15 @@ > #include <asm/feature-fixups.h> > > #ifdef CONFIG_VSX > +#define __REST_1FPVSR(n,c,base) > \ > +BEGIN_FTR_SECTION \ > + b 2f; \ > +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ > + REST_FPR(n,base); \ > + b 3f; \ > +2: REST_VSR(n,c,base); \ > +3: > + > #define __REST_32FPVSRS(n,c,base) \ > BEGIN_FTR_SECTION \ > b 2f; \ > @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); > \ > 2: SAVE_32VSRS(n,c,base); \ > 3: > #else > +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) > #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) > #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) > #endif > +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) > #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) > #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) > > @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) > SAVE_32FPVSRS(0, R4, R3) > mffs fr0 > stfd fr0,FPSTATE_FPSCR(r3) > + REST_1FPVSR(0, R4, R3) > blr > EXPORT_SYMBOL(store_fp_state) > > @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) > 2: SAVE_32FPVSRS(0, R4, R6) > mffs fr0 > stfd fr0,FPSTATE_FPSCR(r6) > + REST_1FPVSR(0, R4, R6) > blr > diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S > index 4094e4c4c77a..8c63b05b421e 100644 > --- a/arch/powerpc/kernel/vector.S > +++ 
b/arch/powerpc/kernel/vector.S > @@ -33,6 +33,8 @@ _GLOBAL(store_vr_state) > mfvscr v0 > li r4, VRSTATE_VSCR > stvx v0, r4, r3 > + li r4, 0 > + lvx v0, r4, r3
Just a small nit, no need for clearing r4, "lvx v0,0,r3" will do: as with all Power instructions using indexed addressing mode, an RA operand of 0 is read as a literal zero rather than the contents of r0. > blr > EXPORT_SYMBOL(store_vr_state) > > @@ -109,6 +111,8 @@ _GLOBAL(save_altivec) > mfvscr v0 > li r4,VRSTATE_VSCR > stvx v0,r4,r7 > + li r4,0 > + lvx v0,r4,r7 Ditto. And great bug hunt, by the way... > blr > > #ifdef CONFIG_VSX > -- > 2.39.2 Cheers, Gabriel