The default kernel_fpu_begin() doesn't work on systems that support XMM but
haven't yet enabled CR4.OSFXSR.  This causes crashes when _mmx_memcpy() is
called too early because LDMXCSR generates #UD when the aforementioned bit
is clear.

Fix it by using kernel_fpu_begin_mask(KFPU_387) explicitly.

Fixes: 7ad816762f9b ("x86/fpu: Reset MXCSR to default in kernel_fpu_begin()")
Reported-by: Krzysztof Mazur <krzys...@podlesie.net>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/lib/mmx_32.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 4321fa02e18d..2a6ad7aa148a 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -26,6 +26,16 @@
 #include <asm/fpu/api.h>
 #include <asm/asm.h>
 
+/*
+ * For MMX, we use KFPU_387.  MMX instructions are not affected by MXCSR,
+ * but both AMD and Intel documentation states that even integer MMX
+ * operations will result in #MF if an exception is pending in FCW.
+ *
+ * We don't need EMMS afterwards because, after we call kernel_fpu_end(),
+ * any subsequent user of the 387 stack will reinitialize it using
+ * KFPU_387.
+ */
+
 void *_mmx_memcpy(void *to, const void *from, size_t len)
 {
        void *p;
@@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
        p = to;
        i = len >> 6; /* len/64 */
 
-       kernel_fpu_begin();
+       kernel_fpu_begin_mask(KFPU_387);
 
        __asm__ __volatile__ (
                "1: prefetch (%0)\n"            /* This set is 28 bytes */
@@ -127,7 +137,7 @@ static void fast_clear_page(void *page)
 {
        int i;
 
-       kernel_fpu_begin();
+       kernel_fpu_begin_mask(KFPU_387);
 
        __asm__ __volatile__ (
                "  pxor %%mm0, %%mm0\n" : :
@@ -160,7 +170,7 @@ static void fast_copy_page(void *to, void *from)
 {
        int i;
 
-       kernel_fpu_begin();
+       kernel_fpu_begin_mask(KFPU_387);
 
        /*
         * maybe the prefetch stuff can go before the expensive fnsave...
@@ -247,7 +257,7 @@ static void fast_clear_page(void *page)
 {
        int i;
 
-       kernel_fpu_begin();
+       kernel_fpu_begin_mask(KFPU_387);
 
        __asm__ __volatile__ (
                "  pxor %%mm0, %%mm0\n" : :
@@ -282,7 +292,7 @@ static void fast_copy_page(void *to, void *from)
 {
        int i;
 
-       kernel_fpu_begin();
+       kernel_fpu_begin_mask(KFPU_387);
 
        __asm__ __volatile__ (
                "1: prefetch (%0)\n"
-- 
2.29.2

Reply via email to