Assuming Manfred's diagnosis is right, something like this might fix it.

*note*: Not tested; this is just off the top of my head...
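
The idea: the "prefetch 320(%0)" in each copy loop reads 320 bytes ahead
of the current source pointer, so on the last few 64-byte blocks it can
touch memory beyond the end of the source buffer. So stop each
prefetching loop five blocks (320 bytes) early and finish with a plain
loop. A minimal user-space sketch of the split (split_copy, BLOCK and
AHEAD are names made up for illustration; GCC's __builtin_prefetch()
stands in for the 3DNow! prefetch):

#include <string.h>

#define BLOCK   64                      /* bytes copied per iteration */
#define AHEAD   (320 / BLOCK)           /* prefetch distance = 5 blocks */

static void split_copy(char *to, const char *from, size_t blocks)
{
        /*
         *      Main loop: prefetching 320 bytes ahead is only safe
         *      while more than AHEAD blocks of source remain.
         */
        for (; blocks > AHEAD; blocks--) {
                __builtin_prefetch(from + 320);
                memcpy(to, from, BLOCK);
                from += BLOCK;
                to += BLOCK;
        }

        /*
         *      Tail: at most AHEAD blocks left; copy them without
         *      prefetching so we never read past the end of the source.
         */
        for (; blocks > 0; blocks--) {
                memcpy(to, from, BLOCK);
                from += BLOCK;
                to += BLOCK;
        }
}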


--- arch/i386/lib/mmx.c~        Sun Apr 15 16:49:54 2001
+++ arch/i386/lib/mmx.c Sat May  5 08:03:17 2001
@@ -57,7 +57,11 @@
                : : "r" (from) );
                
        
-       for(; i>0; i--)
+       /*
+        *      While we have more than 320 bytes left to copy
+        */
+        
+       for(; i>5; i--)
        {
                __asm__ __volatile__ (
                "1:  prefetch 320(%0)\n"
@@ -89,6 +93,34 @@
                from+=64;
                to+=64;
        }
+
+       /*
+        *      While we have at least 64 bytes left to copy
+        */
+        
+       for(; i>0; i--)
+       {
+               __asm__ __volatile__ (
+               "  movq (%0), %%mm0\n"
+               "  movq 8(%0), %%mm1\n"
+               "  movq 16(%0), %%mm2\n"
+               "  movq 24(%0), %%mm3\n"
+               "  movq %%mm0, (%1)\n"
+               "  movq %%mm1, 8(%1)\n"
+               "  movq %%mm2, 16(%1)\n"
+               "  movq %%mm3, 24(%1)\n"
+               "  movq 32(%0), %%mm0\n"
+               "  movq 40(%0), %%mm1\n"
+               "  movq 48(%0), %%mm2\n"
+               "  movq 56(%0), %%mm3\n"
+               "  movq %%mm0, 32(%1)\n"
+               "  movq %%mm1, 40(%1)\n"
+               "  movq %%mm2, 48(%1)\n"
+               "  movq %%mm3, 56(%1)\n"
+               : : "r" (from), "r" (to) : "memory");
        /*
         *      Now do the tail of the block
         */
@@ -163,7 +192,11 @@
                ".previous"
                : : "r" (from) );
 
-       for(i=0; i<4096/64; i++)
+       /*
+        *      Copy the page until we have 320 bytes to go
+        */
+        
+       for(i=0; i<59; i++)
        {
                __asm__ __volatile__ (
                "1: prefetch 320(%0)\n"
@@ -195,6 +228,35 @@
                from+=64;
                to+=64;
        }
+
+       /*
+        *      Finish off the page
+        */
+        
+       for(; i<64; i++)
+       {
+               __asm__ __volatile__ (
+               "   movq (%0), %%mm0\n"
+               "   movntq %%mm0, (%1)\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movntq %%mm1, 8(%1)\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movntq %%mm2, 16(%1)\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movntq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm4\n"
+               "   movntq %%mm4, 32(%1)\n"
+               "   movq 40(%0), %%mm5\n"
+               "   movntq %%mm5, 40(%1)\n"
+               "   movq 48(%0), %%mm6\n"
+               "   movntq %%mm6, 48(%1)\n"
+               "   movq 56(%0), %%mm7\n"
+               "   movntq %%mm7, 56(%1)\n"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+
        /* since movntq is weakly-ordered, a "sfence" is needed to become
         * ordered again.
         */
@@ -270,7 +332,11 @@
                ".previous"
                : : "r" (from) );
 
-       for(i=0; i<4096/64; i++)
+       /*
+        *      Copy the page until we have 320 bytes to go
+        */
+        
+       for(i=0; i<59; i++)
        {
                __asm__ __volatile__ (
                "1: prefetch 320(%0)\n"
@@ -298,6 +364,34 @@
                "       .align 4\n"
                "       .long 1b, 3b\n"
                ".previous"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+
+       /*
+        *      Copy the tail of the page
+        */
+        
+       for(; i<64; i++)
+       {
+               __asm__ __volatile__ (
+               "   movq (%0), %%mm0\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movq %%mm0, (%1)\n"
+               "   movq %%mm1, 8(%1)\n"
+               "   movq %%mm2, 16(%1)\n"
+               "   movq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm0\n"
+               "   movq 40(%0), %%mm1\n"
+               "   movq 48(%0), %%mm2\n"
+               "   movq 56(%0), %%mm3\n"
+               "   movq %%mm0, 32(%1)\n"
+               "   movq %%mm1, 40(%1)\n"
+               "   movq %%mm2, 48(%1)\n"
+               "   movq %%mm3, 56(%1)\n"
                : : "r" (from), "r" (to) : "memory");
                from+=64;
                to+=64;
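
For reference, the magic numbers are just block arithmetic: a 4096-byte
page is 4096/64 = 64 blocks, the prefetch runs 320/64 = 5 blocks ahead,
so the page loops can prefetch only for the first 64 - 5 = 59 blocks
(hence i<59, with i<64 finishing the tail), and the length-based loop
keeps the same margin with i>5. A trivial check of that arithmetic:

#include <assert.h>

int main(void)
{
        int page_blocks = 4096 / 64;    /* 64-byte blocks per page     */
        int ahead       = 320 / 64;     /* prefetch distance in blocks */

        assert(page_blocks == 64);
        assert(ahead == 5);
        assert(page_blocks - ahead == 59);      /* the i<59 loop bound */
        return 0;
}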