Hello,

as The bug isn't solved in current unstable 0.cvs20050811-2 yet, I've
updated the patch against the Debian sources (see attachment).

1) The push/pop statements in the first change to
mpegvideo_mmx_template.c are unnecessary, because REG_a will not be used
before it gets loaded with the constant -128 - therefore no reason to
back it up

2) In the second change to megvideo_mmx.template.c, you backed up REG_a
with push, loaded it with the second argument, but then in movq the old
value of REG_a is needed to correctly address qmat[i] - this change to
the upstream source is a bug!

I've solved this by adding the array start address (2'nd parameter = "m"
qmat+64) to the index (REG_a), then accessing qmat[i] and afterwards
subtracting the start address again - push/pop shouldn't be used here
anyway, because gcc might address memory operands through %esp, so
modifying the stack pointer with push/pop is not gueranteed to work
under all conditions

3) Introducing the const qmat2 = qmat+64 doesn't make very much sense to
me - why has this been done?

Regards,

Tobias

diff -Nur ffmpeg-0.cvs20050811/libavcodec/i386/mpegvideo_mmx_template.c ffmpeg-0.cvs20050811-bug#318493/libavcodec/i386/mpegvideo_mmx_template.c
--- ffmpeg-0.cvs20050811/libavcodec/i386/mpegvideo_mmx_template.c	2005-09-04 11:24:05.000000000 +0200
+++ ffmpeg-0.cvs20050811-bug#318493/libavcodec/i386/mpegvideo_mmx_template.c	2005-09-04 13:08:59.000000000 +0200
@@ -96,10 +96,8 @@
             "pxor %%mm7, %%mm7			\n\t" // 0
             "pxor %%mm4, %%mm4			\n\t" // 0
 #if defined(PIC) && !defined(ARCH_X86_64)
-            "push %%"REG_a"			\n\t"
             "movl %2, %%"REG_a"			\n\t"
             "movq (%%"REG_a"), %%mm5		\n\t" // qmat[0]
-            "pop %%"REG_a"			\n\t"
 #else
             "movq (%2), %%mm5			\n\t" // qmat[0]
 #endif
@@ -153,7 +151,6 @@
         : "g" (s->max_qcoeff)
         );
     }else{ // FMT_H263
-	const uint16_t *qmat2 = qmat+64;
         asm volatile(
             "movd %%"REG_a", %%mm3		\n\t" // last_non_zero_p1
             SPREADW(%%mm3)
@@ -170,12 +167,11 @@
             "movq (%3, %%"REG_a"), %%mm6	\n\t" // bias[0]
             "paddusw %%mm6, %%mm0		\n\t" // ABS(block[i]) + bias[0]
 #if defined(PIC) && !defined(ARCH_X86_64)
-            "push %%"REG_a"			\n\t"
-            "movl %2, %%"REG_a"			\n\t"
-            "movq (%%"REG_a", %%"REG_a"), %%mm5	\n\t" // qmat[i]
-            "pop %%"REG_a"			\n\t"
+            "addl %2, %%"REG_a" 	        \n\t" 
+            "movq (%%"REG_a"), %%mm5		\n\t" // qmat[i]
+            "subl %2, %%"REG_a" 	        \n\t" 
 #else
-            "movq (%2, %%"REG_a"), %%mm5		\n\t" // qmat[i]
+            "movq (%2, %%"REG_a"), %%mm5	\n\t" // qmat[i]
 #endif
             "pmulhw %%mm5, %%mm0		\n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
             "por %%mm0, %%mm4			\n\t" 
@@ -199,7 +195,7 @@
             "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
 	    : "+a" (last_non_zero_p1)
 #if defined(PIC) && !defined(ARCH_X86_64)
-            : "r" (block+64), "m" (qmat2), "r" (bias+64),
+            : "r" (block+64), "m" (qmat+64), "r" (bias+64),
 #else
             : "r" (block+64), "r" (qmat+64), "r" (bias+64),
 #endif

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to