Hello, as The bug isn't solved in current unstable 0.cvs20050811-2 yet, I've updated the patch against the Debian sources (see attachment).
1) The push/pop statements in the first change to mpegvideo_mmx_template.c are unnecessary, because REG_a will not be used before it gets loaded with the constant -128 - therefore no reason to back it up 2) In the second change to megvideo_mmx.template.c, you backed up REG_a with push, loaded it with the second argument, but then in movq the old value of REG_a is needed to correctly address qmat[i] - this change to the upstream source is a bug! I've solved this by adding the array start address (2'nd parameter = "m" qmat+64) to the index (REG_a), then accessing qmat[i] and afterwards subtracting the start address again - push/pop shouldn't be used here anyway, because gcc might address memory operands through %esp, so modifying the stack pointer with push/pop is not gueranteed to work under all conditions 3) Introducing the const qmat2 = qmat+64 doesn't make very much sense to me - why has this been done? Regards, Tobias
diff -Nur ffmpeg-0.cvs20050811/libavcodec/i386/mpegvideo_mmx_template.c ffmpeg-0.cvs20050811-bug#318493/libavcodec/i386/mpegvideo_mmx_template.c --- ffmpeg-0.cvs20050811/libavcodec/i386/mpegvideo_mmx_template.c 2005-09-04 11:24:05.000000000 +0200 +++ ffmpeg-0.cvs20050811-bug#318493/libavcodec/i386/mpegvideo_mmx_template.c 2005-09-04 13:08:59.000000000 +0200 @@ -96,10 +96,8 @@ "pxor %%mm7, %%mm7 \n\t" // 0 "pxor %%mm4, %%mm4 \n\t" // 0 #if defined(PIC) && !defined(ARCH_X86_64) - "push %%"REG_a" \n\t" "movl %2, %%"REG_a" \n\t" "movq (%%"REG_a"), %%mm5 \n\t" // qmat[0] - "pop %%"REG_a" \n\t" #else "movq (%2), %%mm5 \n\t" // qmat[0] #endif @@ -153,7 +151,6 @@ : "g" (s->max_qcoeff) ); }else{ // FMT_H263 - const uint16_t *qmat2 = qmat+64; asm volatile( "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 SPREADW(%%mm3) @@ -170,12 +167,11 @@ "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0] "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] #if defined(PIC) && !defined(ARCH_X86_64) - "push %%"REG_a" \n\t" - "movl %2, %%"REG_a" \n\t" - "movq (%%"REG_a", %%"REG_a"), %%mm5 \n\t" // qmat[i] - "pop %%"REG_a" \n\t" + "addl %2, %%"REG_a" \n\t" + "movq (%%"REG_a"), %%mm5 \n\t" // qmat[i] + "subl %2, %%"REG_a" \n\t" #else - "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] + "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] #endif "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 "por %%mm0, %%mm4 \n\t" @@ -199,7 +195,7 @@ "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) #if defined(PIC) && !defined(ARCH_X86_64) - : "r" (block+64), "m" (qmat2), "r" (bias+64), + : "r" (block+64), "m" (qmat+64), "r" (bias+64), #else : "r" (block+64), "r" (qmat+64), "r" (bias+64), #endif
signature.asc
Description: OpenPGP digital signature