On 8 May 2015 at 14:28, Michael Niedermayer <michae...@gmx.at> wrote:
> On Fri, May 08, 2015 at 12:43:20PM -0700, Nick Lewycky wrote: > > On 8 May 2015 at 12:06, Michael Niedermayer <michae...@gmx.at> wrote: > > > > > On Fri, May 08, 2015 at 10:50:49AM -0700, Nick Lewycky wrote: > > > > On 6 May 2015 at 18:03, Michael Niedermayer <michae...@gmx.at> > wrote: > > > > > > > > > On Wed, May 06, 2015 at 04:08:09PM -0700, Nick Lewycky wrote: > > > > > > On 6 May 2015 at 15:06, Michael Niedermayer <michae...@gmx.at> > > > wrote: > > > > > > > > > > > > > Hi > > > > > > > > > > > > > > On Wed, May 06, 2015 at 11:52:59AM -0700, Nick Lewycky wrote: > > > > > > > > Newer versions of clang will allocate %rbx as one of the > inline > > > asm > > > > > > > inputs, > > > > > > > > > > > > > > Thats great > > > > > > > > > > > > > > > > > > > > > > even in PIC. This occurs when building ffmpeg with clang > > > > > > > -fsanitize=address > > > > > > > > -O1 -fPIE. Because the asm does clobber %bx whether PIC is > on or > > > off, > > > > > > > just > > > > > > > > include %bx in the clobber list regardless of whether PIC is > on > > > or > > > > > off. > > > > > > > > > > > > > > you cant include *bx in the clobber list with PIC > > > > > > > it breaks compilers that are less great, like gcc > > > > > > > > > > > > > > > > > > > Doh!! I did check for this, but only tried x86-64, not x86-32. > Sorry! > > > > > > > > > > > > gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3 > > > > > > > ../libswscale/x86/hscale_fast_bilinear_simd.c: In function > > > > > > > ‘ff_hyscale_fast_mmxext’: > > > > > > > ../libswscale/x86/hscale_fast_bilinear_simd.c:205:5: error: PIC > > > > > register > > > > > > > clobbered by ‘%ebx’ in ‘asm’ > > > > > > > ../libswscale/x86/hscale_fast_bilinear_simd.c: In function > > > > > > > ‘ff_hcscale_fast_mmxext’: > > > > > > > ../libswscale/x86/hscale_fast_bilinear_simd.c:276:5: error: PIC > > > > > register > > > > > > > clobbered by ‘%ebx’ in ‘asm’ > > > > > > > > > > > > > > > > > > > > > also what exactly are you trying to fix ? > > > > > > > or rather what exactly goes how exactly wrong with the code as > it > > > is > > > > > > > if rbx is used ? > > > > > > > > > > > > > > > > > > > Ok, let's look at ff_hcscale_fast_mmext. Preprocessor directives > > > > > evaluated > > > > > > in PIC x86-64, the inline constraints work out to: > > > > > > > > > > > > :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), > > > > > > "m" (mmxextFilterCode), "m" (src2), "m"(dst2) > > > > > > ,"m" (ebxsave) > > > > > > ,"m"(retsave) > > > > > > : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D > > > > > > > > > > > > so clang looks at that and decides that it can pick src1 = > (%r10), > > > dst1 = > > > > > > (%r8), filter = (%r11), filterPos = (%r12), mmxextFilterCode = > > > (%r15), > > > > > src2 > > > > > > = (%rbx), dst2 = (%r14), ebxsave = (%r13), retsave = (%r9). The > > > problem > > > > > > there is src2 being (%rbx). > > > > > > > > > > > > Now let's look at how we use them: > > > > > > > > > > > > "mov %0, %%"REG_c" \n\t" > > > > > > "mov %1, %%"REG_D" \n\t" > > > > > > "mov %2, %%"REG_d" \n\t" > > > > > > "mov %3, %%"REG_b" \n\t" // Clobbers > %rbx / > > > > > src2 > > > > > > / %5 here > > > > > > "xor %%"REG_a", %%"REG_a" \n\t" > > > > > > PREFETCH" (%%"REG_c") \n\t" > > > > > > PREFETCH" 32(%%"REG_c") \n\t" > > > > > > PREFETCH" 64(%%"REG_c") \n\t" > > > > > > > > > > > > CALL_MMXEXT_FILTER_CODE > > > > > > CALL_MMXEXT_FILTER_CODE > > > > > > CALL_MMXEXT_FILTER_CODE > > > > > > CALL_MMXEXT_FILTER_CODE > > > > > > "xor %%"REG_a", %%"REG_a" \n\t" > > > > > > "mov %5, %%"REG_c" \n\t" // %5 is read > too > > > > > late, > > > > > > we get %3 / filterPos instead > > > > > > "mov %6, %%"REG_D" \n\t" > > > > > > PREFETCH" (%%"REG_c") \n\t" > > > > > > PREFETCH" 32(%%"REG_c") \n\t" > > > > > > PREFETCH" 64(%%"REG_c") \n\t" > > > > > > > > > > > > CALL_MMXEXT_FILTER_CODE // Crash... > > > > > > > > > > > > The two marked instructions are intended to refer to different > > > contents. > > > > > > CALL_MMXEXT_FILTER_CODE moves RBX to ESI and calls > mmxextFilterCode: > > > > > > > > > > > > "movl (%%"REG_b"), %%esi \n\t"\ > > > > > > "call *%4 \n\t"\ // > Crash... > > > > > > > > > > > > That callee ultimately segfaults: > > > > > > > > > > > > => 0x7fffefa45000: movq (%rdx,%rax,1),%mm3 > > > > > > => 0x7fffefa45004: movd (%rcx,%rsi,1),%mm0 > > > > > > Program received signal SIGSEGV, Segmentation fault. > > > > > > 0x00007fffefa45004 in ?? () > > > > > > => 0x7fffefa45004: movd (%rcx,%rsi,1),%mm0 > > > > > > > > > > > > I'm trying to fix this segfault. > > > > > > > > > > > > also ideally changes to this code should be tested against > > > gcc/clang/icc > > > > > > > of various versions with and without PIC, 32 and 64 bit > > > > > > > this code is more tricky than than the average so theres a good > > > > > > > change changes to it will break some compilerAnd if you decide > that you don’t want to edit the hunk after all, simply delete everything in > the file, save & close it. Git will then abort the editing process and > leave the hunk unchanged, returning you to the standard selection. > > > > > > > > > > > > > > > > > > > I understand that, but I may not be able to test as many > > > environments as > > > > > > you like. I'm going to give testing it my best effort, but I can > > > tell you > > > > > > up front that I'm only going to test gcc and clang, on an x86 > Ubuntu > > > > > > machine. I don't have ICC, so I can't test that. > > > > > > > > > > iam not sure it would work but, configure could easily test if > > > > > ebx can be put on the clobber list and that then could be used > > > > > in place of PIC in the checks > > > > > > > > > > > > > I checked gcc 4.4, 4.6, 4.7, 4.8, and clang 3.3, 3.4, 3.5 in 32 and > > > 64-bit > > > > builds, and PIC vs. not, cross product thereof. The only one that > emits > > > > this error is all versions of gcc in 32-bit mode with PIC enabled. On > > > > 64-bit, gcc and clang do the same thing, they'll spill BX if they > want to > > > > preserve it. > > > > > > > > Based on that, I decided we could sink the manual saving and > restoring of > > > > ebx down to 32-bit+PIC mode, and just list it as a clobber in 64-bit > mode > > > > regardless of the PIC setting. That works in every compiler and mode > I > > > > tried. > > > > > > > > Tested with make + make check + make fate but not "make fake > > > SAMPLES=...". > > > > Updated patch attached! Let me know what you think. > > > > > > > > and thanks for the detailed informaion > > > > > > > > > > > > > No problem, thanks for the review! > > > > > > please provide a git patch with commit message > > > > > > > Attached! > > > > > > > [...] > > > > > > > int i; > > > > +#if ARCH_X86_64 > > > > + uint64_t retsave; > > > > +#else > > > > #if defined(PIC) > > > > uint64_t ebxsave; > > > > #endif > > > > -#if ARCH_X86_64 > > > > - uint64_t retsave; > > > > #endif > > > > > > > > __asm__ volatile( > > > > -#if defined(PIC) > > > > - "mov %%"REG_b", %5 \n\t" > > > > #if ARCH_X86_64 > > > > "mov -8(%%rsp), %%"REG_a" \n\t" > > > > - "mov %%"REG_a", %6 \n\t" > > > > -#endif > > > > + "mov %%"REG_a", %5 \n\t" // retsave > > > > #else > > > > -#if ARCH_X86_64 > > > > - "mov -8(%%rsp), %%"REG_a" \n\t" > > > > - "mov %%"REG_a", %5 \n\t" > > > > +#if defined(PIC) > > > > + "mov %%"REG_b", %5 \n\t" // ebxsave > > > > #endif > > > > #endif > > > > "pxor %%mm7, %%mm7 \n\t" > > > > @@ -256,28 +253,25 @@ void ff_hyscale_fast_mmxext(SwsContext *c, > int16_t > > > *dst, > > > > CALL_MMXEXT_FILTER_CODE > > > > CALL_MMXEXT_FILTER_CODE > > > > > > > > -#if defined(PIC) > > > > - "mov %5, %%"REG_b" \n\t" > > > > -#if ARCH_X86_64 > > > > - "mov %6, %%"REG_a" \n\t" > > > > - "mov %%"REG_a", -8(%%rsp) \n\t" > > > > -#endif > > > > -#else > > > > #if ARCH_X86_64 > > > > "mov %5, %%"REG_a" \n\t" > > > > "mov %%"REG_a", -8(%%rsp) \n\t" > > > > +#else > > > > +#if defined(PIC) > > > > + "mov %5, %%"REG_b" \n\t" > > > > #endif > > > > #endif > > > > :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), > > > > "m" (mmxextFilterCode) > > > > +#if ARCH_X86_64 > > > > + ,"m"(retsave) > > > > +#else > > > > #if defined(PIC) > > > > ,"m" (ebxsave) > > > > #endif > > > > -#if ARCH_X86_64 > > > > - ,"m"(retsave) > > > > #endif > > > > : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D > > > > -#if !defined(PIC) > > > > +#if ARCH_X86_64 || !defined(__PIC__) > > > > > > this mixes __PIC__ and PIC > > > > > > > Fixed. Thanks! > > > > > > > > > > > > > [...] > > > > > > -- > > > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > > > > > Into a blind darkness they enter who follow after the Ignorance, > > > they as if into a greater darkness enter who devote themselves > > > to the Knowledge alone. -- Isha Upanishad > > > > > > _______________________________________________ > > > ffmpeg-devel mailing list > > > ffmpeg-devel@ffmpeg.org > > > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > > > > > > hscale_fast_bilinear_simd.c | 65 > +++++++++++++++++--------------------------- > > 1 file changed, 26 insertions(+), 39 deletions(-) > > 4172e9dc3002b77114f98a067215f0802901b6e2 ebxsave-4.patch > > commit a1922839bbc0eaf3fb1451b6a01c0f2b36f67946 > > Author: Nick Lewycky <nlewy...@google.com> > > Date: Fri May 8 12:40:44 2015 -0700 > > > > On 64-bit with PIC, the compiler may allocate BX and won't complain > about it being in our clobber list. Since we do clobber BX in x86-64+PIC > mode, add it to the clobber list. Fixes a miscompile with address sanitizer > in clang 3.7-svn. > > commit messages should start with what is changed like: > "swscale/x86/hscale_fast_bilinear_simd:" > Sorry about that. I should've read prior commit messages and followed the style. also git am doesnt apply this, this is not a correct git patch > its rather the outout of git show i suspect > a git patch is created with "git format-patch -1" > Yes, it was "git show" output. Now with git format-patch -1. ideally the commit message should list, what is changed, why it is > changed and how it is changed > Ok, I think the new commit message does? the actual change is ok but if you like it could be split (i actually > think its better if it is split) > in one change fixing the bug and another changing the operand > numbering and removing ebxsave in the case where its not needed > both together make the patch somewhat hard to read > I don't see any way to fix the bug without changing the operand numbering? Fixing the bug requires a change to when ebxsave is in the output vs. clobber list, which triggers a renumbering of accesses to retsave. Nick Thanks > > [...] > > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > it is not once nor twice but times without number that the same ideas make > their appearance in the world. -- Aristotle > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > >
From f137b848a718b6428742050a01375ff253611a15 Mon Sep 17 00:00:00 2001 From: Nick Lewycky <nlewy...@google.com> Date: Fri, 8 May 2015 12:40:44 -0700 Subject: [PATCH] swscale/x86/hscale_fast_bilinear_simd: Include BX in the clobber list of ff_hyscale_fast_mmxext and ff_hcscale_fast_mmxext except on 32-bit PIC where it can't be used as an inline asm input. --- libswscale/x86/hscale_fast_bilinear_simd.c | 65 ++++++++++++------------------ 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/libswscale/x86/hscale_fast_bilinear_simd.c b/libswscale/x86/hscale_fast_bilinear_simd.c index 103793d..e6e87cc 100644 --- a/libswscale/x86/hscale_fast_bilinear_simd.c +++ b/libswscale/x86/hscale_fast_bilinear_simd.c @@ -198,24 +198,21 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, int16_t *filter = c->hLumFilter; void *mmxextFilterCode = c->lumMmxextFilterCode; int i; +#if ARCH_X86_64 + uint64_t retsave; +#else #if defined(PIC) uint64_t ebxsave; #endif -#if ARCH_X86_64 - uint64_t retsave; #endif __asm__ volatile( -#if defined(PIC) - "mov %%"REG_b", %5 \n\t" #if ARCH_X86_64 "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %6 \n\t" -#endif + "mov %%"REG_a", %5 \n\t" // retsave #else -#if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %5 \n\t" +#if defined(PIC) + "mov %%"REG_b", %5 \n\t" // ebxsave #endif #endif "pxor %%mm7, %%mm7 \n\t" @@ -256,28 +253,25 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE -#if defined(PIC) - "mov %5, %%"REG_b" \n\t" -#if ARCH_X86_64 - "mov %6, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" -#endif -#else #if ARCH_X86_64 "mov %5, %%"REG_a" \n\t" "mov %%"REG_a", -8(%%rsp) \n\t" +#else +#if defined(PIC) + "mov %5, %%"REG_b" \n\t" #endif #endif :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), "m" (mmxextFilterCode) +#if ARCH_X86_64 + ,"m"(retsave) +#else #if defined(PIC) ,"m" (ebxsave) #endif -#if ARCH_X86_64 - ,"m"(retsave) #endif : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D -#if !defined(PIC) +#if ARCH_X86_64 || !defined(PIC) ,"%"REG_b #endif ); @@ -294,24 +288,20 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, int16_t *filter = c->hChrFilter; void *mmxextFilterCode = c->chrMmxextFilterCode; int i; +#if ARCH_X86_64 + DECLARE_ALIGNED(8, uint64_t, retsave); +#else #if defined(PIC) DECLARE_ALIGNED(8, uint64_t, ebxsave); #endif -#if ARCH_X86_64 - DECLARE_ALIGNED(8, uint64_t, retsave); #endif - __asm__ volatile( -#if defined(PIC) - "mov %%"REG_b", %7 \n\t" #if ARCH_X86_64 "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %8 \n\t" -#endif + "mov %%"REG_a", %7 \n\t" // retsave #else -#if ARCH_X86_64 - "mov -8(%%rsp), %%"REG_a" \n\t" - "mov %%"REG_a", %7 \n\t" +#if defined(PIC) + "mov %%"REG_b", %7 \n\t" // ebxsave #endif #endif "pxor %%mm7, %%mm7 \n\t" @@ -340,28 +330,25 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE -#if defined(PIC) - "mov %7, %%"REG_b" \n\t" -#if ARCH_X86_64 - "mov %8, %%"REG_a" \n\t" - "mov %%"REG_a", -8(%%rsp) \n\t" -#endif -#else #if ARCH_X86_64 "mov %7, %%"REG_a" \n\t" "mov %%"REG_a", -8(%%rsp) \n\t" +#else +#if defined(PIC) + "mov %7, %%"REG_b" \n\t" #endif #endif :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), "m" (mmxextFilterCode), "m" (src2), "m"(dst2) +#if ARCH_X86_64 + ,"m"(retsave) +#else #if defined(PIC) ,"m" (ebxsave) #endif -#if ARCH_X86_64 - ,"m"(retsave) #endif : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D -#if !defined(PIC) +#if ARCH_X86_64 || !defined(PIC) ,"%"REG_b #endif ); -- 2.2.0.rc0.207.ga3a616c
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel