Maybe the problem comes from the skip part: > +INIT_XMM ssse3 > +cglobal hflip_byte, 3, 5, 3, src, dst, w, x, v > + mova m0, [pb_flip_byte] > + mov xq, 0 > + mov wd, dword wm > + sub wq, 2 * mmsize > + cmp wq, mmsize > + jl .skip > + > + .loop0: > + neg xq > + movu m1, [srcq + xq - mmsize + 1] > + movu m2, [srcq + xq - 2 * mmsize + 1] > + pshufb m1, m0 > + pshufb m2, m0 > + neg xq > + movu [dstq + xq ], m1 > + movu [dstq + xq + mmsize], m2 > + add xq, mmsize * 2 > + cmp xq, wq > + jl .loop0 > + > +.skip: > + add wq, 2 * mmsize >
==> Use xq instead of wq? > + .loop1: > + neg xq > + mov vb, [srcq + xq] > + neg xq > + mov [dstq + xq], vb > + add xq, 1 > + cmp xq, wq > + jl .loop1 > +RET > + > +cglobal hflip_short, 3, 5, 3, src, dst, w, x, v > + mova m0, [pb_flip_short] > + mov xq, 0 > + mov wd, dword wm > + add wq, wq > + sub wq, 2 * mmsize > + cmp wq, mmsize > + jl .skip > + > + .loop0: > + neg xq > + movu m1, [srcq + xq - mmsize + 2] > + movu m2, [srcq + xq - 2 * mmsize + 2] > + pshufb m1, m0 > + pshufb m2, m0 > + neg xq > + movu [dstq + xq ], m1 > + movu [dstq + xq + mmsize], m2 > + add xq, mmsize > + cmp xq, wq > + jl .loop0 > + > +.skip: > + add wq, 2 * mmsize > ==> Same here? > + .loop1: > + neg xq > + mov vw, [srcq + xq] > + neg xq > + mov [dstq + xq], vw > + add xq, 2 > + cmp xq, wq > + jl .loop1 > +RET > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel