I modified the checkasm test to exercise various widths: if (check_func(s.flip_line[0], "hflip_%s", report_name)) { for (i = 1; i < w; i++) { call_ref(src, dst_ref, i); call_new(src, dst_new, i); if (memcmp(dst_ref, dst_new, WIDTH)) { printf("FAIL : W = %d\n", i); fail(); } } bench_new(src, dst_new, WIDTH); }
This asm seems to be ok (same idea for the hflip_short version) hflip_byte_c: 28.4 hflip_byte_ssse3: 23.7 hflip_short_c: 275.9 hflip_short_ssse3: 65.2

INIT_XMM ssse3
;------------------------------------------------------------------------------
; hflip_byte(src, dst, w)
;
; Horizontally flips a row of bytes: dst[i] = src[-i] for i in [0, w).
; src is therefore read backwards starting at the address passed in, while
; dst is written forwards.
;
;   src - address of the first byte to read; lower addresses are read next
;   dst - address of the first byte to write
;   w   - number of bytes to produce
;
; Strategy: a vector loop handles 2 * mmsize bytes per iteration for as long
; as a full pair of vectors fits, then a byte loop copies whatever is left.
;------------------------------------------------------------------------------
cglobal hflip_byte, 3, 5, 3, src, dst, w, x, v
    mova    m0, [pb_flip_byte]          ; pshufb control mask; defined outside this snippet,
                                        ; presumably a 16-byte reversal - confirm
    mov     xq, 0                       ; x = number of output bytes produced so far
    mov     wd, dword wm                ; NOTE(review): looks like this normalises the 32-bit
                                        ; width before 64-bit arithmetic - confirm against x86inc
    sub     wq, 2 * mmsize              ; wq = w - 2*mmsize: last x at which a full vector pair fits
    jl      .tail                       ; flags come from the sub above: w < 2*mmsize, no vector pass

.loop0:
    neg     xq                          ; x -> -x to address src backwards
    movu    m1, [srcq + xq - mmsize + 1]        ; src[-x-mmsize+1 .. -x]
    movu    m2, [srcq + xq - 2 * mmsize + 1]    ; the mmsize bytes just below those
    pshufb  m1, m0
    pshufb  m2, m0
    neg     xq                          ; back to the positive output offset
    movu    [dstq + xq         ], m1
    movu    [dstq + xq + mmsize], m2
    add     xq, mmsize * 2
    cmp     xq, wq
    jle     .loop0                      ; another full pair still fits when x <= w - 2*mmsize

.tail:
    ; Both paths land here with x = bytes already written. Restore wq to the
    ; real width and copy the remaining [x, w) bytes one at a time. The old
    ; code either returned with 2*mmsize bytes still unwritten (x == wq) or
    ; rewound x and ran the byte loop against the biased wq, so the end of
    ; the row was never produced.
    add     wq, 2 * mmsize
    cmp     xq, wq
    jge     .end                        ; nothing left (also covers w == 0)

.loop1:
    neg     xq
    mov     vb, [srcq + xq]             ; src[-x]
    neg     xq
    mov     [dstq + xq], vb             ; dst[x]
    add     xq, 1
    cmp     xq, wq
    jl      .loop1

.end:
    RET

_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel