On Sun, Aug 23, 2015 at 11:23:54PM +0200, Clément Bœsch wrote: > On Sun, Aug 23, 2015 at 11:58:23AM -0400, Ganesh Ajjanagadde wrote: > [...] > > diff --git a/libavfilter/vf_hqx.c b/libavfilter/vf_hqx.c > > index fa15d9c..0178793 100644 > > --- a/libavfilter/vf_hqx.c > > +++ b/libavfilter/vf_hqx.c > > @@ -65,9 +65,9 @@ static av_always_inline int yuv_diff(uint32_t yuv1, > > uint32_t yuv2) > > #define YMASK 0xff0000 > > #define UMASK 0x00ff00 > > #define VMASK 0x0000ff > > - return abs((yuv1 & YMASK) - (yuv2 & YMASK)) > (48 << 16) || > > - abs((yuv1 & UMASK) - (yuv2 & UMASK)) > ( 7 << 8) || > > - abs((yuv1 & VMASK) - (yuv2 & VMASK)) > ( 6 << 0); > > + return FFUABSDIFF(yuv1 & YMASK, yuv2 & YMASK) > (48 << 16) || > > + FFUABSDIFF(yuv1 & UMASK, yuv2 & UMASK) > ( 7 << 8) || > > + FFUABSDIFF(yuv1 & VMASK, yuv2 & VMASK) > ( 6 << 0); > > This is one of the bottleneck function of the filter. How does it affect > speed? Can you compare the generated ASM? >
To answer my second question: [/tmp]☭ cat a.c #include <stdlib.h> #include <stdint.h> #define YMASK 0xff0000 #define UMASK 0x00ff00 #define VMASK 0x0000ff #define FFUABSDIFF(a,b) (((a) > (b)) ? ((a)-(b)) : ((b)-(a))) int yuv_diff_0(uint32_t yuv1, uint32_t yuv2) { return abs((yuv1 & YMASK) - (yuv2 & YMASK)) > (48 << 16) || abs((yuv1 & UMASK) - (yuv2 & UMASK)) > ( 7 << 8) || abs((yuv1 & VMASK) - (yuv2 & VMASK)) > ( 6 << 0); } int yuv_diff_1(uint32_t yuv1, uint32_t yuv2) { return FFUABSDIFF(yuv1 & YMASK, yuv2 & YMASK) > (48 << 16) || FFUABSDIFF(yuv1 & UMASK, yuv2 & UMASK) > ( 7 << 8) || FFUABSDIFF(yuv1 & VMASK, yuv2 & VMASK) > ( 6 << 0); } [/tmp]☭ gcc -Wall -O2 -c a.c && objdump -d -Mintel a.o a.o: file format elf64-x86-64 Disassembly of section .text: 0000000000000000 <yuv_diff_0>: 0: 89 f8 mov eax,edi 2: 89 f2 mov edx,esi 4: 81 e2 00 00 ff 00 and edx,0xff0000 a: 25 00 00 ff 00 and eax,0xff0000 f: 29 d0 sub eax,edx 11: 99 cdq 12: 31 d0 xor eax,edx 14: 29 d0 sub eax,edx 16: 89 c2 mov edx,eax 18: b8 01 00 00 00 mov eax,0x1 1d: 81 fa 00 00 30 00 cmp edx,0x300000 23: 7f 3e jg 63 <yuv_diff_0+0x63> 25: 89 fa mov edx,edi 27: 89 f1 mov ecx,esi 29: 81 e1 00 ff 00 00 and ecx,0xff00 2f: 81 e2 00 ff 00 00 and edx,0xff00 35: 29 ca sub edx,ecx 37: 89 d1 mov ecx,edx 39: c1 f9 1f sar ecx,0x1f 3c: 31 ca xor edx,ecx 3e: 29 ca sub edx,ecx 40: 81 fa 00 07 00 00 cmp edx,0x700 46: 7f 1b jg 63 <yuv_diff_0+0x63> 48: 40 0f b6 ff movzx edi,dil 4c: 40 0f b6 f6 movzx esi,sil 50: 29 f7 sub edi,esi 52: 89 f8 mov eax,edi 54: c1 f8 1f sar eax,0x1f 57: 31 c7 xor edi,eax 59: 29 c7 sub edi,eax 5b: 31 c0 xor eax,eax 5d: 83 ff 06 cmp edi,0x6 60: 0f 9f c0 setg al 63: f3 c3 repz ret 65: 90 nop 66: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0] 6d: 00 00 00 0000000000000070 <yuv_diff_1>: 70: 89 fa mov edx,edi 72: 89 f0 mov eax,esi 74: 81 e2 00 00 ff 00 and edx,0xff0000 7a: 25 00 00 ff 00 and eax,0xff0000 7f: 39 c2 cmp edx,eax 81: 76 4d jbe d0 <yuv_diff_1+0x60> 83: 29 c2 sub edx,eax 85: b8 01 00 00 00 mov eax,0x1 8a: 81 fa 00 00 30 00 cmp edx,0x300000 90: 77 7e ja 110 <yuv_diff_1+0xa0> 92: 89 fa mov edx,edi 94: 89 f0 mov eax,esi 96: 81 e2 00 ff 00 00 and edx,0xff00 9c: 25 00 ff 00 00 and eax,0xff00 a1: 39 c2 cmp edx,eax a3: 76 43 jbe e8 <yuv_diff_1+0x78> a5: 29 c2 sub edx,eax a7: b8 01 00 00 00 mov eax,0x1 ac: 81 fa 00 07 00 00 cmp edx,0x700 b2: 77 74 ja 128 <yuv_diff_1+0xb8> b4: 40 0f b6 ff movzx edi,dil b8: 40 0f b6 f6 movzx esi,sil bc: 39 f7 cmp edi,esi be: 76 58 jbe 118 <yuv_diff_1+0xa8> c0: 29 f7 sub edi,esi c2: 31 c0 xor eax,eax c4: 83 ff 06 cmp edi,0x6 c7: 0f 97 c0 seta al ca: c3 ret cb: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0] d0: 29 d0 sub eax,edx d2: 89 c2 mov edx,eax d4: b8 01 00 00 00 mov eax,0x1 d9: 81 fa 00 00 30 00 cmp edx,0x300000 df: 76 b1 jbe 92 <yuv_diff_1+0x22> e1: f3 c3 repz ret e3: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0] e8: 29 d0 sub eax,edx ea: 89 c2 mov edx,eax ec: b8 01 00 00 00 mov eax,0x1 f1: 81 fa 00 07 00 00 cmp edx,0x700 f7: 77 e8 ja e1 <yuv_diff_1+0x71> f9: 40 0f b6 ff movzx edi,dil fd: 40 0f b6 f6 movzx esi,sil 101: 39 f7 cmp edi,esi 103: 76 13 jbe 118 <yuv_diff_1+0xa8> 105: eb b9 jmp c0 <yuv_diff_1+0x50> 107: 66 0f 1f 84 00 00 00 nop WORD PTR [rax+rax*1+0x0] 10e: 00 00 110: f3 c3 repz ret 112: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0] 118: 29 fe sub esi,edi 11a: 31 c0 xor eax,eax 11c: 83 fe 06 cmp esi,0x6 11f: 0f 97 c0 seta al 122: c3 ret 123: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0] 128: f3 c3 repz ret [/tmp]☭ I must say I'm slightly uncomfortable with that change. -- Clément B.
pgp3KcfUeGvDX.pgp
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel