On Sun, Jan 03, 2016 at 05:56:41PM -0300, James Almer wrote:
[...]
> >> +static av_always_inline av_const int ff_parity(uint32_t v)
> >> +{
> >> +#if HAVE_PARITY
> >> +    return __builtin_parity(v);
> >> +#else
> >> +    return av_popcount(v) & 1;
> >> +#endif
> > 
> > Do compilers really generate better code for the former?
> 
> GCC does on x86 when the target cpu doesn't support the popcnt instruction,
> otherwise the end result would be the same (popcnt + and).
> av_popcount_c() is not optimal for this.
> 

For the record, this is what it looks like here (GCC 5.3.0, clang 3.7.0,
i5-5250U):

[/tmp]☭ cat a.c
#include <stdint.h>

int parity0(uint32_t x) { return __builtin_popcount(x) & 1; }
int parity1(uint32_t x) { return __builtin_parity(x); }
[/tmp]☭ gcc -O2 -c a.c && objdump -r -d -Mintel a.o

a.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <parity0>:
   0:   48 83 ec 08             sub    rsp,0x8
   4:   89 ff                   mov    edi,edi
   6:   e8 00 00 00 00          call   b <parity0+0xb>
                        7: R_X86_64_PC32        __popcountdi2-0x4
   b:   48 83 c4 08             add    rsp,0x8
   f:   83 e0 01                and    eax,0x1
  12:   c3                      ret    
  13:   0f 1f 00                nop    DWORD PTR [rax]
  16:   66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  1d:   00 00 00 

0000000000000020 <parity1>:
  20:   89 f8                   mov    eax,edi
  22:   c1 ef 10                shr    edi,0x10
  25:   31 f8                   xor    eax,edi
  27:   30 e0                   xor    al,ah
  29:   0f 9b c0                setnp  al
  2c:   0f b6 c0                movzx  eax,al
  2f:   c3                      ret    
[/tmp]☭ clang -O2 -c a.c && objdump -r -d -Mintel a.o

a.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <parity0>:
   0:   89 f8                   mov    eax,edi
   2:   d1 e8                   shr    eax,1
   4:   25 55 55 55 55          and    eax,0x55555555
   9:   29 c7                   sub    edi,eax
   b:   89 f8                   mov    eax,edi
   d:   25 33 33 33 33          and    eax,0x33333333
  12:   c1 ef 02                shr    edi,0x2
  15:   81 e7 33 33 33 33       and    edi,0x33333333
  1b:   01 c7                   add    edi,eax
  1d:   89 f8                   mov    eax,edi
  1f:   c1 e8 04                shr    eax,0x4
  22:   01 f8                   add    eax,edi
  24:   25 0f 0f 0f 01          and    eax,0x10f0f0f
  29:   69 c0 01 01 01 01       imul   eax,eax,0x1010101
  2f:   c1 e8 18                shr    eax,0x18
  32:   83 e0 01                and    eax,0x1
  35:   c3                      ret    
  36:   66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  3d:   00 00 00 

0000000000000040 <parity1>:
  40:   89 f8                   mov    eax,edi
  42:   d1 e8                   shr    eax,1
  44:   25 55 55 55 55          and    eax,0x55555555
  49:   29 c7                   sub    edi,eax
  4b:   89 f8                   mov    eax,edi
  4d:   25 33 33 33 33          and    eax,0x33333333
  52:   c1 ef 02                shr    edi,0x2
  55:   81 e7 33 33 33 33       and    edi,0x33333333
  5b:   01 c7                   add    edi,eax
  5d:   89 f8                   mov    eax,edi
  5f:   c1 e8 04                shr    eax,0x4
  62:   01 f8                   add    eax,edi
  64:   25 0f 0f 0f 01          and    eax,0x10f0f0f
  69:   69 c0 01 01 01 01       imul   eax,eax,0x1010101
  6f:   c1 e8 18                shr    eax,0x18
  72:   83 e0 01                and    eax,0x1
  75:   c3                      ret    
[/tmp]☭ 

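Conclusion: with GCC it matters: at -O2 without popcnt enabled,
__builtin_popcount() becomes a call to libgcc's __popcountdi2, while
__builtin_parity() is inlined as a few xors plus a parity-flag test.
Not so much with Clang, which emits the same bit-twiddling sequence for both.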

-- 
Clément B.

Attachment: signature.asc
Description: PGP signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to