Signed-off-by: James Almer <jamr...@gmail.com>
---
GCC apparently can't generate a bzhi instruction on its own from the C version, so here's a custom implementation.
Before:

gcc -O3
<av_zhb_c>:
   0:   89 f1                   mov    ecx,esi
   2:   ba 01 00 00 00          mov    edx,0x1
   7:   d3 e2                   shl    edx,cl
   9:   83 ea 01                sub    edx,0x1
   c:   89 d0                   mov    eax,edx
   e:   21 f8                   and    eax,edi
  10:   c3                      ret

gcc -mbmi2 -O3
<av_zhb_c>:
   0:   ba 01 00 00 00          mov    edx,0x1
   5:   c4 e2 49 f7 d2          shlx   edx,edx,esi
   a:   8d 42 ff                lea    eax,[rdx-0x1]
   d:   21 f8                   and    eax,edi
   f:   c3                      ret

After:

gcc -mbmi2 -O3
<av_zhb_bmi2>:
   0:   c4 e2 48 f5 c7          bzhi   eax,edi,esi
   5:   c3                      ret

The non-bmi2 example is a bit bloated with movs to have values in ecx (needed
for shl) and eax (ret value) since, unlike the actual function, it was not
inlined.
Still, best case scenario is mov + shl + sub/dec/lea + and versus a single
bzhi when p is not a constant.

 libavutil/x86/intmath.h | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index 7aa6bc4..f19ef64 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -24,15 +24,36 @@
 #include <stdint.h>
 #include "config.h"
 
+#if defined(__GNUC__)
+
 /* Our generic version of av_popcount is faster than GCC's built-in on
  * CPUs that don't support the popcnt instruction.
  */
-#if defined(__GNUC__) && defined(__POPCNT__)
+#if defined(__POPCNT__)
+
     #define av_popcount __builtin_popcount
 #if ARCH_X86_64
     #define av_popcount64 __builtin_popcountll
 #endif
 
-#endif /* defined(__GNUC__) && defined(__POPCNT__) */
+#endif /* __POPCNT__ */
+
+#if defined(__BMI2__)
+
+#define av_zhb av_zhb_bmi2
+static av_always_inline av_const unsigned av_zhb_bmi2(unsigned a, unsigned p)
+{
+    if (av_builtin_constant_p(p))
+        return a & ((1 << p) - 1);
+    else {
+        unsigned x;
+        __asm__ ("bzhi %2, %1, %0 \n\t" : "=r"(x) : "rm"(a), "r"(p));
+        return x;
+    }
+}
+
+#endif /* __BMI2__ */
+
+#endif /* __GNUC__ */
 
 #endif /* AVUTIL_X86_INTMATH_H */
--
2.3.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel