Performance data (lower is better): exposure_sse: 500491, exposure_avx2: 449122
Signed-off-by: Wu Jianhua <jianhua...@intel.com> --- libavfilter/x86/vf_exposure.asm | 15 +++++++++++++++ libavfilter/x86/vf_exposure_init.c | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/libavfilter/x86/vf_exposure.asm b/libavfilter/x86/vf_exposure.asm index 3351c6fb3b..4ee9fbcb15 100644 --- a/libavfilter/x86/vf_exposure.asm +++ b/libavfilter/x86/vf_exposure.asm @@ -36,11 +36,21 @@ cglobal exposure, 2, 2, 4, ptr, length, black, scale VBROADCASTSS m1, xmm1 %endif +%if cpuflag(fma3) + mulps m0, m0, m1 ; black * scale +%endif + .loop: +%if cpuflag(fma3) + mova m2, m0 + vfmsub231ps m2, m1, [ptrq] + movu [ptrq], m2 +%else movu m2, [ptrq] subps m2, m2, m0 mulps m2, m2, m1 movu [ptrq], m2 +%endif add ptrq, mmsize sub lengthq, mmsize/4 @@ -52,4 +62,9 @@ cglobal exposure, 2, 2, 4, ptr, length, black, scale %if ARCH_X86_64 INIT_XMM sse EXPOSURE + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +EXPOSURE +%endif %endif diff --git a/libavfilter/x86/vf_exposure_init.c b/libavfilter/x86/vf_exposure_init.c index de1b360f6c..edc1452850 100644 --- a/libavfilter/x86/vf_exposure_init.c +++ b/libavfilter/x86/vf_exposure_init.c @@ -24,6 +24,7 @@ #include "libavfilter/exposure.h" void ff_exposure_sse(float *ptr, int length, float black, float scale); +void ff_exposure_avx2(float *ptr, int length, float black, float scale); av_cold void ff_exposure_init_x86(ExposureContext *s) { @@ -32,5 +33,8 @@ av_cold void ff_exposure_init_x86(ExposureContext *s) #if ARCH_X86_64 if (EXTERNAL_SSE(cpu_flags)) s->exposure_func = ff_exposure_sse; + + if (EXTERNAL_AVX2_FAST(cpu_flags)) + s->exposure_func = ff_exposure_avx2; #endif } -- 2.17.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".