Signed-off-by: Paul B Mahol <one...@gmail.com> --- libavfilter/x86/vf_gblur.asm | 49 +++++++++++++++++++++++++++++++++ libavfilter/x86/vf_gblur_init.c | 17 ++++++++++-- 2 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm index a25b1659f5..8ccfbdc56b 100644 --- a/libavfilter/x86/vf_gblur.asm +++ b/libavfilter/x86/vf_gblur.asm @@ -183,3 +183,52 @@ HORIZ_SLICE INIT_XMM avx2 HORIZ_SLICE %endif + +%macro POSTSCALE_SLICE 0 +%if UNIX64 +cglobal postscale_slice, 2, 3, 4, ptr, length, x +%else +cglobal postscale_slice, 5, 6, 4, ptr, length, postscale, min, max, x +%endif + shl lengthd, 2 +%if WIN64 + SWAP 0, 2 + SWAP 1, 3 + SWAP 2, 4 +%endif +%if cpuflag(avx2) + vbroadcastss m0, xm0 + vbroadcastss m1, xm1 + vbroadcastss m2, xm2 +%else + shufps xm0, xm0, 0 + shufps xm1, xm1, 0 + shufps xm2, xm2, 0 +%endif + xor xq, xq + + .loop: +%if cpuflag(avx2) + mulps m3, m0, [ptrq + xq] +%else + movu m3, [ptrq + xq] + mulps m3, m0 +%endif + maxps m3, m1 + minps m3, m2 + movu [ptrq+xq], m3 + + add xq, mmsize + cmp xd, lengthd + jl .loop + + RET +%endmacro + +INIT_XMM sse +POSTSCALE_SLICE + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +POSTSCALE_SLICE +%endif diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c index e63e59fe23..9223cb797d 100644 --- a/libavfilter/x86/vf_gblur_init.c +++ b/libavfilter/x86/vf_gblur_init.c @@ -27,14 +27,25 @@ void ff_horiz_slice_sse4(float *ptr, int width, int height, int steps, float nu, float bscale); void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, float bscale); +void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max); +void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max); + av_cold void ff_gblur_init_x86(GBlurContext *s) { -#if ARCH_X86_64 int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_SSE4(cpu_flags)) + if (EXTERNAL_SSE(cpu_flags)) { + s->postscale_slice = ff_postscale_slice_sse; + } + if (EXTERNAL_AVX2(cpu_flags)) { + s->postscale_slice = ff_postscale_slice_avx2; + } +#if ARCH_X86_64 + if (EXTERNAL_SSE4(cpu_flags)) { s->horiz_slice = ff_horiz_slice_sse4; - if 
(EXTERNAL_AVX2(cpu_flags)) + } + if (EXTERNAL_AVX2(cpu_flags)) { s->horiz_slice = ff_horiz_slice_avx2; + } #endif } -- 2.17.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".