On 12/7/2017 5:10 PM, Martin Vignali wrote: > 2017-12-03 21:28 GMT+01:00 Martin Vignali <martin.vign...@gmail.com>: > >> >> >> 2017-12-03 21:15 GMT+01:00 James Darnley <james.darn...@gmail.com>: >> >>> On 2017-12-03 19:30, Martin Vignali wrote: >>>> libavfilter/x86/vf_threshold.asm | 19 ++++++++++++++----- >>>> libavfilter/x86/vf_threshold_init.c | 34 >>> ++++++++++++++++++++-------------- >>>> 2 files changed, 34 insertions(+), 19 deletions(-) >>>> >>>> diff --git a/libavfilter/x86/vf_threshold.asm >>> b/libavfilter/x86/vf_threshold.asm >>>> index fb008c376a..7b929c6bd2 100644 >>>> --- a/libavfilter/x86/vf_threshold.asm >>>> +++ b/libavfilter/x86/vf_threshold.asm >>>> @@ -27,14 +27,21 @@ >>>> SECTION_RODATA >>>> >>>> pb_128: times 16 db 128 >>>> +pb_128_0 : times 16 dw 32768 >>> >>> No. Please use db and the values you want. >>> >>> I assume this is supposed to be "times 8 db 0, 128". >> >> >> > Hello, > > new patch in attach (you're right, it's "times 8 db 0, 128") > > > Martin > From ac91cb26724b6e8fe294e0bf9ad2dd17fe0eada9 Mon Sep 17 00:00:00 2001 > From: Martin Vignali <martin.vign...@gmail.com> > Date: Thu, 7 Dec 2017 21:06:43 +0100 > Subject: [PATCH 1/2] avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 > and AVX2) > > --- > libavfilter/x86/vf_threshold.asm | 19 +++++++++++++------ > libavfilter/x86/vf_threshold_init.c | 34 ++++++++++++++++++++-------------- > 2 files changed, 33 insertions(+), 20 deletions(-) > > diff --git a/libavfilter/x86/vf_threshold.asm > b/libavfilter/x86/vf_threshold.asm > index 56a6c242d8..dc42cd4971 100644 > --- a/libavfilter/x86/vf_threshold.asm > +++ b/libavfilter/x86/vf_threshold.asm > @@ -25,12 +25,14 @@ > SECTION_RODATA > > pb_128: times 16 db 128 > +pb_128_0 : times 8 db 0, 128 > > SECTION .text > > -%macro THRESHOLD_8 0 > +;%1 depth (8 or 16) ; %2 b or w ; %3 constant > +%macro THRESHOLD 3 > %if ARCH_X86_64 > -cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, > tlinesize, flinesize, slinesize, 
olinesize, w, h, x > +cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, > tlinesize, flinesize, slinesize, olinesize, w, h, x > mov wd, dword wm > mov hd, dword hm > %else
You should also change the cglobal line for x86_32, right below this %else, from threshold8 to threshold%1; as the hunk context below shows, it still reads "cglobal threshold8", so on 32-bit builds the 16-bit instantiation of the macro would be emitted under the threshold8 name as well. > @@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, > w, x > %define olinesizeq r9mp > %define hd r11mp > %endif > - VBROADCASTI128 m4, [pb_128] > + VBROADCASTI128 m4, [%3] > +%if %1 == 16 > + add wq, wq ; w *= 2 (16 bits instead of 8) > +%endif > add inq, wq > add thresholdq, wq > add minq, wq > @@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, > w, x > movu m3, [maxq + xq] > pxor m0, m4 > pxor m1, m4 > - pcmpgtb m0, m1 > + pcmpgt%2 m0, m1 > PBLENDVB m3, m2, m0 > movu [outq + xq], m3 > add xq, mmsize > @@ -77,9 +82,11 @@ RET > %endmacro > > INIT_XMM sse4 > -THRESHOLD_8 > +THRESHOLD 8, b, pb_128 > +THRESHOLD 16, w, pb_128_0 > > %if HAVE_AVX2_EXTERNAL > INIT_YMM avx2 > -THRESHOLD_8 > +THRESHOLD 8, b, pb_128 > +THRESHOLD 16, w, pb_128_0 > %endif > diff --git a/libavfilter/x86/vf_threshold_init.c > b/libavfilter/x86/vf_threshold_init.c > index db0559533d..8e42296791 100644 > --- a/libavfilter/x86/vf_threshold_init.c > +++ b/libavfilter/x86/vf_threshold_init.c > @@ -23,20 +23,19 @@ > #include "libavutil/x86/cpu.h" > #include "libavfilter/threshold.h" > > -void ff_threshold8_sse4(const uint8_t *in, const uint8_t *threshold, > - const uint8_t *min, const uint8_t *max, > - uint8_t *out, > - ptrdiff_t ilinesize, ptrdiff_t tlinesize, > - ptrdiff_t flinesize, ptrdiff_t slinesize, > - ptrdiff_t olinesize, > - int w, int h); > -void ff_threshold8_avx2(const uint8_t *in, const uint8_t *threshold, > - const uint8_t *min, const uint8_t *max, > - uint8_t *out, > - ptrdiff_t ilinesize, ptrdiff_t tlinesize, > - ptrdiff_t flinesize, ptrdiff_t slinesize, > - ptrdiff_t olinesize, > - int w, int h); > +#define THRESHOLD_FUNC(depth, opt) \ > +void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t > *threshold,\ > + const uint8_t *min, const uint8_t *max, \ > + uint8_t *out, \ > + ptrdiff_t ilinesize, ptrdiff_t tlinesize, \ > + ptrdiff_t flinesize, 
ptrdiff_t slinesize, \ > + ptrdiff_t olinesize, \ > + int w, int h); > + > +THRESHOLD_FUNC(8, sse4) > +THRESHOLD_FUNC(8, avx2) > +THRESHOLD_FUNC(16, sse4) > +THRESHOLD_FUNC(16, avx2) > > av_cold void ff_threshold_init_x86(ThresholdContext *s) > { > @@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s) > if (EXTERNAL_AVX2_FAST(cpu_flags)) { > s->threshold = ff_threshold8_avx2; > } > + } else if (s->depth == 16) { > + if (EXTERNAL_SSE4(cpu_flags)) { > + s->threshold = ff_threshold16_sse4; > + } > + if (EXTERNAL_AVX2_FAST(cpu_flags)) { > + s->threshold = ff_threshold16_avx2; > + } > } > } > -- > 2.11.0 (Apple Git-81) > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel