On Tue, Sep 28, 2021 at 6:38 PM chen <chenm...@163.com> wrote:

> Hello,
>
>
> Excuse me, how about FMADD on AVX2 platform?
>
>
> For example
> + mulps m7, m7, m14
> + addps m0, m0, m7
>
> ==>
>
>
> fmadd231ps m0,m7,m14
>
>

Interesting, does having AVX2 guarantee having FMA instructions?
> > Regards, > Min Chen > > > 2021-09-29 09:18:05,mindm...@gmail.com > >From: Mark Reid <mindm...@gmail.com> > > > >Only supports float and 16bit planar formats at the moment. > >Mainly focused on AVX and AVX2 optimizations, but SSE2 does seem to offer > some > >speed gains. > > > >f32 1920x1080 1 thread with prelut > >c impl > >1389936500 UNITS in lut3d->interp, 1 runs, 0 skips > >1425800240 UNITS in lut3d->interp, 2 runs, 0 skips > >1433312777 UNITS in lut3d->interp, 4 runs, 0 skips > >1443346798 UNITS in lut3d->interp, 8 runs, 0 skips > > > >sse2 > >948662320 UNITS in lut3d->interp, 1 runs, 0 skips > >1101247540 UNITS in lut3d->interp, 2 runs, 0 skips > >1050645695 UNITS in lut3d->interp, 4 runs, 0 skips > >1041102937 UNITS in lut3d->interp, 8 runs, 0 skips > > > >avx > >633837000 UNITS in lut3d->interp, 1 runs, 0 skips > >669452850 UNITS in lut3d->interp, 2 runs, 0 skips > >650716580 UNITS in lut3d->interp, 4 runs, 0 skips > >644698550 UNITS in lut3d->interp, 8 runs, 0 skips > > > >avx2 > >354940020 UNITS in lut3d->interp, 1 runs, 0 skips > >362384340 UNITS in lut3d->interp, 2 runs, 0 skips > >356799020 UNITS in lut3d->interp, 4 runs, 0 skips > >357276815 UNITS in lut3d->interp, 8 runs, 0 skips > > > >gbrap16 1920x1080 1 thread with prelut > >c impl > >1445071160 UNITS in lut3d->interp, 1 runs, 0 skips > >1477959120 UNITS in lut3d->interp, 2 runs, 0 skips > >1472102670 UNITS in lut3d->interp, 4 runs, 0 skips > >1462579330 UNITS in lut3d->interp, 8 runs, 0 skips > > > >sse2 > >1035437580 UNITS in lut3d->interp, 1 runs, 0 skips > >1050139710 UNITS in lut3d->interp, 2 runs, 0 skips > >1070147205 UNITS in lut3d->interp, 4 runs, 0 skips > >1064583037 UNITS in lut3d->interp, 8 runs, 0 skips > > > >avx > >678089880 UNITS in lut3d->interp, 1 runs, 0 skips > >679112485 UNITS in lut3d->interp, 2 runs, 0 skips > >695527212 UNITS in lut3d->interp, 4 runs, 0 skips > >691300053 UNITS in lut3d->interp, 8 runs, 0 skips > > > >avx2 > >372671340 UNITS in lut3d->interp, 1 runs, 0 
skips > >373449870 UNITS in lut3d->interp, 2 runs, 0 skips > >383725625 UNITS in lut3d->interp, 4 runs, 0 skips > >382860848 UNITS in lut3d->interp, 8 runs, 0 skips > > > >--- > > libavfilter/lut3d.h | 83 ++++ > > libavfilter/vf_lut3d.c | 61 +-- > > libavfilter/x86/Makefile | 2 + > > libavfilter/x86/vf_lut3d.asm | 757 ++++++++++++++++++++++++++++++++ > > libavfilter/x86/vf_lut3d_init.c | 88 ++++ > > 5 files changed, 935 insertions(+), 56 deletions(-) > > create mode 100644 libavfilter/lut3d.h > > create mode 100644 libavfilter/x86/vf_lut3d.asm > > create mode 100644 libavfilter/x86/vf_lut3d_init.c > > > >diff --git a/libavfilter/lut3d.h b/libavfilter/lut3d.h > >new file mode 100644 > >index 0000000000..ded2a036a5 > >--- /dev/null > >+++ b/libavfilter/lut3d.h > >@@ -0,0 +1,83 @@ > >+/* > >+ * Copyright (c) 2013 Clément Bœsch > >+ * Copyright (c) 2018 Paul B Mahol > >+ * > >+ * This file is part of FFmpeg. > >+ * > >+ * FFmpeg is free software; you can redistribute it and/or > >+ * modify it under the terms of the GNU Lesser General Public > >+ * License as published by the Free Software Foundation; either > >+ * version 2.1 of the License, or (at your option) any later version. > >+ * > >+ * FFmpeg is distributed in the hope that it will be useful, > >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of > >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > >+ * Lesser General Public License for more details. 
> >+ * > >+ * You should have received a copy of the GNU Lesser General Public > >+ * License along with FFmpeg; if not, write to the Free Software > >+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > >+ */ > >+#ifndef AVFILTER_LUT3D_H > >+#define AVFILTER_LUT3D_H > >+ > >+#include "libavutil/pixdesc.h" > >+#include "framesync.h" > >+#include "avfilter.h" > >+ > >+enum interp_mode { > >+ INTERPOLATE_NEAREST, > >+ INTERPOLATE_TRILINEAR, > >+ INTERPOLATE_TETRAHEDRAL, > >+ INTERPOLATE_PYRAMID, > >+ INTERPOLATE_PRISM, > >+ NB_INTERP_MODE > >+}; > >+ > >+struct rgbvec { > >+ float r, g, b; > >+}; > >+ > >+/* 3D LUT don't often go up to level 32, but it is common to have a Hald > CLUT > >+ * of 512x512 (64x64x64) */ > >+#define MAX_LEVEL 256 > >+#define PRELUT_SIZE 65536 > >+ > >+typedef struct Lut3DPreLut { > >+ int size; > >+ float min[3]; > >+ float max[3]; > >+ float scale[3]; > >+ float* lut[3]; > >+} Lut3DPreLut; > >+ > >+typedef struct LUT3DContext { > >+ const AVClass *class; > >+ struct rgbvec *lut; > >+ int lutsize; > >+ int lutsize2; > >+ struct rgbvec scale; > >+ int interpolation; ///<interp_mode > >+ char *file; > >+ uint8_t rgba_map[4]; > >+ int step; > >+ avfilter_action_func *interp; > >+ Lut3DPreLut prelut; > >+#if CONFIG_HALDCLUT_FILTER > >+ uint8_t clut_rgba_map[4]; > >+ int clut_step; > >+ int clut_bits; > >+ int clut_planar; > >+ int clut_float; > >+ int clut_width; > >+ FFFrameSync fs; > >+#endif > >+} LUT3DContext; > >+ > >+typedef struct ThreadData { > >+ AVFrame *in, *out; > >+} ThreadData; > >+ > >+void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc); > >+ > >+#endif /* AVFILTER_LUT3D_H */ > >\ No newline at end of file > >diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c > >index 9fbda833b9..1fd0af06db 100644 > >--- a/libavfilter/vf_lut3d.c > >+++ b/libavfilter/vf_lut3d.c > >@@ -31,73 +31,18 @@ > > #include "libavutil/intreadwrite.h" > > #include "libavutil/intfloat.h" > > 
#include "libavutil/avassert.h" > >-#include "libavutil/pixdesc.h" > > #include "libavutil/avstring.h" > >-#include "avfilter.h" > > #include "drawutils.h" > > #include "formats.h" > >-#include "framesync.h" > > #include "internal.h" > > #include "video.h" > >+#include "lut3d.h" > > > > #define R 0 > > #define G 1 > > #define B 2 > > #define A 3 > > > >-enum interp_mode { > >- INTERPOLATE_NEAREST, > >- INTERPOLATE_TRILINEAR, > >- INTERPOLATE_TETRAHEDRAL, > >- INTERPOLATE_PYRAMID, > >- INTERPOLATE_PRISM, > >- NB_INTERP_MODE > >-}; > >- > >-struct rgbvec { > >- float r, g, b; > >-}; > >- > >-/* 3D LUT don't often go up to level 32, but it is common to have a Hald > CLUT > >- * of 512x512 (64x64x64) */ > >-#define MAX_LEVEL 256 > >-#define PRELUT_SIZE 65536 > >- > >-typedef struct Lut3DPreLut { > >- int size; > >- float min[3]; > >- float max[3]; > >- float scale[3]; > >- float* lut[3]; > >-} Lut3DPreLut; > >- > >-typedef struct LUT3DContext { > >- const AVClass *class; > >- int interpolation; ///<interp_mode > >- char *file; > >- uint8_t rgba_map[4]; > >- int step; > >- avfilter_action_func *interp; > >- struct rgbvec scale; > >- struct rgbvec *lut; > >- int lutsize; > >- int lutsize2; > >- Lut3DPreLut prelut; > >-#if CONFIG_HALDCLUT_FILTER > >- uint8_t clut_rgba_map[4]; > >- int clut_step; > >- int clut_bits; > >- int clut_planar; > >- int clut_float; > >- int clut_width; > >- FFFrameSync fs; > >-#endif > >-} LUT3DContext; > >- > >-typedef struct ThreadData { > >- AVFrame *in, *out; > >-} ThreadData; > >- > > #define OFFSET(x) offsetof(LUT3DContext, x) > > #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM > > #define TFLAGS > AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_RUNTIME_PARAM > >@@ -1207,6 +1152,10 @@ static int config_input(AVFilterLink *inlink) > > av_assert0(0); > > } > > > >+ if (ARCH_X86) { > >+ ff_lut3d_init_x86(lut3d, desc); > >+ } > >+ > > return 0; > > } > > > >diff --git a/libavfilter/x86/Makefile 
b/libavfilter/x86/Makefile > >index 016a5b3511..a29941eaeb 100644 > >--- a/libavfilter/x86/Makefile > >+++ b/libavfilter/x86/Makefile > >@@ -17,6 +17,7 @@ OBJS-$(CONFIG_HQDN3D_FILTER) += > x86/vf_hqdn3d_init.o > > OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o > > OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_tinterlace_init.o > > OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o > >+OBJS-$(CONFIG_LUT3D_FILTER) += x86/vf_lut3d_init.o > > OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp_init.o > > OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o > > OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o > >@@ -57,6 +58,7 @@ X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += > x86/vf_hqdn3d.o > > X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o > > X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o > > X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o > >+X86ASM-OBJS-$(CONFIG_LUT3D_FILTER) += x86/vf_lut3d.o > > X86ASM-OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp.o > > X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o > > X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay.o > >diff --git a/libavfilter/x86/vf_lut3d.asm b/libavfilter/x86/vf_lut3d.asm > >new file mode 100644 > >index 0000000000..b3d7c3962b > >--- /dev/null > >+++ b/libavfilter/x86/vf_lut3d.asm > >@@ -0,0 +1,757 @@ > > >+;***************************************************************************** > >+;* x86-optimized functions for lut3d filter > >+;* > >+;* Copyright (c) 2021 Mark Reid <mindm...@gmail.com> > >+;* > >+;* This file is part of FFmpeg. > >+;* > >+;* FFmpeg is free software; you can redistribute it and/or > >+;* modify it under the terms of the GNU Lesser General Public > >+;* License as published by the Free Software Foundation; either > >+;* version 2.1 of the License, or (at your option) any later version. 
> >+;* > >+;* FFmpeg is distributed in the hope that it will be useful, > >+;* but WITHOUT ANY WARRANTY; without even the implied warranty of > >+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > >+;* Lesser General Public License for more details. > >+;* > >+;* You should have received a copy of the GNU Lesser General Public > >+;* License along with FFmpeg; if not, write to the Free Software > >+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > > >+;****************************************************************************** > >+ > >+%include "libavutil/x86/x86util.asm" > >+ > >+SECTION_RODATA > >+pd_1f: times 8 dd 1.0 > >+pd_3f: times 8 dd 3.0 > >+ > >+; used to limit rshifts as they are more expensive in avx1 > >+pd_001: times 8 dd 001b > >+pd_010: times 8 dd 010b > >+pd_100: times 8 dd 100b > >+ > >+pd_65535f: times 8 dd 65535.0 > >+pd_65535_invf: times 8 dd 0x37800080 ;1.0/65535.0 > >+ > >+pb_shuffle16: db 0, 1, 0x80, 0x80, \ > >+ 2, 3, 0x80, 0x80, \ > >+ 4, 5, 0x80, 0x80, \ > >+ 6, 7, 0x80, 0x80 > >+ > >+pb_lo_pack_shuffle16: db 0, 1, 4, 5, \ > >+ 8, 9, 12, 13, \ > >+ 0x80, 0x80, 0x80, 0x80, \ > >+ 0x80, 0x80, 0x80, 0x80 > >+ > >+pb_hi_pack_shuffle16: db 0x80, 0x80, 0x80, 0x80, \ > >+ 0x80, 0x80, 0x80, 0x80, \ > >+ 0, 1, 4, 5, \ > >+ 8, 9, 12, 13 > >+ > >+; tetrahedral table -------------------------------------------- > >+; name: x2| x1| x0| cxxb| cxxa > >+; values: r 00| r 00| r 00| c011 011| c001 001 > >+; g 01| g 01| g 01| c101 101| c010 010 > >+; b 10| b 10| b 10| c110 110| c100 100 > >+ > >+; g>b b | g | r | > c110 | c100 > >+pd_tetra_table0: times 8 dd (10b << 10) | (01b << 8) | (00b << 6) | > (110b << 3) | 100b > >+; r>b g | b | r | > c101 | c100 > >+pd_tetra_table1: times 8 dd (01b << 10) | (10b << 8) | (00b << 6) | > (101b << 3) | 100b > >+; else g | r | b | > c101 | c001 > >+pd_tetra_table2: times 8 dd (01b << 10) | (00b << 8) | (10b << 6) | > (101b << 3) | 001b > >+; b>g r | g | b | > c011 | 
c001 > >+pd_tetra_table3: times 8 dd (00b << 10) | (01b << 8) | (10b << 6) | > (011b << 3) | 001b > >+; b>r r | b | g | > c011 | c010 > >+pd_tetra_table4: times 8 dd (00b << 10) | (10b << 8) | (01b << 6) | > (011b << 3) | 010b > >+; else b | r | g | > c110 | c010 > >+pd_tetra_table5: times 8 dd (10b << 10) | (00b << 8) | (01b << 6) | > (110b << 3) | 010b > >+ > >+SECTION .text > >+ > >+struc Lut3DPreLut > >+ .size: resd 1 > >+ .min: resd 3 > >+ .max: resd 3 > >+ .scale: resd 3 > >+ .lut: resq 3 > >+endstruc > >+ > >+struc LUT3DContext > >+ .class: resq 1 > >+ .lut: resq 1 > >+ .lutsize: resd 1 > >+ .lutsize2: resd 1 > >+ .scale: resd 3 > >+endstruc > >+ > >+%define AV_NUM_DATA_POINTERS 8 > >+ > >+struc AVFrame > >+ .data: resq AV_NUM_DATA_POINTERS > >+ .linesize: resd AV_NUM_DATA_POINTERS > >+ .extended_data: resq 1 > >+ .width: resd 1 > >+ .height: resd 1 > >+endstruc > >+ > >+%define rm rsp > >+%define gm rsp+mmsize > >+%define bm rsp+(mmsize*2) > >+ > >+%define lut3dsizem [rsp+mmsize*3] > >+%define lut3dsize2m [rsp+mmsize*4] > >+%define lut3dmaxm [rsp+mmsize*5] > >+%define prelutmaxm [rsp+mmsize*6] > >+ > >+%define scalerm [rsp+mmsize*7] > >+%define scalegm [rsp+mmsize*8] > >+%define scalebm [rsp+mmsize*9] > >+ > >+%define prelutminrm [rsp+mmsize*10] > >+%define prelutmingm [rsp+mmsize*11] > >+%define prelutminbm [rsp+mmsize*12] > >+ > >+%define prelutscalerm [rsp+mmsize*13] > >+%define prelutscalegm [rsp+mmsize*14] > >+%define prelutscalebm [rsp+mmsize*15] > >+ > >+; data pointers > >+%define srcrm [rsp+mmsize*16 + 0] > >+%define srcgm [rsp+mmsize*16 + 8] > >+%define srcbm [rsp+mmsize*16 + 16] > >+%define srcam [rsp+mmsize*16 + 24] > >+ > >+%define dstrm [rsp+mmsize*16 + 32] > >+%define dstgm [rsp+mmsize*16 + 40] > >+%define dstbm [rsp+mmsize*16 + 48] > >+%define dstam [rsp+mmsize*16 + 56] > >+ > >+%macro FETCH_PRELUT_PN 3 > >+ mov tmp2d, [rm + %3] > >+ mov tmp3d, [gm + %3] > >+ movss xm%1, [tmpq + tmp2q*4] > >+ movss xm%2, [tmpq + tmp3q*4] > >+ movss [rm + 
%3], xm%1 > >+ movss [gm + %3], xm%2 > >+%endmacro > >+ > >+; 1 - p > >+; 2 - n > >+; 3 - p indices > >+; 4 - n indices > >+%macro GATHER_PRELUT 4 > >+ %if cpuflag(avx2) > >+ vpcmpeqb m7, m7 > >+ vgatherdps m%1, [tmpq + m%3*4], m7 ; p > >+ vpcmpeqb m9, m9 > >+ vgatherdps m%2, [tmpq + m%4*4], m9 ; n > >+ %else > >+ mova [rm], m%3 > >+ mova [gm], m%4 > >+ FETCH_PRELUT_PN %1, %2, 0 > >+ FETCH_PRELUT_PN %1, %2, 4 > >+ FETCH_PRELUT_PN %1, %2, 8 > >+ FETCH_PRELUT_PN %1, %2, 12 > >+ %if mmsize > 16 > >+ FETCH_PRELUT_PN %1, %2, 16 > >+ FETCH_PRELUT_PN %1, %2, 20 > >+ FETCH_PRELUT_PN %1, %2, 24 > >+ FETCH_PRELUT_PN %1, %2, 28 > >+ %endif > >+ movu m%1, [rm] > >+ movu m%2, [gm] > >+ %endif > >+%endmacro > >+ > >+%macro FLOORPS 2 > >+ %if mmsize > 16 > >+ vroundps %1, %2, 0x01 > >+ %else > >+ cvttps2dq %1, %2 > >+ cvtdq2ps %1, %1 > >+ %endif > >+%endmacro > >+ > >+; 1 - dst > >+; 2 - index > >+; 3 - min > >+; 4 - scale > >+; assumes lut max m13, m14 1.0f, zero m15 > >+%macro APPLY_PRELUT 4 > >+ ; scale > >+ subps m5, m%1, %3 ; v - min > >+ mulps m5, m5, %4 ; v * scale > >+ ; clamp > >+ maxps m5, m5, m15 ; max zero > >+ minps m5, m5, m13 ; min lut max > >+ > >+ FLOORPS m3, m5 ; prev index > >+ subps m5, m5, m3 ; d > >+ addps m4, m3, m14 ; p+1 = n index > >+ minps m4, m4, m13 ; clamp n idex > >+ > >+ mov tmpq, [prelutq + Lut3DPreLut.lut + %2*8] > >+ cvttps2dq m6, m3 > >+ cvttps2dq m10, m4 > >+ GATHER_PRELUT 3, 4, 6, 10 > >+ > >+ ; lerp > >+ subps m8, m4, m3 > >+ mulps m8, m8, m5 > >+ addps m%1, m8, m3 > >+%endmacro > >+ > >+; 1 - dst > >+; 2 - scale > >+; assumes lut max m13, zero m15 > >+%macro APPLY_SCALE 2 > >+ mulps m%1, m%1, %2 > >+ maxps m%1, m%1, m15 > >+ minps m%1, m%1, m13 > >+%endmacro > >+ > >+%macro BLEND 4 > >+%if mmsize > 16 > >+ vblendvps %1, %2, %3, %4 > >+%else > >+ %ifidni %1,%2 > >+ %error operand 1 must not equal operand 2 > >+ %endif > >+ %ifidni %1,%3 > >+ %error operand 1 must not equal operand 3 > >+ %endif > >+ mova %1, %2 > >+ xorps %1, %3 > >+ andps 
%1, %4 > >+ xorps %1, %2 > >+%endif > >+%endmacro > >+ > >+; sets nans to zere, +inf -inf handled later by min/max clamps > >+%macro SANITIZE_F 1 > >+ cmpps m5, %1, %1, 0x0 ; nan == nan = False > >+ %if mmsize <= 16 > >+ mova m6, %1 > >+ BLEND %1, m15, m6, m5 > >+ %else > >+ BLEND %1, m15, %1, m5 > >+ %endif > >+%endmacro > >+ > >+%macro ADD3 4 > >+ addps %1, %2, %3 > >+ addps %1, %1, %4 > >+%endmacro > >+ > >+%macro CMP_EQUAL 3 > >+%if cpuflag(avx2) > >+ vpcmpeqd %1, %2, %3 > >+%elif cpuflag(avx) > >+ cmpps %1, %2, %3, 0x0 > >+%else > >+ pcmpeqd %1, %2, %3 > >+%endif > >+%endmacro > >+ > >+%macro SHIFT_RIGHT 2 > >+%if mmsize <= 16 > >+ psrld xm%1, %2 > >+%elif cpuflag(avx2) > >+ vpsrld m%1, m%1, %2 > >+%else > >+ vextractf128 xm15, m%1, 1 > >+ psrld xm%1, %2 > >+ psrld xm15, %2 > >+ vinsertf128 m%1, m%1, xm15, 1 > >+%endif > >+%endmacro > >+ > >+%macro FETCH_LUT3D_RGB 4 > >+ mov tmp2d, [rm + %4] > >+ movss xm%1, [tmpq + tmp2q*4 + 0] > >+ movss xm%2, [tmpq + tmp2q*4 + 4] > >+ movss xm%3, [tmpq + tmp2q*4 + 8] > >+ movss [rm + %4], xm%1 > >+ movss [gm + %4], xm%2 > >+ movss [bm + %4], xm%3 > >+%endmacro > >+ > >+; 1 - dstr > >+; 2 - dstg > >+; 3 - dstb > >+; 4 - indices > >+%macro GATHER_LUT3D_INDICES 4 > >+%if cpuflag(avx2) > >+ vpcmpeqb m3, m3 > >+ vgatherdps m%1, [tmpq + m%4*4 + 0], m3 > >+ vpcmpeqb m14, m14 > >+ vgatherdps m%2, [tmpq + m%4*4 + 4], m14 > >+ vpcmpeqb m15, m15 > >+ vgatherdps m%3, [tmpq + m%4*4 + 8], m15 > >+%else > >+ movu [rm], m%4 > >+ FETCH_LUT3D_RGB %1, %2, %3, 0 > >+ FETCH_LUT3D_RGB %1, %2, %3, 4 > >+ FETCH_LUT3D_RGB %1, %2, %3, 8 > >+ FETCH_LUT3D_RGB %1, %2, %3, 12 > >+%if mmsize > 16 > >+ FETCH_LUT3D_RGB %1, %2, %3, 16 > >+ FETCH_LUT3D_RGB %1, %2, %3, 20 > >+ FETCH_LUT3D_RGB %1, %2, %3, 24 > >+ FETCH_LUT3D_RGB %1, %2, %3, 28 > >+%endif > >+ movu m%1, [rm] > >+ movu m%2, [gm] > >+ movu m%3, [bm] > >+%endif > >+%endmacro > >+ > >+%macro interp_tetrahedral 0 > >+ %define d_r m0 > >+ %define d_g m1 > >+ %define d_b m2 > >+ > >+ %define prev_r m3 
> >+ %define prev_g m4 > >+ %define prev_b m5 > >+ > >+ %define next_r m6 > >+ %define next_g m7 > >+ %define next_b m8 > >+ > >+ %define x0 m4 > >+ %define x1 m5 > >+ %define x2 m6 > >+ > >+ ; setup prev index > >+ FLOORPS prev_r, m0 > >+ FLOORPS prev_g, m1 > >+ FLOORPS prev_b, m2 > >+ > >+ ; setup deltas > >+ subps d_r, m0, prev_r > >+ subps d_g, m1, prev_g > >+ subps d_b, m2, prev_b > >+ > >+ ; calculate select mask m9 > >+ movu m6, [pd_tetra_table2] > >+ cmpps m7, d_r, d_b, 0x1E ; r > b CMP_GT_OQ > >+ BLEND m10, m6, [pd_tetra_table1], m7 > >+ cmpps m7, d_g, d_b, 0x1E ; g > b CMP_GT_OQ > >+ BLEND m6, m10, [pd_tetra_table0], m7 > >+ > >+ movu m10, [pd_tetra_table5] > >+ cmpps m7, d_b, d_r, 0x1E ; b > r CMP_GT_OQ > >+ BLEND m9, m10, [pd_tetra_table4], m7 > >+ cmpps m7, d_b, d_g, 0x1E ; b > g CMP_GT_OQ > >+ BLEND m10, m9, [pd_tetra_table3], m7 > >+ > >+ cmpps m7, d_r, d_g, 0x1E ; r > g CMP_GT_OQ > >+ BLEND m9, m10, m6, m7 > >+ > >+ ; setup next index > >+ addps next_r, prev_r, m14 ; +1 > >+ minps next_r, next_r, m13 ; clamp lutmax > >+ > >+ addps next_g, prev_g, m14 ; +1 > >+ minps next_g, next_g, m13 ; clamp lutmax > >+ > >+ addps next_b, prev_b, m14 ; +1 > >+ minps next_b, next_b, m13 ; clamp lutmax > >+ > >+ ; prescale indices > >+ mulps prev_r, prev_r, lut3dsize2m > >+ mulps next_r, next_r, lut3dsize2m > >+ > >+ mulps prev_g, prev_g, lut3dsizem > >+ mulps next_g, next_g, lut3dsizem > >+ > >+ mulps prev_b, prev_b, [pd_3f] > >+ mulps next_b, next_b, [pd_3f] > >+ > >+ movu m14, [pd_001] > >+ > >+ ; cxxa m10 > >+ ; b > >+ andps m15, m9, m14 > >+ CMP_EQUAL m15, m15, m14 > >+ BLEND m10, prev_b, next_b, m15 > >+ > >+ ; g > >+ andps m15, m9, [pd_010] > >+ CMP_EQUAL m15, m15, [pd_010] > >+ BLEND m12, prev_g, next_g, m15 > >+ > >+ ; r > >+ andps m15, m9, [pd_100] > >+ CMP_EQUAL m15, m15, [pd_100] > >+ BLEND m13, prev_r, next_r, m15 > >+ > >+ ADD3 m10, m10, m12, m13 > >+ > >+ SHIFT_RIGHT 9, 3 ; 3 > >+ > >+ ; cxxb m11; > >+ ; b > >+ andps m15, m9, m14 > >+ CMP_EQUAL m15, 
m15, m14 > >+ BLEND m11, prev_b, next_b, m15 > >+ > >+ ; g > >+ andps m15, m9, [pd_010] > >+ CMP_EQUAL m15, m15, [pd_010] > >+ BLEND m12, prev_g, next_g, m15 > >+ > >+ ; r > >+ andps m15, m9, [pd_100] > >+ CMP_EQUAL m15, m15, [pd_100] > >+ BLEND m13, prev_r, next_r, m15 > >+ > >+ ADD3 m11, m11, m12, m13 > >+ > >+ ; c000 m12; > >+ ADD3 m12, prev_r, prev_g, prev_b > >+ > >+ ; c111 m13; > >+ ADD3 m13, next_r, next_g, next_b > >+ > >+ SHIFT_RIGHT 9, 3 ; 6 > >+ > >+ ; x0, m4 > >+ andps m15, m9, m14 > >+ CMP_EQUAL m15, m15, m14 > >+ BLEND m7, d_r, d_g, m15 ; r,g > >+ > >+ andps m15, m9, [pd_010] > >+ CMP_EQUAL m15, m15, [pd_010] > >+ BLEND x0, m7, d_b, m15 ; b > >+ > >+ ; x1, m5 > >+ andps m15, m9, [pd_100] > >+ CMP_EQUAL m15, m15, [pd_100] > >+ BLEND m7, d_r, d_g, m15 ; r,g > >+ > >+ SHIFT_RIGHT 9, 3 ; 9 > >+ > >+ andps m15, m9, m14 > >+ CMP_EQUAL m15, m15, m14 > >+ BLEND x1, m7, d_b, m15 ; b > >+ > >+ ; x2, m6 > >+ andps m15, m9, [pd_010] > >+ CMP_EQUAL m15, m15, [pd_010] > >+ BLEND m7, d_r, d_g, m15 ; r,g > >+ > >+ andps m15, m9, [pd_100] > >+ CMP_EQUAL m15, m15, [pd_100] > >+ BLEND x2, m7, d_b, m15 ; b > >+ > >+ ; convert indices to integer > >+ cvttps2dq m12, m12 > >+ cvttps2dq m10, m10 > >+ cvttps2dq m11, m11 > >+ cvttps2dq m13, m13 > >+ > >+ ; now the gathering festival > >+ mov tmpq, [ctxq + LUT3DContext.lut] > >+ > >+ GATHER_LUT3D_INDICES 0, 1, 2, 12 > >+ movu m14, [pd_1f] > >+ subps m14, m14, x0; 1 - x0 > >+ > >+ mulps m0, m0, m14 > >+ mulps m1, m1, m14 > >+ mulps m2, m2, m14 > >+ > >+ GATHER_LUT3D_INDICES 7, 8, 9, 10 > >+ subps m14, x0, x1; x0 - x1 > >+ mulps m7, m7, m14 > >+ addps m0, m0, m7 > >+ > >+ mulps m8, m8, m14 > >+ addps m1, m1, m8 > >+ > >+ mulps m9, m9, m14 > >+ addps m2, m2, m9 > >+ > >+ GATHER_LUT3D_INDICES 7, 8, 9, 11 > >+ subps m14, x1, x2; x1 - x2 > >+ > >+ mulps m7, m7, m14 > >+ addps m0, m0, m7 > >+ > >+ mulps m8, m8, m14 > >+ addps m1, m1, m8 > >+ > >+ mulps m9, m9, m14 > >+ addps m2, m2, m9 > >+ > >+ GATHER_LUT3D_INDICES 7, 8, 9, 13 > >+ 
mulps m7, m7, x2 > >+ addps m0, m0, m7 > >+ > >+ mulps m8, m8, x2 > >+ addps m1, m1, m8 > >+ > >+ mulps m9, m9, x2 > >+ addps m2, m2, m9 > >+%endmacro > >+ > >+%macro INIT_DATA_PTR 3 > >+ mov ptrq, [%2 + AVFrame.data + %3 * 8] > >+ mov tmpd, [%2 + AVFrame.linesize + %3 * 4] > >+ imul tmpd, slice_startd > >+ add ptrq, tmpq > >+ mov %1, ptrq > >+%endmacro > >+ > >+%macro INC_DATA_PTR 3 > >+ mov tmpd, [%2 + AVFrame.linesize + %3 * 4] > >+ mov ptrq, %1 > >+ add ptrq, tmpq > >+ mov %1, ptrq > >+%endmacro > >+ > >+%macro LOAD16 2 > >+ mov ptrq, %2 > >+ %if mmsize > 16 > >+ movu xm%1, [ptrq + xq*2] > >+ %else > >+ movsd xm%1, [ptrq + xq*2] > >+ %endif > >+ %if cpuflag(avx2) > >+ vpmovzxwd m%1, xm%1 > >+ %else > >+ %if mmsize > 16 > >+ pshufd xm4, xm%1, (1 << 6 | 0 << 4 | 3 << 2 | 2 << 0) > >+ pshufb xm%1, xm6 ; pb_shuffle16 > >+ pshufb xm4, xm6 ; pb_shuffle16 > >+ vinsertf128 m%1, m%1, xm4, 1 > >+ %else > >+ pshufd xm%1, xm%1, (3 << 6 | 1 << 4 | 3 << 2 | 0 << 0) > >+ pshuflw xm%1, xm%1, (2 << 6 | 1 << 4 | 2 << 2 | 0 << 0) > >+ pshufhw xm%1, xm%1, (2 << 6 | 1 << 4 | 2 << 2 | 0 << 0) > >+ %endif > >+ %endif > >+ cvtdq2ps m%1, m%1 > >+ mulps m%1, m%1, m7 ; pd_65535_invf > >+%endmacro > >+ > >+%macro STORE16 2 > >+ mulps m%2, m%2, m5 ; [pd_65535f] > >+ minps m%2, m%2, m5 ; [pd_65535f] > >+ maxps m%2, m%2, m15 ; zero > >+ cvttps2dq m%2, m%2 > >+ %if mmsize > 16 > >+ vextractf128 xm4, m%2, 1 > >+ pshufb xm%2, xm6 ; [pb_lo_pack_shuffle16] > >+ pshufb xm4, xm7 ; [pb_hi_pack_shuffle16] > >+ por xm%2, xm4 > >+ %else > >+ pshuflw xm%2, xm%2, (1 << 6 | 1 << 4 | 2 << 2 | 0 << 0) > >+ pshufhw xm%2, xm%2, (1 << 6 | 1 << 4 | 2 << 2 | 0 << 0) > >+ pshufd xm%2, xm%2, (3 << 6 | 3 << 4 | 2 << 2 | 0 << 0) > >+ %endif > >+ mov ptrq, %1 > >+ %if mmsize > 16 > >+ movu [ptrq + xq*2], xm%2 > >+ %else > >+ movsd [ptrq + xq*2], xm%2 > >+ %endif > >+%endmacro > >+ > >+; 1 - interp method > >+; 2 - format_name > >+; 3 - depth > >+; 4 - is float format > >+%macro DEFINE_INTERP_FUNC 4 > >+cglobal 
interp_%1_%2, 7, 13, 16, mmsize*16+(8*8), ctx, prelut, > src_image, dst_image, slice_start, slice_end, has_alpha, width, x, ptr, > tmp, tmp2, tmp3 > >+ ; store lut max and lutsize > >+ mov tmpd, dword [ctxq + LUT3DContext.lutsize] > >+ cvtsi2ss xm0, tmpd > >+ mulss xm0, xm0, [pd_3f] > >+ VBROADCASTSS m0, xm0 > >+ mova lut3dsizem, m0 > >+ sub tmpd, 1 > >+ cvtsi2ss xm0, tmpd > >+ VBROADCASTSS m0, xm0 > >+ mova lut3dmaxm, m0 > >+ > >+ ; scale_r > >+ mulss xm1, xm0, dword [ctxq + LUT3DContext.scale + 0*4] > >+ VBROADCASTSS m1, xm1 > >+ mova scalerm, m1 > >+ > >+ ; scale_g > >+ mulss xm1, xm0, dword [ctxq + LUT3DContext.scale + 1*4] > >+ VBROADCASTSS m1, xm1 > >+ mova scalegm, m1 > >+ > >+ ; scale_b > >+ mulss xm1, xm0, dword [ctxq + LUT3DContext.scale + 2*4] > >+ VBROADCASTSS m1, xm1 > >+ mova scalebm, m1 > >+ > >+ ; store lutsize2 > >+ cvtsi2ss xm0, dword [ctxq + LUT3DContext.lutsize2] > >+ mulss xm0, xm0, [pd_3f] > >+ VBROADCASTSS m0, xm0 > >+ mova lut3dsize2m, m0 > >+ > >+ ; init prelut values > >+ cmp prelutq, 0 > >+ je %%skip_init_prelut > >+ mov tmpd, dword [prelutq + Lut3DPreLut.size] > >+ sub tmpd, 1 > >+ cvtsi2ss xm0, tmpd > >+ VBROADCASTSS m0, xm0 > >+ mova prelutmaxm, m0 > >+ VBROADCASTSS m0, dword [prelutq + Lut3DPreLut.min + 0*4] > >+ mova prelutminrm, m0 > >+ VBROADCASTSS m0, dword [prelutq + Lut3DPreLut.min + 1*4] > >+ mova prelutmingm, m0 > >+ VBROADCASTSS m0, dword [prelutq + Lut3DPreLut.min + 2*4] > >+ mova prelutminbm, m0 > >+ VBROADCASTSS m0, dword [prelutq + Lut3DPreLut.scale + 0*4] > >+ mova prelutscalerm, m0 > >+ VBROADCASTSS m0, dword [prelutq + Lut3DPreLut.scale + 1*4] > >+ mova prelutscalegm, m0 > >+ VBROADCASTSS m0, dword [prelutq + Lut3DPreLut.scale + 2*4] > >+ mova prelutscalebm, m0 > >+ %%skip_init_prelut: > >+ > >+ mov widthd, [src_imageq + AVFrame.width] > >+ > >+ ; gbra pixel order > >+ INIT_DATA_PTR srcrm, src_imageq, 2 > >+ INIT_DATA_PTR srcgm, src_imageq, 0 > >+ INIT_DATA_PTR srcbm, src_imageq, 1 > >+ INIT_DATA_PTR srcam, src_imageq, 
3 > >+ > >+ INIT_DATA_PTR dstrm, dst_imageq, 2 > >+ INIT_DATA_PTR dstgm, dst_imageq, 0 > >+ INIT_DATA_PTR dstbm, dst_imageq, 1 > >+ INIT_DATA_PTR dstam, dst_imageq, 3 > >+ > >+ %%loop_y: > >+ xor xq, xq > >+ %%loop_x: > >+ movu m14, [pd_1f] > >+ xorps m15, m15, m15 > >+ %if %4 ; float > >+ mov ptrq, srcrm > >+ movu m0, [ptrq + xq*4] > >+ mov ptrq, srcgm > >+ movu m1, [ptrq + xq*4] > >+ mov ptrq, srcbm > >+ movu m2, [ptrq + xq*4] > >+ SANITIZE_F m0 > >+ SANITIZE_F m1 > >+ SANITIZE_F m2 > >+ %else > >+ ; constants for LOAD16 > >+ movu m7, [pd_65535_invf] > >+ %if notcpuflag(avx2) && mmsize >= 32 > >+ movu xm6, [pb_shuffle16] > >+ %endif > >+ LOAD16 0, srcrm > >+ LOAD16 1, srcgm > >+ LOAD16 2, srcbm > >+ %endif > >+ > >+ cmp prelutq, 0 > >+ je %%skip_prelut > >+ mova m13, prelutmaxm > >+ APPLY_PRELUT 0, 0, prelutminrm, prelutscalerm > >+ APPLY_PRELUT 1, 1, prelutmingm, prelutscalegm > >+ APPLY_PRELUT 2, 2, prelutminbm, prelutscalebm > >+ %%skip_prelut: > >+ > >+ mova m13, lut3dmaxm > >+ APPLY_SCALE 0, scalerm > >+ APPLY_SCALE 1, scalegm > >+ APPLY_SCALE 2, scalebm > >+ > >+ interp_%1 > >+ > >+ %if %4 ; float > >+ mov ptrq, dstrm > >+ movu [ptrq + xq*4], m0 > >+ mov ptrq, dstgm > >+ movu [ptrq + xq*4], m1 > >+ mov ptrq, dstbm > >+ movu [ptrq + xq*4], m2 > >+ cmp has_alphad, 0 > >+ je %%skip_alphaf > >+ mov ptrq, srcam > >+ movu m0, [ptrq + xq*4] > >+ mov ptrq, dstam > >+ movu [ptrq + xq*4], m0 > >+ %%skip_alphaf: > >+ %else > >+ ; constants for STORE16 > >+ movu m5, [pd_65535f] > >+ %if mmsize > 16 > >+ movu xm6, [pb_lo_pack_shuffle16] > >+ movu xm7, [pb_hi_pack_shuffle16] > >+ %endif > >+ > >+ xorps m15, m15, m15 > >+ STORE16 dstrm, 0 > >+ STORE16 dstgm, 1 > >+ STORE16 dstbm, 2 > >+ > >+ cmp has_alphad, 0 > >+ je %%skip_alpha > >+ %if mmsize > 16 > >+ mov ptrq, srcam > >+ movu xm0, [ptrq + xq*2] > >+ mov ptrq, dstam > >+ movu [ptrq + xq*2], xm0 > >+ %else > >+ mov ptrq, srcam > >+ movsd xm0, [ptrq + xq*2] > >+ mov ptrq, dstam > >+ movsd [ptrq + xq*2], xm0 > >+ %endif 
> >+ > >+ %%skip_alpha: > >+ %endif > >+ > >+ add xq, mmsize/4 > >+ cmp xd, widthd > >+ jl %%loop_x > >+ > >+ INC_DATA_PTR srcrm, src_imageq, 2 > >+ INC_DATA_PTR srcgm, src_imageq, 0 > >+ INC_DATA_PTR srcbm, src_imageq, 1 > >+ INC_DATA_PTR srcam, src_imageq, 3 > >+ > >+ INC_DATA_PTR dstrm, dst_imageq, 2 > >+ INC_DATA_PTR dstgm, dst_imageq, 0 > >+ INC_DATA_PTR dstbm, dst_imageq, 1 > >+ INC_DATA_PTR dstam, dst_imageq, 3 > >+ > >+ inc slice_startd > >+ cmp slice_startd, slice_endd > >+ jl %%loop_y > >+ > >+ RET > >+%endmacro > >+%if ARCH_X86_64 > >+ %if HAVE_AVX2_EXTERNAL > >+ INIT_YMM avx2 > >+ DEFINE_INTERP_FUNC tetrahedral, pf32, 32, 1 > >+ DEFINE_INTERP_FUNC tetrahedral, p16, 16, 0 > >+ %endif > >+ %if HAVE_AVX_EXTERNAL > >+ INIT_YMM avx > >+ DEFINE_INTERP_FUNC tetrahedral, pf32, 32, 1 > >+ DEFINE_INTERP_FUNC tetrahedral, p16, 16, 0 > >+ %endif > >+ INIT_XMM sse2 > >+ DEFINE_INTERP_FUNC tetrahedral, pf32, 32, 1 > >+ DEFINE_INTERP_FUNC tetrahedral, p16, 16, 0 > >+%endif > >\ No newline at end of file > >diff --git a/libavfilter/x86/vf_lut3d_init.c > b/libavfilter/x86/vf_lut3d_init.c > >new file mode 100644 > >index 0000000000..9b9b36e4af > >--- /dev/null > >+++ b/libavfilter/x86/vf_lut3d_init.c > >@@ -0,0 +1,88 @@ > >+/* > >+ * Copyright (c) 2021 Mark Reid <mindm...@gmail.com> > >+ * > >+ * This file is part of FFmpeg. > >+ * > >+ * FFmpeg is free software; you can redistribute it and/or > >+ * modify it under the terms of the GNU Lesser General Public > >+ * License as published by the Free Software Foundation; either > >+ * version 2.1 of the License, or (at your option) any later version. > >+ * > >+ * FFmpeg is distributed in the hope that it will be useful, > >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of > >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > >+ * Lesser General Public License for more details. 
> >+ * > >+ * You should have received a copy of the GNU Lesser General Public > >+ * License along with FFmpeg; if not, write to the Free Software > >+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > >+ */ > >+ > >+#include "libavutil/attributes.h" > >+#include "libavutil/cpu.h" > >+#include "libavutil/x86/cpu.h" > >+#include "libavfilter/lut3d.h" > >+ > >+#define DEFINE_INTERP_FUNC(name, format, opt) > \ > >+void ff_interp_##name##_##format##_##opt(LUT3DContext *lut3d, > Lut3DPreLut *prelut, AVFrame *src, AVFrame *dst, int slice_start, int > slice_end, int has_alpha); \ > >+static int interp_##name##_##format##_##opt(AVFilterContext *ctx, void > *arg, int jobnr, int nb_jobs) > \ > >+{ > \ > >+ LUT3DContext *lut3d = ctx->priv; > > \ > >+ Lut3DPreLut *prelut = lut3d->prelut.size > 0? &lut3d->prelut: NULL; > \ > >+ ThreadData *td = arg; > \ > >+ AVFrame *in = td->in; > > \ > >+ AVFrame *out = td->out; > \ > >+ int has_alpha = in->linesize[3] && out != in; > \ > >+ int slice_start = (in->height * jobnr ) / nb_jobs; > \ > >+ int slice_end = (in->height * (jobnr+1)) / nb_jobs; > \ > >+ ff_interp_##name##_##format##_##opt(lut3d, prelut, in, out, > slice_start, slice_end, has_alpha); > \ > >+ return 0; > \ > >+} > >+ > >+#if ARCH_X86_64 > >+#if HAVE_AVX2_EXTERNAL > >+ DEFINE_INTERP_FUNC(tetrahedral, pf32, avx2) > >+ DEFINE_INTERP_FUNC(tetrahedral, p16, avx2) > >+#endif > >+#if HAVE_AVX_EXTERNAL > >+ DEFINE_INTERP_FUNC(tetrahedral, pf32, avx) > >+ DEFINE_INTERP_FUNC(tetrahedral, p16, avx) > >+#endif > >+ DEFINE_INTERP_FUNC(tetrahedral, pf32, sse2) > >+ DEFINE_INTERP_FUNC(tetrahedral, p16, sse2) > >+#endif > >+ > >+ > >+av_cold void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor > *desc) > >+{ > >+ int cpu_flags = av_get_cpu_flags(); > >+ int planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR; > >+ int isfloat = desc->flags & AV_PIX_FMT_FLAG_FLOAT; > >+ int depth = desc->comp[0].depth; > >+ > >+#if ARCH_X86_64 > >+ if 
(EXTERNAL_AVX2_FAST(cpu_flags) && s->interpolation == > INTERPOLATE_TETRAHEDRAL && planar) { > >+#if HAVE_AVX2_EXTERNAL > >+ if (isfloat && planar) { > >+ s->interp = interp_tetrahedral_pf32_avx2; > >+ } else if (depth == 16) { > >+ s->interp = interp_tetrahedral_p16_avx2; > >+ } > >+#endif > >+ } else if (EXTERNAL_AVX_FAST(cpu_flags) && s->interpolation == > INTERPOLATE_TETRAHEDRAL && planar) { > >+#if HAVE_AVX_EXTERNAL > >+ if (isfloat) { > >+ s->interp = interp_tetrahedral_pf32_avx; > >+ } else if (depth == 16) { > >+ s->interp = interp_tetrahedral_p16_avx; > >+ } > >+#endif > >+ } else if (EXTERNAL_SSE2(cpu_flags) && s->interpolation == > INTERPOLATE_TETRAHEDRAL && planar) { > >+ if (isfloat) { > >+ s->interp = interp_tetrahedral_pf32_sse2; > >+ } else if (depth == 16) { > >+ s->interp = interp_tetrahedral_p16_sse2; > >+ } > >+ } > >+#endif > >+} > >-- > >2.31.1.windows.1 > > > >_______________________________________________ > >ffmpeg-devel mailing list > >ffmpeg-devel@ffmpeg.org > >https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > >To unsubscribe, visit link above, or email > >ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".