On 9/9/19, Aman Gupta <ffm...@tmm1.net> wrote: > From: Aman Gupta <a...@tmm1.net> > > These are simple algorithms which can be run efficiently > on low-powered devices to produce deinterlaced images. > > Signed-off-by: Aman Gupta <a...@tmm1.net> > --- > doc/filters.texi | 27 ++ > libavfilter/Makefile | 1 + > libavfilter/aarch64/Makefile | 1 + > libavfilter/aarch64/merge_neon.S | 98 ++++++ > libavfilter/allfilters.c | 1 + > libavfilter/arm/Makefile | 3 + > libavfilter/arm/merge_armv6.S | 70 ++++ > libavfilter/arm/merge_neon.S | 109 ++++++ > libavfilter/vf_fastdeint.c | 588 +++++++++++++++++++++++++++++++ > 9 files changed, 898 insertions(+) > create mode 100644 libavfilter/aarch64/merge_neon.S > create mode 100644 libavfilter/arm/Makefile > create mode 100644 libavfilter/arm/merge_armv6.S > create mode 100644 libavfilter/arm/merge_neon.S > create mode 100644 libavfilter/vf_fastdeint.c > > diff --git a/doc/filters.texi b/doc/filters.texi > index 6c81e1da40..55d9adeb81 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -9796,6 +9796,33 @@ fade=t=in:st=5.5:d=0.5 > > @end itemize > > +@section fastdeint > +Fast deinterlacing algorithms. > + > +@table @option > +@item mode > +Deinterlacing algorithm to use. > + > +It accepts the following values: > +@table @samp > +@item discard > +Discard bottom frame. > + > +@item mean > +Half resolution blender. > + > +@item blend > +Full resolution blender. > + > +@item bob > +Bob doubler. > + > +@item linear > +Bob doubler with linear interpolation. > +@end table > + > +@end table > + > @section fftdnoiz > Denoise frames using 3D FFT (frequency domain filtering). 
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index 3ef4191d9a..a2b3566ec0 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -234,6 +234,7 @@ OBJS-$(CONFIG_EROSION_OPENCL_FILTER) += > vf_neighbor_opencl.o opencl.o \ > opencl/neighbor.o > OBJS-$(CONFIG_EXTRACTPLANES_FILTER) += vf_extractplanes.o > OBJS-$(CONFIG_FADE_FILTER) += vf_fade.o > +OBJS-$(CONFIG_FASTDEINT_FILTER) += vf_fastdeint.o > OBJS-$(CONFIG_FFTDNOIZ_FILTER) += vf_fftdnoiz.o > OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o > OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o > diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile > index b58daa3a3f..2b0ad92893 100644 > --- a/libavfilter/aarch64/Makefile > +++ b/libavfilter/aarch64/Makefile > @@ -1,3 +1,4 @@ > OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o > > +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += aarch64/merge_neon.o > NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o > diff --git a/libavfilter/aarch64/merge_neon.S > b/libavfilter/aarch64/merge_neon.S > new file mode 100644 > index 0000000000..62377331a4 > --- /dev/null > +++ b/libavfilter/aarch64/merge_neon.S > @@ -0,0 +1,98 @@ > +/* > + * Copyright (c) 2009-2016 Rémi Denis-Courmont, Janne Grunau, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/aarch64/asm.S" > + > +#define dest x0 > +#define src1 x1 > +#define src2 x2 > +#define size x3 > + > + .align 2 > + // NOTE: Offset and pitch must be multiple of 16-bytes. > +function ff_merge8_neon, export=1 > + ands x5, size, #~63 > + b.eq 2f > + mov x10, #64 > + add x11, src1, #32 > + add x12, src2, #32 > +1: > + ld1 {v0.16b,v1.16b}, [src1], x10 > + ld1 {v4.16b,v5.16b}, [src2], x10 > + ld1 {v2.16b,v3.16b}, [x11], x10 > + uhadd v0.16b, v0.16b, v4.16b > + ld1 {v6.16b,v7.16b}, [x12], x10 > + subs x5, x5, #64 > + uhadd v1.16b, v1.16b, v5.16b > + uhadd v2.16b, v2.16b, v6.16b > + uhadd v3.16b, v3.16b, v7.16b > + st1 {v0.16b,v1.16b}, [dest], #32 > + st1 {v2.16b,v3.16b}, [dest], #32 > + b.gt 1b > +2: > + tbz size, #5, 3f > + ld1 {v0.16b,v1.16b}, [src1], #32 > + ld1 {v4.16b,v5.16b}, [src2], #32 > + uhadd v0.16b, v0.16b, v4.16b > + uhadd v1.16b, v1.16b, v5.16b > + st1 {v0.16b,v1.16b}, [dest], #32 > +3: > + tbz size, #4, 4f > + ld1 {v0.16b}, [src1] > + ld1 {v4.16b}, [src2] > + uhadd v0.16b, v0.16b, v4.16b > + st1 {v0.16b}, [dest] > +4: > + ret > +endfunc > + > + .align 2 > +function ff_merge16_neon, export=1 > + ands x5, size, #~63 > + b.eq 2f > +1: > + ld1 {v0.8h,v1.8h}, [src1], #32 > + ld1 {v4.8h,v5.8h}, [src2], #32 > + ld1 {v2.8h,v3.8h}, [src1], #32 > + uhadd v0.8h, v0.8h, v4.8h > + ld1 {v6.8h,v7.8h}, [src2], #32 > + uhadd v1.8h, v1.8h, v5.8h > + uhadd v2.8h, v2.8h, v6.8h > + uhadd v3.8h, v3.8h, v7.8h > + st1 {v0.8h,v1.8h}, [dest], #32 > + st1 {v2.8h,v3.8h}, [dest], #32 > + subs x5, x5, #64 > + b.gt 1b > +2: > + tbz size, #5, 3f > + ld1 {v0.8h,v1.8h}, [src1], #32 > + ld1 {v4.8h,v5.8h}, [src2], #32 > + uhadd v0.8h, v0.8h, v4.8h > + uhadd v1.8h, v1.8h, v5.8h > + st1 {v0.8h,v1.8h}, [dest], #32 > +3: > + tbz size, 
#4, 4f > + ld1 {v0.8h}, [src1] > + ld1 {v4.8h}, [src2] > + uhadd v0.8h, v0.8h,v4.8h > + st1 {v0.8h}, [dest] > +4: > + ret > +endfunc > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index b675c688ee..6631af2ffe 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -219,6 +219,7 @@ extern AVFilter ff_vf_erosion; > extern AVFilter ff_vf_erosion_opencl; > extern AVFilter ff_vf_extractplanes; > extern AVFilter ff_vf_fade; > +extern AVFilter ff_vf_fastdeint; > extern AVFilter ff_vf_fftdnoiz; > extern AVFilter ff_vf_fftfilt; > extern AVFilter ff_vf_field; > diff --git a/libavfilter/arm/Makefile b/libavfilter/arm/Makefile > new file mode 100644 > index 0000000000..c92d62fac9 > --- /dev/null > +++ b/libavfilter/arm/Makefile > @@ -0,0 +1,3 @@ > +ARMV6-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_armv6.o > + > +NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_neon.o > diff --git a/libavfilter/arm/merge_armv6.S b/libavfilter/arm/merge_armv6.S > new file mode 100644 > index 0000000000..9b551c2c6c > --- /dev/null > +++ b/libavfilter/arm/merge_armv6.S > @@ -0,0 +1,70 @@ > +/* > + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/arm/asm.S" > + > +#define dest r0 > +#define src1 r1 > +#define src2 r2 > +#define size r3 > + > + .align 2 > +function ff_merge8_armv6, export=1 > + push {r4-r9,lr} > +1: > + pld [src1, #64] > + ldm src1!, {r4-r5} > + pld [src2, #64] > + ldm src2!, {r8-r9} > + subs size, size, #16 > + uhadd8 r4, r4, r8 > + ldm src1!, {r6-r7} > + uhadd8 r5, r5, r9 > + ldm src2!, {ip,lr} > + uhadd8 r6, r6, ip > + stm dest!, {r4-r5} > + uhadd8 r7, r7, lr > + stm dest!, {r6-r7} > + it eq > + popeq {r4-r9,pc} > + b 1b > +endfunc > + > + .align 2 > +function ff_merge16_armv6, export=1 > + push {r4-r9,lr} > +1: > + pld [src1, #64] > + ldm src1!, {r4-r5} > + pld [src2, #64] > + ldm src2!, {r8-r9} > + subs size, size, #16 > + uhadd16 r4, r4, r8 > + ldm src1!, {r6-r7} > + uhadd16 r5, r5, r9 > + ldm src2!, {ip,lr} > + uhadd16 r6, r6, ip > + stm dest!, {r4-r5} > + uhadd16 r7, r7, lr > + stm dest!, {r6-r7} > + it eq > + popeq {r4-r9,pc} > + b 1b > +endfunc > \ No newline at end of file > diff --git a/libavfilter/arm/merge_neon.S b/libavfilter/arm/merge_neon.S > new file mode 100644 > index 0000000000..ae36cf3ca9 > --- /dev/null > +++ b/libavfilter/arm/merge_neon.S > @@ -0,0 +1,109 @@ > +/* > + * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. 
> + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/arm/asm.S" > + > +#define dest r0 > +#define src1 r1 > +#define src2 r2 > +#define size r3 > + > + .align 2 > + @ NOTE: Offset and pitch must be multiple of 16-bytes. > +function ff_merge8_neon, export=1 > + cmp size, #64 > + blo 2f > +1: > + pld [src1, #64] > + vld1.u8 {q0-q1}, [src1,:128]! > + pld [src2, #64] > + vld1.u8 {q8-q9}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + sub size, size, #64 > + vld1.u8 {q2-q3}, [src1,:128]! > + vhadd.u8 q1, q1, q9 > + vld1.u8 {q10-q11}, [src2,:128]! > + vhadd.u8 q2, q2, q10 > + cmp size, #64 > + vhadd.u8 q3, q3, q11 > + vst1.u8 {q0-q1}, [dest,:128]! > + vst1.u8 {q2-q3}, [dest,:128]! > + bhs 1b > +2: > + cmp size, #32 > + blo 3f > + vld1.u8 {q0-q1}, [src1,:128]! > + sub size, size, #32 > + vld1.u8 {q8-q9}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + vhadd.u8 q1, q1, q9 > + vst1.u8 {q0-q1}, [dest,:128]! > +3: > + cmp size, #16 > + it lo > + bxlo lr > + vld1.u8 {q0}, [src1,:128]! > + sub size, size, #16 > + vld1.u8 {q8}, [src2,:128]! > + vhadd.u8 q0, q0, q8 > + vst1.u8 {q0}, [dest,:128]! > + bx lr > +endfunc > + > + .align 2 > +function ff_merge16_neon, export=1 > + cmp size, #64 > + blo 2f > +1: > + pld [src1, #64] > + vld1.u16 {q0-q1}, [src1,:128]! > + pld [src2, #64] > + vld1.u16 {q8-q9}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + sub size, size, #64 > + vld1.u16 {q2-q3}, [src1,:128]! > + vhadd.u16 q1, q1, q9 > + vld1.u16 {q10-q11}, [src2,:128]! 
> + vhadd.u16 q2, q2, q10 > + cmp size, #64 > + vhadd.u16 q3, q3, q11 > + vst1.u16 {q0-q1}, [dest,:128]! > + vst1.u16 {q2-q3}, [dest,:128]! > + bhs 1b > +2: > + cmp size, #32 > + blo 3f > + vld1.u16 {q0-q1}, [src1,:128]! > + sub size, size, #32 > + vld1.u16 {q8-q9}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + vhadd.u16 q1, q1, q9 > + vst1.u16 {q0-q1}, [dest,:128]! > +3: > + cmp size, #16 > + it lo > + bxlo lr > + vld1.u16 {q0}, [src1,:128]! > + sub size, size, #16 > + vld1.u16 {q8}, [src2,:128]! > + vhadd.u16 q0, q0, q8 > + vst1.u16 {q0}, [dest,:128]! > + bx lr > +endfunc > \ No newline at end of file > diff --git a/libavfilter/vf_fastdeint.c b/libavfilter/vf_fastdeint.c > new file mode 100644 > index 0000000000..5ddd8be392 > --- /dev/null > +++ b/libavfilter/vf_fastdeint.c > @@ -0,0 +1,588 @@ > +/* > + * Copyright (C) 2015 Aman Gupta <a...@tmm1.net> > + * 2000-2011 VLC authors and VideoLAN > + * > + * Author: Sam Hocevar <s...@zoy.org> > + * Damien Lucas <nit...@videolan.org> > + * Laurent Aimar <fen...@videolan.org> > + * Sigmund Augdal Helberg <sigmu...@videolan.org> > + * > + * These algorithms are derived from the VLC project's > + * modules/video_filter/deinterlace/algo_basic.c > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "libavutil/avassert.h" > +#include "libavutil/cpu.h" > +#include "libavutil/common.h" > +#include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > +#include "libavutil/imgutils.h" > +#include "libavutil/timestamp.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "video.h" > + > +enum Mode { > + MODE_DISCARD, > + MODE_MEAN, > + MODE_BLEND, > + MODE_BOB, > + MODE_LINEAR, > + MODE_MAX, > +}; > + > +typedef void (*merge_fn)(void *dst, const void *src1, const void *src2, > size_t len); > + > +typedef struct FastDeintContext { > + const AVClass *class; > + merge_fn merge; > + int merge_size; > + int merge_aligned; > + AVFrame *cur, *next; > + enum Mode mode; > + int eof; > +} FastDeintContext; > + > +static void merge8_c(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, size_t bytes) > +{ > + for (; bytes > 0; bytes--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > +} > + > +static void merge16_c(uint16_t *dst, const uint16_t *src1, const uint16_t > *src2, size_t bytes) > +{ > + for (size_t words = bytes / 2; words > 0; words--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > +} > + > +static void merge8_unaligned(FastDeintContext *s, uint8_t *dst, const > uint8_t *src1, const uint8_t *src2, size_t bytes) > +{ > + if (s->merge_aligned) { > + size_t remainder = bytes % 16; > + if (remainder > 0) { > + merge8_c(dst, src1, src2, remainder); > + bytes -= remainder; > + dst += remainder; > + src1 += remainder; > + src2 += remainder; > + } > + } > + s->merge(dst, src1, src2, bytes); > +} > + > +static void merge16_unaligned(FastDeintContext *s, uint16_t *dst, const > uint16_t *src1, const uint16_t *src2, size_t bytes) > +{ > + if (s->merge_aligned) { > + size_t words = bytes / 2; > + 
size_t remainder = words % 8; > + if (remainder > 0) { > + merge16_c(dst, src1, src2, remainder); > + words -= remainder; > + dst += remainder; > + src1 += remainder; > + src2 += remainder; > + } > + } > + s->merge(dst, src1, src2, bytes); > +} > + > +static void merge_unaligned(FastDeintContext *s, void *dst, const void > *src1, const void *src2, size_t bytes) > +{ > + if (s->merge_size == 16) > + merge16_unaligned(s, dst, src1, src2, bytes); > + else > + merge8_unaligned(s, dst, src1, src2, bytes); > +} > + > +#if HAVE_SSE2_INLINE && defined(__x86_64__) > +static void merge8_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, size_t bytes) > +{ > + for(; bytes > 0 && ((uintptr_t)src1 & 15); bytes--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > + > + for (; bytes >= 16; bytes -= 16) { > + __asm__ __volatile__( "movdqu %2,%%xmm1;" > + "pavgb %1, %%xmm1;" > + "movdqu %%xmm1, %0" :"=m" (*dst): > + "m" (*src1), > + "m" (*src2) : "xmm1" ); > + dst += 16; > + src1 += 16; > + src2 += 16; > + } > + > + if (bytes > 0) { > + merge8_c(dst, src1, src2, bytes); > + } > +} > +static void merge16_sse2(uint16_t *dst, const uint16_t *src1, const > uint16_t *src2, size_t bytes) > +{ > + size_t words = bytes / 2; > + > + for(; words > 0 && ((uintptr_t)src1 & 15); words--) > + *dst++ = ( *src1++ + *src2++ ) >> 1; > + > + for (; words >= 8; words -= 8) { > + __asm__ __volatile__( "movdqu %2,%%xmm1;" > + "pavgw %1, %%xmm1;" > + "movdqu %%xmm1, %0" :"=m" (*dst): > + "m" (*src1), > + "m" (*src2) : "xmm1" ); > + dst += 8; > + src1 += 8; > + src2 += 8; > + } > +
Unacceptable code. Inline assembly is forbidden. > + if (words > 0) { > + merge16_c(dst, src1, src2, words * 2); > + } > +} > +#define merge8 merge8_sse2 > +#define merge16 merge16_sse2 > +#else > +#define merge8 merge8_c > +#define merge16 merge16_c > +#endif > + > +static void render_image_single(FastDeintContext *s, AVFrame *out, AVFrame > *frame) > +{ > + int i, planes_nb = 0; > + enum Mode mode = s->mode; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format); > + > + for (i = 0; i < desc->nb_components; i++) > + planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); > + > + for (i = 0; i < planes_nb; i++) { > + int height, bwidth; > + int dst_linesize, src_linesize; > + const uint8_t *src; > + uint8_t *dst; > + > + bwidth = av_image_get_linesize(out->format, out->width, i); > + if (bwidth < 0) { > + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n"); > + return; > + } > + > + height = out->height; > + if (i == 1 || i == 2) { > + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); > + } > + > + src = frame->data[i]; > + dst = out->data[i]; > + dst_linesize = out->linesize[i]; > + src_linesize = frame->linesize[i]; > + > + if (mode == MODE_BLEND) { > + // Copy first line > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + height--; > + } > + > + // Merge remaining lines > + for (; height > 0; height--) { > + if (mode == MODE_DISCARD) > + memcpy(dst, src, bwidth); > + else > + merge_unaligned(s, dst, src, src + src_linesize, bwidth); > + dst += dst_linesize; > + src += src_linesize; > + if (mode == MODE_MEAN || mode == MODE_DISCARD) { > + src += src_linesize; > + height--; > + } > + } > + } > + if (mode != MODE_DISCARD) > + emms_c(); > +} > + > +static void render_image_doubler(FastDeintContext *s, AVFrame *out, AVFrame > *frame, int field) > +{ > + int i, planes_nb = 0; > + enum Mode mode = s->mode; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format); > + > + for (i = 0; i < desc->nb_components; i++) > + 
planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); > + > + for (i = 0; i < planes_nb; i++) { > + int height, bwidth; > + int dst_linesize, src_linesize; > + const uint8_t *src; > + uint8_t *dst; > + > + bwidth = av_image_get_linesize(out->format, out->width, i); > + if (bwidth < 0) { > + av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n"); > + return; > + } > + height = out->height; > + if (i == 1 || i == 2) { > + height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h); > + } > + > + src = frame->data[i]; > + dst = out->data[i]; > + src_linesize = frame->linesize[i]; > + dst_linesize = out->linesize[i]; > + > + // For BOTTOM field we need to add the first line > + if (field == 1) { > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + src += src_linesize; > + height--; > + } > + > + height -= 2; > + > + for (; height > 0; height-=2) { > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + > + if (mode == MODE_LINEAR) > + merge_unaligned(s, dst, src, src + 2 * src_linesize, > bwidth); > + else > + memcpy(dst, src, bwidth); > + dst += dst_linesize; > + > + src += src_linesize * 2; > + } > + > + memcpy(dst, src, bwidth); > + > + // For TOP field we need to add the last line > + if (field == 0) > + { > + dst += dst_linesize; > + src += src_linesize; > + memcpy(dst, src, bwidth); > + } > + } > + if (mode == MODE_LINEAR) > + emms_c(); > +} > + > +static int filter_frame_single(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + AVFrame *out; > + FastDeintContext *s = ctx->priv; > + > + if (!frame->interlaced_frame) { > + return ff_filter_frame(ctx->outputs[0], frame); > + } > + > + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); > + if (!out) { > + av_frame_free(&frame); > + return AVERROR(ENOMEM); > + } > + > + av_frame_copy_props(out, frame); > + out->interlaced_frame = 0; > + render_image_single(s, out, frame); > + > + av_frame_free(&frame); > + return ff_filter_frame(ctx->outputs[0], out); > +} > + > 
+static AVFrame *copy_frame(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + AVFrame *out; > + > + if (frame->format == AV_PIX_FMT_VIDEOTOOLBOX) > + out = av_frame_alloc(); > + else > + out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h); > + > + if (!out) > + return NULL; > + > + av_frame_copy_props(out, frame); > + return out; > +} > + > +static int filter_frame_double(AVFilterLink *link, AVFrame *in) > +{ > + AVFilterContext *ctx = link->dst; > + FastDeintContext *s = ctx->priv; > + AVFrame *frame, *out, *out2; > + int tff, ret; > + > + s->cur = s->next; > + s->next = in; > + > + if (!s->cur) { > + return 0; > + } > + > + frame = s->cur; > + > + if (!frame->interlaced_frame) { > + if (frame->pts != AV_NOPTS_VALUE) > + frame->pts *= 2; > + s->cur = NULL; > + return ff_filter_frame(ctx->outputs[0], frame); > + } > + > + tff = frame->top_field_first; > + out = copy_frame(link, frame); > + if (!out) { > + av_frame_free(&frame); > + s->cur = NULL; > + return AVERROR(ENOMEM); > + } > + > + out->interlaced_frame = 0; > + if (out->pts != AV_NOPTS_VALUE) > + out->pts = out->pts * 2; > + render_image_doubler(s, out, frame, !tff); > + > + ret = ff_filter_frame(ctx->outputs[0], out); > + if (ret < 0) { > + av_frame_free(&frame); > + s->cur = NULL; > + return ret; > + } > + > + out2 = copy_frame(link, frame); > + if (!out2) { > + av_frame_free(&frame); > + s->cur = NULL; > + return AVERROR(ENOMEM); > + } > + > + out2->interlaced_frame = 0; > + av_frame_remove_side_data(out2, AV_FRAME_DATA_A53_CC); > + if (out2->pts != AV_NOPTS_VALUE) { > + out2->pts = frame->pts + s->next->pts; > + } > + render_image_doubler(s, out2, frame, tff); > + > + av_frame_free(&frame); > + s->cur = NULL; > + > + return ff_filter_frame(ctx->outputs[0], out2); > +} > + > +static int filter_frame(AVFilterLink *link, AVFrame *frame) > +{ > + AVFilterContext *ctx = link->dst; > + FastDeintContext *s = ctx->priv; > + > + av_assert0(frame); > + > + if (s->mode == 
MODE_LINEAR || s->mode == MODE_BOB) { > + return filter_frame_double(link, frame); > + } else { > + return filter_frame_single(link, frame); > + } > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + FastDeintContext *s = ctx->priv; > + av_frame_free(&s->cur); > + av_frame_free(&s->next); > +} > + > +static int query_formats(AVFilterContext *ctx) > +{ > + static const enum AVPixelFormat pix_fmts[] = { > + AV_PIX_FMT_YUV420P, > + AV_PIX_FMT_YUV422P, > + AV_PIX_FMT_YUV444P, > + AV_PIX_FMT_YUV410P, > + AV_PIX_FMT_YUV411P, > + AV_PIX_FMT_GRAY8, > + AV_PIX_FMT_YUVJ420P, > + AV_PIX_FMT_YUVJ422P, > + AV_PIX_FMT_YUVJ444P, > + AV_PIX_FMT_GRAY16, > + AV_PIX_FMT_YUV440P, > + AV_PIX_FMT_YUVJ440P, > + AV_PIX_FMT_YUV420P9, > + AV_PIX_FMT_YUV422P9, > + AV_PIX_FMT_YUV444P9, > + AV_PIX_FMT_YUV420P10, > + AV_PIX_FMT_YUV422P10, > + AV_PIX_FMT_YUV444P10, > + AV_PIX_FMT_YUV420P12, > + AV_PIX_FMT_YUV422P12, > + AV_PIX_FMT_YUV444P12, > + AV_PIX_FMT_YUV420P14, > + AV_PIX_FMT_YUV422P14, > + AV_PIX_FMT_YUV444P14, > + AV_PIX_FMT_YUV420P16, > + AV_PIX_FMT_YUV422P16, > + AV_PIX_FMT_YUV444P16, > + AV_PIX_FMT_YUVA420P, > + AV_PIX_FMT_YUVA422P, > + AV_PIX_FMT_YUVA444P, > + AV_PIX_FMT_GBRP, > + AV_PIX_FMT_GBRP9, > + AV_PIX_FMT_GBRP10, > + AV_PIX_FMT_GBRP12, > + AV_PIX_FMT_GBRP14, > + AV_PIX_FMT_GBRP16, > + AV_PIX_FMT_GBRAP, > + AV_PIX_FMT_NONE > + }; Group these onto fewer lines somehow. 
> + > + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); > + if (!fmts_list) > + return AVERROR(ENOMEM); > + return ff_set_common_formats(ctx, fmts_list); > +} > + > +#if ARCH_ARM > +#include "libavutil/arm/cpu.h" > +#endif > +#if ARCH_AARCH64 > +#include "libavutil/aarch64/cpu.h" > +#endif > +#if ARCH_AARCH64 || ARCH_ARM > +void ff_merge8_neon(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, > size_t bytes); > +void ff_merge16_neon(uint16_t *dst, const uint16_t *src1, const uint16_t > *src2, size_t bytes); > +void ff_merge8_armv6(uint8_t *dst, const uint8_t *src1, const uint8_t > *src2, size_t bytes); > +void ff_merge16_armv6(uint16_t *dst, const uint16_t *src1, const uint16_t > *src2, size_t bytes); > +#endif > + I do not like this style. Look at what other filters do, such as the ones that add x86 SIMD. > +static int config_props(AVFilterLink *link) > +{ > + AVFilterContext *ctx = link->src; > + FastDeintContext *s = ctx->priv; > + const AVPixFmtDescriptor *pix; > +#if ARCH_AARCH64 || ARCH_ARM > + int cpu_flags = av_get_cpu_flags(); > +#endif This belongs in a separate directory and file. See the aarch64 directory. > + > + link->w = link->src->inputs[0]->w; > + link->h = link->src->inputs[0]->h; > + link->time_base = link->src->inputs[0]->time_base; > + link->frame_rate = link->src->inputs[0]->frame_rate; > + link->sample_aspect_ratio = link->src->inputs[0]->sample_aspect_ratio; > + > + if (s->mode == MODE_MEAN || s->mode == MODE_DISCARD) { > + link->h /= 2; > + link->sample_aspect_ratio = av_mul_q(link->sample_aspect_ratio, > av_make_q(1, 2)); > + } > + if (s->mode == MODE_LINEAR || s->mode == MODE_BOB) { > + link->time_base = av_mul_q(link->time_base, av_make_q(1, 2)); > + link->frame_rate = av_mul_q(link->frame_rate, av_make_q(2, 1)); > + } > + > + pix = av_pix_fmt_desc_get(link->format); > + s->merge_size = (pix->comp[0].depth > 8) ? 16 : 8; > + s->merge = s->merge_size == 16 ? 
(merge_fn)merge16 : (merge_fn)merge8; > + > +#if ARCH_ARM > + if (have_armv6(cpu_flags)) { > + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_armv6 : > (merge_fn)ff_merge8_armv6; > + s->merge_aligned = 1; > + } > +#endif > +#if ARCH_AARCH64 || ARCH_ARM > + if (have_neon(cpu_flags)) { > + s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_neon : > (merge_fn)ff_merge8_neon; > + s->merge_aligned = 1; > + } > +#endif > + > + return 0; > +} > + > +static int request_frame(AVFilterLink *link) > +{ > + AVFilterContext *ctx = link->src; > + FastDeintContext *s = ctx->priv; > + int ret; > + > + if (s->eof) > + return AVERROR_EOF; > + > + ret = ff_request_frame(ctx->inputs[0]); > + > + if (ret == AVERROR_EOF && s->cur) { > + AVFrame *next = av_frame_clone(s->next); > + if (!next) > + return AVERROR(ENOMEM); > + > + next->pts = s->next->pts * 2 - s->cur->pts; > + filter_frame(ctx->inputs[0], next); > + s->eof = 1; > + } else if (ret < 0) { > + return ret; > + } > + > + return 0; > +} > + > +#define OFFSET(x) offsetof(FastDeintContext, x) > +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM > + > +#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, > {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit } > + > +static const AVOption fastdeint_options[] = { > + { "mode", "specify the deinterlacing mode", OFFSET(mode), > AV_OPT_TYPE_INT, {.i64=MODE_BLEND}, 0, MODE_MAX-1, FLAGS, "mode" }, > + CONST("discard", "discard bottom frame", MODE_DISCARD, "mode"), > + CONST("mean", "half resolution blender", MODE_MEAN, "mode"), > + CONST("blend", "full resolution blender", MODE_BLEND, "mode"), > + CONST("bob", "bob doubler", MODE_BOB, "mode"), > + CONST("linear", "bob doubler with linear interpolation", MODE_LINEAR, > "mode"), > + > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(fastdeint); > + > +static const AVFilterPad fastdeint_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = filter_frame, > + }, > + { 
NULL } > +}; > + > +static const AVFilterPad fastdeint_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_props, > + .request_frame = request_frame > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_fastdeint = { > + .name = "fastdeint", > + .description = NULL_IF_CONFIG_SMALL("fast deinterlacing algorithms"), First letter should be capitalized. > + .priv_size = sizeof(FastDeintContext), > + .priv_class = &fastdeint_class, > + .uninit = uninit, > + .query_formats = query_formats, > + .inputs = fastdeint_inputs, > + .outputs = fastdeint_outputs, > +}; > -- > 2.20.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".