Signed-off-by: Paul B Mahol <one...@gmail.com>
---
 libavfilter/vf_overlay.c          |  76 ++++++++-----------------------
 libavfilter/vf_overlay.h          |  84 ++++++++++++++++++++++++++++++++++
 libavfilter/x86/Makefile          |   2 +
 libavfilter/x86/vf_overlay.asm    | 106 +++++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_overlay_init.c |  39 ++++++++++++++++
 5 files changed, 250 insertions(+), 57 deletions(-)
 create mode 100644 libavfilter/vf_overlay.h
 create mode 100644 libavfilter/x86/vf_overlay.asm
 create mode 100644 libavfilter/x86/vf_overlay_init.c

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 8c1895cca4..81522d31a4 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -39,6 +39,7 @@
 #include "drawutils.h"
 #include "framesync.h"
 #include "video.h"
+#include "vf_overlay.h"
 
 typedef struct ThreadData {
     AVFrame *dst, *src;
@@ -59,21 +60,6 @@ static const char *const var_names[] = {
     NULL
 };
 
-enum var_name {
-    VAR_MAIN_W,    VAR_MW,
-    VAR_MAIN_H,    VAR_MH,
-    VAR_OVERLAY_W, VAR_OW,
-    VAR_OVERLAY_H, VAR_OH,
-    VAR_HSUB,
-    VAR_VSUB,
-    VAR_X,
-    VAR_Y,
-    VAR_N,
-    VAR_POS,
-    VAR_T,
-    VAR_VARS_NB
-};
-
 #define MAIN    0
 #define OVERLAY 1
 
@@ -92,45 +78,6 @@ enum EvalMode {
     EVAL_MODE_NB
 };
 
-enum OverlayFormat {
-    OVERLAY_FORMAT_YUV420,
-    OVERLAY_FORMAT_YUV422,
-    OVERLAY_FORMAT_YUV444,
-    OVERLAY_FORMAT_RGB,
-    OVERLAY_FORMAT_GBRP,
-    OVERLAY_FORMAT_AUTO,
-    OVERLAY_FORMAT_NB
-};
-
-typedef struct OverlayContext {
-    const AVClass *class;
-    int x, y;                   ///< position of overlaid picture
-
-    uint8_t main_is_packed_rgb;
-    uint8_t main_rgba_map[4];
-    uint8_t main_has_alpha;
-    uint8_t overlay_is_packed_rgb;
-    uint8_t overlay_rgba_map[4];
-    uint8_t overlay_has_alpha;
-    int format;                 ///< OverlayFormat
-    int alpha_format;
-    int eval_mode;              ///< EvalMode
-
-    FFFrameSync fs;
-
-    int main_pix_step[4];       ///< steps per pixel for each plane of the main output
-    int overlay_pix_step[4];    ///< steps per pixel for each plane of the overlay
-    int hsub, vsub;             ///< chroma subsampling values
-    const AVPixFmtDescriptor *main_desc; ///< format descriptor for main input
-
-    double var_values[VAR_VARS_NB];
-    char *x_expr, *y_expr;
-
-    AVExpr *x_pexpr, *y_pexpr;
-
-    int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
-} OverlayContext;
-
 static av_cold void uninit(AVFilterContext *ctx)
 {
     OverlayContext *s = ctx->priv;
@@ -509,6 +456,7 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
                                          int jobnr,
                                          int nb_jobs)
 {
+    OverlayContext *octx = ctx->priv;
     int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
     int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
     int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
@@ -538,8 +486,17 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
         s = sp + k;
         a = ap + (k<<hsub);
         da = dap + ((xp+k) << hsub);
-
-        for (kmax = FFMIN(-xp + dst_wp, src_wp); k < kmax; k++) {
+        kmax = FFMIN(-xp + dst_wp, src_wp);
+
+        if (octx->blend_row) {
+            octx->blend_row(d, da, s, a, kmax - k, k, j, src_wp, src_hp);
+            dp += dst->linesize[dst_plane];
+            sp += src->linesize[i];
+            ap += (1 << vsub) * src->linesize[3];
+            dap += (1 << vsub) * dst->linesize[3];
+            continue;
+        }
+        for (; k < kmax; k++) {
             int alpha_v, alpha_h, alpha;
 
             // average alpha for color components, improve quality
@@ -916,7 +873,7 @@ static int config_input_main(AVFilterLink *inlink)
     }
 
     if (!s->alpha_format)
-        return 0;
+        goto end;
 
     switch (s->format) {
     case OVERLAY_FORMAT_YUV420:
@@ -960,6 +917,11 @@ static int config_input_main(AVFilterLink *inlink)
         }
         break;
     }
+
+end:
+    if (ARCH_X86)
+        ff_overlay_init_x86(s, s->format, s->alpha_format, s->main_has_alpha);
+
     return 0;
 }
 
diff --git a/libavfilter/vf_overlay.h b/libavfilter/vf_overlay.h
new file mode 100644
index 0000000000..8eb91d9a34
--- /dev/null
+++ b/libavfilter/vf_overlay.h
@@ -0,0 +1,84 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_OVERLAY_H
+#define AVFILTER_OVERLAY_H
+
+#include "libavutil/eval.h"
+#include "libavutil/pixdesc.h"
+#include "framesync.h"
+#include "avfilter.h"
+
+enum var_name {
+    VAR_MAIN_W,    VAR_MW,
+    VAR_MAIN_H,    VAR_MH,
+    VAR_OVERLAY_W, VAR_OW,
+    VAR_OVERLAY_H, VAR_OH,
+    VAR_HSUB,
+    VAR_VSUB,
+    VAR_X,
+    VAR_Y,
+    VAR_N,
+    VAR_POS,
+    VAR_T,
+    VAR_VARS_NB
+};
+
+enum OverlayFormat {
+    OVERLAY_FORMAT_YUV420,
+    OVERLAY_FORMAT_YUV422,
+    OVERLAY_FORMAT_YUV444,
+    OVERLAY_FORMAT_RGB,
+    OVERLAY_FORMAT_GBRP,
+    OVERLAY_FORMAT_AUTO,
+    OVERLAY_FORMAT_NB
+};
+
+typedef struct OverlayContext {
+    const AVClass *class;
+    int x, y;                   ///< position of overlaid picture
+
+    uint8_t main_is_packed_rgb;
+    uint8_t main_rgba_map[4];
+    uint8_t main_has_alpha;
+    uint8_t overlay_is_packed_rgb;
+    uint8_t overlay_rgba_map[4];
+    uint8_t overlay_has_alpha;
+    int format;                 ///< OverlayFormat
+    int alpha_format;
+    int eval_mode;              ///< EvalMode
+
+    FFFrameSync fs;
+
+    int main_pix_step[4];       ///< steps per pixel for each plane of the main output
+    int overlay_pix_step[4];    ///< steps per pixel for each plane of the overlay
+    int hsub, vsub;             ///< chroma subsampling values
+    const AVPixFmtDescriptor *main_desc; ///< format descriptor for main input
+
+    double var_values[VAR_VARS_NB];
+    char *x_expr, *y_expr;
+
+    AVExpr *x_pexpr, *y_pexpr;
+
+    void (*blend_row)(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a, int w, int x, int y, int src_w, int src_h);
+    int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+} OverlayContext;
+
+void ff_overlay_init_x86(OverlayContext *s, int format, int alpha_format, int main_has_alpha);
+
+#endif /* AVFILTER_OVERLAY_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index f60de3b73b..b484c8bd1c 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -13,6 +13,7 @@ OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_tinterlace_init.o
 OBJS-$(CONFIG_LIMITER_FILTER)                += x86/vf_limiter_init.o
 OBJS-$(CONFIG_MASKEDMERGE_FILTER)            += x86/vf_maskedmerge_init.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += x86/vf_noise.o
+OBJS-$(CONFIG_OVERLAY_FILTER)                += x86/vf_overlay_init.o
 OBJS-$(CONFIG_PP7_FILTER)                    += x86/vf_pp7_init.o
 OBJS-$(CONFIG_PSNR_FILTER)                   += x86/vf_psnr_init.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += x86/vf_pullup_init.o
@@ -41,6 +42,7 @@ X86ASM-OBJS-$(CONFIG_IDET_FILTER)            += x86/vf_idet.o
 X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER)       += x86/vf_interlace.o
 X86ASM-OBJS-$(CONFIG_LIMITER_FILTER)         += x86/vf_limiter.o
 X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER)     += x86/vf_maskedmerge.o
+X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER)         += x86/vf_overlay.o
 X86ASM-OBJS-$(CONFIG_PP7_FILTER)             += x86/vf_pp7.o
 X86ASM-OBJS-$(CONFIG_PSNR_FILTER)            += x86/vf_psnr.o
 X86ASM-OBJS-$(CONFIG_PULLUP_FILTER)          += x86/vf_pullup.o
diff --git a/libavfilter/x86/vf_overlay.asm b/libavfilter/x86/vf_overlay.asm
new file mode 100644
index 0000000000..41f74fe946
--- /dev/null
+++ b/libavfilter/x86/vf_overlay.asm
@@ -0,0 +1,106 @@
+;*****************************************************************************
+;* x86-optimized functions for overlay filter
+;*
+;* Copyright (C) 2018 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+%if ARCH_X86_64
+
+SECTION_RODATA
+
+pd_128: times 4 dd 128
+pd_255: times 4 dd 255
+pd_257: times 4 dd 257
+pb_b2dw: db 0,-1,-1,-1, 1,-1,-1,-1, 2,-1,-1,-1, 3,-1,-1,-1
+pb_dw2b: db 0, 4, 8,12,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+
+SECTION .text
+
+; void ff_overlay_row_yuv444_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
+;                                 int w, int x, int y, int src_w, int src_h)
+;
+; Blends w bytes of s into d in place, using a as the per-byte alpha:
+;     d[i] = (((255 - a[i]) * d[i] + a[i] * s[i] + 128) * 257) >> 16
+; da, x, y, src_w and src_h are accepted but not read here.
+; Uses 14 general purpose registers, so it is only assembled on x86-64.
+INIT_XMM sse4
+cglobal overlay_row_yuv444, 9, 14, 5, 0, d, da, s, a, w, x, y, src_w, src_h, r, x, t, u, v
+    xor          xq, xq
+    movsxdifnidn wq, wd
+    test         wq, wq             ; .loop1 is a do-while: bail out early
+    jle .end                        ; when there is nothing to blend (w <= 0)
+    mov          rq, wq
+    and          rq, mmsize/4 - 1
+    cmp          wq, mmsize/4
+    jl .loop1
+    sub          wq, rq
+    .loop0:
+        movd     m1, [dq + xq]      ; 4 pixels per iteration: load exactly 4 bytes
+        movd     m2, [aq + xq]      ; so nothing past the end of the row is read
+        movd     m3, [sq + xq]
+
+        pshufb   m1, [pb_b2dw]
+        pshufb   m2, [pb_b2dw]
+        pshufb   m3, [pb_b2dw]
+        mova     m4, [pd_255]
+        psubd    m4, m2
+        pmulld   m1, m4
+        pmulld   m3, m2
+        paddd    m1, m3
+        paddd    m1, [pd_128]
+        pmulld   m1, [pd_257]
+        psrad    m1, 16
+        pshufb   m1, [pb_dw2b]
+        movd     [dq+xq], m1
+
+        add      xq, mmsize / 4
+        cmp      xq, wq
+        jl .loop0
+
+    cmp          rq, 0
+    je .end
+    add          wq, rq
+
+    .loop1:
+        xor      tq, tq
+        xor      uq, uq
+        xor      vq, vq
+        mov      rd, 255
+        mov      tb, [aq + xq]
+        neg      tb
+        add      rb, tb
+        mov      ub, [sq + xq]
+        neg      tb
+        imul     ud, td
+        mov      vb, [dq + xq]
+        imul     rd, vd
+        add      rd, ud
+        add      rd, 128
+        imul     rd, 257
+        sar      rd, 16
+        mov      [dq + xq], rb
+        add      xq, 1
+        cmp      xq, wq
+        jl .loop1
+    .end:
+    RET
+
+%endif ; ARCH_X86_64
diff --git a/libavfilter/x86/vf_overlay_init.c b/libavfilter/x86/vf_overlay_init.c
new file mode 100644
index 0000000000..f57c850a30
--- /dev/null
+++ b/libavfilter/x86/vf_overlay_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/vf_overlay.h"
+
+void ff_overlay_row_yuv444_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
+                                int w, int x, int y, int src_w, int src_h);
+
+av_cold void ff_overlay_init_x86(OverlayContext *s, int format, int alpha_format, int main_has_alpha)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) &&
+        (format == OVERLAY_FORMAT_YUV444 ||
+         format == OVERLAY_FORMAT_GBRP) &&
+        alpha_format == 0 && main_has_alpha == 0) {
+        s->blend_row = ff_overlay_row_yuv444_sse4;
+    }
+}
-- 
2.11.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel