Add FFmpeg slice-threading support to zscale by splitting each frame into
slices, and call zimg_filter_graph_build (which used to take 30-60% of each
frame's processing time) only when necessary, i.e. when some parameters
have changed.
Compared with the original version, video downscaling and color conversion
are more than 4x faster on a 64-core Intel Xeon and 3x faster on an
i7-6700K (4 cores with HT).
Signed-off-by: Victoria Zhislina <victoria.zhisl...@intel.com>
---
libavfilter/vf_zscale.c | 787 ++++++++++++++++++++++++----------------
1 file changed, 475 insertions(+), 312 deletions(-)
diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 1288c5efc1..ea2565025f 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2015 Paul B Mahol
- *
+ * 2022 Victoria Zhislina, Intel
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -44,6 +45,8 @@
#include "libavutil/imgutils.h"
#define ZIMG_ALIGNMENT 32
+#define MIN_TILESIZE 64
+#define MAX_THREADS 64
static const char *const var_names[] = {
"in_w", "iw",
@@ -113,13 +116,17 @@ typedef struct ZScaleContext {
int force_original_aspect_ratio;
- void *tmp;
- size_t tmp_size;
+ void *tmp[MAX_THREADS]; //separate for each thread;
+ int nb_threads;
+ int slice_h;
zimg_image_format src_format, dst_format;
zimg_image_format alpha_src_format, alpha_dst_format;
+ zimg_image_format src_format_tmp, dst_format_tmp;
+ zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
zimg_graph_builder_params alpha_params, params;
- zimg_filter_graph *alpha_graph, *graph;
+ zimg_graph_builder_params alpha_params_tmp, params_tmp;
+ zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
enum AVColorSpace in_colorspace, out_colorspace;
enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -128,10 +135,181 @@ typedef struct ZScaleContext {
enum AVChromaLocation in_chromal, out_chromal;
} ZScaleContext;
+/* Bundle of two pixel format descriptors (desc, odesc) and two frames
+ * (in, out).
+ * NOTE(review): presumably the per-job argument of the slice-threaded
+ * worker, with desc/in describing the source and odesc/out the destination;
+ * the code using this struct is not visible in this hunk -- confirm. */
+typedef struct ThreadData {
+ const AVPixFmtDescriptor *desc, *odesc;
+ AVFrame *in, *out;
+} ThreadData;
+/* Translate an FFmpeg AVChromaLocation value into the matching
+ * ZIMG_CHROMA_* constant.
+ * AVCHROMA_LOC_UNSPECIFIED is treated like AVCHROMA_LOC_LEFT, and any value
+ * not listed in the switch also yields ZIMG_CHROMA_LEFT. */
+static int convert_chroma_location(enum AVChromaLocation chroma_location)
+{
+ switch (chroma_location) {
+ case AVCHROMA_LOC_UNSPECIFIED: /* shares the LEFT result below */
+ case AVCHROMA_LOC_LEFT:
+ return ZIMG_CHROMA_LEFT;
+ case AVCHROMA_LOC_CENTER:
+ return ZIMG_CHROMA_CENTER;
+ case AVCHROMA_LOC_TOPLEFT:
+ return ZIMG_CHROMA_TOP_LEFT;
+ case AVCHROMA_LOC_TOP:
+ return ZIMG_CHROMA_TOP;
+ case AVCHROMA_LOC_BOTTOMLEFT:
+ return ZIMG_CHROMA_BOTTOM_LEFT;
+ case AVCHROMA_LOC_BOTTOM:
+ return ZIMG_CHROMA_BOTTOM;
+ }
+ return ZIMG_CHROMA_LEFT; /* fallback for values not handled above */
+}
+/* Translate an FFmpeg AVColorSpace value into the matching ZIMG_MATRIX_*
+ * constant.
+ * Any colorspace not listed in the switch yields ZIMG_MATRIX_UNSPECIFIED. */
+static int convert_matrix(enum AVColorSpace colorspace)
+{
+ switch (colorspace) {
+ case AVCOL_SPC_RGB:
+ return ZIMG_MATRIX_RGB;
+ case AVCOL_SPC_BT709:
+ return ZIMG_MATRIX_709;
+ case AVCOL_SPC_UNSPECIFIED:
+ return ZIMG_MATRIX_UNSPECIFIED;
+ case AVCOL_SPC_FCC:
+ return ZIMG_MATRIX_FCC;
+ case AVCOL_SPC_BT470BG:
+ return ZIMG_MATRIX_470BG;
+ case AVCOL_SPC_SMPTE170M:
+ return ZIMG_MATRIX_170M;
+ case AVCOL_SPC_SMPTE240M:
+ return ZIMG_MATRIX_240M;
+ case AVCOL_SPC_YCGCO:
+ return ZIMG_MATRIX_YCGCO;
+ case AVCOL_SPC_BT2020_NCL:
+ return ZIMG_MATRIX_2020_NCL;
+ case AVCOL_SPC_BT2020_CL:
+ return ZIMG_MATRIX_2020_CL;
+ case AVCOL_SPC_CHROMA_DERIVED_NCL:
+ return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
+ case AVCOL_SPC_CHROMA_DERIVED_CL:
+ return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
+ case AVCOL_SPC_ICTCP:
+ return ZIMG_MATRIX_ICTCP;
+ }
+ return ZIMG_MATRIX_UNSPECIFIED; /* fallback for values not handled above */
+}
+/* Translate an FFmpeg AVColorTransferCharacteristic value into the matching
+ * ZIMG_TRANSFER_* constant.
+ * Several pairs do not share a name: GAMMA22 maps to 470_M, GAMMA28 to
+ * 470_BG, SMPTE170M to 601, LOG to LOG_100 and LOG_SQRT to LOG_316.
+ * Any transfer not listed in the switch yields ZIMG_TRANSFER_UNSPECIFIED. */
+static int convert_trc(enum AVColorTransferCharacteristic color_trc)
+{
+ switch (color_trc) {
+ case AVCOL_TRC_UNSPECIFIED:
+ return ZIMG_TRANSFER_UNSPECIFIED;
+ case AVCOL_TRC_BT709:
+ return ZIMG_TRANSFER_709;
+ case AVCOL_TRC_GAMMA22:
+ return ZIMG_TRANSFER_470_M;
+ case AVCOL_TRC_GAMMA28:
+ return ZIMG_TRANSFER_470_BG;
+ case AVCOL_TRC_SMPTE170M:
+ return ZIMG_TRANSFER_601;
+ case AVCOL_TRC_SMPTE240M:
+ return ZIMG_TRANSFER_240M;
+ case AVCOL_TRC_LINEAR:
+ return ZIMG_TRANSFER_LINEAR;
+ case AVCOL_TRC_LOG:
+ return ZIMG_TRANSFER_LOG_100;
+ case AVCOL_TRC_LOG_SQRT:
+ return ZIMG_TRANSFER_LOG_316;
+ case AVCOL_TRC_IEC61966_2_4:
+ return ZIMG_TRANSFER_IEC_61966_2_4;
+ case AVCOL_TRC_BT2020_10:
+ return ZIMG_TRANSFER_2020_10;
+ case AVCOL_TRC_BT2020_12:
+ return ZIMG_TRANSFER_2020_12;
+ case AVCOL_TRC_SMPTE2084:
+ return ZIMG_TRANSFER_ST2084;
+ case AVCOL_TRC_ARIB_STD_B67:
+ return ZIMG_TRANSFER_ARIB_B67;
+ case AVCOL_TRC_IEC61966_2_1:
+ return ZIMG_TRANSFER_IEC_61966_2_1;
+ }
+ return ZIMG_TRANSFER_UNSPECIFIED; /* fallback for values not handled above */
+}
+/* Translate an FFmpeg AVColorPrimaries value into the matching
+ * ZIMG_PRIMARIES_* constant.
+ * AVCOL_PRI_JEDEC_P22 maps to ZIMG_PRIMARIES_EBU3213_E.
+ * Any primaries value not listed in the switch yields
+ * ZIMG_PRIMARIES_UNSPECIFIED. */
+static int convert_primaries(enum AVColorPrimaries color_primaries)
+{
+ switch (color_primaries) {
+ case AVCOL_PRI_UNSPECIFIED:
+ return ZIMG_PRIMARIES_UNSPECIFIED;
+ case AVCOL_PRI_BT709:
+ return ZIMG_PRIMARIES_709;
+ case AVCOL_PRI_BT470M:
+ return ZIMG_PRIMARIES_470_M;
+ case AVCOL_PRI_BT470BG:
+ return ZIMG_PRIMARIES_470_BG;
+ case AVCOL_PRI_SMPTE170M:
+ return ZIMG_PRIMARIES_170M;
+ case AVCOL_PRI_SMPTE240M:
+ return ZIMG_PRIMARIES_240M;
+ case AVCOL_PRI_FILM:
+ return ZIMG_PRIMARIES_FILM;
+ case AVCOL_PRI_BT2020:
+ return ZIMG_PRIMARIES_2020;
+ case AVCOL_PRI_SMPTE428:
+ return ZIMG_PRIMARIES_ST428;
+ case AVCOL_PRI_SMPTE431:
+ return ZIMG_PRIMARIES_ST431_2;
+ case AVCOL_PRI_SMPTE432:
+ return ZIMG_PRIMARIES_ST432_1;
+ case AVCOL_PRI_JEDEC_P22:
+ return ZIMG_PRIMARIES_EBU3213_E;
+ }
+ return ZIMG_PRIMARIES_UNSPECIFIED; /* fallback for values not handled above */
+}
+/* Translate an FFmpeg AVColorRange value into a ZIMG_RANGE_* constant.
+ * AVCOL_RANGE_UNSPECIFIED is treated like AVCOL_RANGE_MPEG (limited range),
+ * AVCOL_RANGE_JPEG gives full range, and any other value also yields
+ * ZIMG_RANGE_LIMITED. */
+static int convert_range(enum AVColorRange color_range)
+{
+ switch (color_range) {
+ case AVCOL_RANGE_UNSPECIFIED: /* shares the LIMITED result below */
+ case AVCOL_RANGE_MPEG:
+ return ZIMG_RANGE_LIMITED;
+ case AVCOL_RANGE_JPEG:
+ return ZIMG_RANGE_FULL;
+ }
+ return ZIMG_RANGE_LIMITED; /* fallback for values not handled above */
+}
+/* Translate a zimg pixel range back into an FFmpeg AVColorRange value.
+ * ZIMG_RANGE_LIMITED gives AVCOL_RANGE_MPEG and ZIMG_RANGE_FULL gives
+ * AVCOL_RANGE_JPEG; any other value yields AVCOL_RANGE_UNSPECIFIED. */
+static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e
color_range)
+{
+ switch (color_range) {
+ case ZIMG_RANGE_LIMITED:
+ return AVCOL_RANGE_MPEG;
+ case ZIMG_RANGE_FULL:
+ return AVCOL_RANGE_JPEG;
+ }
+ return AVCOL_RANGE_UNSPECIFIED; /* fallback for values not handled above */
+}
+
static av_cold int init(AVFilterContext *ctx)
{
ZScaleContext *s = ctx->priv;
int ret;
+ int i;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ s->tmp[i] = NULL;
+ s->graph[i] = NULL;
+ s->alpha_graph[i] = NULL;
+ }
+ zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+ zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+ zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
+ zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
+
+ zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+ zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+ zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
+ zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
+
+ zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+ zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
+ zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+ zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
if (s->size_str && (s->w_expr || s->h_expr)) {
av_log(ctx, AV_LOG_ERROR,
@@ -158,7 +336,6 @@ static av_cold int init(AVFilterContext *ctx)
av_opt_set(s, "w", "iw", 0);
if (!s->h_expr)
av_opt_set(s, "h", "ih", 0);
-
return 0;
}
@@ -194,6 +371,153 @@ static int query_formats(AVFilterContext *ctx)
return ff_formats_ref(ff_make_format_list(pixel_fmts),
&ctx->outputs[0]->incfg.formats);
}
+/* Compare two zimg image formats field by field.
+ * Returns 0 if all compared fields are equal and 1 otherwise.
+ * The alpha field takes part in the comparison only when
+ * ZIMG_API_VERSION >= 0x204. Neither argument is modified. */
+static int compare_zimg_image_formats(zimg_image_format *img_fmt0,
zimg_image_format *img_fmt1)
+{
+ return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
+#if ZIMG_API_VERSION >= 0x204
+ (img_fmt0->alpha != img_fmt1->alpha) ||
+#endif
+ (img_fmt0->color_family != img_fmt1->color_family) ||
+ (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
+ (img_fmt0->depth != img_fmt1->depth) ||
+ (img_fmt0->field_parity != img_fmt1->field_parity) ||
+ (img_fmt0->height != img_fmt1->height) ||
+ (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
+ (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
+ (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
+ (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
+ (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
+ (img_fmt0->transfer_characteristics !=
img_fmt1->transfer_characteristics) ||
+ (img_fmt0->width != img_fmt1->width));
+}
+/* Compare two sets of zimg graph builder parameters.
+ * Returns 0 if all compared parameters are the same and 1 otherwise.
+ * Neither argument is modified. */
+static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0,
zimg_graph_builder_params *parm1)
+{
+ /* Only the parameters that can change within a single ffmpeg zscale
+  * invocation are checked. NaN is the default for some of the parameters
+  * tested with isnan() below, and NaN != NaN is always true, so each of
+  * those fields is compared only when at least one side is not NaN; two
+  * NaN values therefore count as equal. */
+ int ret = (parm0->allow_approximate_gamma !=
parm1->allow_approximate_gamma) ||
+ (parm0->dither_type != parm1->dither_type) ||
+ (parm0->resample_filter != parm1->resample_filter) ||
+ (parm0->resample_filter_uv != parm1->resample_filter_uv);
+
+ if ((isnan(parm0->nominal_peak_luminance) == 0) || /* skip when both are NaN */
(isnan(parm1->nominal_peak_luminance) == 0))
+ ret = ret || (parm0->nominal_peak_luminance !=
parm1->nominal_peak_luminance);
+ if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a)
== 0))
+ ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
+ if ((isnan(parm0->filter_param_a_uv) == 0) ||
(isnan(parm1->filter_param_a_uv) == 0))
+ ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
+ if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b)
== 0))
+ ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
+ if ((isnan(parm0->filter_param_b_uv) == 0) ||
(isnan(parm1->filter_param_b_uv) == 0))
+ ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
+
+ return ret;
+}
+/* Fill a zimg_image_format from an AVFrame and its pixel format descriptor.
+ * Width and height come from the frame; chroma subsampling, bit depth,
+ * pixel type and color family come from the descriptor.
+ * colorspace, primaries, transfer, range and location act as overrides:
+ * the value -1 means the field is derived from the corresponding frame
+ * property through the convert_* helpers, any other value is stored as given.
+ * For descriptors flagged AV_PIX_FMT_FLAG_RGB the matrix is always
+ * ZIMG_MATRIX_RGB and the range always ZIMG_RANGE_FULL, regardless of the
+ * colorspace and range arguments. */
+static void format_init(zimg_image_format *format, AVFrame *frame, const
AVPixFmtDescriptor *desc,
+ int colorspace, int primaries, int transfer, int range, int location)
+{
+ format->width = frame->width;
+ format->height = frame->height;
+ format->subsample_w = desc->log2_chroma_w;
+ format->subsample_h = desc->log2_chroma_h;
+ format->depth = desc->comp[0].depth; /* depth of the first component only */
+ format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT :
desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE; /* float flag wins, then depth above 8 selects WORD */
+ format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
+ format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
+ format->color_primaries = primaries == -1 ?
convert_primaries(frame->color_primaries) : primaries;
+ format->transfer_characteristics = transfer == -1 ?
convert_trc(frame->color_trc) : transfer;
+ format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL :
range == -1 ? convert_range(frame->color_range) : range;
+ format->chroma_location = location == -1 ?
convert_chroma_location(frame->chroma_location) : location;
+}