From: Niklas Haas <g...@haasn.dev>

Provides a generic fast path for any operation list that can be decomposed
into a series of memcpy and memset operations.

25% faster than the x86 backend for yuv444p -> yuva444p
33% faster than the x86 backend for gray -> yuvj444p
---
 libswscale/Makefile     |   1 +
 libswscale/ops.c        |   2 +
 libswscale/ops_memcpy.c | 132 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)
 create mode 100644 libswscale/ops_memcpy.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 6e5696c5a6..136d33f6bc 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -18,6 +18,7 @@ OBJS = alphablend.o \
        ops.o \
        ops_backend.o \
        ops_chain.o \
+       ops_memcpy.o \
        ops_optimizer.o \
        options.o \
        output.o \
diff --git a/libswscale/ops.c b/libswscale/ops.c
index 3b9c2844f8..6403eff324 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -28,8 +28,10 @@
 #include "ops_internal.h"
 
 extern SwsOpBackend backend_c;
+extern SwsOpBackend backend_murder;
 
 const SwsOpBackend * const ff_sws_op_backends[] = {
+    &backend_murder,
     &backend_c,
     NULL
 };
diff --git a/libswscale/ops_memcpy.c b/libswscale/ops_memcpy.c
new file mode 100644
index 0000000000..1fcb58d452
--- /dev/null
+++ b/libswscale/ops_memcpy.c
@@ -0,0 +1,132 @@
+/**
+ * Copyright (C) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+
+#include "ops_backend.h"
+/* State built by compile() and read by process() through its priv pointer. */
+typedef struct MemcpyPriv {
+    int num_planes;         /* planes process() loops over; set from the write op's rw.elems */
+    int index[4];           /* input plane copied into each output plane, or -1 to clear plane */
+    uint8_t clear_value[4]; /* byte handed to memset() for planes whose index is -1 */
+} MemcpyPriv;
+
+/* Memcpy backend for trivial cases
+ *
+ * process() is the function installed as SwsCompiledOp.func by compile().
+ * For each of the p->num_planes output planes it does one of:
+ *   - index < 0:      memset() the plane with clear_value[i];
+ *   - equal strides:  a single memcpy() of out_stride * lines bytes;
+ *   - otherwise:      one memcpy() per row of
+ *                     (x_end * exec->pixel_bits_out) >> 3 bytes, advancing
+ *                     the input and output pointers by their own strides.
+ *
+ * exec            supplies the in/out plane pointers, the strides, width
+ *                 and pixel_bits_out
+ * priv            the MemcpyPriv produced by compile()
+ * x_start, x_end  asserted (av_assert1) to be 0 and exec->width
+ * y_start, y_end  row range; lines = y_end - y_start rows are handled
+ *
+ * NOTE(review): the memset() and single-memcpy() sizes are
+ * out_stride * lines, which presumably relies on strides being non-negative
+ * and on it being acceptable to touch the bytes between rows - confirm
+ * against the callers. */
+
+static void process(const SwsOpExec *exec, const void *priv,
+                    int x_start, int y_start, int x_end, int y_end)
+{
+    const MemcpyPriv *p = priv;
+    const int lines = y_end - y_start;
+    av_assert1(x_start == 0 && x_end == exec->width);
+    /* One pass per output plane; idx selects the input plane or the clear path */
+    for (int i = 0; i < p->num_planes; i++) {
+        uint8_t *out = exec->out[i];
+        const int idx = p->index[i];
+        if (idx < 0) {
+            memset(out, p->clear_value[i], exec->out_stride[i] * lines);
+        } else if (exec->out_stride[i] == exec->in_stride[idx]) {
+            memcpy(out, exec->in[idx], exec->out_stride[i] * lines); /* same stride on both sides: one call for all rows */
+        } else {
+            const int bytes = x_end * exec->pixel_bits_out >> 3; /* (x_end * bits) / 8, copied per row */
+            const uint8_t *in = exec->in[idx];
+            for (int y = y_start; y < y_end; y++) {
+                memcpy(out, in, bytes);
+                out += exec->out_stride[i];
+                in += exec->in_stride[idx];
+            }
+        }
+    }
+}
+/* Build a MemcpyPriv for an operation list, or refuse the list.
+ *
+ * Only read, swizzle, clear and write ops are accepted; any other op makes
+ * the function return AVERROR(ENOTSUP), as do:
+ *   - a read or write with rw.frac set, or packed with rw.elems != 1;
+ *   - a swizzle mask that names the same input twice;
+ *   - a clear constant whose denominator is neither 0 nor 1, or whose
+ *     numerator is not one byte repeated across the pixel type size.
+ *
+ * ctx   not referenced by this function
+ * ops   operation list, walked in order from ops[0] to ops[num_ops - 1]
+ * out   on return holds block_size 1, func = process, free = av_free and
+ *       priv = an av_memdup() copy of the MemcpyPriv
+ *
+ * Returns 0 on success, AVERROR(ENOTSUP) for unsupported lists and
+ * AVERROR(ENOMEM) if av_memdup() returned NULL; the out structure has
+ * already been written in that last case. */
+static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
+{
+    MemcpyPriv p = {0};
+
+    for (int n = 0; n < ops->num_ops; n++) {
+        const SwsOp *op = &ops->ops[n];
+        switch (op->op) {
+        case SWS_OP_READ:
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac)
+                return AVERROR(ENOTSUP);
+            for (int i = 0; i < op->rw.elems; i++)
+                p.index[i] = i; /* plane i starts out sourced from input plane i */
+            break;
+
+        case SWS_OP_SWIZZLE: {
+            const MemcpyPriv orig = p; /* snapshot, so the permutation reads pre-swizzle indices */
+            for (int i = 0; i < 4; i++) {
+                /* Explicitly exclude swizzle masks that contain duplicates,
+                 * because these are wasteful to implement as a memcpy */
+                for (int j = 0; j < i; j++) {
+                    if (op->swizzle.in[i] == op->swizzle.in[j])
+                        return AVERROR(ENOTSUP);
+                }
+                p.index[i] = orig.index[op->swizzle.in[i]];
+            }
+
+            break;
+        }
+
+        case SWS_OP_CLEAR:
+            for (int i = 0; i < 4; i++) {
+                if (!op->c.q4[i].den)
+                    continue; /* zero denominator: this component is skipped here */
+                if (op->c.q4[i].den != 1)
+                    return AVERROR(ENOTSUP);
+
+                /* Ensure all bytes to be cleared are the same, because we
+                 * can't memset on multi-byte sequences: val is the low byte
+                 * of the constant and ref is val repeated over 2 or 4 bytes
+                 * (left equal to val for any other pixel type size); a
+                 * mismatch with the constant's numerator rejects the list */
+                uint8_t val = op->c.q4[i].num & 0xFF;
+                uint32_t ref = val;
+                switch (ff_sws_pixel_type_size(op->type)) {
+                case 2: ref *= 0x101; break;
+                case 4: ref *= 0x1010101; break;
+                }
+                if (ref != op->c.q4[i].num)
+                    return AVERROR(ENOTSUP);
+                p.clear_value[i] = val;
+                p.index[i] = -1; /* process() will memset this plane */
+            }
+            break;
+
+        case SWS_OP_WRITE:
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac)
+                return AVERROR(ENOTSUP);
+            p.num_planes = op->rw.elems;
+            break;
+
+        default:
+            return AVERROR(ENOTSUP);
+        }
+    }
+
+    *out = (SwsCompiledOp) {
+        .block_size = 1,
+        .func = process,
+        .priv = av_memdup(&p, sizeof(p)), /* heap copy; NULL is reported as ENOMEM below */
+        .free = av_free,
+    };
+    return out->priv ? 0 : AVERROR(ENOMEM);
+}
+/* Backend descriptor exposing compile() under the name "memcpy". */
+SwsOpBackend backend_murder = {
+    .name = "memcpy",
+    .compile = compile,
+};
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".