This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit b7a80a9f0df760fbd5960374d25d450d7040f810 Author: Niklas Haas <[email protected]> AuthorDate: Thu Jun 4 00:12:04 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Tue Jun 9 18:27:20 2026 +0200 swscale/ops_backend: delete ops-based C backend And make uops_backend.c the new reference. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/Makefile | 1 - libswscale/ops.c | 2 - libswscale/ops_backend.c | 117 -------- libswscale/ops_backend.h | 162 ----------- libswscale/ops_tmpl_common.c | 346 ------------------------ libswscale/ops_tmpl_float.c | 273 ------------------- libswscale/ops_tmpl_int.c | 619 ------------------------------------------- libswscale/uops_backend.c | 4 +- 8 files changed, 2 insertions(+), 1522 deletions(-) diff --git a/libswscale/Makefile b/libswscale/Makefile index f69b39972a..2f08bc36bc 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -30,7 +30,6 @@ OBJS = alphablend.o \ OBJS-$(CONFIG_UNSTABLE) += \ ops.o \ - ops_backend.o \ ops_chain.o \ ops_dispatch.o \ ops_memcpy.o \ diff --git a/libswscale/ops.c b/libswscale/ops.c index 719198e116..b28dbec75f 100644 --- a/libswscale/ops.c +++ b/libswscale/ops.c @@ -32,7 +32,6 @@ #include "ops_internal.h" extern const SwsOpBackend backend_c; -extern const SwsOpBackend backend_uops; extern const SwsOpBackend backend_murder; extern const SwsOpBackend backend_aarch64; extern const SwsOpBackend backend_x86; @@ -50,7 +49,6 @@ const SwsOpBackend * const ff_sws_op_backends[] = { #elif ARCH_X86_64 && HAVE_X86ASM &backend_x86, #endif - &backend_uops, &backend_c, #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H &backend_spirv, diff --git a/libswscale/ops_backend.c b/libswscale/ops_backend.c deleted file mode 100644 index 254814ee37..0000000000 --- a/libswscale/ops_backend.c +++ /dev/null @@ -1,117 +0,0 @@ -/** - * Copyright (C) 2025 Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "ops_backend.h" - -/** - * We want to disable FP contraction because this is a reference backend that - * establishes a bit-exact reference result. - */ -#ifdef __clang__ -#pragma STDC FP_CONTRACT OFF -#elif AV_GCC_VERSION_AT_LEAST(4, 8) -#pragma GCC optimize ("fp-contract=off") -#elif defined(_MSC_VER) -#pragma fp_contract (off) -#endif - -#if AV_GCC_VERSION_AT_LEAST(4, 4) -#pragma GCC optimize ("finite-math-only") -#endif - -/* Array-based reference implementation */ - -#ifndef SWS_BLOCK_SIZE -# define SWS_BLOCK_SIZE 32 -#endif - -typedef uint8_t u8block_t[SWS_BLOCK_SIZE]; -typedef uint16_t u16block_t[SWS_BLOCK_SIZE]; -typedef uint32_t u32block_t[SWS_BLOCK_SIZE]; -typedef float f32block_t[SWS_BLOCK_SIZE]; - -#define BIT_DEPTH 8 -# include "ops_tmpl_int.c" -#undef BIT_DEPTH - -#define BIT_DEPTH 16 -# include "ops_tmpl_int.c" -#undef BIT_DEPTH - -#define BIT_DEPTH 32 -# include "ops_tmpl_int.c" -# include "ops_tmpl_float.c" -#undef BIT_DEPTH - -static const SwsOpTable *const tables[] = { - &bitfn(op_table_int, u8), - &bitfn(op_table_int, u16), - &bitfn(op_table_int, u32), - &bitfn(op_table_float, f32), -}; - -static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out) -{ - int ret; - - SwsOpChain *chain = ff_sws_op_chain_alloc(); - if (!chain) - return AVERROR(ENOMEM); - - av_assert0(ops->num_ops > 0); - const SwsPixelType read_type = ops->ops[0].type; - - for (int i = 0; i < ops->num_ops; i++) { - ret = ff_sws_op_compile_tables(ctx, tables, FF_ARRAY_ELEMS(tables), - &ops->ops[i], SWS_BLOCK_SIZE, chain); - if (ret < 0) { - av_log(ctx, AV_LOG_TRACE, "Failed to compile op %d\n", i); - ff_sws_op_chain_free(chain); - return ret; - } - } - - *out = (SwsCompiledOp) { - .slice_align = 1, - .block_size = SWS_BLOCK_SIZE, - .cpu_flags = chain->cpu_flags, - .over_read = chain->over_read, - .over_write = chain->over_write, - .priv = chain, - .free = ff_sws_op_chain_free_cb, - }; - - switch (read_type) { - case SWS_PIXEL_U8: out->func = process_u8; break; - case SWS_PIXEL_U16: out->func = process_u16; break; - case SWS_PIXEL_U32: out->func = process_u32; break; - case SWS_PIXEL_F32: out->func = process_f32; break; - default: av_unreachable("Invalid pixel type!"); - } - - return 0; -} - -const SwsOpBackend backend_c = { - .name = "c", - .flags = SWS_BACKEND_C, - .compile = compile, - .hw_format = AV_PIX_FMT_NONE, -}; diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h deleted file mode 100644 index 82eb92fc36..0000000000 --- a/libswscale/ops_backend.h +++ /dev/null @@ -1,162 +0,0 @@ -/** - * Copyright (C) 2025 Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef SWSCALE_OPS_BACKEND_H -#define SWSCALE_OPS_BACKEND_H - -/** - * Helper macros for the C-based backend. - * - * To use these macros, the following types must be defined: - * - PIXEL_TYPE should be one of SWS_PIXEL_* - * - pixel_t should be the type of pixels - * - block_t should be the type of blocks (groups of pixels) - */ - -#include <assert.h> -#include <float.h> -#include <stdint.h> - -#include "libavutil/attributes.h" -#include "libavutil/mem.h" - -#include "ops_chain.h" - -/** - * Internal context holding per-iter execution data. The data pointers will be - * directly incremented by the corresponding read/write functions. - */ -typedef struct SwsOpIter { - uintptr_t in[4]; - uintptr_t out[4]; - int x, y; - - /* Link back to per-slice execution context */ - const SwsOpExec *exec; -} SwsOpIter; - -#ifdef __clang__ -# define SWS_LOOP AV_PRAGMA(clang loop vectorize(assume_safety)) -#elif defined(__GNUC__) -# define SWS_LOOP AV_PRAGMA(GCC ivdep) -#else -# define SWS_LOOP -#endif - -/* Miscellaneous helpers */ -#define bitfn2(name, ext) name ## _ ## ext -#define bitfn(name, ext) bitfn2(name, ext) - -#define FN_SUFFIX AV_JOIN(FMT_CHAR, BIT_DEPTH) -#define fn(name) bitfn(name, FN_SUFFIX) - -#define av_q2pixel(q) ((q).den ? (pixel_t) (q).num / (q).den : 0) -#define bump_ptr(ptr, bump) ((pixel_t *) ((uintptr_t) (ptr) + (bump))) - -/* Helper macros to make writing common function signatures less painful */ -#define DECL_FUNC(NAME, ...) \ - static av_always_inline void fn(NAME)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl, \ - block_t x, block_t y, \ - block_t z, block_t w, \ - __VA_ARGS__) - -#define DECL_READ(NAME, ...) \ - DECL_FUNC(NAME, const pixel_t *restrict in0, const pixel_t *restrict in1, \ - const pixel_t *restrict in2, const pixel_t *restrict in3, \ - __VA_ARGS__) - -#define DECL_WRITE(NAME, ...) \ - DECL_FUNC(NAME, pixel_t *restrict out0, pixel_t *restrict out1, \ - pixel_t *restrict out2, pixel_t *restrict out3, \ - __VA_ARGS__) - -/* Helper macros to call into functions declared with DECL_FUNC_* */ -#define CALL(FUNC, ...) \ - fn(FUNC)(iter, impl, x, y, z, w, __VA_ARGS__) - -#define CALL_READ(FUNC, ...) \ - CALL(FUNC, (const pixel_t *) iter->in[0], (const pixel_t *) iter->in[1], \ - (const pixel_t *) iter->in[2], (const pixel_t *) iter->in[3], \ - __VA_ARGS__) - -#define CALL_WRITE(FUNC, ...) \ - CALL(FUNC, (pixel_t *) iter->out[0], (pixel_t *) iter->out[1], \ - (pixel_t *) iter->out[2], (pixel_t *) iter->out[3], __VA_ARGS__) - -/* Helper macros to declare continuation functions */ -#define DECL_IMPL(FUNC, NAME, ...) \ - static void av_flatten fn(NAME)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl, \ - void *restrict x, void *restrict y, \ - void *restrict z, void *restrict w) \ - { \ - CALL(FUNC, __VA_ARGS__); \ - } - -/* Helper macro to call into the next continuation */ -#define CONTINUE(X, Y, Z, W) \ - ((void (*)(SwsOpIter *, const SwsOpImpl *, \ - void *restrict, void *restrict, \ - void *restrict, void *restrict)) impl->cont) \ - (iter, &impl[1], (X), (Y), (Z), (W)) - -/* Helper macros for common op setup code */ -#define DECL_SETUP(NAME, PARAMS, OUT) \ - static int fn(NAME)(const SwsImplParams *PARAMS, SwsImplResult *OUT) - -#define SETUP_MEMDUP(c, out) ff_setup_memdup(&(c), sizeof(c), out) -static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out) -{ - out->priv.ptr = av_memdup(c, size); - out->free = ff_op_priv_free; - return out->priv.ptr ? 0 : AVERROR(ENOMEM); -} - -/* Helper macro for declaring op table entries */ -#define DECL_ENTRY(NAME, MASK, ...) \ - static const SwsOpEntry fn(op_##NAME) = { \ - .func = (SwsFuncPtr) fn(NAME), \ - .type = PIXEL_TYPE, \ - .mask = (MASK), \ - __VA_ARGS__ \ - } - -/* Helpers to define functions for common subsets of components */ -#define DECL_PATTERN(NAME) \ - DECL_FUNC(NAME, const bool X, const bool Y, const bool Z, const bool W) - -#define WRAP_PATTERN(FUNC, X, Y, Z, W, ...) \ - DECL_IMPL(FUNC, FUNC##_##X##Y##Z##W, X, Y, Z, W) \ - DECL_ENTRY(FUNC##_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W), __VA_ARGS__) - -#define WRAP_COMMON_PATTERNS(FUNC, ...) \ - WRAP_PATTERN(FUNC, 1, 0, 0, 0, __VA_ARGS__); \ - WRAP_PATTERN(FUNC, 1, 0, 0, 1, __VA_ARGS__); \ - WRAP_PATTERN(FUNC, 1, 1, 1, 0, __VA_ARGS__); \ - WRAP_PATTERN(FUNC, 1, 1, 1, 1, __VA_ARGS__) - -#define REF_COMMON_PATTERNS(NAME) \ - &fn(op_##NAME##_1000), \ - &fn(op_##NAME##_1001), \ - &fn(op_##NAME##_1110), \ - &fn(op_##NAME##_1111) - -#endif diff --git a/libswscale/ops_tmpl_common.c b/libswscale/ops_tmpl_common.c deleted file mode 100644 index 0d056ca08e..0000000000 --- a/libswscale/ops_tmpl_common.c +++ /dev/null @@ -1,346 +0,0 @@ -/** - * Copyright (C) 2025 Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "ops_backend.h" - -#ifndef BIT_DEPTH -# error Should only be included from ops_tmpl_*.c! -#endif - -#define WRAP_CONVERT_UINT(N) \ -DECL_PATTERN(convert_uint##N) \ -{ \ - u##N##block_t xu, yu, zu, wu; \ - \ - SWS_LOOP \ - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { \ - if (X) \ - xu[i] = x[i]; \ - if (Y) \ - yu[i] = y[i]; \ - if (Z) \ - zu[i] = z[i]; \ - if (W) \ - wu[i] = w[i]; \ - } \ - \ - CONTINUE(xu, yu, zu, wu); \ -} \ - \ -WRAP_COMMON_PATTERNS(convert_uint##N, \ - .op = SWS_OP_CONVERT, \ - .convert.to = SWS_PIXEL_U##N, \ -); - -#if BIT_DEPTH != 8 -WRAP_CONVERT_UINT(8) -#endif - -#if BIT_DEPTH != 16 -WRAP_CONVERT_UINT(16) -#endif - -#if BIT_DEPTH != 32 || defined(IS_FLOAT) -WRAP_CONVERT_UINT(32) -#endif - -DECL_PATTERN(clear) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - if (X) - x[i] = impl->priv.px[0]; - if (Y) - y[i] = impl->priv.px[1]; - if (Z) - z[i] = impl->priv.px[2]; - if (W) - w[i] = impl->priv.px[3]; - } - - CONTINUE(x, y, z, w); -} - -#define WRAP_CLEAR(X, Y, Z, W) \ -DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W) \ - \ -DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL, \ - .setup = ff_sws_setup_clear, \ - .op = SWS_OP_CLEAR, \ - .clear.mask = SWS_COMP_MASK(X, Y, Z, W), \ -); - -WRAP_CLEAR(0, 0, 0, 1) /* rgba alpha */ -WRAP_CLEAR(1, 0, 0, 0) /* argb alpha */ -WRAP_CLEAR(0, 1, 0, 0) /* ya alpha */ - -WRAP_CLEAR(1, 1, 0, 0) /* vuya chroma */ -WRAP_CLEAR(0, 1, 1, 0) /* yuva chroma */ -WRAP_CLEAR(0, 0, 1, 1) /* ayuv chroma */ -WRAP_CLEAR(1, 0, 1, 0) /* uyva chroma */ -WRAP_CLEAR(0, 1, 0, 1) /* xvyu chroma */ - -WRAP_CLEAR(0, 1, 1, 1) /* gray -> yuva */ -WRAP_CLEAR(1, 0, 1, 1) /* gray -> ayuv */ -WRAP_CLEAR(1, 1, 0, 1) /* gray -> vuya */ - -DECL_PATTERN(min) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - if (X) - x[i] = FFMIN(x[i], impl->priv.px[0]); - if (Y) - y[i] = FFMIN(y[i], impl->priv.px[1]); - if (Z) - z[i] = FFMIN(z[i], impl->priv.px[2]); - if (W) - w[i] = FFMIN(w[i], impl->priv.px[3]); - } - - CONTINUE(x, y, z, w); -} - -DECL_PATTERN(max) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - if (X) - x[i] = FFMAX(x[i], impl->priv.px[0]); - if (Y) - y[i] = FFMAX(y[i], impl->priv.px[1]); - if (Z) - z[i] = FFMAX(z[i], impl->priv.px[2]); - if (W) - w[i] = FFMAX(w[i], impl->priv.px[3]); - } - - CONTINUE(x, y, z, w); -} - -WRAP_COMMON_PATTERNS(min, - .op = SWS_OP_MIN, - .setup = ff_sws_setup_clamp, -); - -WRAP_COMMON_PATTERNS(max, - .op = SWS_OP_MAX, - .setup = ff_sws_setup_clamp, -); - -DECL_PATTERN(scale) -{ - const pixel_t scale = impl->priv.px[0]; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - if (X) - x[i] *= scale; - if (Y) - y[i] *= scale; - if (Z) - z[i] *= scale; - if (W) - w[i] *= scale; - } - - CONTINUE(x, y, z, w); -} - -WRAP_COMMON_PATTERNS(scale, - .op = SWS_OP_SCALE, - .setup = ff_sws_setup_scale, - .flexible = true, -); - -DECL_SETUP(setup_filter_v, params, out) -{ - const SwsFilterWeights *filter = params->op->rw.kernel; - static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]), - ">8 byte pointers not supported"); - - /* Pre-convert weights to float */ - float *weights = av_calloc(filter->num_weights, sizeof(float)); - if (!weights) - return AVERROR(ENOMEM); - - for (int i = 0; i < filter->num_weights; i++) - weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE; - - out->priv.ptr = weights; - out->priv.i32[2] = filter->filter_size; - out->free = ff_op_priv_free; - return 0; -} - -/* Fully general vertical planar filter case */ -DECL_READ(filter_v, const int elems) -{ - const SwsOpExec *exec = iter->exec; - const float *restrict weights = impl->priv.ptr; - const int filter_size = impl->priv.i32[2]; - weights += filter_size * iter->y; - - f32block_t xs, ys, zs, ws; - memset(xs, 0, sizeof(xs)); - if (elems > 1) - memset(ys, 0, sizeof(ys)); - if (elems > 2) - memset(zs, 0, sizeof(zs)); - if (elems > 3) - memset(ws, 0, sizeof(ws)); - - for (int j = 0; j < filter_size; j++) { - const float weight = weights[j]; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - xs[i] += weight * in0[i]; - if (elems > 1) - ys[i] += weight * in1[i]; - if (elems > 2) - zs[i] += weight * in2[i]; - if (elems > 3) - ws[i] += weight * in3[i]; - } - - in0 = bump_ptr(in0, exec->in_stride[0]); - if (elems > 1) - in1 = bump_ptr(in1, exec->in_stride[1]); - if (elems > 2) - in2 = bump_ptr(in2, exec->in_stride[2]); - if (elems > 3) - in3 = bump_ptr(in3, exec->in_stride[3]); - } - - for (int i = 0; i < elems; i++) - iter->in[i] += sizeof(block_t); - - CONTINUE(xs, ys, zs, ws); -} - -DECL_SETUP(setup_filter_h, params, out) -{ - SwsFilterWeights *filter = params->op->rw.kernel; - out->priv.ptr = av_refstruct_ref(filter->weights); - out->priv.i32[2] = filter->filter_size; - out->free = ff_op_priv_unref; - return 0; -} - -/* Fully general horizontal planar filter case */ -DECL_READ(filter_h, const int elems) -{ - const SwsOpExec *exec = iter->exec; - const int *restrict weights = impl->priv.ptr; - const int filter_size = impl->priv.i32[2]; - const float scale = 1.0f / SWS_FILTER_SCALE; - const int xpos = iter->x; - weights += filter_size * iter->x; - - f32block_t xs, ys, zs, ws; - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - const int offset = exec->in_offset_x[xpos + i]; - pixel_t *start0 = bump_ptr(in0, offset); - pixel_t *start1 = bump_ptr(in1, offset); - pixel_t *start2 = bump_ptr(in2, offset); - pixel_t *start3 = bump_ptr(in3, offset); - - inter_t sx = 0, sy = 0, sz = 0, sw = 0; - for (int j = 0; j < filter_size; j++) { - const int weight = weights[j]; - sx += weight * start0[j]; - if (elems > 1) - sy += weight * start1[j]; - if (elems > 2) - sz += weight * start2[j]; - if (elems > 3) - sw += weight * start3[j]; - } - - xs[i] = (float) sx * scale; - if (elems > 1) - ys[i] = (float) sy * scale; - if (elems > 2) - zs[i] = (float) sz * scale; - if (elems > 3) - ws[i] = (float) sw * scale; - - weights += filter_size; - } - - CONTINUE(xs, ys, zs, ws); -} - -#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX) \ -static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl, \ - void *restrict x, void *restrict y,\ - void *restrict z, void *restrict w)\ -{ \ - CALL_READ(FUNC##SUFFIX, ELEMS); \ -} \ - \ -DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS), \ - .op = SWS_OP_READ, \ - .setup = fn(setup_filter##SUFFIX), \ - .rw.elems = ELEMS, \ - .rw.filter = SWS_OP_FILTER_##DIR, \ -); - -WRAP_FILTER(filter, V, 1, _v) -WRAP_FILTER(filter, V, 2, _v) -WRAP_FILTER(filter, V, 3, _v) -WRAP_FILTER(filter, V, 4, _v) - -WRAP_FILTER(filter, H, 1, _h) -WRAP_FILTER(filter, H, 2, _h) -WRAP_FILTER(filter, H, 3, _h) -WRAP_FILTER(filter, H, 4, _h) - -static void fn(process)(const SwsOpExec *exec, const void *priv, - const int bx_start, const int y_start, - int bx_end, int y_end) -{ - const SwsOpChain *chain = priv; - const SwsOpImpl *impl = chain->impl; - u32block_t x, y, z, w; /* allocate enough space for any intermediate */ - - SwsOpIter iterdata; - SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */ - iter->exec = exec; - for (int i = 0; i < 4; i++) { - iter->in[i] = (uintptr_t) exec->in[i]; - iter->out[i] = (uintptr_t) exec->out[i]; - } - - for (iter->y = y_start; iter->y < y_end; iter->y++) { - for (int block = bx_start; block < bx_end; block++) { - iter->x = block * SWS_BLOCK_SIZE; - CONTINUE(x, y, z, w); - } - - const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0; - for (int i = 0; i < 4; i++) { - iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i]; - iter->out[i] += exec->out_bump[i]; - } - } -} diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c deleted file mode 100644 index 57f6d8c399..0000000000 --- a/libswscale/ops_tmpl_float.c +++ /dev/null @@ -1,273 +0,0 @@ -/** - * Copyright (C) 2025 Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/avassert.h" - -#include "ops_backend.h" - -#ifndef BIT_DEPTH -# define BIT_DEPTH 32 -#endif - -#if BIT_DEPTH == 32 -# define PIXEL_TYPE SWS_PIXEL_F32 -# define PIXEL_MAX FLT_MAX -# define pixel_t float -# define inter_t float -# define block_t f32block_t -# define px f32 -#else -# error Invalid BIT_DEPTH -#endif - -#define IS_FLOAT 1 -#define FMT_CHAR f -#include "ops_tmpl_common.c" - -DECL_SETUP(setup_dither, params, out) -{ - const SwsOp *op = params->op; - const int size = 1 << op->dither.size_log2; - const int width = FFMAX(size, SWS_BLOCK_SIZE); - pixel_t *matrix = out->priv.ptr = av_malloc(sizeof(pixel_t) * size * width); - if (!matrix) - return AVERROR(ENOMEM); - out->free = ff_op_priv_free; - - static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]), - ">8 byte pointers not supported"); - - int8_t *offset = &out->priv.i8[8]; - for (int i = 0; i < 4; i++) - offset[i] = op->dither.y_offset[i]; - - for (int y = 0; y < size; y++) { - for (int x = 0; x < size; x++) - matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]); - for (int x = size; x < width; x++) /* pad to block size */ - matrix[y * width + x] = matrix[y * width + (x % size)]; - } - - return 0; -} - -DECL_FUNC(dither, const int size_log2) -{ - const pixel_t *restrict matrix = impl->priv.ptr; - const int8_t *restrict offset = &impl->priv.i8[8]; - const int mask = (1 << size_log2) - 1; - const int y_line = iter->y; - const int size = 1 << size_log2; - const int width = FFMAX(size, SWS_BLOCK_SIZE); - const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1); - -#define DITHER_COMP(VAR, IDX) \ - if (offset[IDX] >= 0) { \ - const int row = (y_line + offset[IDX]) & mask; \ - SWS_LOOP \ - for (int i = 0; i < SWS_BLOCK_SIZE; i++) \ - VAR[i] += matrix[row * width + base + i]; \ - } - - DITHER_COMP(x, 0) - DITHER_COMP(y, 1) - DITHER_COMP(z, 2) - DITHER_COMP(w, 3) - - CONTINUE(x, y, z, w); -} - -#define WRAP_DITHER(N) \ -DECL_IMPL(dither, dither##N, N) \ - \ -DECL_ENTRY(dither##N, SWS_COMP_ALL, \ - .op = SWS_OP_DITHER, \ - .dither_size = N, \ - .setup = fn(setup_dither), \ -); - -WRAP_DITHER(0) -WRAP_DITHER(1) -WRAP_DITHER(2) -WRAP_DITHER(3) -WRAP_DITHER(4) -WRAP_DITHER(5) -WRAP_DITHER(6) -WRAP_DITHER(7) -WRAP_DITHER(8) - -typedef struct { - /* Stored in split form for convenience */ - pixel_t m[4][4]; - pixel_t k[4]; -} fn(LinCoeffs); - -DECL_SETUP(setup_linear, params, out) -{ - const SwsOp *op = params->op; - fn(LinCoeffs) c; - - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) - c.m[i][j] = av_q2pixel(op->lin.m[i][j]); - c.k[i] = av_q2pixel(op->lin.m[i][4]); - } - - return SETUP_MEMDUP(c, out); -} - -/** - * Fully general case for a 5x5 linear affine transformation. Should never be - * called without constant `mask`. This function will compile down to the - * appropriately optimized version for the required subset of operations when - * called with a constant mask. - */ -DECL_FUNC(linear_mask, const uint32_t mask) -{ - const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - const pixel_t xx = x[i]; - const pixel_t yy = y[i]; - const pixel_t zz = z[i]; - const pixel_t ww = w[i]; - - x[i] = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0; - x[i] += (mask & SWS_MASK(0, 0)) ? c.m[0][0] * xx : xx; - x[i] += (mask & SWS_MASK(0, 1)) ? c.m[0][1] * yy : 0; - x[i] += (mask & SWS_MASK(0, 2)) ? c.m[0][2] * zz : 0; - x[i] += (mask & SWS_MASK(0, 3)) ? c.m[0][3] * ww : 0; - - y[i] = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0; - y[i] += (mask & SWS_MASK(1, 0)) ? c.m[1][0] * xx : 0; - y[i] += (mask & SWS_MASK(1, 1)) ? c.m[1][1] * yy : yy; - y[i] += (mask & SWS_MASK(1, 2)) ? c.m[1][2] * zz : 0; - y[i] += (mask & SWS_MASK(1, 3)) ? c.m[1][3] * ww : 0; - - z[i] = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0; - z[i] += (mask & SWS_MASK(2, 0)) ? c.m[2][0] * xx : 0; - z[i] += (mask & SWS_MASK(2, 1)) ? c.m[2][1] * yy : 0; - z[i] += (mask & SWS_MASK(2, 2)) ? c.m[2][2] * zz : zz; - z[i] += (mask & SWS_MASK(2, 3)) ? c.m[2][3] * ww : 0; - - w[i] = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0; - w[i] += (mask & SWS_MASK(3, 0)) ? c.m[3][0] * xx : 0; - w[i] += (mask & SWS_MASK(3, 1)) ? c.m[3][1] * yy : 0; - w[i] += (mask & SWS_MASK(3, 2)) ? c.m[3][2] * zz : 0; - w[i] += (mask & SWS_MASK(3, 3)) ? c.m[3][3] * ww : ww; - } - - CONTINUE(x, y, z, w); -} - -#define WRAP_LINEAR(NAME, MASK) \ -DECL_IMPL(linear_mask, linear_##NAME, MASK) \ - \ -DECL_ENTRY(linear_##NAME, SWS_COMP_ALL, \ - .op = SWS_OP_LINEAR, \ - .setup = fn(setup_linear), \ - .linear_mask = (MASK), \ -); - -WRAP_LINEAR(luma, SWS_MASK_LUMA) -WRAP_LINEAR(alpha, SWS_MASK_ALPHA) -WRAP_LINEAR(lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) -WRAP_LINEAR(yalpha, SWS_MASK(1, 1)) /* ya alpha */ -WRAP_LINEAR(dot3, 0x7) -WRAP_LINEAR(dot3a, 0x7 | SWS_MASK_ALPHA) -WRAP_LINEAR(row0, SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) /* row0 sans alpha */ -WRAP_LINEAR(diag3, SWS_MASK_DIAG3) -WRAP_LINEAR(diag4, SWS_MASK_DIAG4) -WRAP_LINEAR(diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3) -WRAP_LINEAR(affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3) -WRAP_LINEAR(affine3uv, SWS_MASK_MAT3 | SWS_MASK_OFF(1) | SWS_MASK_OFF(2)) -WRAP_LINEAR(affine3x, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) -WRAP_LINEAR(affine3xa, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3 | SWS_MASK_ALPHA) -WRAP_LINEAR(affine3xy, SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) -WRAP_LINEAR(affine3a, SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA) - -static const SwsOpTable fn(op_table_float) = { - .block_size = SWS_BLOCK_SIZE, - .entries = { - REF_COMMON_PATTERNS(convert_uint8), - REF_COMMON_PATTERNS(convert_uint16), - REF_COMMON_PATTERNS(convert_uint32), - - &fn(op_clear_0001), - REF_COMMON_PATTERNS(min), - REF_COMMON_PATTERNS(max), - REF_COMMON_PATTERNS(scale), - - &fn(op_dither0), - &fn(op_dither1), - &fn(op_dither2), - &fn(op_dither3), - &fn(op_dither4), - &fn(op_dither5), - &fn(op_dither6), - &fn(op_dither7), - &fn(op_dither8), - - &fn(op_clear_0001), - &fn(op_clear_1000), - &fn(op_clear_1100), - &fn(op_clear_0100), - - &fn(op_linear_luma), - &fn(op_linear_alpha), - &fn(op_linear_lumalpha), - &fn(op_linear_yalpha), - &fn(op_linear_dot3), - &fn(op_linear_dot3a), - &fn(op_linear_row0), - &fn(op_linear_diag3), - &fn(op_linear_diag4), - &fn(op_linear_diagoff3), - &fn(op_linear_affine3), - &fn(op_linear_affine3uv), - &fn(op_linear_affine3x), - &fn(op_linear_affine3xa), - &fn(op_linear_affine3xy), - &fn(op_linear_affine3a), - - &fn(op_filter1_v), - &fn(op_filter2_v), - &fn(op_filter3_v), - &fn(op_filter4_v), - - &fn(op_filter1_h), - &fn(op_filter2_h), - &fn(op_filter3_h), - &fn(op_filter4_h), - - NULL - }, -}; - -#undef PIXEL_TYPE -#undef PIXEL_MAX -#undef pixel_t -#undef inter_t -#undef block_t -#undef px - -#undef FMT_CHAR -#undef IS_FLOAT diff --git a/libswscale/ops_tmpl_int.c b/libswscale/ops_tmpl_int.c deleted file mode 100644 index 3e98c1f5ae..0000000000 --- a/libswscale/ops_tmpl_int.c +++ /dev/null @@ -1,619 +0,0 @@ -/** - * Copyright (C) 2025 Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/avassert.h" -#include "libavutil/bswap.h" - -#include "ops_backend.h" - -#ifndef BIT_DEPTH -# define BIT_DEPTH 8 -#endif - -#if BIT_DEPTH == 32 -# define PIXEL_TYPE SWS_PIXEL_U32 -# define PIXEL_MAX 0xFFFFFFFFu -# define SWAP_BYTES av_bswap32 -# define pixel_t uint32_t -# define inter_t int64_t -# define block_t u32block_t -# define px u32 -#elif BIT_DEPTH == 16 -# define PIXEL_TYPE SWS_PIXEL_U16 -# define PIXEL_MAX 0xFFFFu -# define SWAP_BYTES av_bswap16 -# define pixel_t uint16_t -# define inter_t int64_t -# define block_t u16block_t -# define px u16 -#elif BIT_DEPTH == 8 -# define PIXEL_TYPE SWS_PIXEL_U8 -# define PIXEL_MAX 0xFFu -# define pixel_t uint8_t -# define inter_t int32_t -# define block_t u8block_t -# define px u8 -#else -# error Invalid BIT_DEPTH -#endif - -#define IS_FLOAT 0 -#define FMT_CHAR u -#include "ops_tmpl_common.c" - -DECL_READ(read_planar, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - x[i] = in0[i]; - if (elems > 1) - y[i] = in1[i]; - if (elems > 2) - z[i] = in2[i]; - if (elems > 3) - w[i] = in3[i]; - } - - CONTINUE(x, y, z, w); -} - -DECL_READ(read_packed, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - x[i] = in0[elems * i + 0]; - if (elems > 1) - y[i] = in0[elems * i + 1]; - if (elems > 2) - z[i] = in0[elems * i + 2]; - if (elems > 3) - w[i] = in0[elems * i + 3]; - } - - CONTINUE(x, y, z, w); -} - -DECL_WRITE(write_planar, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - out0[i] = x[i]; - if (elems > 1) - out1[i] = y[i]; - if (elems > 2) - out2[i] = z[i]; - if (elems > 3) - out3[i] = w[i]; - } -} - -DECL_WRITE(write_packed, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - out0[elems * i + 0] = x[i]; - if (elems > 1) - out0[elems * i + 1] = y[i]; - if (elems > 2) - out0[elems * i + 2] = z[i]; - if (elems > 3) - out0[elems * i + 3] = w[i]; - } -} - -#define WRAP_READ(FUNC, ELEMS, FRAC, PACKED) \ -static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl, \ - void *restrict x, void *restrict y, \ - void *restrict z, void *restrict w) \ -{ \ - CALL_READ(FUNC, ELEMS); \ - for (int i = 0; i < (PACKED ? 1 : ELEMS); i++) \ - iter->in[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC; \ -} \ - \ -DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ELEMS(ELEMS), \ - .op = SWS_OP_READ, \ - .rw = { \ - .elems = ELEMS, \ - .packed = PACKED, \ - .frac = FRAC, \ - }, \ -); - -WRAP_READ(read_planar, 1, 0, false) -WRAP_READ(read_planar, 2, 0, false) -WRAP_READ(read_planar, 3, 0, false) -WRAP_READ(read_planar, 4, 0, false) -WRAP_READ(read_packed, 2, 0, true) -WRAP_READ(read_packed, 3, 0, true) -WRAP_READ(read_packed, 4, 0, true) - -#define WRAP_WRITE(FUNC, ELEMS, FRAC, PACKED) \ -static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl, \ - void *restrict x, void *restrict y, \ - void *restrict z, void *restrict w) \ -{ \ - CALL_WRITE(FUNC, ELEMS); \ - for (int i = 0; i < (PACKED ? 1 : ELEMS); i++) \ - iter->out[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC; \ -} \ - \ -DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ALL, \ - .op = SWS_OP_WRITE, \ - .rw = { \ - .elems = ELEMS, \ - .packed = PACKED, \ - .frac = FRAC, \ - }, \ -); - -WRAP_WRITE(write_planar, 1, 0, false) -WRAP_WRITE(write_planar, 2, 0, false) -WRAP_WRITE(write_planar, 3, 0, false) -WRAP_WRITE(write_planar, 4, 0, false) -WRAP_WRITE(write_packed, 2, 0, true) -WRAP_WRITE(write_packed, 3, 0, true) -WRAP_WRITE(write_packed, 4, 0, true) - -#if BIT_DEPTH == 8 -DECL_READ(read_nibbles, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) { - const pixel_t val = ((const pixel_t *) in0)[i >> 1]; - x[i + 0] = val >> 4; /* high nibble */ - x[i + 1] = val & 0xF; /* low nibble */ - } - - CONTINUE(x, y, z, w); -} - -DECL_READ(read_bits, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) { - const pixel_t val = ((const pixel_t *) in0)[i >> 3]; - x[i + 0] = (val >> 7) & 1; - x[i + 1] = (val >> 6) & 1; - x[i + 2] = (val >> 5) & 1; - x[i + 3] = (val >> 4) & 1; - x[i + 4] = (val >> 3) & 1; - x[i + 5] = (val >> 2) & 1; - x[i + 6] = (val >> 1) & 1; - x[i + 7] = (val >> 0) & 1; - } - - CONTINUE(x, y, z, w); -} - -WRAP_READ(read_nibbles, 1, 1, false) -WRAP_READ(read_bits, 1, 3, false) - -DECL_WRITE(write_nibbles, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) - out0[i >> 1] = x[i] << 4 | x[i + 1]; -} - -DECL_WRITE(write_bits, const int elems) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) { - out0[i >> 3] = x[i + 0] << 7 | - x[i + 1] << 6 | - x[i + 2] << 5 | - x[i + 3] << 4 | - x[i + 4] << 3 | - x[i + 5] << 2 | - x[i + 6] << 1 | - x[i + 7]; - } -} - -WRAP_WRITE(write_nibbles, 1, 1, false) -WRAP_WRITE(write_bits, 1, 3, false) -#endif /* BIT_DEPTH == 8 */ - -#ifdef SWAP_BYTES -DECL_PATTERN(swap_bytes) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - if (X) - x[i] = SWAP_BYTES(x[i]); - if (Y) - y[i] = SWAP_BYTES(y[i]); - if (Z) - z[i] = SWAP_BYTES(z[i]); - if (W) - w[i] = SWAP_BYTES(w[i]); - } - - CONTINUE(x, y, z, w); -} - -WRAP_COMMON_PATTERNS(swap_bytes, .op = SWS_OP_SWAP_BYTES); -#endif /* SWAP_BYTES */ - -#if BIT_DEPTH == 8 -DECL_PATTERN(expand16) -{ - u16block_t x16, y16, z16, w16; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - if (X) - x16[i] = x[i] << 8 | x[i]; - if (Y) - y16[i] = y[i] << 8 | y[i]; - if (Z) - z16[i] = z[i] << 8 | z[i]; - if (W) - w16[i] = w[i] << 8 | w[i]; - } - - CONTINUE(x16, y16, z16, w16); -} - -WRAP_COMMON_PATTERNS(expand16, - .op = SWS_OP_CONVERT, - .convert.to = SWS_PIXEL_U16, - .convert.expand = true, -); - -DECL_PATTERN(expand32) -{ - u32block_t x32, y32, z32, w32; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - x32[i] = (uint32_t)x[i] << 24 | x[i] << 16 | x[i] << 8 | x[i]; - y32[i] = (uint32_t)y[i] << 24 | y[i] << 16 | y[i] << 8 | y[i]; - z32[i] = (uint32_t)z[i] << 24 | z[i] << 16 | z[i] << 8 | z[i]; - w32[i] = (uint32_t)w[i] << 24 | w[i] << 16 | w[i] << 8 | w[i]; - } - - CONTINUE(x32, y32, z32, w32); -} - -WRAP_COMMON_PATTERNS(expand32, - .op = SWS_OP_CONVERT, - .convert.to = SWS_PIXEL_U32, - .convert.expand = true, -); -#endif - -DECL_FUNC(pack, const int bits0, const int bits1, const int bits2, const int bits3) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - x[i] = x[i] << (bits1 + bits2 + bits3); - if (bits1) - x[i] |= y[i] << (bits2 + bits3); - if (bits2) - x[i] |= z[i] << bits3; - if (bits3) - x[i] |= w[i]; - } - - CONTINUE(x, y, z, w); -} - -DECL_FUNC(unpack, const int bits0, const int bits1, const int bits2, const int bits3) -{ - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - const pixel_t val = x[i]; - x[i] = val >> (bits1 + bits2 + bits3); - if (bits1) - y[i] = (val >> (bits2 + bits3)) & ((1 << bits1) - 1); - if (bits2) - z[i] = (val >> bits3) & ((1 << bits2) - 1); - if (bits3) - w[i] = val & ((1 << bits3) - 1); - } - - CONTINUE(x, y, z, w); -} - -#define WRAP_PACK_UNPACK(X, Y, Z, W) \ -DECL_IMPL(pack, pack_##X##Y##Z##W, X, Y, Z, W) \ - \ -DECL_ENTRY(pack_##X##Y##Z##W, SWS_COMP(0), \ - .op = SWS_OP_PACK, \ - .pack.pattern = { X, Y, Z, W }, \ -); \ - \ -DECL_IMPL(unpack, unpack_##X##Y##Z##W, X, Y, Z, W) \ - \ -DECL_ENTRY(unpack_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W), \ - .op = SWS_OP_UNPACK, \ - .pack.pattern = { X, Y, Z, W }, \ -); - -WRAP_PACK_UNPACK( 3, 3, 2, 0) -WRAP_PACK_UNPACK( 2, 3, 3, 0) -WRAP_PACK_UNPACK( 1, 2, 1, 0) -WRAP_PACK_UNPACK( 5, 6, 5, 0) -WRAP_PACK_UNPACK( 5, 5, 5, 0) -WRAP_PACK_UNPACK( 4, 4, 4, 0) -WRAP_PACK_UNPACK( 2, 10, 10, 10) -WRAP_PACK_UNPACK(10, 10, 10, 2) - -#if BIT_DEPTH != 8 -DECL_PATTERN(lshift) -{ - const uint8_t amount = impl->priv.u8[0]; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - x[i] <<= amount; - y[i] <<= amount; - z[i] <<= amount; - w[i] <<= amount; - } - - CONTINUE(x, y, z, w); -} - -DECL_PATTERN(rshift) -{ - const uint8_t amount = impl->priv.u8[0]; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - x[i] >>= amount; - y[i] >>= amount; - z[i] >>= amount; - w[i] >>= amount; - } - - CONTINUE(x, y, z, w); -} - -WRAP_COMMON_PATTERNS(lshift, - .op = SWS_OP_LSHIFT, - .setup = ff_sws_setup_shift, - .flexible = true, -); - -WRAP_COMMON_PATTERNS(rshift, - .op = SWS_OP_RSHIFT, - .setup = ff_sws_setup_shift, - .flexible = true, -); -#endif /* BIT_DEPTH != 8 */ - -DECL_PATTERN(convert_float) -{ - f32block_t xf, yf, zf, wf; - - SWS_LOOP - for (int i = 0; i < SWS_BLOCK_SIZE; i++) { - xf[i] = x[i]; - yf[i] = y[i]; - zf[i] = z[i]; - wf[i] = w[i]; - } - - CONTINUE(xf, yf, zf, wf); -} - -WRAP_COMMON_PATTERNS(convert_float, - .op = SWS_OP_CONVERT, - .convert.to = SWS_PIXEL_F32, -); - -/** - * Swizzle by directly swapping the order of arguments to the continuation. - * Note that this is only safe to do if no arguments are duplicated. - */ -#define DECL_SWIZZLE(X, Y, Z, W) \ -static void \ -fn(swizzle_##X##Y##Z##W)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl, \ - void *restrict c0, void *restrict c1, \ - void *restrict c2, void *restrict c3) \ -{ \ - CONTINUE(c##X, c##Y, c##Z, c##W); \ -} \ - \ -DECL_ENTRY(swizzle_##X##Y##Z##W, SWS_COMP_ALL, \ - .op = SWS_OP_SWIZZLE, \ - .swizzle.in = { X, Y, Z, W }, \ -); - -DECL_SWIZZLE(3, 0, 1, 2) -DECL_SWIZZLE(3, 0, 2, 1) -DECL_SWIZZLE(2, 1, 0, 3) -DECL_SWIZZLE(3, 2, 1, 0) -DECL_SWIZZLE(3, 1, 0, 2) -DECL_SWIZZLE(3, 2, 0, 1) -DECL_SWIZZLE(1, 2, 0, 3) -DECL_SWIZZLE(1, 0, 2, 3) -DECL_SWIZZLE(2, 0, 1, 3) -DECL_SWIZZLE(2, 3, 1, 0) -DECL_SWIZZLE(2, 1, 3, 0) -DECL_SWIZZLE(1, 2, 3, 0) -DECL_SWIZZLE(1, 3, 2, 0) -DECL_SWIZZLE(0, 2, 1, 3) -DECL_SWIZZLE(0, 2, 3, 1) -DECL_SWIZZLE(0, 3, 1, 2) -DECL_SWIZZLE(3, 1, 2, 0) -DECL_SWIZZLE(0, 3, 2, 1) - -/* Broadcast luma -> rgb (only used for y(a) -> rgb(a)) */ -#define DECL_EXPAND_LUMA(X, W, T0, T1) \ -DECL_FUNC(expand_luma_##X##W##_impl, \ - block_t c0, block_t c1, block_t c2, block_t c3) \ -{ \ - SWS_LOOP \ - for (int i = 0; i < SWS_BLOCK_SIZE; i++) \ - T0[i] = T1[i] = c0[i]; \ - \ - CONTINUE(c##X, T0, T1, c##W); \ -} \ - \ -DECL_IMPL(expand_luma_##X##W##_impl, expand_luma_##X##W, x, y, z, w) \ - \ -DECL_ENTRY(expand_luma_##X##W, SWS_COMP_ALL, \ - .op = SWS_OP_SWIZZLE, \ - .swizzle.in = { X, 0, 0, W }, \ -); - -DECL_EXPAND_LUMA(0, 3, c1, c2) -DECL_EXPAND_LUMA(3, 0, c1, c2) -DECL_EXPAND_LUMA(1, 0, c2, c3) -DECL_EXPAND_LUMA(0, 1, c2, c3) - -static const SwsOpTable fn(op_table_int) = { - .block_size = SWS_BLOCK_SIZE, - .entries = { - &fn(op_read_planar1), - &fn(op_read_planar2), - &fn(op_read_planar3), - &fn(op_read_planar4), - &fn(op_read_packed2), - &fn(op_read_packed3), - &fn(op_read_packed4), - - &fn(op_write_planar1), - &fn(op_write_planar2), - &fn(op_write_planar3), - &fn(op_write_planar4), - &fn(op_write_packed2), - &fn(op_write_packed3), - &fn(op_write_packed4), - - &fn(op_filter1_v), - &fn(op_filter2_v), - &fn(op_filter3_v), - &fn(op_filter4_v), - - &fn(op_filter1_h), - &fn(op_filter2_h), - &fn(op_filter3_h), - &fn(op_filter4_h), - -#if BIT_DEPTH == 8 - &fn(op_read_bits1), - &fn(op_read_nibbles1), - &fn(op_write_bits1), - &fn(op_write_nibbles1), - - &fn(op_pack_1210), - &fn(op_pack_2330), - &fn(op_pack_3320), - - &fn(op_unpack_1210), - &fn(op_unpack_2330), - &fn(op_unpack_3320), - - REF_COMMON_PATTERNS(expand16), - REF_COMMON_PATTERNS(expand32), -#elif BIT_DEPTH == 16 - &fn(op_pack_4440), - &fn(op_pack_5550), - &fn(op_pack_5650), - &fn(op_unpack_4440), - &fn(op_unpack_5550), - &fn(op_unpack_5650), -#elif BIT_DEPTH == 32 - &fn(op_pack_2101010), - &fn(op_pack_1010102), - &fn(op_unpack_2101010), - &fn(op_unpack_1010102), -#endif - -#ifdef SWAP_BYTES - REF_COMMON_PATTERNS(swap_bytes), -#endif - - REF_COMMON_PATTERNS(min), - REF_COMMON_PATTERNS(max), - REF_COMMON_PATTERNS(scale), - REF_COMMON_PATTERNS(convert_float), - - &fn(op_clear_0001), - &fn(op_clear_1000), - &fn(op_clear_1100), - &fn(op_clear_0100), - &fn(op_clear_0110), - &fn(op_clear_0011), - &fn(op_clear_1010), - &fn(op_clear_0101), - &fn(op_clear_0111), - &fn(op_clear_1011), - &fn(op_clear_1101), - - &fn(op_swizzle_3012), - &fn(op_swizzle_3021), - &fn(op_swizzle_2103), - &fn(op_swizzle_3210), - &fn(op_swizzle_3102), - &fn(op_swizzle_3201), - &fn(op_swizzle_1203), - &fn(op_swizzle_1023), - &fn(op_swizzle_2013), - &fn(op_swizzle_2310), - &fn(op_swizzle_2130), - &fn(op_swizzle_1230), - &fn(op_swizzle_1320), - &fn(op_swizzle_0213), - &fn(op_swizzle_0231), - &fn(op_swizzle_0312), - &fn(op_swizzle_3120), - &fn(op_swizzle_0321), - - &fn(op_expand_luma_03), - &fn(op_expand_luma_30), - &fn(op_expand_luma_10), - &fn(op_expand_luma_01), - -#if BIT_DEPTH != 8 - REF_COMMON_PATTERNS(lshift), - REF_COMMON_PATTERNS(rshift), - REF_COMMON_PATTERNS(convert_uint8), -#endif /* BIT_DEPTH != 8 */ - -#if BIT_DEPTH != 16 - REF_COMMON_PATTERNS(convert_uint16), -#endif -#if BIT_DEPTH != 32 - REF_COMMON_PATTERNS(convert_uint32), -#endif - - NULL - }, -}; - -#undef PIXEL_TYPE -#undef PIXEL_MAX -#undef SWAP_BYTES -#undef pixel_t -#undef inter_t -#undef block_t -#undef px - -#undef FMT_CHAR -#undef IS_FLOAT diff --git a/libswscale/uops_backend.c b/libswscale/uops_backend.c index 591fc154db..bcfc511e57 100644 --- a/libswscale/uops_backend.c +++ b/libswscale/uops_backend.c @@ -189,8 +189,8 @@ fail: return ret; } -const SwsOpBackend backend_uops = { - .name = "uops", +const SwsOpBackend backend_c = { + .name = "c", .flags = SWS_BACKEND_C, .compile = compile, .hw_format = AV_PIX_FMT_NONE, _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
