ops_backend: delete ops-based C backend

Niklas Haas via ffmpeg-cvslog Tue, 09 Jun 2026 10:07:00 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit b7a80a9f0df760fbd5960374d25d450d7040f810
Author:     Niklas Haas <[email protected]>
AuthorDate: Thu Jun 4 00:12:04 2026 +0200
Commit:     Niklas Haas <[email protected]>
CommitDate: Tue Jun 9 18:27:20 2026 +0200

    swscale/ops_backend: delete ops-based C backend
    
    And make uops_backend.c the new reference.
    
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/Makefile          |   1 -
 libswscale/ops.c             |   2 -
 libswscale/ops_backend.c     | 117 --------
 libswscale/ops_backend.h     | 162 -----------
 libswscale/ops_tmpl_common.c | 346 ------------------------
 libswscale/ops_tmpl_float.c  | 273 -------------------
 libswscale/ops_tmpl_int.c    | 619 -------------------------------------------
 libswscale/uops_backend.c    |   4 +-
 8 files changed, 2 insertions(+), 1522 deletions(-)

diff --git a/libswscale/Makefile b/libswscale/Makefile
index f69b39972a..2f08bc36bc 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -30,7 +30,6 @@ OBJS = alphablend.o                                     \
 
 OBJS-$(CONFIG_UNSTABLE) +=                              \
        ops.o                                            \
-       ops_backend.o                                    \
        ops_chain.o                                      \
        ops_dispatch.o                                   \
        ops_memcpy.o                                     \
diff --git a/libswscale/ops.c b/libswscale/ops.c
index 719198e116..b28dbec75f 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -32,7 +32,6 @@
 #include "ops_internal.h"
 
 extern const SwsOpBackend backend_c;
-extern const SwsOpBackend backend_uops;
 extern const SwsOpBackend backend_murder;
 extern const SwsOpBackend backend_aarch64;
 extern const SwsOpBackend backend_x86;
@@ -50,7 +49,6 @@ const SwsOpBackend * const ff_sws_op_backends[] = {
 #elif ARCH_X86_64 && HAVE_X86ASM
     &backend_x86,
 #endif
-    &backend_uops,
     &backend_c,
 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
     &backend_spirv,
diff --git a/libswscale/ops_backend.c b/libswscale/ops_backend.c
deleted file mode 100644
index 254814ee37..0000000000
--- a/libswscale/ops_backend.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * Copyright (C) 2025 Niklas Haas
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "ops_backend.h"
-
-/**
- * We want to disable FP contraction because this is a reference backend that
- * establishes a bit-exact reference result.
- */
-#ifdef __clang__
-#pragma STDC FP_CONTRACT OFF
-#elif AV_GCC_VERSION_AT_LEAST(4, 8)
-#pragma GCC optimize ("fp-contract=off")
-#elif defined(_MSC_VER)
-#pragma fp_contract (off)
-#endif
-
-#if AV_GCC_VERSION_AT_LEAST(4, 4)
-#pragma GCC optimize ("finite-math-only")
-#endif
-
-/* Array-based reference implementation */
-
-#ifndef SWS_BLOCK_SIZE
-#  define SWS_BLOCK_SIZE 32
-#endif
-
-typedef  uint8_t  u8block_t[SWS_BLOCK_SIZE];
-typedef uint16_t u16block_t[SWS_BLOCK_SIZE];
-typedef uint32_t u32block_t[SWS_BLOCK_SIZE];
-typedef    float f32block_t[SWS_BLOCK_SIZE];
-
-#define BIT_DEPTH 8
-# include "ops_tmpl_int.c"
-#undef BIT_DEPTH
-
-#define BIT_DEPTH 16
-# include "ops_tmpl_int.c"
-#undef BIT_DEPTH
-
-#define BIT_DEPTH 32
-# include "ops_tmpl_int.c"
-# include "ops_tmpl_float.c"
-#undef BIT_DEPTH
-
-static const SwsOpTable *const tables[] = {
-    &bitfn(op_table_int,    u8),
-    &bitfn(op_table_int,   u16),
-    &bitfn(op_table_int,   u32),
-    &bitfn(op_table_float, f32),
-};
-
-static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
-{
-    int ret;
-
-    SwsOpChain *chain = ff_sws_op_chain_alloc();
-    if (!chain)
-        return AVERROR(ENOMEM);
-
-    av_assert0(ops->num_ops > 0);
-    const SwsPixelType read_type = ops->ops[0].type;
-
-    for (int i = 0; i < ops->num_ops; i++) {
-        ret = ff_sws_op_compile_tables(ctx, tables, FF_ARRAY_ELEMS(tables),
-                                       &ops->ops[i], SWS_BLOCK_SIZE, chain);
-        if (ret < 0) {
-            av_log(ctx, AV_LOG_TRACE, "Failed to compile op %d\n", i);
-            ff_sws_op_chain_free(chain);
-            return ret;
-        }
-    }
-
-    *out = (SwsCompiledOp) {
-        .slice_align = 1,
-        .block_size  = SWS_BLOCK_SIZE,
-        .cpu_flags   = chain->cpu_flags,
-        .over_read   = chain->over_read,
-        .over_write  = chain->over_write,
-        .priv        = chain,
-        .free        = ff_sws_op_chain_free_cb,
-    };
-
-    switch (read_type) {
-    case SWS_PIXEL_U8:  out->func = process_u8;  break;
-    case SWS_PIXEL_U16: out->func = process_u16; break;
-    case SWS_PIXEL_U32: out->func = process_u32; break;
-    case SWS_PIXEL_F32: out->func = process_f32; break;
-    default: av_unreachable("Invalid pixel type!");
-    }
-
-    return 0;
-}
-
-const SwsOpBackend backend_c = {
-    .name       = "c",
-    .flags      = SWS_BACKEND_C,
-    .compile    = compile,
-    .hw_format  = AV_PIX_FMT_NONE,
-};
diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h
deleted file mode 100644
index 82eb92fc36..0000000000
--- a/libswscale/ops_backend.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/**
- * Copyright (C) 2025 Niklas Haas
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SWSCALE_OPS_BACKEND_H
-#define SWSCALE_OPS_BACKEND_H
-
-/**
- * Helper macros for the C-based backend.
- *
- * To use these macros, the following types must be defined:
- *  - PIXEL_TYPE should be one of SWS_PIXEL_*
- *  - pixel_t should be the type of pixels
- *  - block_t should be the type of blocks (groups of pixels)
- */
-
-#include <assert.h>
-#include <float.h>
-#include <stdint.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/mem.h"
-
-#include "ops_chain.h"
-
-/**
- * Internal context holding per-iter execution data. The data pointers will be
- * directly incremented by the corresponding read/write functions.
- */
-typedef struct SwsOpIter {
-    uintptr_t in[4];
-    uintptr_t out[4];
-    int x, y;
-
-    /* Link back to per-slice execution context */
-    const SwsOpExec *exec;
-} SwsOpIter;
-
-#ifdef __clang__
-#  define SWS_LOOP AV_PRAGMA(clang loop vectorize(assume_safety))
-#elif defined(__GNUC__)
-#  define SWS_LOOP AV_PRAGMA(GCC ivdep)
-#else
-#  define SWS_LOOP
-#endif
-
-/* Miscellaneous helpers */
-#define bitfn2(name, ext) name ## _ ## ext
-#define bitfn(name, ext)  bitfn2(name, ext)
-
-#define FN_SUFFIX AV_JOIN(FMT_CHAR, BIT_DEPTH)
-#define fn(name)  bitfn(name, FN_SUFFIX)
-
-#define av_q2pixel(q) ((q).den ? (pixel_t) (q).num / (q).den : 0)
-#define bump_ptr(ptr, bump) ((pixel_t *) ((uintptr_t) (ptr) + (bump)))
-
-/* Helper macros to make writing common function signatures less painful */
-#define DECL_FUNC(NAME, ...)                                                   
 \
-    static av_always_inline void fn(NAME)(SwsOpIter *restrict iter,            
 \
-                                          const SwsOpImpl *restrict impl,      
 \
-                                          block_t x, block_t y,                
 \
-                                          block_t z, block_t w,                
 \
-                                          __VA_ARGS__)
-
-#define DECL_READ(NAME, ...)                                                   
 \
-    DECL_FUNC(NAME, const pixel_t *restrict in0, const pixel_t *restrict in1,  
 \
-                    const pixel_t *restrict in2, const pixel_t *restrict in3,  
 \
-                    __VA_ARGS__)
-
-#define DECL_WRITE(NAME, ...)                                                  
 \
-    DECL_FUNC(NAME, pixel_t *restrict out0, pixel_t *restrict out1,            
 \
-                    pixel_t *restrict out2, pixel_t *restrict out3,            
 \
-                    __VA_ARGS__)
-
-/* Helper macros to call into functions declared with DECL_FUNC_* */
-#define CALL(FUNC, ...) \
-    fn(FUNC)(iter, impl, x, y, z, w, __VA_ARGS__)
-
-#define CALL_READ(FUNC, ...)                                                   
 \
-    CALL(FUNC, (const pixel_t *) iter->in[0], (const pixel_t *) iter->in[1],   
 \
-               (const pixel_t *) iter->in[2], (const pixel_t *) iter->in[3],   
 \
-               __VA_ARGS__)
-
-#define CALL_WRITE(FUNC, ...)                                                  
 \
-    CALL(FUNC, (pixel_t *) iter->out[0], (pixel_t *) iter->out[1],             
 \
-               (pixel_t *) iter->out[2], (pixel_t *) iter->out[3], __VA_ARGS__)
-
-/* Helper macros to declare continuation functions */
-#define DECL_IMPL(FUNC, NAME, ...)                                             
 \
-    static void av_flatten fn(NAME)(SwsOpIter *restrict iter,                  
 \
-                                    const SwsOpImpl *restrict impl,            
 \
-                                    void *restrict x, void *restrict y,        
 \
-                                    void *restrict z, void *restrict w)        
 \
-    {                                                                          
 \
-        CALL(FUNC, __VA_ARGS__);                                               
 \
-    }
-
-/* Helper macro to call into the next continuation */
-#define CONTINUE(X, Y, Z, W)                                                   
 \
-    ((void (*)(SwsOpIter *, const SwsOpImpl *,                                 
 \
-               void *restrict, void *restrict,                                 
 \
-               void *restrict, void *restrict)) impl->cont)                    
 \
-        (iter, &impl[1], (X), (Y), (Z), (W))
-
-/* Helper macros for common op setup code */
-#define DECL_SETUP(NAME, PARAMS, OUT)                                          
 \
-    static int fn(NAME)(const SwsImplParams *PARAMS, SwsImplResult *OUT)
-
-#define SETUP_MEMDUP(c, out) ff_setup_memdup(&(c), sizeof(c), out)
-static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out)
-{
-    out->priv.ptr = av_memdup(c, size);
-    out->free = ff_op_priv_free;
-    return out->priv.ptr ? 0 : AVERROR(ENOMEM);
-}
-
-/* Helper macro for declaring op table entries */
-#define DECL_ENTRY(NAME, MASK, ...)                                            
 \
-    static const SwsOpEntry fn(op_##NAME) = {                                  
 \
-        .func = (SwsFuncPtr) fn(NAME),                                         
 \
-        .type = PIXEL_TYPE,                                                    
 \
-        .mask = (MASK),                                                        
 \
-        __VA_ARGS__                                                            
 \
-    }
-
-/* Helpers to define functions for common subsets of components */
-#define DECL_PATTERN(NAME) \
-    DECL_FUNC(NAME, const bool X, const bool Y, const bool Z, const bool W)
-
-#define WRAP_PATTERN(FUNC, X, Y, Z, W, ...)                                    
 \
-    DECL_IMPL(FUNC, FUNC##_##X##Y##Z##W, X, Y, Z, W)                           
 \
-    DECL_ENTRY(FUNC##_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W), __VA_ARGS__)
-
-#define WRAP_COMMON_PATTERNS(FUNC, ...)                                        
 \
-    WRAP_PATTERN(FUNC, 1, 0, 0, 0, __VA_ARGS__);                               
 \
-    WRAP_PATTERN(FUNC, 1, 0, 0, 1, __VA_ARGS__);                               
 \
-    WRAP_PATTERN(FUNC, 1, 1, 1, 0, __VA_ARGS__);                               
 \
-    WRAP_PATTERN(FUNC, 1, 1, 1, 1, __VA_ARGS__)
-
-#define REF_COMMON_PATTERNS(NAME)                                              
 \
-    &fn(op_##NAME##_1000),                                                     
 \
-    &fn(op_##NAME##_1001),                                                     
 \
-    &fn(op_##NAME##_1110),                                                     
 \
-    &fn(op_##NAME##_1111)
-
-#endif
diff --git a/libswscale/ops_tmpl_common.c b/libswscale/ops_tmpl_common.c
deleted file mode 100644
index 0d056ca08e..0000000000
--- a/libswscale/ops_tmpl_common.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/**
- * Copyright (C) 2025 Niklas Haas
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "ops_backend.h"
-
-#ifndef BIT_DEPTH
-#  error Should only be included from ops_tmpl_*.c!
-#endif
-
-#define WRAP_CONVERT_UINT(N)                                                   
 \
-DECL_PATTERN(convert_uint##N)                                                  
 \
-{                                                                              
 \
-    u##N##block_t xu, yu, zu, wu;                                              
 \
-                                                                               
 \
-    SWS_LOOP                                                                   
 \
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {                                 
 \
-        if (X)                                                                 
 \
-            xu[i] = x[i];                                                      
 \
-        if (Y)                                                                 
 \
-            yu[i] = y[i];                                                      
 \
-        if (Z)                                                                 
 \
-            zu[i] = z[i];                                                      
 \
-        if (W)                                                                 
 \
-            wu[i] = w[i];                                                      
 \
-    }                                                                          
 \
-                                                                               
 \
-    CONTINUE(xu, yu, zu, wu);                                                  
 \
-}                                                                              
 \
-                                                                               
 \
-WRAP_COMMON_PATTERNS(convert_uint##N,                                          
 \
-    .op = SWS_OP_CONVERT,                                                      
 \
-    .convert.to = SWS_PIXEL_U##N,                                              
 \
-);
-
-#if BIT_DEPTH != 8
-WRAP_CONVERT_UINT(8)
-#endif
-
-#if BIT_DEPTH != 16
-WRAP_CONVERT_UINT(16)
-#endif
-
-#if BIT_DEPTH != 32 || defined(IS_FLOAT)
-WRAP_CONVERT_UINT(32)
-#endif
-
-DECL_PATTERN(clear)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        if (X)
-            x[i] = impl->priv.px[0];
-        if (Y)
-            y[i] = impl->priv.px[1];
-        if (Z)
-            z[i] = impl->priv.px[2];
-        if (W)
-            w[i] = impl->priv.px[3];
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-#define WRAP_CLEAR(X, Y, Z, W)                                                 
 \
-DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W)                             
 \
-                                                                               
 \
-DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL,                                 
 \
-    .setup = ff_sws_setup_clear,                                               
 \
-    .op = SWS_OP_CLEAR,                                                        
 \
-    .clear.mask = SWS_COMP_MASK(X, Y, Z, W),                                   
 \
-);
-
-WRAP_CLEAR(0, 0, 0, 1) /* rgba alpha */
-WRAP_CLEAR(1, 0, 0, 0) /* argb alpha */
-WRAP_CLEAR(0, 1, 0, 0) /* ya alpha */
-
-WRAP_CLEAR(1, 1, 0, 0) /* vuya chroma */
-WRAP_CLEAR(0, 1, 1, 0) /* yuva chroma */
-WRAP_CLEAR(0, 0, 1, 1) /* ayuv chroma */
-WRAP_CLEAR(1, 0, 1, 0) /* uyva chroma */
-WRAP_CLEAR(0, 1, 0, 1) /* xvyu chroma */
-
-WRAP_CLEAR(0, 1, 1, 1) /* gray -> yuva */
-WRAP_CLEAR(1, 0, 1, 1) /* gray -> ayuv */
-WRAP_CLEAR(1, 1, 0, 1) /* gray -> vuya */
-
-DECL_PATTERN(min)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        if (X)
-            x[i] = FFMIN(x[i], impl->priv.px[0]);
-        if (Y)
-            y[i] = FFMIN(y[i], impl->priv.px[1]);
-        if (Z)
-            z[i] = FFMIN(z[i], impl->priv.px[2]);
-        if (W)
-            w[i] = FFMIN(w[i], impl->priv.px[3]);
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-DECL_PATTERN(max)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        if (X)
-            x[i] = FFMAX(x[i], impl->priv.px[0]);
-        if (Y)
-            y[i] = FFMAX(y[i], impl->priv.px[1]);
-        if (Z)
-            z[i] = FFMAX(z[i], impl->priv.px[2]);
-        if (W)
-            w[i] = FFMAX(w[i], impl->priv.px[3]);
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-WRAP_COMMON_PATTERNS(min,
-    .op = SWS_OP_MIN,
-    .setup = ff_sws_setup_clamp,
-);
-
-WRAP_COMMON_PATTERNS(max,
-    .op = SWS_OP_MAX,
-    .setup = ff_sws_setup_clamp,
-);
-
-DECL_PATTERN(scale)
-{
-    const pixel_t scale = impl->priv.px[0];
-
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        if (X)
-            x[i] *= scale;
-        if (Y)
-            y[i] *= scale;
-        if (Z)
-            z[i] *= scale;
-        if (W)
-            w[i] *= scale;
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-WRAP_COMMON_PATTERNS(scale,
-    .op = SWS_OP_SCALE,
-    .setup = ff_sws_setup_scale,
-    .flexible = true,
-);
-
-DECL_SETUP(setup_filter_v, params, out)
-{
-    const SwsFilterWeights *filter = params->op->rw.kernel;
-    static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
-                  ">8 byte pointers not supported");
-
-    /* Pre-convert weights to float */
-    float *weights = av_calloc(filter->num_weights, sizeof(float));
-    if (!weights)
-        return AVERROR(ENOMEM);
-
-    for (int i = 0; i < filter->num_weights; i++)
-        weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;
-
-    out->priv.ptr = weights;
-    out->priv.i32[2] = filter->filter_size;
-    out->free = ff_op_priv_free;
-    return 0;
-}
-
-/* Fully general vertical planar filter case */
-DECL_READ(filter_v, const int elems)
-{
-    const SwsOpExec *exec = iter->exec;
-    const float *restrict weights = impl->priv.ptr;
-    const int filter_size = impl->priv.i32[2];
-    weights += filter_size * iter->y;
-
-    f32block_t xs, ys, zs, ws;
-    memset(xs, 0, sizeof(xs));
-    if (elems > 1)
-        memset(ys, 0, sizeof(ys));
-    if (elems > 2)
-        memset(zs, 0, sizeof(zs));
-    if (elems > 3)
-        memset(ws, 0, sizeof(ws));
-
-    for (int j = 0; j < filter_size; j++) {
-        const float weight = weights[j];
-
-        SWS_LOOP
-        for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-            xs[i] += weight * in0[i];
-            if (elems > 1)
-                ys[i] += weight * in1[i];
-            if (elems > 2)
-                zs[i] += weight * in2[i];
-            if (elems > 3)
-                ws[i] += weight * in3[i];
-        }
-
-        in0 = bump_ptr(in0, exec->in_stride[0]);
-        if (elems > 1)
-            in1 = bump_ptr(in1, exec->in_stride[1]);
-        if (elems > 2)
-            in2 = bump_ptr(in2, exec->in_stride[2]);
-        if (elems > 3)
-            in3 = bump_ptr(in3, exec->in_stride[3]);
-    }
-
-    for (int i = 0; i < elems; i++)
-        iter->in[i] += sizeof(block_t);
-
-    CONTINUE(xs, ys, zs, ws);
-}
-
-DECL_SETUP(setup_filter_h, params, out)
-{
-    SwsFilterWeights *filter = params->op->rw.kernel;
-    out->priv.ptr = av_refstruct_ref(filter->weights);
-    out->priv.i32[2] = filter->filter_size;
-    out->free = ff_op_priv_unref;
-    return 0;
-}
-
-/* Fully general horizontal planar filter case */
-DECL_READ(filter_h, const int elems)
-{
-    const SwsOpExec *exec = iter->exec;
-    const int *restrict weights = impl->priv.ptr;
-    const int filter_size = impl->priv.i32[2];
-    const float scale = 1.0f / SWS_FILTER_SCALE;
-    const int xpos = iter->x;
-    weights += filter_size * iter->x;
-
-    f32block_t xs, ys, zs, ws;
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        const int offset = exec->in_offset_x[xpos + i];
-        pixel_t *start0 = bump_ptr(in0, offset);
-        pixel_t *start1 = bump_ptr(in1, offset);
-        pixel_t *start2 = bump_ptr(in2, offset);
-        pixel_t *start3 = bump_ptr(in3, offset);
-
-        inter_t sx = 0, sy = 0, sz = 0, sw = 0;
-        for (int j = 0; j < filter_size; j++) {
-            const int weight = weights[j];
-            sx += weight * start0[j];
-            if (elems > 1)
-                sy += weight * start1[j];
-            if (elems > 2)
-                sz += weight * start2[j];
-            if (elems > 3)
-                sw += weight * start3[j];
-        }
-
-        xs[i] = (float) sx * scale;
-        if (elems > 1)
-            ys[i] = (float) sy * scale;
-        if (elems > 2)
-            zs[i] = (float) sz * scale;
-        if (elems > 3)
-            ws[i] = (float) sw * scale;
-
-        weights += filter_size;
-    }
-
-    CONTINUE(xs, ys, zs, ws);
-}
-
-#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)                                  
 \
-static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter,       
 \
-                                             const SwsOpImpl *restrict impl,   
 \
-                                             void *restrict x, void *restrict y,\
-                                             void *restrict z, void *restrict w)\
-{                                                                              
 \
-    CALL_READ(FUNC##SUFFIX, ELEMS);                                            
 \
-}                                                                              
 \
-                                                                               
 \
-DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS),                         
 \
-    .op = SWS_OP_READ,                                                         
 \
-    .setup = fn(setup_filter##SUFFIX),                                         
 \
-    .rw.elems = ELEMS,                                                         
 \
-    .rw.filter = SWS_OP_FILTER_##DIR,                                          
 \
-);
-
-WRAP_FILTER(filter, V, 1, _v)
-WRAP_FILTER(filter, V, 2, _v)
-WRAP_FILTER(filter, V, 3, _v)
-WRAP_FILTER(filter, V, 4, _v)
-
-WRAP_FILTER(filter, H, 1, _h)
-WRAP_FILTER(filter, H, 2, _h)
-WRAP_FILTER(filter, H, 3, _h)
-WRAP_FILTER(filter, H, 4, _h)
-
-static void fn(process)(const SwsOpExec *exec, const void *priv,
-                        const int bx_start, const int y_start,
-                        int bx_end, int y_end)
-{
-    const SwsOpChain *chain = priv;
-    const SwsOpImpl *impl = chain->impl;
-    u32block_t x, y, z, w; /* allocate enough space for any intermediate */
-
-    SwsOpIter iterdata;
-    SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
-    iter->exec = exec;
-    for (int i = 0; i < 4; i++) {
-        iter->in[i]  = (uintptr_t) exec->in[i];
-        iter->out[i] = (uintptr_t) exec->out[i];
-    }
-
-    for (iter->y = y_start; iter->y < y_end; iter->y++) {
-        for (int block = bx_start; block < bx_end; block++) {
-            iter->x = block * SWS_BLOCK_SIZE;
-            CONTINUE(x, y, z, w);
-        }
-
-        const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
-        for (int i = 0; i < 4; i++) {
-            iter->in[i]  += exec->in_bump[i] + y_bump * exec->in_stride[i];
-            iter->out[i] += exec->out_bump[i];
-        }
-    }
-}
diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c
deleted file mode 100644
index 57f6d8c399..0000000000
--- a/libswscale/ops_tmpl_float.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/**
- * Copyright (C) 2025 Niklas Haas
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/avassert.h"
-
-#include "ops_backend.h"
-
-#ifndef BIT_DEPTH
-#  define BIT_DEPTH 32
-#endif
-
-#if BIT_DEPTH == 32
-#  define PIXEL_TYPE SWS_PIXEL_F32
-#  define PIXEL_MAX  FLT_MAX
-#  define pixel_t    float
-#  define inter_t    float
-#  define block_t    f32block_t
-#  define px         f32
-#else
-#  error Invalid BIT_DEPTH
-#endif
-
-#define IS_FLOAT 1
-#define FMT_CHAR f
-#include "ops_tmpl_common.c"
-
-DECL_SETUP(setup_dither, params, out)
-{
-    const SwsOp *op = params->op;
-    const int size = 1 << op->dither.size_log2;
-    const int width = FFMAX(size, SWS_BLOCK_SIZE);
-    pixel_t *matrix = out->priv.ptr = av_malloc(sizeof(pixel_t) * size * width);
-    if (!matrix)
-        return AVERROR(ENOMEM);
-    out->free = ff_op_priv_free;
-
-    static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]),
-                  ">8 byte pointers not supported");
-
-    int8_t *offset = &out->priv.i8[8];
-    for (int i = 0; i < 4; i++)
-        offset[i] = op->dither.y_offset[i];
-
-    for (int y = 0; y < size; y++) {
-        for (int x = 0; x < size; x++)
-            matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]);
-        for (int x = size; x < width; x++) /* pad to block size */
-            matrix[y * width + x] = matrix[y * width + (x % size)];
-    }
-
-    return 0;
-}
-
-DECL_FUNC(dither, const int size_log2)
-{
-    const pixel_t *restrict matrix = impl->priv.ptr;
-    const int8_t *restrict offset = &impl->priv.i8[8];
-    const int mask = (1 << size_log2) - 1;
-    const int y_line = iter->y;
-    const int size = 1 << size_log2;
-    const int width = FFMAX(size, SWS_BLOCK_SIZE);
-    const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1);
-
-#define DITHER_COMP(VAR, IDX)                                                  
          \
-    if (offset[IDX] >= 0) {                                                    
          \
-        const int row = (y_line + offset[IDX]) & mask;                         
          \
-        SWS_LOOP                                                               
          \
-        for (int i = 0; i < SWS_BLOCK_SIZE; i++)                               
          \
-            VAR[i] += matrix[row * width + base + i];                          
          \
-    }
-
-    DITHER_COMP(x, 0)
-    DITHER_COMP(y, 1)
-    DITHER_COMP(z, 2)
-    DITHER_COMP(w, 3)
-
-    CONTINUE(x, y, z, w);
-}
-
-#define WRAP_DITHER(N)                                                         
 \
-DECL_IMPL(dither, dither##N, N)                                                
 \
-                                                                               
 \
-DECL_ENTRY(dither##N, SWS_COMP_ALL,                                            
 \
-    .op = SWS_OP_DITHER,                                                       
 \
-    .dither_size = N,                                                          
 \
-    .setup = fn(setup_dither),                                                 
 \
-);
-
-WRAP_DITHER(0)
-WRAP_DITHER(1)
-WRAP_DITHER(2)
-WRAP_DITHER(3)
-WRAP_DITHER(4)
-WRAP_DITHER(5)
-WRAP_DITHER(6)
-WRAP_DITHER(7)
-WRAP_DITHER(8)
-
-typedef struct {
-    /* Stored in split form for convenience */
-    pixel_t m[4][4];
-    pixel_t k[4];
-} fn(LinCoeffs);
-
-DECL_SETUP(setup_linear, params, out)
-{
-    const SwsOp *op = params->op;
-    fn(LinCoeffs) c;
-
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++)
-            c.m[i][j] = av_q2pixel(op->lin.m[i][j]);
-        c.k[i] = av_q2pixel(op->lin.m[i][4]);
-    }
-
-    return SETUP_MEMDUP(c, out);
-}
-
-/**
- * Fully general case for a 5x5 linear affine transformation. Should never be
- * called without constant `mask`. This function will compile down to the
- * appropriately optimized version for the required subset of operations when
- * called with a constant mask.
- */
-DECL_FUNC(linear_mask, const uint32_t mask)
-{
-    const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr;
-
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        const pixel_t xx = x[i];
-        const pixel_t yy = y[i];
-        const pixel_t zz = z[i];
-        const pixel_t ww = w[i];
-
-        x[i]  = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0;
-        x[i] += (mask & SWS_MASK(0, 0))  ? c.m[0][0] * xx : xx;
-        x[i] += (mask & SWS_MASK(0, 1))  ? c.m[0][1] * yy : 0;
-        x[i] += (mask & SWS_MASK(0, 2))  ? c.m[0][2] * zz : 0;
-        x[i] += (mask & SWS_MASK(0, 3))  ? c.m[0][3] * ww : 0;
-
-        y[i]  = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0;
-        y[i] += (mask & SWS_MASK(1, 0))  ? c.m[1][0] * xx : 0;
-        y[i] += (mask & SWS_MASK(1, 1))  ? c.m[1][1] * yy : yy;
-        y[i] += (mask & SWS_MASK(1, 2))  ? c.m[1][2] * zz : 0;
-        y[i] += (mask & SWS_MASK(1, 3))  ? c.m[1][3] * ww : 0;
-
-        z[i]  = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0;
-        z[i] += (mask & SWS_MASK(2, 0))  ? c.m[2][0] * xx : 0;
-        z[i] += (mask & SWS_MASK(2, 1))  ? c.m[2][1] * yy : 0;
-        z[i] += (mask & SWS_MASK(2, 2))  ? c.m[2][2] * zz : zz;
-        z[i] += (mask & SWS_MASK(2, 3))  ? c.m[2][3] * ww : 0;
-
-        w[i]  = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0;
-        w[i] += (mask & SWS_MASK(3, 0))  ? c.m[3][0] * xx : 0;
-        w[i] += (mask & SWS_MASK(3, 1))  ? c.m[3][1] * yy : 0;
-        w[i] += (mask & SWS_MASK(3, 2))  ? c.m[3][2] * zz : 0;
-        w[i] += (mask & SWS_MASK(3, 3))  ? c.m[3][3] * ww : ww;
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-#define WRAP_LINEAR(NAME, MASK)                                                
 \
-DECL_IMPL(linear_mask, linear_##NAME, MASK)                                    
 \
-                                                                               
 \
-DECL_ENTRY(linear_##NAME, SWS_COMP_ALL,                                        
 \
-    .op    = SWS_OP_LINEAR,                                                    
 \
-    .setup = fn(setup_linear),                                                 
 \
-    .linear_mask = (MASK),                                                     
 \
-);
-
-WRAP_LINEAR(luma,      SWS_MASK_LUMA)
-WRAP_LINEAR(alpha,     SWS_MASK_ALPHA)
-WRAP_LINEAR(lumalpha,  SWS_MASK_LUMA | SWS_MASK_ALPHA)
-WRAP_LINEAR(yalpha,    SWS_MASK(1, 1)) /* ya alpha */
-WRAP_LINEAR(dot3,      0x7)
-WRAP_LINEAR(dot3a,     0x7 | SWS_MASK_ALPHA)
-WRAP_LINEAR(row0,      SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) /* row0 sans alpha */
-WRAP_LINEAR(diag3,     SWS_MASK_DIAG3)
-WRAP_LINEAR(diag4,     SWS_MASK_DIAG4)
-WRAP_LINEAR(diagoff3,  SWS_MASK_DIAG3 | SWS_MASK_OFF3)
-WRAP_LINEAR(affine3,   SWS_MASK_MAT3 | SWS_MASK_OFF3)
-WRAP_LINEAR(affine3uv, SWS_MASK_MAT3 | SWS_MASK_OFF(1) | SWS_MASK_OFF(2))
-WRAP_LINEAR(affine3x,  SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
-WRAP_LINEAR(affine3xa, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3 | SWS_MASK_ALPHA)
-WRAP_LINEAR(affine3xy, SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
-WRAP_LINEAR(affine3a,  SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA)
-
-static const SwsOpTable fn(op_table_float) = {
-    .block_size = SWS_BLOCK_SIZE,
-    .entries = {
-        REF_COMMON_PATTERNS(convert_uint8),
-        REF_COMMON_PATTERNS(convert_uint16),
-        REF_COMMON_PATTERNS(convert_uint32),
-
-        &fn(op_clear_0001),
-        REF_COMMON_PATTERNS(min),
-        REF_COMMON_PATTERNS(max),
-        REF_COMMON_PATTERNS(scale),
-
-        &fn(op_dither0),
-        &fn(op_dither1),
-        &fn(op_dither2),
-        &fn(op_dither3),
-        &fn(op_dither4),
-        &fn(op_dither5),
-        &fn(op_dither6),
-        &fn(op_dither7),
-        &fn(op_dither8),
-
-        &fn(op_clear_0001),
-        &fn(op_clear_1000),
-        &fn(op_clear_1100),
-        &fn(op_clear_0100),
-
-        &fn(op_linear_luma),
-        &fn(op_linear_alpha),
-        &fn(op_linear_lumalpha),
-        &fn(op_linear_yalpha),
-        &fn(op_linear_dot3),
-        &fn(op_linear_dot3a),
-        &fn(op_linear_row0),
-        &fn(op_linear_diag3),
-        &fn(op_linear_diag4),
-        &fn(op_linear_diagoff3),
-        &fn(op_linear_affine3),
-        &fn(op_linear_affine3uv),
-        &fn(op_linear_affine3x),
-        &fn(op_linear_affine3xa),
-        &fn(op_linear_affine3xy),
-        &fn(op_linear_affine3a),
-
-        &fn(op_filter1_v),
-        &fn(op_filter2_v),
-        &fn(op_filter3_v),
-        &fn(op_filter4_v),
-
-        &fn(op_filter1_h),
-        &fn(op_filter2_h),
-        &fn(op_filter3_h),
-        &fn(op_filter4_h),
-
-        NULL
-    },
-};
-
-#undef PIXEL_TYPE
-#undef PIXEL_MAX
-#undef pixel_t
-#undef inter_t
-#undef block_t
-#undef px
-
-#undef FMT_CHAR
-#undef IS_FLOAT
diff --git a/libswscale/ops_tmpl_int.c b/libswscale/ops_tmpl_int.c
deleted file mode 100644
index 3e98c1f5ae..0000000000
--- a/libswscale/ops_tmpl_int.c
+++ /dev/null
@@ -1,619 +0,0 @@
-/**
- * Copyright (C) 2025 Niklas Haas
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/avassert.h"
-#include "libavutil/bswap.h"
-
-#include "ops_backend.h"
-
-#ifndef BIT_DEPTH
-#  define BIT_DEPTH 8
-#endif
-
-#if BIT_DEPTH == 32
-#  define PIXEL_TYPE SWS_PIXEL_U32
-#  define PIXEL_MAX  0xFFFFFFFFu
-#  define SWAP_BYTES av_bswap32
-#  define pixel_t    uint32_t
-#  define inter_t    int64_t
-#  define block_t    u32block_t
-#  define px         u32
-#elif BIT_DEPTH == 16
-#  define PIXEL_TYPE SWS_PIXEL_U16
-#  define PIXEL_MAX  0xFFFFu
-#  define SWAP_BYTES av_bswap16
-#  define pixel_t    uint16_t
-#  define inter_t    int64_t
-#  define block_t    u16block_t
-#  define px         u16
-#elif BIT_DEPTH == 8
-#  define PIXEL_TYPE SWS_PIXEL_U8
-#  define PIXEL_MAX  0xFFu
-#  define pixel_t    uint8_t
-#  define inter_t    int32_t
-#  define block_t    u8block_t
-#  define px         u8
-#else
-#  error Invalid BIT_DEPTH
-#endif
-
-#define IS_FLOAT  0
-#define FMT_CHAR  u
-#include "ops_tmpl_common.c"
-
-DECL_READ(read_planar, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        x[i] = in0[i];
-        if (elems > 1)
-            y[i] = in1[i];
-        if (elems > 2)
-            z[i] = in2[i];
-        if (elems > 3)
-            w[i] = in3[i];
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-DECL_READ(read_packed, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        x[i] = in0[elems * i + 0];
-        if (elems > 1)
-            y[i] = in0[elems * i + 1];
-        if (elems > 2)
-            z[i] = in0[elems * i + 2];
-        if (elems > 3)
-            w[i] = in0[elems * i + 3];
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-DECL_WRITE(write_planar, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        out0[i] = x[i];
-        if (elems > 1)
-            out1[i] = y[i];
-        if (elems > 2)
-            out2[i] = z[i];
-        if (elems > 3)
-            out3[i] = w[i];
-    }
-}
-
-DECL_WRITE(write_packed, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        out0[elems * i + 0] = x[i];
-        if (elems > 1)
-            out0[elems * i + 1] = y[i];
-        if (elems > 2)
-            out0[elems * i + 2] = z[i];
-        if (elems > 3)
-            out0[elems * i + 3] = w[i];
-    }
-}
-
-#define WRAP_READ(FUNC, ELEMS, FRAC, PACKED)                                    \
-static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter,                \
-                                       const SwsOpImpl *restrict impl,          \
-                                       void *restrict x, void *restrict y,      \
-                                       void *restrict z, void *restrict w)      \
-{                                                                               \
-    CALL_READ(FUNC, ELEMS);                                                     \
-    for (int i = 0; i < (PACKED ? 1 : ELEMS); i++)                              \
-        iter->in[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC;          \
-}                                                                               \
-                                                                                \
-DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ELEMS(ELEMS),                                  \
-    .op = SWS_OP_READ,                                                          \
-    .rw = {                                                                     \
-        .elems  = ELEMS,                                                        \
-        .packed = PACKED,                                                       \
-        .frac   = FRAC,                                                         \
-    },                                                                          \
-);
-
-WRAP_READ(read_planar, 1, 0, false)
-WRAP_READ(read_planar, 2, 0, false)
-WRAP_READ(read_planar, 3, 0, false)
-WRAP_READ(read_planar, 4, 0, false)
-WRAP_READ(read_packed, 2, 0, true)
-WRAP_READ(read_packed, 3, 0, true)
-WRAP_READ(read_packed, 4, 0, true)
-
-#define WRAP_WRITE(FUNC, ELEMS, FRAC, PACKED)                                   \
-static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter,                \
-                                       const SwsOpImpl *restrict impl,          \
-                                       void *restrict x, void *restrict y,      \
-                                       void *restrict z, void *restrict w)      \
-{                                                                               \
-    CALL_WRITE(FUNC, ELEMS);                                                    \
-    for (int i = 0; i < (PACKED ? 1 : ELEMS); i++)                              \
-        iter->out[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC;         \
-}                                                                               \
-                                                                                \
-DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ALL,                                           \
-    .op = SWS_OP_WRITE,                                                         \
-    .rw = {                                                                     \
-        .elems  = ELEMS,                                                        \
-        .packed = PACKED,                                                       \
-        .frac   = FRAC,                                                         \
-    },                                                                          \
-);
-
-WRAP_WRITE(write_planar, 1, 0, false)
-WRAP_WRITE(write_planar, 2, 0, false)
-WRAP_WRITE(write_planar, 3, 0, false)
-WRAP_WRITE(write_planar, 4, 0, false)
-WRAP_WRITE(write_packed, 2, 0, true)
-WRAP_WRITE(write_packed, 3, 0, true)
-WRAP_WRITE(write_packed, 4, 0, true)
-
-#if BIT_DEPTH == 8
-DECL_READ(read_nibbles, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) {
-        const pixel_t val = ((const pixel_t *) in0)[i >> 1];
-        x[i + 0] = val >> 4;  /* high nibble */
-        x[i + 1] = val & 0xF; /* low nibble */
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-DECL_READ(read_bits, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) {
-        const pixel_t val = ((const pixel_t *) in0)[i >> 3];
-        x[i + 0] = (val >> 7) & 1;
-        x[i + 1] = (val >> 6) & 1;
-        x[i + 2] = (val >> 5) & 1;
-        x[i + 3] = (val >> 4) & 1;
-        x[i + 4] = (val >> 3) & 1;
-        x[i + 5] = (val >> 2) & 1;
-        x[i + 6] = (val >> 1) & 1;
-        x[i + 7] = (val >> 0) & 1;
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-WRAP_READ(read_nibbles, 1, 1, false)
-WRAP_READ(read_bits,    1, 3, false)
-
-DECL_WRITE(write_nibbles, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i += 2)
-        out0[i >> 1] = x[i] << 4 | x[i + 1];
-}
-
-DECL_WRITE(write_bits, const int elems)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) {
-        out0[i >> 3] = x[i + 0] << 7 |
-                       x[i + 1] << 6 |
-                       x[i + 2] << 5 |
-                       x[i + 3] << 4 |
-                       x[i + 4] << 3 |
-                       x[i + 5] << 2 |
-                       x[i + 6] << 1 |
-                       x[i + 7];
-    }
-}
-
-WRAP_WRITE(write_nibbles, 1, 1, false)
-WRAP_WRITE(write_bits,    1, 3, false)
-#endif /* BIT_DEPTH == 8 */
-
-#ifdef SWAP_BYTES
-DECL_PATTERN(swap_bytes)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        if (X)
-            x[i] = SWAP_BYTES(x[i]);
-        if (Y)
-            y[i] = SWAP_BYTES(y[i]);
-        if (Z)
-            z[i] = SWAP_BYTES(z[i]);
-        if (W)
-            w[i] = SWAP_BYTES(w[i]);
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-WRAP_COMMON_PATTERNS(swap_bytes, .op = SWS_OP_SWAP_BYTES);
-#endif /* SWAP_BYTES */
-
-#if BIT_DEPTH == 8
-DECL_PATTERN(expand16)
-{
-    u16block_t x16, y16, z16, w16;
-
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        if (X)
-            x16[i] = x[i] << 8 | x[i];
-        if (Y)
-            y16[i] = y[i] << 8 | y[i];
-        if (Z)
-            z16[i] = z[i] << 8 | z[i];
-        if (W)
-            w16[i] = w[i] << 8 | w[i];
-    }
-
-    CONTINUE(x16, y16, z16, w16);
-}
-
-WRAP_COMMON_PATTERNS(expand16,
-    .op = SWS_OP_CONVERT,
-    .convert.to = SWS_PIXEL_U16,
-    .convert.expand = true,
-);
-
-DECL_PATTERN(expand32)
-{
-    u32block_t x32, y32, z32, w32;
-
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        x32[i] = (uint32_t)x[i] << 24 | x[i] << 16 | x[i] << 8 | x[i];
-        y32[i] = (uint32_t)y[i] << 24 | y[i] << 16 | y[i] << 8 | y[i];
-        z32[i] = (uint32_t)z[i] << 24 | z[i] << 16 | z[i] << 8 | z[i];
-        w32[i] = (uint32_t)w[i] << 24 | w[i] << 16 | w[i] << 8 | w[i];
-    }
-
-    CONTINUE(x32, y32, z32, w32);
-}
-
-WRAP_COMMON_PATTERNS(expand32,
-    .op = SWS_OP_CONVERT,
-    .convert.to = SWS_PIXEL_U32,
-    .convert.expand = true,
-);
-#endif
-
-DECL_FUNC(pack, const int bits0, const int bits1, const int bits2, const int bits3)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        x[i] = x[i] << (bits1 + bits2 + bits3);
-        if (bits1)
-            x[i] |= y[i] << (bits2 + bits3);
-        if (bits2)
-            x[i] |= z[i] << bits3;
-        if (bits3)
-            x[i] |= w[i];
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-DECL_FUNC(unpack, const int bits0, const int bits1, const int bits2, const int bits3)
-{
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        const pixel_t val = x[i];
-        x[i] = val >> (bits1 + bits2 + bits3);
-        if (bits1)
-            y[i] = (val >> (bits2 + bits3)) & ((1 << bits1) - 1);
-        if (bits2)
-            z[i] = (val >> bits3) & ((1 << bits2) - 1);
-        if (bits3)
-            w[i] = val & ((1 << bits3) - 1);
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-#define WRAP_PACK_UNPACK(X, Y, Z, W)                                            \
-DECL_IMPL(pack, pack_##X##Y##Z##W, X, Y, Z, W)                                  \
-                                                                                \
-DECL_ENTRY(pack_##X##Y##Z##W, SWS_COMP(0),                                      \
-    .op = SWS_OP_PACK,                                                          \
-    .pack.pattern = { X, Y, Z, W },                                             \
-);                                                                              \
-                                                                                \
-DECL_IMPL(unpack, unpack_##X##Y##Z##W, X, Y, Z, W)                              \
-                                                                                \
-DECL_ENTRY(unpack_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W),                      \
-    .op = SWS_OP_UNPACK,                                                        \
-    .pack.pattern = { X, Y, Z, W },                                             \
-);
-
-WRAP_PACK_UNPACK( 3,  3,  2,  0)
-WRAP_PACK_UNPACK( 2,  3,  3,  0)
-WRAP_PACK_UNPACK( 1,  2,  1,  0)
-WRAP_PACK_UNPACK( 5,  6,  5,  0)
-WRAP_PACK_UNPACK( 5,  5,  5,  0)
-WRAP_PACK_UNPACK( 4,  4,  4,  0)
-WRAP_PACK_UNPACK( 2, 10, 10, 10)
-WRAP_PACK_UNPACK(10, 10, 10,  2)
-
-#if BIT_DEPTH != 8
-DECL_PATTERN(lshift)
-{
-    const uint8_t amount = impl->priv.u8[0];
-
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        x[i] <<= amount;
-        y[i] <<= amount;
-        z[i] <<= amount;
-        w[i] <<= amount;
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-DECL_PATTERN(rshift)
-{
-    const uint8_t amount = impl->priv.u8[0];
-
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        x[i] >>= amount;
-        y[i] >>= amount;
-        z[i] >>= amount;
-        w[i] >>= amount;
-    }
-
-    CONTINUE(x, y, z, w);
-}
-
-WRAP_COMMON_PATTERNS(lshift,
-    .op       = SWS_OP_LSHIFT,
-    .setup    = ff_sws_setup_shift,
-    .flexible = true,
-);
-
-WRAP_COMMON_PATTERNS(rshift,
-    .op       = SWS_OP_RSHIFT,
-    .setup    = ff_sws_setup_shift,
-    .flexible = true,
-);
-#endif /* BIT_DEPTH != 8 */
-
-DECL_PATTERN(convert_float)
-{
-    f32block_t xf, yf, zf, wf;
-
-    SWS_LOOP
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
-        xf[i] = x[i];
-        yf[i] = y[i];
-        zf[i] = z[i];
-        wf[i] = w[i];
-    }
-
-    CONTINUE(xf, yf, zf, wf);
-}
-
-WRAP_COMMON_PATTERNS(convert_float,
-    .op = SWS_OP_CONVERT,
-    .convert.to = SWS_PIXEL_F32,
-);
-
-/**
- * Swizzle by directly swapping the order of arguments to the continuation.
- * Note that this is only safe to do if no arguments are duplicated.
- */
-#define DECL_SWIZZLE(X, Y, Z, W)                                                \
-static void                                                                     \
-fn(swizzle_##X##Y##Z##W)(SwsOpIter *restrict iter,                              \
-                         const SwsOpImpl *restrict impl,                        \
-                         void *restrict c0, void *restrict c1,                  \
-                         void *restrict c2, void *restrict c3)                  \
-{                                                                               \
-    CONTINUE(c##X, c##Y, c##Z, c##W);                                           \
-}                                                                               \
-                                                                                \
-DECL_ENTRY(swizzle_##X##Y##Z##W, SWS_COMP_ALL,                                  \
-    .op = SWS_OP_SWIZZLE,                                                       \
-    .swizzle.in = { X, Y, Z, W },                                               \
-);
-
-DECL_SWIZZLE(3, 0, 1, 2)
-DECL_SWIZZLE(3, 0, 2, 1)
-DECL_SWIZZLE(2, 1, 0, 3)
-DECL_SWIZZLE(3, 2, 1, 0)
-DECL_SWIZZLE(3, 1, 0, 2)
-DECL_SWIZZLE(3, 2, 0, 1)
-DECL_SWIZZLE(1, 2, 0, 3)
-DECL_SWIZZLE(1, 0, 2, 3)
-DECL_SWIZZLE(2, 0, 1, 3)
-DECL_SWIZZLE(2, 3, 1, 0)
-DECL_SWIZZLE(2, 1, 3, 0)
-DECL_SWIZZLE(1, 2, 3, 0)
-DECL_SWIZZLE(1, 3, 2, 0)
-DECL_SWIZZLE(0, 2, 1, 3)
-DECL_SWIZZLE(0, 2, 3, 1)
-DECL_SWIZZLE(0, 3, 1, 2)
-DECL_SWIZZLE(3, 1, 2, 0)
-DECL_SWIZZLE(0, 3, 2, 1)
-
-/* Broadcast luma -> rgb (only used for y(a) -> rgb(a)) */
-#define DECL_EXPAND_LUMA(X, W, T0, T1)                                          \
-DECL_FUNC(expand_luma_##X##W##_impl,                                            \
-          block_t c0, block_t c1, block_t c2, block_t c3)                       \
-{                                                                               \
-    SWS_LOOP                                                                    \
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++)                                    \
-        T0[i] = T1[i] = c0[i];                                                  \
-                                                                                \
-    CONTINUE(c##X, T0, T1, c##W);                                               \
-}                                                                               \
-                                                                                \
-DECL_IMPL(expand_luma_##X##W##_impl, expand_luma_##X##W, x, y, z, w)            \
-                                                                                \
-DECL_ENTRY(expand_luma_##X##W, SWS_COMP_ALL,                                    \
-    .op = SWS_OP_SWIZZLE,                                                       \
-    .swizzle.in = { X, 0, 0, W },                                               \
-);
-
-DECL_EXPAND_LUMA(0, 3, c1, c2)
-DECL_EXPAND_LUMA(3, 0, c1, c2)
-DECL_EXPAND_LUMA(1, 0, c2, c3)
-DECL_EXPAND_LUMA(0, 1, c2, c3)
-
-static const SwsOpTable fn(op_table_int) = {
-    .block_size = SWS_BLOCK_SIZE,
-    .entries = {
-        &fn(op_read_planar1),
-        &fn(op_read_planar2),
-        &fn(op_read_planar3),
-        &fn(op_read_planar4),
-        &fn(op_read_packed2),
-        &fn(op_read_packed3),
-        &fn(op_read_packed4),
-
-        &fn(op_write_planar1),
-        &fn(op_write_planar2),
-        &fn(op_write_planar3),
-        &fn(op_write_planar4),
-        &fn(op_write_packed2),
-        &fn(op_write_packed3),
-        &fn(op_write_packed4),
-
-        &fn(op_filter1_v),
-        &fn(op_filter2_v),
-        &fn(op_filter3_v),
-        &fn(op_filter4_v),
-
-        &fn(op_filter1_h),
-        &fn(op_filter2_h),
-        &fn(op_filter3_h),
-        &fn(op_filter4_h),
-
-#if BIT_DEPTH == 8
-        &fn(op_read_bits1),
-        &fn(op_read_nibbles1),
-        &fn(op_write_bits1),
-        &fn(op_write_nibbles1),
-
-        &fn(op_pack_1210),
-        &fn(op_pack_2330),
-        &fn(op_pack_3320),
-
-        &fn(op_unpack_1210),
-        &fn(op_unpack_2330),
-        &fn(op_unpack_3320),
-
-        REF_COMMON_PATTERNS(expand16),
-        REF_COMMON_PATTERNS(expand32),
-#elif BIT_DEPTH == 16
-        &fn(op_pack_4440),
-        &fn(op_pack_5550),
-        &fn(op_pack_5650),
-        &fn(op_unpack_4440),
-        &fn(op_unpack_5550),
-        &fn(op_unpack_5650),
-#elif BIT_DEPTH == 32
-        &fn(op_pack_2101010),
-        &fn(op_pack_1010102),
-        &fn(op_unpack_2101010),
-        &fn(op_unpack_1010102),
-#endif
-
-#ifdef SWAP_BYTES
-        REF_COMMON_PATTERNS(swap_bytes),
-#endif
-
-        REF_COMMON_PATTERNS(min),
-        REF_COMMON_PATTERNS(max),
-        REF_COMMON_PATTERNS(scale),
-        REF_COMMON_PATTERNS(convert_float),
-
-        &fn(op_clear_0001),
-        &fn(op_clear_1000),
-        &fn(op_clear_1100),
-        &fn(op_clear_0100),
-        &fn(op_clear_0110),
-        &fn(op_clear_0011),
-        &fn(op_clear_1010),
-        &fn(op_clear_0101),
-        &fn(op_clear_0111),
-        &fn(op_clear_1011),
-        &fn(op_clear_1101),
-
-        &fn(op_swizzle_3012),
-        &fn(op_swizzle_3021),
-        &fn(op_swizzle_2103),
-        &fn(op_swizzle_3210),
-        &fn(op_swizzle_3102),
-        &fn(op_swizzle_3201),
-        &fn(op_swizzle_1203),
-        &fn(op_swizzle_1023),
-        &fn(op_swizzle_2013),
-        &fn(op_swizzle_2310),
-        &fn(op_swizzle_2130),
-        &fn(op_swizzle_1230),
-        &fn(op_swizzle_1320),
-        &fn(op_swizzle_0213),
-        &fn(op_swizzle_0231),
-        &fn(op_swizzle_0312),
-        &fn(op_swizzle_3120),
-        &fn(op_swizzle_0321),
-
-        &fn(op_expand_luma_03),
-        &fn(op_expand_luma_30),
-        &fn(op_expand_luma_10),
-        &fn(op_expand_luma_01),
-
-#if BIT_DEPTH != 8
-        REF_COMMON_PATTERNS(lshift),
-        REF_COMMON_PATTERNS(rshift),
-        REF_COMMON_PATTERNS(convert_uint8),
-#endif /* BIT_DEPTH != 8 */
-
-#if BIT_DEPTH != 16
-        REF_COMMON_PATTERNS(convert_uint16),
-#endif
-#if BIT_DEPTH != 32
-        REF_COMMON_PATTERNS(convert_uint32),
-#endif
-
-        NULL
-    },
-};
-
-#undef PIXEL_TYPE
-#undef PIXEL_MAX
-#undef SWAP_BYTES
-#undef pixel_t
-#undef inter_t
-#undef block_t
-#undef px
-
-#undef FMT_CHAR
-#undef IS_FLOAT
diff --git a/libswscale/uops_backend.c b/libswscale/uops_backend.c
index 591fc154db..bcfc511e57 100644
--- a/libswscale/uops_backend.c
+++ b/libswscale/uops_backend.c
@@ -189,8 +189,8 @@ fail:
     return ret;
 }
 
-const SwsOpBackend backend_uops = {
-    .name       = "uops",
+const SwsOpBackend backend_c = {
+    .name       = "c",
     .flags      = SWS_BACKEND_C,
     .compile    = compile,
     .hw_format  = AV_PIX_FMT_NONE,

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 18/34: swscale/ops_backend: delete ops-based C backend

Reply via email to