This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit dc88bcdf8c7e49b2c831418cc68d26f1f20a20e1
Author:     Niklas Haas <[email protected]>
AuthorDate: Tue Apr 7 17:37:41 2026 +0200
Commit:     Niklas Haas <[email protected]>
CommitDate: Tue Jun 9 01:09:34 2026 +0200

    swscale/uops: add uop definitions
    
    Taken from AARCH64_OP_*, but generalized/simplified a bit and updated to add
    missing op types, especially for special cases that already have dedicated
    implementations on x86.
    
    This initial definition is kept intentionally simple and close to SwsOp, to
    make it easier to port the existing ops backends to the new infrastructure.
    However, in the future, this will be refactored dramatically - distinctions
    like convert vs expand will cease to exist on the SwsOp level, and will
    instead be introduced by separate optimization passes on the uops level.
    
    SWS_UOP_LINEAR in particular will most likely be broken up into multiple
    uops. I also took this opportunity to redefine the mask in a more useful
    way.
    
    I decided to split up SWS_OP_CONVERT as well, because it was making x86
    codegen unnecessarily difficult due to the strong interaction between exact
    pixel sizes.
    
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/Makefile |   1 +
 libswscale/uops.c   | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 libswscale/uops.h   | 150 +++++++++++++++++++++++++++++++++++++
 3 files changed, 363 insertions(+)

diff --git a/libswscale/Makefile b/libswscale/Makefile
index e8765a190c..2738d66f35 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -35,6 +35,7 @@ OBJS-$(CONFIG_UNSTABLE) +=                              \
        ops_dispatch.o                                   \
        ops_memcpy.o                                     \
        ops_optimizer.o                                  \
+       uops.o                                           \
 
 ifeq ($(CONFIG_UNSTABLE),yes)
 include $(SRC_PATH)/libswscale/vulkan/Makefile
diff --git a/libswscale/uops.c b/libswscale/uops.c
new file mode 100644
index 0000000000..5dc6694375
--- /dev/null
+++ b/libswscale/uops.c
@@ -0,0 +1,212 @@
+/**
+ * Copyright (C) 2026 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include <stdbool.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/mem.h"
+#include "libavutil/refstruct.h"
+
+#include "ops.h"
+#include "uops.h"
+
+/**
+ * Three-way comparison of the identifying fields of two uops.
+ *
+ * The fields are compared in the order type, uop, mask and finally the raw
+ * bytes of the parameter union. The constant data in `data` is never looked
+ * at.
+ *
+ * @return 0 if all identifying fields match, otherwise a nonzero value whose
+ *         sign orders `a` relative to `b`
+ */
+int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
+{
+    int order;
+
+    order = (int) a->type - b->type;
+    if (order)
+        return order;
+
+    order = (int) a->uop - b->uop;
+    if (order)
+        return order;
+
+    order = (int) a->mask - b->mask;
+    if (order)
+        return order;
+
+    /* All scalar fields agree; fall back to a bytewise parameter compare */
+    return memcmp(&a->par, &b->par, sizeof(a->par));
+}
+
+/*
+ * Name strings for every uop, indexed by SwsUOpType. `full` is the enum
+ * identifier spelled out ("SWS_UOP_" followed by the uop), `abbr` is the
+ * lowercase short form that ff_sws_uop_name() embeds in generated names.
+ */
+static const struct {
+    char full[24]; /* longest entry, "SWS_UOP_READ_PLANAR_FH", is 22 chars + NUL */
+    char abbr[16]; /* longest entry, "read_planar_fh", is 14 chars + NUL */
+} uop_names[SWS_UOP_TYPE_NB] = {
+#define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR }
+    UOP_NAME(INVALID,           "invalid"),
+    UOP_NAME(READ_PLANAR,       "read_planar"),
+    UOP_NAME(READ_PLANAR_FH,    "read_planar_fh"),
+    UOP_NAME(READ_PLANAR_FV,    "read_planar_fv"),
+    UOP_NAME(READ_PACKED,       "read_packed"),
+    UOP_NAME(READ_NIBBLE,       "read_nibble"),
+    UOP_NAME(READ_BIT,          "read_bit"),
+    UOP_NAME(WRITE_PLANAR,      "write_planar"),
+    UOP_NAME(WRITE_PACKED,      "write_packed"),
+    UOP_NAME(WRITE_NIBBLE,      "write_nibble"),
+    UOP_NAME(WRITE_BIT,         "write_bit"),
+    UOP_NAME(PERMUTE,           "permute"),
+    UOP_NAME(COPY,              "copy"),
+    UOP_NAME(SWAP_BYTES,        "swap_bytes"),
+    UOP_NAME(EXPAND_BIT,        "expand_bit"),
+    UOP_NAME(EXPAND_PAIR,       "expand_pair"),
+    UOP_NAME(EXPAND_QUAD,       "expand_quad"),
+    UOP_NAME(TO_U8,             "to_u8"),
+    UOP_NAME(TO_U16,            "to_u16"),
+    UOP_NAME(TO_U32,            "to_u32"),
+    UOP_NAME(TO_F32,            "to_f32"),
+    UOP_NAME(SCALE,             "scale"),
+    UOP_NAME(LINEAR,            "linear"),
+    UOP_NAME(ADD,               "add"),
+    UOP_NAME(MIN,               "min"),
+    UOP_NAME(MAX,               "max"),
+    UOP_NAME(UNPACK,            "unpack"),
+    UOP_NAME(PACK,              "pack"),
+    UOP_NAME(LSHIFT,            "lshift"),
+    UOP_NAME(RSHIFT,            "rshift"),
+    UOP_NAME(CLEAR,             "clear"),
+    UOP_NAME(DITHER,            "dither"),
+#undef UOP_NAME
+};
+
+/**
+ * Format a name for `op` into `buf`.
+ *
+ * The name is assembled from, in this order:
+ *  - the pixel type name followed by '_' (skipped for SWS_PIXEL_NONE),
+ *  - the abbreviated uop name taken from uop_names[],
+ *  - if the mask is nonzero, '_' followed by one of 'x', 'y', 'z', 'w' for
+ *    each component set in the mask,
+ *  - a uop-specific suffix derived from `op->par` (see the switch below).
+ *
+ * The constant data in `op->data` is never read. `op->uop` is used directly
+ * as an index into uop_names[] without a range check.
+ *
+ * @param op  uop to name
+ * @param buf output buffer of SWS_UOP_NAME_MAX bytes; the function asserts
+ *            (av_assert0) that the complete name fit into it
+ */
+void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
+{
+    AVBPrint bp;
+    av_bprint_init_for_buffer(&bp, buf, SWS_UOP_NAME_MAX);
+
+    /* Prefix: "<pixel type>_<uop>" or just "<uop>" for untyped uops */
+    if (op->type != SWS_PIXEL_NONE)
+        av_bprintf(&bp, "%s_", ff_sws_pixel_type_name(op->type));
+    av_bprintf(&bp, "%s", uop_names[op->uop].abbr);
+
+    /* Component mask, e.g. "_xyz" */
+    if (op->mask) {
+        av_bprint_chars(&bp, '_', 1);
+        for (int i = 0; i < 4; i++) {
+            if (SWS_COMP_TEST(op->mask, i))
+                av_bprint_chars(&bp, "xyzw"[i], 1);
+        }
+    }
+
+    /* Parameter suffix; uops not listed here add nothing further */
+    const SwsUOpParams *par = &op->par;
+    switch (op->uop) {
+    case SWS_UOP_LSHIFT:
+    case SWS_UOP_RSHIFT:
+        /* "_<amount>" in decimal */
+        av_bprintf(&bp, "_%u", par->shift.amount);
+        break;
+    case SWS_UOP_PERMUTE:
+    case SWS_UOP_COPY:
+        /* '_' then, per masked output component, the letter of its input */
+        av_bprint_chars(&bp, '_', 1);
+        for (int i = 0; i < 4; i++) {
+            if (SWS_COMP_TEST(op->mask, i))
+                av_bprint_chars(&bp, "xyzw"[par->swizzle.in[i]], 1);
+        }
+        break;
+    case SWS_UOP_PACK:
+    case SWS_UOP_UNPACK:
+        /* '_' then each pattern entry in hex, stopping at the first zero */
+        av_bprint_chars(&bp, '_', 1);
+        for (int i = 0; i < 4 && par->pack.pattern[i]; i++)
+            av_bprintf(&bp, "%x", par->pack.pattern[i]);
+        break;
+    case SWS_UOP_CLEAR:
+        /*
+         * '_' then one character per masked component: '1' if it is in
+         * clear.one, '0' if it is in clear.zero, 'x' otherwise. clear.one is
+         * tested first.
+         */
+        av_bprint_chars(&bp, '_', 1);
+        for (int i = 0; i < 4; i++) {
+            if (!SWS_COMP_TEST(op->mask, i))
+                continue;
+            else if (SWS_COMP_TEST(par->clear.one, i))
+                av_bprint_chars(&bp, '1', 1);
+            else if (SWS_COMP_TEST(par->clear.zero, i))
+                av_bprint_chars(&bp, '0', 1);
+            else
+                av_bprint_chars(&bp, 'x', 1);
+        }
+        break;
+    case SWS_UOP_LINEAR:
+        /*
+         * For each masked row i: '_' followed by five characters, one per
+         * column j, using the same '1' / '0' / 'x' coding looked up through
+         * SWS_MASK(i, j) in lin.one and lin.zero.
+         */
+        for (int i = 0; i < 4; i++) {
+            if (!SWS_COMP_TEST(op->mask, i))
+                continue;
+            av_bprint_chars(&bp, '_', 1);
+            for (int j = 0; j < 5; j++) {
+                if (par->lin.one & SWS_MASK(i, j))
+                    av_bprint_chars(&bp, '1', 1);
+                else if (par->lin.zero & SWS_MASK(i, j))
+                    av_bprint_chars(&bp, '0', 1);
+                else
+                    av_bprint_chars(&bp, 'x', 1);
+            }
+        }
+        break;
+    case SWS_UOP_DITHER:
+        /* "_<y_offset>" per masked component, then "_<N>x<N>", N = 1 << size_log2 */
+        for (int i = 0; i < 4; i++) {
+            if (SWS_COMP_TEST(op->mask, i))
+                av_bprintf(&bp, "_%d", par->dither.y_offset[i]);
+        }
+        const unsigned size = 1u << par->dither.size_log2;
+        av_bprintf(&bp, "_%ux%u", size, size);
+        break;
+    }
+
+    /* A truncated name would no longer be unique, so treat it as fatal */
+    av_assert0(av_bprint_is_complete(&bp));
+}
+
+/**
+ * Drop the reference `uop` holds on its constant data, if its kind carries
+ * one, and reset the whole uop to zero.
+ */
+static void uop_uninit(SwsUOp *uop)
+{
+    const SwsUOpType kind = uop->uop;
+
+    if (kind == SWS_UOP_DITHER) {
+        /* dither uops keep their data behind the refstruct pointer */
+        av_refstruct_unref(&uop->data.ptr);
+    } else if (kind == SWS_UOP_READ_PLANAR_FH ||
+               kind == SWS_UOP_READ_PLANAR_FV) {
+        /* filtered reads keep a refstruct filter kernel */
+        av_refstruct_unref(&uop->data.kernel);
+    }
+
+    *uop = (SwsUOp) {0};
+}
+
+/**
+ * Free a uop list together with every uop stored in it, then reset `*p_ops`
+ * to NULL. Does nothing if `*p_ops` is already NULL.
+ */
+void ff_sws_uop_list_free(SwsUOpList **p_ops)
+{
+    SwsUOpList *const list = *p_ops;
+
+    if (list) {
+        const int count = list->num_ops;
+
+        /* Release per-uop constant data before dropping the array itself */
+        for (int idx = 0; idx < count; idx++)
+            uop_uninit(list->ops + idx);
+
+        av_freep(&list->ops);
+        av_freep(p_ops); /* frees the list and clears the caller's pointer */
+    }
+}
+
+/**
+ * Allocate an empty, zero-initialized uop list.
+ *
+ * @return the new list, or NULL if the allocation failed
+ */
+SwsUOpList *ff_sws_uop_list_alloc(void)
+{
+    SwsUOpList *list = av_mallocz(sizeof(*list));
+    return list;
+}
+
+/**
+ * Append `*uop` to the end of `uops`, handing ownership of its constant data
+ * to the list. `*uop` is reset to {0} in every case, including failure.
+ *
+ * The array is regrown whenever the element count is zero or a power of two,
+ * which is the growth policy of av_dynarray2_add(). Unlike that helper, a
+ * failed reallocation leaves the existing array untouched: the uops already
+ * stored stay owned by the list and are still released by
+ * ff_sws_uop_list_free(), instead of being dropped together with their
+ * refstruct data.
+ *
+ * @param uops list to append to
+ * @param uop  uop to move into the list
+ * @return 0 on success, AVERROR(ENOMEM) if the array could not be grown
+ */
+int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop)
+{
+    const int count = uops->num_ops;
+
+    /* count & (count - 1) is zero exactly for 0 and for powers of two */
+    if (!(count & (count - 1))) {
+        SwsUOp *grown = NULL;
+
+        /* Refuse to grow once the element count could overflow an int */
+        if (count <= INT_MAX / 2) {
+            const size_t capacity = count ? 2 * (size_t) count : 1;
+            grown = av_realloc_array(uops->ops, capacity, sizeof(*grown));
+        }
+        if (!grown) {
+            uop_uninit(uop);
+            return AVERROR(ENOMEM);
+        }
+        uops->ops = grown;
+    }
+
+    uops->ops[count] = *uop;
+    uops->num_ops    = count + 1;
+
+    /* The list owns the data now; make sure the caller cannot free it too */
+    *uop = (SwsUOp) {0};
+    return 0;
+}
+
+/**
+ * Number of lines covered by a dither uop: the matrix size
+ * (1 << size_log2) plus the largest of the four per-component y offsets.
+ */
+int ff_sws_dither_height(const SwsDitherUOp *dither)
+{
+    int highest = 0;
+
+    for (int comp = 0; comp < 4; comp++) {
+        const int offset = dither->y_offset[comp];
+        if (offset > highest)
+            highest = offset;
+    }
+
+    const int matrix_lines = 1 << dither->size_log2;
+    return matrix_lines + highest;
+}
diff --git a/libswscale/uops.h b/libswscale/uops.h
index 07958a6b62..5f1271ec38 100644
--- a/libswscale/uops.h
+++ b/libswscale/uops.h
@@ -21,6 +21,7 @@
 #ifndef SWSCALE_UOPS_H
 #define SWSCALE_UOPS_H
 
+#include <assert.h>
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -30,6 +31,8 @@
 
 #include "libavutil/attributes.h"
 
+typedef struct SwsFilterWeights SwsFilterWeights;
+
 typedef enum SwsPixelType {
     SWS_PIXEL_NONE = 0,
     SWS_PIXEL_U8,
@@ -43,6 +46,18 @@ const char *ff_sws_pixel_type_name(SwsPixelType type);
 int ff_sws_pixel_type_size(SwsPixelType type) av_const;
 bool ff_sws_pixel_type_is_int(SwsPixelType type) av_const;
 
+/**
+ * Storage for one value of any of the supported pixel types. `data` exposes
+ * the raw bytes and is the first member, so `(SwsPixel) {0}` initializes
+ * through it.
+ */
+typedef union SwsPixel {
+    char data[4];
+
+    uint8_t  u8;
+    uint16_t u16;
+    uint32_t u32;
+    float    f32;
+} SwsPixel;
+
+/*
+ * Ensures (SwsPixel) {0} is properly initialized to all zeros: the
+ * initializer fills `data`, so the union must not be any larger than that.
+ */
+static_assert(sizeof(SwsPixel) == sizeof(char[4]), "SwsPixel size mismatch");
+
 /**
  * Bit-mask of components. Exact meaning depends on the usage context.
  */
@@ -61,4 +76,139 @@ enum {
      ((W) ? SWS_COMP(3) : 0))
 };
 
+/**
+ * Kinds of micro-operations. The values are also used as indices into the
+ * name table in uops.c, which is sized by SWS_UOP_TYPE_NB, so that entry
+ * must stay last. The section comments state what SwsUOp.mask means for each
+ * group.
+ */
+typedef enum SwsUOpType {
+    SWS_UOP_INVALID = 0,
+
+    /* Read/write uops; mask = components to read/write */
+    SWS_UOP_READ_PLANAR,     /* simple planar byte-aligned read */
+    SWS_UOP_READ_PLANAR_FH,  /* planar read with horizontal filter */
+    SWS_UOP_READ_PLANAR_FV,  /* planar read with vertical filter */
+    SWS_UOP_READ_PACKED,     /* simple packed byte-aligned read */
+    SWS_UOP_READ_NIBBLE,     /* fractional read (4 bits) from single plane */
+    SWS_UOP_READ_BIT,        /* fractional read (1 bit) from single plane */
+
+    SWS_UOP_WRITE_PLANAR,    /* simple planar byte-aligned write */
+    SWS_UOP_WRITE_PACKED,    /* simple packed byte-aligned write */
+    SWS_UOP_WRITE_NIBBLE,    /* fractional write (4 bits) to single plane */
+    SWS_UOP_WRITE_BIT,       /* fractional write (1 bit) to single plane */
+
+    /* Data rearrangement uops; mask = non-trivial and needed components */
+    SWS_UOP_PERMUTE,         /* rearrange components (no duplicates) */
+    SWS_UOP_COPY,            /* copy/duplicate components */
+
+    /* Data conversion / manipulation uops; mask = affected components */
+    SWS_UOP_SWAP_BYTES,      /* swap byte order in components */
+    SWS_UOP_EXPAND_BIT,      /* expand low-order bit to all bits in type */
+    SWS_UOP_EXPAND_PAIR,     /* expand bytes in pairs (16 bit) */
+    SWS_UOP_EXPAND_QUAD,     /* expand bytes in quads (32 bit) */
+    SWS_UOP_TO_U8,           /* cast pixel values to SWS_PIXEL_U8  */
+    SWS_UOP_TO_U16,          /* cast pixel values to SWS_PIXEL_U16 */
+    SWS_UOP_TO_U32,          /* cast pixel values to SWS_PIXEL_U32 */
+    SWS_UOP_TO_F32,          /* cast pixel values to SWS_PIXEL_F32 */
+
+    /* Arithmetic uops */
+    SWS_UOP_SCALE,           /* multiply masked components by scalar */
+    SWS_UOP_ADD,             /* add vec4 to masked components */
+    SWS_UOP_MIN,             /* min(x, vec4) on masked components */
+    SWS_UOP_MAX,             /* max(x, vec4) on masked components */
+
+    /* Identical to corresponding SwsOpType */
+    SWS_UOP_UNPACK,          /* mask = nonzero components in pack pattern */
+    SWS_UOP_PACK,            /* mask = nonzero components in pack pattern */
+    SWS_UOP_LSHIFT,          /* mask = components to shift */
+    SWS_UOP_RSHIFT,          /* mask = components to shift */
+    SWS_UOP_CLEAR,           /* mask = components to clear */
+    SWS_UOP_LINEAR,          /* mask = non-trivial output rows */
+    SWS_UOP_DITHER,          /* mask = components to dither */
+
+    /* Platform-specific uops would go here */
+    SWS_UOP_TYPE_NB,
+} SwsUOpType;
+
+/* Parameters of SWS_UOP_LSHIFT and SWS_UOP_RSHIFT */
+typedef struct SwsShiftUOp {
+    uint8_t amount; /* shift distance; presumably in bits - TODO confirm */
+} SwsShiftUOp;
+
+/* Parameters of SWS_UOP_PERMUTE and SWS_UOP_COPY */
+typedef struct SwsSwizzleUOp {
+    uint8_t in[4]; /* input component for each output component */
+} SwsSwizzleUOp;
+
+/*
+ * Parameters of SWS_UOP_PACK and SWS_UOP_UNPACK. ff_sws_uop_name() treats the
+ * first zero entry as the end of the pattern.
+ */
+typedef struct SwsPackUOp {
+    uint8_t pattern[4]; /* bit depth pattern, from MSB to LSB */
+} SwsPackUOp;
+
+/*
+ * Parameters of SWS_UOP_CLEAR. A cleared component that appears in neither
+ * mask is shown as 'x' by ff_sws_uop_name().
+ */
+typedef struct SwsClearUOp {
+    SwsCompMask one;  /* mask of coefficients equal to all 1s */
+    SwsCompMask zero; /* mask of coefficients equal to all 0s */
+} SwsClearUOp;
+
+/*
+ * Parameters of SWS_UOP_LINEAR. Both fields hold one bit per matrix
+ * coefficient; ff_sws_uop_name() tests them with SWS_MASK(row, column) for
+ * 4 rows of 5 columns.
+ */
+typedef struct SwsLinearUOp {
+    uint32_t one;  /* mask of coefficients equal to one */
+    uint32_t zero; /* mask of coefficients equal to zero */
+} SwsLinearUOp;
+
+/* Parameters of SWS_UOP_DITHER */
+typedef struct SwsDitherUOp {
+    uint8_t y_offset[4]; /* one offset per component, in lines of the dither matrix */
+    uint8_t size_log2;   /* the matrix size is 1 << size_log2 */
+} SwsDitherUOp;
+
+/**
+ * Computes (1 << size_log2) + MAX(y_offset). The dither matrix attached to
+ * the SwsUOp is always pre-padded to this number of lines.
+ *
+ * @param dither dither parameters; dereferenced unconditionally, so it must
+ *               not be NULL
+ * @return the number of lines as described above
+ */
+int ff_sws_dither_height(const SwsDitherUOp *dither);
+
+/**
+ * Parameters of a uop; which member applies follows from SwsUOp.uop.
+ *
+ * NOTE(review): ff_sws_uop_cmp() compares this union bytewise over its full
+ * size, so bytes not covered by the active member presumably need to be
+ * zeroed (e.g. by starting from (SwsUOp) {0}) for equal uops to compare
+ * equal.
+ */
+typedef union SwsUOpParams {
+    SwsShiftUOp     shift;
+    SwsSwizzleUOp   swizzle;
+    SwsPackUOp      pack;
+    SwsClearUOp     clear;
+    SwsLinearUOp    lin;
+    SwsDitherUOp    dither;
+} SwsUOpParams;
+
+/**
+ * A single micro-operation: the fields that select an implementation, plus
+ * the constant data that implementation operates with.
+ */
+typedef struct SwsUOp {
+    /* These fields uniquely identify the uop implementation */
+    SwsPixelType type;
+    SwsUOpType uop;
+    SwsCompMask mask;
+    SwsUOpParams par;
+
+    /*
+     * Constant data for this uop; not part of the unique identifier.
+     * The list code in uops.c unrefs `kernel` for SWS_UOP_READ_PLANAR_FH/FV
+     * and `ptr` for SWS_UOP_DITHER. The other members are presumably used by
+     * the scalar, vec4 and linear arithmetic uops - TODO confirm.
+     */
+    union {
+        SwsFilterWeights *kernel;   /* refstruct */
+        SwsPixel *ptr;              /* refstruct */
+        SwsPixel scalar;
+        SwsPixel vec4[4];
+        SwsPixel mat4[4][5];        /* row major */
+    } data;
+} SwsUOp;
+
+/**
+ * Compare two SwsUOps for equality (excluding constant data).
+ *
+ * The comparison covers type, uop, mask and the parameter union, in that
+ * order.
+ *
+ * @return 0 if the two uops are identical in those fields, otherwise a
+ *         nonzero value whose sign orders `a` relative to `b`
+ */
+int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b);
+
+/**
+ * Same as ff_sws_uop_cmp(), but taking untyped pointers; both arguments must
+ * point to a SwsUOp. Presumably meant for generic sort/search callbacks -
+ * confirm against callers.
+ */
+static inline int ff_sws_uop_cmp_v(const void *a, const void *b)
+{
+    return ff_sws_uop_cmp(a, b);
+}
+
+/**
+ * Generate a unique name for a SwsUOp.
+ *
+ * The name is built from the pixel type, the uop, the component mask and the
+ * uop parameters; the constant data is not part of it. `buf` must provide
+ * SWS_UOP_NAME_MAX bytes, and the implementation asserts that the complete
+ * name fit.
+ */
+#define SWS_UOP_NAME_MAX 64
+void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX]);
+
+/**
+ * Growable array of uops. Create with ff_sws_uop_list_alloc(), extend with
+ * ff_sws_uop_list_append() and release with ff_sws_uop_list_free().
+ */
+typedef struct SwsUOpList {
+    SwsUOp *ops;  /* array of uops, owned by the list */
+    int num_ops;  /* number of valid entries in `ops` */
+} SwsUOpList;
+
+/* Allocates an empty, zeroed list; returns NULL on allocation failure. */
+SwsUOpList *ff_sws_uop_list_alloc(void);
+
+/* Frees the list and all uops in it, then sets `*ops` to NULL. NULL-safe. */
+void ff_sws_uop_list_free(SwsUOpList **ops);
+
+/*
+ * Takes over ownership of `uop` and sets it to {0}, even on failure.
+ * Returns 0 on success or AVERROR(ENOMEM).
+ */
+int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop);
+
 #endif

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to