This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 2576e09434d8026aab1769481b7b2fb43aa567c3
Author:     Ramiro Polla <[email protected]>
AuthorDate: Mon Jun 8 21:10:03 2026 +0200
Commit:     Ramiro Polla <[email protected]>
CommitDate: Wed Jun 10 01:47:11 2026 +0200

    swscale/aarch64/ops: simplify process function generation
    
    There was no good reason to have it as an SwsAArch64OpType.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Ramiro Polla <[email protected]>
---
 libswscale/aarch64/ops.c           | 22 +++++++++++---------
 libswscale/aarch64/ops_asmgen.c    | 42 ++++++++++++++++----------------------
 libswscale/aarch64/ops_entries.c   |  4 ----
 libswscale/aarch64/ops_impl.c      |  3 ---
 libswscale/aarch64/ops_impl.h      |  1 -
 libswscale/tests/sws_ops_aarch64.c | 28 -------------------------
 6 files changed, 30 insertions(+), 70 deletions(-)

diff --git a/libswscale/aarch64/ops.c b/libswscale/aarch64/ops.c
index 5a95792017..0b9a39fe90 100644
--- a/libswscale/aarch64/ops.c
+++ b/libswscale/aarch64/ops.c
@@ -222,22 +222,24 @@ static int aarch64_compile(SwsContext *ctx, const SwsOpList *ops,
     }
 
     /* Look up process function. */
+    void ff_sws_process_0001_neon(void);
+    void ff_sws_process_0011_neon(void);
+    void ff_sws_process_0111_neon(void);
+    void ff_sws_process_1111_neon(void);
+
     const SwsOp *read  = ff_sws_op_list_input(&rest);
     const SwsOp *write = ff_sws_op_list_output(&rest);
     const int read_planes  = read ? (read->rw.packed ? 1 : read->rw.elems) : 0;
     const int write_planes = write->rw.packed ? 1 : write->rw.elems;
-    SwsAArch64OpMask mask = 0;
-    for (int i = 0; i < FFMAX(read_planes, write_planes); i++)
-        MASK_SET(mask, i, 1);
-
-    SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask };
-    SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params);
-    if (!process_func) {
-        ret = AVERROR(ENOTSUP);
-        goto error;
+    SwsOpFunc process_func = NULL;
+    switch (FFMAX(read_planes, write_planes)) {
+    case 1: process_func = (SwsOpFunc) ff_sws_process_0001_neon; break;
+    case 2: process_func = (SwsOpFunc) ff_sws_process_0011_neon; break;
+    case 3: process_func = (SwsOpFunc) ff_sws_process_0111_neon; break;
+    case 4: process_func = (SwsOpFunc) ff_sws_process_1111_neon; break;
     }
 
-    out->func      = (SwsOpFunc) process_func;
+    out->func      = process_func;
     out->cpu_flags = chain->cpu_flags;
 
 error:
diff --git a/libswscale/aarch64/ops_asmgen.c b/libswscale/aarch64/ops_asmgen.c
index 7d4182c909..c03e0832ee 100644
--- a/libswscale/aarch64/ops_asmgen.c
+++ b/libswscale/aarch64/ops_asmgen.c
@@ -272,12 +272,12 @@ static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count,
 }
 
 static unsigned clobbered_gprs(const SwsAArch64Context *s,
-                               const SwsAArch64OpImplParams *p,
+                               SwsAArch64OpMask mask,
                                RasmOp regs[MAX_SAVED_REGS])
 {
     unsigned count = 0;
     clobber_gpr(regs, &count, a64op_lr());
-    LOOP_MASK(p, i) {
+    LOOP(mask, i) {
         clobber_gpr(regs, &count, s->in[i]);
         clobber_gpr(regs, &count, s->out[i]);
         clobber_gpr(regs, &count, s->in_bump[i]);
@@ -286,7 +286,7 @@ static unsigned clobbered_gprs(const SwsAArch64Context *s,
     return count;
 }
 
-static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
+static void asmgen_process(SwsAArch64Context *s, SwsAArch64OpMask mask)
 {
     RasmContext *r = s->rctx;
     char func_name[128];
@@ -297,13 +297,13 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
      * The description in x86/ops_include.asm mostly holds as well here.
      */
 
-    aarch64_op_impl_func_name(func_name, sizeof(func_name), p);
+    snprintf(func_name, sizeof(func_name), "ff_sws_process_%04x_neon", mask);
 
     rasm_func_begin(r, func_name, true, false);
 
     /* Function prologue */
     RasmOp saved_regs[MAX_SAVED_REGS];
-    unsigned nsaved = clobbered_gprs(s, p, saved_regs);
+    unsigned nsaved = clobbered_gprs(s, mask, saved_regs);
     if (nsaved)
         asmgen_prologue(s, saved_regs, nsaved);
 
@@ -312,19 +312,19 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
     i_add(r, s->op1_impl, s->impl, IMM(sizeof_impl));               CMT("SwsOpImpl *op1_impl = impl + 1;");
 
     /* Load values from exec. */
-    LOOP_MASK(p, i) {
+    LOOP(mask, i) {
         rasm_annotate_nextf(r, buf, sizeof(buf), "in[%u] = exec->in[%u];", i, i);
         i_ldr(r, s->in[i],       a64op_off(s->exec, offsetof_exec_in       + (i * sizeof(uint8_t *))));
     }
-    LOOP_MASK(p, i) {
+    LOOP(mask, i) {
         rasm_annotate_nextf(r, buf, sizeof(buf), "out[%u] = exec->out[%u];", i, i);
         i_ldr(r, s->out[i],      a64op_off(s->exec, offsetof_exec_out      + (i * sizeof(uint8_t *))));
     }
-    LOOP_MASK(p, i) {
+    LOOP(mask, i) {
         rasm_annotate_nextf(r, buf, sizeof(buf), "in_bump[%u] = exec->in_bump[%u];", i, i);
         i_ldr(r, s->in_bump[i],  a64op_off(s->exec, offsetof_exec_in_bump  + (i * sizeof(ptrdiff_t))));
     }
-    LOOP_MASK(p, i) {
+    LOOP(mask, i) {
         rasm_annotate_nextf(r, buf, sizeof(buf), "out_bump[%u] = exec->out_bump[%u];", i, i);
         i_ldr(r, s->out_bump[i], a64op_off(s->exec, offsetof_exec_out_bump + (i * sizeof(ptrdiff_t))));
     }
@@ -338,8 +338,8 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p
 
     /* Perform padding, preparing for next row. */
     rasm_add_label(r, next_row);            CMT("next_row:");
-    LOOP_MASK(p, i) { i_add(r, s->in[i],  s->in[i],  s->in_bump[i]);  CMTF("in[%u] += in_bump[%u];", i, i); }
-    LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); }
+    LOOP(mask, i) { i_add(r, s->in[i],  s->in[i],  s->in_bump[i]);  CMTF("in[%u] += in_bump[%u];", i, i); }
+    LOOP(mask, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); }
 
     /* First row (reset x). */
     rasm_add_label(r, first_row);           CMT("first_row:");
@@ -1438,18 +1438,6 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
     }
 }
 
-static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
-{
-    switch (p->op) {
-    case AARCH64_SWS_OP_PROCESS:
-        asmgen_process(s, p);
-        break;
-    default:
-        asmgen_op_cps(s, p);
-        break;
-    }
-}
-
 /*********************************************************************/
 static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params,
                                        const SwsAArch64OpImplParams *prev, const char *p_str)
@@ -1641,10 +1629,16 @@ static int asmgen(void)
     s.in_bump [3] = a64op_gpx(26);
     s.out_bump[3] = a64op_gpx(27);
 
+    /* Generate all process functions using rasm. */
+    asmgen_process(&s, 0x0001);
+    asmgen_process(&s, 0x0011);
+    asmgen_process(&s, 0x0111);
+    asmgen_process(&s, 0x1111);
+
     /* Generate all functions from ops_entries.c using rasm. */
     const SwsAArch64OpImplParams *params = impl_params;
     while (params->op) {
-        asmgen_op(&s, params++);
+        asmgen_op_cps(&s, params++);
         if (rctx->error) {
             ret = rctx->error;
             goto error;
diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c
index ae30ca8b57..04a665a9f1 100644
--- a/libswscale/aarch64/ops_entries.c
+++ b/libswscale/aarch64/ops_entries.c
@@ -3,10 +3,6 @@
  * To regenerate, run: make sws_ops_entries_aarch64
  */
 
-{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0001 },
-{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 },
-{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 },
-{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 },
 { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
 { .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 },
diff --git a/libswscale/aarch64/ops_impl.c b/libswscale/aarch64/ops_impl.c
index 26d6a8d954..d5be4563c6 100644
--- a/libswscale/aarch64/ops_impl.c
+++ b/libswscale/aarch64/ops_impl.c
@@ -76,7 +76,6 @@ static const char *aarch64_pixel_type_name(SwsAArch64PixelType fmt)
 /*********************************************************************/
 static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = {
     [AARCH64_SWS_OP_NONE          ] = "AARCH64_SWS_OP_NONE",
-    [AARCH64_SWS_OP_PROCESS       ] = "AARCH64_SWS_OP_PROCESS",
     [AARCH64_SWS_OP_READ_BIT      ] = "AARCH64_SWS_OP_READ_BIT",
     [AARCH64_SWS_OP_READ_NIBBLE   ] = "AARCH64_SWS_OP_READ_NIBBLE",
     [AARCH64_SWS_OP_READ_PACKED   ] = "AARCH64_SWS_OP_READ_PACKED",
@@ -112,7 +111,6 @@ static const char *aarch64_op_type(SwsAArch64OpType op)
 
 static const char op_type_names[AARCH64_SWS_OP_TYPE_NB][16] = {
     [AARCH64_SWS_OP_NONE          ] = "none",
-    [AARCH64_SWS_OP_PROCESS       ] = "process",
     [AARCH64_SWS_OP_READ_BIT      ] = "read_bit",
     [AARCH64_SWS_OP_READ_NIBBLE   ] = "read_nibble",
     [AARCH64_SWS_OP_READ_PACKED   ] = "read_packed",
@@ -323,7 +321,6 @@ static const ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2)
 /* Fields needed to uniquely identify each SwsAArch64OpType. */
 #define MAX_LEVELS 8
 static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = {
-    [AARCH64_SWS_OP_PROCESS       ] = { &field_op,                                                                                  &field_mask },
     [AARCH64_SWS_OP_READ_BIT      ] = { &field_op,                                                  &field_block_size, &field_type, &field_mask },
     [AARCH64_SWS_OP_READ_NIBBLE   ] = { &field_op,                                                  &field_block_size, &field_type, &field_mask },
     [AARCH64_SWS_OP_READ_PACKED   ] = { &field_op,                                                  &field_block_size, &field_type, &field_mask },
diff --git a/libswscale/aarch64/ops_impl.h b/libswscale/aarch64/ops_impl.h
index f0bbc9f697..9ccacc60e7 100644
--- a/libswscale/aarch64/ops_impl.h
+++ b/libswscale/aarch64/ops_impl.h
@@ -37,7 +37,6 @@ typedef enum SwsAArch64PixelType {
 /* Similar to SwsOpType */
 typedef enum SwsAArch64OpType {
     AARCH64_SWS_OP_NONE = 0,
-    AARCH64_SWS_OP_PROCESS,
     AARCH64_SWS_OP_READ_BIT,
     AARCH64_SWS_OP_READ_NIBBLE,
     AARCH64_SWS_OP_READ_PACKED,
diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c
index 84300c6af4..4fa10c7bb0 100644
--- a/libswscale/tests/sws_ops_aarch64.c
+++ b/libswscale/tests/sws_ops_aarch64.c
@@ -72,30 +72,6 @@ error:
     return ret;
 }
 
-/* Collect the parameters for the process function. */
-static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root)
-{
-    const SwsOp *read  = ff_sws_op_list_input(ops);
-    const SwsOp *write = ff_sws_op_list_output(ops);
-    const int read_planes  = read ? (read->rw.packed ? 1 : read->rw.elems) : 0;
-    const int write_planes = write->rw.packed ? 1 : write->rw.elems;
-    int ret;
-
-    SwsAArch64OpMask mask = 0;
-    for (int i = 0; i < FFMAX(read_planes, write_planes); i++)
-        MASK_SET(mask, i, 1);
-    SwsAArch64OpImplParams params = {
-        .op   = AARCH64_SWS_OP_PROCESS,
-        .mask = mask,
-    };
-
-    ret = aarch64_collect_op(&params, root);
-    if (ret < 0)
-        return ret;
-
-    return 0;
-}
-
 static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops)
 {
     struct AVTreeNode **root = (struct AVTreeNode **) opaque;
@@ -106,10 +82,6 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops)
     /* Use at most two full vregs during the widest precision section */
     int block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16;
 
-    ret = aarch64_collect_process(&rest, root);
-    if (ret < 0)
-        return ret;
-
     for (int i = 0; i < rest.num_ops; i++) {
         SwsAArch64OpImplParams params = { 0 };
         ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, &params);

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to