[FFmpeg-cvslog] [ffmpeg] 04/12: swscale/ops_dispatch: generalize over_read/over_write to array

Niklas Haas via ffmpeg-cvslog Thu, 11 Jun 2026 09:29:33 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit 2d01687fd0b13b00c5991017cf7e38c932cb2d18
Author:     Niklas Haas <[email protected]>
AuthorDate: Tue Jun 2 14:29:39 2026 +0200
Commit:     Niklas Haas <[email protected]>
CommitDate: Thu Jun 11 16:27:47 2026 +0000

    swscale/ops_dispatch: generalize over_read/over_write to array
    
    I want to introduce operations like semiplanar reads, which would
    possibly require a different number of over_read bytes per plane.
    
    That aside, this is just a general cleanliness improvement.
    
    Sponsored-by: Sovereign Tech Fund
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/ops_chain.c    |  7 +++++--
 libswscale/ops_chain.h    | 10 +++++-----
 libswscale/ops_dispatch.c | 27 ++++++++++++++++++---------
 libswscale/ops_dispatch.h |  6 +++---
 libswscale/uops_backend.c |  5 +++--
 libswscale/x86/ops.c      | 26 +++++++++++++++++---------
 6 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/libswscale/ops_chain.c b/libswscale/ops_chain.c
index 331f2f696b..d60f7ccf28 100644
--- a/libswscale/ops_chain.c
+++ b/libswscale/ops_chain.c
@@ -173,9 +173,12 @@ int ff_sws_uop_lookup(SwsContext *ctx, const SwsOpTable *const tables[],
         return ret;
     }
 
+    for (int i = 0; i < 4; i++) {
+        chain->over_read[i]  = FFMAX(chain->over_read[i],  res.over_read[i]);
+        chain->over_write[i] = FFMAX(chain->over_write[i], res.over_write[i]);
+    }
+
     chain->cpu_flags |= params.table->cpu_flags;
-    chain->over_read  = FFMAX(chain->over_read,  res.over_read);
-    chain->over_write = FFMAX(chain->over_write, res.over_write);
     return 0;
 }
 
diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h
index 2126787782..abe4c545cd 100644
--- a/libswscale/ops_chain.h
+++ b/libswscale/ops_chain.h
@@ -86,9 +86,9 @@ typedef struct SwsOpChain {
     SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */
     void (*free[SWS_MAX_OPS + 1])(SwsOpPriv *);
     int num_impl;
-    int cpu_flags;  /* set of all used CPU flags */
-    int over_read;  /* chain over-reads input by this many bytes */
-    int over_write; /* chain over-writes output by this many bytes */
+    int cpu_flags;      /* set of all used CPU flags */
+    int over_read[4];   /* chain over-reads input by this many bytes */
+    int over_write[4];  /* chain over-writes output by this many bytes */
 } SwsOpChain;
 
 SwsOpChain *ff_sws_op_chain_alloc(void);
@@ -115,8 +115,8 @@ typedef struct SwsImplResult {
     SwsFuncPtr func; /* overrides `SwsOpEntry.func` if non-NULL */
     SwsOpPriv priv; /* private data for this implementation instance */
     void (*free)(SwsOpPriv *priv); /* free function for `priv` */
-    int over_read;  /* implementation over-reads input by this many bytes */
-    int over_write; /* implementation over-writes output by this many bytes */
+    int over_read[4];  /* implementation over-reads input by this many bytes */
+    int over_write[4]; /* implementation over-writes output by this many bytes */
 } SwsImplResult;
 
 typedef struct SwsOpEntry {
diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c
index 1186a8a73e..fcf77204db 100644
--- a/libswscale/ops_dispatch.c
+++ b/libswscale/ops_dispatch.c
@@ -81,8 +81,12 @@ static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
     *out = compiled;
 
     av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
-           "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
-           backend->name, out->block_size, out->over_read, out->over_write,
+           "block size = %d, over-read = {%d %d %d %d}, over-write = {%d %d %d %d}, "
+           "cpu flags = 0x%x\n", backend->name, out->block_size,
+           out->over_read[0], out->over_read[1],
+           out->over_read[2], out->over_read[3],
+           out->over_write[0], out->over_write[1],
+           out->over_write[2], out->over_write[3],
            out->cpu_flags);
 
     ff_sws_op_list_print(ctx, AV_LOG_VERBOSE, AV_LOG_TRACE, ops);
@@ -229,7 +233,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
             input_bytes = pixel_bytes(plane_w, p->pixel_bits_in, AV_ROUND_UP);
         }
 
-        size_t safe_bytes = safe_bytes_pad(input_bytes, comp->over_read);
+        size_t safe_bytes = safe_bytes_pad(input_bytes, comp->over_read[i]);
         size_t safe_blocks_in;
         if (exec->in_offset_x) {
             size_t filter_size = pixel_bytes(p->filter_size_h, p->pixel_bits_in,
@@ -260,7 +264,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
         int chroma = idx == 1 || idx == 2;
         int sub_x  = chroma ? outdesc->log2_chroma_w : 0;
         int sub_y  = chroma ? outdesc->log2_chroma_h : 0;
-        size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
+        size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write[i]);
         size_t safe_blocks_out = safe_bytes / exec->block_size_out;
         if (safe_blocks_out < num_blocks) {
             p->memcpy_out = true;
@@ -314,7 +318,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
             needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
         }
         size_t loop_size   = p->tail_blocks * exec->block_size_in;
-        tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
+        tail->in_stride[i] = FFALIGN(needed_size + comp->over_read[i], align);
         tail->in_bump[i]   = tail->in_stride[i] - loop_size;
         alloc_size += tail->in_stride[i] * in->height;
     }
@@ -322,7 +326,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
     for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
         size_t needed_size  = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
         size_t loop_size    = p->tail_blocks * exec->block_size_out;
-        tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
+        tail->out_stride[i] = FFALIGN(needed_size + comp->over_write[i], align);
         tail->out_bump[i]   = tail->out_stride[i] - loop_size;
         alloc_size += tail->out_stride[i] * out->height;
     }
@@ -484,17 +488,22 @@ static int rw_pixel_bits(const SwsOp *op)
     return elems * size * bits;
 }
 
-static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
+static void align_pass(SwsPass *pass, int block_size, const int *over_rw,
+                       int pixel_bits)
 {
     if (!pass)
         return;
 
     /* Add at least as many pixels as needed to cover the padding requirement */
-    const int pad = (over_rw * 8 + pixel_bits - 1) / pixel_bits;
+    int pad_max = 0;
+    for (int i = 0; i < 4; i++) {
+        const int pad = (over_rw[i] * 8 + pixel_bits - 1) / pixel_bits;
+        pad_max = FFMAX(pad_max, pad);
+    }
 
     SwsPassBuffer *buf = pass->output;
     buf->width_align = FFMAX(buf->width_align, block_size);
-    buf->width_pad = FFMAX(buf->width_pad, pad);
+    buf->width_pad = FFMAX(buf->width_pad, pad_max);
 }
 
 static int compile(SwsGraph *graph, const SwsOpBackend *backend,
diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h
index 7f1304dcc4..237a036f69 100644
--- a/libswscale/ops_dispatch.h
+++ b/libswscale/ops_dispatch.h
@@ -119,9 +119,9 @@ typedef struct SwsCompiledOp {
     int cpu_flags;   /* active set of CPU flags (informative) */
 
     /* Execution parameters for non-opaque functions only */
-    int block_size;  /* number of pixels processed per iteration */
-    int over_read;   /* implementation over-reads input by this many bytes */
-    int over_write;  /* implementation over-writes output by this many bytes */
+    int block_size;     /* number of pixels processed per iteration */
+    int over_read[4];   /* implementation over-reads input by this many bytes */
+    int over_write[4];  /* implementation over-writes output by this many bytes */
 
     /* Arbitrary private data */
     void *priv;
diff --git a/libswscale/uops_backend.c b/libswscale/uops_backend.c
index fd7220a57a..50f5302ca6 100644
--- a/libswscale/uops_backend.c
+++ b/libswscale/uops_backend.c
@@ -165,13 +165,14 @@ static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
         .slice_align = 1,
         .block_size  = SWS_BLOCK_SIZE,
         .cpu_flags   = chain->cpu_flags,
-        .over_read   = chain->over_read,
-        .over_write  = chain->over_write,
         .priv        = chain,
         .free        = ff_sws_op_chain_free_cb,
         .func        = process,
     };
 
+    memcpy(out->over_read,  chain->over_read,  sizeof(out->over_read));
+    memcpy(out->over_write, chain->over_write, sizeof(out->over_write));
+
     av_log(ctx, AV_LOG_DEBUG, "Compiled micro-ops:\n");
     for (int i = 0; i < uops->num_ops; i++) {
         char name[SWS_UOP_NAME_MAX];
diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index 4c8eceb1cb..e8b0a20a1c 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -35,8 +35,8 @@ static int setup_rw_packed(const SwsImplParams *params, SwsImplResult *out)
     /* 3-component packed reads/writes process one extra garbage word */
     if (uop->mask == SWS_COMP_ELEMS(3)) {
         switch (uop->uop) {
-        case SWS_UOP_READ_PACKED:  out->over_read  = sizeof(uint32_t); break;
-        case SWS_UOP_WRITE_PACKED: out->over_write = sizeof(uint32_t); break;
+        case SWS_UOP_READ_PACKED:  out->over_read[0]  = sizeof(uint32_t); break;
+        case SWS_UOP_WRITE_PACKED: out->over_write[0] = sizeof(uint32_t); break;
         }
     }
 
@@ -153,7 +153,11 @@ static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)
     out->priv.ptr = weights.ptr;
     out->priv.uptr[1] = aligned_size;
     out->free = ff_op_priv_free;
-    out->over_read = (aligned_size - filter_size) * pixel_size;
+
+    for (int i = 0; i < 4; i++) {
+        if (uop->mask & SWS_COMP(i))
+            out->over_read[i] = (aligned_size - filter_size) * pixel_size;
+    }
     return 0;
 }
 
@@ -236,7 +240,11 @@ static int setup_filter_h_4x4(const SwsImplParams *params, SwsImplResult *out)
     out->priv.ptr = weights.ptr;
     out->priv.uptr[1] = aligned_size * sizeof_weights;
     out->free = ff_op_priv_free;
-    out->over_read = (aligned_size - filter_size) * pixel_size;
+
+    for (int i = 0; i < 4; i++) {
+        if (uop->mask & SWS_COMP(i))
+            out->over_read[i] = (aligned_size - filter_size) * pixel_size;
+    }
     return 0;
 }
 
@@ -506,8 +514,8 @@ static int solve_shuffle(const SwsOpList *ops, int mmsize, SwsCompiledOp *out)
         .free        = av_free,
         .slice_align = 1,
         .block_size  = pixels * num_lanes,
-        .over_read   = movsize(in_total,  mmsize) - in_total,
-        .over_write  = movsize(out_total, mmsize) - out_total,
+        .over_read   = { movsize(in_total,  mmsize) - in_total },
+        .over_write  = { movsize(out_total, mmsize) - out_total },
         .cpu_flags   = mmsize > 32 ? AV_CPU_FLAG_AVX512 :
                        mmsize > 16 ? AV_CPU_FLAG_AVX2 :
                                      AV_CPU_FLAG_SSE4,
@@ -640,9 +648,9 @@ static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
         return ret;
     }
 
-    out->cpu_flags  = chain->cpu_flags;
-    out->over_read  = chain->over_read;
-    out->over_write = chain->over_write;
+    out->cpu_flags = chain->cpu_flags;
+    memcpy(out->over_read,  chain->over_read,  sizeof(out->over_read));
+    memcpy(out->over_write, chain->over_write, sizeof(out->over_write));
     ff_sws_uop_list_free(&uops);
     return 0;
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 04/12: swscale/ops_dispatch: generalize over_read/over_write to array

Reply via email to