This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 2576e09434d8026aab1769481b7b2fb43aa567c3 Author: Ramiro Polla <[email protected]> AuthorDate: Mon Jun 8 21:10:03 2026 +0200 Commit: Ramiro Polla <[email protected]> CommitDate: Wed Jun 10 01:47:11 2026 +0200 swscale/aarch64/ops: simplify process function generation There was no good reason to have it as an SwsAArch64OpType. Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/ops.c | 22 +++++++++++--------- libswscale/aarch64/ops_asmgen.c | 42 ++++++++++++++++---------------------- libswscale/aarch64/ops_entries.c | 4 ---- libswscale/aarch64/ops_impl.c | 3 --- libswscale/aarch64/ops_impl.h | 1 - libswscale/tests/sws_ops_aarch64.c | 28 ------------------------- 6 files changed, 30 insertions(+), 70 deletions(-) diff --git a/libswscale/aarch64/ops.c b/libswscale/aarch64/ops.c index 5a95792017..0b9a39fe90 100644 --- a/libswscale/aarch64/ops.c +++ b/libswscale/aarch64/ops.c @@ -222,22 +222,24 @@ static int aarch64_compile(SwsContext *ctx, const SwsOpList *ops, } /* Look up process function. */ + void ff_sws_process_0001_neon(void); + void ff_sws_process_0011_neon(void); + void ff_sws_process_0111_neon(void); + void ff_sws_process_1111_neon(void); + const SwsOp *read = ff_sws_op_list_input(&rest); const SwsOp *write = ff_sws_op_list_output(&rest); const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0; const int write_planes = write->rw.packed ? 
1 : write->rw.elems; - SwsAArch64OpMask mask = 0; - for (int i = 0; i < FFMAX(read_planes, write_planes); i++) - MASK_SET(mask, i, 1); - - SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask }; - SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params); - if (!process_func) { - ret = AVERROR(ENOTSUP); - goto error; + SwsOpFunc process_func = NULL; + switch (FFMAX(read_planes, write_planes)) { + case 1: process_func = (SwsOpFunc) ff_sws_process_0001_neon; break; + case 2: process_func = (SwsOpFunc) ff_sws_process_0011_neon; break; + case 3: process_func = (SwsOpFunc) ff_sws_process_0111_neon; break; + case 4: process_func = (SwsOpFunc) ff_sws_process_1111_neon; break; } - out->func = (SwsOpFunc) process_func; + out->func = process_func; out->cpu_flags = chain->cpu_flags; error: diff --git a/libswscale/aarch64/ops_asmgen.c b/libswscale/aarch64/ops_asmgen.c index 7d4182c909..c03e0832ee 100644 --- a/libswscale/aarch64/ops_asmgen.c +++ b/libswscale/aarch64/ops_asmgen.c @@ -272,12 +272,12 @@ static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count, } static unsigned clobbered_gprs(const SwsAArch64Context *s, - const SwsAArch64OpImplParams *p, + SwsAArch64OpMask mask, RasmOp regs[MAX_SAVED_REGS]) { unsigned count = 0; clobber_gpr(regs, &count, a64op_lr()); - LOOP_MASK(p, i) { + LOOP(mask, i) { clobber_gpr(regs, &count, s->in[i]); clobber_gpr(regs, &count, s->out[i]); clobber_gpr(regs, &count, s->in_bump[i]); @@ -286,7 +286,7 @@ static unsigned clobbered_gprs(const SwsAArch64Context *s, return count; } -static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) +static void asmgen_process(SwsAArch64Context *s, SwsAArch64OpMask mask) { RasmContext *r = s->rctx; char func_name[128]; @@ -297,13 +297,13 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p * The description in x86/ops_include.asm mostly holds as well here. 
*/ - aarch64_op_impl_func_name(func_name, sizeof(func_name), p); + snprintf(func_name, sizeof(func_name), "ff_sws_process_%04x_neon", mask); rasm_func_begin(r, func_name, true, false); /* Function prologue */ RasmOp saved_regs[MAX_SAVED_REGS]; - unsigned nsaved = clobbered_gprs(s, p, saved_regs); + unsigned nsaved = clobbered_gprs(s, mask, saved_regs); if (nsaved) asmgen_prologue(s, saved_regs, nsaved); @@ -312,19 +312,19 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p i_add(r, s->op1_impl, s->impl, IMM(sizeof_impl)); CMT("SwsOpImpl *op1_impl = impl + 1;"); /* Load values from exec. */ - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "in[%u] = exec->in[%u];", i, i); i_ldr(r, s->in[i], a64op_off(s->exec, offsetof_exec_in + (i * sizeof(uint8_t *)))); } - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "out[%u] = exec->out[%u];", i, i); i_ldr(r, s->out[i], a64op_off(s->exec, offsetof_exec_out + (i * sizeof(uint8_t *)))); } - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "in_bump[%u] = exec->in_bump[%u];", i, i); i_ldr(r, s->in_bump[i], a64op_off(s->exec, offsetof_exec_in_bump + (i * sizeof(ptrdiff_t)))); } - LOOP_MASK(p, i) { + LOOP(mask, i) { rasm_annotate_nextf(r, buf, sizeof(buf), "out_bump[%u] = exec->out_bump[%u];", i, i); i_ldr(r, s->out_bump[i], a64op_off(s->exec, offsetof_exec_out_bump + (i * sizeof(ptrdiff_t)))); } @@ -338,8 +338,8 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p /* Perform padding, preparing for next row. 
*/ rasm_add_label(r, next_row); CMT("next_row:"); - LOOP_MASK(p, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); } - LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); } + LOOP(mask, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); } + LOOP(mask, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); } /* First row (reset x). */ rasm_add_label(r, first_row); CMT("first_row:"); @@ -1438,18 +1438,6 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) } } -static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) -{ - switch (p->op) { - case AARCH64_SWS_OP_PROCESS: - asmgen_process(s, p); - break; - default: - asmgen_op_cps(s, p); - break; - } -} - /*********************************************************************/ static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str) @@ -1641,10 +1629,16 @@ static int asmgen(void) s.in_bump [3] = a64op_gpx(26); s.out_bump[3] = a64op_gpx(27); + /* Generate all process functions using rasm. */ + asmgen_process(&s, 0x0001); + asmgen_process(&s, 0x0011); + asmgen_process(&s, 0x0111); + asmgen_process(&s, 0x1111); + /* Generate all functions from ops_entries.c using rasm. 
*/ const SwsAArch64OpImplParams *params = impl_params; while (params->op) { - asmgen_op(&s, params++); + asmgen_op_cps(&s, params++); if (rctx->error) { ret = rctx->error; goto error; diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index ae30ca8b57..04a665a9f1 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -3,10 +3,6 @@ * To regenerate, run: make sws_ops_entries_aarch64 */ -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, diff --git a/libswscale/aarch64/ops_impl.c b/libswscale/aarch64/ops_impl.c index 26d6a8d954..d5be4563c6 100644 --- a/libswscale/aarch64/ops_impl.c +++ b/libswscale/aarch64/ops_impl.c @@ -76,7 +76,6 @@ static const char *aarch64_pixel_type_name(SwsAArch64PixelType fmt) /*********************************************************************/ static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = { [AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE", - [AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS", [AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT", [AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE", [AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED", @@ -112,7 +111,6 @@ static const char *aarch64_op_type(SwsAArch64OpType op) static const char op_type_names[AARCH64_SWS_OP_TYPE_NB][16] = { [AARCH64_SWS_OP_NONE ] = "none", - [AARCH64_SWS_OP_PROCESS ] = "process", [AARCH64_SWS_OP_READ_BIT ] = "read_bit", [AARCH64_SWS_OP_READ_NIBBLE ] = "read_nibble", [AARCH64_SWS_OP_READ_PACKED ] = "read_packed", @@ -323,7 +321,6 @@ static 
const ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2) /* Fields needed to uniquely identify each SwsAArch64OpType. */ #define MAX_LEVELS 8 static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = { - [AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask }, [AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, diff --git a/libswscale/aarch64/ops_impl.h b/libswscale/aarch64/ops_impl.h index f0bbc9f697..9ccacc60e7 100644 --- a/libswscale/aarch64/ops_impl.h +++ b/libswscale/aarch64/ops_impl.h @@ -37,7 +37,6 @@ typedef enum SwsAArch64PixelType { /* Similar to SwsOpType */ typedef enum SwsAArch64OpType { AARCH64_SWS_OP_NONE = 0, - AARCH64_SWS_OP_PROCESS, AARCH64_SWS_OP_READ_BIT, AARCH64_SWS_OP_READ_NIBBLE, AARCH64_SWS_OP_READ_PACKED, diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c index 84300c6af4..4fa10c7bb0 100644 --- a/libswscale/tests/sws_ops_aarch64.c +++ b/libswscale/tests/sws_ops_aarch64.c @@ -72,30 +72,6 @@ error: return ret; } -/* Collect the parameters for the process function. */ -static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root) -{ - const SwsOp *read = ff_sws_op_list_input(ops); - const SwsOp *write = ff_sws_op_list_output(ops); - const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0; - const int write_planes = write->rw.packed ? 
1 : write->rw.elems; - int ret; - - SwsAArch64OpMask mask = 0; - for (int i = 0; i < FFMAX(read_planes, write_planes); i++) - MASK_SET(mask, i, 1); - SwsAArch64OpImplParams params = { - .op = AARCH64_SWS_OP_PROCESS, - .mask = mask, - }; - - ret = aarch64_collect_op(&params, root); - if (ret < 0) - return ret; - - return 0; -} - static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops) { struct AVTreeNode **root = (struct AVTreeNode **) opaque; @@ -106,10 +82,6 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOpList *ops) /* Use at most two full vregs during the widest precision section */ int block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16; - ret = aarch64_collect_process(&rest, root); - if (ret < 0) - return ret; - for (int i = 0; i < rest.num_ops; i++) { SwsAArch64OpImplParams params = { 0 }; ret = convert_to_aarch64_impl(ctx, &rest, i, block_size, &params); _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
