This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 19250a184601742ac1b60795f0323692a445c048 Author: Ramiro Polla <[email protected]> AuthorDate: Mon Apr 13 15:28:32 2026 +0200 Commit: Ramiro Polla <[email protected]> CommitDate: Wed Jun 10 01:47:10 2026 +0200 swscale/aarch64/ops: use plain `ret` instruction Use a call/ret pair instead of awkwardly exporting and then jumping back to the return label. This is similar to c29465bcb6, but for aarch64. Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <[email protected]> --- libswscale/aarch64/ops.c | 12 +--- libswscale/aarch64/ops_asmgen.c | 112 +++++++++++++++++++++---------------- libswscale/aarch64/ops_entries.c | 4 -- libswscale/aarch64/ops_impl.c | 3 - libswscale/aarch64/ops_impl.h | 1 - libswscale/tests/sws_ops_aarch64.c | 7 +-- 6 files changed, 67 insertions(+), 72 deletions(-) diff --git a/libswscale/aarch64/ops.c b/libswscale/aarch64/ops.c index a7e96b16c3..5a95792017 100644 --- a/libswscale/aarch64/ops.c +++ b/libswscale/aarch64/ops.c @@ -221,7 +221,7 @@ static int aarch64_compile(SwsContext *ctx, const SwsOpList *ops, goto error; } - /* Look up process/process_return functions. */ + /* Look up process function. */ const SwsOp *read = ff_sws_op_list_input(&rest); const SwsOp *write = ff_sws_op_list_output(&rest); const int read_planes = read ? (read->rw.packed ? 
1 : read->rw.elems) : 0; @@ -230,19 +230,13 @@ static int aarch64_compile(SwsContext *ctx, const SwsOpList *ops, for (int i = 0; i < FFMAX(read_planes, write_planes); i++) MASK_SET(mask, i, 1); - SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask }; - SwsAArch64OpImplParams return_params = { .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = mask }; + SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask }; SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params); - SwsFuncPtr return_func = ff_sws_aarch64_lookup(&return_params); - if (!process_func || !return_func) { + if (!process_func) { ret = AVERROR(ENOTSUP); goto error; } - ret = ff_sws_op_chain_append(chain, return_func, NULL, &(SwsOpPriv) { 0 }); - if (ret < 0) - goto error; - out->func = (SwsOpFunc) process_func; out->cpu_flags = chain->cpu_flags; diff --git a/libswscale/aarch64/ops_asmgen.c b/libswscale/aarch64/ops_asmgen.c index 7c5bb83f46..7d4182c909 100644 --- a/libswscale/aarch64/ops_asmgen.c +++ b/libswscale/aarch64/ops_asmgen.c @@ -260,14 +260,14 @@ static void asmgen_epilogue(SwsAArch64Context *s, const RasmOp *regs, unsigned n } /*********************************************************************/ -/* Callee-saved registers (r19-r28). */ -#define MAX_SAVED_REGS 10 +/* Callee-saved registers (r19-r28, fp, and lr). 
*/ +#define MAX_SAVED_REGS 12 static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr) { const int n = a64op_gpr_n(gpr); - if (n >= 19 && n <= 28) + if (n >= 19 && n <= 30) regs[(*count)++] = gpr; } @@ -276,6 +276,7 @@ static unsigned clobbered_gprs(const SwsAArch64Context *s, RasmOp regs[MAX_SAVED_REGS]) { unsigned count = 0; + clobber_gpr(regs, &count, a64op_lr()); LOOP_MASK(p, i) { clobber_gpr(regs, &count, s->in[i]); clobber_gpr(regs, &count, s->out[i]); @@ -292,9 +293,8 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p char buf[64]; /** - * The process/process_return functions for aarch64 work similarly - * to the x86 backend. The description in x86/ops_include.asm mostly - * holds as well here. + * The process function for aarch64 works similarly to the x86 backend. + * The description in x86/ops_include.asm mostly holds as well here. */ aarch64_op_impl_func_name(func_name, sizeof(func_name), p); @@ -329,49 +329,38 @@ static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p i_ldr(r, s->out_bump[i], a64op_off(s->exec, offsetof_exec_out_bump + (i * sizeof(ptrdiff_t)))); } - /* Reset x and jump to first kernel. */ - i_mov(r, s->bx, s->bx_start); CMT("bx = bx_start;"); - i_mov(r, s->impl, s->op1_impl); CMT("impl = op1_impl;"); - i_br (r, s->op0_func); CMT("jump to op0_func"); -} + int first_row = rasm_new_label(r, NULL); + int next_row = rasm_new_label(r, NULL); + int next_block = rasm_new_label(r, NULL); -static void asmgen_process_return(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) -{ - RasmContext *r = s->rctx; - char func_name[128]; + /* Jump to first row (skips padding). */ + i_b (r, rasm_op_label(first_row)); CMT("goto first_row;"); - aarch64_op_impl_func_name(func_name, sizeof(func_name), p); + /* Perform padding, preparing for next row. 
*/ + rasm_add_label(r, next_row); CMT("next_row:"); + LOOP_MASK(p, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); } + LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); } - rasm_func_begin(r, func_name, true, true); + /* First row (reset x). */ + rasm_add_label(r, first_row); CMT("first_row:"); + i_mov(r, s->bx, s->bx_start); CMT("bx = bx_start;"); - /* Reset impl to first kernel. */ + /* Reset impl and call first kernel. */ + rasm_add_label(r, next_block); CMT("next_block:"); i_mov(r, s->impl, s->op1_impl); CMT("impl = op1_impl;"); + i_blr(r, s->op0_func); CMT("op0_func();"); /* Perform horizontal loop. */ - int loop = rasm_new_label(r, NULL); i_add(r, s->bx, s->bx, IMM(1)); CMT("bx += 1;"); i_cmp(r, s->bx, s->bx_end); CMT("if (bx != bx_end)"); - i_bne(r, loop); CMT(" goto loop;"); + i_bne(r, next_block); CMT(" goto next_block;"); /* Perform vertical loop. */ - int end = rasm_new_label(r, NULL); i_add(r, s->y, s->y, IMM(1)); CMT("y += 1;"); - i_cmp(r, s->y, s->y_end); CMT("if (y == y_end)"); - i_beq(r, end); CMT(" goto end;"); - - /* Perform padding and reset x, preparing for next row. */ - LOOP_MASK(p, i) { i_add(r, s->in[i], s->in[i], s->in_bump[i]); CMTF("in[%u] += in_bump[%u];", i, i); } - LOOP_MASK(p, i) { i_add(r, s->out[i], s->out[i], s->out_bump[i]); CMTF("out[%u] += out_bump[%u];", i, i); } - i_mov(r, s->bx, s->bx_start); CMT("bx = bx_start;"); - - /* Loop back or end of function. 
*/ - rasm_add_label(r, loop); CMT("loop:"); - i_br (r, s->op0_func); CMT("jump to op0_func"); - rasm_add_label(r, end); CMT("end:"); + i_cmp(r, s->y, s->y_end); CMT("if (y != y_end)"); + i_bne(r, next_row); CMT(" goto next_row;"); /* Function epilogue */ - RasmOp saved_regs[MAX_SAVED_REGS]; - unsigned nsaved = clobbered_gprs(s, p, saved_regs); if (nsaved) asmgen_epilogue(s, saved_regs, nsaved); @@ -1367,9 +1356,28 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) { RasmContext *r = s->rctx; + bool is_read = false; + bool is_write = false; + switch (p->op) { + case AARCH64_SWS_OP_READ_BIT: + case AARCH64_SWS_OP_READ_NIBBLE: + case AARCH64_SWS_OP_READ_PACKED: + case AARCH64_SWS_OP_READ_PLANAR: + is_read = true; + break; + case AARCH64_SWS_OP_WRITE_BIT: + case AARCH64_SWS_OP_WRITE_NIBBLE: + case AARCH64_SWS_OP_WRITE_PACKED: + case AARCH64_SWS_OP_WRITE_PLANAR: + is_write = true; + break; + default: + break; + } + char func_name[128]; aarch64_op_impl_func_name(func_name, sizeof(func_name), p); - rasm_func_begin(r, func_name, true, true); + rasm_func_begin(r, func_name, true, !is_read); /** * Set up vector register dimensions and reshape all vectors @@ -1416,14 +1424,18 @@ static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) break; } - /* Load continuation address and increment impl pointer. */ - RasmNode *node = rasm_set_current_node(r, s->load_cont_node); - RasmOp impl_post = a64op_post(s->impl, sizeof_impl); - i_ldr(r, s->cont, impl_post); CMT("SwsFuncPtr cont = (impl++)->cont;"); - rasm_set_current_node(r, node); - - /* Common end for CPS functions. */ - i_br (r, s->cont); CMT("jump to cont"); + if (is_write) { + /* Write functions return directly. */ + i_ret(r); + } else { + /* Load continuation address and increment impl pointer. 
*/ + RasmNode *node = rasm_set_current_node(r, s->load_cont_node); + RasmOp impl_post = a64op_post(s->impl, sizeof_impl); + i_ldr(r, s->cont, impl_post); CMT("SwsFuncPtr cont = (impl++)->cont;"); + rasm_set_current_node(r, node); + /* Common end for remaining CPS functions. */ + i_br (r, s->cont); CMT("jump to cont"); + } } static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) @@ -1432,9 +1444,6 @@ static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p) case AARCH64_SWS_OP_PROCESS: asmgen_process(s, p); break; - case AARCH64_SWS_OP_PROCESS_RETURN: - asmgen_process_return(s, p); - break; default: asmgen_op_cps(s, p); break; @@ -1561,9 +1570,11 @@ static int asmgen(void) /** * The entry point of the SwsOpFunc is the `process` function. The + * first kernel function is called from `process`, and subsequent * kernel functions are chained by directly branching to the next - * operation, using a continuation-passing style design. The exit - * point of the SwsOpFunc is the `process_return` function. + * operation, using a continuation-passing style design. The last + * operation must be a write operation, which returns from the call + * to the `process` function. * * The GPRs used by the entire call-chain are listed below. * @@ -1586,6 +1597,9 @@ static int asmgen(void) * The read/write data pointers and padding values first use up the * remaining free caller-saved registers, and only then are the * caller-saved registers (r19-r28) used. + * + * The Link Register (r30) is used when calling the first kernel, + * so it must be saved. */ /* SwsOpFunc arguments. 
*/ diff --git a/libswscale/aarch64/ops_entries.c b/libswscale/aarch64/ops_entries.c index 70aad8ae89..ae30ca8b57 100644 --- a/libswscale/aarch64/ops_entries.c +++ b/libswscale/aarch64/ops_entries.c @@ -7,10 +7,6 @@ { .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0011 }, { .op = AARCH64_SWS_OP_PROCESS, .mask = 0x0111 }, { .op = AARCH64_SWS_OP_PROCESS, .mask = 0x1111 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0001 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0011 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x0111 }, -{ .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = 0x1111 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_BIT, .block_size = 16, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, { .op = AARCH64_SWS_OP_READ_NIBBLE, .block_size = 8, .type = AARCH64_PIXEL_U8, .mask = 0x0001 }, diff --git a/libswscale/aarch64/ops_impl.c b/libswscale/aarch64/ops_impl.c index f7e7b18dcf..26d6a8d954 100644 --- a/libswscale/aarch64/ops_impl.c +++ b/libswscale/aarch64/ops_impl.c @@ -77,7 +77,6 @@ static const char *aarch64_pixel_type_name(SwsAArch64PixelType fmt) static const char op_types[AARCH64_SWS_OP_TYPE_NB][32] = { [AARCH64_SWS_OP_NONE ] = "AARCH64_SWS_OP_NONE", [AARCH64_SWS_OP_PROCESS ] = "AARCH64_SWS_OP_PROCESS", - [AARCH64_SWS_OP_PROCESS_RETURN] = "AARCH64_SWS_OP_PROCESS_RETURN", [AARCH64_SWS_OP_READ_BIT ] = "AARCH64_SWS_OP_READ_BIT", [AARCH64_SWS_OP_READ_NIBBLE ] = "AARCH64_SWS_OP_READ_NIBBLE", [AARCH64_SWS_OP_READ_PACKED ] = "AARCH64_SWS_OP_READ_PACKED", @@ -114,7 +113,6 @@ static const char *aarch64_op_type(SwsAArch64OpType op) static const char op_type_names[AARCH64_SWS_OP_TYPE_NB][16] = { [AARCH64_SWS_OP_NONE ] = "none", [AARCH64_SWS_OP_PROCESS ] = "process", - [AARCH64_SWS_OP_PROCESS_RETURN] = "process_return", [AARCH64_SWS_OP_READ_BIT ] = "read_bit", [AARCH64_SWS_OP_READ_NIBBLE ] = "read_nibble", [AARCH64_SWS_OP_READ_PACKED ] = "read_packed", @@ -326,7 +324,6 @@ static const 
ParamField field_dither_size_log2 = { PARAM_FIELD(dither.size_log2) #define MAX_LEVELS 8 static const ParamField *op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS] = { [AARCH64_SWS_OP_PROCESS ] = { &field_op, &field_mask }, - [AARCH64_SWS_OP_PROCESS_RETURN] = { &field_op, &field_mask }, [AARCH64_SWS_OP_READ_BIT ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_NIBBLE ] = { &field_op, &field_block_size, &field_type, &field_mask }, [AARCH64_SWS_OP_READ_PACKED ] = { &field_op, &field_block_size, &field_type, &field_mask }, diff --git a/libswscale/aarch64/ops_impl.h b/libswscale/aarch64/ops_impl.h index 67c4672812..f0bbc9f697 100644 --- a/libswscale/aarch64/ops_impl.h +++ b/libswscale/aarch64/ops_impl.h @@ -38,7 +38,6 @@ typedef enum SwsAArch64PixelType { typedef enum SwsAArch64OpType { AARCH64_SWS_OP_NONE = 0, AARCH64_SWS_OP_PROCESS, - AARCH64_SWS_OP_PROCESS_RETURN, AARCH64_SWS_OP_READ_BIT, AARCH64_SWS_OP_READ_NIBBLE, AARCH64_SWS_OP_READ_PACKED, diff --git a/libswscale/tests/sws_ops_aarch64.c b/libswscale/tests/sws_ops_aarch64.c index ca6279e8cf..84300c6af4 100644 --- a/libswscale/tests/sws_ops_aarch64.c +++ b/libswscale/tests/sws_ops_aarch64.c @@ -72,7 +72,7 @@ error: return ret; } -/* Collect the parameters for the process/process_return functions. */ +/* Collect the parameters for the process function. */ static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **root) { const SwsOp *read = ff_sws_op_list_input(ops); @@ -89,11 +89,6 @@ static int aarch64_collect_process(const SwsOpList *ops, struct AVTreeNode **roo .mask = mask, }; - ret = aarch64_collect_op(&params, root); - if (ret < 0) - return ret; - - params.op = AARCH64_SWS_OP_PROCESS_RETURN; ret = aarch64_collect_op(&params, root); if (ret < 0) return ret; _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
