This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 941a35149b68d29c780896b50c962f3f544114d0 Author: Niklas Haas <[email protected]> AuthorDate: Tue Jun 9 01:51:30 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Tue Jun 9 18:27:20 2026 +0200 swscale/x86/ops_int: switch to SWS_UOP_MOVE Instead of SWS_UOP_PERMUTE/SWS_UOP_COPY. No real measurable difference in performance (it just eliminates a few practically free register renames), but definitely simpler. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/x86/ops.c | 8 ++-- libswscale/x86/ops_int.asm | 101 ++++++--------------------------------- libswscale/x86/uops_macros.asm.h | 3 +- 3 files changed, 19 insertions(+), 93 deletions(-) diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index 0c0899479e..4c8eceb1cb 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ -309,8 +309,7 @@ SWS_FOR_STRUCT(TYPE, WRITE_NIBBLE, DECL_ENTRY, EXT, NULL, NULL) SWS_FOR_STRUCT(TYPE, WRITE_BIT, DECL_ENTRY, EXT, NULL, NULL) \ SWS_FOR_STRUCT(TYPE, SWAP_BYTES, DECL_ENTRY, EXT, NULL, NULL) \ SWS_FOR_STRUCT(TYPE, EXPAND_BIT, DECL_ENTRY, EXT, NULL, NULL) \ -SWS_FOR_STRUCT(TYPE, PERMUTE, DECL_ENTRY, EXT, NULL, NULL) \ -SWS_FOR_STRUCT(TYPE, COPY, DECL_ENTRY, EXT, NULL, NULL) \ +SWS_FOR_STRUCT(TYPE, MOVE, DECL_ENTRY, EXT, NULL, NULL) \ SWS_FOR_STRUCT(TYPE, SCALE, DECL_ENTRY, EXT, NULL, setup_scale) \ SWS_FOR_STRUCT(TYPE, ADD, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \ SWS_FOR_STRUCT(TYPE, MIN, DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \ @@ -332,8 +331,7 @@ SWS_FOR_STRUCT(TYPE, DITHER, DECL_ENTRY, EXT, NULL, setup_dither) SWS_FOR(TYPE, WRITE_BIT, REF_ENTRY, EXT) \ SWS_FOR(TYPE, SWAP_BYTES, REF_ENTRY, EXT) \ SWS_FOR(TYPE, EXPAND_BIT, REF_ENTRY, EXT) \ - SWS_FOR(TYPE, PERMUTE, REF_ENTRY, EXT) \ - SWS_FOR(TYPE, COPY, REF_ENTRY, EXT) \ + SWS_FOR(TYPE, MOVE, REF_ENTRY, EXT) \ SWS_FOR(TYPE, SCALE, REF_ENTRY, EXT) \ SWS_FOR(TYPE, ADD, REF_ENTRY, EXT) \ SWS_FOR(TYPE, MIN, REF_ENTRY, EXT) \ @@ -593,7 +591,7 @@ static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out) goto fail; } - SwsUOpFlags flags = 0; + SwsUOpFlags flags = SWS_UOP_FLAG_MOVE; if (X86_FMA4(cpu_flags)) flags |= SWS_UOP_FLAG_FMA; diff --git a/libswscale/x86/ops_int.asm b/libswscale/x86/ops_int.asm index 1779b15faa..111e6d0796 100644 --- a/libswscale/x86/ops_int.asm +++ b/libswscale/x86/ops_int.asm @@ -502,95 +502,25 @@ IF W, vcvtdq2ps mw2, mw2 %endmacro ;--------------------------------------------------------- -; Permuting, copying and clearing +; Moving, copying and clearing -%macro MOV_IDX 2 ; dstidx, srcidx - %assign DST2 4 + %1 - %assign SRC2 4 + %2 - mova m %+ %1, m %+ %2 -IF V2, mova m %+ DST2, m %+ SRC2 -%endmacro - -; decompose a permutation into unique cycles and emit a minimal set of mova -; instructions for each cycle -%macro PERMUTE_CYCLES 4 ; x, y, z, w -%assign IN0 %1 -%assign IN1 %2 -%assign IN2 %3 -%assign IN3 %4 -%assign TMP 8 +%macro MOVE 13 ; num, dst0..dst5, src0..src5 +%assign NUM_MOVES %1 +%define DST %2 +%define SRC %8 LOAD_CONT tmp0q -%rep 4 - %ifndef CUR - ; start of new cycle, find next register not in correct location - %if X - %assign CUR IN0 - %assign IN0 TMP - %elif Y - %assign CUR IN1 - %assign IN1 TMP - %elif Z - %assign CUR IN2 - %assign IN2 TMP - %elif W - %assign CUR IN3 - %assign IN3 TMP - %else - %exitrep ; all registers happy - %endif - MOV_IDX TMP, CUR ; preserve previous value of CUR - %endif - %ifdef CUR ; work-around for NASM bug - ; rotate CUR <- in[CUR] and follow the cycle - %assign NEXT IN %+ CUR - MOV_IDX CUR, NEXT - - %if CUR == 0 - %assign X 0 - %elif CUR == 1 - %assign Y 0 - %elif CUR == 2 - %assign Z 0 - %else - %assign W 0 - %endif - - %assign CUR NEXT - %if CUR == TMP ; end of cycle - %assign TMP TMP+1 ; pick non-overlapping tmp register for next cycle - %undef CUR - %endif +%rep NUM_MOVES + %assign dstidx %2 < 0 ? 8 : %2 + %assign srcidx %8 < 0 ? 8 : %8 + mova m %+ dstidx, m %+ srcidx + %if V2 + %assign dstidx dstidx + 4 + %assign srcidx srcidx + 4 + mova m %+ dstidx, m %+ srcidx %endif +%rotate 1 %endrep - - CONTINUE tmp0q -%endmacro - -%macro COPY 4 ; x, y, z, w - LOAD_CONT tmp0q -IF X, mova m8, m%1 -IF Y, mova m9, m%2 -IF Z, mova m10, m%3 -IF W, mova m11, m%4 -IF X, mova mx, m8 -IF Y, mova my, m9 -IF Z, mova mz, m10 -IF W, mova mw, m11 -%if V2 - %assign x2 4 + %1 - %assign y2 4 + %2 - %assign z2 4 + %3 - %assign w2 4 + %4 -IF X, mova m12, m %+ x2 -IF Y, mova m13, m %+ y2 -IF Z, mova m14, m %+ z2 -IF W, mova m15, m %+ w2 -IF X, mova mx2, m12 -IF Y, mova my2, m13 -IF Z, mova mz2, m14 -IF W, mova mw2, m15 -%endif CONTINUE tmp0q %endmacro @@ -789,8 +719,7 @@ assert 0, SWS_UOP_DITHER is not implemented for integer types DECL_%1_WRITE_PACKED (WRITE_PACKED) DECL_%1_WRITE_NIBBLE (WRITE_NIBBLE) DECL_%1_WRITE_BIT (WRITE_BIT) - DECL_%1_PERMUTE (PERMUTE_CYCLES) - DECL_%1_COPY (COPY) + DECL_%1_MOVE (MOVE) DECL_%1_SWAP_BYTES (SWAP_BYTES) DECL_%1_EXPAND_BIT (EXPAND_BIT) DECL_%1_SCALE (SCALE) diff --git a/libswscale/x86/uops_macros.asm.h b/libswscale/x86/uops_macros.asm.h index 544db36c26..d9565d12f2 100644 --- a/libswscale/x86/uops_macros.asm.h +++ b/libswscale/x86/uops_macros.asm.h @@ -61,8 +61,7 @@ {DEF_MACRO(WRITE_NIBBLE, TYPE)}, \ {DEF_MACRO(WRITE_PACKED, TYPE)}, \ {DEF_MACRO(WRITE_PLANAR, TYPE)}, \ - {DEF_MACRO(PERMUTE, TYPE)}, \ - {DEF_MACRO(COPY, TYPE)}, \ + {DEF_MACRO(MOVE, TYPE)}, \ {DEF_MACRO(SWAP_BYTES, TYPE)}, \ {DEF_MACRO(EXPAND_BIT, TYPE)}, \ {DEF_MACRO(EXPAND_PAIR, TYPE)}, \ _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
