This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 941a35149b68d29c780896b50c962f3f544114d0
Author:     Niklas Haas <[email protected]>
AuthorDate: Tue Jun 9 01:51:30 2026 +0200
Commit:     Niklas Haas <[email protected]>
CommitDate: Tue Jun 9 18:27:20 2026 +0200

    swscale/x86/ops_int: switch to SWS_UOP_MOVE
    
    Instead of SWS_UOP_PERMUTE/SWS_UOP_COPY.
    
    No real measurable difference in performance (it just eliminates a few
    practically free register renames), but definitely simpler.
    
    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/x86/ops.c             |   8 ++--
 libswscale/x86/ops_int.asm       | 101 ++++++---------------------------------
 libswscale/x86/uops_macros.asm.h |   3 +-
 3 files changed, 19 insertions(+), 93 deletions(-)

diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c
index 0c0899479e..4c8eceb1cb 100644
--- a/libswscale/x86/ops.c
+++ b/libswscale/x86/ops.c
@@ -309,8 +309,7 @@ SWS_FOR_STRUCT(TYPE, WRITE_NIBBLE,    DECL_ENTRY, EXT, NULL, NULL)
 SWS_FOR_STRUCT(TYPE, WRITE_BIT,       DECL_ENTRY, EXT, NULL, NULL)              \
 SWS_FOR_STRUCT(TYPE, SWAP_BYTES,      DECL_ENTRY, EXT, NULL, NULL)              \
 SWS_FOR_STRUCT(TYPE, EXPAND_BIT,      DECL_ENTRY, EXT, NULL, NULL)              \
-SWS_FOR_STRUCT(TYPE, PERMUTE,         DECL_ENTRY, EXT, NULL, NULL)              \
-SWS_FOR_STRUCT(TYPE, COPY,            DECL_ENTRY, EXT, NULL, NULL)              \
+SWS_FOR_STRUCT(TYPE, MOVE,            DECL_ENTRY, EXT, NULL, NULL)              \
 SWS_FOR_STRUCT(TYPE, SCALE,           DECL_ENTRY, EXT, NULL, setup_scale)       \
 SWS_FOR_STRUCT(TYPE, ADD,             DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
 SWS_FOR_STRUCT(TYPE, MIN,             DECL_ENTRY, EXT, NULL, ff_sws_setup_vec4) \
@@ -332,8 +331,7 @@ SWS_FOR_STRUCT(TYPE, DITHER,          DECL_ENTRY, EXT, NULL, setup_dither)
     SWS_FOR(TYPE, WRITE_BIT,      REF_ENTRY, EXT)                               \
     SWS_FOR(TYPE, SWAP_BYTES,     REF_ENTRY, EXT)                               \
     SWS_FOR(TYPE, EXPAND_BIT,     REF_ENTRY, EXT)                               \
-    SWS_FOR(TYPE, PERMUTE,        REF_ENTRY, EXT)                               \
-    SWS_FOR(TYPE, COPY,           REF_ENTRY, EXT)                               \
+    SWS_FOR(TYPE, MOVE,           REF_ENTRY, EXT)                               \
     SWS_FOR(TYPE, SCALE,          REF_ENTRY, EXT)                               \
     SWS_FOR(TYPE, ADD,            REF_ENTRY, EXT)                               \
     SWS_FOR(TYPE, MIN,            REF_ENTRY, EXT)                               \
@@ -593,7 +591,7 @@ static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
         goto fail;
     }
 
-    SwsUOpFlags flags = 0;
+    SwsUOpFlags flags = SWS_UOP_FLAG_MOVE;
     if (X86_FMA4(cpu_flags))
         flags |= SWS_UOP_FLAG_FMA;
 
diff --git a/libswscale/x86/ops_int.asm b/libswscale/x86/ops_int.asm
index 1779b15faa..111e6d0796 100644
--- a/libswscale/x86/ops_int.asm
+++ b/libswscale/x86/ops_int.asm
@@ -502,95 +502,25 @@ IF W,   vcvtdq2ps mw2, mw2
 %endmacro
 
 ;---------------------------------------------------------
-; Permuting, copying and clearing
+; Moving, copying and clearing
 
-%macro MOV_IDX 2 ; dstidx, srcidx
-        %assign DST2 4 + %1
-        %assign SRC2 4 + %2
-        mova m %+ %1,   m %+ %2
-IF V2,  mova m %+ DST2, m %+ SRC2
-%endmacro
-
-; decompose a permutation into unique cycles and emit a minimal set of mova
-; instructions for each cycle
-%macro PERMUTE_CYCLES 4 ; x, y, z, w
-%assign IN0 %1
-%assign IN1 %2
-%assign IN2 %3
-%assign IN3 %4
-%assign TMP 8
+%macro MOVE 13 ; num, dst0..dst5, src0..src5
+%assign NUM_MOVES %1
+%define DST %2
+%define SRC %8
 
         LOAD_CONT tmp0q
-%rep 4
-    %ifndef CUR
-        ; start of new cycle, find next register not in correct location
-        %if X
-            %assign CUR IN0
-            %assign IN0 TMP
-        %elif Y
-            %assign CUR IN1
-            %assign IN1 TMP
-        %elif Z
-            %assign CUR IN2
-            %assign IN2 TMP
-        %elif W
-            %assign CUR IN3
-            %assign IN3 TMP
-        %else
-            %exitrep ; all registers happy
-        %endif
-        MOV_IDX TMP, CUR ; preserve previous value of CUR
-    %endif
-    %ifdef CUR ; work-around for NASM bug
-        ; rotate CUR <- in[CUR] and follow the cycle
-        %assign NEXT IN %+ CUR
-        MOV_IDX CUR, NEXT
-
-        %if CUR == 0
-            %assign X 0
-        %elif CUR == 1
-            %assign Y 0
-        %elif CUR == 2
-            %assign Z 0
-        %else
-            %assign W 0
-        %endif
-
-        %assign CUR NEXT
-        %if CUR == TMP ; end of cycle
-            %assign TMP TMP+1 ; pick non-overlapping tmp register for next cycle
-            %undef CUR
-        %endif
+%rep NUM_MOVES
+        %assign dstidx %2 < 0 ? 8 : %2
+        %assign srcidx %8 < 0 ? 8 : %8
+        mova m %+ dstidx, m %+ srcidx
+    %if V2
+        %assign dstidx dstidx + 4
+        %assign srcidx srcidx + 4
+        mova m %+ dstidx, m %+ srcidx
     %endif
+%rotate 1
 %endrep
-
-        CONTINUE tmp0q
-%endmacro
-
-%macro COPY 4 ; x, y, z, w
-        LOAD_CONT tmp0q
-IF X,   mova m8,  m%1
-IF Y,   mova m9,  m%2
-IF Z,   mova m10, m%3
-IF W,   mova m11, m%4
-IF X,   mova mx, m8
-IF Y,   mova my, m9
-IF Z,   mova mz, m10
-IF W,   mova mw, m11
-%if V2
-        %assign x2 4 + %1
-        %assign y2 4 + %2
-        %assign z2 4 + %3
-        %assign w2 4 + %4
-IF X,   mova m12, m %+ x2
-IF Y,   mova m13, m %+ y2
-IF Z,   mova m14, m %+ z2
-IF W,   mova m15, m %+ w2
-IF X,   mova mx2, m12
-IF Y,   mova my2, m13
-IF Z,   mova mz2, m14
-IF W,   mova mw2, m15
-%endif
         CONTINUE tmp0q
 %endmacro
 
@@ -789,8 +719,7 @@ assert 0, SWS_UOP_DITHER is not implemented for integer types
     DECL_%1_WRITE_PACKED    (WRITE_PACKED)
     DECL_%1_WRITE_NIBBLE    (WRITE_NIBBLE)
     DECL_%1_WRITE_BIT       (WRITE_BIT)
-    DECL_%1_PERMUTE         (PERMUTE_CYCLES)
-    DECL_%1_COPY            (COPY)
+    DECL_%1_MOVE            (MOVE)
     DECL_%1_SWAP_BYTES      (SWAP_BYTES)
     DECL_%1_EXPAND_BIT      (EXPAND_BIT)
     DECL_%1_SCALE           (SCALE)
diff --git a/libswscale/x86/uops_macros.asm.h b/libswscale/x86/uops_macros.asm.h
index 544db36c26..d9565d12f2 100644
--- a/libswscale/x86/uops_macros.asm.h
+++ b/libswscale/x86/uops_macros.asm.h
@@ -61,8 +61,7 @@
     {DEF_MACRO(WRITE_NIBBLE,        TYPE)}, \
     {DEF_MACRO(WRITE_PACKED,        TYPE)}, \
     {DEF_MACRO(WRITE_PLANAR,        TYPE)}, \
-    {DEF_MACRO(PERMUTE,             TYPE)}, \
-    {DEF_MACRO(COPY,                TYPE)}, \
+    {DEF_MACRO(MOVE,                TYPE)}, \
     {DEF_MACRO(SWAP_BYTES,          TYPE)}, \
     {DEF_MACRO(EXPAND_BIT,          TYPE)}, \
     {DEF_MACRO(EXPAND_PAIR,         TYPE)}, \

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to