This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 48ab318f5c9c932ef6274fe038dea3494d4f2cb0
Author:     Niklas Haas <[email protected]>
AuthorDate: Wed Feb 25 15:19:32 2026 +0100
Commit:     Niklas Haas <[email protected]>
CommitDate: Thu Feb 26 13:09:14 2026 +0000

    swscale/x86/ops: don't preload dither weights

    This doesn't actually gain any performance but makes the code needlessly
    complicated. Just directly add the indirect address as needed.

    Signed-off-by: Niklas Haas <[email protected]>
---
 libswscale/x86/ops_float.asm | 64 +++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 43 deletions(-)

diff --git a/libswscale/x86/ops_float.asm b/libswscale/x86/ops_float.asm
index 625cf81553..78f35a9785 100644
--- a/libswscale/x86/ops_float.asm
+++ b/libswscale/x86/ops_float.asm
@@ -179,20 +179,6 @@ IF W, mulps mw2, m8
         CONTINUE tmp0q
 %endmacro
 
-%macro load_dither_row 5 ; size_log2, comp_idx, addr, out, out2
-        mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i]
-%if %1 == 1
-        vbroadcastsd %4, [%3 + tmp0q]
-%elif %1 == 2
-        VBROADCASTI128 %4, [%3 + tmp0q]
-%else
-        mova %4, [%3 + tmp0q]
-    %if (4 << %1) > mmsize
-        mova %5, [%3 + tmp0q + mmsize]
-    %endif
-%endif
-%endmacro
-
 %macro dither0 0
 op dither0
         ; constant offset for all channels
@@ -209,23 +195,24 @@ IF W, addps mw2, m8
         CONTINUE tmp0q
 %endmacro
 
+%macro dither_row 5 ; size_log2, comp_idx, matrix, out, out2
+        mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i]
+%if %1 == 1
+        vbroadcastsd m8, [%3 + tmp0q]
+        addps %4, m8
+        addps %5, m8
+%elif %1 == 2
+        VBROADCASTI128 m8, [%3 + tmp0q]
+        addps %4, m8
+        addps %5, m8
+%else
+        addps %4, [%3 + tmp0q]
+        addps %5, [%3 + tmp0q + mmsize * ((4 << %1) > mmsize)]
+%endif
+%endmacro
+
 %macro dither 1 ; size_log2
 op dither%1
-    %define DX m8
-    %define DY m9
-    %define DZ m10
-    %define DW m11
-    %if (4 << %1) > mmsize
-    %define DX2 m12
-    %define DY2 m13
-    %define DZ2 m14
-    %define DW2 m15
-    %else
-    %define DX2 DX
-    %define DY2 DY
-    %define DZ2 DZ
-    %define DW2 DW
-    %endif
         ; dither matrix is stored indirectly at the private data address
         mov tmp1q, [implq + SwsOpImpl.priv]
         ; add y offset. note that for 2x2, we would only need to look at the
@@ -243,20 +230,11 @@ op dither%1
         and tmp0d, (4 << %1) - 1
         add tmp1q, tmp0q
 %endif
-IF X,   load_dither_row %1, 0, tmp1q, DX, DX2
-IF Y,   load_dither_row %1, 1, tmp1q, DY, DY2
-IF Z,   load_dither_row %1, 2, tmp1q, DZ, DZ2
-IF W,   load_dither_row %1, 3, tmp1q, DW, DW2
-        LOAD_CONT tmp0q
-IF X,   addps mx, DX
-IF Y,   addps my, DY
-IF Z,   addps mz, DZ
-IF W,   addps mw, DW
-IF X,   addps mx2, DX2
-IF Y,   addps my2, DY2
-IF Z,   addps mz2, DZ2
-IF W,   addps mw2, DW2
-        CONTINUE tmp0q
+        dither_row %1, 0, tmp1q, mx, mx2
+        dither_row %1, 1, tmp1q, my, my2
+        dither_row %1, 2, tmp1q, mz, mz2
+        dither_row %1, 3, tmp1q, mw, mw2
+        CONTINUE
 %endmacro
 
 %macro dither_fns 0
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
