On Tue, 14 Jun 2022, J. Dekker wrote:
Performs the following changes:
- Lower case format specifiers
- Lower case sxtw/uxtw
- Numeric labels on same line as next instruction
Why do you feel the need to do that? I like the occasional extra spacing that an empty line for such labels adds - at least I wouldn't go changing how that's done in other places. Where there are empty lines and where there aren't is up to the author, IMO, based on how they see their code.
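To illustrate the two styles (a contrived loop, not taken from the patch):

    // label on its own line, with some breathing room before the loop body
1:
        ld1             {v0.4s}, [x1], #16
        fadd            v2.4s, v2.4s, v0.4s
        subs            w2, w2, #4
        b.gt            1b

    // versus the label folded onto the first instruction
1:      ld1             {v0.4s}, [x1], #16
        fadd            v2.4s, v2.4s, v0.4s
        subs            w2, w2, #4
        b.gt            1b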
- Indentation to 9/25

Signed-off-by: J. Dekker <j...@itanimul.li>
---
I initially started writing a full lexer to do this but ended up wasting too
much time on it. Now using mostly text editor macros. Still need to skim
through again and make a few manual changes here and there but wondering if
there are any other major automatable changes I missed.

 libavcodec/aarch64/aacpsdsp_neon.S       | 208 +++----
 libavcodec/aarch64/fft_neon.S            |  89 ++-
 libavcodec/aarch64/fmtconvert_neon.S     |  12 +-
 libavcodec/aarch64/h264cmc_neon.S        | 534 +++++++++---------
 libavcodec/aarch64/h264dsp_neon.S        | 664 +++++++++++------------
 libavcodec/aarch64/h264idct_neon.S       | 418 +++++++-------
 libavcodec/aarch64/h264pred_neon.S       |  16 +-
 libavcodec/aarch64/h264qpel_neon.S       | 644 +++++++++++-----------
 libavcodec/aarch64/hevcdsp_idct_neon.S   | 323 ++++++-----
 libavcodec/aarch64/hevcdsp_sao_neon.S    |   8 +-
 libavcodec/aarch64/hpeldsp_neon.S        | 504 ++++++++---------
 libavcodec/aarch64/mdct_neon.S           |  27 +-
 libavcodec/aarch64/mpegaudiodsp_neon.S   |  26 +-
 libavcodec/aarch64/neon.S                | 246 ++++-----
 libavcodec/aarch64/opusdsp_neon.S        | 106 ++--
 libavcodec/aarch64/pixblockdsp_neon.S    |   6 +-
 libavcodec/aarch64/sbrdsp_neon.S         | 308 +++++------
 libavcodec/aarch64/simple_idct_neon.S    | 410 +++++++-------
 libavcodec/aarch64/synth_filter_neon.S   |  15 +-
 libavcodec/aarch64/vc1dsp_neon.S         | 206 +++---
 libavcodec/aarch64/videodsp.S            |   3 +-
 libavcodec/aarch64/vp8dsp_neon.S         | 485 ++++++++---------
 libavcodec/aarch64/vp9itxfm_16bpp_neon.S |  64 +--
 libavcodec/aarch64/vp9itxfm_neon.S       |  50 +-
 libavcodec/aarch64/vp9lpf_16bpp_neon.S   |  24 +-
 libavcodec/aarch64/vp9lpf_neon.S         |  78 +--
 libavcodec/aarch64/vp9mc_16bpp_neon.S    |  30 +-
 libavcodec/aarch64/vp9mc_aarch64.S       |   9 +-
 libavcodec/aarch64/vp9mc_neon.S          |  39 +-
 29 files changed, 2688 insertions(+), 2864 deletions(-)

diff --git a/libavcodec/aarch64/aacpsdsp_neon.S b/libavcodec/aarch64/aacpsdsp_neon.S
index ff4e6e244a..dfa6a9dc33 100644
--- a/libavcodec/aarch64/aacpsdsp_neon.S
+++ b/libavcodec/aarch64/aacpsdsp_neon.S
@@ -19,130 +19,130 @@
 #include "libavutil/aarch64/asm.S"

 function ff_ps_add_squares_neon, export=1
-1:      ld1         {v0.4S,v1.4S}, [x1], #32
-        fmul        v0.4S, v0.4S, v0.4S
-        fmul        v1.4S, v1.4S, v1.4S
-        faddp       v2.4S, v0.4S, v1.4S
-        ld1         {v3.4S}, [x0]
-        fadd        v3.4S, v3.4S, v2.4S
-        st1         {v3.4S}, [x0], #16
-        subs        w2, w2, #4
+1:      ld1             {v0.4s,v1.4s}, [x1], #32
+        fmul            v0.4s, v0.4s, v0.4s
+        fmul            v1.4s, v1.4s, v1.4s
+        faddp           v2.4s, v0.4s, v1.4s
+        ld1             {v3.4s}, [x0]
+        fadd            v3.4s, v3.4s, v2.4s
+        st1             {v3.4s}, [x0], #16
+        subs            w2, w2, #4
         b.gt        1b
This leaves the b.gt parameter misaligned with the rest - the same thing happens throughout the rest of this file.
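I.e. to stay consistent, the untouched branch would need to be reindented along with the rest, something like this (columns illustrative):

        subs            w2, w2, #4
        b.gt            1b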
@@ -295,8 +295,7 @@ function fft_pass_neon
         st1             {v21.4s}, [x1], #16   // {z[o1],z[o1+1]}
         st1             {v22.4s}, [x2], #16   // {z[o2],z[o2+1]}
         st1             {v23.4s}, [x3], #16   // {z[o3],z[o3+1]}
-1:
-        ld1             {v20.4s},[x2]    // {z[o2],z[o2+1]}
+1:      ld1             {v20.4s},[x2]    // {z[o2],z[o2+1]}
         ld1             {v22.4s},[x3]    // {z[o3],z[o3+1]}
I really don't think getting rid of the empty line here or anywhere else adds anything - I think it worsens readability.
@@ -359,18 +358,18 @@ function fft\n\()_neon, align=6
 endfunc
 .endm

-        def_fft    32,    16,     8
-        def_fft    64,    32,    16
-        def_fft   128,    64,    32
-        def_fft   256,   128,    64
-        def_fft   512,   256,   128
-        def_fft  1024,   512,   256
-        def_fft  2048,  1024,   512
-        def_fft  4096,  2048,  1024
-        def_fft  8192,  4096,  2048
-        def_fft 16384,  8192,  4096
-        def_fft 32768, 16384,  8192
-        def_fft 65536, 32768, 16384
+        def_fft 32, 16, 8
+        def_fft 64, 32, 16
+        def_fft 128, 64, 32
+        def_fft 256, 128, 64
+        def_fft 512, 256, 128
+        def_fft 1024, 512, 256
+        def_fft 2048, 1024, 512
+        def_fft 4096, 2048, 1024
+        def_fft 8192, 4096, 2048
+        def_fft 16384, 8192, 4096
+        def_fft 32768, 16384, 8192
+        def_fft 65536, 32768, 16384
Previously, the columns were right-aligned here. I don't think we gain anything from changing that.
-const   pmmp, align=4
+const pmmp, align=4
Changes like this are good indeed.
diff --git a/libavcodec/aarch64/h264cmc_neon.S b/libavcodec/aarch64/h264cmc_neon.S
index f8e9407854..0add73ffec 100644
--- a/libavcodec/aarch64/h264cmc_neon.S
+++ b/libavcodec/aarch64/h264cmc_neon.S
@@ -26,24 +26,24 @@
 /* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
 .macro  h264_chroma_mc8 type, codec=h264
 function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
-  .ifc \type,avg
+.ifc \type,avg
         mov             x8, x0
-  .endif
+.endif
I didn't mind the indented .if/.endif, but I won't oppose making it consistent with the rest either, i.e. making it unindented.
diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S
index ea221e6862..926c6e8362 100644

 .macro  h264_loop_filter_luma
-        dup             v22.16B, w2                     // alpha
-        uxtl            v24.8H, v24.8B
-        uabd            v21.16B, v16.16B, v0.16B        // abs(p0 - q0)
-        uxtl            v24.4S, v24.4H
-        uabd            v28.16B, v18.16B, v16.16B       // abs(p1 - p0)
-        sli             v24.8H, v24.8H, #8
-        uabd            v30.16B, v2.16B, v0.16B         // abs(q1 - q0)
-        sli             v24.4S, v24.4S, #16
-        cmhi            v21.16B, v22.16B, v21.16B       // < alpha
-        dup             v22.16B, w3                     // beta
-        cmlt            v23.16B, v24.16B, #0
-        cmhi            v28.16B, v22.16B, v28.16B       // < beta
-        cmhi            v30.16B, v22.16B, v30.16B       // < beta
-        bic             v21.16B, v21.16B, v23.16B
-        uabd            v17.16B, v20.16B, v16.16B       // abs(p2 - p0)
-        and             v21.16B, v21.16B, v28.16B
-        uabd            v19.16B, v4.16B, v0.16B         // abs(q2 - q0)
-        and             v21.16B, v21.16B, v30.16B       // < beta
Here's a misaligned comment that could be fixed while touching this.
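E.g. (contrived spacing, just to illustrate the point):

        uabd            v28.16b, v18.16b, v16.16b   // abs(p1 - p0)
        uabd            v30.16b, v2.16b, v0.16b         // abs(q1 - q0)

reads nicer with both comments starting in the same column:

        uabd            v28.16b, v18.16b, v16.16b       // abs(p1 - p0)
        uabd            v30.16b, v2.16b, v0.16b         // abs(q1 - q0)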
+        dup             v22.16b, w2                     // alpha
+        uxtl            v24.8h, v24.8b
+        uabd            v21.16b, v16.16b, v0.16b        // abs(p0 - q0)
+        uxtl            v24.4s, v24.4h
+        uabd            v28.16b, v18.16b, v16.16b       // abs(p1 - p0)
+        sli             v24.8h, v24.8h, #8
+        uabd            v30.16b, v2.16b, v0.16b         // abs(q1 - q0)
+        sli             v24.4s, v24.4s, #16
+        cmhi            v21.16b, v22.16b, v21.16b       // < alpha
+        dup             v22.16b, w3                     // beta
+        cmlt            v23.16b, v24.16b, #0
+        cmhi            v28.16b, v22.16b, v28.16b       // < beta
+        cmhi            v30.16b, v22.16b, v30.16b       // < beta
+        bic             v21.16b, v21.16b, v23.16b
+        uabd            v17.16b, v20.16b, v16.16b       // abs(p2 - p0)
+        and             v21.16b, v21.16b, v28.16b
+        uabd            v19.16b, v4.16b, v0.16b         // abs(q2 - q0)
+        and             v21.16b, v21.16b, v30.16b       // < beta
         shrn            v30.8b, v21.8h, #4
         mov             x7, v30.d[0]
-        cmhi            v17.16B, v22.16B, v17.16B       // < beta
-        cmhi            v19.16B, v22.16B, v19.16B       // < beta
+        cmhi            v17.16b, v22.16b, v17.16b       // < beta
+        cmhi            v19.16b, v22.16b, v19.16b       // < beta
         cbz             x7, 9f
-        and             v17.16B, v17.16B, v21.16B
-        and             v19.16B, v19.16B, v21.16B
-        and             v24.16B, v24.16B, v21.16B
-        urhadd          v28.16B, v16.16B, v0.16B
-        sub             v21.16B, v24.16B, v17.16B
-        uqadd           v23.16B, v18.16B, v24.16B
-        uhadd           v20.16B, v20.16B, v28.16B
-        sub             v21.16B, v21.16B, v19.16B
-        uhadd           v28.16B, v4.16B, v28.16B
-        umin            v23.16B, v23.16B, v20.16B
-        uqsub           v22.16B, v18.16B, v24.16B
-        uqadd           v4.16B, v2.16B, v24.16B
-        umax            v23.16B, v23.16B, v22.16B
-        uqsub           v22.16B, v2.16B, v24.16B
-        umin            v28.16B, v4.16B, v28.16B
-        uxtl            v4.8H, v0.8B
-        umax            v28.16B, v28.16B, v22.16B
-        uxtl2           v20.8H, v0.16B
-        usubw           v4.8H, v4.8H, v16.8B
-        usubw2          v20.8H, v20.8H, v16.16B
-        shl             v4.8H, v4.8H, #2
-        shl             v20.8H, v20.8H, #2
-        uaddw           v4.8H, v4.8H, v18.8B
-        uaddw2          v20.8H, v20.8H, v18.16B
-        usubw           v4.8H, v4.8H, v2.8B
-        usubw2          v20.8H, v20.8H, v2.16B
-        rshrn           v4.8B, v4.8H, #3
-        rshrn2          v4.16B, v20.8H, #3
-        bsl             v17.16B, v23.16B, v18.16B
-        bsl             v19.16B, v28.16B, v2.16B
-        neg             v23.16B, v21.16B
-        uxtl            v28.8H, v16.8B
-        smin            v4.16B, v4.16B, v21.16B
-        uxtl2           v21.8H, v16.16B
-        smax            v4.16B, v4.16B, v23.16B
-        uxtl            v22.8H, v0.8B
-        uxtl2           v24.8H, v0.16B
-        saddw           v28.8H, v28.8H, v4.8B
-        saddw2          v21.8H, v21.8H, v4.16B
-        ssubw           v22.8H, v22.8H, v4.8B
-        ssubw2          v24.8H, v24.8H, v4.16B
-        sqxtun          v16.8B, v28.8H
-        sqxtun2         v16.16B, v21.8H
-        sqxtun          v0.8B, v22.8H
-        sqxtun2         v0.16B, v24.8H
+        and             v17.16b, v17.16b, v21.16b
+        and             v19.16b, v19.16b, v21.16b
+        and             v24.16b, v24.16b, v21.16b
+        urhadd          v28.16b, v16.16b, v0.16b
+        sub             v21.16b, v24.16b, v17.16b
+        uqadd           v23.16b, v18.16b, v24.16b
+        uhadd           v20.16b, v20.16b, v28.16b
+        sub             v21.16b, v21.16b, v19.16b
+        uhadd           v28.16b, v4.16b, v28.16b
+        umin            v23.16b, v23.16b, v20.16b
+        uqsub           v22.16b, v18.16b, v24.16b
+        uqadd           v4.16b, v2.16b, v24.16b
+        umax            v23.16b, v23.16b, v22.16b
+        uqsub           v22.16b, v2.16b, v24.16b
+        umin            v28.16b, v4.16b, v28.16b
+        uxtl            v4.8h, v0.8b
+        umax            v28.16b, v28.16b, v22.16b
+        uxtl2           v20.8h, v0.16b
+        usubw           v4.8h, v4.8h, v16.8b
+        usubw2          v20.8h, v20.8h, v16.16b
+        shl             v4.8h, v4.8h, #2
+        shl             v20.8h, v20.8h, #2
+        uaddw           v4.8h, v4.8h, v18.8b
+        uaddw2          v20.8h, v20.8h, v18.16b
+        usubw           v4.8h, v4.8h, v2.8b
+        usubw2          v20.8h, v20.8h, v2.16b
+        rshrn           v4.8b, v4.8h, #3
+        rshrn2          v4.16b, v20.8h, #3
+        bsl             v17.16b, v23.16b, v18.16b
+        bsl             v19.16b, v28.16b, v2.16b
+        neg             v23.16b, v21.16b
+        uxtl            v28.8h, v16.8b
+        smin            v4.16b, v4.16b, v21.16b
+        uxtl2           v21.8h, v16.16b
+        smax            v4.16b, v4.16b, v23.16b
+        uxtl            v22.8h, v0.8b
+        uxtl2           v24.8h, v0.16b
+        saddw           v28.8h, v28.8h, v4.8b
+        saddw2          v21.8h, v21.8h, v4.16b
+        ssubw           v22.8h, v22.8h, v4.8b
+        ssubw2          v24.8h, v24.8h, v4.16b
+        sqxtun          v16.8b, v28.8h
+        sqxtun2         v16.16b, v21.8h
+        sqxtun          v0.8b, v22.8h
+        sqxtun2         v0.16b, v24.8h
 .endm

 function ff_h264_v_loop_filter_luma_neon, export=1
         h264_loop_filter_start
-        ld1             {v0.16B}, [x0], x1
-        ld1             {v2.16B}, [x0], x1
-        ld1             {v4.16B}, [x0], x1
+        ld1             {v0.16b}, [x0], x1
+        ld1             {v2.16b}, [x0], x1
+        ld1             {v4.16b}, [x0], x1
         sub             x0, x0, x1, lsl #2
         sub             x0, x0, x1, lsl #1
-        ld1             {v20.16B}, [x0], x1
-        ld1             {v18.16B}, [x0], x1
-        ld1             {v16.16B}, [x0], x1
+        ld1             {v20.16b}, [x0], x1
+        ld1             {v18.16b}, [x0], x1
+        ld1             {v16.16b}, [x0], x1
         h264_loop_filter_luma
         sub             x0, x0, x1, lsl #1
-        st1             {v17.16B}, [x0], x1
-        st1             {v16.16B}, [x0], x1
-        st1             {v0.16B}, [x0], x1
-        st1             {v19.16B}, [x0]
-9:
-        ret
+        st1             {v17.16b}, [x0], x1
+        st1             {v16.16b}, [x0], x1
+        st1             {v0.16b}, [x0], x1
+        st1             {v19.16b}, [x0]
+9:      ret
 endfunc

 function ff_h264_h_loop_filter_luma_neon, export=1
         h264_loop_filter_start
         sub             x0, x0, #4
-        ld1             {v6.8B}, [x0], x1
-        ld1             {v20.8B}, [x0], x1
-        ld1             {v18.8B}, [x0], x1
-        ld1             {v16.8B}, [x0], x1
-        ld1             {v0.8B}, [x0], x1
-        ld1             {v2.8B}, [x0], x1
-        ld1             {v4.8B}, [x0], x1
-        ld1             {v26.8B}, [x0], x1
-        ld1             {v6.D}[1], [x0], x1
-        ld1             {v20.D}[1], [x0], x1
-        ld1             {v18.D}[1], [x0], x1
-        ld1             {v16.D}[1], [x0], x1
-        ld1             {v0.D}[1], [x0], x1
-        ld1             {v2.D}[1], [x0], x1
-        ld1             {v4.D}[1], [x0], x1
-        ld1             {v26.D}[1], [x0], x1
+        ld1             {v6.8b}, [x0], x1
+        ld1             {v20.8b}, [x0], x1
+        ld1             {v18.8b}, [x0], x1
+        ld1             {v16.8b}, [x0], x1
+        ld1             {v0.8b}, [x0], x1
+        ld1             {v2.8b}, [x0], x1
+        ld1             {v4.8b}, [x0], x1
+        ld1             {v26.8b}, [x0], x1
+        ld1             {v6.d}[1], [x0], x1
+        ld1             {v20.d}[1], [x0], x1
+        ld1             {v18.d}[1], [x0], x1
+        ld1             {v16.d}[1], [x0], x1
+        ld1             {v0.d}[1], [x0], x1
+        ld1             {v2.d}[1], [x0], x1
+        ld1             {v4.d}[1], [x0], x1
+        ld1             {v26.d}[1], [x0], x1
         transpose_8x16B v6, v20, v18, v16, v0, v2, v4, v26, v21, v23
@@ -160,24 +158,23 @@ function ff_h264_h_loop_filter_luma_neon, export=1
         sub             x0, x0, x1, lsl #4
         add             x0, x0, #2
-        st1             {v17.S}[0], [x0], x1
-        st1             {v16.S}[0], [x0], x1
-        st1             {v0.S}[0], [x0], x1
-        st1             {v19.S}[0], [x0], x1
-        st1             {v17.S}[1], [x0], x1
-        st1             {v16.S}[1], [x0], x1
-        st1             {v0.S}[1], [x0], x1
-        st1             {v19.S}[1], [x0], x1
-        st1             {v17.S}[2], [x0], x1
-        st1             {v16.S}[2], [x0], x1
-        st1             {v0.S}[2], [x0], x1
-        st1             {v19.S}[2], [x0], x1
-        st1             {v17.S}[3], [x0], x1
-        st1             {v16.S}[3], [x0], x1
-        st1             {v0.S}[3], [x0], x1
-        st1             {v19.S}[3], [x0], x1
-9:
-        ret
+        st1             {v17.s}[0], [x0], x1
+        st1             {v16.s}[0], [x0], x1
+        st1             {v0.s}[0], [x0], x1
+        st1             {v19.s}[0], [x0], x1
+        st1             {v17.s}[1], [x0], x1
+        st1             {v16.s}[1], [x0], x1
+        st1             {v0.s}[1], [x0], x1
+        st1             {v19.s}[1], [x0], x1
+        st1             {v17.s}[2], [x0], x1
+        st1             {v16.s}[2], [x0], x1
+        st1             {v0.s}[2], [x0], x1
+        st1             {v19.s}[2], [x0], x1
+        st1             {v17.s}[3], [x0], x1
+        st1             {v16.s}[3], [x0], x1
+        st1             {v0.s}[3], [x0], x1
+        st1             {v19.s}[3], [x0], x1
Here, I'd love to align the right-hand columns that are uneven (although I guess such changes are a bit out of scope for what you do here).
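E.g. (spacing illustrative), instead of

        st1             {v17.s}[0], [x0], x1
        st1             {v0.s}[0], [x0], x1

something like

        st1             {v17.s}[0], [x0], x1
        st1             {v0.s}[0],  [x0], x1

so that the [x0], x1 columns line up.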
@@ -193,139 +193,139 @@ function ff_h264_idct_add8_neon, export=1
 endfunc

 .macro  idct8x8_cols pass
-  .if \pass == 0
-        va      .req    v18
-        vb      .req    v30
I think the custom formatting of these lines is better to keep as is, rather than forcing it into the formatting of the rest.
diff --git a/libavcodec/aarch64/h264qpel_neon.S b/libavcodec/aarch64/h264qpel_neon.S
index 451fd8af24..3829f17bd1 100644
--- a/libavcodec/aarch64/h264qpel_neon.S
+++ b/libavcodec/aarch64/h264qpel_neon.S
@@ -24,130 +24,130 @@
 /* H.264 qpel MC */

-.macro  lowpass_const r
+.macro  lowpass_const r
Here you could get rid of the double spaces after .macro too.
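I.e. something like this could be folded in (illustrative):

-.macro  lowpass_const r
+.macro lowpass_const r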
@@ -580,12 +580,12 @@ function \type\()_h264_qpel16_hv_lowpass_l2_neon
 endfunc
 .endm

-        h264_qpel16_hv  put
-        h264_qpel16_hv  avg
+        h264_qpel16_hv put
+        h264_qpel16_hv avg

 .macro  h264_qpel8 type
 function ff_\type\()_h264_qpel8_mc10_neon, export=1
-        lowpass_const   w3
+        lowpass_const w3
The parameter to lowpass_const was properly aligned before; now it no longer is.
 .macro  pixfunc pfx, name, suf, rnd=1, avg=0
-  .if \rnd
-    .macro avg  rd, rn, rm
+.if \rnd
+.macro avg rd, rn, rm
         urhadd          \rd, \rn, \rm
I think this looks much more like a mess than it was before, TBH
diff --git a/libavcodec/aarch64/opusdsp_neon.S b/libavcodec/aarch64/opusdsp_neon.S
index 46c2be0874..3b2b89d068 100644
--- a/libavcodec/aarch64/opusdsp_neon.S
+++ b/libavcodec/aarch64/opusdsp_neon.S
@@ -20,93 +20,93 @@
            // 0.85..^1    0.85..^2    0.85..^3    0.85..^4
 const tab_st, align=4
-        .word 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
+        .word           0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
These used to be aligned with the comments above; now they are no longer aligned.
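I.e. the point of the original layout was that each constant in the comment sits directly above its .word value, roughly (spacing illustrative):

           // 0.85..^1    0.85..^2    0.85..^3    0.85..^4
const tab_st, align=4
        .word 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f

Reindenting the .word line breaks that correspondence.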
diff --git a/libavcodec/aarch64/vc1dsp_neon.S b/libavcodec/aarch64/vc1dsp_neon.S
index 9a96c2523c..7df48ea000 100644
--- a/libavcodec/aarch64/vc1dsp_neon.S
+++ b/libavcodec/aarch64/vc1dsp_neon.S
@@ -1391,35 +1391,35 @@ function ff_vc1_unescape_buffer_helper_neon, export=1
         tst             w1, #32
         b.ne            1f
-        ld1             {v0.16b, v1.16b, v2.16b}, [x0], #48
-        ext             v25.16b, v0.16b, v1.16b, #1
This function intentionally used a different indentation style, to visualize how multiple iterations of the same algorithm are unrolled and interleaved with each other. It was discussed during the review of this patch. So for that, I'd kinda be inclined to keep it as is.
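For context, the layout there is roughly like this (a contrived sketch, not the actual vc1dsp code): instructions belonging to the interleaved second iteration are indented one step further, so each copy of the algorithm can be read down its own column:

        ld1             {v0.16b}, [x0], #16
                ld1             {v4.16b}, [x0], #16
        ext             v1.16b, v0.16b, v1.16b, #1
                ext             v5.16b, v4.16b, v5.16b, #1
        cmeq            v2.16b, v0.16b, v16.16b
                cmeq            v6.16b, v4.16b, v16.16b

Reflowing everything onto one column loses that visual grouping.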
There are lots of changes here that I agree with, but also lots of things I disagree with. I think it'd be better to split it up into individual patches, fixing one issue at a time (starting with the least controversial ones), because right now it's too big and there are too many style regressions intermixed with the good changes.
// Martin