On Tue, 14 Jun 2022, J. Dekker wrote:
Performs the following changes:
- Lower case format specifiers
- Lower case sxtw/uxtw
- Numeric labels on same line as next instruction
Why do you feel the need to do that? I like the occasional extra spacing that an empty line for such labels adds - at least I wouldn't go changing how that's done in other places. Where there are empty lines and where there aren't is up to the author, IMO, based on how they see their code.
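To illustrate the two styles (a contrived loop, not taken from the patch):

    // label on its own line, with some breathing room before the loop body
1:
        ld1             {v0.4s}, [x1], #16
        fadd            v2.4s, v2.4s, v0.4s
        subs            w2, w2, #4
        b.gt            1b

    // versus the label folded onto the first instruction
1:      ld1             {v0.4s}, [x1], #16
        fadd            v2.4s, v2.4s, v0.4s
        subs            w2, w2, #4
        b.gt            1b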
- Indentation to 9/25

Signed-off-by: J. Dekker <j...@itanimul.li>
---
I initially started writing a full lexer to do this but ended up wasting too
much time on it. Now using mostly text editor macros. Still need to skim
through again and make a few manual changes here and there but wondering if
there are any other major automatable changes I missed.

 libavcodec/aarch64/aacpsdsp_neon.S       | 208 +++----
 libavcodec/aarch64/fft_neon.S            |  89 ++-
 libavcodec/aarch64/fmtconvert_neon.S     |  12 +-
 libavcodec/aarch64/h264cmc_neon.S        | 534 +++++++++---------
 libavcodec/aarch64/h264dsp_neon.S        | 664 +++++++++++------------
 libavcodec/aarch64/h264idct_neon.S       | 418 +++++++-------
 libavcodec/aarch64/h264pred_neon.S       |  16 +-
 libavcodec/aarch64/h264qpel_neon.S       | 644 +++++++++++-----------
 libavcodec/aarch64/hevcdsp_idct_neon.S   | 323 ++++++-----
 libavcodec/aarch64/hevcdsp_sao_neon.S    |   8 +-
 libavcodec/aarch64/hpeldsp_neon.S        | 504 ++++++++---------
 libavcodec/aarch64/mdct_neon.S           |  27 +-
 libavcodec/aarch64/mpegaudiodsp_neon.S   |  26 +-
 libavcodec/aarch64/neon.S                | 246 ++++-----
 libavcodec/aarch64/opusdsp_neon.S        | 106 ++--
 libavcodec/aarch64/pixblockdsp_neon.S    |   6 +-
 libavcodec/aarch64/sbrdsp_neon.S         | 308 +++++------
 libavcodec/aarch64/simple_idct_neon.S    | 410 +++++++-------
 libavcodec/aarch64/synth_filter_neon.S   |  15 +-
 libavcodec/aarch64/vc1dsp_neon.S         | 206 +++---
 libavcodec/aarch64/videodsp.S            |   3 +-
 libavcodec/aarch64/vp8dsp_neon.S         | 485 ++++++++---------
 libavcodec/aarch64/vp9itxfm_16bpp_neon.S |  64 +--
 libavcodec/aarch64/vp9itxfm_neon.S       |  50 +-
 libavcodec/aarch64/vp9lpf_16bpp_neon.S   |  24 +-
 libavcodec/aarch64/vp9lpf_neon.S         |  78 +--
 libavcodec/aarch64/vp9mc_16bpp_neon.S    |  30 +-
 libavcodec/aarch64/vp9mc_aarch64.S       |   9 +-
 libavcodec/aarch64/vp9mc_neon.S          |  39 +-
 29 files changed, 2688 insertions(+), 2864 deletions(-)

diff --git a/libavcodec/aarch64/aacpsdsp_neon.S b/libavcodec/aarch64/aacpsdsp_neon.S
index ff4e6e244a..dfa6a9dc33 100644
--- a/libavcodec/aarch64/aacpsdsp_neon.S
+++ b/libavcodec/aarch64/aacpsdsp_neon.S
@@ -19,130 +19,130 @@
 #include "libavutil/aarch64/asm.S"

 function ff_ps_add_squares_neon, export=1
-1:      ld1         {v0.4S,v1.4S}, [x1], #32
-        fmul        v0.4S, v0.4S, v0.4S
-        fmul        v1.4S, v1.4S, v1.4S
-        faddp       v2.4S, v0.4S, v1.4S
-        ld1         {v3.4S}, [x0]
-        fadd        v3.4S, v3.4S, v2.4S
-        st1         {v3.4S}, [x0], #16
-        subs        w2, w2, #4
+1:      ld1             {v0.4s,v1.4s}, [x1], #32
+        fmul            v0.4s, v0.4s, v0.4s
+        fmul            v1.4s, v1.4s, v1.4s
+        faddp           v2.4s, v0.4s, v1.4s
+        ld1             {v3.4s}, [x0]
+        fadd            v3.4s, v3.4s, v2.4s
+        st1             {v3.4s}, [x0], #16
+        subs            w2, w2, #4
         b.gt        1b
This leaves the b.gt parameter misaligned with the rest - the same thing happens throughout the rest of this file.
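I.e. to stay consistent, the untouched branch would need to be reindented along with the rest, something like this (columns illustrative):

        subs            w2, w2, #4
        b.gt            1b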
@@ -295,8 +295,7 @@ function fft_pass_neon
         st1             {v21.4s}, [x1], #16   // {z[o1],z[o1+1]}
         st1             {v22.4s}, [x2], #16   // {z[o2],z[o2+1]}
         st1             {v23.4s}, [x3], #16   // {z[o3],z[o3+1]}
-1:
-        ld1             {v20.4s},[x2]    // {z[o2],z[o2+1]}
+1:      ld1             {v20.4s},[x2]    // {z[o2],z[o2+1]}
         ld1             {v22.4s},[x3]    // {z[o3],z[o3+1]}
I really don't think getting rid of the empty line here or anywhere else adds anything - I think it worsens readability.
@@ -359,18 +358,18 @@ function fft\n\()_neon, align=6
 endfunc
 .endm

-        def_fft    32,    16,     8
-        def_fft    64,    32,    16
-        def_fft   128,    64,    32
-        def_fft   256,   128,    64
-        def_fft   512,   256,   128
-        def_fft  1024,   512,   256
-        def_fft  2048,  1024,   512
-        def_fft  4096,  2048,  1024
-        def_fft  8192,  4096,  2048
-        def_fft 16384,  8192,  4096
-        def_fft 32768, 16384,  8192
-        def_fft 65536, 32768, 16384
+        def_fft 32, 16, 8
+        def_fft 64, 32, 16
+        def_fft 128, 64, 32
+        def_fft 256, 128, 64
+        def_fft 512, 256, 128
+        def_fft 1024, 512, 256
+        def_fft 2048, 1024, 512
+        def_fft 4096, 2048, 1024
+        def_fft 8192, 4096, 2048
+        def_fft 16384, 8192, 4096
+        def_fft 32768, 16384, 8192
+        def_fft 65536, 32768, 16384
Previously, the columns were right-aligned here. I don't think we gain anything from changing that.
-const   pmmp, align=4
+const pmmp, align=4
Changes like this are good indeed.
diff --git a/libavcodec/aarch64/h264cmc_neon.S b/libavcodec/aarch64/h264cmc_neon.S
index f8e9407854..0add73ffec 100644
--- a/libavcodec/aarch64/h264cmc_neon.S
+++ b/libavcodec/aarch64/h264cmc_neon.S
@@ -26,24 +26,24 @@
 /* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
 .macro  h264_chroma_mc8 type, codec=h264
 function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
-  .ifc \type,avg
+.ifc \type,avg
         mov             x8, x0
-  .endif
+.endif
I didn't mind the indented .if/.endif, but I won't oppose making it consistent with the rest either, i.e. making it unindented.
diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S
index ea221e6862..926c6e8362 100644

 .macro  h264_loop_filter_luma
-        dup             v22.16B, w2                     // alpha
-        uxtl            v24.8H, v24.8B
-        uabd            v21.16B, v16.16B, v0.16B        // abs(p0 - q0)
-        uxtl            v24.4S, v24.4H
-        uabd            v28.16B, v18.16B, v16.16B       // abs(p1 - p0)
-        sli             v24.8H, v24.8H, #8
-        uabd            v30.16B, v2.16B, v0.16B         // abs(q1 - q0)
-        sli             v24.4S, v24.4S, #16
-        cmhi            v21.16B, v22.16B, v21.16B       // < alpha
-        dup             v22.16B, w3                     // beta
-        cmlt            v23.16B, v24.16B, #0
-        cmhi            v28.16B, v22.16B, v28.16B       // < beta
-        cmhi            v30.16B, v22.16B, v30.16B       // < beta
-        bic             v21.16B, v21.16B, v23.16B
-        uabd            v17.16B, v20.16B, v16.16B       // abs(p2 - p0)
-        and             v21.16B, v21.16B, v28.16B
-        uabd            v19.16B, v4.16B, v0.16B         // abs(q2 - q0)
-        and             v21.16B, v21.16B, v30.16B       // < beta
Here's a misaligned comment that could be fixed while touching this.
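E.g. (contrived spacing, just to illustrate the point):

        uabd            v28.16b, v18.16b, v16.16b   // abs(p1 - p0)
        uabd            v30.16b, v2.16b, v0.16b         // abs(q1 - q0)

reads nicer with both comments starting in the same column:

        uabd            v28.16b, v18.16b, v16.16b       // abs(p1 - p0)
        uabd            v30.16b, v2.16b, v0.16b         // abs(q1 - q0)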
+        dup             v22.16b, w2                     // alpha
+        uxtl            v24.8h, v24.8b
+        uabd            v21.16b, v16.16b, v0.16b        // abs(p0 - q0)
+        uxtl            v24.4s, v24.4h
+        uabd            v28.16b, v18.16b, v16.16b       // abs(p1 - p0)
+        sli             v24.8h, v24.8h, #8
+        uabd            v30.16b, v2.16b, v0.16b         // abs(q1 - q0)
+        sli             v24.4s, v24.4s, #16
+        cmhi            v21.16b, v22.16b, v21.16b       // < alpha
+        dup             v22.16b, w3                     // beta
+        cmlt            v23.16b, v24.16b, #0
+        cmhi            v28.16b, v22.16b, v28.16b       // < beta
+        cmhi            v30.16b, v22.16b, v30.16b       // < beta
+        bic             v21.16b, v21.16b, v23.16b
+        uabd            v17.16b, v20.16b, v16.16b       // abs(p2 - p0)
+        and             v21.16b, v21.16b, v28.16b
+        uabd            v19.16b, v4.16b, v0.16b         // abs(q2 - q0)
+        and             v21.16b, v21.16b, v30.16b       // < beta
         shrn            v30.8b, v21.8h, #4
         mov             x7, v30.d[0]
-        cmhi            v17.16B, v22.16B, v17.16B       // < beta
-        cmhi            v19.16B, v22.16B, v19.16B       // < beta
+        cmhi            v17.16b, v22.16b, v17.16b       // < beta
+        cmhi            v19.16b, v22.16b, v19.16b       // < beta
         cbz             x7, 9f
-        and             v17.16B, v17.16B, v21.16B
-        and             v19.16B, v19.16B, v21.16B
-        and             v24.16B, v24.16B, v21.16B
-        urhadd          v28.16B, v16.16B, v0.16B
-        sub             v21.16B, v24.16B, v17.16B
-        uqadd           v23.16B, v18.16B, v24.16B
-        uhadd           v20.16B, v20.16B, v28.16B
-        sub             v21.16B, v21.16B, v19.16B
-        uhadd           v28.16B, v4.16B, v28.16B
-        umin            v23.16B, v23.16B, v20.16B
-        uqsub           v22.16B, v18.16B, v24.16B
-        uqadd           v4.16B, v2.16B, v24.16B
-        umax            v23.16B, v23.16B, v22.16B
-        uqsub           v22.16B, v2.16B, v24.16B
-        umin            v28.16B, v4.16B, v28.16B
-        uxtl            v4.8H, v0.8B
-        umax            v28.16B, v28.16B, v22.16B
-        uxtl2           v20.8H, v0.16B
-        usubw           v4.8H, v4.8H, v16.8B
-        usubw2          v20.8H, v20.8H, v16.16B
-        shl             v4.8H, v4.8H, #2
-        shl             v20.8H, v20.8H, #2
-        uaddw           v4.8H, v4.8H, v18.8B
-        uaddw2          v20.8H, v20.8H, v18.16B
-        usubw           v4.8H, v4.8H, v2.8B
-        usubw2          v20.8H, v20.8H, v2.16B
-        rshrn           v4.8B, v4.8H, #3
-        rshrn2          v4.16B, v20.8H, #3
-        bsl             v17.16B, v23.16B, v18.16B
-        bsl             v19.16B, v28.16B, v2.16B
-        neg             v23.16B, v21.16B
-        uxtl            v28.8H, v16.8B
-        smin            v4.16B, v4.16B, v21.16B
-        uxtl2           v21.8H, v16.16B
-        smax            v4.16B, v4.16B, v23.16B
-        uxtl            v22.8H, v0.8B
-        uxtl2           v24.8H, v0.16B
-        saddw           v28.8H, v28.8H, v4.8B
-        saddw2          v21.8H, v21.8H, v4.16B
-        ssubw           v22.8H, v22.8H, v4.8B
-        ssubw2          v24.8H, v24.8H, v4.16B
-        sqxtun          v16.8B, v28.8H
-        sqxtun2         v16.16B, v21.8H
-        sqxtun          v0.8B, v22.8H
-        sqxtun2         v0.16B, v24.8H
+        and             v17.16b, v17.16b, v21.16b
+        and             v19.16b, v19.16b, v21.16b
+        and             v24.16b, v24.16b, v21.16b
+        urhadd          v28.16b, v16.16b, v0.16b
+        sub             v21.16b, v24.16b, v17.16b
+        uqadd           v23.16b, v18.16b, v24.16b
+        uhadd           v20.16b, v20.16b, v28.16b
+        sub             v21.16b, v21.16b, v19.16b
+        uhadd           v28.16b, v4.16b, v28.16b
+        umin            v23.16b, v23.16b, v20.16b
+        uqsub           v22.16b, v18.16b, v24.16b
+        uqadd           v4.16b, v2.16b, v24.16b
+        umax            v23.16b, v23.16b, v22.16b
+        uqsub           v22.16b, v2.16b, v24.16b
+        umin            v28.16b, v4.16b, v28.16b
+        uxtl            v4.8h, v0.8b
+        umax            v28.16b, v28.16b, v22.16b
+        uxtl2           v20.8h, v0.16b
+        usubw           v4.8h, v4.8h, v16.8b
+        usubw2          v20.8h, v20.8h, v16.16b
+        shl             v4.8h, v4.8h, #2
+        shl             v20.8h, v20.8h, #2
+        uaddw           v4.8h, v4.8h, v18.8b
+        uaddw2          v20.8h, v20.8h, v18.16b
+        usubw           v4.8h, v4.8h, v2.8b
+        usubw2          v20.8h, v20.8h, v2.16b
+        rshrn           v4.8b, v4.8h, #3
+        rshrn2          v4.16b, v20.8h, #3
+        bsl             v17.16b, v23.16b, v18.16b
+        bsl             v19.16b, v28.16b, v2.16b
+        neg             v23.16b, v21.16b
+        uxtl            v28.8h, v16.8b
+        smin            v4.16b, v4.16b, v21.16b
+        uxtl2           v21.8h, v16.16b
+        smax            v4.16b, v4.16b, v23.16b
+        uxtl            v22.8h, v0.8b
+        uxtl2           v24.8h, v0.16b
+        saddw           v28.8h, v28.8h, v4.8b
+        saddw2          v21.8h, v21.8h, v4.16b
+        ssubw           v22.8h, v22.8h, v4.8b
+        ssubw2          v24.8h, v24.8h, v4.16b
+        sqxtun          v16.8b, v28.8h
+        sqxtun2         v16.16b, v21.8h
+        sqxtun          v0.8b, v22.8h
+        sqxtun2         v0.16b, v24.8h
 .endm

 function ff_h264_v_loop_filter_luma_neon, export=1
         h264_loop_filter_start
-        ld1             {v0.16B}, [x0], x1
-        ld1             {v2.16B}, [x0], x1
-        ld1             {v4.16B}, [x0], x1
+        ld1             {v0.16b}, [x0], x1
+        ld1             {v2.16b}, [x0], x1
+        ld1             {v4.16b}, [x0], x1
         sub             x0, x0, x1, lsl #2
         sub             x0, x0, x1, lsl #1
-        ld1             {v20.16B}, [x0], x1
-        ld1             {v18.16B}, [x0], x1
-        ld1             {v16.16B}, [x0], x1
+        ld1             {v20.16b}, [x0], x1
+        ld1             {v18.16b}, [x0], x1
+        ld1             {v16.16b}, [x0], x1
         h264_loop_filter_luma
         sub             x0, x0, x1, lsl #1
-        st1             {v17.16B}, [x0], x1
-        st1             {v16.16B}, [x0], x1
-        st1             {v0.16B}, [x0], x1
-        st1             {v19.16B}, [x0]
-9:
-        ret
+        st1             {v17.16b}, [x0], x1
+        st1             {v16.16b}, [x0], x1
+        st1             {v0.16b}, [x0], x1
+        st1             {v19.16b}, [x0]
+9:      ret
 endfunc

 function ff_h264_h_loop_filter_luma_neon, export=1
         h264_loop_filter_start
         sub             x0, x0, #4
-        ld1             {v6.8B}, [x0], x1
-        ld1             {v20.8B}, [x0], x1
-        ld1             {v18.8B}, [x0], x1
-        ld1             {v16.8B}, [x0], x1
-        ld1             {v0.8B}, [x0], x1
-        ld1             {v2.8B}, [x0], x1
-        ld1             {v4.8B}, [x0], x1
-        ld1             {v26.8B}, [x0], x1
-        ld1             {v6.D}[1], [x0], x1
-        ld1             {v20.D}[1], [x0], x1
-        ld1             {v18.D}[1], [x0], x1
-        ld1             {v16.D}[1], [x0], x1
-        ld1             {v0.D}[1], [x0], x1
-        ld1             {v2.D}[1], [x0], x1
-        ld1             {v4.D}[1], [x0], x1
-        ld1             {v26.D}[1], [x0], x1
+        ld1             {v6.8b}, [x0], x1
+        ld1             {v20.8b}, [x0], x1
+        ld1             {v18.8b}, [x0], x1
+        ld1             {v16.8b}, [x0], x1
+        ld1             {v0.8b}, [x0], x1
+        ld1             {v2.8b}, [x0], x1
+        ld1             {v4.8b}, [x0], x1
+        ld1             {v26.8b}, [x0], x1
+        ld1             {v6.d}[1], [x0], x1
+        ld1             {v20.d}[1], [x0], x1
+        ld1             {v18.d}[1], [x0], x1
+        ld1             {v16.d}[1], [x0], x1
+        ld1             {v0.d}[1], [x0], x1
+        ld1             {v2.d}[1], [x0], x1
+        ld1             {v4.d}[1], [x0], x1
+        ld1             {v26.d}[1], [x0], x1
         transpose_8x16B v6, v20, v18, v16, v0, v2, v4, v26, v21, v23
@@ -160,24 +158,23 @@ function ff_h264_h_loop_filter_luma_neon, export=1
         sub             x0, x0, x1, lsl #4
         add             x0, x0, #2
-        st1             {v17.S}[0], [x0], x1
-        st1             {v16.S}[0], [x0], x1
-        st1             {v0.S}[0], [x0], x1
-        st1             {v19.S}[0], [x0], x1
-        st1             {v17.S}[1], [x0], x1
-        st1             {v16.S}[1], [x0], x1
-        st1             {v0.S}[1], [x0], x1
-        st1             {v19.S}[1], [x0], x1
-        st1             {v17.S}[2], [x0], x1
-        st1             {v16.S}[2], [x0], x1
-        st1             {v0.S}[2], [x0], x1
-        st1             {v19.S}[2], [x0], x1
-        st1             {v17.S}[3], [x0], x1
-        st1             {v16.S}[3], [x0], x1
-        st1             {v0.S}[3], [x0], x1
-        st1             {v19.S}[3], [x0], x1
-9:
-        ret
+        st1             {v17.s}[0], [x0], x1
+        st1             {v16.s}[0], [x0], x1
+        st1             {v0.s}[0], [x0], x1
+        st1             {v19.s}[0], [x0], x1
+        st1             {v17.s}[1], [x0], x1
+        st1             {v16.s}[1], [x0], x1
+        st1             {v0.s}[1], [x0], x1
+        st1             {v19.s}[1], [x0], x1
+        st1             {v17.s}[2], [x0], x1
+        st1             {v16.s}[2], [x0], x1
+        st1             {v0.s}[2], [x0], x1
+        st1             {v19.s}[2], [x0], x1
+        st1             {v17.s}[3], [x0], x1
+        st1             {v16.s}[3], [x0], x1
+        st1             {v0.s}[3], [x0], x1
+        st1             {v19.s}[3], [x0], x1
Here, I'd love to align the right-hand columns that are uneven (although I guess such changes are a bit out of scope for what you do here).
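E.g. (spacing illustrative), instead of

        st1             {v17.s}[0], [x0], x1
        st1             {v0.s}[0], [x0], x1

something like

        st1             {v17.s}[0], [x0], x1
        st1             {v0.s}[0],  [x0], x1

so that the [x0], x1 columns line up.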
@@ -193,139 +193,139 @@ function ff_h264_idct_add8_neon, export=1
 endfunc

 .macro  idct8x8_cols pass
-  .if \pass == 0
-        va      .req    v18
-        vb      .req    v30
I think the custom formatting of these lines is better to keep as is, rather than forcing it into the formatting of the rest.
diff --git a/libavcodec/aarch64/h264qpel_neon.S b/libavcodec/aarch64/h264qpel_neon.S
index 451fd8af24..3829f17bd1 100644
--- a/libavcodec/aarch64/h264qpel_neon.S
+++ b/libavcodec/aarch64/h264qpel_neon.S
@@ -24,130 +24,130 @@
 /* H.264 qpel MC */

-.macro  lowpass_const r
+.macro  lowpass_const r
Here you could get rid of the double spaces after .macro too.
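I.e. something like this could be folded in (illustrative):

-.macro  lowpass_const r
+.macro lowpass_const r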
@@ -580,12 +580,12 @@ function \type\()_h264_qpel16_hv_lowpass_l2_neon
 endfunc
 .endm

-        h264_qpel16_hv  put
-        h264_qpel16_hv  avg
+        h264_qpel16_hv put
+        h264_qpel16_hv avg

 .macro  h264_qpel8 type
 function ff_\type\()_h264_qpel8_mc10_neon, export=1
-        lowpass_const   w3
+        lowpass_const w3
The parameter to lowpass_const was properly aligned before; now it no longer is.
 .macro  pixfunc pfx, name, suf, rnd=1, avg=0
-  .if \rnd
-    .macro avg  rd, rn, rm
+.if \rnd
+.macro avg rd, rn, rm
         urhadd          \rd, \rn, \rm
I think this looks much more like a mess than it was before, TBH
diff --git a/libavcodec/aarch64/opusdsp_neon.S b/libavcodec/aarch64/opusdsp_neon.S
index 46c2be0874..3b2b89d068 100644
--- a/libavcodec/aarch64/opusdsp_neon.S
+++ b/libavcodec/aarch64/opusdsp_neon.S
@@ -20,93 +20,93 @@
            // 0.85..^1    0.85..^2    0.85..^3    0.85..^4
 const tab_st, align=4
-        .word 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
+        .word           0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
These used to be aligned with the comments above; now they are no longer aligned.
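I.e. the point of the original layout was that each constant in the comment sits directly above its .word value, roughly (spacing illustrative):

           // 0.85..^1    0.85..^2    0.85..^3    0.85..^4
const tab_st, align=4
        .word 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f

Reindenting the .word line breaks that correspondence.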
diff --git a/libavcodec/aarch64/vc1dsp_neon.S b/libavcodec/aarch64/vc1dsp_neon.S
index 9a96c2523c..7df48ea000 100644
--- a/libavcodec/aarch64/vc1dsp_neon.S
+++ b/libavcodec/aarch64/vc1dsp_neon.S
@@ -1391,35 +1391,35 @@ function ff_vc1_unescape_buffer_helper_neon, export=1
         tst             w1, #32
         b.ne            1f
-        ld1             {v0.16b, v1.16b, v2.16b}, [x0], #48
-        ext             v25.16b, v0.16b, v1.16b, #1
This function intentionally used a different indentation style, to visualize how multiple iterations of the same algorithm are unrolled and interleaved with each other. It was discussed during the review of this patch. So for that, I'd kinda be inclined to keep it as is.
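For context, the layout there is roughly like this (a contrived sketch, not the actual vc1dsp code): instructions belonging to the interleaved second iteration are indented one step further, so each copy of the algorithm can be read down its own column:

        ld1             {v0.16b}, [x0], #16
                ld1             {v4.16b}, [x0], #16
        ext             v1.16b, v0.16b, v1.16b, #1
                ext             v5.16b, v4.16b, v5.16b, #1
        cmeq            v2.16b, v0.16b, v16.16b
                cmeq            v6.16b, v4.16b, v16.16b

Reflowing everything onto one column loses that visual grouping.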
There are lots of changes here that I agree with, but also lots of things I disagree with. I think it'd be better to split it up into individual patches, fixing one issue at a time (starting with the least controversial ones), because right now it's too big and there are too many style regressions intermixed with the good changes.
// Martin