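Use load/store instructions that modify sp (pre-indexed stp, post-indexed ldp) to save and restore d8-d15 on the stack, as is done in all other functions. The current code stores these registers at negative offsets from sp without adjusting sp, i.e. below the stack pointer, and at least valgrind complains about that. --- libavutil/aarch64/tx_float_neon.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)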
diff --git a/libavutil/aarch64/tx_float_neon.S b/libavutil/aarch64/tx_float_neon.S index 4126c3b812..4be93cc963 100644 --- a/libavutil/aarch64/tx_float_neon.S +++ b/libavutil/aarch64/tx_float_neon.S @@ -866,10 +866,10 @@ FFT16_FN ns_float, 1 .macro FFT32_FN name, no_perm function ff_tx_fft32_\name\()_neon, export=1 - stp d8, d9, [sp, #-16] - stp d10, d11, [sp, #-32] - stp d12, d13, [sp, #-48] - stp d14, d15, [sp, #-64] + stp d8, d9, [sp, #-16]! + stp d10, d11, [sp, #-16]! + stp d12, d13, [sp, #-16]! + stp d14, d15, [sp, #-16]! LOAD_SUBADD SETUP_SR_RECOMB 32, x7, x8, x9 @@ -911,10 +911,10 @@ function ff_tx_fft32_\name\()_neon, export=1 zip2 v31.2d, v11.2d, v15.2d st1 { v28.4s, v29.4s, v30.4s, v31.4s }, [x1] - ldp d14, d15, [sp, #-64] - ldp d12, d13, [sp, #-48] - ldp d10, d11, [sp, #-32] - ldp d8, d9, [sp, #-16] + ldp d14, d15, [sp], #16 + ldp d12, d13, [sp], #16 + ldp d10, d11, [sp], #16 + ldp d8, d9, [sp], #16 ret endfunc -- 2.37.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".