On Mon, 23 Sep 2024, Zhao Zhili wrote:

From: Zhao Zhili <zhiliz...@tencent.com>

w_avg_8_2x2_c:                                           0.0 ( 0.00x)
w_avg_8_2x2_neon:                                        0.0 ( 0.00x)
w_avg_8_4x4_c:                                           0.2 ( 1.00x)
w_avg_8_4x4_neon:                                        0.0 ( 0.00x)
w_avg_8_8x8_c:                                           1.2 ( 1.00x)
w_avg_8_8x8_neon:                                        0.2 ( 5.00x)
w_avg_8_16x16_c:                                         4.2 ( 1.00x)
w_avg_8_16x16_neon:                                      0.8 ( 5.67x)
w_avg_8_32x32_c:                                        16.2 ( 1.00x)
w_avg_8_32x32_neon:                                      2.5 ( 6.50x)
w_avg_8_64x64_c:                                        64.5 ( 1.00x)
w_avg_8_64x64_neon:                                      9.0 ( 7.17x)
w_avg_8_128x128_c:                                     269.5 ( 1.00x)
w_avg_8_128x128_neon:                                   35.5 ( 7.59x)
w_avg_10_2x2_c:                                          0.2 ( 1.00x)
w_avg_10_2x2_neon:                                       0.2 ( 1.00x)
w_avg_10_4x4_c:                                          0.2 ( 1.00x)
w_avg_10_4x4_neon:                                       0.2 ( 1.00x)
w_avg_10_8x8_c:                                          1.0 ( 1.00x)
w_avg_10_8x8_neon:                                       0.2 ( 4.00x)
w_avg_10_16x16_c:                                        4.2 ( 1.00x)
w_avg_10_16x16_neon:                                     0.8 ( 5.67x)
w_avg_10_32x32_c:                                       16.2 ( 1.00x)
w_avg_10_32x32_neon:                                     2.5 ( 6.50x)
w_avg_10_64x64_c:                                       66.2 ( 1.00x)
w_avg_10_64x64_neon:                                    10.0 ( 6.62x)
w_avg_10_128x128_c:                                    277.8 ( 1.00x)
w_avg_10_128x128_neon:                                  39.8 ( 6.99x)
w_avg_12_2x2_c:                                          0.0 ( 0.00x)
w_avg_12_2x2_neon:                                       0.2 ( 0.00x)
w_avg_12_4x4_c:                                          0.2 ( 1.00x)
w_avg_12_4x4_neon:                                       0.0 ( 0.00x)
w_avg_12_8x8_c:                                          1.2 ( 1.00x)
w_avg_12_8x8_neon:                                       0.5 ( 2.50x)
w_avg_12_16x16_c:                                        4.8 ( 1.00x)
w_avg_12_16x16_neon:                                     0.8 ( 6.33x)
w_avg_12_32x32_c:                                       17.0 ( 1.00x)
w_avg_12_32x32_neon:                                     2.8 ( 6.18x)
w_avg_12_64x64_c:                                       64.0 ( 1.00x)
w_avg_12_64x64_neon:                                    10.0 ( 6.40x)
w_avg_12_128x128_c:                                    269.2 ( 1.00x)
w_avg_12_128x128_neon:                                  42.0 ( 6.41x)
---
libavcodec/aarch64/vvc/dsp_init.c | 34 +++++++++++
libavcodec/aarch64/vvc/inter.S    | 99 +++++++++++++++++++++++++------
2 files changed, 116 insertions(+), 17 deletions(-)

diff --git a/libavcodec/aarch64/vvc/dsp_init.c 
b/libavcodec/aarch64/vvc/dsp_init.c
index ad767d17e2..b39ebb83fc 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -52,6 +52,37 @@ void ff_vvc_avg_12_neon(uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *src0, const int16_t *src1, int width,
                        int height);

+void ff_vvc_w_avg_8_neon(uint8_t *_dst, const ptrdiff_t _dst_stride,
+                         const int16_t *src0, const int16_t *src1,
+                         const int width, const int height,
+                         uintptr_t w0_w1, uintptr_t offset_shift);

Including "const" on scalar parameters is entirely redundant, and we don't prescribe use of that elsewhere in ffmpeg, and just makes the whole declaration more noisy.

+void ff_vvc_w_avg_10_neon(uint8_t *_dst, const ptrdiff_t _dst_stride,
+                         const int16_t *src0, const int16_t *src1,
+                         const int width, const int height,
+                         uintptr_t w0_w1, uintptr_t offset_shift);
+void ff_vvc_w_avg_12_neon(uint8_t *_dst, const ptrdiff_t _dst_stride,
+                          const int16_t *src0, const int16_t *src1,
+                          const int width, const int height,
+                          uintptr_t w0_w1, uintptr_t offset_shift);
+/* When passing arguments to functions, Apple platforms diverge from the ARM64
+ * standard ABI, that we can't implement the function directly in asm.
+ */

It's fully possible to implement that in assembly, but it usually requires ugly ifdefs.

That said, I'm ok with this kind of wrapper, as it avoids the problem kinda neatly, but ifdefs in the assembly can also be needed at times.

+#define W_AVG_FUN(bit_depth) \
+static void vvc_w_avg_ ## bit_depth(uint8_t *dst, const ptrdiff_t dst_stride, \
+    const int16_t *src0, const int16_t *src1, const int width, const int 
height, \
+    const int denom, const int w0, const int w1, const int o0, const int o1) \
+{ \
+    const int shift = denom + FFMAX(3, 15 - bit_depth); \
+    const int offset = ((o0 + o1) * (1 << (bit_depth - 8)) + 1) * (1 << (shift 
- 1)); \

Same about the superfluous "const" everywhere. For local variables, I guess it can be argued that marking them as const can aid readability in some way, but I don't think we generally prescribe doing that.

The rest of the patch seems fine, thanks!

// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to