vvc: Add w_avg

Martin Storsjö Thu, 26 Sep 2024 04:25:46 -0700

On Mon, 23 Sep 2024, Zhao Zhili wrote:

From: Zhao Zhili <[email protected]>


w_avg_8_2x2_c:                                           0.0 ( 0.00x)
w_avg_8_2x2_neon:                                        0.0 ( 0.00x)
w_avg_8_4x4_c:                                           0.2 ( 1.00x)
w_avg_8_4x4_neon:                                        0.0 ( 0.00x)
w_avg_8_8x8_c:                                           1.2 ( 1.00x)
w_avg_8_8x8_neon:                                        0.2 ( 5.00x)
w_avg_8_16x16_c:                                         4.2 ( 1.00x)
w_avg_8_16x16_neon:                                      0.8 ( 5.67x)
w_avg_8_32x32_c:                                        16.2 ( 1.00x)
w_avg_8_32x32_neon:                                      2.5 ( 6.50x)
w_avg_8_64x64_c:                                        64.5 ( 1.00x)
w_avg_8_64x64_neon:                                      9.0 ( 7.17x)
w_avg_8_128x128_c:                                     269.5 ( 1.00x)
w_avg_8_128x128_neon:                                   35.5 ( 7.59x)
w_avg_10_2x2_c:                                          0.2 ( 1.00x)
w_avg_10_2x2_neon:                                       0.2 ( 1.00x)
w_avg_10_4x4_c:                                          0.2 ( 1.00x)
w_avg_10_4x4_neon:                                       0.2 ( 1.00x)
w_avg_10_8x8_c:                                          1.0 ( 1.00x)
w_avg_10_8x8_neon:                                       0.2 ( 4.00x)
w_avg_10_16x16_c:                                        4.2 ( 1.00x)
w_avg_10_16x16_neon:                                     0.8 ( 5.67x)
w_avg_10_32x32_c:                                       16.2 ( 1.00x)
w_avg_10_32x32_neon:                                     2.5 ( 6.50x)
w_avg_10_64x64_c:                                       66.2 ( 1.00x)
w_avg_10_64x64_neon:                                    10.0 ( 6.62x)
w_avg_10_128x128_c:                                    277.8 ( 1.00x)
w_avg_10_128x128_neon:                                  39.8 ( 6.99x)
w_avg_12_2x2_c:                                          0.0 ( 0.00x)
w_avg_12_2x2_neon:                                       0.2 ( 0.00x)
w_avg_12_4x4_c:                                          0.2 ( 1.00x)
w_avg_12_4x4_neon:                                       0.0 ( 0.00x)
w_avg_12_8x8_c:                                          1.2 ( 1.00x)
w_avg_12_8x8_neon:                                       0.5 ( 2.50x)
w_avg_12_16x16_c:                                        4.8 ( 1.00x)
w_avg_12_16x16_neon:                                     0.8 ( 6.33x)
w_avg_12_32x32_c:                                       17.0 ( 1.00x)
w_avg_12_32x32_neon:                                     2.8 ( 6.18x)
w_avg_12_64x64_c:                                       64.0 ( 1.00x)
w_avg_12_64x64_neon:                                    10.0 ( 6.40x)
w_avg_12_128x128_c:                                    269.2 ( 1.00x)
w_avg_12_128x128_neon:                                  42.0 ( 6.41x)
---
libavcodec/aarch64/vvc/dsp_init.c | 34 +++++++++++
libavcodec/aarch64/vvc/inter.S    | 99 +++++++++++++++++++++++++------
2 files changed, 116 insertions(+), 17 deletions(-)

diff --git a/libavcodec/aarch64/vvc/dsp_init.c 
b/libavcodec/aarch64/vvc/dsp_init.c
index ad767d17e2..b39ebb83fc 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -52,6 +52,37 @@ void ff_vvc_avg_12_neon(uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *src0, const int16_t *src1, int width,
                        int height);

+void ff_vvc_w_avg_8_neon(uint8_t *_dst, const ptrdiff_t _dst_stride,
+                         const int16_t *src0, const int16_t *src1,
+                         const int width, const int height,
+                         uintptr_t w0_w1, uintptr_t offset_shift);

Including "const" on scalar parameters is entirely redundant, and we don'tprescribe use of that elsewhere in ffmpeg, and just makes the wholedeclaration more noisy.

+void ff_vvc_w_avg_10_neon(uint8_t *_dst, const ptrdiff_t _dst_stride,
+                         const int16_t *src0, const int16_t *src1,
+                         const int width, const int height,
+                         uintptr_t w0_w1, uintptr_t offset_shift);
+void ff_vvc_w_avg_12_neon(uint8_t *_dst, const ptrdiff_t _dst_stride,
+                          const int16_t *src0, const int16_t *src1,
+                          const int width, const int height,
+                          uintptr_t w0_w1, uintptr_t offset_shift);
+/* When passing arguments to functions, Apple platforms diverge from the ARM64
+ * standard ABI, that we can't implement the function directly in asm.
+ */

It's fully possible to implement that in assembly, but it usually requiresugly ifdefs.

That said, I'm ok with this kind of wrapper, as it avoids the problemkinda neatly, but ifdefs in the assembly can also be needed at times.

+#define W_AVG_FUN(bit_depth) \
+static void vvc_w_avg_ ## bit_depth(uint8_t *dst, const ptrdiff_t dst_stride, \
+    const int16_t *src0, const int16_t *src1, const int width, const int 
height, \
+    const int denom, const int w0, const int w1, const int o0, const int o1) \
+{ \
+    const int shift = denom + FFMAX(3, 15 - bit_depth); \
+    const int offset = ((o0 + o1) * (1 << (bit_depth - 8)) + 1) * (1 << (shift 
- 1)); \

Same about the superfluous "const" everywhere. For local variables, Iguess it can be argued that marking them as const can aid readability insome way, but I don't think we generally prescribe doing that.


The rest of the patch seems fine, thanks!

// Martin

_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH v2 1/3] aarch64/vvc: Add w_avg

Reply via email to