A76 pix_norm1_c: 231.5 pix_norm1_neon: 44.2 ( 5.24x) pix_norm1_dotprod: 20.7 (11.18x) --- libavcodec/aarch64/mpegvideoencdsp_init.c | 10 ++++++++ libavcodec/aarch64/mpegvideoencdsp_neon.S | 28 +++++++++++++++++++++++ 2 files changed, 38 insertions(+)
diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c index 7eb632ed1b..d0ce07e178 100644 --- a/libavcodec/aarch64/mpegvideoencdsp_init.c +++ b/libavcodec/aarch64/mpegvideoencdsp_init.c @@ -27,6 +27,10 @@ int ff_pix_sum16_neon(const uint8_t *pix, int line_size); int ff_pix_norm1_neon(const uint8_t *pix, int line_size); +#if HAVE_DOTPROD +int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size); +#endif + av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c, AVCodecContext *avctx) { @@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c, c->pix_sum = ff_pix_sum16_neon; c->pix_norm1 = ff_pix_norm1_neon; } + +#if HAVE_DOTPROD + if (have_dotprod(cpu_flags)) { + c->pix_norm1 = ff_pix_norm1_neon_dotprod; + } +#endif } diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S index 89e50e29b3..eccbdd850f 100644 --- a/libavcodec/aarch64/mpegvideoencdsp_neon.S +++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S @@ -65,3 +65,31 @@ function ff_pix_norm1_neon, export=1 ret endfunc + +#if HAVE_DOTPROD +ENABLE_DOTPROD + +function ff_pix_norm1_neon_dotprod, export=1 +// x0 const uint8_t *pix +// x1 int line_size + + sxtw x1, w1 + movi v0.16b, #0 + mov w2, #16 + +1: + ld1 { v1.16b }, [x0], x1 + ld1 { v2.16b }, [x0], x1 + udot v0.4s, v1.16b, v1.16b + subs w2, w2, #2 + udot v0.4s, v2.16b, v2.16b + b.ne 1b + + uaddlv d0, v0.4s + fmov w0, s0 + + ret +endfunc + +DISABLE_DOTPROD +#endif -- 2.30.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".