From: Zhao Zhili <zhiliz...@tencent.com>

dmvr_8_12x20_c:       2.2 ( 1.00x)
dmvr_8_12x20_neon:    0.5 ( 4.50x)
dmvr_8_20x12_c:       2.0 ( 1.00x)
dmvr_8_20x12_neon:    0.2 ( 8.00x)
dmvr_8_20x20_c:       3.2 ( 1.00x)
dmvr_8_20x20_neon:    0.5 ( 6.50x)
dmvr_12_12x20_c:      2.2 ( 1.00x)
dmvr_12_12x20_neon:   0.5 ( 4.50x)
dmvr_12_20x12_c:      2.2 ( 1.00x)
dmvr_12_20x12_neon:   0.5 ( 4.50x)
dmvr_12_20x20_c:      3.2 ( 1.00x)
dmvr_12_20x20_neon:   0.8 ( 4.33x)
---
 libavcodec/aarch64/vvc/dsp_init.c |  4 ++
 libavcodec/aarch64/vvc/inter.S    | 94 ++++++++++++++++++++++++++++++-
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c
index 995e26d163..e9bb65bd0f 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -88,6 +88,8 @@ W_AVG_FUN(12)
         const uint8_t *_src, const ptrdiff_t _src_stride, const int height, \
         const intptr_t mx, const intptr_t my, const int width);
 
+DMVR_FUN(, 8)   // NOTE(review): presumably declares ff_vvc_dmvr_8_neon; macro head not visible here
+DMVR_FUN(, 12)  // NOTE(review): presumably declares ff_vvc_dmvr_12_neon; confirm against the macro
 DMVR_FUN(hv_, 8)
 DMVR_FUN(hv_, 10)
 DMVR_FUN(hv_, 12)
@@ -164,6 +166,7 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
 
         c->inter.avg = ff_vvc_avg_8_neon;
         c->inter.w_avg = vvc_w_avg_8;
+        c->inter.dmvr[0][0] = ff_vvc_dmvr_8_neon;   // this routine never reads mx/my
         c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_8_neon;
 
         for (int i = 0; i < FF_ARRAY_ELEMS(c->sao.band_filter); i++)
@@ -213,6 +216,7 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
     } else if (bd == 12) {
         c->inter.avg = ff_vvc_avg_12_neon;
         c->inter.w_avg = vvc_w_avg_12;
+        c->inter.dmvr[0][0] = ff_vvc_dmvr_12_neon;  // this routine never reads mx/my
         c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_12_neon;
 
         c->alf.filter[LUMA] = alf_filter_luma_12_neon;
diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S
index a0bb356f07..1b3fb5b468 100644
--- a/libavcodec/aarch64/vvc/inter.S
+++ b/libavcodec/aarch64/vvc/inter.S
@@ -235,7 +235,7 @@ vvc_avg w_avg, 12
  * x5: const intptr_t my
  * w6: const int width
  */
-function ff_vvc_dmvr_hv_8_neon, export=1
+function ff_vvc_dmvr_8_neon, export=1
         dst             .req x0
         src             .req x1
         src_stride      .req x2
@@ -243,6 +243,98 @@ function ff_vvc_dmvr_hv_8_neon, export=1
         mx              .req x4
         my              .req x5
         width           .req w6
+        // 8-bit rows: widen each u8 sample to u16, shift left by 2, store to dst
+        sxtw            x6, w6                      // width as a 64-bit value for address math
+        mov             x7, #(VVC_MAX_PB_SIZE * 2 + 8)
+        cmp             width, #16                  // NOTE(review): row math below fits width 12 or 20 only; confirm callers
+        sub             src_stride, src_stride, x6  // loads post-increment src; keep only the remainder of the row
+        cset            w15, gt                     // width > 16
+        movi            v16.8h, #2                  // DMVR_SHIFT
+        sub             x7, x7, x6, lsl #1          // x7 = dst step applied by the 4-sample tail store
+1:
+        cbz             w15, 2f                     // w15 == 0: take the 8-sample path
+        ldr             q0, [src], #16              // 16 source bytes, src += 16
+        uxtl            v1.8h, v0.8b                // zero-extend low 8 bytes to 16 bits
+        uxtl2           v2.8h, v0.16b               // zero-extend high 8 bytes to 16 bits
+        ushl            v1.8h, v1.8h, v16.8h        // << 2 (every v16 lane holds 2)
+        ushl            v2.8h, v2.8h, v16.8h
+        stp             q1, q2, [dst], #32          // 16 halfwords out, dst += 32
+        b               3f
+2:
+        ldr             d0, [src], #8               // 8 source bytes, src += 8
+        uxtl            v1.8h, v0.8b
+        ushl            v1.8h, v1.8h, v16.8h
+        str             q1, [dst], #16              // 8 halfwords out, dst += 16
+3:
+        subs            height, height, #1          // sets flags for b.ne below; nothing in between writes flags
+        ldr             s3, [src], #4               // trailing 4 source bytes of the row
+        uxtl            v4.8h, v3.8b
+        ushl            v4.4h, v4.4h, v16.4h        // only the low 4 lanes are stored
+        st1             {v4.4h}, [dst], x7          // 8 bytes out, then dst += x7
+
+        add             src, src, src_stride        // add the adjusted stride computed above
+        b.ne            1b                          // loop until height reaches 0
+
+        ret
+endfunc
+
+function ff_vvc_dmvr_12_neon, export=1
+        sxtw            x6, w6                      // 16-bit-sample rows: dst = (src + 2) >> 2
+        mov             x7, #(VVC_MAX_PB_SIZE * 2 + 8)
+        cmp             width, #16                  // width/src/dst aliases come from the .req block above
+        sub             src_stride, src_stride, x6, lsl #1 // minus width * 2 bytes; loads post-increment src
+        cset            w15, gt                     // width > 16
+        movi            v16.4s, #2                  // offset4
+        sub             x7, x7, x6, lsl #1          // x7 = dst step applied by the 4-sample tail store
+1:
+        cbz             w15, 2f                     // w15 == 0: take the 8-sample path
+        ldp             q0, q1, [src], #32          // 16 halfwords in, src += 32
+        uxtl            v2.4s, v0.4h                // widen to 32 bits so the add cannot wrap
+        uxtl2           v3.4s, v0.8h
+        uxtl            v4.4s, v1.4h
+        uxtl2           v5.4s, v1.8h
+        add             v2.4s, v2.4s, v16.4s        // + 2 (rounding offset)
+        add             v3.4s, v3.4s, v16.4s
+        add             v4.4s, v4.4s, v16.4s
+        add             v5.4s, v5.4s, v16.4s
+        ushr            v2.4s, v2.4s, #2            // NOTE(review): widen+add+ushr+narrow looks equivalent to urshr #2 on .8h; confirm
+        ushr            v3.4s, v3.4s, #2
+        ushr            v4.4s, v4.4s, #2
+        ushr            v5.4s, v5.4s, #2
+        uqxtn           v2.4h, v2.4s                // saturating narrow back to 16 bits
+        uqxtn2          v2.8h, v3.4s
+        uqxtn           v4.4h, v4.4s
+        uqxtn2          v4.8h, v5.4s
+
+        stp             q2, q4, [dst], #32          // 16 halfwords out, dst += 32
+        b               3f
+2:
+        ldr             q0, [src], #16              // 8 halfwords in, src += 16
+        uxtl            v2.4s, v0.4h
+        uxtl2           v3.4s, v0.8h
+        add             v2.4s, v2.4s, v16.4s
+        add             v3.4s, v3.4s, v16.4s
+        ushr            v2.4s, v2.4s, #2
+        ushr            v3.4s, v3.4s, #2
+        uqxtn           v2.4h, v2.4s
+        uqxtn2          v2.8h, v3.4s
+        str             q2, [dst], #16              // 8 halfwords out, dst += 16
+3:
+        subs            height, height, #1          // height alias is defined outside the lines shown in this hunk
+        ldr             d0, [src], #8               // trailing 4 halfwords of the row
+        uxtl            v3.4s, v0.4h
+        add             v3.4s, v3.4s, v16.4s
+        ushr            v3.4s, v3.4s, #2
+        uqxtn           v3.4h, v3.4s
+        st1             {v3.4h}, [dst], x7          // 8 bytes out, then dst += x7
+
+        add             src, src, src_stride        // add the adjusted stride computed above
+        b.ne            1b                          // flags still come from the subs above
+
+        ret
+endfunc
+
+function ff_vvc_dmvr_hv_8_neon, export=1
         tmp0            .req x7
         tmp1            .req x8
 
-- 
2.42.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".