From a59509c554a319f8271ad4175da40788445f7a56 Mon Sep 17 00:00:00 2001 From: sunyuechi <sunyue...@iscas.ac.cn> Date: Thu, 21 Mar 2024 17:49:54 +0800 Subject: [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v
Add a `type` argument (h or v) to the epel macros. For v, the filter
row is selected with a6 instead of a5, and the neighbouring taps are
loaded at multiples of a3 (the per-row source step) around a2 instead
of at byte offsets.

C908:
vp8_put_epel4_v4_c: 11.0
vp8_put_epel4_v4_rvv_i32: 5.0
vp8_put_epel4_v6_c: 16.5
vp8_put_epel4_v6_rvv_i32: 6.2
vp8_put_epel8_v4_c: 43.7
vp8_put_epel8_v4_rvv_i32: 11.2
vp8_put_epel8_v6_c: 68.7
vp8_put_epel8_v6_rvv_i32: 13.2
vp8_put_epel16_v4_c: 92.5
vp8_put_epel16_v4_rvv_i32: 13.7
vp8_put_epel16_v6_c: 135.7
vp8_put_epel16_v6_rvv_i32: 16.5
---
 libavcodec/riscv/vp8dsp_init.c |  7 ++++++
 libavcodec/riscv/vp8dsp_rvv.S  | 44 +++++++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 6614d661f7..2f123b67fe 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -85,6 +85,13 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
         c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
         c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
         c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
+
+        c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
+        c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
+        c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
+        c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
+        c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
+        c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index a0dd46e3a8..134154acfc 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -233,9 +233,13 @@ subpel_filters:
         .byte 1, -8, 36, 108, -11, 2
         .byte 0, -1, 12, 123, -6, 0

-.macro epel_filter size
+.macro epel_filter size type
         lla             t2, subpel_filters
+.ifc \type,v
+        addi            t0, a6, -1
+.else
         addi            t0, a5, -1
+.endif
         li              t1, 6
         mul             t0, t0, t1
         add             t0, t0, t2
@@ -248,19 +252,33 @@ subpel_filters:
 .endif
 .endm

-.macro epel_load dst len size
+.macro epel_load dst len size type
+.ifc \type,v
+        sub             t6, a2, a3
+        add             a7, a2, a3
+.else
         addi            t6, a2, -1
         addi            a7, a2, 1
+.endif
         vle8.v          v24, (a2)
         vle8.v          v22, (t6)
         vle8.v          v26, (a7)
+.ifc \type,v
+        add             a7, a7, a3
+.else
         addi            a7, a7, 1
+.endif
         vle8.v          v28, (a7)
         vwmulu.vx       v16, v24, t2
         vwmulu.vx       v20, v26, t3
 .ifc \size,6
+.ifc \type,v
+        sub             t6, t6, a3
+        add             a7, a7, a3
+.else
         addi            t6, t6, -1
         addi            a7, a7, 1
+.endif
         vle8.v          v24, (t6)
         vle8.v          v26, (a7)
         vwmaccu.vx      v16, t0, v24
@@ -292,13 +310,13 @@ subpel_filters:
         vnclipu.wi      \dst, v24, 0
 .endm

-.macro epel_load_inc dst len size
-        epel_load       \dst \len \size
+.macro epel_load_inc dst len size type
+        epel_load       \dst \len \size \type
         add             a2, a2, a3
 .endm

-.macro epel len size
-        epel_filter     \size
+.macro epel len size type
+        epel_filter     \size \type

 .ifc \len,4
         vsetivli        zero, 4, e8, mf4, ta, ma
@@ -310,7 +328,7 @@ subpel_filters:

 1:
         addi            a4, a4, -1
-        epel_load_inc   v30 \len \size
+        epel_load_inc   v30 \len \size \type
         vse8.v          v30, (a0)
         add             a0, a0, a1
         bnez            a4, 1b
@@ -320,10 +338,18 @@ subpel_filters:

 .irp len 16,8,4
 func ff_put_vp8_epel\len\()_h6_rvv, zve32x
-        epel \len 6
+        epel \len 6 h
 endfunc

 func ff_put_vp8_epel\len\()_h4_rvv, zve32x
-        epel \len 4
+        epel \len 4 h
+endfunc
+
+func ff_put_vp8_epel\len\()_v6_rvv, zve32x
+        epel \len 6 v
+endfunc
+
+func ff_put_vp8_epel\len\()_v4_rvv, zve32x
+        epel \len 4 v
 endfunc
 .endr
-- 
2.44.0
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".