Ping. ("[PATCH 1/5] lavc/vvc_mc: R-V V put_pixels" comes after this patch.) <u...@foxmail.com> 于2024年9月29日周日 00:47写道:
> From: sunyuechi <sunyue...@iscas.ac.cn> > > k230 banana_f3 > dmvr_8_12x20_c: 619.3 ( 1.00x) 624.1 ( 1.00x) > dmvr_8_12x20_rvv_i32: 128.6 ( 4.82x) 103.4 ( 6.04x) > dmvr_8_20x12_c: 610.0 ( 1.00x) 665.6 ( 1.00x) > dmvr_8_20x12_rvv_i32: 137.6 ( 4.44x) 92.9 ( 7.17x) > dmvr_8_20x20_c: 1008.0 ( 1.00x) 1082.7 ( 1.00x) > dmvr_8_20x20_rvv_i32: 221.1 ( 4.56x) 155.4 ( 6.97x) > dmvr_h_8_12x20_c: 2008.0 ( 1.00x) 2009.7 ( 1.00x) > dmvr_h_8_12x20_rvv_i32: 239.6 ( 8.38x) 186.7 (10.77x) > dmvr_h_8_20x12_c: 1989.5 ( 1.00x) 2009.4 ( 1.00x) > dmvr_h_8_20x12_rvv_i32: 230.3 ( 8.64x) 155.4 (12.93x) > dmvr_h_8_20x20_c: 3304.1 ( 1.00x) 3342.9 ( 1.00x) > dmvr_h_8_20x20_rvv_i32: 378.3 ( 8.73x) 248.9 (13.43x) > dmvr_hv_8_12x20_c: 3609.8 ( 1.00x) 3603.4 ( 1.00x) > dmvr_hv_8_12x20_rvv_i32: 369.1 ( 9.78x) 322.1 (11.19x) > dmvr_hv_8_20x12_c: 3628.3 ( 1.00x) 3624.2 ( 1.00x) > dmvr_hv_8_20x12_rvv_i32: 322.8 (11.24x) 238.7 (15.19x) > dmvr_hv_8_20x20_c: 5933.8 ( 1.00x) 5936.6 ( 1.00x) > dmvr_hv_8_20x20_rvv_i32: 526.5 (11.27x) 374.1 (15.87x) > dmvr_v_8_12x20_c: 2156.3 ( 1.00x) 2155.4 ( 1.00x) > dmvr_v_8_12x20_rvv_i32: 239.6 ( 9.00x) 176.2 (12.24x) > dmvr_v_8_20x12_c: 2137.6 ( 1.00x) 2165.9 ( 1.00x) > dmvr_v_8_20x12_rvv_i32: 230.3 ( 9.28x) 155.2 (13.96x) > dmvr_v_8_20x20_c: 4183.8 ( 1.00x) 3592.9 ( 1.00x) > dmvr_v_8_20x20_rvv_i32: 369.3 (11.33x) 249.2 (14.42x) > --- > libavcodec/riscv/vvc/vvc_mc_rvv.S | 120 +++++++++++++++++++++++++++++ > libavcodec/riscv/vvc/vvcdsp_init.c | 22 ++++++ > 2 files changed, 142 insertions(+) > > diff --git a/libavcodec/riscv/vvc/vvc_mc_rvv.S > b/libavcodec/riscv/vvc/vvc_mc_rvv.S > index 18532616d9..2c634af48f 100644 > --- a/libavcodec/riscv/vvc/vvc_mc_rvv.S > +++ b/libavcodec/riscv/vvc/vvc_mc_rvv.S > @@ -285,3 +285,123 @@ endfunc > func_w_avg 128 > func_w_avg 256 > #endif > + > +func dmvr zve32x, zbb, zba > + lpad 0 > + li t0, 4 > +1: > + add t1, a1, a2 > + addi t4, a0, 128*2 > + vle8.v v0, (a1) > + vle8.v v4, (t1) > + addi a3, a3, -2 > + vwmulu.vx v16, v0, t0 > + 
vwmulu.vx v20, v4, t0 > + vse16.v v16, (a0) > + vse16.v v20, (t4) > + sh1add a1, a2, a1 > + add a0, a0, 128*2*2 > + bnez a3, 1b > + ret > +endfunc > + > +.macro dmvr_h_v mn, type, w, vlen > +dmvr_\type\vlen\w: > + lla t4, ff_vvc_inter_luma_dmvr_filters > + sh1add t4, \mn, t4 > + lbu t5, (t4) > + lbu t6, 1(t4) > +1: > + vsetvlstatic8 \w, \vlen > +.ifc \type,h > + addi t0, a1, 1 > + addi t1, a1, 2 > +.else > + add t0, a1, a2 > + add t1, t0, a2 > +.endif > + vle8.v v0, (a1) > + vle8.v v4, (t0) > + vle8.v v8, (t1) > + addi a3, a3, -2 > + addi t2, a0, 128*2 > + vwmulu.vx v12, v0, t5 > + vwmulu.vx v24, v4, t5 > + vwmaccu.vx v12, t6, v4 > + vwmaccu.vx v24, t6, v8 > + vsetvlstatic16 \w, \vlen > + vssrl.vi v12, v12, 2 > + vssrl.vi v24, v24, 2 > + vse16.v v12, (a0) > + vse16.v v24, (t2) > + add a0, a0, 128*4 > + sh1add a1, a2, a1 > + bnez a3, 1b > + ret > +.endm > + > +.macro dmvr_load_h dst, filter0, filter1, w, vlen > + vsetvlstatic8 \w, \vlen > + addi a6, a1, 1 > + vle8.v \dst, (a1) > + vle8.v v2, (a6) > + vwmulu.vx v4, \dst, \filter0 > + vwmaccu.vx v4, \filter1, v2 > + vsetvlstatic16 \w, \vlen > + vssrl.vi \dst, v4, 2 > +.endm > + > +.macro dmvr_hv w, vlen > +dmvr_hv\vlen\w: > + lla t0, ff_vvc_inter_luma_dmvr_filters > + sh1add t1, a4, t0 > + sh1add t2, a5, t0 > + lbu t3, (t1) // filter[mx][0] > + lbu t4, 1(t1) // filter[mx][1] > + lbu t5, (t2) // filter[my][0] > + lbu t6, 1(t2) // filter[my][1] > + dmvr_load_h v12, t3, t4, \w, \vlen > + add a1, a1, a2 > +1: > + vmul.vx v28, v12, t5 > + addi a3, a3, -1 > + dmvr_load_h v12, t3, t4, \w, \vlen > + vmacc.vx v28, t6, v12 > + vssrl.vi v28, v28, 4 > + vse16.v v28, (a0) > + add a1, a1, a2 > + addi a0, a0, 128*2 > + bnez a3, 1b > + ret > +.endm > + > +.macro func_dmvr vlen, name > +func ff_vvc_\name\()_8_rvv_\vlen\(), zve32x, zbb, zba > + lpad 0 > + li t0, 20 > + beq a6, t0, DMVR\name\vlen\()20 > + .irp w,12,20 > +DMVR\name\vlen\w: > + .ifc \name, dmvr > + vsetvlstatic8 \w, \vlen > + j \name > + .else > + csrwi vxrm, 0 > + j 
\name\()\vlen\w > + .endif > + .endr > +endfunc > +.endm > + > + > +.irp vlen,256,128 > +.irp w,12,20 > +dmvr_h_v a4, h, \w, \vlen > +dmvr_h_v a5, v, \w, \vlen > +dmvr_hv \w, \vlen > +.endr > +func_dmvr \vlen, dmvr > +func_dmvr \vlen, dmvr_h > +func_dmvr \vlen, dmvr_v > +func_dmvr \vlen, dmvr_hv > +.endr > diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c > b/libavcodec/riscv/vvc/vvcdsp_init.c > index ac1e7dda7d..7df3ce58db 100644 > --- a/libavcodec/riscv/vvc/vvcdsp_init.c > +++ b/libavcodec/riscv/vvc/vvcdsp_init.c > @@ -37,6 +37,26 @@ void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t > dst_stride, > AVG_PROTOTYPES(8, rvv_128) > AVG_PROTOTYPES(8, rvv_256) > > +#define DMVR_PROTOTYPES(bd, opt) > \ > +void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t > src_stride, \ > + int height, intptr_t mx, intptr_t my, int width); > \ > +void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, > ptrdiff_t src_stride, \ > + int height, intptr_t mx, intptr_t my, int width); > \ > +void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, > ptrdiff_t src_stride, \ > + int height, intptr_t mx, intptr_t my, int width); > \ > +void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, > ptrdiff_t src_stride, \ > + int height, intptr_t mx, intptr_t my, int width); > \ > + > +DMVR_PROTOTYPES(8, rvv_128) > +DMVR_PROTOTYPES(8, rvv_256) > + > +#define DMVR_INIT(bd, opt) do { \ > + c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_##opt; \ > + c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_##opt; \ > + c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_##opt; \ > + c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_##opt; \ > +} while (0) > + > void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) > { > #if HAVE_RVV > @@ -51,6 +71,7 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const > int bd) > # if (__riscv_xlen == 64) > c->inter.w_avg = ff_vvc_w_avg_8_rvv_256; > # endif > + DMVR_INIT(8, rvv_256); > break; > default: > break; > @@ -63,6 +84,7 @@ 
void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const > int bd) > # if (__riscv_xlen == 64) > c->inter.w_avg = ff_vvc_w_avg_8_rvv_128; > # endif > + DMVR_INIT(8, rvv_128); > break; > default: > break; > -- > 2.46.2 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".