> Copying vectors is rarely justified - mostly only before destructive instructions such as FMA.
It is slightly different from VP8. In VP8, many scalar values are positive, so the related calculations can be easily replaced. However, in this context of VP9, since t2 is a negative number, vwmaccsu is required. Therefore, unlike the logic in VP8, we cannot use vwmulu.vx before bilin_load to avoid vmv. <u...@foxmail.com> 于2024年6月15日周六 19:51写道: > From: sunyuechi <sunyue...@iscas.ac.cn> > > C908 X60 > vp9_avg_bilin_4hv_8bpp_c : 10.7 9.5 > vp9_avg_bilin_4hv_8bpp_rvv_i32 : 4.0 3.5 > vp9_avg_bilin_8hv_8bpp_c : 38.5 34.2 > vp9_avg_bilin_8hv_8bpp_rvv_i32 : 7.2 6.5 > vp9_avg_bilin_16hv_8bpp_c : 147.2 130.5 > vp9_avg_bilin_16hv_8bpp_rvv_i32 : 14.5 12.7 > vp9_avg_bilin_32hv_8bpp_c : 574.2 509.7 > vp9_avg_bilin_32hv_8bpp_rvv_i32 : 42.5 38.0 > vp9_avg_bilin_64hv_8bpp_c : 2321.2 2017.7 > vp9_avg_bilin_64hv_8bpp_rvv_i32 : 163.5 131.0 > vp9_put_bilin_4hv_8bpp_c : 10.0 8.7 > vp9_put_bilin_4hv_8bpp_rvv_i32 : 3.5 3.0 > vp9_put_bilin_8hv_8bpp_c : 35.2 31.2 > vp9_put_bilin_8hv_8bpp_rvv_i32 : 6.5 5.7 > vp9_put_bilin_16hv_8bpp_c : 134.0 119.0 > vp9_put_bilin_16hv_8bpp_rvv_i32 : 12.7 11.5 > vp9_put_bilin_32hv_8bpp_c : 538.5 464.2 > vp9_put_bilin_32hv_8bpp_rvv_i32 : 39.7 35.2 > vp9_put_bilin_64hv_8bpp_c : 2111.7 1833.2 > vp9_put_bilin_64hv_8bpp_rvv_i32 : 138.5 122.5 > --- > libavcodec/riscv/vp9_mc_rvv.S | 38 +++++++++++++++++++++++++++++++++- > libavcodec/riscv/vp9dsp_init.c | 10 +++++++++ > 2 files changed, 47 insertions(+), 1 deletion(-) > > diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S > index fb7377048a..5241562531 100644 > --- a/libavcodec/riscv/vp9_mc_rvv.S > +++ b/libavcodec/riscv/vp9_mc_rvv.S > @@ -147,6 +147,40 @@ func ff_\op\()_vp9_bilin_64\type\()_rvv, zve32x > endfunc > .endm > > +.macro bilin_hv op > +func ff_\op\()_vp9_bilin_64hv_rvv, zve32x > + vsetvlstatic8 64, t0, 64 > +.Lbilin_hv\op: > +.ifc \op,avg > + csrwi vxrm, 0 > +.endif > + neg t1, a5 > + neg t2, a6 > + li t4, 8 > + bilin_load_h v24, put, a5 > + add a2, a2, a3 > +1: > + addi a4, a4, 
-1 > + bilin_load_h v4, put, a5 > + vwmulu.vx v16, v4, a6 > + vwmaccsu.vx v16, t2, v24 > + vwadd.wx v16, v16, t4 > + vnsra.wi v16, v16, 4 > + vadd.vv v0, v16, v24 > +.ifc \op,avg > + vle8.v v16, (a0) > + vaaddu.vv v0, v0, v16 > +.endif > + vse8.v v0, (a0) > + vmv.v.v v24, v4 > + add a2, a2, a3 > + add a0, a0, a1 > + bnez a4, 1b > + > + ret > +endfunc > +.endm > + > .irp len, 64, 32, 16, 8, 4 > copy_avg \len > .endr > @@ -155,6 +189,8 @@ bilin_h_v put, h, a5 > bilin_h_v avg, h, a5 > bilin_h_v put, v, a6 > bilin_h_v avg, v, a6 > +bilin_hv put > +bilin_hv avg > > .macro func_bilin_h_v len, op, type > func ff_\op\()_vp9_bilin_\len\()\type\()_rvv, zve32x > @@ -165,7 +201,7 @@ endfunc > > .irp len, 32, 16, 8, 4 > .irp op, put, avg > - .irp type, h, v > + .irp type, h, v, hv > func_bilin_h_v \len, \op, \type > .endr > .endr > diff --git a/libavcodec/riscv/vp9dsp_init.c > b/libavcodec/riscv/vp9dsp_init.c > index 9606d8545f..b3700dfb08 100644 > --- a/libavcodec/riscv/vp9dsp_init.c > +++ b/libavcodec/riscv/vp9dsp_init.c > @@ -83,6 +83,16 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext > *dsp, int bpp) > dsp->mc[4][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_4h_rvv; > dsp->mc[4][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_4v_rvv; > dsp->mc[4][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_4h_rvv; > + dsp->mc[0][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_64hv_rvv; > + dsp->mc[0][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_64hv_rvv; > + dsp->mc[1][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_32hv_rvv; > + dsp->mc[1][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_32hv_rvv; > + dsp->mc[2][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_16hv_rvv; > + dsp->mc[2][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_16hv_rvv; > + dsp->mc[3][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_8hv_rvv; > + dsp->mc[3][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_8hv_rvv; > + dsp->mc[4][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_4hv_rvv; > + dsp->mc[4][FILTER_BILINEAR ][1][1][1] = 
ff_avg_vp9_bilin_4hv_rvv; > > #undef init_fpel > } > -- > 2.45.2 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".