Would it be better to replace the two vsetvlstatic8 and vsetvlstatic16 with two vsetvl? This would require the previous patch and this one to work together, increasing the number of lines of code and making the code a bit harder to read. Additionally, I have a question about patch 4 'save one R-V GPR' and patch 5. Should they be submitted as a single patch? Because patch 4 looks similar to what I initially submitted, and you suggested changing it to save lines of code. If it is only for patch 5, shouldn't they be combined together?
Rémi Denis-Courmont <r...@remlab.net> 于2024年5月25日周六 23:39写道: > --- > libavcodec/riscv/vp8dsp_rvv.S | 56 ++++++++++++++++++++--------------- > 1 file changed, 32 insertions(+), 24 deletions(-) > > diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S > index a4fcd158a5..002e7f3174 100644 > --- a/libavcodec/riscv/vp8dsp_rvv.S > +++ b/libavcodec/riscv/vp8dsp_rvv.S > @@ -32,16 +32,6 @@ > .endif > .endm > > -.macro vsetvlstatic16 len > -.if \len <= 4 > - vsetivli zero, \len, e16, mf2, ta, ma > -.elseif \len <= 8 > - vsetivli zero, \len, e16, m1, ta, ma > -.elseif \len <= 16 > - vsetivli zero, \len, e16, m2, ta, ma > -.endif > -.endm > - > .macro vp8_idct_dc_add > vlse32.v v0, (a0), a2 > lh a5, 0(a1) > @@ -181,13 +171,8 @@ const subpel_filters > .byte 0, -1, 12, 123, -6, 0 > endconst > > -.macro epel len size type > -func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x > -.ifc \type,v > - addi t0, a6, -1 > -.else > - addi t0, a5, -1 > -.endif > +.macro epel_common size, type > +func ff_put_vp8_epel_\type\()\size\().rvv, zve32x > lla t2, subpel_filters > sh1add t0, t0, t0 > sh1add t0, t0, t2 > @@ -198,7 +183,6 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, > zve32x > lb t5, 5(t0) > lb t0, (t0) > .endif > - vsetvlstatic8 \len > 1: > addi a4, a4, -1 > .ifc \type,v > @@ -236,11 +220,11 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, > zve32x > vwmaccsu.vx v16, t1, v22 > vwmaccsu.vx v16, t4, v28 > vwadd.wx v16, v16, t6 > - vsetvlstatic16 \len > + vsetvl zero, zero, a6 # e16 > vwadd.vv v24, v16, v20 > vnsra.wi v24, v24, 7 > vmax.vx v24, v24, zero > - vsetvlstatic8 \len > + vsetvl zero, zero, a5 # e8 > vnclipu.wi v30, v24, 0 > add a2, a2, a3 > vse8.v v30, (a0) > @@ -251,9 +235,33 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, > zve32x > endfunc > .endm > > +.macro epel len, size, type > +func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x > +.ifc \type,v > + addi t0, a6, -1 > +.else > + addi t0, a5, -1 > +.endif > +.if \len <= 4 > + li a5, 0306 # e8, mf4, ta, ma > + li a6, 0317 # e16, mf2, ta, ma > +.elseif \len <= 8 > + li a5, 0307 # e8, mf2, ta, ma > + li a6, 0310 # e16, m1, ta, ma > +.else # if len <= 16 > + li a5, 0300 # e8, m1, ta, ma > + li a6, 0311 # e16, m2, ta, ma > +.endif > + vsetvlstatic8 \len > + j ff_put_vp8_epel_\type\()\size\().rvv > +endfunc > +.endm > + > +.irp type,h,v > +.irp size,4,6 > +epel_common \size, \type > .irp len,16,8,4 > -epel \len 6 h > -epel \len 4 h > -epel \len 6 v > -epel \len 4 v > +epel \len, \size, \type > +.endr > +.endr > .endr > -- > 2.45.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".