Le perjantaina 9. kesäkuuta 2023, 10.17.27 EEST Arnie Chang a écrit : > Optimize the put and avg filtering for 4xH and 2xH blocks > > Signed-off-by: Arnie Chang <arnie.ch...@sifive.com> > --- > checkasm: using random seed 3475799765 > RVVi32: > - h264chroma.chroma_mc [OK] > checkasm: all 6 tests passed > avg_h264_chroma_mc1_8_c: 1821.5 > avg_h264_chroma_mc1_8_rvv_i32: 466.5 > avg_h264_chroma_mc2_8_c: 939.2 > avg_h264_chroma_mc2_8_rvv_i32: 466.5 > avg_h264_chroma_mc4_8_c: 502.2 > avg_h264_chroma_mc4_8_rvv_i32: 466.5 > put_h264_chroma_mc1_8_c: 1436.5 > put_h264_chroma_mc1_8_rvv_i32: 382.5 > put_h264_chroma_mc2_8_c: 824.2 > put_h264_chroma_mc2_8_rvv_i32: 382.5 > put_h264_chroma_mc4_8_c: 431.2 > put_h264_chroma_mc4_8_rvv_i32: 382.5 > > libavcodec/riscv/h264_chroma_init_riscv.c | 8 + > libavcodec/riscv/h264_mc_chroma.S | 216 ++++++++++++++-------- > 2 files changed, 144 insertions(+), 80 deletions(-) > > diff --git a/libavcodec/riscv/h264_chroma_init_riscv.c > b/libavcodec/riscv/h264_chroma_init_riscv.c index 7c905edfcd..9f95150ea3 > 100644 > --- a/libavcodec/riscv/h264_chroma_init_riscv.c > +++ b/libavcodec/riscv/h264_chroma_init_riscv.c > @@ -27,6 +27,10 @@ > > void h264_put_chroma_mc8_rvv(uint8_t *p_dst, const uint8_t *p_src, > ptrdiff_t stride, int h, int x, int y); void > h264_avg_chroma_mc8_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t > stride, int h, int x, int y); +void h264_put_chroma_mc4_rvv(uint8_t *p_dst, > const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y); +void > h264_avg_chroma_mc4_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t > stride, int h, int x, int y); +void h264_put_chroma_mc2_rvv(uint8_t *p_dst, > const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y); +void > h264_avg_chroma_mc2_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t > stride, int h, int x, int y); > > av_cold void ff_h264chroma_init_riscv(H264ChromaContext *c, int bit_depth) > { > @@ -36,6 +40,10 @@ av_cold void 
ff_h264chroma_init_riscv(H264ChromaContext > *c, int bit_depth) if (bit_depth == 8 && (flags & AV_CPU_FLAG_RVV_I32) && > ff_get_rv_vlenb() >= 16) { c->put_h264_chroma_pixels_tab[0] = > h264_put_chroma_mc8_rvv; c->avg_h264_chroma_pixels_tab[0] = > h264_avg_chroma_mc8_rvv; + c->put_h264_chroma_pixels_tab[1] = > h264_put_chroma_mc4_rvv; + c->avg_h264_chroma_pixels_tab[1] = > h264_avg_chroma_mc4_rvv; + c->put_h264_chroma_pixels_tab[2] = > h264_put_chroma_mc2_rvv; + c->avg_h264_chroma_pixels_tab[2] = > h264_avg_chroma_mc2_rvv; } > #endif > } > diff --git a/libavcodec/riscv/h264_mc_chroma.S > b/libavcodec/riscv/h264_mc_chroma.S index 364bc3156e..c97cdbad86 100644 > --- a/libavcodec/riscv/h264_mc_chroma.S > +++ b/libavcodec/riscv/h264_mc_chroma.S > @@ -19,8 +19,7 @@ > */ > #include "libavutil/riscv/asm.S" > > -.macro h264_chroma_mc8 type > -func h264_\type\()_chroma_mc8_rvv, zve32x > +.macro do_chroma_mc type width unroll
It looks like \width is only ever used as AVL. You could advantageously pass it
as a run-time argument to an internal function, and spare the instruction cache,
instead of instantiating otherwise identical code thrice.

> csrw vxrm, zero
> slli t2, a5, 3
> mul t1, a5, a4
> @@ -30,94 +29,104 @@ func h264_\type\()_chroma_mc8_rvv, zve32x
> sub a7, a4, t1
> addi a6, a5, 64
> sub t0, t2, t1
> - vsetivli t3, 8, e8, m1, ta, mu
> + vsetivli t3, \width, e8, m1, ta, mu
> beqz t1, 2f
> blez a3, 8f
> li t4, 0
> li t2, 0
> li t5, 1
> addi a5, t3, 1
> + .ifc \unroll,1
> slli t3, a2, 2
> + .else
> + slli t3, a2, 1
> + .endif

Note that all those 5-line conditional shift blocks could be simplified by
folding, e.g.:

    slli t3, a2, (1 + \unroll)

Though I wonder if we could leverage SH*ADD instructions in some cases instead
of SLLI?

(..)

> +.endm
> +
> +.macro h264_chroma_mc type width
> +func h264_\type\()_chroma_mc\width\()_rvv, zve32x
> + .ifc \width,8
> + do_chroma_mc \type 8 1
> + .else
> + li a7, 3
> + blt a3, a7, 11f
> + do_chroma_mc \type \width 1
> +11:
> + do_chroma_mc \type \width 0
> + .endif

-- 
Rémi Denis-Courmont
http://www.remlab.net/

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".