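Note that the performance reported by checkasm is slightly worse. This is expected, since the assembly routine now does more work: instead of loading the tc0 values directly, it first loads the bS values and then uses them as offsets for an indexed load from tc0. --- libavcodec/riscv/h264dsp_init.c | 3 ++- libavcodec/riscv/h264dsp_rvv.S | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-)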
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index ab412a9924..9650cae66b 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -30,7 +30,8 @@ void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride, - int alpha, int beta, int8_t *tc0); + int alpha, int beta, const int8_t *tc0, + const int16_t *bS); void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S index 96a8a0a8a3..6bc5406ba3 100644 --- a/libavcodec/riscv/h264dsp_rvv.S +++ b/libavcodec/riscv/h264dsp_rvv.S @@ -126,9 +126,11 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x endfunc func ff_h264_h_loop_filter_luma_8_rvv, zve32x - vsetivli zero, 4, e32, m1, ta, ma - vle8.v v4, (a4) + vsetivli zero, 4, e8, mf4, ta, ma + vle16.v v8, (a5) li t0, 0x01010101 + vluxei16.v v4, (a4), v8 + vsetivli zero, 4, e32, m1, ta, ma vzext.vf4 v6, v4 addi a0, a0, -3 vmul.vx v6, v6, t0 -- 2.45.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".