Note that the performance reported by checkasm is slightly worse.
This is expected since the assembly function is now doing more work.
---
 libavcodec/riscv/h264dsp_init.c | 3 ++-
 libavcodec/riscv/h264dsp_rvv.S  | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index ab412a9924..9650cae66b 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -30,7 +30,8 @@
 void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                       int alpha, int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
-                                      int alpha, int beta, int8_t *tc0);
+                                      int alpha, int beta, const int8_t *tc0,
+                                      const int16_t *bS);
 void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                             int alpha, int beta, int8_t *tc0);
 
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index 96a8a0a8a3..6bc5406ba3 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -126,9 +126,11 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x
 endfunc
 
 func ff_h264_h_loop_filter_luma_8_rvv, zve32x
-        vsetivli    zero, 4, e32, m1, ta, ma
-        vle8.v      v4, (a4)
+        vsetivli    zero, 4, e8, mf4, ta, ma
+        vle16.v     v8, (a5)
         li          t0, 0x01010101
+        vluxei16.v  v4, (a4), v8
+        vsetivli    zero, 4, e32, m1, ta, ma
         vzext.vf4   v6, v4
         addi        a0, a0, -3
         vmul.vx     v6, v6, t0
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to