T-Head C908:
h263dsp.dct_unquantize_inter_c:       5.7
h263dsp.dct_unquantize_inter_rvv_i32: 2.7
h263dsp.dct_unquantize_intra_c:       5.7
h263dsp.dct_unquantize_intra_rvv_i32: 3.0
SpacemiT X60: h263dsp.dct_unquantize_inter_c: 5.0 h263dsp.dct_unquantize_inter_rvv_i32: 1.2 h263dsp.dct_unquantize_intra_c: 5.0 h263dsp.dct_unquantize_intra_rvv_i32: 1.5 --- libavcodec/riscv/h263dsp_init.c | 12 +++++++++--- libavcodec/riscv/h263dsp_rvv.S | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/libavcodec/riscv/h263dsp_init.c b/libavcodec/riscv/h263dsp_init.c index 21b536366c..5d73fde865 100644 --- a/libavcodec/riscv/h263dsp_init.c +++ b/libavcodec/riscv/h263dsp_init.c @@ -25,6 +25,7 @@ #include "libavutil/riscv/cpu.h" #include "libavcodec/h263dsp.h" +void ff_h263_dct_unquantize_rvv(int16_t *, size_t start, size_t end, int, int); void ff_h263_h_loop_filter_rvv(uint8_t *src, int stride, int q); void ff_h263_v_loop_filter_rvv(uint8_t *src, int stride, int q); @@ -33,9 +34,14 @@ av_cold void ff_h263dsp_init_riscv(H263DSPContext *c) #if HAVE_RVV int flags = av_get_cpu_flags(); - if ((flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) { - c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv; - c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv; + if (flags & AV_CPU_FLAG_RVV_I32) { + if (flags & AV_CPU_FLAG_RVB_ADDR) + c->h263_dct_unquantize = ff_h263_dct_unquantize_rvv; + + if (ff_rv_vlen_least(128)) { + c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv; + c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv; + } } #endif } diff --git a/libavcodec/riscv/h263dsp_rvv.S b/libavcodec/riscv/h263dsp_rvv.S index 97503d527c..319c51a0bb 100644 --- a/libavcodec/riscv/h263dsp_rvv.S +++ b/libavcodec/riscv/h263dsp_rvv.S @@ -20,6 +20,28 @@ #include "libavutil/riscv/asm.S" +func ff_h263_dct_unquantize_rvv, zve32x + sub a2, a2, a1 + sh1add a0, a1, a0 + addi a2, a2, 1 +1: + vsetvli t0, a2, e16, m4, ta, mu + vle16.v v8, (a0) + sub a2, a2, t0 + vmv.v.x v24, a4 + vmslt.vi v0, v8, 0 + vneg.v v24, v24, v0.t + vmsne.vi v0, v8, 0 + vwmul.vx v16, v8, a3 + vwadd.wv v16, v16, v24, v0.t + vncvt.x.x.w v8, v16 + vse16.v v8, (a0) + sh1add a0, t0, a0 + 
bnez a2, 1b + + ret +endfunc + .option push .option norelax func ff_h263_h_loop_filter_rvv, zve32x -- 2.45.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".