Considering how marginal the measured performance gains are (3-4%), I suppose that we should not merge this. Furthermore, those measurements are not expected to improve with larger vector sizes, since the code uses only 32 bits per vector no matter what.
deemphasis_c: 7703.2 deemphasis_rvv_f32: 7452.0 --- libavcodec/riscv/opusdsp_init.c | 10 +++++--- libavcodec/riscv/opusdsp_rvv.S | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c index 88d8e77f0e..8d363aaf37 100644 --- a/libavcodec/riscv/opusdsp_init.c +++ b/libavcodec/riscv/opusdsp_init.c @@ -26,14 +26,18 @@ #include "libavcodec/opusdsp.h" void ff_opus_postfilter_rvv(float *data, int period, float *g, int len); +float ff_opus_deemphasis_rvv(float *y, float *x, float coeff, int len); av_cold void ff_opus_dsp_init_riscv(OpusDSP *d) { #if HAVE_RVV int flags = av_get_cpu_flags(); - if ((flags & AV_CPU_FLAG_RVV_F32) && (flags & AV_CPU_FLAG_RVB_ADDR) && - (flags & AV_CPU_FLAG_RVB_BASIC)) - d->postfilter = ff_opus_postfilter_rvv; + if (flags & AV_CPU_FLAG_RVV_F32) { + if ((flags & AV_CPU_FLAG_RVB_ADDR) && (flags & AV_CPU_FLAG_RVB_BASIC)) + d->postfilter = ff_opus_postfilter_rvv; + if (ff_get_rv_vlenb() >= 8) + d->deemphasis = ff_opus_deemphasis_rvv; + } #endif } diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S index 79ae86c30e..839edfa4b0 100644 --- a/libavcodec/riscv/opusdsp_rvv.S +++ b/libavcodec/riscv/opusdsp_rvv.S @@ -64,3 +64,46 @@ func ff_opus_postfilter_rvv, zve32f ret endfunc + +// FIXME: Zvl64b +func ff_opus_deemphasis_rvv, zve32f + li t0, 0x3f599a00 // 0.85f + li t1, 8 +NOHWF fmv.w.x fa0, a2 +NOHWF mv a2, a3 + vsetivli zero, 1, e32, mf2, ta, ma + vmv.s.x v8, t0 + fmv.w.x ft0, t0 + blt a2, t1, 2f +1: + vlseg8e32.v v0, (a1) + addi a2, a2, -8 + vfmacc.vf v0, fa0, v8 + addi a1, a1, 8 * 4 + vfmacc.vf v1, ft0, v0 + vfmacc.vf v2, ft0, v1 + vfmacc.vf v3, ft0, v2 + vfmacc.vf v4, ft0, v3 + vfmacc.vf v5, ft0, v4 + vfmacc.vf v6, ft0, v5 + vfmacc.vf v7, ft0, v6 + vfmv.f.s fa0, v7 + vsseg8e32.v v0, (a0) + addi a0, a0, 8 * 4 + bge a2, t1, 1b +2: + beqz a2, 4f +3: + flw fa1, (a1) + addi a2, a2, -1 + fmadd.s fa0, ft0, fa0, fa1 + addi 
a1, a1, 4 + fsw fa0, (a0) + addi a0, a0, 4 + bnez a2, 3b +4: + ret + +NOHWF fmv.x.w a0, fa0 + ret +endfunc -- 2.42.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".