Okay, updated it in the reply

Rémi Denis-Courmont <r...@remlab.net> 于2024年2月13日周二 03:54写道:

> Hi,
>
> To avoid repeating the code, you can either use .rept or .irp. You can
> even
> use assembler conditionals to elide the redundant code on the last
> iteration.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
From d44e7ec0950ddb431aa75992cfa12d7687d39514 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyue...@iscas.ac.cn>
Date: Fri, 2 Feb 2024 12:50:07 +0800
Subject: [PATCH 2/3] lavc/vp8dsp: R-V V vp8_idct_dc_add4y

c908:
vp8_idct_dc_add4y_c: 368.5
vp8_idct_dc_add4y_rvv_i32: 134.5
---
 libavcodec/riscv/vp8dsp_init.c |  2 ++
 libavcodec/riscv/vp8dsp_rvv.S  | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index ab020070eb..6615d3d440 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/vp8dsp.h"
 
 void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
 
 av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
 {
@@ -34,6 +35,7 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
 
     if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
         c->vp8_idct_dc_add = ff_vp8_idct_dc_add_rvv;
+        c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 8609b79937..a5a22188c1 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -36,9 +36,25 @@
         vsse32.v      v0, (a0), a2
 .endm
 
+.macro vp8_idct_dc_addy
+        vp8_idct_dc_add
+        addi          a0, a0, 4
+        addi          a1, a1, 32
+.endm
+
 func ff_vp8_idct_dc_add_rvv, zve32x
         vsetivli      zero, 4, e8, mf4, ta, ma
         vp8_idct_dc_add
 
         ret
 endfunc
+
+func ff_vp8_idct_dc_add4y_rvv, zve32x
+        vsetivli      zero, 4, e8, mf4, ta, ma
+        .rept 3
+        vp8_idct_dc_addy
+        .endr
+        vp8_idct_dc_add
+
+        ret
+endfunc
-- 
2.43.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to