The branch, master has been updated
via 7435a3180d3381e20c1ceb184242b48b3eac687e (commit)
via 39abb1ac9421004dea61dc82ddbc8c187b9d6a65 (commit)
via c17d304e1f9bc0f96170cf3c8fba28a165050413 (commit)
via e3b0d58394484def810aca712d090be000ddeece (commit)
from df1fd43db4e20145f6c60b03a5bdeb02b3e8235a (commit)
- Log -----------------------------------------------------------------
commit 7435a3180d3381e20c1ceb184242b48b3eac687e
Author: Rémi Denis-Courmont <[email protected]>
AuthorDate: Sun Oct 5 16:03:04 2025 +0300
Commit: Rémi Denis-Courmont <[email protected]>
CommitDate: Fri Nov 7 08:43:23 2025 +0000
riscv/cpu: add av_const for VLEN helpers
This is read from a read-only constant system register.
diff --git a/libavutil/riscv/cpu.h b/libavutil/riscv/cpu.h
index f2e6b7b430..66e86a3527 100644
--- a/libavutil/riscv/cpu.h
+++ b/libavutil/riscv/cpu.h
@@ -45,7 +45,7 @@ static inline av_const bool ff_rv_zbb_support(void)
* Returns the vector size in bytes (always a power of two and at least 4).
* This is undefined behaviour if vectors are not implemented.
*/
-static inline size_t ff_get_rv_vlenb(void)
+static inline av_const size_t ff_get_rv_vlenb(void)
{
size_t vlenb;
@@ -61,7 +61,7 @@ static inline size_t ff_get_rv_vlenb(void)
* Checks that the vector bit-size is at least the given value.
* This is potentially undefined behaviour if vectors are not implemented.
*/
-static inline bool ff_rv_vlen_least(unsigned int bits)
+static inline av_const bool ff_rv_vlen_least(unsigned int bits)
{
#ifdef __riscv_v_min_vlen
if (bits <= __riscv_v_min_vlen)
commit 39abb1ac9421004dea61dc82ddbc8c187b9d6a65
Author: Rémi Denis-Courmont <[email protected]>
AuthorDate: Sun Oct 5 15:16:03 2025 +0300
Commit: Rémi Denis-Courmont <[email protected]>
CommitDate: Fri Nov 7 08:43:23 2025 +0000
pixblockdsp: avoid segments on R-V V diff_pixels_unaligned
On SpacemiT X60, before:
diff_pixels_unaligned_rvv_i32: 250.2 ( 0.59x)
...after:
diff_pixels_unaligned_rvv_i32: 56.9 ( 2.60x)
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S
b/libavcodec/riscv/pixblockdsp_rvv.S
index 89023c3320..78dcddf33a 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -66,17 +66,18 @@ endfunc
func ff_diff_pixels_unaligned_rvv, zve32x
lpad 0
+ li t3, 8
vsetivli zero, 8, e8, mf2, ta, ma
- vlsseg8e8.v v16, (a1), a3
- vlsseg8e8.v v24, (a2), a3
+1:
+ vle8.v v16, (a1)
+ add a1, a1, a3
+ vle8.v v24, (a2)
+ add a2, a2, a3
vwsubu.vv v8, v16, v24
- vwsubu.vv v9, v17, v25
- vwsubu.vv v10, v18, v26
- vwsubu.vv v11, v19, v27
- vwsubu.vv v12, v20, v28
- vwsubu.vv v13, v21, v29
- vwsubu.vv v14, v22, v30
- vwsubu.vv v15, v23, v31
- vsseg8e16.v v8, (a0)
+ addi t3, t3, -1
+ vse16.v v8, (a0)
+ addi a0, a0, 2 * 8
+ bnez t3, 1b
+
ret
endfunc
commit c17d304e1f9bc0f96170cf3c8fba28a165050413
Author: Rémi Denis-Courmont <[email protected]>
AuthorDate: Sun Oct 5 15:16:03 2025 +0300
Commit: Rémi Denis-Courmont <[email protected]>
CommitDate: Fri Nov 7 08:43:23 2025 +0000
pixblockdsp: avoid segments on R-V V get_pixels_unaligned
On SpacemiT X60, before:
get_pixels_unaligned_rvv_i32: 172.4 ( 0.37x)
...after:
get_pixels_unaligned_rvv_i32: 34.4 ( 1.84x)
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S
b/libavcodec/riscv/pixblockdsp_rvv.S
index 4425227c9b..89023c3320 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -33,17 +33,22 @@ endfunc
func ff_get_pixels_unaligned_8_rvv, zve32x
lpad 0
+ li t2, 8
vsetivli zero, 8, e8, mf2, ta, ma
- vlsseg8e8.v v16, (a1), a2
+1:
+ add t1, a1, a2
+ vle8.v v16, (a1)
+ addi t0, a0, 2 * 8
+ vle8.v v17, (t1)
+ addi t2, t2, -2
vwcvtu.x.x.v v8, v16
vwcvtu.x.x.v v9, v17
- vwcvtu.x.x.v v10, v18
- vwcvtu.x.x.v v11, v19
- vwcvtu.x.x.v v12, v20
- vwcvtu.x.x.v v13, v21
- vwcvtu.x.x.v v14, v22
- vwcvtu.x.x.v v15, v23
- vsseg8e16.v v8, (a0)
+ vse16.v v8, (a0)
+ add a1, t1, a2
+ vse16.v v9, (t0)
+ addi a0, t0, 2 * 8
+ bnez t2, 1b
+
ret
endfunc
commit e3b0d58394484def810aca712d090be000ddeece
Author: Rémi Denis-Courmont <[email protected]>
AuthorDate: Sun Oct 5 15:08:06 2025 +0300
Commit: Rémi Denis-Courmont <[email protected]>
CommitDate: Fri Nov 7 08:43:23 2025 +0000
Revert "lavc/pixblockdsp: rework R-V V get_pixels_unaligned"
The optimised version does not work if the stride is not a multiple of 8,
which can occur, as reproduced by the vsynth3-asv1 and vsynth3-asv2 tests.
This reverts commit 02594c8c011ca2cdc20334e9bb812ec8f6f37cf3.
Conflicts:
libavcodec/riscv/pixblockdsp_init.c
libavcodec/riscv/pixblockdsp_rvv.S
diff --git a/libavcodec/riscv/pixblockdsp_init.c
b/libavcodec/riscv/pixblockdsp_init.c
index 083873c0b0..8041656953 100644
--- a/libavcodec/riscv/pixblockdsp_init.c
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -65,15 +65,18 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext
*c,
if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
c->diff_pixels = ff_diff_pixels_unaligned_rvv;
c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
- }
- if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
if (!high_bit_depth) {
- c->get_pixels = ff_get_pixels_8_rvv;
+ c->get_pixels = ff_get_pixels_unaligned_8_rvv;
c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv;
}
- c->diff_pixels = ff_diff_pixels_rvv;
+ if (cpu_flags & AV_CPU_FLAG_RVV_I64) {
+ if (!high_bit_depth)
+ c->get_pixels = ff_get_pixels_8_rvv;
+
+ c->diff_pixels = ff_diff_pixels_rvv;
+ }
}
#endif
#endif
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S
b/libavcodec/riscv/pixblockdsp_rvv.S
index 85233470cf..4425227c9b 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -24,7 +24,6 @@ func ff_get_pixels_8_rvv, zve64x
lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
-1:
vlse64.v v16, (a1), a2
vsetvli zero, t0, e8, m4, ta, ma
vwcvtu.x.x.v v8, v16
@@ -32,24 +31,19 @@ func ff_get_pixels_8_rvv, zve64x
ret
endfunc
-func ff_get_pixels_unaligned_8_rvv, zve64x
+func ff_get_pixels_unaligned_8_rvv, zve32x
lpad 0
- andi t1, a1, 7
- vsetivli zero, 8, e64, m4, ta, ma
- li t0, 8 * 8
- beqz t1, 1b
- andi a1, a1, -8
- slli t2, t1, 3
- addi t1, a1, 8
- sub t3, t0, t2
- vlse64.v v16, (a1), a2
- vlse64.v v24, (t1), a2
- vsrl.vx v16, v16, t2
- vsll.vx v24, v24, t3
- vor.vv v16, v16, v24
- vsetvli zero, t0, e8, m4, ta, ma
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vlsseg8e8.v v16, (a1), a2
vwcvtu.x.x.v v8, v16
- vse16.v v8, (a0)
+ vwcvtu.x.x.v v9, v17
+ vwcvtu.x.x.v v10, v18
+ vwcvtu.x.x.v v11, v19
+ vwcvtu.x.x.v v12, v20
+ vwcvtu.x.x.v v13, v21
+ vwcvtu.x.x.v v14, v22
+ vwcvtu.x.x.v v15, v23
+ vsseg8e16.v v8, (a0)
ret
endfunc
-----------------------------------------------------------------------
Summary of changes:
libavcodec/riscv/pixblockdsp_init.c | 11 +++++---
libavcodec/riscv/pixblockdsp_rvv.S | 52 ++++++++++++++++++-------------------
libavutil/riscv/cpu.h | 4 +--
3 files changed, 35 insertions(+), 32 deletions(-)
hooks/post-receive
--
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]