[FFmpeg-cvslog] swscale/aarch64: Add bgr24 to yuv

2024-07-05 Thread Zhao Zhili
ffmpeg | branch: master | Zhao Zhili  | Mon Jun 24 
21:02:11 2024 +0800| [b8b71be07a7969e9c450bc7f61b7d6135f60e61c] | committer: 
Zhao Zhili

swscale/aarch64: Add bgr24 to yuv

Test on Apple M1 with kperf
: -O3   : -O3 -fno-vectorize
bgr24_to_uv_8_c : 28.5  : 52.5
bgr24_to_uv_8_neon  : 54.5  : 59.7
bgr24_to_uv_128_c   : 294.0 : 830.7
bgr24_to_uv_128_neon: 99.7  : 112.0
bgr24_to_uv_1080_c  : 965.0 : 6624.0
bgr24_to_uv_1080_neon   : 751.5 : 754.7
bgr24_to_uv_1920_c  : 1693.2: 11554.5
bgr24_to_uv_1920_neon   : 1292.5: 1307.5
bgr24_to_uv_half_8_c: 54.2  : 37.0
bgr24_to_uv_half_8_neon : 27.2  : 22.5
bgr24_to_uv_half_128_c  : 127.2 : 392.5
bgr24_to_uv_half_128_neon   : 63.0  : 52.0
bgr24_to_uv_half_1080_c : 880.2 : 3329.0
bgr24_to_uv_half_1080_neon  : 401.5 : 390.7
bgr24_to_uv_half_1920_c : 1585.7: 6390.7
bgr24_to_uv_half_1920_neon  : 694.7 : 698.7
bgr24_to_y_8_c  : 21.7  : 22.5
bgr24_to_y_8_neon   : 797.2 : 25.5
bgr24_to_y_128_c: 88.0  : 280.5
bgr24_to_y_128_neon : 63.7  : 55.0
bgr24_to_y_1080_c   : 616.7 : 2208.7
bgr24_to_y_1080_neon: 900.0 : 452.0
bgr24_to_y_1920_c   : 1093.2: 3894.7
bgr24_to_y_1920_neon: 777.2 : 767.5

Signed-off-by: Zhao Zhili 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b8b71be07a7969e9c450bc7f61b7d6135f60e61c
---

 libswscale/aarch64/input.S   | 71 
 libswscale/aarch64/swscale.c | 32 
 2 files changed, 71 insertions(+), 32 deletions(-)

diff --git a/libswscale/aarch64/input.S b/libswscale/aarch64/input.S
index 33afa34111..2cfec4cb6a 100644
--- a/libswscale/aarch64/input.S
+++ b/libswscale/aarch64/input.S
@@ -20,7 +20,7 @@
 
 #include "libavutil/aarch64/asm.S"
 
-.macro rgb24_to_yuv_load_rgb, src
+.macro rgb_to_yuv_load_rgb src
 ld3 { v16.16b, v17.16b, v18.16b }, [\src]
 uxtlv19.8h, v16.8b // v19: r
 uxtlv20.8h, v17.8b // v20: g
@@ -30,7 +30,7 @@
 uxtl2   v24.8h, v18.16b// v24: b
 .endm
 
-.macro rgb24_to_yuv_product, r, g, b, dst1, dst2, dst, coef0, coef1, coef2, 
right_shift
+.macro rgb_to_yuv_product r, g, b, dst1, dst2, dst, coef0, coef1, coef2, 
right_shift
 mov \dst1\().16b, v6.16b// dst1 = 
const_offset
 mov \dst2\().16b, v6.16b// dst2 = 
const_offset
 smlal   \dst1\().4s, \coef0\().4h, \r\().4h // dst1 += rx 
* r
@@ -43,12 +43,20 @@
 sqshrn2 \dst\().8h, \dst2\().4s, \right_shift   // 
dst_higher_half = dst2 >> right_shift
 .endm
 
+function ff_bgr24ToY_neon, export=1
+cmp w4, #0  // check width > 0
+ldp w12, w11, [x5]  // w12: ry, w11: gy
+ldr w10, [x5, #8]   // w10: by
+b.gt4f
+ret
+endfunc
+
 function ff_rgb24ToY_neon, export=1
 cmp w4, #0  // check width > 0
 ldp w10, w11, [x5]  // w10: ry, w11: gy
 ldr w12, [x5, #8]   // w12: by
 b.le3f
-
+4:
 mov w9, #256// w9 = 1 << (RGB2YUV_SHIFT - 
7)
 movkw9, #8, lsl #16 // w9 += 32 << (RGB2YUV_SHIFT 
- 1)
 dup v6.4s, w9   // w9: const_offset
@@ -59,9 +67,9 @@ function ff_rgb24ToY_neon, export=1
 dup v2.8h, w12
 b.lt2f
 1:
-rgb24_to_yuv_load_rgb x1
-rgb24_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9
-rgb24_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9
+rgb_to_yuv_load_rgb x1
+rgb_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9
+rgb_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9
 sub w4, w4, #16 // width -= 16
 add x1, x1, #48 // src += 48
 cmp w4, #16 // width >= 16 ?
@@ -85,10 +93,7 @@ function ff_rgb24ToY_neon, export=1
 ret
 endfunc
 
-.macro rgb24_load_uv_coeff half
-ldp w10, w11, [x6, #12] // w10: ru, w11: gu
-ldp w12, w13, [x6, #20] // w12: bu, w13: rv
-ldp w14, w15, [x6, #28] // w14: gv, w15: bv
+.macro rgb_set_uv_coeff half
 .if \half
 mov w9, #512
 movkw9, #128, lsl #16   

[FFmpeg-cvslog] swscale/aarch64: Add argb/abgr to yuv

2024-07-05 Thread Zhao Zhili
ffmpeg | branch: master | Zhao Zhili  | Mon Jun 24 
21:02:13 2024 +0800| [4d90a76986cf5ef80266ce845679321b68e3412d] | committer: 
Zhao Zhili

swscale/aarch64: Add argb/abgr to yuv

Test on Apple M1 with kperf:
: -O3   : -O3 -fno-vectorize
abgr_to_uv_8_c  : 19.4  : 26.1
abgr_to_uv_8_neon   : 29.9  : 51.1
abgr_to_uv_128_c: 146.4 : 558.9
abgr_to_uv_128_neon : 85.1  : 83.4
abgr_to_uv_1080_c   : 1162.6: 4786.4
abgr_to_uv_1080_neon: 819.6 : 826.6
abgr_to_uv_1920_c   : 2063.6: 8492.1
abgr_to_uv_1920_neon: 1435.1: 1447.1
abgr_to_uv_half_8_c : 16.4  : 11.4
abgr_to_uv_half_8_neon  : 35.6  : 20.4
abgr_to_uv_half_128_c   : 108.6 : 359.4
abgr_to_uv_half_128_neon: 75.4  : 42.6
abgr_to_uv_half_1080_c  : 883.4 : 2885.6
abgr_to_uv_half_1080_neon   : 460.6 : 481.1
abgr_to_uv_half_1920_c  : 1553.6: 5106.9
abgr_to_uv_half_1920_neon   : 817.6 : 820.4
abgr_to_y_8_c   : 6.1   : 26.4
abgr_to_y_8_neon: 40.6  : 6.4
abgr_to_y_128_c : 99.9  : 390.1
abgr_to_y_128_neon  : 67.4  : 55.9
abgr_to_y_1080_c: 735.9 : 3170.4
abgr_to_y_1080_neon : 534.6 : 536.6
abgr_to_y_1920_c: 1279.4: 6016.4
abgr_to_y_1920_neon : 932.6 : 927.6

Signed-off-by: Zhao Zhili 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4d90a76986cf5ef80266ce845679321b68e3412d
---

 libswscale/aarch64/input.S   | 86 +---
 libswscale/aarch64/swscale.c | 17 +
 2 files changed, 82 insertions(+), 21 deletions(-)

diff --git a/libswscale/aarch64/input.S b/libswscale/aarch64/input.S
index ce5b042371..5cb18711fb 100644
--- a/libswscale/aarch64/input.S
+++ b/libswscale/aarch64/input.S
@@ -34,6 +34,16 @@
 uxtl2   v24.8h, v18.16b// v24: b
 .endm
 
+.macro argb_to_yuv_load_rgb src
+ld4 { v16.16b, v17.16b, v18.16b, v19.16b }, [\src]
+uxtlv21.8h, v19.8b // v21: b
+uxtl2   v24.8h, v19.16b// v24: b
+uxtlv19.8h, v17.8b // v19: r
+uxtlv20.8h, v18.8b // v20: g
+uxtl2   v22.8h, v17.16b// v22: r
+uxtl2   v23.8h, v18.16b// v23: g
+.endm
+
 .macro rgb_to_yuv_product r, g, b, dst1, dst2, dst, coef0, coef1, coef2, 
right_shift
 mov \dst1\().16b, v6.16b// dst1 = 
const_offset
 mov \dst2\().16b, v6.16b// dst2 = 
const_offset
@@ -47,7 +57,7 @@
 sqshrn2 \dst\().8h, \dst2\().4s, \right_shift   // 
dst_higher_half = dst2 >> right_shift
 .endm
 
-.macro rgbToY_neon fmt_bgr, fmt_rgb, element
+.macro rgbToY_neon fmt_bgr, fmt_rgb, element, alpha_first=0
 function ff_\fmt_bgr\()ToY_neon, export=1
 cmp w4, #0  // check width > 0
 ldp w12, w11, [x5]  // w12: ry, w11: gy
@@ -72,7 +82,11 @@ function ff_\fmt_rgb\()ToY_neon, export=1
 dup v2.8h, w12
 b.lt2f
 1:
+.if \alpha_first
+argb_to_yuv_load_rgb x1
+.else
 rgb_to_yuv_load_rgb x1, \element
+.endif
 rgb_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9
 rgb_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9
 sub w4, w4, #16 // width -= 16
@@ -82,9 +96,15 @@ function ff_\fmt_rgb\()ToY_neon, export=1
 b.ge1b
 cbz x4, 3f
 2:
+.if \alpha_first
+ldrbw13, [x1, #1]   // w13: r
+ldrbw14, [x1, #2]   // w14: g
+ldrbw15, [x1, #3]   // w15: b
+.else
 ldrbw13, [x1]   // w13: r
 ldrbw14, [x1, #1]   // w14: g
 ldrbw15, [x1, #2]   // w15: b
+.endif
 
 smaddl  x13, w13, w10, x9   // x13 = ry * r + const_offset
 smaddl  x13, w14, w11, x13  // x13 += gy * g
@@ -103,6 +123,8 @@ rgbToY_neon bgr24, rgb24, element=3
 
 rgbToY_neon bgra32, rgba32, element=4
 
+rgbToY_neon abgr32, argb32, element=4, alpha_first=1
+
 .macro rgb_set_uv_coeff half
 .if \half
 mov w9, #512
@@ -120,7 +142,21 @@ rgbToY_neon bgra32, rgba32, element=4
 dup v6.4s, w9
 .endm
 
-.macro rgbToUV_half_neon fmt_bgr, fmt_rgb, element
+.macro rgb_load_add_half off_r1, off_r2, off_g1, off_g2, off_b1, off_b2
+ldrbw2, [x3, #\off_

[FFmpeg-cvslog] swscale/aarch64: Add bgra/rgba to yuv

2024-07-05 Thread Zhao Zhili
ffmpeg | branch: master | Zhao Zhili  | Mon Jun 24 
21:02:12 2024 +0800| [52422133ae9905fdd3c4845a41ac7af9a678b47a] | committer: 
Zhao Zhili

swscale/aarch64: Add bgra/rgba to yuv

Test on Apple M1 with kperf
: -O3   : -O3 -fno-vectorize
bgra_to_uv_8_c  : 13.4  : 27.5
bgra_to_uv_8_neon   : 37.4  : 41.7
bgra_to_uv_128_c: 155.9 : 550.2
bgra_to_uv_128_neon : 91.7  : 92.7
bgra_to_uv_1080_c   : 1173.2: 4558.2
bgra_to_uv_1080_neon: 822.7 : 809.5
bgra_to_uv_1920_c   : 2078.2: 8115.2
bgra_to_uv_1920_neon: 1437.7: 1438.7
bgra_to_uv_half_8_c : 17.9  : 14.2
bgra_to_uv_half_8_neon  : 37.4  : 10.5
bgra_to_uv_half_128_c   : 103.9 : 326.0
bgra_to_uv_half_128_neon: 73.9  : 68.7
bgra_to_uv_half_1080_c  : 850.2 : 3732.0
bgra_to_uv_half_1080_neon   : 484.2 : 490.0
bgra_to_uv_half_1920_c  : 1479.2: 4942.7
bgra_to_uv_half_1920_neon   : 824.2 : 824.7
bgra_to_y_8_c   : 8.2   : 29.5
bgra_to_y_8_neon: 18.2  : 32.7
bgra_to_y_128_c : 101.4 : 361.5
bgra_to_y_128_neon  : 74.9  : 73.7
bgra_to_y_1080_c: 739.4 : 3018.0
bgra_to_y_1080_neon : 613.4 : 544.2
bgra_to_y_1920_c: 1298.7: 5326.0
bgra_to_y_1920_neon : 918.7 : 934.2

Signed-off-by: Zhao Zhili 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=52422133ae9905fdd3c4845a41ac7af9a678b47a
---

 libswscale/aarch64/input.S   | 68 +---
 libswscale/aarch64/swscale.c | 16 +++
 2 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/libswscale/aarch64/input.S b/libswscale/aarch64/input.S
index 2cfec4cb6a..ce5b042371 100644
--- a/libswscale/aarch64/input.S
+++ b/libswscale/aarch64/input.S
@@ -20,8 +20,12 @@
 
 #include "libavutil/aarch64/asm.S"
 
-.macro rgb_to_yuv_load_rgb src
+.macro rgb_to_yuv_load_rgb src, element=3
+.if \element == 3
 ld3 { v16.16b, v17.16b, v18.16b }, [\src]
+.else
+ld4 { v16.16b, v17.16b, v18.16b, v19.16b }, [\src]
+.endif
 uxtlv19.8h, v16.8b // v19: r
 uxtlv20.8h, v17.8b // v20: g
 uxtlv21.8h, v18.8b // v21: b
@@ -43,7 +47,8 @@
 sqshrn2 \dst\().8h, \dst2\().4s, \right_shift   // 
dst_higher_half = dst2 >> right_shift
 .endm
 
-function ff_bgr24ToY_neon, export=1
+.macro rgbToY_neon fmt_bgr, fmt_rgb, element
+function ff_\fmt_bgr\()ToY_neon, export=1
 cmp w4, #0  // check width > 0
 ldp w12, w11, [x5]  // w12: ry, w11: gy
 ldr w10, [x5, #8]   // w10: by
@@ -51,7 +56,7 @@ function ff_bgr24ToY_neon, export=1
 ret
 endfunc
 
-function ff_rgb24ToY_neon, export=1
+function ff_\fmt_rgb\()ToY_neon, export=1
 cmp w4, #0  // check width > 0
 ldp w10, w11, [x5]  // w10: ry, w11: gy
 ldr w12, [x5, #8]   // w12: by
@@ -67,11 +72,11 @@ function ff_rgb24ToY_neon, export=1
 dup v2.8h, w12
 b.lt2f
 1:
-rgb_to_yuv_load_rgb x1
+rgb_to_yuv_load_rgb x1, \element
 rgb_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9
 rgb_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9
 sub w4, w4, #16 // width -= 16
-add x1, x1, #48 // src += 48
+add x1, x1, #(16*\element)
 cmp w4, #16 // width >= 16 ?
 stp q16, q17, [x0], #32 // store to dst
 b.ge1b
@@ -86,12 +91,17 @@ function ff_rgb24ToY_neon, export=1
 smaddl  x13, w15, w12, x13  // x13 += by * b
 asr w13, w13, #9// x13 >>= 9
 sub w4, w4, #1  // width--
-add x1, x1, #3  // src += 3
+add x1, x1, #\element
 strhw13, [x0], #2   // store to dst
 cbnzw4, 2b
 3:
 ret
 endfunc
+.endm
+
+rgbToY_neon bgr24, rgb24, element=3
+
+rgbToY_neon bgra32, rgba32, element=4
 
 .macro rgb_set_uv_coeff half
 .if \half
@@ -110,7 +120,8 @@ endfunc
 dup v6.4s, w9
 .endm
 
-function ff_bgr24ToUV_half_neon, export=1
+.macro rgbToUV_half_neon fmt_bgr, fmt_rgb, element
+function ff_\fmt_bgr\()ToUV_half_neon, export=1
 cmp w5, #0  // check width > 0
 b.le 

[FFmpeg-cvslog] lavc/libx264: minor format fix

2024-07-05 Thread Jun Zhao
ffmpeg | branch: master | Jun Zhao  | Sat Jun 29 09:37:52 
2024 +0800| [25a7dcf06916b6b55789abf801ccbc77859da9e2] | committer: Jun Zhao

lavc/libx264: minor format fix

Remove redundant semicolons

Signed-off-by: Jun Zhao 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=25a7dcf06916b6b55789abf801ccbc77859da9e2
---

 libavcodec/libx264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 8186f68aec..d07a65a103 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -725,7 +725,7 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, 
const AVFrame *frame,
 
 /* SSE = MSE * width * height / scale -> because of possible 
chroma downsampling */
 sse[i] = (int64_t)floor(mse * plane_size + .5);
-};
+}
 
 errors = sse;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavf/scdet: minor fix

2024-07-05 Thread Jun Zhao
ffmpeg | branch: master | Jun Zhao  | Sat Jun 29 09:40:53 
2024 +0800| [03c2e9d77eace004db4579116e0141c496862895] | committer: Jun Zhao

lavf/scdet: minor fix

Change dbl to i64 for bool type

Reviewed-by: Michael Niedermayer 
Signed-off-by: Jun Zhao 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=03c2e9d77eace004db4579116e0141c496862895
---

 libavfilter/vf_scdet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c
index 15399cfebf..705bdf9777 100644
--- a/libavfilter/vf_scdet.c
+++ b/libavfilter/vf_scdet.c
@@ -53,8 +53,8 @@ typedef struct SCDetContext {
 static const AVOption scdet_options[] = {
 { "threshold",   "set scene change detect threshold",
OFFSET(threshold),  AV_OPT_TYPE_DOUBLE,   {.dbl = 10.}, 0,  100., V|F },
 { "t",   "set scene change detect threshold",
OFFSET(threshold),  AV_OPT_TYPE_DOUBLE,   {.dbl = 10.}, 0,  100., V|F },
-{ "sc_pass", "Set the flag to pass scene change frames", 
OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.dbl =  0  },0,1,  V|F },
-{ "s",   "Set the flag to pass scene change frames", 
OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.dbl =  0  },0,1,  V|F },
+{ "sc_pass", "Set the flag to pass scene change frames", 
OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.i64 = 0  }, 0,1,  V|F },
+{ "s",   "Set the flag to pass scene change frames", 
OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.i64 = 0  }, 0,1,  V|F },
 {NULL}
 };
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avutil/hwcontext_videotoolbox: Unset undefined values

2024-07-05 Thread Marvin Scholz
ffmpeg | branch: master | Marvin Scholz  | Mon May 20 
03:12:01 2024 +0200| [1fa7554bd6cc35b008045d33f9e948a92e7b901c] | committer: 
Zhao Zhili

avutil/hwcontext_videotoolbox: Unset undefined values

When mapping AVFrame properties to the CVBuffer attachments, it is
necessary to properly delete undefined attachments, else we can
leave incorrect values in there guessed from VideoToolbox for
example, leading to inconsistent results where the AVFrame and
CVBuffer differ in metadata.

Ref #10884

Signed-off-by: Zhao Zhili 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1fa7554bd6cc35b008045d33f9e948a92e7b901c
---

 libavutil/hwcontext_videotoolbox.c | 76 --
 1 file changed, 41 insertions(+), 35 deletions(-)

diff --git a/libavutil/hwcontext_videotoolbox.c 
b/libavutil/hwcontext_videotoolbox.c
index 9f82b104c3..0af2ab822f 100644
--- a/libavutil/hwcontext_videotoolbox.c
+++ b/libavutil/hwcontext_videotoolbox.c
@@ -342,8 +342,10 @@ static int vt_pixbuf_set_par(void *log_ctx,
 CFNumberRef num = NULL, den = NULL;
 AVRational avpar = src->sample_aspect_ratio;
 
-if (avpar.num == 0)
+if (avpar.num == 0) {
+CVBufferRemoveAttachment(pixbuf, kCVImageBufferPixelAspectRatioKey);
 return 0;
+}
 
 av_reduce(&avpar.num, &avpar.den,
 avpar.num, avpar.den,
@@ -423,7 +425,10 @@ static int vt_pixbuf_set_chromaloc(void *log_ctx,
 kCVImageBufferChromaLocationTopFieldKey,
 loc,
 kCVAttachmentMode_ShouldPropagate);
-}
+} else
+CVBufferRemoveAttachment(
+pixbuf,
+kCVImageBufferChromaLocationTopFieldKey);
 
 return 0;
 }
@@ -534,52 +539,53 @@ static int vt_pixbuf_set_colorspace(void *log_ctx,
 Float32 gamma = 0;
 
 colormatrix = av_map_videotoolbox_color_matrix_from_av(src->colorspace);
-if (!colormatrix && src->colorspace != AVCOL_SPC_UNSPECIFIED)
-av_log(log_ctx, AV_LOG_WARNING, "Color space %s is not supported.\n", 
av_color_space_name(src->colorspace));
+if (colormatrix)
+CVBufferSetAttachment(pixbuf, kCVImageBufferYCbCrMatrixKey,
+colormatrix, kCVAttachmentMode_ShouldPropagate);
+else {
+CVBufferRemoveAttachment(pixbuf, kCVImageBufferYCbCrMatrixKey);
+if (src->colorspace != AVCOL_SPC_UNSPECIFIED)
+av_log(log_ctx, AV_LOG_WARNING,
+"Color space %s is not supported.\n",
+av_color_space_name(src->colorspace));
+}
 
 colorpri = 
av_map_videotoolbox_color_primaries_from_av(src->color_primaries);
-if (!colorpri && src->color_primaries != AVCOL_PRI_UNSPECIFIED)
-av_log(log_ctx, AV_LOG_WARNING, "Color primaries %s is not 
supported.\n", av_color_primaries_name(src->color_primaries));
+if (colorpri)
+CVBufferSetAttachment(pixbuf, kCVImageBufferColorPrimariesKey,
+colorpri, kCVAttachmentMode_ShouldPropagate);
+else {
+CVBufferRemoveAttachment(pixbuf, kCVImageBufferColorPrimariesKey);
+if (src->color_primaries != AVCOL_SPC_UNSPECIFIED)
+av_log(log_ctx, AV_LOG_WARNING,
+"Color primaries %s is not supported.\n",
+av_color_primaries_name(src->color_primaries));
+}
 
 colortrc = av_map_videotoolbox_color_trc_from_av(src->color_trc);
-if (!colortrc && src->color_trc != AVCOL_TRC_UNSPECIFIED)
-av_log(log_ctx, AV_LOG_WARNING, "Color transfer function %s is not 
supported.\n", av_color_transfer_name(src->color_trc));
+if (colortrc)
+CVBufferSetAttachment(pixbuf, kCVImageBufferTransferFunctionKey,
+colorpri, kCVAttachmentMode_ShouldPropagate);
+else {
+CVBufferRemoveAttachment(pixbuf, kCVImageBufferTransferFunctionKey);
+if (src->color_trc != AVCOL_TRC_UNSPECIFIED)
+av_log(log_ctx, AV_LOG_WARNING,
+"Color transfer function %s is not supported.\n",
+av_color_transfer_name(src->color_trc));
+}
 
 if (src->color_trc == AVCOL_TRC_GAMMA22)
 gamma = 2.2;
 else if (src->color_trc == AVCOL_TRC_GAMMA28)
 gamma = 2.8;
 
-if (colormatrix) {
-CVBufferSetAttachment(
-pixbuf,
-kCVImageBufferYCbCrMatrixKey,
-colormatrix,
-kCVAttachmentMode_ShouldPropagate);
-}
-if (colorpri) {
-CVBufferSetAttachment(
-pixbuf,
-kCVImageBufferColorPrimariesKey,
-colorpri,
-kCVAttachmentMode_ShouldPropagate);
-}
-if (colortrc) {
-CVBufferSetAttachment(
-pixbuf,
-kCVImageBufferTransferFunctionKey,
-colortrc,
-kCVAttachmentMode_ShouldPropagate);
-}
 if (gamma != 0) {
 CFNumberRef gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, 
&gamma);
-CVBufferSetAttachment(
-pixbuf,
-kCVImageBufferGammaLevelKey,
-gamma_l

[FFmpeg-cvslog] avutil/hwcontext_videotoolbox: Update documentation

2024-07-05 Thread Marvin Scholz
ffmpeg | branch: master | Marvin Scholz  | Thu May 30 
03:16:56 2024 +0200| [b4f9fcc63c29827f3bd2822f0d6ba6af098c9cb7] | committer: 
Zhao Zhili

avutil/hwcontext_videotoolbox: Update documentation

The documentation was not clear at all what specifically the
function does, so it was left unspecified if it will unset or
not touch attachments it could not map from the AVFrame.

The documentation of the return value was wrong as well.

Signed-off-by: Zhao Zhili 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b4f9fcc63c29827f3bd2822f0d6ba6af098c9cb7
---

 libavutil/hwcontext_videotoolbox.h | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libavutil/hwcontext_videotoolbox.h 
b/libavutil/hwcontext_videotoolbox.h
index 600e9f2c8d..d35cfbb6c1 100644
--- a/libavutil/hwcontext_videotoolbox.h
+++ b/libavutil/hwcontext_videotoolbox.h
@@ -90,8 +90,15 @@ CFStringRef av_map_videotoolbox_color_primaries_from_av(enum 
AVColorPrimaries pr
 CFStringRef av_map_videotoolbox_color_trc_from_av(enum 
AVColorTransferCharacteristic trc);
 
 /**
- * Update a CVPixelBufferRef's metadata to based on an AVFrame.
- * Returns 0 if no known equivalent was found.
+ * Set CVPixelBufferRef's metadata based on an AVFrame.
+ *
+ * Sets/unsets the CVPixelBuffer attachments to match as closely as possible 
the
+ * AVFrame metadata. To prevent inconsistent attachments, the attachments for 
properties
+ * that could not be matched or are unspecified in the given AVFrame are 
unset. So if
+ * any attachments already covered by AVFrame metadata need to be set to a 
specific
+ * value, this should happen after calling this function.
+ *
+ * Returns < 0 in case of an error.
  */
 int av_vt_pixbuf_set_attachments(void *log_ctx,
  CVPixelBufferRef pixbuf, const struct AVFrame 
*src);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avutil/hwcontext_videotoolbox: Set CVBuffer CGColorSpace

2024-07-05 Thread Marvin Scholz
ffmpeg | branch: master | Marvin Scholz  | Mon May 20 
03:12:01 2024 +0200| [cd9ceaef22ecc25278c771169d179dbfdb24a355] | committer: 
Zhao Zhili

avutil/hwcontext_videotoolbox: Set CVBuffer CGColorSpace

In addition to the other properties, try to obtain the right
CGColorSpace and set it as well, else it could lead to a CVBuffer
tagged as BT.2020 but with a CGColorSpace indicating BT.709.

Therefore it is essential for consistency to set a colorspace
according to the other values, or if none can be obtained (for example
because the other values are all unspecified) unset it as well.

Fix #10884

Signed-off-by: Zhao Zhili 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cd9ceaef22ecc25278c771169d179dbfdb24a355
---

 libavutil/hwcontext_videotoolbox.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/libavutil/hwcontext_videotoolbox.c 
b/libavutil/hwcontext_videotoolbox.c
index 0af2ab822f..953155ce32 100644
--- a/libavutil/hwcontext_videotoolbox.c
+++ b/libavutil/hwcontext_videotoolbox.c
@@ -535,6 +535,7 @@ CFStringRef av_map_videotoolbox_color_trc_from_av(enum 
AVColorTransferCharacteri
 static int vt_pixbuf_set_colorspace(void *log_ctx,
 CVPixelBufferRef pixbuf, const AVFrame 
*src)
 {
+CGColorSpaceRef colorspace = NULL;
 CFStringRef colormatrix = NULL, colorpri = NULL, colortrc = NULL;
 Float32 gamma = 0;
 
@@ -587,6 +588,21 @@ static int vt_pixbuf_set_colorspace(void *log_ctx,
 } else
 CVBufferRemoveAttachment(pixbuf, kCVImageBufferGammaLevelKey);
 
+if (__builtin_available(macOS 10.8, iOS 10, *)) {
+CFDictionaryRef attachments = CVBufferCopyAttachments(pixbuf, 
kCVAttachmentMode_ShouldPropagate);
+if (attachments) {
+colorspace = 
CVImageBufferCreateColorSpaceFromAttachments(attachments);
+CFRelease(attachments);
+}
+}
+
+if (colorspace) {
+CVBufferSetAttachment(pixbuf, kCVImageBufferCGColorSpaceKey,
+colorspace, kCVAttachmentMode_ShouldPropagate);
+CFRelease(colorspace);
+} else
+CVBufferRemoveAttachment(pixbuf, kCVImageBufferCGColorSpaceKey);
+
 return 0;
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct8_add4

2024-07-05 Thread Rémi Denis-Courmont
ffmpeg | branch: master | Rémi Denis-Courmont  | Mon Jul  1 
23:41:37 2024 +0300| [e0eff64ed1e60d14391d55a91732ec612abf3f64] | committer: 
Rémi Denis-Courmont

lavc/h264dsp: R-V V 8-bit h264_idct8_add4

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e0eff64ed1e60d14391d55a91732ec612abf3f64
---

 libavcodec/riscv/h264dsp_init.c |  4 +++
 libavcodec/riscv/h264idct_rvv.S | 70 +
 2 files changed, 74 insertions(+)

diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 7fc47929cf..6b9ffe1c9f 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -40,6 +40,9 @@ void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int 
*blockoffset,
 void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset,
int16_t *block, int stride,
const uint8_t nnzc[5 * 8]);
+void ff_h264_idct8_add4_8_rvv(uint8_t *dst, const int *blockoffset,
+  int16_t *block, int stride,
+  const uint8_t nnzc[5 * 8]);
 
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
 extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -63,6 +66,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const 
int bit_depth,
 #  if __riscv_xlen == 64
 dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
 dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
+dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
 #  endif
 }
 dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index 42a29ba336..7422942717 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -170,5 +170,75 @@ func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
 addisp, sp, 80
 ret
 endfunc
+
+func ff_h264_idct8_add4_\depth\()_rvv, zve32x
+addisp, sp, -80
+lla t0, ff_h264_scan8
+sd  s0,   (sp)
+li  t1, 4 * 32 << (\depth > 8)
+mv  s0, sp
+li  t2, 4
+sd  ra,  8(sp)
+sd  s1, 16(sp)
+sd  s2, 24(sp)
+sd  s3, 32(sp)
+sd  s4, 40(sp)
+sd  s5, 48(sp)
+sd  s6, 56(sp)
+sd  s7, 64(sp)
+vsetivli  zero, 4, e8, mf4, ta, ma
+vlse8.v   v8, (t0), t2
+vlse16.v  v16, (a2), t1
+vluxei8.v v12, (a4), v8
+.if \depth == 8
+vsetvli   zero, zero, e16, mf2, ta, ma
+.else
+vsetvli   zero, zero, e32, m1, ta, ma
+.endif
+vmsne.vi  v1, v16, 0
+vsetvli   zero, zero, e8, mf4, ta, ma
+vmseq.vi  v2, v12, 1
+vmsne.vi  v0, v12, 0
+vmand.mm  v1, v1, v2
+vmv.x.s   s2, v0
+vmv.x.s   s3, v1
+li  s1, 4
+mv  s4, a0
+mv  s5, a1
+mv  s6, a2
+mv  s7, a3
+1:
+andit0, s2, 1
+addis1, s1, -1
+srlis2, s2, 1
+beqzt0, 3f # if (nnz)
+lw  t2, (s5)   # block_offset[i]
+andit1, s3, 1
+mv  a1, s6
+mv  a2, s7
+add a0, s4, t2
+beqzt1, 2f# if (nnz == 1 && block[i * 16])
+callff_h264_idct8_dc_add_\depth\()_c
+j   3f
+2:
+callff_h264_idct8_add_\depth\()_c
+3:
+srlis3, s3, 1
+addis5, s5, 4 * 4
+addis6, s6, 4 * 16 * 2 << (\depth > 8)
+bnezs1, 1b
+
+ld  s7, 64(sp)
+ld  s6, 56(sp)
+ld  s5, 48(sp)
+ld  s4, 40(sp)
+ld  s3, 32(sp)
+ld  s2, 24(sp)
+ld  s1, 16(sp)
+ld  ra,  8(sp)
+ld  s0,  0(sp)
+addisp, sp, 80
+ret
+endfunc
 .endr
 #endif

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct_add16

2024-07-05 Thread Rémi Denis-Courmont
ffmpeg | branch: master | Rémi Denis-Courmont  | Mon Jul  1 
23:41:37 2024 +0300| [30475c95ba50d40cf7605cb382bfd7852c825deb] | committer: 
Rémi Denis-Courmont

lavc/h264dsp: R-V V 8-bit h264_idct_add16

While this *tends* to be faster than plain C, the performance numbers
are all over the place, presumably due to the conditional character of
the main loop.

Some additional micro-optimisations should be feasible after the
underlying h264_idct_add and h264_idct_dc_add functions are also
implemented. Then it will no longer be necessary to strictly abide by
the C ABI.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=30475c95ba50d40cf7605cb382bfd7852c825deb
---

 libavcodec/riscv/Makefile   |   2 +-
 libavcodec/riscv/h264dsp_init.c |   8 +++
 libavcodec/riscv/h264idct_rvv.S | 106 
 3 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index a1510e8c6e..92e1544e76 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -31,7 +31,7 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o
 OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
 RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
 OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
-RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o
+RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o
 OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
 RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
 OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index ab412a9924..064ee95578 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -34,6 +34,10 @@ void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, 
ptrdiff_t stride,
 void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
 int alpha, int beta, int8_t *tc0);
 
+void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset,
+  int16_t *block, int stride,
+  const uint8_t nnzc[5 * 8]);
+
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
 extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
 
@@ -52,6 +56,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, 
const int bit_depth,
 dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
 dsp->h264_h_loop_filter_luma_mbaff =
 ff_h264_h_loop_filter_luma_mbaff_8_rvv;
+
+#  if __riscv_xlen == 64
+dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
+#  endif
 }
 dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
 }
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
new file mode 100644
index 00..74083f8221
--- /dev/null
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *this list of conditions and the following disclaimer in the documentation
+ *and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+
+const ff_h264_scan8
+.byte   014, 015, 024, 025, 016, 017, 026, 027
+.byte   034, 035, 044, 045, 036, 037, 046, 047
+endconst
+
+#if (__riscv_xlen == 64)
+.irpdepth, 8
+func ff_h264_idct_add16_\depth\()_rvv, zve32x
+addisp, sp, -80
+lla t0, ff_h264_scan8
+sd  s0,   (sp)
+li  t1, 32 << (\depth > 8)
+mv  s0, sp
+sd  ra,  8(sp)
+sd  s1, 16(sp)
+sd  s2, 24(sp)
+sd  s3, 32(sp)
+sd  s4, 40(sp)
+sd  s5, 48

[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct_add16intra

2024-07-05 Thread Rémi Denis-Courmont
ffmpeg | branch: master | Rémi Denis-Courmont  | Mon Jul  1 
23:41:37 2024 +0300| [d1f0c1fbf8db8dffd514c706905a2a8b8e986cb4] | committer: 
Rémi Denis-Courmont

lavc/h264dsp: R-V V 8-bit h264_idct_add16intra

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d1f0c1fbf8db8dffd514c706905a2a8b8e986cb4
---

 libavcodec/riscv/h264dsp_init.c |  4 +++
 libavcodec/riscv/h264idct_rvv.S | 68 +
 2 files changed, 72 insertions(+)

diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 064ee95578..7fc47929cf 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -37,6 +37,9 @@ void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, 
ptrdiff_t stride,
 void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset,
   int16_t *block, int stride,
   const uint8_t nnzc[5 * 8]);
+void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset,
+   int16_t *block, int stride,
+   const uint8_t nnzc[5 * 8]);
 
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
 extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -59,6 +62,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const 
int bit_depth,
 
 #  if __riscv_xlen == 64
 dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
+dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
 #  endif
 }
 dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index 74083f8221..42a29ba336 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -102,5 +102,73 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x
 addisp, sp, 80
 ret
 endfunc
+
+/*
+ * void ff_h264_idct_add16intra_<depth>_rvv(uint8_t *dst, const int *blockoffset,
+ *                                          int16_t *block, int stride,
+ *                                          const uint8_t nnzc[5 * 8]);
+ *
+ * For each of the 16 4x4 blocks i:
+ *   - if nnzc[ff_h264_scan8[i]] is non-zero, call
+ *     ff_h264_idct_add_<depth>_c(dst + blockoffset[i], block + 16 * i, stride);
+ *   - otherwise, if block[16 * i] is non-zero, call
+ *     ff_h264_idct_dc_add_<depth>_c() with the same arguments.
+ *
+ * Both conditions are computed up front for all 16 blocks with vector
+ * compares and kept as scalar bit masks, consumed one bit per iteration.
+ *
+ * Registers live across the calls (saved/restored in the 80-byte frame):
+ *   s1 = blocks left        s2 = nnzc mask          s3 = first-coefficient mask
+ *   s4 = dst                s5 = &blockoffset[i]    s6 = &block[16 * i]
+ *   s7 = stride
+ */
+func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
+        addi    sp, sp, -80
+        lla     t0, ff_h264_scan8
+        sd      s0,   (sp)
+        /*
+         * NOTE(review): GNU as documents a true comparison as -1 (not 1), so
+         * the "<< (depth > 8)" expressions presumably need checking before a
+         * depth above 8 is instantiated - confirm. They evaluate to "<< 0" here.
+         */
+        li      t1, 32 << (\depth > 8)  # byte stride between 4x4 blocks
+        mv      s0, sp
+        sd      ra,  8(sp)
+        sd      s1, 16(sp)
+        sd      s2, 24(sp)
+        sd      s3, 32(sp)
+        sd      s4, 40(sp)
+        sd      s5, 48(sp)
+        sd      s6, 56(sp)
+        sd      s7, 64(sp)
+        vsetivli  zero, 16, e8, m1, ta, ma
+        vle8.v    v8, (t0)              # v8[i]  = ff_h264_scan8[i]
+        vlse16.v  v16, (a2), t1         # v16[i] = first coefficient of block i
+        vluxei8.v v12, (a4), v8         # v12[i] = nnzc[scan8[i]]
+.if \depth == 8
+        vsetvli   zero, zero, e16, m2, ta, ma
+.else
+        vsetvli   zero, zero, e32, m4, ta, ma
+.endif
+        vmsne.vi  v1, v16, 0            # bit i set if block[i * 16] != 0
+        vsetvli   zero, zero, e8, m1, ta, ma
+        vmsne.vi  v0, v12, 0            # bit i set if nnzc[scan8[i]] != 0
+        vsetvli   zero, zero, e16, m2, ta, ma
+        vmv.x.s   s2, v0                # 16 mask bits -> scalar
+        vmv.x.s   s3, v1
+        li      s1, 16
+        mv      s4, a0
+        mv      s5, a1
+        mv      s6, a2
+        mv      s7, a3
+1:
+        andi    t0, s2, 1               # nnzc bit of the current block
+        addi    s1, s1, -1
+        srli    s2, s2, 1
+        lw      t2, (s5)                # block_offset[i]
+        andi    t1, s3, 1               # first-coefficient bit of the current block
+        mv      a1, s6
+        mv      a2, s7
+        add     a0, s4, t2
+        beqz    t0, 2f                  # if (nnzc[scan8[i]])
+        call    ff_h264_idct_add_\depth\()_c
+        j       3f
+2:
+        beqz    t1, 3f                  # if (block[i * 16])
+        call    ff_h264_idct_dc_add_\depth\()_c
+3:
+        srli    s3, s3, 1
+        addi    s5, s5, 4
+        addi    s6, s6, 16 * 2 << (\depth > 8)
+        bnez    s1, 1b
+
+        ld      s7, 64(sp)
+        ld      s6, 56(sp)
+        ld      s5, 48(sp)
+        ld      s4, 40(sp)
+        ld      s3, 32(sp)
+        ld      s2, 24(sp)
+        ld      s1, 16(sp)
+        ld      ra,  8(sp)
+        ld      s0,  0(sp)
+        addi    sp, sp, 80
+        ret
+endfunc
 .endr
 #endif

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avutil/timestamp: avoid possible FPE when 0 is passed to av_ts_make_time_string2()

2024-07-05 Thread Marton Balint
ffmpeg | branch: release/7.0 | Marton Balint  | Mon Jun 17 
22:30:26 2024 +0200| [c75cabef94e0985d1aa3f8d5ea9f4f7b8795d5ab] | committer: 
Marton Balint

avutil/timestamp: avoid possible FPE when 0 is passed to 
av_ts_make_time_string2()

Signed-off-by: Marton Balint 
(cherry picked from commit 0d5e3f5a4034b6c9312b7c621e25aa4303a00b6f)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c75cabef94e0985d1aa3f8d5ea9f4f7b8795d5ab
---

 libavutil/timestamp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/timestamp.c b/libavutil/timestamp.c
index 2a3e3012a4..6c231a517d 100644
--- a/libavutil/timestamp.c
+++ b/libavutil/timestamp.c
@@ -24,7 +24,7 @@ char *av_ts_make_time_string2(char *buf, int64_t ts, 
AVRational tb)
 snprintf(buf, AV_TS_MAX_STRING_SIZE, "NOPTS");
 } else {
 double val = av_q2d(tb) * ts;
-double log = floor(log10(fabs(val)));
+double log = (fpclassify(val) == FP_ZERO ? -INFINITY : 
floor(log10(fabs(val))));
 int precision = (isfinite(log) && log < 0) ? -log + 5 : 6;
 int last = snprintf(buf, AV_TS_MAX_STRING_SIZE, "%.*f", precision, 
val);
 last = FFMIN(last, AV_TS_MAX_STRING_SIZE - 1) - 1;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct_add

2024-07-05 Thread Rémi Denis-Courmont
ffmpeg | branch: master | Rémi Denis-Courmont  | Tue Jul  2 
22:03:07 2024 +0300| [f447189b0c8067edf54a16c8c6d5513b2de77276] | committer: 
Rémi Denis-Courmont

lavc/h264dsp: R-V V 8-bit h264_idct_add

T-Head C908 (cycles):
h264_idct4_add_8bpp_c:  271.5
h264_idct4_add_8bpp_rvv_i32: 91.5

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f447189b0c8067edf54a16c8c6d5513b2de77276
---

 libavcodec/riscv/h264dsp_init.c |  2 +
 libavcodec/riscv/h264idct_rvv.S | 83 -
 2 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 6b9ffe1c9f..f78ca3ea05 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -34,6 +34,7 @@ void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t 
stride,
 void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
 int alpha, int beta, int8_t *tc0);
 
+void ff_h264_idct_add_8_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset,
   int16_t *block, int stride,
   const uint8_t nnzc[5 * 8]);
@@ -63,6 +64,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const 
int bit_depth,
 dsp->h264_h_loop_filter_luma_mbaff =
 ff_h264_h_loop_filter_luma_mbaff_8_rvv;
 
+dsp->h264_idct_add = ff_h264_idct_add_8_rvv;
 #  if __riscv_xlen == 64
 dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
 dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index 7422942717..b36a7f7572 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -26,6 +26,83 @@
 
 #include "libavutil/riscv/asm.S"
 
+/*
+ * sx rd, addr
+ * Store integer register rd to addr using the store instruction selected by
+ * __riscv_xlen (sw for 32, sd for 64, sq otherwise).
+ */
+.macro  sx rd, addr
+#if (__riscv_xlen == 64)
+        sd      \rd, \addr
+#elif (__riscv_xlen == 32)
+        sw      \rd, \addr
+#else
+        sq      \rd, \addr
+#endif
+.endm
+
+.variant_cc ff_h264_idct4_rvv
+/*
+ * Four-input butterfly shared by both passes of ff_h264_idct_add_8_rvv.
+ *
+ * In/out:  v0-v3, processed element-wise under the vtype/vl set by the caller.
+ * Scratch: v5, v7, v8-v11.
+ * Linkage: entered with "jal t0, ff_h264_idct4_rvv" and returns through t0,
+ *          so ra is left untouched.
+ */
+func ff_h264_idct4_rvv, zve32x
+        vadd.vv     v8, v0, v2          # z0 = in0 + in2
+        vsub.vv     v9, v0, v2          # z1 = in0 - in2
+        vsra.vi     v5, v1, 1           # in1 >> 1
+        vsra.vi     v7, v3, 1           # in3 >> 1
+        vsub.vv     v10, v5, v3         # z2 = (in1 >> 1) - in3
+        vadd.vv     v11, v1, v7         # z3 = in1 + (in3 >> 1)
+        vadd.vv     v0, v8, v11         # out0 = z0 + z3
+        vsub.vv     v3, v8, v11         # out3 = z0 - z3
+        vadd.vv     v1, v9, v10         # out1 = z1 + z2
+        vsub.vv     v2, v9, v10         # out2 = z1 - z2
+        jr          t0
+endfunc
+
+/*
+ * void ff_h264_idct_add_8_rvv(uint8_t *dst, int16_t *block, int stride);
+ *
+ * a0 = dst, a1 = block (16 int16_t coefficients), a2 = stride.
+ *
+ * Runs ff_h264_idct4_rvv on the four 4-element vectors of block as stored,
+ * then on their transpose, zeroes the coefficient block, and adds the result
+ * (rounding right shift by 6) to four 4-byte rows of dst, clamped to 8 bits.
+ *
+ * Uses t0-t3 and v0-v11; writes vxrm.
+ */
+func ff_h264_idct_add_8_rvv, zve32x
+        csrwi       vxrm, 0             # rounding mode 0 (round-to-nearest-up) for vssra
+/* Entry point used by ff_h264_idct_add16_<depth>_rvv, which sets vxrm itself. */
+.Lidct_add4_8_rvv:
+        vsetivli    zero, 4, e16, mf2, ta, ma
+        addi        t1, a1, 1 * 4 * 2   # t1-t3: second to fourth group of 4 coefficients
+        vle16.v     v0, (a1)
+        addi        t2, a1, 2 * 4 * 2
+        vle16.v     v1, (t1)
+        addi        t3, a1, 3 * 4 * 2
+        vle16.v     v2, (t2)
+        vle16.v     v3, (t3)
+        jal         t0, ff_h264_idct4_rvv       # first pass
+        vse16.v     v0, (a1)            # write back so the segment load can transpose
+        vse16.v     v1, (t1)
+        vse16.v     v2, (t2)
+        vse16.v     v3, (t3)
+        vlseg4e16.v v0, (a1)            # v0-v3 = transpose of the stored 4x4
+        /* 256 / XLEN stores of XLEN / 8 bytes each: zero 32 bytes at block. */
+        .rept   256 / __riscv_xlen
+        sx          zero, ((__riscv_xlen / 8) * \+)(a1)
+        .endr
+        jal         t0, ff_h264_idct4_rvv       # second pass
+        add         t1, a0, a2          # t1-t3: the three dst rows after a0
+        vle8.v      v4, (a0)
+        add         t2, t1, a2
+        vle8.v      v5, (t1)
+        add         t3, t2, a2
+        vle8.v      v6, (t2)
+        vle8.v      v7, (t3)
+        .irp    n,0,1,2,3
+        vssra.vi    v\n, v\n, 6         # rounding arithmetic shift right by 6
+        .endr
+        vsetvli     zero, zero, e8, mf4, ta, ma
+        vwaddu.wv   v0, v0, v4          # 16-bit result += zero-extended dst byte
+        vwaddu.wv   v1, v1, v5
+        vwaddu.wv   v2, v2, v6
+        vwaddu.wv   v3, v3, v7
+        vsetvli     zero, zero, e16, mf2, ta, ma
+        .irp    n,0,1,2,3
+        vmax.vx     v\n, v\n, zero      # clamp negative sums to 0
+        .endr
+        vsetvli     zero, zero, e8, mf4, ta, ma
+        vnclipu.wi  v4, v0, 0           # narrow to 8 bits with unsigned saturation
+        vnclipu.wi  v5, v1, 0
+        vnclipu.wi  v6, v2, 0
+        vnclipu.wi  v7, v3, 0
+        vse8.v      v4, (a0)
+        vse8.v      v5, (t1)
+        vse8.v      v6, (t2)
+        vse8.v      v7, (t3)
+        ret
+endfunc
+
 const ff_h264_scan8
 .byte   014, 015, 024, 025, 016, 017, 026, 027
 .byte   034, 035, 044, 045, 036, 037, 046, 047
@@ -34,6 +111,7 @@ endconst
 #if (__riscv_xlen == 64)
 .irpdepth, 8
 func ff_h264_idct_add16_\depth\()_rvv, zve32x
+csrwi   vxrm, 0
 addisp, sp, -80
 lla t0, ff_h264_scan8
 sd  s0,   (sp)
@@ -83,7 +161,7 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x
 callff_h264_idct_dc_add_\depth\()_c
 j   3f
 2:
-callff_h264_idct_add_\depth\()_c
+call.Lidct_add4_\depth\()_rvv
 3:
 srlis3, s3, 1
 addis5, s5, 4
@@ -104,6 +182,7 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x
 endfunc
 
 func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
+csrwi   vxrm, 0
 addis