[FFmpeg-cvslog] swscale/aarch64: Add bgr24 to yuv
ffmpeg | branch: master | Zhao Zhili | Mon Jun 24 21:02:11 2024 +0800| [b8b71be07a7969e9c450bc7f61b7d6135f60e61c] | committer: Zhao Zhili swscale/aarch64: Add bgr24 to yuv Test on Apple M1 with kperf : -O3 : -O3 -fno-vectorize bgr24_to_uv_8_c : 28.5 : 52.5 bgr24_to_uv_8_neon : 54.5 : 59.7 bgr24_to_uv_128_c : 294.0 : 830.7 bgr24_to_uv_128_neon: 99.7 : 112.0 bgr24_to_uv_1080_c : 965.0 : 6624.0 bgr24_to_uv_1080_neon : 751.5 : 754.7 bgr24_to_uv_1920_c : 1693.2: 11554.5 bgr24_to_uv_1920_neon : 1292.5: 1307.5 bgr24_to_uv_half_8_c: 54.2 : 37.0 bgr24_to_uv_half_8_neon : 27.2 : 22.5 bgr24_to_uv_half_128_c : 127.2 : 392.5 bgr24_to_uv_half_128_neon : 63.0 : 52.0 bgr24_to_uv_half_1080_c : 880.2 : 3329.0 bgr24_to_uv_half_1080_neon : 401.5 : 390.7 bgr24_to_uv_half_1920_c : 1585.7: 6390.7 bgr24_to_uv_half_1920_neon : 694.7 : 698.7 bgr24_to_y_8_c : 21.7 : 22.5 bgr24_to_y_8_neon : 797.2 : 25.5 bgr24_to_y_128_c: 88.0 : 280.5 bgr24_to_y_128_neon : 63.7 : 55.0 bgr24_to_y_1080_c : 616.7 : 2208.7 bgr24_to_y_1080_neon: 900.0 : 452.0 bgr24_to_y_1920_c : 1093.2: 3894.7 bgr24_to_y_1920_neon: 777.2 : 767.5 Signed-off-by: Zhao Zhili > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b8b71be07a7969e9c450bc7f61b7d6135f60e61c --- libswscale/aarch64/input.S | 71 libswscale/aarch64/swscale.c | 32 2 files changed, 71 insertions(+), 32 deletions(-) diff --git a/libswscale/aarch64/input.S b/libswscale/aarch64/input.S index 33afa34111..2cfec4cb6a 100644 --- a/libswscale/aarch64/input.S +++ b/libswscale/aarch64/input.S @@ -20,7 +20,7 @@ #include "libavutil/aarch64/asm.S" -.macro rgb24_to_yuv_load_rgb, src +.macro rgb_to_yuv_load_rgb src ld3 { v16.16b, v17.16b, v18.16b }, [\src] uxtlv19.8h, v16.8b // v19: r uxtlv20.8h, v17.8b // v20: g @@ -30,7 +30,7 @@ uxtl2 v24.8h, v18.16b// v24: b .endm -.macro rgb24_to_yuv_product, r, g, b, dst1, dst2, dst, coef0, coef1, coef2, right_shift +.macro rgb_to_yuv_product r, g, b, dst1, dst2, dst, coef0, coef1, coef2, right_shift mov \dst1\().16b, v6.16b// dst1 = 
const_offset mov \dst2\().16b, v6.16b// dst2 = const_offset smlal \dst1\().4s, \coef0\().4h, \r\().4h // dst1 += rx * r @@ -43,12 +43,20 @@ sqshrn2 \dst\().8h, \dst2\().4s, \right_shift // dst_higher_half = dst2 >> right_shift .endm +function ff_bgr24ToY_neon, export=1 +cmp w4, #0 // check width > 0 +ldp w12, w11, [x5] // w12: ry, w11: gy +ldr w10, [x5, #8] // w10: by +b.gt4f +ret +endfunc + function ff_rgb24ToY_neon, export=1 cmp w4, #0 // check width > 0 ldp w10, w11, [x5] // w10: ry, w11: gy ldr w12, [x5, #8] // w12: by b.le3f - +4: mov w9, #256// w9 = 1 << (RGB2YUV_SHIFT - 7) movkw9, #8, lsl #16 // w9 += 32 << (RGB2YUV_SHIFT - 1) dup v6.4s, w9 // w9: const_offset @@ -59,9 +67,9 @@ function ff_rgb24ToY_neon, export=1 dup v2.8h, w12 b.lt2f 1: -rgb24_to_yuv_load_rgb x1 -rgb24_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9 -rgb24_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9 +rgb_to_yuv_load_rgb x1 +rgb_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9 +rgb_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9 sub w4, w4, #16 // width -= 16 add x1, x1, #48 // src += 48 cmp w4, #16 // width >= 16 ? @@ -85,10 +93,7 @@ function ff_rgb24ToY_neon, export=1 ret endfunc -.macro rgb24_load_uv_coeff half -ldp w10, w11, [x6, #12] // w10: ru, w11: gu -ldp w12, w13, [x6, #20] // w12: bu, w13: rv -ldp w14, w15, [x6, #28] // w14: gv, w15: bv +.macro rgb_set_uv_coeff half .if \half mov w9, #512 movkw9, #128, lsl #16
[FFmpeg-cvslog] swscale/aarch64: Add argb/abgr to yuv
ffmpeg | branch: master | Zhao Zhili | Mon Jun 24 21:02:13 2024 +0800| [4d90a76986cf5ef80266ce845679321b68e3412d] | committer: Zhao Zhili swscale/aarch64: Add argb/abgr to yuv Test on Apple M1 with kperf: : -O3 : -O3 -fno-vectorize abgr_to_uv_8_c : 19.4 : 26.1 abgr_to_uv_8_neon : 29.9 : 51.1 abgr_to_uv_128_c: 146.4 : 558.9 abgr_to_uv_128_neon : 85.1 : 83.4 abgr_to_uv_1080_c : 1162.6: 4786.4 abgr_to_uv_1080_neon: 819.6 : 826.6 abgr_to_uv_1920_c : 2063.6: 8492.1 abgr_to_uv_1920_neon: 1435.1: 1447.1 abgr_to_uv_half_8_c : 16.4 : 11.4 abgr_to_uv_half_8_neon : 35.6 : 20.4 abgr_to_uv_half_128_c : 108.6 : 359.4 abgr_to_uv_half_128_neon: 75.4 : 42.6 abgr_to_uv_half_1080_c : 883.4 : 2885.6 abgr_to_uv_half_1080_neon : 460.6 : 481.1 abgr_to_uv_half_1920_c : 1553.6: 5106.9 abgr_to_uv_half_1920_neon : 817.6 : 820.4 abgr_to_y_8_c : 6.1 : 26.4 abgr_to_y_8_neon: 40.6 : 6.4 abgr_to_y_128_c : 99.9 : 390.1 abgr_to_y_128_neon : 67.4 : 55.9 abgr_to_y_1080_c: 735.9 : 3170.4 abgr_to_y_1080_neon : 534.6 : 536.6 abgr_to_y_1920_c: 1279.4: 6016.4 abgr_to_y_1920_neon : 932.6 : 927.6 Signed-off-by: Zhao Zhili > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4d90a76986cf5ef80266ce845679321b68e3412d --- libswscale/aarch64/input.S | 86 +--- libswscale/aarch64/swscale.c | 17 + 2 files changed, 82 insertions(+), 21 deletions(-) diff --git a/libswscale/aarch64/input.S b/libswscale/aarch64/input.S index ce5b042371..5cb18711fb 100644 --- a/libswscale/aarch64/input.S +++ b/libswscale/aarch64/input.S @@ -34,6 +34,16 @@ uxtl2 v24.8h, v18.16b// v24: b .endm +.macro argb_to_yuv_load_rgb src +ld4 { v16.16b, v17.16b, v18.16b, v19.16b }, [\src] +uxtlv21.8h, v19.8b // v21: b +uxtl2 v24.8h, v19.16b// v24: b +uxtlv19.8h, v17.8b // v19: r +uxtlv20.8h, v18.8b // v20: g +uxtl2 v22.8h, v17.16b// v22: r +uxtl2 v23.8h, v18.16b// v23: g +.endm + .macro rgb_to_yuv_product r, g, b, dst1, dst2, dst, coef0, coef1, coef2, right_shift mov \dst1\().16b, v6.16b// dst1 = const_offset mov \dst2\().16b, v6.16b// dst2 = 
const_offset @@ -47,7 +57,7 @@ sqshrn2 \dst\().8h, \dst2\().4s, \right_shift // dst_higher_half = dst2 >> right_shift .endm -.macro rgbToY_neon fmt_bgr, fmt_rgb, element +.macro rgbToY_neon fmt_bgr, fmt_rgb, element, alpha_first=0 function ff_\fmt_bgr\()ToY_neon, export=1 cmp w4, #0 // check width > 0 ldp w12, w11, [x5] // w12: ry, w11: gy @@ -72,7 +82,11 @@ function ff_\fmt_rgb\()ToY_neon, export=1 dup v2.8h, w12 b.lt2f 1: +.if \alpha_first +argb_to_yuv_load_rgb x1 +.else rgb_to_yuv_load_rgb x1, \element +.endif rgb_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9 rgb_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9 sub w4, w4, #16 // width -= 16 @@ -82,9 +96,15 @@ function ff_\fmt_rgb\()ToY_neon, export=1 b.ge1b cbz x4, 3f 2: +.if \alpha_first +ldrbw13, [x1, #1] // w13: r +ldrbw14, [x1, #2] // w14: g +ldrbw15, [x1, #3] // w15: b +.else ldrbw13, [x1] // w13: r ldrbw14, [x1, #1] // w14: g ldrbw15, [x1, #2] // w15: b +.endif smaddl x13, w13, w10, x9 // x13 = ry * r + const_offset smaddl x13, w14, w11, x13 // x13 += gy * g @@ -103,6 +123,8 @@ rgbToY_neon bgr24, rgb24, element=3 rgbToY_neon bgra32, rgba32, element=4 +rgbToY_neon abgr32, argb32, element=4, alpha_first=1 + .macro rgb_set_uv_coeff half .if \half mov w9, #512 @@ -120,7 +142,21 @@ rgbToY_neon bgra32, rgba32, element=4 dup v6.4s, w9 .endm -.macro rgbToUV_half_neon fmt_bgr, fmt_rgb, element +.macro rgb_load_add_half off_r1, off_r2, off_g1, off_g2, off_b1, off_b2 +ldrbw2, [x3, #\off_
[FFmpeg-cvslog] swscale/aarch64: Add bgra/rgba to yuv
ffmpeg | branch: master | Zhao Zhili | Mon Jun 24 21:02:12 2024 +0800| [52422133ae9905fdd3c4845a41ac7af9a678b47a] | committer: Zhao Zhili swscale/aarch64: Add bgra/rgba to yuv Test on Apple M1 with kperf : -O3 : -O3 -fno-vectorize bgra_to_uv_8_c : 13.4 : 27.5 bgra_to_uv_8_neon : 37.4 : 41.7 bgra_to_uv_128_c: 155.9 : 550.2 bgra_to_uv_128_neon : 91.7 : 92.7 bgra_to_uv_1080_c : 1173.2: 4558.2 bgra_to_uv_1080_neon: 822.7 : 809.5 bgra_to_uv_1920_c : 2078.2: 8115.2 bgra_to_uv_1920_neon: 1437.7: 1438.7 bgra_to_uv_half_8_c : 17.9 : 14.2 bgra_to_uv_half_8_neon : 37.4 : 10.5 bgra_to_uv_half_128_c : 103.9 : 326.0 bgra_to_uv_half_128_neon: 73.9 : 68.7 bgra_to_uv_half_1080_c : 850.2 : 3732.0 bgra_to_uv_half_1080_neon : 484.2 : 490.0 bgra_to_uv_half_1920_c : 1479.2: 4942.7 bgra_to_uv_half_1920_neon : 824.2 : 824.7 bgra_to_y_8_c : 8.2 : 29.5 bgra_to_y_8_neon: 18.2 : 32.7 bgra_to_y_128_c : 101.4 : 361.5 bgra_to_y_128_neon : 74.9 : 73.7 bgra_to_y_1080_c: 739.4 : 3018.0 bgra_to_y_1080_neon : 613.4 : 544.2 bgra_to_y_1920_c: 1298.7: 5326.0 bgra_to_y_1920_neon : 918.7 : 934.2 Signed-off-by: Zhao Zhili > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=52422133ae9905fdd3c4845a41ac7af9a678b47a --- libswscale/aarch64/input.S | 68 +--- libswscale/aarch64/swscale.c | 16 +++ 2 files changed, 68 insertions(+), 16 deletions(-) diff --git a/libswscale/aarch64/input.S b/libswscale/aarch64/input.S index 2cfec4cb6a..ce5b042371 100644 --- a/libswscale/aarch64/input.S +++ b/libswscale/aarch64/input.S @@ -20,8 +20,12 @@ #include "libavutil/aarch64/asm.S" -.macro rgb_to_yuv_load_rgb src +.macro rgb_to_yuv_load_rgb src, element=3 +.if \element == 3 ld3 { v16.16b, v17.16b, v18.16b }, [\src] +.else +ld4 { v16.16b, v17.16b, v18.16b, v19.16b }, [\src] +.endif uxtlv19.8h, v16.8b // v19: r uxtlv20.8h, v17.8b // v20: g uxtlv21.8h, v18.8b // v21: b @@ -43,7 +47,8 @@ sqshrn2 \dst\().8h, \dst2\().4s, \right_shift // dst_higher_half = dst2 >> right_shift .endm -function ff_bgr24ToY_neon, export=1 +.macro 
rgbToY_neon fmt_bgr, fmt_rgb, element +function ff_\fmt_bgr\()ToY_neon, export=1 cmp w4, #0 // check width > 0 ldp w12, w11, [x5] // w12: ry, w11: gy ldr w10, [x5, #8] // w10: by @@ -51,7 +56,7 @@ function ff_bgr24ToY_neon, export=1 ret endfunc -function ff_rgb24ToY_neon, export=1 +function ff_\fmt_rgb\()ToY_neon, export=1 cmp w4, #0 // check width > 0 ldp w10, w11, [x5] // w10: ry, w11: gy ldr w12, [x5, #8] // w12: by @@ -67,11 +72,11 @@ function ff_rgb24ToY_neon, export=1 dup v2.8h, w12 b.lt2f 1: -rgb_to_yuv_load_rgb x1 +rgb_to_yuv_load_rgb x1, \element rgb_to_yuv_product v19, v20, v21, v25, v26, v16, v0, v1, v2, #9 rgb_to_yuv_product v22, v23, v24, v27, v28, v17, v0, v1, v2, #9 sub w4, w4, #16 // width -= 16 -add x1, x1, #48 // src += 48 +add x1, x1, #(16*\element) cmp w4, #16 // width >= 16 ? stp q16, q17, [x0], #32 // store to dst b.ge1b @@ -86,12 +91,17 @@ function ff_rgb24ToY_neon, export=1 smaddl x13, w15, w12, x13 // x13 += by * b asr w13, w13, #9// x13 >>= 9 sub w4, w4, #1 // width-- -add x1, x1, #3 // src += 3 +add x1, x1, #\element strhw13, [x0], #2 // store to dst cbnzw4, 2b 3: ret endfunc +.endm + +rgbToY_neon bgr24, rgb24, element=3 + +rgbToY_neon bgra32, rgba32, element=4 .macro rgb_set_uv_coeff half .if \half @@ -110,7 +120,8 @@ endfunc dup v6.4s, w9 .endm -function ff_bgr24ToUV_half_neon, export=1 +.macro rgbToUV_half_neon fmt_bgr, fmt_rgb, element +function ff_\fmt_bgr\()ToUV_half_neon, export=1 cmp w5, #0 // check width > 0 b.le
[FFmpeg-cvslog] lavc/libx264: minor format fix
ffmpeg | branch: master | Jun Zhao | Sat Jun 29 09:37:52 2024 +0800| [25a7dcf06916b6b55789abf801ccbc77859da9e2] | committer: Jun Zhao lavc/libx264: minor format fix Remove redundant semicolons Signed-off-by: Jun Zhao > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=25a7dcf06916b6b55789abf801ccbc77859da9e2 --- libavcodec/libx264.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index 8186f68aec..d07a65a103 100644 --- a/libavcodec/libx264.c +++ b/libavcodec/libx264.c @@ -725,7 +725,7 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame, /* SSE = MSE * width * height / scale -> because of possible chroma downsampling */ sse[i] = (int64_t)floor(mse * plane_size + .5); -}; +} errors = sse; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavf/scdet: minor fix
ffmpeg | branch: master | Jun Zhao | Sat Jun 29 09:40:53 2024 +0800| [03c2e9d77eace004db4579116e0141c496862895] | committer: Jun Zhao lavf/scdet: minor fix Change dbl to i64 for bool type Reviewed-by: Michael Niedermayer Signed-off-by: Jun Zhao > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=03c2e9d77eace004db4579116e0141c496862895 --- libavfilter/vf_scdet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavfilter/vf_scdet.c b/libavfilter/vf_scdet.c index 15399cfebf..705bdf9777 100644 --- a/libavfilter/vf_scdet.c +++ b/libavfilter/vf_scdet.c @@ -53,8 +53,8 @@ typedef struct SCDetContext { static const AVOption scdet_options[] = { { "threshold", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., V|F }, { "t", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., V|F }, -{ "sc_pass", "Set the flag to pass scene change frames", OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.dbl = 0 },0,1, V|F }, -{ "s", "Set the flag to pass scene change frames", OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.dbl = 0 },0,1, V|F }, +{ "sc_pass", "Set the flag to pass scene change frames", OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0,1, V|F }, +{ "s", "Set the flag to pass scene change frames", OFFSET(sc_pass),AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0,1, V|F }, {NULL} }; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil/hwcontext_videotoolbox: Unset undefined values
ffmpeg | branch: master | Marvin Scholz | Mon May 20 03:12:01 2024 +0200| [1fa7554bd6cc35b008045d33f9e948a92e7b901c] | committer: Zhao Zhili avutil/hwcontext_videotoolbox: Unset undefined values When mapping AVFrame properties to the CVBuffer attachments, it is necessary to properly delete undefined attachments, else we can leave incorrect values in there guessed from VideoToolbox for example, leading to inconsistent results where the AVFrame and CVBuffer differ in metadata. Ref #10884 Signed-off-by: Zhao Zhili > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1fa7554bd6cc35b008045d33f9e948a92e7b901c --- libavutil/hwcontext_videotoolbox.c | 76 -- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/libavutil/hwcontext_videotoolbox.c b/libavutil/hwcontext_videotoolbox.c index 9f82b104c3..0af2ab822f 100644 --- a/libavutil/hwcontext_videotoolbox.c +++ b/libavutil/hwcontext_videotoolbox.c @@ -342,8 +342,10 @@ static int vt_pixbuf_set_par(void *log_ctx, CFNumberRef num = NULL, den = NULL; AVRational avpar = src->sample_aspect_ratio; -if (avpar.num == 0) +if (avpar.num == 0) { +CVBufferRemoveAttachment(pixbuf, kCVImageBufferPixelAspectRatioKey); return 0; +} av_reduce(&avpar.num, &avpar.den, avpar.num, avpar.den, @@ -423,7 +425,10 @@ static int vt_pixbuf_set_chromaloc(void *log_ctx, kCVImageBufferChromaLocationTopFieldKey, loc, kCVAttachmentMode_ShouldPropagate); -} +} else +CVBufferRemoveAttachment( +pixbuf, +kCVImageBufferChromaLocationTopFieldKey); return 0; } @@ -534,52 +539,53 @@ static int vt_pixbuf_set_colorspace(void *log_ctx, Float32 gamma = 0; colormatrix = av_map_videotoolbox_color_matrix_from_av(src->colorspace); -if (!colormatrix && src->colorspace != AVCOL_SPC_UNSPECIFIED) -av_log(log_ctx, AV_LOG_WARNING, "Color space %s is not supported.\n", av_color_space_name(src->colorspace)); +if (colormatrix) +CVBufferSetAttachment(pixbuf, kCVImageBufferYCbCrMatrixKey, +colormatrix, kCVAttachmentMode_ShouldPropagate); +else { 
+CVBufferRemoveAttachment(pixbuf, kCVImageBufferYCbCrMatrixKey); +if (src->colorspace != AVCOL_SPC_UNSPECIFIED) +av_log(log_ctx, AV_LOG_WARNING, +"Color space %s is not supported.\n", +av_color_space_name(src->colorspace)); +} colorpri = av_map_videotoolbox_color_primaries_from_av(src->color_primaries); -if (!colorpri && src->color_primaries != AVCOL_PRI_UNSPECIFIED) -av_log(log_ctx, AV_LOG_WARNING, "Color primaries %s is not supported.\n", av_color_primaries_name(src->color_primaries)); +if (colorpri) +CVBufferSetAttachment(pixbuf, kCVImageBufferColorPrimariesKey, +colorpri, kCVAttachmentMode_ShouldPropagate); +else { +CVBufferRemoveAttachment(pixbuf, kCVImageBufferColorPrimariesKey); +if (src->color_primaries != AVCOL_SPC_UNSPECIFIED) +av_log(log_ctx, AV_LOG_WARNING, +"Color primaries %s is not supported.\n", +av_color_primaries_name(src->color_primaries)); +} colortrc = av_map_videotoolbox_color_trc_from_av(src->color_trc); -if (!colortrc && src->color_trc != AVCOL_TRC_UNSPECIFIED) -av_log(log_ctx, AV_LOG_WARNING, "Color transfer function %s is not supported.\n", av_color_transfer_name(src->color_trc)); +if (colortrc) +CVBufferSetAttachment(pixbuf, kCVImageBufferTransferFunctionKey, +colorpri, kCVAttachmentMode_ShouldPropagate); +else { +CVBufferRemoveAttachment(pixbuf, kCVImageBufferTransferFunctionKey); +if (src->color_trc != AVCOL_TRC_UNSPECIFIED) +av_log(log_ctx, AV_LOG_WARNING, +"Color transfer function %s is not supported.\n", +av_color_transfer_name(src->color_trc)); +} if (src->color_trc == AVCOL_TRC_GAMMA22) gamma = 2.2; else if (src->color_trc == AVCOL_TRC_GAMMA28) gamma = 2.8; -if (colormatrix) { -CVBufferSetAttachment( -pixbuf, -kCVImageBufferYCbCrMatrixKey, -colormatrix, -kCVAttachmentMode_ShouldPropagate); -} -if (colorpri) { -CVBufferSetAttachment( -pixbuf, -kCVImageBufferColorPrimariesKey, -colorpri, -kCVAttachmentMode_ShouldPropagate); -} -if (colortrc) { -CVBufferSetAttachment( -pixbuf, -kCVImageBufferTransferFunctionKey, -colortrc, 
-kCVAttachmentMode_ShouldPropagate); -} if (gamma != 0) { CFNumberRef gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma); -CVBufferSetAttachment( -pixbuf, -kCVImageBufferGammaLevelKey, -gamma_l
[FFmpeg-cvslog] avutil/hwcontext_videotoolbox: Update documentation
ffmpeg | branch: master | Marvin Scholz | Thu May 30 03:16:56 2024 +0200| [b4f9fcc63c29827f3bd2822f0d6ba6af098c9cb7] | committer: Zhao Zhili avutil/hwcontext_videotoolbox: Update documentation The documentation was not clear at all what specifically the function does, so it was left unspecified if it will unset or not touch attachments it could not map from the AVFrame. The documentation of the return value was wrong as well. Signed-off-by: Zhao Zhili > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b4f9fcc63c29827f3bd2822f0d6ba6af098c9cb7 --- libavutil/hwcontext_videotoolbox.h | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libavutil/hwcontext_videotoolbox.h b/libavutil/hwcontext_videotoolbox.h index 600e9f2c8d..d35cfbb6c1 100644 --- a/libavutil/hwcontext_videotoolbox.h +++ b/libavutil/hwcontext_videotoolbox.h @@ -90,8 +90,15 @@ CFStringRef av_map_videotoolbox_color_primaries_from_av(enum AVColorPrimaries pr CFStringRef av_map_videotoolbox_color_trc_from_av(enum AVColorTransferCharacteristic trc); /** - * Update a CVPixelBufferRef's metadata to based on an AVFrame. - * Returns 0 if no known equivalent was found. + * Set CVPixelBufferRef's metadata based on an AVFrame. + * + * Sets/unsets the CVPixelBuffer attachments to match as closely as possible the + * AVFrame metadata. To prevent inconsistent attachments, the attachments for properties + * that could not be matched or are unspecified in the given AVFrame are unset. So if + * any attachments already covered by AVFrame metadata need to be set to a specific + * value, this should happen after calling this function. + * + * Returns < 0 in case of an error. */ int av_vt_pixbuf_set_attachments(void *log_ctx, CVPixelBufferRef pixbuf, const struct AVFrame *src); ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil/hwcontext_videotoolbox: Set CVBuffer CGColorSpace
ffmpeg | branch: master | Marvin Scholz | Mon May 20 03:12:01 2024 +0200| [cd9ceaef22ecc25278c771169d179dbfdb24a355] | committer: Zhao Zhili avutil/hwcontext_videotoolbox: Set CVBuffer CGColorSpace In addition to the other properties, try to obtain the right CGColorSpace and set it as well, else it could lead to a CVBuffer tagged as BT.2020 but with a CGColorSpace indicating BT.709. Therefore it is essential for consistency to set a colorspace according to the other values, or if none can be obtained (for example because the other values are all unspecified) unset it as well. Fix #10884 Signed-off-by: Zhao Zhili > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cd9ceaef22ecc25278c771169d179dbfdb24a355 --- libavutil/hwcontext_videotoolbox.c | 16 1 file changed, 16 insertions(+) diff --git a/libavutil/hwcontext_videotoolbox.c b/libavutil/hwcontext_videotoolbox.c index 0af2ab822f..953155ce32 100644 --- a/libavutil/hwcontext_videotoolbox.c +++ b/libavutil/hwcontext_videotoolbox.c @@ -535,6 +535,7 @@ CFStringRef av_map_videotoolbox_color_trc_from_av(enum AVColorTransferCharacteri static int vt_pixbuf_set_colorspace(void *log_ctx, CVPixelBufferRef pixbuf, const AVFrame *src) { +CGColorSpaceRef colorspace = NULL; CFStringRef colormatrix = NULL, colorpri = NULL, colortrc = NULL; Float32 gamma = 0; @@ -587,6 +588,21 @@ static int vt_pixbuf_set_colorspace(void *log_ctx, } else CVBufferRemoveAttachment(pixbuf, kCVImageBufferGammaLevelKey); +if (__builtin_available(macOS 10.8, iOS 10, *)) { +CFDictionaryRef attachments = CVBufferCopyAttachments(pixbuf, kCVAttachmentMode_ShouldPropagate); +if (attachments) { +colorspace = CVImageBufferCreateColorSpaceFromAttachments(attachments); +CFRelease(attachments); +} +} + +if (colorspace) { +CVBufferSetAttachment(pixbuf, kCVImageBufferCGColorSpaceKey, +colorspace, kCVAttachmentMode_ShouldPropagate); +CFRelease(colorspace); +} else +CVBufferRemoveAttachment(pixbuf, kCVImageBufferCGColorSpaceKey); + return 0; } ___ ffmpeg-cvslog 
mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct8_add4
ffmpeg | branch: master | Rémi Denis-Courmont | Mon Jul 1 23:41:37 2024 +0300| [e0eff64ed1e60d14391d55a91732ec612abf3f64] | committer: Rémi Denis-Courmont lavc/h264dsp: R-V V 8-bit h264_idct8_add4 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e0eff64ed1e60d14391d55a91732ec612abf3f64 --- libavcodec/riscv/h264dsp_init.c | 4 +++ libavcodec/riscv/h264idct_rvv.S | 70 + 2 files changed, 74 insertions(+) diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index 7fc47929cf..6b9ffe1c9f 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -40,6 +40,9 @@ void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset, void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[5 * 8]); +void ff_h264_idct8_add4_8_rvv(uint8_t *dst, const int *blockoffset, + int16_t *block, int stride, + const uint8_t nnzc[5 * 8]); extern int ff_startcode_find_candidate_rvb(const uint8_t *, int); extern int ff_startcode_find_candidate_rvv(const uint8_t *, int); @@ -63,6 +66,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, # if __riscv_xlen == 64 dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv; dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv; +dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv; # endif } dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv; diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index 42a29ba336..7422942717 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -170,5 +170,75 @@ func ff_h264_idct_add16intra_\depth\()_rvv, zve32x addisp, sp, 80 ret endfunc + +func ff_h264_idct8_add4_\depth\()_rvv, zve32x +addisp, sp, -80 +lla t0, ff_h264_scan8 +sd s0, (sp) +li t1, 4 * 32 << (\depth > 8) +mv s0, sp +li t2, 4 +sd ra, 8(sp) +sd s1, 16(sp) +sd s2, 24(sp) +sd s3, 32(sp) +sd s4, 40(sp) +sd s5, 48(sp) +sd s6, 56(sp) +sd s7, 64(sp) 
+vsetivli zero, 4, e8, mf4, ta, ma +vlse8.v v8, (t0), t2 +vlse16.v v16, (a2), t1 +vluxei8.v v12, (a4), v8 +.if \depth == 8 +vsetvli zero, zero, e16, mf2, ta, ma +.else +vsetvli zero, zero, e32, m1, ta, ma +.endif +vmsne.vi v1, v16, 0 +vsetvli zero, zero, e8, mf4, ta, ma +vmseq.vi v2, v12, 1 +vmsne.vi v0, v12, 0 +vmand.mm v1, v1, v2 +vmv.x.s s2, v0 +vmv.x.s s3, v1 +li s1, 4 +mv s4, a0 +mv s5, a1 +mv s6, a2 +mv s7, a3 +1: +andit0, s2, 1 +addis1, s1, -1 +srlis2, s2, 1 +beqzt0, 3f # if (nnz) +lw t2, (s5) # block_offset[i] +andit1, s3, 1 +mv a1, s6 +mv a2, s7 +add a0, s4, t2 +beqzt1, 2f# if (nnz == 1 && block[i * 16]) +callff_h264_idct8_dc_add_\depth\()_c +j 3f +2: +callff_h264_idct8_add_\depth\()_c +3: +srlis3, s3, 1 +addis5, s5, 4 * 4 +addis6, s6, 4 * 16 * 2 << (\depth > 8) +bnezs1, 1b + +ld s7, 64(sp) +ld s6, 56(sp) +ld s5, 48(sp) +ld s4, 40(sp) +ld s3, 32(sp) +ld s2, 24(sp) +ld s1, 16(sp) +ld ra, 8(sp) +ld s0, 0(sp) +addisp, sp, 80 +ret +endfunc .endr #endif ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct_add16
ffmpeg | branch: master | Rémi Denis-Courmont | Mon Jul 1 23:41:37 2024 +0300| [30475c95ba50d40cf7605cb382bfd7852c825deb] | committer: Rémi Denis-Courmont lavc/h264dsp: R-V V 8-bit h264_idct_add16 While this *tends* to be faster than plain C, the performance numbers are all over the place, presumably due to the conditional character of the main loop. Some additional micro-optimisations should be feasible after the underlying h264_idct_add and h264_idct_dc_add functions are also implemented. Then it will no longer be necessary to strictly abide by the C ABI. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=30475c95ba50d40cf7605cb382bfd7852c825deb --- libavcodec/riscv/Makefile | 2 +- libavcodec/riscv/h264dsp_init.c | 8 +++ libavcodec/riscv/h264idct_rvv.S | 106 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile index a1510e8c6e..92e1544e76 100644 --- a/libavcodec/riscv/Makefile +++ b/libavcodec/riscv/Makefile @@ -31,7 +31,7 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o -RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o +RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index ab412a9924..064ee95578 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -34,6 +34,10 @@ void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride, void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); +void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset, + int16_t *block, int 
stride, + const uint8_t nnzc[5 * 8]); + extern int ff_startcode_find_candidate_rvb(const uint8_t *, int); extern int ff_startcode_find_candidate_rvv(const uint8_t *, int); @@ -52,6 +56,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv; dsp->h264_h_loop_filter_luma_mbaff = ff_h264_h_loop_filter_luma_mbaff_8_rvv; + +# if __riscv_xlen == 64 +dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv; +# endif } dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv; } diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S new file mode 100644 index 00..74083f8221 --- /dev/null +++ b/libavcodec/riscv/h264idct_rvv.S @@ -0,0 +1,106 @@ +/* + * Copyright © 2024 Rémi Denis-Courmont. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + *this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + *this list of conditions and the following disclaimer in the documentation + *and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "libavutil/riscv/asm.S" + +const ff_h264_scan8 +.byte 014, 015, 024, 025, 016, 017, 026, 027 +.byte 034, 035, 044, 045, 036, 037, 046, 047 +endconst + +#if (__riscv_xlen == 64) +.irpdepth, 8 +func ff_h264_idct_add16_\depth\()_rvv, zve32x +addisp, sp, -80 +lla t0, ff_h264_scan8 +sd s0, (sp) +li t1, 32 << (\depth > 8) +mv s0, sp +sd ra, 8(sp) +sd s1, 16(sp) +sd s2, 24(sp) +sd s3, 32(sp) +sd s4, 40(sp) +sd s5, 48
[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct_add16intra
ffmpeg | branch: master | Rémi Denis-Courmont | Mon Jul 1 23:41:37 2024 +0300| [d1f0c1fbf8db8dffd514c706905a2a8b8e986cb4] | committer: Rémi Denis-Courmont lavc/h264dsp: R-V V 8-bit h264_idct_add16intra > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d1f0c1fbf8db8dffd514c706905a2a8b8e986cb4 --- libavcodec/riscv/h264dsp_init.c | 4 +++ libavcodec/riscv/h264idct_rvv.S | 68 + 2 files changed, 72 insertions(+) diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index 064ee95578..7fc47929cf 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -37,6 +37,9 @@ void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride, void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[5 * 8]); +void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset, + int16_t *block, int stride, + const uint8_t nnzc[5 * 8]); extern int ff_startcode_find_candidate_rvb(const uint8_t *, int); extern int ff_startcode_find_candidate_rvv(const uint8_t *, int); @@ -59,6 +62,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, # if __riscv_xlen == 64 dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv; +dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv; # endif } dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv; diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index 74083f8221..42a29ba336 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -102,5 +102,73 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x addisp, sp, 80 ret endfunc + +func ff_h264_idct_add16intra_\depth\()_rvv, zve32x +addisp, sp, -80 +lla t0, ff_h264_scan8 +sd s0, (sp) +li t1, 32 << (\depth > 8) +mv s0, sp +sd ra, 8(sp) +sd s1, 16(sp) +sd s2, 24(sp) +sd s3, 32(sp) +sd s4, 40(sp) +sd s5, 48(sp) +sd s6, 56(sp) +sd s7, 64(sp) +vsetivli zero, 16, e8, m1, ta, ma +vle8.vv8, (t0) 
+vlse16.v v16, (a2), t1 +vluxei8.v v12, (a4), v8 +.if \depth == 8 +vsetvli zero, zero, e16, m2, ta, ma +.else +vsetvli zero, zero, e32, m4, ta, ma +.endif +vmsne.vi v1, v16, 0 +vsetvli zero, zero, e8, m1, ta, ma +vmsne.vi v0, v12, 0 +vsetvli zero, zero, e16, m2, ta, ma +vmv.x.s s2, v0 +vmv.x.s s3, v1 +li s1, 16 +mv s4, a0 +mv s5, a1 +mv s6, a2 +mv s7, a3 +1: +andit0, s2, 1 +addis1, s1, -1 +srlis2, s2, 1 +lw t2, (s5) # block_offset[i] +andit1, s3, 1 +mv a1, s6 +mv a2, s7 +add a0, s4, t2 +beqzt0, 2f # if (nnzc[scan8[i]]) +callff_h264_idct_add_\depth\()_c +j 3f +2: +beqzt1, 3f# if (block[i * 16]) +callff_h264_idct_dc_add_\depth\()_c +3: +srlis3, s3, 1 +addis5, s5, 4 +addis6, s6, 16 * 2 << (\depth > 8) +bnezs1, 1b + +ld s7, 64(sp) +ld s6, 56(sp) +ld s5, 48(sp) +ld s4, 40(sp) +ld s3, 32(sp) +ld s2, 24(sp) +ld s1, 16(sp) +ld ra, 8(sp) +ld s0, 0(sp) +addisp, sp, 80 +ret +endfunc .endr #endif ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil/timestamp: avoid possible FPE when 0 is passed to av_ts_make_time_string2()
ffmpeg | branch: release/7.0 | Marton Balint | Mon Jun 17 22:30:26 2024 +0200| [c75cabef94e0985d1aa3f8d5ea9f4f7b8795d5ab] | committer: Marton Balint avutil/timestamp: avoid possible FPE when 0 is passed to av_ts_make_time_string2() Signed-off-by: Marton Balint (cherry picked from commit 0d5e3f5a4034b6c9312b7c621e25aa4303a00b6f) > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c75cabef94e0985d1aa3f8d5ea9f4f7b8795d5ab --- libavutil/timestamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/timestamp.c b/libavutil/timestamp.c index 2a3e3012a4..6c231a517d 100644 --- a/libavutil/timestamp.c +++ b/libavutil/timestamp.c @@ -24,7 +24,7 @@ char *av_ts_make_time_string2(char *buf, int64_t ts, AVRational tb) snprintf(buf, AV_TS_MAX_STRING_SIZE, "NOPTS"); } else { double val = av_q2d(tb) * ts; -double log = floor(log10(fabs(val))); +double log = (fpclassify(val) == FP_ZERO ? -INFINITY : floor(log10(fabs(val)))); int precision = (isfinite(log) && log < 0) ? -log + 5 : 6; int last = snprintf(buf, AV_TS_MAX_STRING_SIZE, "%.*f", precision, val); last = FFMIN(last, AV_TS_MAX_STRING_SIZE - 1) - 1; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/h264dsp: R-V V 8-bit h264_idct_add
ffmpeg | branch: master | Rémi Denis-Courmont | Tue Jul 2 22:03:07 2024 +0300| [f447189b0c8067edf54a16c8c6d5513b2de77276] | committer: Rémi Denis-Courmont lavc/h264dsp: R-V V 8-bit h264_idct_add T-Head C908 (cycles): h264_idct4_add_8bpp_c: 271.5 h264_idct4_add_8bpp_rvv_i32: 91.5 > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f447189b0c8067edf54a16c8c6d5513b2de77276 --- libavcodec/riscv/h264dsp_init.c | 2 + libavcodec/riscv/h264idct_rvv.S | 83 - 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index 6b9ffe1c9f..f78ca3ea05 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -34,6 +34,7 @@ void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride, void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); +void ff_h264_idct_add_8_rvv(uint8_t *dst, int16_t *block, int stride); void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[5 * 8]); @@ -63,6 +64,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, dsp->h264_h_loop_filter_luma_mbaff = ff_h264_h_loop_filter_luma_mbaff_8_rvv; +dsp->h264_idct_add = ff_h264_idct_add_8_rvv; # if __riscv_xlen == 64 dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv; dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv; diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index 7422942717..b36a7f7572 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -26,6 +26,83 @@ #include "libavutil/riscv/asm.S" +.macro sx rd, addr +#if (__riscv_xlen == 32) +sw \rd, \addr +#elif (__riscv_xlen == 64) +sd \rd, \addr +#else +sq \rd, \addr +#endif +.endm + +.variant_cc ff_h264_idct4_rvv +func ff_h264_idct4_rvv, zve32x +vsra.vi v5, v1, 1 +vsra.vi v7, v3, 1 +vadd.vv v8, v0, v2 # z0 +vsub.vv v9, v0, v2 # z1 +vsub.vv v10, 
v5, v3 # z2 +vadd.vv v11, v1, v7 # z3 +vadd.vv v1, v9, v10 +vsub.vv v2, v9, v10 +vadd.vv v0, v8, v11 +vsub.vv v3, v8, v11 +jr t0 +endfunc + +func ff_h264_idct_add_8_rvv, zve32x +csrwi vxrm, 0 +.Lidct_add4_8_rvv: +vsetivli zero, 4, e16, mf2, ta, ma +addi t1, a1, 1 * 4 * 2 +vle16.v v0, (a1) +addi t2, a1, 2 * 4 * 2 +vle16.v v1, (t1) +addi t3, a1, 3 * 4 * 2 +vle16.v v2, (t2) +vle16.v v3, (t3) +jal t0, ff_h264_idct4_rvv +vse16.v v0, (a1) +vse16.v v1, (t1) +vse16.v v2, (t2) +vse16.v v3, (t3) +vlseg4e16.v v0, (a1) +.rept 256 / __riscv_xlen +sx zero, ((__riscv_xlen / 8) * \+)(a1) +.endr +jal t0, ff_h264_idct4_rvv +add t1, a0, a2 +vle8.v v4, (a0) +add t2, t1, a2 +vle8.v v5, (t1) +add t3, t2, a2 +vle8.v v6, (t2) +vle8.v v7, (t3) +.irp n,0,1,2,3 +vssra.vi v\n, v\n, 6 +.endr +vsetvli zero, zero, e8, mf4, ta, ma +vwaddu.wv v0, v0, v4 +vwaddu.wv v1, v1, v5 +vwaddu.wv v2, v2, v6 +vwaddu.wv v3, v3, v7 +vsetvli zero, zero, e16, mf2, ta, ma +.irp n,0,1,2,3 +vmax.vx v\n, v\n, zero +.endr +vsetvli zero, zero, e8, mf4, ta, ma +vnclipu.wi v4, v0, 0 +vnclipu.wi v5, v1, 0 +vnclipu.wi v6, v2, 0 +vnclipu.wi v7, v3, 0 +vse8.v v4, (a0) +vse8.v v5, (t1) +vse8.v v6, (t2) +vse8.v v7, (t3) +ret +endfunc + const ff_h264_scan8 .byte 014, 015, 024, 025, 016, 017, 026, 027 .byte 034, 035, 044, 045, 036, 037, 046, 047 @@ -34,6 +111,7 @@ endconst #if (__riscv_xlen == 64) .irp depth, 8 func ff_h264_idct_add16_\depth\()_rvv, zve32x +csrwi vxrm, 0 addi sp, sp, -80 lla t0, ff_h264_scan8 sd s0, (sp) @@ -83,7 +161,7 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x call ff_h264_idct_dc_add_\depth\()_c j 3f 2: -call ff_h264_idct_add_\depth\()_c +call .Lidct_add4_\depth\()_rvv 3: srli s3, s3, 1 addi s5, s5, 4 @@ -104,6 +182,7 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x endfunc func ff_h264_idct_add16intra_\depth\()_rvv, zve32x +csrwi vxrm, 0 addis