[FFmpeg-devel] [PATCH 1/6] aarch64/hevc: Simplify function prototypes by macro
From: Zhao Zhili --- libavcodec/aarch64/hevcdsp_init_aarch64.c | 66 +++ 1 file changed, 18 insertions(+), 48 deletions(-) diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index a90da0246e..26bbc8750f 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -92,54 +92,24 @@ void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs); void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs); void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs); void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs); -void ff_hevc_put_hevc_qpel_h4_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, - intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h6_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, - intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h8_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, - intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h12_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, - intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h16_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, - intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h32_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, - intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_uni_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, - int width); -void ff_hevc_put_hevc_qpel_uni_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, - int width); -void ff_hevc_put_hevc_qpel_uni_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, - int width); 
-void ff_hevc_put_hevc_qpel_uni_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t - my, int width); -void ff_hevc_put_hevc_qpel_uni_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t - my, int width); -void ff_hevc_put_hevc_qpel_uni_h32_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t - my, int width); -void ff_hevc_put_hevc_qpel_bi_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, -ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t -mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_bi_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, -ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t -mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_bi_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, -ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t -mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_bi_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t - mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_bi_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t - mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_bi_h32_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, - ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t - mx, intptr_t my, int width); + +#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
[FFmpeg-devel] [PATCH 3/6] aarch64/vvc: Add put_qpel_h_* and put_qpel_uni_h_*
From: Zhao Zhili Just share hevc implementation. checkasm --test=vvc_mc --benchmark: put_luma_h_8_4x4_c: 0.2 ( 1.00x) put_luma_h_8_4x4_neon: 0.2 ( 1.00x) put_luma_h_8_8x8_c: 1.0 ( 1.00x) put_luma_h_8_8x8_neon: 0.2 ( 4.33x) put_luma_h_8_16x16_c:3.2 ( 1.00x) put_luma_h_8_16x16_neon: 1.2 ( 2.63x) put_luma_h_8_32x32_c: 13.7 ( 1.00x) put_luma_h_8_32x32_neon: 4.0 ( 3.45x) put_luma_h_8_64x64_c: 48.2 ( 1.00x) put_luma_h_8_64x64_neon:15.7 ( 3.07x) put_luma_h_8_128x128_c:203.5 ( 1.00x) put_luma_h_8_128x128_neon: 62.0 ( 3.28x) put_uni_h_luma_8_4x4_c: 0.2 ( 1.00x) put_uni_h_luma_8_4x4_neon: 0.2 ( 1.00x) put_uni_h_luma_8_8x8_c: 1.5 ( 1.00x) put_uni_h_luma_8_8x8_neon: 0.2 ( 6.56x) put_uni_h_luma_8_16x16_c:5.7 ( 1.00x) put_uni_h_luma_8_16x16_neon: 1.2 ( 4.67x) put_uni_h_luma_8_32x32_c: 24.0 ( 1.00x) put_uni_h_luma_8_32x32_neon: 4.7 ( 5.07x) put_uni_h_luma_8_64x64_c: 90.0 ( 1.00x) put_uni_h_luma_8_64x64_neon:17.0 ( 5.30x) put_uni_h_luma_8_128x128_c:357.7 ( 1.00x) put_uni_h_luma_8_128x128_neon: 67.5 ( 5.30x) --- libavcodec/aarch64/h26x/dsp.h | 13 ++ libavcodec/aarch64/h26x/qpel_neon.S | 202 libavcodec/aarch64/vvc/Makefile | 1 + libavcodec/aarch64/vvc/dsp_init.c | 14 ++ 4 files changed, 171 insertions(+), 59 deletions(-) diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h index 902286872d..f72746ce03 100644 --- a/libavcodec/aarch64/h26x/dsp.h +++ b/libavcodec/aarch64/h26x/dsp.h @@ -235,4 +235,17 @@ NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width), _i8mm); +#undef NEON8_FNPROTO_PARTIAL_4 +#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \ +void ff_vvc_put_##fn##_h4_8_neon##ext args; \ +void ff_vvc_put_##fn##_h8_8_neon##ext args; \ +void ff_vvc_put_##fn##_h16_8_neon##ext args; \ +void ff_vvc_put_##fn##_h32_8_neon##ext args; + +NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, +const int8_t 
*hf, const int8_t *vf, int width),) + +NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, +ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),) + #endif diff --git a/libavcodec/aarch64/h26x/qpel_neon.S b/libavcodec/aarch64/h26x/qpel_neon.S index 8ddaa32b70..a05009c9d6 100644 --- a/libavcodec/aarch64/h26x/qpel_neon.S +++ b/libavcodec/aarch64/h26x/qpel_neon.S @@ -21,7 +21,8 @@ */ #include "libavutil/aarch64/asm.S" -#define MAX_PB_SIZE 64 +#define HEVC_MAX_PB_SIZE 64 +#define VVC_MAX_PB_SIZE 128 const qpel_filters, align=4 .byte 0, 0, 0, 0, 0, 0, 0, 0 @@ -44,6 +45,11 @@ endconst sxtlv0.8h, v0.8b .endm +.macro vvc_load_filter m +ld1 {v0.8b}, [\m] +sxtlv0.8h, v0.8b +.endm + .macro load_qpel_filterb freg, xreg movrel \xreg, qpel_filters_abs add \xreg, \xreg, \freg, lsl #3 @@ -212,22 +218,40 @@ function ff_hevc_put_hevc_h4_8_neon, export=0 endfunc .endif +.ifnc \type, qpel_bi +function ff_vvc_put_\type\()_h4_8_neon, export=1 +vvc_load_filter mx +sub src, src, #3 +mov mx, x30 +.ifc \type, qpel +mov dststride, #(VVC_MAX_PB_SIZE << 1) +lsl x13, srcstride, #1 // srcstridel +mov x14, #(VVC_MAX_PB_SIZE << 2) +.else +lsl x14, dststride, #1 // dststridel +lsl x13, srcstride, #1 // srcstridel +.endif +b 1f +endfunc +.endif // !qpel_bi + function ff_hevc_put_hevc_\type\()_h4_8_neon, export=1 load_filter mx .ifc \type, qpel_bi -mov x16, #(MAX_PB_SIZE << 2) // src2bstridel -add x15, x4, #(MAX_PB_SIZE << 1) // src2b +mov x16, #(HEVC_MAX_PB_SIZE << 2) // src2bstridel +add x15, x4, #(HEVC_MAX_PB_SIZE << 1) // src2b .endif sub src, src, #3 mov mx, x30 .ifc \type, qpel -mov dststride,
[FFmpeg-devel] [PATCH 5/6] aarch64/vvc: Add put_qpel_hx i8mm
From: Zhao Zhili Benchmark on Android pixel 8 with -fno-vectorize put_luma_h_8_4x4_c: 0.2 ( 1.00x) put_luma_h_8_4x4_neon: 0.2 ( 1.00x) put_luma_h_8_4x4_i8mm: 0.0 ( 0.00x) put_luma_h_8_8x8_c: 1.5 ( 1.00x) put_luma_h_8_8x8_neon: 0.5 ( 3.00x) put_luma_h_8_8x8_i8mm: 0.5 ( 3.00x) put_luma_h_8_16x16_c:6.2 ( 1.00x) put_luma_h_8_16x16_neon: 2.0 ( 3.12x) put_luma_h_8_16x16_i8mm: 1.5 ( 4.17x) put_luma_h_8_32x32_c: 25.5 ( 1.00x) put_luma_h_8_32x32_neon: 9.0 ( 2.83x) put_luma_h_8_32x32_i8mm: 6.8 ( 3.78x) put_luma_h_8_64x64_c: 99.8 ( 1.00x) put_luma_h_8_64x64_neon:35.2 ( 2.83x) put_luma_h_8_64x64_i8mm:27.2 ( 3.66x) put_luma_h_8_128x128_c:422.0 ( 1.00x) put_luma_h_8_128x128_neon: 138.5 ( 3.05x) put_luma_h_8_128x128_i8mm: 109.2 ( 3.86x) --- libavcodec/aarch64/h26x/dsp.h | 4 ++ libavcodec/aarch64/h26x/qpel_neon.S | 68 ++--- libavcodec/aarch64/vvc/dsp_init.c | 9 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h index 076d01b477..323a253257 100644 --- a/libavcodec/aarch64/h26x/dsp.h +++ b/libavcodec/aarch64/h26x/dsp.h @@ -270,4 +270,8 @@ NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride, int height, int denom, int wx, int ox, const int8_t *hf, const int8_t *vf, int width),); +NEON8_FNPROTO_PARTIAL_6(qpel_h, (int16_t * dst, +const uint8_t *_src, ptrdiff_t _srcstride, int height, +const int8_t *hf, const int8_t *vf, int width), _i8mm); + #endif diff --git a/libavcodec/aarch64/h26x/qpel_neon.S b/libavcodec/aarch64/h26x/qpel_neon.S index 0585f03de9..8a372a76be 100644 --- a/libavcodec/aarch64/h26x/qpel_neon.S +++ b/libavcodec/aarch64/h26x/qpel_neon.S @@ -3518,6 +3518,17 @@ endfunc sub x1, x1, #3 .endm +.macro VVC_QPEL_H_HEADER +ld1r{v31.2d}, [x4] +sub x1, x1, #3 +.endm + +function ff_vvc_put_qpel_h4_8_neon_i8mm, export=1 +VVC_QPEL_H_HEADER +mov x10, #VVC_MAX_PB_SIZE * 2 +b 1f +endfunc + function ff_hevc_put_hevc_qpel_h4_8_neon_i8mm, export=1 QPEL_H_HEADER mov x10, #HEVC_MAX_PB_SIZE * 2 
@@ -3574,6 +3585,12 @@ function ff_hevc_put_hevc_qpel_h6_8_neon_i8mm, export=1 ret endfunc +function ff_vvc_put_qpel_h8_8_neon_i8mm, export=1 +VVC_QPEL_H_HEADER +mov x10, #VVC_MAX_PB_SIZE * 2 +b 1f +endfunc + function ff_hevc_put_hevc_qpel_h8_8_neon_i8mm, export=1 QPEL_H_HEADER mov x10, #HEVC_MAX_PB_SIZE * 2 @@ -3658,6 +3675,12 @@ function ff_hevc_put_hevc_qpel_h12_8_neon_i8mm, export=1 ret endfunc +function ff_vvc_put_qpel_h16_8_neon_i8mm, export=1 +VVC_QPEL_H_HEADER +mov x10, #VVC_MAX_PB_SIZE * 2 +b 1f +endfunc + function ff_hevc_put_hevc_qpel_h16_8_neon_i8mm, export=1 QPEL_H_HEADER mov x10, #HEVC_MAX_PB_SIZE * 2 @@ -3748,6 +3771,13 @@ function ff_hevc_put_hevc_qpel_h24_8_neon_i8mm, export=1 ret endfunc +function ff_vvc_put_qpel_h32_8_neon_i8mm, export=1 +VVC_QPEL_H_HEADER +mov x10, #VVC_MAX_PB_SIZE * 2 +add x15, x0, #32 +b 1f +endfunc + function ff_hevc_put_hevc_qpel_h32_8_neon_i8mm, export=1 QPEL_H_HEADER mov x10, #HEVC_MAX_PB_SIZE * 2 @@ -3883,10 +3913,7 @@ function ff_hevc_put_hevc_qpel_h48_8_neon_i8mm, export=1 ret endfunc -function ff_hevc_put_hevc_qpel_h64_8_neon_i8mm, export=1 -QPEL_H_HEADER -sub x2, x2, #64 -1: +.macro put_qpel_h64_8_neon_i8mm ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x1], #64 ext v1.16b, v16.16b, v17.16b, #1 ext v2.16b, v16.16b, v17.16b, #2 @@ -3977,11 +4004,42 @@ function ff_hevc_put_hevc_qpel_h64_8_neon_i8mm, export=1 sqxtn2 v20.8h, v26.4s sqxtn v21.4h, v23.4s sqxtn2 v21.8h, v27.4s -stp q20, q21, [x0], #32 +stp q20, q21, [x0] +add x0, x0, x10 +.endm + +function ff_vvc_put_qpel_h64_8_neon_i8mm, export=1 +VVC_QPEL_H_HEADER +mov x10, #(VVC_MAX_PB_SIZE * 2 - 32 * 3) +
[FFmpeg-devel] [PATCH 4/6] aarch64/vvc: Add put_pel/put_pel_uni/put_pel_uni_w
From: Zhao Zhili put_luma_pixels_8_4x4_c: 0.2 ( 1.00x) put_luma_pixels_8_4x4_neon: 0.2 ( 1.00x) put_luma_pixels_8_8x8_c: 0.7 ( 1.00x) put_luma_pixels_8_8x8_neon: 0.2 ( 3.22x) put_luma_pixels_8_16x16_c: 2.2 ( 1.00x) put_luma_pixels_8_16x16_neon:0.2 ( 9.89x) put_luma_pixels_8_32x32_c: 8.2 ( 1.00x) put_luma_pixels_8_32x32_neon:1.2 ( 6.71x) put_luma_pixels_8_64x64_c: 33.7 ( 1.00x) put_luma_pixels_8_64x64_neon:2.5 (13.63x) put_luma_pixels_8_128x128_c: 145.5 ( 1.00x) put_luma_pixels_8_128x128_neon: 10.2 (14.23x) put_uni_pixels_luma_8_4x4_c: 0.5 ( 1.00x) put_uni_pixels_luma_8_4x4_neon: 0.0 ( 0.00x) put_uni_pixels_luma_8_8x8_c: 0.5 ( 1.00x) put_uni_pixels_luma_8_8x8_neon: 0.2 ( 2.11x) put_uni_pixels_luma_8_16x16_c: 1.2 ( 1.00x) put_uni_pixels_luma_8_16x16_neon:0.2 ( 5.44x) put_uni_pixels_luma_8_32x32_c: 3.0 ( 1.00x) put_uni_pixels_luma_8_32x32_neon:0.5 ( 6.26x) put_uni_pixels_luma_8_64x64_c: 3.0 ( 1.00x) put_uni_pixels_luma_8_64x64_neon:1.7 ( 1.72x) put_uni_pixels_luma_8_128x128_c: 6.5 ( 1.00x) put_uni_pixels_luma_8_128x128_neon: 6.5 ( 1.00x) --- libavcodec/aarch64/h26x/dsp.h | 22 libavcodec/aarch64/h26x/epel_neon.S | 193 +--- libavcodec/aarch64/h26x/qpel_neon.S | 83 +++- libavcodec/aarch64/vvc/Makefile | 1 + libavcodec/aarch64/vvc/dsp_init.c | 21 +++ 5 files changed, 245 insertions(+), 75 deletions(-) diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h index f72746ce03..076d01b477 100644 --- a/libavcodec/aarch64/h26x/dsp.h +++ b/libavcodec/aarch64/h26x/dsp.h @@ -248,4 +248,26 @@ NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _src NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),) +#undef NEON8_FNPROTO_PARTIAL_6 +#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \ +void ff_vvc_put_##fn##4_8_neon##ext args; \ +void ff_vvc_put_##fn##8_8_neon##ext args; \ +void ff_vvc_put_##fn##16_8_neon##ext args; \ 
+void ff_vvc_put_##fn##32_8_neon##ext args; \ +void ff_vvc_put_##fn##64_8_neon##ext args; \ +void ff_vvc_put_##fn##128_8_neon##ext args + +NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst, +const uint8_t *src, ptrdiff_t srcstride, int height, +const int8_t *hf, const int8_t *vf, int width),); + +NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride, +const uint8_t *_src, ptrdiff_t _srcstride, int height, +const int8_t *hf, const int8_t *vf, int width),); + +NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride, +const uint8_t *_src, ptrdiff_t _srcstride, +int height, int denom, int wx, int ox, +const int8_t *hf, const int8_t *vf, int width),); + #endif diff --git a/libavcodec/aarch64/h26x/epel_neon.S b/libavcodec/aarch64/h26x/epel_neon.S index 378b0f7fb2..729395f2f0 100644 --- a/libavcodec/aarch64/h26x/epel_neon.S +++ b/libavcodec/aarch64/h26x/epel_neon.S @@ -19,7 +19,8 @@ */ #include "libavutil/aarch64/asm.S" -#define MAX_PB_SIZE 64 +#define HEVC_MAX_PB_SIZE 64 +#define VVC_MAX_PB_SIZE 128 const epel_filters, align=4 .byte 0, 0, 0, 0 @@ -131,8 +132,13 @@ endconst b.ne1b .endm +function ff_vvc_put_pel_pixels4_8_neon, export=1 +mov x7, #(VVC_MAX_PB_SIZE * 2) +b 1f +endfunc + function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1 -mov x7, #(MAX_PB_SIZE * 2) +mov x7, #(HEVC_MAX_PB_SIZE * 2) 1: ld1 {v0.s}[0], [x1], x2 ushll v4.8h, v0.8b, #6 subsw3, w3, #1 @@ -142,7 +148,7 @@ function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1 endfunc function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1 -mov x7, #(MAX_PB_SIZE * 2 - 8) +mov x7, #(HEVC_MAX_PB_SIZE * 2 - 8) 1: ld1 {v0.8b}, [x1], x2 ushll v4.8h, v0.8b, #6 st1 {v4.d}[0], [x0], #8 @@ -152,8 +158,13 @@ function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1 ret endfunc +function ff_vvc_put_pel_pixels8_8_neon, export=1 +mov x7, #(VVC_MAX_PB_SIZE * 2) +
[FFmpeg-devel] [PATCH 6/6] avcodec/hevc: ff_hevc_(qpel/epel)_filters are signed type
From: Zhao Zhili --- libavcodec/hevc/dsp_template.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c index aebccd1a0c..a0f79c2673 100644 --- a/libavcodec/hevc/dsp_template.c +++ b/libavcodec/hevc/dsp_template.c @@ -302,8 +302,8 @@ IDCT_DC(32) #define ff_hevc_pel_filters ff_hevc_qpel_filters #define DECL_HV_FILTER(f) \ -const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \ -const uint8_t *vf = ff_hevc_ ## f ## _filters[my]; +const int8_t *hf = ff_hevc_ ## f ## _filters[mx]; \ +const int8_t *vf = ff_hevc_ ## f ## _filters[my]; #define FW_PUT(p, f, t) \ static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height,\ -- 2.42.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/6] aarch64/hevc: Move epel/qpel to h26x directory
From: Zhao Zhili So vvc can reuse the implementation. --- libavcodec/aarch64/Makefile | 4 +- libavcodec/aarch64/h26x/dsp.h | 198 ++ .../{hevcdsp_epel_neon.S => h26x/epel_neon.S} | 0 .../{hevcdsp_qpel_neon.S => h26x/qpel_neon.S} | 0 libavcodec/aarch64/hevcdsp_init_aarch64.c | 197 - 5 files changed, 200 insertions(+), 199 deletions(-) rename libavcodec/aarch64/{hevcdsp_epel_neon.S => h26x/epel_neon.S} (100%) rename libavcodec/aarch64/{hevcdsp_qpel_neon.S => h26x/qpel_neon.S} (100%) diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index a01e665b55..9affb92789 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -71,6 +71,6 @@ NEON-OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9itxfm_16bpp_neon.o \ NEON-OBJS-$(CONFIG_HEVC_DECODER)+= aarch64/hevcdsp_deblock_neon.o \ aarch64/hevcdsp_idct_neon.o \ aarch64/hevcdsp_init_aarch64.o \ - aarch64/hevcdsp_qpel_neon.o \ - aarch64/hevcdsp_epel_neon.o \ + aarch64/h26x/epel_neon.o \ + aarch64/h26x/qpel_neon.o \ aarch64/h26x/sao_neon.o diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h index d3f7a4dfe3..902286872d 100644 --- a/libavcodec/aarch64/h26x/dsp.h +++ b/libavcodec/aarch64/h26x/dsp.h @@ -37,4 +37,202 @@ void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdi const int16_t *sao_offset_val, int eo, int width, int height); void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height); + +#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \ +void ff_hevc_put_hevc_##fn##_h4_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##_h6_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##_h8_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##_h12_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##_h16_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##_h32_8_neon##ext args; + +NEON8_FNPROTO_PARTIAL_6(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int 
height, +intptr_t mx, intptr_t my, int width),) + +NEON8_FNPROTO_PARTIAL_6(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, +ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width),) + +NEON8_FNPROTO_PARTIAL_6(qpel_bi, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, +ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t +mx, intptr_t my, int width),) + +#define NEON8_FNPROTO(fn, args, ext) \ +void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##6_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##12_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##24_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##48_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##64_8_neon##ext args + +#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \ +void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##64_8_neon##ext args + +#define NEON8_FNPROTO_PARTIAL_5(fn, args, ext) \ +void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \ +void ff_hevc_put_hevc_##fn##64_8_neon##ext args + +NEON8_FNPROTO(pel_pixels, (int16_t *dst, +const uint8_t *src, ptrdiff_t srcstride, +int height, intptr_t mx, intptr_t my, int width),); + +NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride, +const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, +int height, intptr_t mx, intptr_t my, int width),); + +NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride, +const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2, +int height, intptr_t mx, intptr_t my, int width),); + 
+NEON8_FNPROTO(epel_bi_v, (uint8_t *dst, ptrdiff_t dststride, +const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2, +int height, intptr_t mx, intptr_t my, int width),); + +NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride, +const uint8_
Re: [FFmpeg-devel] [PATCH] libavcodec: implementation of DNxUncompressed decoder
On Thu, Sep 05, 2024 at 04:35:04AM +0200, Martin Schitter wrote: > This is a second attempt to contribute the corrected code of an > AVID DNxUncompressed / SMTPE RDD 50 decoder. > > Thanks > Martin > > --- > Changelog | 1 + > doc/general_contents.texi | 1 + > libavcodec/Makefile | 1 + > libavcodec/allcodecs.c| 1 + > libavcodec/codec_desc.c | 7 + > libavcodec/codec_id.h | 1 + > libavcodec/dnxucdec.c | 495 ++ > libavcodec/parsers.c | 1 + > libavcodec/version.c | 2 +- > libavcodec/version.h | 2 +- > libavformat/mxf.c | 1 + > libavformat/mxfdec.c | 21 ++ > 12 files changed, 532 insertions(+), 2 deletions(-) > create mode 100644 libavcodec/dnxucdec.c fails build with --enable-small libavcodec/dnxucdec.c:489:22: error: expected ‘}’ before string constant 489 | CODEC_LONG_NAME()"DNxUncompressed (SMPTE RDD 50)", | ^~~~ libavcodec/dnxucdec.c:487:34: note: to match this ‘{’ 487 | const FFCodec ff_dnxuc_decoder = { | ^ libavcodec/dnxucdec.c:422:12: warning: ‘dnxuc_decode_frame’ defined but not used [-Wunused-function] 422 | static int dnxuc_decode_frame(AVCodecContext *avctx, AVFrame *frame, |^~ libavcodec/dnxucdec.c:119:20: warning: ‘dnxuc_decode_init’ defined but not used [-Wunused-function] 119 | static av_cold int dnxuc_decode_init(AVCodecContext *avctx){ |^ make: *** [ffbuild/common.mak:81: libavcodec/dnxucdec.o] Error 1 make: *** Waiting for unfinished jobs thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB It is a danger to trust the dream we wish for rather than the science we have, -- Dr. Kenneth Brown signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] lavu/opt: av_opt_set_array: fix uninitialised return
In one failure path of av_opt_set_array, the ret variable was declared again, shadowing the outer one. The return value was therefore written to the inner variable, and after the goto the uninitialized outer one was returned instead. Introduced in 450a3f58edb22d28912a5e65dc08d9e2fb805066 Fixes: CID1619242 Uninitialized scalar variable --- libavutil/opt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavutil/opt.c b/libavutil/opt.c index d2af76478c..81fc1c5994 100644 --- a/libavutil/opt.c +++ b/libavutil/opt.c @@ -2368,7 +2368,6 @@ int av_opt_set_array(void *obj, const char *name, int search_flags, val_type == AV_OPT_TYPE_FLOAT|| val_type == AV_OPT_TYPE_DOUBLE || val_type == AV_OPT_TYPE_RATIONAL) { -int ret; switch (val_type) { case AV_OPT_TYPE_INT: intnum = *(int*)src; break; base-commit: c0666d8bedfb8bd242ea2a9fe2bd3e5a1addc0a5 -- 2.39.3 (Apple Git-146) ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] configure: enable warnings for shadowed variables
These can easily lead to incredibly confusing errors, and should practically never happen. I'd have loved to make this a -Werror even, but sadly there are way too many instances in the codebase right now that first need to be weeded out. --- configure | 1 + 1 file changed, 1 insertion(+) diff --git a/configure b/configure index a8e67d230c..547b67565d 100755 --- a/configure +++ b/configure @@ -7406,6 +7406,7 @@ check_cflags -Wundef check_cflags -Wmissing-prototypes check_cflags -Wstrict-prototypes check_cflags -Wempty-body +check_cflags -Wshadow if enabled extra_warnings; then check_cflags -Wcast-qual -- 2.44.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] configure: enable warnings for shadowed variables
On Sat, Sep 07, 2024 at 08:52:41PM +0200, Timo Rothenpieler wrote: > These can easily lead to incredibly confusing errors, and should > practically never happen. > I'd have loved to make this a -Werror even, but sadly there is way too > many instances in the codebase right now that first needs to be weeded > out. > --- > configure | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/configure b/configure > index a8e67d230c..547b67565d 100755 > --- a/configure > +++ b/configure > @@ -7406,6 +7406,7 @@ check_cflags -Wundef > check_cflags -Wmissing-prototypes > check_cflags -Wstrict-prototypes > check_cflags -Wempty-body > +check_cflags -Wshadow can you provide a list of warnings this produces ? but in principle this seems a good idea thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Does the universe only have a finite lifespan? No, its going to go on forever, its just that you wont like living in it. -- Hiranya Peiri signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] libavcodec/dnxucdec.c: fix displaced bracket
Sorry for this blocking tiny stupid error. I don't know how it happened! martin --- libavcodec/dnxucdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/dnxucdec.c b/libavcodec/dnxucdec.c index 502e736..e9a2157 100644 --- a/libavcodec/dnxucdec.c +++ b/libavcodec/dnxucdec.c @@ -486,7 +486,7 @@ const AVCodecParser ff_dnxuc_parser = { const FFCodec ff_dnxuc_decoder = { .p.name = "dnxuc", -CODEC_LONG_NAME()"DNxUncompressed (SMPTE RDD 50)", +CODEC_LONG_NAME("DNxUncompressed (SMPTE RDD 50)"), .p.type = AVMEDIA_TYPE_VIDEO, .p.id = AV_CODEC_ID_DNXUC, .init = dnxuc_decode_init, -- 2.45.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v5] avfilter: add XPSNR filter
On Wed, Aug 28, 2024 at 01:40:39PM +, Helmrich, Christian wrote: > Following up on this: attached a (final, in our view) v5. Changes over v3: > > > - cleanup and align to psnr filter > - add metadata > - add xpsnr tests for yuv and rgb will apply thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB If you think the mosad wants you dead since a long time then you are either wrong or dead since a long time. signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v1] XPSNR: add maintainers
On Fri, Sep 06, 2024 at 03:11:01PM +, Helmrich, Christian wrote: > Hi, adding us XPSNR authors to MAINTAINERS, as requested/suggested in > > https://ffmpeg.org/pipermail/ffmpeg-devel/2024-September/332940.html [...] > MAINTAINERS |1 + > 1 file changed, 1 insertion(+) > f69e632b19924e0a25a0735386775488fadf921a xpsnr_add_maintainers.patch > From 6a020fc9279ab2fd66e6dd8596f566ee6578cb35 Mon Sep 17 00:00:00 2001 > From: Christian Helmrich > Date: Fri, 6 Sep 2024 17:00:00 +0200 > Subject: [PATCH v1] XPSNR: add maintainers > > Add XPSNR authors to MAINTAINERS > --- > > diff --git a/MAINTAINERS b/MAINTAINERS > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -344,6 +344,7 @@ >vf_readvitc.c Tobias Rapp (CC t.rapp at > noa-archive dot com) >vf_scale.c[2] Michael Niedermayer >vf_tonemap_opencl.c Ruiling Song > + vf_xpsnr.cChristian Helmrich, Christian Lehmann please add the status [1] or [2] thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Many things microsoft did are stupid, but not doing something just because microsoft did it is even more stupid. If everything ms did were stupid they would be bankrupt already. signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [RFC] 7.1 Release
On Sat, Aug 24, 2024 at 10:31:28PM +0200, Michael Niedermayer wrote: > On Wed, Aug 14, 2024 at 02:41:55PM +0200, Michael Niedermayer wrote: > > Hi all > > > > Are there any upcoming LTS releases that want to/could include FFmpeg 7.1 ? > > If so please reply here and list the date before which we would have to > > finish the 7.1 release so it can be included with no problems > > > > Otherwise, are there any preferrances of the general/approximate release > > date? > > I intend to branch 7.1 in the next 1-2 weeks and hopefully make the 7.1 > release then 1-2 weeks after that > > If you have release blocking issues (bugs, anything else you want to do > before) > open up an issue and set "Blocking: 7.1" on trac so it shows up here: > https://trac.ffmpeg.org/report/16 Nothing new on the "Release Blocking Issues" page on trac thus I intend to branch 7.1 in the next 24h release could be made from that branch 1-2 weeks after thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Some people wanted to paint the bikeshed green, some blue and some pink. People argued and fought, when they finally agreed, only rust was left. signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [FFmpeg-cvslog] lavc/vvc: Remove experimental flag
On Sat, Sep 7, 2024 at 12:49 AM Jean-Baptiste Kempf wrote: > > > On Fri, 6 Sep 2024, at 16:18, Frank Plowman wrote: > > ffmpeg | branch: master | Frank Plowman | Fri > > Aug 23 13:36:50 2024 +0100| [6df0c5f9f4c3261acf5b0efe43597b9eb765d6b6] > > | committer: Nuo Mi > > > > lavc/vvc: Remove experimental flag > > Congratulations. > It’s been a great collaboration between many community members. Special thanks to Andreas Rheinhardt, Eliny Huang, Frank Plowman, Haihao Xiang, James Almer, Jun Zhao, Kieran Kunhya, Stone Chen, Martin Storsjö, Michael Niedermayer, Wu Jianhua, and Zhao Zhili for their patch contributions. Thanks as well to you, Anton Khirnov, Benjamin Bross, Mark Thompson, and Ronald S. Bultje for the valuable code reviews and suggestions. > -- > Jean-Baptiste Kempf - President > +33 672 704 734 > https://jbkempf.com/ > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".