date:20240907

[FFmpeg-devel] [PATCH 1/6] aarch64/hevc: Simplify function prototypes by macro

2024-09-07 Thread Zhao Zhili

From: Zhao Zhili 

---
 libavcodec/aarch64/hevcdsp_init_aarch64.c | 66 +++
 1 file changed, 18 insertions(+), 48 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index a90da0246e..26bbc8750f 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -92,54 +92,24 @@ void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
 void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
 void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
 void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs);
-void ff_hevc_put_hevc_qpel_h4_8_neon(int16_t *dst, const uint8_t *_src, 
ptrdiff_t _srcstride, int height,
- intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_h6_8_neon(int16_t *dst, const uint8_t *_src, 
ptrdiff_t _srcstride, int height,
- intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_h8_8_neon(int16_t *dst, const uint8_t *_src, 
ptrdiff_t _srcstride, int height,
- intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_h12_8_neon(int16_t *dst, const uint8_t *_src, 
ptrdiff_t _srcstride, int height,
-  intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_h16_8_neon(int16_t *dst, const uint8_t *_src, 
ptrdiff_t _srcstride, int height,
-  intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_h32_8_neon(int16_t *dst, const uint8_t *_src, 
ptrdiff_t _srcstride, int height,
-  intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_uni_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
- ptrdiff_t _srcstride, int height, 
intptr_t mx, intptr_t my,
- int width);
-void ff_hevc_put_hevc_qpel_uni_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
- ptrdiff_t _srcstride, int height, 
intptr_t mx, intptr_t my,
- int width);
-void ff_hevc_put_hevc_qpel_uni_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
- ptrdiff_t _srcstride, int height, 
intptr_t mx, intptr_t my,
- int width);
-void ff_hevc_put_hevc_qpel_uni_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
-  ptrdiff_t _srcstride, int height, 
intptr_t mx, intptr_t
-  my, int width);
-void ff_hevc_put_hevc_qpel_uni_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
-  ptrdiff_t _srcstride, int height, 
intptr_t mx, intptr_t
-  my, int width);
-void ff_hevc_put_hevc_qpel_uni_h32_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
-  ptrdiff_t _srcstride, int height, 
intptr_t mx, intptr_t
-  my, int width);
-void ff_hevc_put_hevc_qpel_bi_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
-ptrdiff_t _srcstride, const int16_t 
*src2, int height, intptr_t
-mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_bi_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
-ptrdiff_t _srcstride, const int16_t 
*src2, int height, intptr_t
-mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_bi_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
-ptrdiff_t _srcstride, const int16_t 
*src2, int height, intptr_t
-mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_bi_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
- ptrdiff_t _srcstride, const int16_t 
*src2, int height, intptr_t
- mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_bi_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
- ptrdiff_t _srcstride, const int16_t 
*src2, int height, intptr_t
- mx, intptr_t my, int width);
-void ff_hevc_put_hevc_qpel_bi_h32_8_neon(uint8_t *_dst, ptrdiff_t _dststride, 
const uint8_t *_src,
- ptrdiff_t _srcstride, const int16_t 
*src2, int height, intptr_t
- mx, intptr_t my, int width);
+
+#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \

[FFmpeg-devel] [PATCH 3/6] aarch64/vvc: Add put_qpel_h_* and put_qpel_uni_h_*

2024-09-07 Thread Zhao Zhili

From: Zhao Zhili 

Just share hevc implementation.

checkasm --test=vvc_mc --benchmark:

put_luma_h_8_4x4_c:  0.2 ( 1.00x)
put_luma_h_8_4x4_neon:   0.2 ( 1.00x)
put_luma_h_8_8x8_c:  1.0 ( 1.00x)
put_luma_h_8_8x8_neon:   0.2 ( 4.33x)
put_luma_h_8_16x16_c:3.2 ( 1.00x)
put_luma_h_8_16x16_neon: 1.2 ( 2.63x)
put_luma_h_8_32x32_c:   13.7 ( 1.00x)
put_luma_h_8_32x32_neon: 4.0 ( 3.45x)
put_luma_h_8_64x64_c:   48.2 ( 1.00x)
put_luma_h_8_64x64_neon:15.7 ( 3.07x)
put_luma_h_8_128x128_c:203.5 ( 1.00x)
put_luma_h_8_128x128_neon:  62.0 ( 3.28x)
put_uni_h_luma_8_4x4_c:  0.2 ( 1.00x)
put_uni_h_luma_8_4x4_neon:   0.2 ( 1.00x)
put_uni_h_luma_8_8x8_c:  1.5 ( 1.00x)
put_uni_h_luma_8_8x8_neon:   0.2 ( 6.56x)
put_uni_h_luma_8_16x16_c:5.7 ( 1.00x)
put_uni_h_luma_8_16x16_neon: 1.2 ( 4.67x)
put_uni_h_luma_8_32x32_c:   24.0 ( 1.00x)
put_uni_h_luma_8_32x32_neon: 4.7 ( 5.07x)
put_uni_h_luma_8_64x64_c:   90.0 ( 1.00x)
put_uni_h_luma_8_64x64_neon:17.0 ( 5.30x)
put_uni_h_luma_8_128x128_c:357.7 ( 1.00x)
put_uni_h_luma_8_128x128_neon:  67.5 ( 5.30x)
---
 libavcodec/aarch64/h26x/dsp.h   |  13 ++
 libavcodec/aarch64/h26x/qpel_neon.S | 202 
 libavcodec/aarch64/vvc/Makefile |   1 +
 libavcodec/aarch64/vvc/dsp_init.c   |  14 ++
 4 files changed, 171 insertions(+), 59 deletions(-)

diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index 902286872d..f72746ce03 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -235,4 +235,17 @@ NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t 
dststride,
 const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
 int height, intptr_t mx, intptr_t my, int width), _i8mm);
 
+#undef NEON8_FNPROTO_PARTIAL_4
+#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
+void ff_vvc_put_##fn##_h4_8_neon##ext args;  \
+void ff_vvc_put_##fn##_h8_8_neon##ext args;  \
+void ff_vvc_put_##fn##_h16_8_neon##ext args; \
+void ff_vvc_put_##fn##_h32_8_neon##ext args;
+
+NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t 
_srcstride, int height,
+const int8_t *hf, const int8_t *vf, int width),)
+
+NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const 
uint8_t *_src,
+ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, 
int width),)
+
 #endif
diff --git a/libavcodec/aarch64/h26x/qpel_neon.S 
b/libavcodec/aarch64/h26x/qpel_neon.S
index 8ddaa32b70..a05009c9d6 100644
--- a/libavcodec/aarch64/h26x/qpel_neon.S
+++ b/libavcodec/aarch64/h26x/qpel_neon.S
@@ -21,7 +21,8 @@
  */
 
 #include "libavutil/aarch64/asm.S"
-#define MAX_PB_SIZE 64
+#define HEVC_MAX_PB_SIZE 64
+#define VVC_MAX_PB_SIZE 128
 
 const qpel_filters, align=4
 .byte   0,  0,  0,  0,  0,  0, 0,  0
@@ -44,6 +45,11 @@ endconst
 sxtlv0.8h, v0.8b
 .endm
 
+.macro vvc_load_filter m
+ld1 {v0.8b}, [\m]
+sxtlv0.8h, v0.8b
+.endm
+
 .macro load_qpel_filterb freg, xreg
 movrel  \xreg, qpel_filters_abs
 add \xreg, \xreg, \freg, lsl #3
@@ -212,22 +218,40 @@ function ff_hevc_put_hevc_h4_8_neon, export=0
 endfunc
 .endif
 
+.ifnc \type, qpel_bi
+function ff_vvc_put_\type\()_h4_8_neon, export=1
+vvc_load_filter mx
+sub src, src, #3
+mov mx, x30
+.ifc \type, qpel
+mov dststride, #(VVC_MAX_PB_SIZE << 1)
+lsl x13, srcstride, #1 // srcstridel
+mov x14, #(VVC_MAX_PB_SIZE << 2)
+.else
+lsl x14, dststride, #1 // dststridel
+lsl x13, srcstride, #1 // srcstridel
+.endif
+b   1f
+endfunc
+.endif // !qpel_bi
+
 function ff_hevc_put_hevc_\type\()_h4_8_neon, export=1
 load_filter mx
 .ifc \type, qpel_bi
-mov x16, #(MAX_PB_SIZE << 2) // src2bstridel
-add x15, x4, #(MAX_PB_SIZE << 1) // src2b
+mov x16, #(HEVC_MAX_PB_SIZE << 2) // src2bstridel
+add x15, x4, #(HEVC_MAX_PB_SIZE << 1) // src2b
 .endif
 sub src, src, #3
 mov mx, x30
 .ifc \type, qpel
-mov dststride,

[FFmpeg-devel] [PATCH 5/6] aarch64/vvc: Add put_qpel_hx i8mm

2024-09-07 Thread Zhao Zhili

From: Zhao Zhili 

Benchmark on Android pixel 8 with -fno-vectorize

put_luma_h_8_4x4_c:  0.2 ( 1.00x)
put_luma_h_8_4x4_neon:   0.2 ( 1.00x)
put_luma_h_8_4x4_i8mm:   0.0 ( 0.00x)
put_luma_h_8_8x8_c:  1.5 ( 1.00x)
put_luma_h_8_8x8_neon:   0.5 ( 3.00x)
put_luma_h_8_8x8_i8mm:   0.5 ( 3.00x)
put_luma_h_8_16x16_c:6.2 ( 1.00x)
put_luma_h_8_16x16_neon: 2.0 ( 3.12x)
put_luma_h_8_16x16_i8mm: 1.5 ( 4.17x)
put_luma_h_8_32x32_c:   25.5 ( 1.00x)
put_luma_h_8_32x32_neon: 9.0 ( 2.83x)
put_luma_h_8_32x32_i8mm: 6.8 ( 3.78x)
put_luma_h_8_64x64_c:   99.8 ( 1.00x)
put_luma_h_8_64x64_neon:35.2 ( 2.83x)
put_luma_h_8_64x64_i8mm:27.2 ( 3.66x)
put_luma_h_8_128x128_c:422.0 ( 1.00x)
put_luma_h_8_128x128_neon: 138.5 ( 3.05x)
put_luma_h_8_128x128_i8mm: 109.2 ( 3.86x)
---
 libavcodec/aarch64/h26x/dsp.h   |  4 ++
 libavcodec/aarch64/h26x/qpel_neon.S | 68 ++---
 libavcodec/aarch64/vvc/dsp_init.c   |  9 
 3 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index 076d01b477..323a253257 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -270,4 +270,8 @@ NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, 
ptrdiff_t _dststride,
 int height, int denom, int wx, int ox,
 const int8_t *hf, const int8_t *vf, int width),);
 
+NEON8_FNPROTO_PARTIAL_6(qpel_h, (int16_t * dst,
+const uint8_t *_src, ptrdiff_t _srcstride, int height,
+const int8_t *hf, const int8_t *vf, int width), _i8mm);
+
 #endif
diff --git a/libavcodec/aarch64/h26x/qpel_neon.S 
b/libavcodec/aarch64/h26x/qpel_neon.S
index 0585f03de9..8a372a76be 100644
--- a/libavcodec/aarch64/h26x/qpel_neon.S
+++ b/libavcodec/aarch64/h26x/qpel_neon.S
@@ -3518,6 +3518,17 @@ endfunc
 sub x1, x1, #3
 .endm
 
+.macro VVC_QPEL_H_HEADER
+ld1r{v31.2d}, [x4]
+sub x1, x1, #3
+.endm
+
+function ff_vvc_put_qpel_h4_8_neon_i8mm, export=1
+VVC_QPEL_H_HEADER
+mov x10, #VVC_MAX_PB_SIZE * 2
+b   1f
+endfunc
+
 function ff_hevc_put_hevc_qpel_h4_8_neon_i8mm, export=1
 QPEL_H_HEADER
 mov x10, #HEVC_MAX_PB_SIZE * 2
@@ -3574,6 +3585,12 @@ function ff_hevc_put_hevc_qpel_h6_8_neon_i8mm, export=1
 ret
 endfunc
 
+function ff_vvc_put_qpel_h8_8_neon_i8mm, export=1
+VVC_QPEL_H_HEADER
+mov x10, #VVC_MAX_PB_SIZE * 2
+b   1f
+endfunc
+
 function ff_hevc_put_hevc_qpel_h8_8_neon_i8mm, export=1
 QPEL_H_HEADER
 mov x10, #HEVC_MAX_PB_SIZE * 2
@@ -3658,6 +3675,12 @@ function ff_hevc_put_hevc_qpel_h12_8_neon_i8mm, export=1
 ret
 endfunc
 
+function ff_vvc_put_qpel_h16_8_neon_i8mm, export=1
+VVC_QPEL_H_HEADER
+mov x10, #VVC_MAX_PB_SIZE * 2
+b   1f
+endfunc
+
 function ff_hevc_put_hevc_qpel_h16_8_neon_i8mm, export=1
 QPEL_H_HEADER
 mov x10, #HEVC_MAX_PB_SIZE * 2
@@ -3748,6 +3771,13 @@ function ff_hevc_put_hevc_qpel_h24_8_neon_i8mm, export=1
 ret
 endfunc
 
+function ff_vvc_put_qpel_h32_8_neon_i8mm, export=1
+VVC_QPEL_H_HEADER
+mov x10, #VVC_MAX_PB_SIZE * 2
+add x15, x0, #32
+b   1f
+endfunc
+
 function ff_hevc_put_hevc_qpel_h32_8_neon_i8mm, export=1
 QPEL_H_HEADER
 mov x10, #HEVC_MAX_PB_SIZE * 2
@@ -3883,10 +3913,7 @@ function ff_hevc_put_hevc_qpel_h48_8_neon_i8mm, export=1
 ret
 endfunc
 
-function ff_hevc_put_hevc_qpel_h64_8_neon_i8mm, export=1
-QPEL_H_HEADER
-sub x2, x2, #64
-1:
+.macro put_qpel_h64_8_neon_i8mm
 ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x1], #64
 ext v1.16b, v16.16b, v17.16b, #1
 ext v2.16b, v16.16b, v17.16b, #2
@@ -3977,11 +4004,42 @@ function ff_hevc_put_hevc_qpel_h64_8_neon_i8mm, export=1
 sqxtn2  v20.8h, v26.4s
 sqxtn   v21.4h, v23.4s
 sqxtn2  v21.8h, v27.4s
-stp q20, q21, [x0], #32
+stp q20, q21, [x0]
+add x0, x0, x10
+.endm
+
+function ff_vvc_put_qpel_h64_8_neon_i8mm, export=1
+VVC_QPEL_H_HEADER
+mov x10, #(VVC_MAX_PB_SIZE * 2 - 32 * 3)
+

[FFmpeg-devel] [PATCH 4/6] aarch64/vvc: Add put_pel/put_pel_uni/put_pel_uni_w

2024-09-07 Thread Zhao Zhili

From: Zhao Zhili 

put_luma_pixels_8_4x4_c: 0.2 ( 1.00x)
put_luma_pixels_8_4x4_neon:  0.2 ( 1.00x)
put_luma_pixels_8_8x8_c: 0.7 ( 1.00x)
put_luma_pixels_8_8x8_neon:  0.2 ( 3.22x)
put_luma_pixels_8_16x16_c:   2.2 ( 1.00x)
put_luma_pixels_8_16x16_neon:0.2 ( 9.89x)
put_luma_pixels_8_32x32_c:   8.2 ( 1.00x)
put_luma_pixels_8_32x32_neon:1.2 ( 6.71x)
put_luma_pixels_8_64x64_c:  33.7 ( 1.00x)
put_luma_pixels_8_64x64_neon:2.5 (13.63x)
put_luma_pixels_8_128x128_c:   145.5 ( 1.00x)
put_luma_pixels_8_128x128_neon: 10.2 (14.23x)
put_uni_pixels_luma_8_4x4_c: 0.5 ( 1.00x)
put_uni_pixels_luma_8_4x4_neon:  0.0 ( 0.00x)
put_uni_pixels_luma_8_8x8_c: 0.5 ( 1.00x)
put_uni_pixels_luma_8_8x8_neon:  0.2 ( 2.11x)
put_uni_pixels_luma_8_16x16_c:   1.2 ( 1.00x)
put_uni_pixels_luma_8_16x16_neon:0.2 ( 5.44x)
put_uni_pixels_luma_8_32x32_c:   3.0 ( 1.00x)
put_uni_pixels_luma_8_32x32_neon:0.5 ( 6.26x)
put_uni_pixels_luma_8_64x64_c:   3.0 ( 1.00x)
put_uni_pixels_luma_8_64x64_neon:1.7 ( 1.72x)
put_uni_pixels_luma_8_128x128_c: 6.5 ( 1.00x)
put_uni_pixels_luma_8_128x128_neon:  6.5 ( 1.00x)
---
 libavcodec/aarch64/h26x/dsp.h   |  22 
 libavcodec/aarch64/h26x/epel_neon.S | 193 +---
 libavcodec/aarch64/h26x/qpel_neon.S |  83 +++-
 libavcodec/aarch64/vvc/Makefile |   1 +
 libavcodec/aarch64/vvc/dsp_init.c   |  21 +++
 5 files changed, 245 insertions(+), 75 deletions(-)

diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index f72746ce03..076d01b477 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -248,4 +248,26 @@ NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t 
*_src, ptrdiff_t _src
 NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const 
uint8_t *_src,
 ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, 
int width),)
 
+#undef NEON8_FNPROTO_PARTIAL_6
+#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
+void ff_vvc_put_##fn##4_8_neon##ext args; \
+void ff_vvc_put_##fn##8_8_neon##ext args; \
+void ff_vvc_put_##fn##16_8_neon##ext args; \
+void ff_vvc_put_##fn##32_8_neon##ext args; \
+void ff_vvc_put_##fn##64_8_neon##ext args; \
+void ff_vvc_put_##fn##128_8_neon##ext args
+
+NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst,
+const uint8_t *src, ptrdiff_t srcstride, int height,
+const int8_t *hf, const int8_t *vf, int width),);
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
+const uint8_t *_src, ptrdiff_t _srcstride, int height,
+const int8_t *hf, const int8_t *vf, int width),);
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
+const uint8_t *_src, ptrdiff_t _srcstride,
+int height, int denom, int wx, int ox,
+const int8_t *hf, const int8_t *vf, int width),);
+
 #endif
diff --git a/libavcodec/aarch64/h26x/epel_neon.S 
b/libavcodec/aarch64/h26x/epel_neon.S
index 378b0f7fb2..729395f2f0 100644
--- a/libavcodec/aarch64/h26x/epel_neon.S
+++ b/libavcodec/aarch64/h26x/epel_neon.S
@@ -19,7 +19,8 @@
  */
 
 #include "libavutil/aarch64/asm.S"
-#define MAX_PB_SIZE 64
+#define HEVC_MAX_PB_SIZE 64
+#define VVC_MAX_PB_SIZE 128
 
 const epel_filters, align=4
 .byte  0,  0,  0,  0
@@ -131,8 +132,13 @@ endconst
 b.ne1b
 .endm
 
+function ff_vvc_put_pel_pixels4_8_neon, export=1
+mov x7, #(VVC_MAX_PB_SIZE * 2)
+b   1f
+endfunc
+
 function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
-mov x7, #(MAX_PB_SIZE * 2)
+mov x7, #(HEVC_MAX_PB_SIZE * 2)
 1:  ld1 {v0.s}[0], [x1], x2
 ushll   v4.8h, v0.8b, #6
 subsw3, w3, #1
@@ -142,7 +148,7 @@ function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
 endfunc
 
 function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
-mov x7, #(MAX_PB_SIZE * 2 - 8)
+mov x7, #(HEVC_MAX_PB_SIZE * 2 - 8)
 1:  ld1 {v0.8b}, [x1], x2
 ushll   v4.8h, v0.8b, #6
 st1 {v4.d}[0], [x0], #8
@@ -152,8 +158,13 @@ function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
 ret
 endfunc
 
+function ff_vvc_put_pel_pixels8_8_neon, export=1
+mov x7, #(VVC_MAX_PB_SIZE * 2)
+

[FFmpeg-devel] [PATCH 6/6] avcodec/hevc: ff_hevc_(qpel/epel)_filters are signed type

2024-09-07 Thread Zhao Zhili

From: Zhao Zhili 

---
 libavcodec/hevc/dsp_template.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c
index aebccd1a0c..a0f79c2673 100644
--- a/libavcodec/hevc/dsp_template.c
+++ b/libavcodec/hevc/dsp_template.c
@@ -302,8 +302,8 @@ IDCT_DC(32)
 

 #define ff_hevc_pel_filters ff_hevc_qpel_filters
 #define DECL_HV_FILTER(f)  \
-const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
-const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
+const int8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
+const int8_t *vf = ff_hevc_ ## f ## _filters[my];
 
 #define FW_PUT(p, f, t)
   \
 static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t 
srcstride, int height,\
-- 
2.42.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 2/6] aarch64/hevc: Move epel/qpel to h26x directory

2024-09-07 Thread Zhao Zhili

From: Zhao Zhili 

So vvc can reuse the implementation.
---
 libavcodec/aarch64/Makefile   |   4 +-
 libavcodec/aarch64/h26x/dsp.h | 198 ++
 .../{hevcdsp_epel_neon.S => h26x/epel_neon.S} |   0
 .../{hevcdsp_qpel_neon.S => h26x/qpel_neon.S} |   0
 libavcodec/aarch64/hevcdsp_init_aarch64.c | 197 -
 5 files changed, 200 insertions(+), 199 deletions(-)
 rename libavcodec/aarch64/{hevcdsp_epel_neon.S => h26x/epel_neon.S} (100%)
 rename libavcodec/aarch64/{hevcdsp_qpel_neon.S => h26x/qpel_neon.S} (100%)

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index a01e665b55..9affb92789 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -71,6 +71,6 @@ NEON-OBJS-$(CONFIG_VP9_DECODER) += 
aarch64/vp9itxfm_16bpp_neon.o   \
 NEON-OBJS-$(CONFIG_HEVC_DECODER)+= aarch64/hevcdsp_deblock_neon.o  
\
aarch64/hevcdsp_idct_neon.o 
\
aarch64/hevcdsp_init_aarch64.o  
\
-   aarch64/hevcdsp_qpel_neon.o 
\
-   aarch64/hevcdsp_epel_neon.o 
\
+   aarch64/h26x/epel_neon.o
\
+   aarch64/h26x/qpel_neon.o
\
aarch64/h26x/sao_neon.o
diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index d3f7a4dfe3..902286872d 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -37,4 +37,202 @@ void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, 
const uint8_t *src, ptrdi
  const int16_t *sao_offset_val, int 
eo, int width, int height);
 void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, 
ptrdiff_t stride_dst,
const int16_t *sao_offset_val, int eo, 
int width, int height);
+
+#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
+void ff_hevc_put_hevc_##fn##_h4_8_neon##ext args;  \
+void ff_hevc_put_hevc_##fn##_h6_8_neon##ext args;  \
+void ff_hevc_put_hevc_##fn##_h8_8_neon##ext args;  \
+void ff_hevc_put_hevc_##fn##_h12_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##_h16_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##_h32_8_neon##ext args;
+
+NEON8_FNPROTO_PARTIAL_6(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t 
_srcstride, int height,
+intptr_t mx, intptr_t my, int width),)
+
+NEON8_FNPROTO_PARTIAL_6(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const 
uint8_t *_src,
+ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int 
width),)
+
+NEON8_FNPROTO_PARTIAL_6(qpel_bi, (uint8_t *_dst, ptrdiff_t _dststride, const 
uint8_t *_src,
+ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t
+mx, intptr_t my, int width),)
+
+#define NEON8_FNPROTO(fn, args, ext) \
+void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##6_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##12_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##24_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##48_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##64_8_neon##ext args
+
+#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
+void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##64_8_neon##ext args
+
+#define NEON8_FNPROTO_PARTIAL_5(fn, args, ext) \
+void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
+void ff_hevc_put_hevc_##fn##64_8_neon##ext args
+
+NEON8_FNPROTO(pel_pixels, (int16_t *dst,
+const uint8_t *src, ptrdiff_t srcstride,
+int height, intptr_t mx, intptr_t my, int width),);
+
+NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
+const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+int height, intptr_t mx, intptr_t my, int width),);
+
+NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride,
+const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
+int height, intptr_t mx, intptr_t my, int width),);
+
+NEON8_FNPROTO(epel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
+const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
+int height, intptr_t mx, intptr_t my, int width),);
+
+NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
+const uint8_

Re: [FFmpeg-devel] [PATCH] libavcodec: implementation of DNxUncompressed decoder

2024-09-07 Thread Michael Niedermayer

On Thu, Sep 05, 2024 at 04:35:04AM +0200, Martin Schitter wrote:
> This is a second attempt to contribute the corrected code of an
> AVID DNxUncompressed / SMPTE RDD 50 decoder.
> 
> Thanks
> Martin
> 
> ---
>  Changelog |   1 +
>  doc/general_contents.texi |   1 +
>  libavcodec/Makefile   |   1 +
>  libavcodec/allcodecs.c|   1 +
>  libavcodec/codec_desc.c   |   7 +
>  libavcodec/codec_id.h |   1 +
>  libavcodec/dnxucdec.c | 495 ++
>  libavcodec/parsers.c  |   1 +
>  libavcodec/version.c  |   2 +-
>  libavcodec/version.h  |   2 +-
>  libavformat/mxf.c |   1 +
>  libavformat/mxfdec.c  |  21 ++
>  12 files changed, 532 insertions(+), 2 deletions(-)
>  create mode 100644 libavcodec/dnxucdec.c

fails build with --enable-small

libavcodec/dnxucdec.c:489:22: error: expected ‘}’ before string constant
  489 | CODEC_LONG_NAME()"DNxUncompressed (SMPTE RDD 50)",
  |  ^~~~
libavcodec/dnxucdec.c:487:34: note: to match this ‘{’
  487 | const FFCodec ff_dnxuc_decoder = {
  |  ^
libavcodec/dnxucdec.c:422:12: warning: ‘dnxuc_decode_frame’ defined but not 
used [-Wunused-function]
  422 | static int dnxuc_decode_frame(AVCodecContext *avctx, AVFrame *frame,
  |^~
libavcodec/dnxucdec.c:119:20: warning: ‘dnxuc_decode_init’ defined but not used 
[-Wunused-function]
  119 | static av_cold int dnxuc_decode_init(AVCodecContext *avctx){
  |^
make: *** [ffbuild/common.mak:81: libavcodec/dnxucdec.o] Error 1
make: *** Waiting for unfinished jobs

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

It is a danger to trust the dream we wish for rather than
the science we have, -- Dr. Kenneth Brown


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] lavu/opt: av_opt_set_array: fix uninitialised return

2024-09-07 Thread Marvin Scholz

In one failure path of av_opt_set_array, the ret variable
was declared again, shadowing the outer one. The return value
was written to the inner variable, and after the goto the
uninitialized outer one was returned instead.

Introduced in 450a3f58edb22d28912a5e65dc08d9e2fb805066

Fixes: CID1619242 Uninitialized scalar variable
---
 libavutil/opt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavutil/opt.c b/libavutil/opt.c
index d2af76478c..81fc1c5994 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -2368,7 +2368,6 @@ int av_opt_set_array(void *obj, const char *name, int 
search_flags,
   val_type == AV_OPT_TYPE_FLOAT||
   val_type == AV_OPT_TYPE_DOUBLE   ||
   val_type == AV_OPT_TYPE_RATIONAL) {
-int ret;
 
 switch (val_type) {
 case AV_OPT_TYPE_INT:   intnum = *(int*)src;
break;

base-commit: c0666d8bedfb8bd242ea2a9fe2bd3e5a1addc0a5
-- 
2.39.3 (Apple Git-146)
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] configure: enable warnings for shadowed variables

2024-09-07 Thread Timo Rothenpieler

These can easily lead to incredibly confusing errors, and should
practically never happen.
I'd have loved to make this a -Werror even, but sadly there are way too
many instances in the codebase right now that first need to be weeded
out.
---
 configure | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure b/configure
index a8e67d230c..547b67565d 100755
--- a/configure
+++ b/configure
@@ -7406,6 +7406,7 @@ check_cflags -Wundef
 check_cflags -Wmissing-prototypes
 check_cflags -Wstrict-prototypes
 check_cflags -Wempty-body
+check_cflags -Wshadow
 
 if enabled extra_warnings; then
 check_cflags -Wcast-qual
-- 
2.44.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH] configure: enable warnings for shadowed variables

2024-09-07 Thread Michael Niedermayer

On Sat, Sep 07, 2024 at 08:52:41PM +0200, Timo Rothenpieler wrote:
> These can easily lead to incredibly confusing errors, and should
> practically never happen.
> I'd have loved to make this a -Werror even, but sadly there are way too
> many instances in the codebase right now that first need to be weeded
> out.
> ---
>  configure | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/configure b/configure
> index a8e67d230c..547b67565d 100755
> --- a/configure
> +++ b/configure
> @@ -7406,6 +7406,7 @@ check_cflags -Wundef
>  check_cflags -Wmissing-prototypes
>  check_cflags -Wstrict-prototypes
>  check_cflags -Wempty-body
> +check_cflags -Wshadow

can you provide a list of warnings this produces ?
but in principle this seems a good idea

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Does the universe only have a finite lifespan? No, it's going to go on
forever, it's just that you won't like living in it. -- Hiranya Peiri


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH] libavcodec/dnxucdec.c: fix displaced bracket

2024-09-07 Thread Martin Schitter

Sorry for this tiny, stupid, blocking error.
I don't know how it happened!

martin

---
 libavcodec/dnxucdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/dnxucdec.c b/libavcodec/dnxucdec.c
index 502e736..e9a2157 100644
--- a/libavcodec/dnxucdec.c
+++ b/libavcodec/dnxucdec.c
@@ -486,7 +486,7 @@ const AVCodecParser ff_dnxuc_parser = {
 
 const FFCodec ff_dnxuc_decoder = {
 .p.name = "dnxuc",
-CODEC_LONG_NAME()"DNxUncompressed (SMPTE RDD 50)",
+CODEC_LONG_NAME("DNxUncompressed (SMPTE RDD 50)"),
 .p.type = AVMEDIA_TYPE_VIDEO,
 .p.id = AV_CODEC_ID_DNXUC,
 .init   = dnxuc_decode_init,
-- 
2.45.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH v5] avfilter: add XPSNR filter

2024-09-07 Thread Michael Niedermayer

On Wed, Aug 28, 2024 at 01:40:39PM +, Helmrich, Christian wrote:
> Following up on this: attached a (final, in our view) v5. Changes over v3:
> 
> 
> - cleanup and align to psnr filter
> - add metadata
> - add xpsnr tests for yuv and rgb

will apply

thx

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you think the mosad wants you dead since a long time then you are either
wrong or dead since a long time.

signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH v1] XPSNR: add maintainers

2024-09-07 Thread Michael Niedermayer

On Fri, Sep 06, 2024 at 03:11:01PM +, Helmrich, Christian wrote:
> Hi, adding us XPSNR authors to MAINTAINERS, as requested/suggested in
> 
> https://ffmpeg.org/pipermail/ffmpeg-devel/2024-September/332940.html

[...]

>  MAINTAINERS |1 +
>  1 file changed, 1 insertion(+)
> f69e632b19924e0a25a0735386775488fadf921a  xpsnr_add_maintainers.patch
> From 6a020fc9279ab2fd66e6dd8596f566ee6578cb35 Mon Sep 17 00:00:00 2001
> From: Christian Helmrich 
> Date: Fri, 6 Sep 2024 17:00:00 +0200
> Subject: [PATCH v1] XPSNR: add maintainers
> 
> Add XPSNR authors to MAINTAINERS
> ---
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -344,6 +344,7 @@
>vf_readvitc.c Tobias Rapp (CC t.rapp at 
> noa-archive dot com)
>vf_scale.c[2] Michael Niedermayer
>vf_tonemap_opencl.c   Ruiling Song
> +  vf_xpsnr.cChristian Helmrich, Christian Lehmann

please add the status [1] or [2]

thx

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many things microsoft did are stupid, but not doing something just because
microsoft did it is even more stupid. If everything ms did were stupid they
would be bankrupt already.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [RFC] 7.1 Release

2024-09-07 Thread Michael Niedermayer

On Sat, Aug 24, 2024 at 10:31:28PM +0200, Michael Niedermayer wrote:
> On Wed, Aug 14, 2024 at 02:41:55PM +0200, Michael Niedermayer wrote:
> > Hi all
> > 
> > Are there any upcoming LTS releases that want to/could include FFmpeg 7.1 ?
> > If so please reply here and list the date before which we would have to
> > finish the 7.1 release so it can be included with no problems
> > 
> > Otherwise, are there any preferences for the general/approximate release 
> > date?
> 
> I intend to branch 7.1 in the next 1-2 weeks and hopefully make the 7.1
> release then 1-2 weeks after that
> 
> If you have release blocking issues (bugs, anything else you want to do 
> before)
> open up an issue and set "Blocking: 7.1" on trac so it shows up here:
> https://trac.ffmpeg.org/report/16

Nothing new on the "Release Blocking Issues" page on trac thus
I intend to branch 7.1 in the next 24h

release could be made from that branch 1-2 weeks after

thx

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Some people wanted to paint the bikeshed green, some blue and some pink.
People argued and fought, when they finally agreed, only rust was left.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [FFmpeg-cvslog] lavc/vvc: Remove experimental flag

2024-09-07 Thread Nuo Mi

On Sat, Sep 7, 2024 at 12:49 AM Jean-Baptiste Kempf  wrote:

>
>
> On Fri, 6 Sep 2024, at 16:18, Frank Plowman wrote:
> > ffmpeg | branch: master | Frank Plowman  | Fri
> > Aug 23 13:36:50 2024 +0100| [6df0c5f9f4c3261acf5b0efe43597b9eb765d6b6]
> > | committer: Nuo Mi
> >
> > lavc/vvc: Remove experimental flag
>
> Congratulations.
>
It’s been a great collaboration between many community members.
Special thanks to Andreas Rheinhardt, Eliny Huang, Frank Plowman, Haihao
Xiang, James Almer, Jun Zhao, Kieran Kunhya, Stone Chen, Martin Storsjö,
Michael Niedermayer, Wu Jianhua, and Zhao Zhili for their patch
contributions.
Thanks as well to you, Anton Khirnov, Benjamin Bross, Mark Thompson, and
Ronald S. Bultje for the valuable code reviews and suggestions.

> --
> Jean-Baptiste Kempf -  President
> +33 672 704 734
> https://jbkempf.com/
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH 1/6] aarch64/hevc: Simplify function prototypes by macro

[FFmpeg-devel] [PATCH 3/6] aarch64/vvc: Add put_qpel_h_* and put_qpel_uni_h_*

[FFmpeg-devel] [PATCH 5/6] aarch64/vvc: Add put_qpel_hx i8mm

[FFmpeg-devel] [PATCH 4/6] aarch64/vvc: Add put_pel/put_pel_uni/put_pel_uni_w

[FFmpeg-devel] [PATCH 6/6] avcodec/hevc: ff_hevc_(qpel/epel)_filters are signed type

[FFmpeg-devel] [PATCH 2/6] aarch64/hevc: Move epel/qpel to h26x directory

Re: [FFmpeg-devel] [PATCH] libavcodec: implementation of DNxUncompressed decoder

[FFmpeg-devel] [PATCH] lavu/opt: av_opt_set_array: fix uninitialised return

[FFmpeg-devel] [PATCH] configure: enable warnings for shadowed variables

Re: [FFmpeg-devel] [PATCH] configure: enable warnings for shadowed variables

[FFmpeg-devel] [PATCH] libavcodec/dnxucdec.c: fix displaced bracket

Re: [FFmpeg-devel] [PATCH v5] avfilter: add XPSNR filter

Re: [FFmpeg-devel] [PATCH v1] XPSNR: add maintainers

Re: [FFmpeg-devel] [RFC] 7.1 Release

Re: [FFmpeg-devel] [FFmpeg-cvslog] lavc/vvc: Remove experimental flag

15 matches

Site Navigation

Mail list logo

Footer information