diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index f72746ce03..076d01b477 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -248,4 +248,26 @@ NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _src
NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const
uint8_t *_src,
ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf,
int width),)
+#undef NEON8_FNPROTO_PARTIAL_6 /* drop any earlier definition before redefining it below */
+#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) /* declares ff_vvc_put_<fn><W>_8_neon<ext> with parameter list args, for the six widths W below */ \
+ void ff_vvc_put_##fn##4_8_neon##ext args; \
+ void ff_vvc_put_##fn##8_8_neon##ext args; \
+ void ff_vvc_put_##fn##16_8_neon##ext args; \
+ void ff_vvc_put_##fn##32_8_neon##ext args; \
+ void ff_vvc_put_##fn##64_8_neon##ext args; \
+ void ff_vvc_put_##fn##128_8_neon##ext args /* no trailing ';' here: each invocation supplies it */
+
+NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst, /* ff_vvc_put_pel_pixels{4..128}_8_neon: int16_t destination, no destination stride parameter */
+ const uint8_t *src, ptrdiff_t srcstride, int height,
+ const int8_t *hf, const int8_t *vf, int width),); /* empty third argument: no suffix is appended to the function names */
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride, /* ff_vvc_put_pel_uni_pixels{4..128}_8_neon: uint8_t destination with an explicit stride */
+ const uint8_t *_src, ptrdiff_t _srcstride, int height,
+ const int8_t *hf, const int8_t *vf, int width),);
+
+NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride, /* ff_vvc_put_pel_uni_w_pixels{4..128}_8_neon: same as pel_uni_pixels plus denom, wx and ox */
+ const uint8_t *_src, ptrdiff_t _srcstride,
+ int height, int denom, int wx, int ox,
+ const int8_t *hf, const int8_t *vf, int width),);
+
#endif
diff --git a/libavcodec/aarch64/h26x/epel_neon.S b/libavcodec/aarch64/h26x/epel_neon.S
index 378b0f7fb2..729395f2f0 100644
--- a/libavcodec/aarch64/h26x/epel_neon.S
+++ b/libavcodec/aarch64/h26x/epel_neon.S
@@ -19,7 +19,8 @@
*/
#include "libavutil/aarch64/asm.S"
-#define MAX_PB_SIZE 64
+#define HEVC_MAX_PB_SIZE 64 /* replaces the former MAX_PB_SIZE, same value; used by the ff_hevc_* entry points */
+#define VVC_MAX_PB_SIZE 128 /* used by the ff_vvc_* entry points to set up x7 */
const epel_filters, align=4
.byte 0, 0, 0, 0
@@ -131,8 +132,13 @@ endconst
b.ne 1b
.endm
+function ff_vvc_put_pel_pixels4_8_neon, export=1 // VVC entry point that reuses the HEVC 4-wide loop directly below
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256; presumably the dst row advance in bytes, the store that consumes x7 is not visible in this hunk
+ b 1f // jump to the next 1: label, i.e. the first load of ff_hevc_put_hevc_pel_pixels4_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.s}[0], [x1], x2
ushll v4.8h, v0.8b, #6
subs w3, w3, #1
@@ -142,7 +148,7 @@ function ff_hevc_put_hevc_pel_pixels4_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2 - 8)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2 - 8)
1: ld1 {v0.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
st1 {v4.d}[0], [x0], #8
@@ -152,8 +158,13 @@ function ff_hevc_put_hevc_pel_pixels6_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_pixels8_8_neon, export=1 // VVC entry point that reuses the HEVC 8-wide loop directly below
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256; presumably the dst row advance in bytes, the store that consumes x7 is not visible in this hunk
+ b 1f // jump to the next 1: label, i.e. the first load of ff_hevc_put_hevc_pel_pixels8_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
subs w3, w3, #1
@@ -163,7 +174,7 @@ function ff_hevc_put_hevc_pel_pixels8_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2 - 16)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2 - 16)
1: ld1 {v0.8b, v1.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
st1 {v4.8h}, [x0], #16
@@ -174,8 +185,13 @@ function ff_hevc_put_hevc_pel_pixels12_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_pixels16_8_neon, export=1 // VVC entry point that reuses the HEVC 16-wide loop directly below
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256; presumably the dst row advance in bytes, the store that consumes x7 is not visible in this hunk
+ b 1f // jump to the next 1: label, i.e. the first load of ff_hevc_put_hevc_pel_pixels16_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b, v1.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll v5.8h, v1.8b, #6
@@ -186,7 +202,7 @@ function ff_hevc_put_hevc_pel_pixels16_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b-v2.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll v5.8h, v1.8b, #6
@@ -197,8 +213,13 @@ function ff_hevc_put_hevc_pel_pixels24_8_neon, export=1
ret
endfunc
+function ff_vvc_put_pel_pixels32_8_neon, export=1 // VVC entry point that reuses the HEVC 32-wide loop directly below
+ mov x7, #(VVC_MAX_PB_SIZE * 2) // x7 = 256; presumably the dst row advance in bytes, the store that consumes x7 is not visible in this hunk
+ b 1f // jump to the next 1: label, i.e. the first load of ff_hevc_put_hevc_pel_pixels32_8_neon, skipping its own mov x7
+endfunc
+
function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1
- mov x7, #(MAX_PB_SIZE * 2)
+ mov x7, #(HEVC_MAX_PB_SIZE * 2)
1: ld1 {v0.8b-v3.8b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll v5.8h, v1.8b, #6
@@ -211,7 +232,7 @@ function ff_hevc_put_hevc_pel_pixels32_8_neon, export=1
endfunc
function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1
- mov x7, #(MAX_PB_SIZE)
+ mov x7, #(HEVC_MAX_PB_SIZE)
1: ld1 {v0.16b-v2.16b}, [x1], x2
ushll v4.8h, v0.8b, #6
ushll2 v5.8h, v0.16b, #6
@@ -226,26 +247,50 @@ function ff_hevc_put_hevc_pel_pixels48_8_neon, export=1
ret
endfunc
-function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
-1: ld1 {v0.16b-v3.16b}, [x1], x2
+.macro put_pel_pixels64_8_neon // widens the 64 bytes held in v0-v3 to 16 bit, shifted left by 6, and stores them at x0; caller must have loaded v0-v3 and set x7
 ushll v4.8h, v0.8b, #6
 ushll2 v5.8h, v0.16b, #6
 ushll v6.8h, v1.8b, #6
 ushll2 v7.8h, v1.16b, #6
- st1 {v4.8h-v7.8h}, [x0], #(MAX_PB_SIZE)
+ st1 {v4.8h-v7.8h}, [x0], #64 // first 64 bytes of the output row; fixed post-increment equal to the bytes just written
 ushll v16.8h, v2.8b, #6
 ushll2 v17.8h, v2.16b, #6
 ushll v18.8h, v3.8b, #6
 ushll2 v19.8h, v3.16b, #6
- subs w3, w3, #1
- st1 {v16.8h-v19.8h}, [x0], #(MAX_PB_SIZE)
- b.ne 1b
+ st1 {v16.8h-v19.8h}, [x0], x7 // second 64 bytes; x0 then advances by x7, so one expansion moves x0 by 64 + x7 in total
+.endm
+
+function ff_vvc_put_pel_pixels64_8_neon, export=1 // VVC entry point that reuses the HEVC 64-wide loop directly below
+ mov x7, #(2 * VVC_MAX_PB_SIZE - 64) // x7 = 192; put_pel_pixels64_8_neon already advances x0 by 64 after its first store, so each row moves x0 by 256 bytes
+ b 1f // jump to the next 1: label, i.e. the loop head of ff_hevc_put_hevc_pel_pixels64_8_neon, skipping its own mov x7
+endfunc
+
+function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
+ mov x7, #(HEVC_MAX_PB_SIZE)
+1:
+ ld1 {v0.16b-v3.16b}, [x1], x2
+ sub w3, w3, #1
+ put_pel_pixels64_8_neon
+ cbnz w3, 1b