+ EPEL_UNI_W_V_HEADER
+
+ ld1 {v16.16b, v17.16b, v18.16b}, [x2], x3
+ ld1 {v19.16b, v20.16b, v21.16b}, [x2], x3
+ ld1 {v22.16b, v23.16b, v24.16b}, [x2], x3
+1:
+ ld1 {v25.16b, v26.16b, v27.16b}, [x2], x3
+
+ EPEL_UNI_W_V16_CALC v4, v6, v16, v19, v22, v25, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v5, v7, v17, v20, v23, v26, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v6, v7, v18, v21, v24, v27, v8, v9, v10, v11
+ st1 {v4.16b, v5.16b, v6.16b}, [x0], x1
+ subs w4, w4, #1
+ b.eq 2f
+ ld1 {v16.16b, v17.16b, v18.16b}, [x2], x3
+ EPEL_UNI_W_V16_CALC v4, v6, v19, v22, v25, v16, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v5, v7, v20, v23, v26, v17, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v6, v7, v21, v24, v27, v18, v8, v9, v10, v11
+ st1 {v4.16b, v5.16b, v6.16b}, [x0], x1
+ subs w4, w4, #1
+ b.eq 2f
+ ld1 {v19.16b, v20.16b, v21.16b}, [x2], x3
+ EPEL_UNI_W_V16_CALC v4, v6, v22, v25, v16, v19, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v5, v7, v23, v26, v17, v20, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v6, v7, v24, v27, v18, v21, v8, v9, v10, v11
+ st1 {v4.16b, v5.16b, v6.16b}, [x0], x1
+ subs w4, w4, #1
+ b.eq 2f
+ ld1 {v22.16b, v23.16b, v24.16b}, [x2], x3
+ EPEL_UNI_W_V16_CALC v4, v6, v25, v16, v19, v22, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v5, v7, v26, v17, v20, v23, v8, v9, v10, v11
+ EPEL_UNI_W_V16_CALC v6, v7, v27, v18, v21, v24, v8, v9, v10, v11
+ st1 {v4.16b, v5.16b, v6.16b}, [x0], x1
+ subs w4, w4, #1
+ b.hi 1b
+2:
+ ldp q8, q9, [sp, #-32]
+ ldp q10, q11, [sp, #-64]
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_uni_w_v64_8_neon, export=1
+ stp q8, q9, [sp, #-32]
+ stp q10, q11, [sp, #-64]
+ stp q12, q13, [sp, #-96]
+ stp q14, q15, [sp, #-128]