+ sqrshrun \d0\().4H, \d0\().4S, #5
+ sqrshrun2 \d0\().8H, \d1\().4S, #5
+ ext v3.16B, \r2\().16B, \r3\().16B, #8
+ add v1.8H, v1.8H, v3.8H
+ ext v2.16B, \r2\().16B, \r3\().16B, #10
+ uaddl v3.4S, \r2\().4H, v2.4H
+ uaddl2 v4.4S, \r2\().8H, v2.8H
+ umlal v3.4S, v0.4H, v6.H[1]
+ umlal2 v4.4S, v0.8H, v6.H[1]
+ umlsl v3.4S, v1.4H, v6.H[0]
+ umlsl2 v4.4S, v1.8H, v6.H[0]
+ mvni v5.8h, #0xFC, lsl #8 // 1023 for clipping
+ sqrshrun \d1\().4H, v3.4S, #5
+ sqrshrun2 \d1\().8H, v4.4S, #5
+ smin \d0\().8H, \d0\().8H, v5.8h
+ smin \d1\().8H, \d1\().8H, v5.8h
+.endm
+
+function put_h264_qpel16_h_lowpass_neon_packed_10 // 16-wide horizontal lowpass into a packed buffer, done as two 8-wide kernel passes
+        mov             x3,  #16                    // x3 is the kernel's store stride: 16 bytes = one .8H row, so output rows are back to back
+        mov             x12, #32                    // kernel loop counter: it drops by 4 per iteration and each iteration emits 2 rows -> 16 rows
+        mov             x4,  x30                    // bl below overwrites x30; keep the caller's return address in x4
+        bl              put_h264_qpel8_h_lowpass_neon_10 // first pass (left 8 columns)
+        add             x1,  x1,  #16               // move the source 8 samples (16 bytes) to the right ...
+        sub             x1,  x1,  x2, lsl #4        // ... and back up the 16 rows the kernel advanced it (16 * x2)
+        mov             x30, x4                     // restore the return address so the kernel's ret goes to our caller
+        mov             x12, #32                    // re-arm the counter for another 16 rows
+        b               put_h264_qpel8_h_lowpass_neon_10 // tail call for the second pass; x0 is not rewound, output continues after the first block
+endfunc
+
+.macro h264_qpel_h_lowpass_10 type // emits a 16-wide wrapper plus the 8-wide horizontal lowpass kernel; type is compared against "avg" below
+function \type\()_h264_qpel16_h_lowpass_neon_10 // 16-wide: call the 8-wide kernel once, reposition x0/x1, then continue into it again
+ mov x13, x30 // bl overwrites x30; keep the caller's return address in x13
+ mov x12, #32 // kernel loop counter: minus 4 per iteration, 2 rows per iteration -> 16 rows
+ bl \type\()_h264_qpel8_h_lowpass_neon_10 // first pass over columns 0..7
+ sub x0, x0, x3, lsl #4 // rewind dst by the 16 rows just written (16 * x3)
+ sub x1, x1, x2, lsl #4 // rewind src by the 16 rows just read (16 * x2)
+ add x0, x0, #16 // step dst 16 bytes right = 8 halfword samples
+ add x1, x1, #16 // step src 16 bytes right = 8 halfword samples
+ mov x12, #32 // re-arm the row counter for the second pass
+ mov x30, x13 // restore return address so the kernel's ret returns to our caller
+endfunc // no ret/branch above: control is meant to run on into the qpel8 kernel below (assumes function/endfunc emit no instructions - confirm in asm.S)
+
+function \type\()_h264_qpel8_h_lowpass_neon_10 // 8-wide kernel: x0 = dst, x1 = src, x2 = src stride, x3 = dst stride, x12 = counter (4 per 2 rows)
+1: ld1 {v28.8H, v29.8H}, [x1], x2 // first row: 16 halfwords from src, then src += x2
+ ld1 {v16.8H, v17.8H}, [x1], x2 // second row: 16 halfwords from src, then src += x2
+ subs x12, x12, #4 // count down; flags are consumed by b.ne at the bottom (assumes lowpass_8_10 leaves NZCV alone - its head is not visible here)
+ lowpass_8_10 v28, v29, v16, v17, v28, v20 // filter both rows; outputs are written to v28 and v20 (v20 is derived from the second row, v16/v17)
+ .ifc \type,avg
+ ld1 {v2.8H}, [x0], x3 // avg only: fetch the existing dst row, dst += x3
+ urhadd v28.8H, v28.8H, v2.8H // rounding average of the filter output with the existing dst row
+ ld1 {v3.8H}, [x0] // existing next dst row (no post-increment)
+ urhadd v20.8H, v20.8H, v3.8H // rounding average for the second row
+ sub x0, x0, x3 // undo the post-increment so the stores below hit the same two rows
+ .endif
+ st1 {v28.8H}, [x0], x3 // store first output row, dst += x3
+ st1 {v20.8H}, [x0], x3 // store second output row, dst += x3
+ b.ne 1b // loop until x12 reached zero (flags from subs above)
+ ret
+endfunc
+.endm
+
+ h264_qpel_h_lowpass_10 put // defines put_h264_qpel16_h_lowpass_neon_10 and put_h264_qpel8_h_lowpass_neon_10 (plain store, .ifc avg section omitted)
+ h264_qpel_h_lowpass_10 avg // defines the avg_ pair, which averages the result with the existing destination via urhadd
+
+.macro h264_qpel_h_lowpass_l2_10 type // emits 16-wide wrapper + 8-wide kernel: horizontal lowpass averaged with a second source read via x3
+function \type\()_h264_qpel16_h_lowpass_l2_neon_10 // 16-wide: call the 8-wide l2 kernel once, reposition x0/x1/x3, then continue into it again
+ mov x13, x30 // bl overwrites x30; keep the caller's return address in x13
+ mov x12, #32 // kernel loop counter: minus 4 per iteration, 2 rows per iteration -> 16 rows
+ bl \type\()_h264_qpel8_h_lowpass_l2_neon_10 // first pass over columns 0..7
+ sub x0, x0, x2, lsl #4 // rewind dst by 16 rows; this variant uses x2 as the stride for all three pointers
+ sub x1, x1, x2, lsl #4 // rewind src by 16 rows
+ sub x3, x3, x2, lsl #4 // rewind the second source by 16 rows
+ add x0, x0, #16 // step dst 16 bytes right = 8 halfword samples
+ add x1, x1, #16 // step src 16 bytes right
+ add x3, x3, #16 // step second source 16 bytes right
+ mov x12, #32 // re-arm the row counter for the second pass
+ mov x30, x13 // restore return address so the kernel's ret returns to our caller
+endfunc // no ret/branch above: control is meant to run on into the qpel8 l2 kernel below (assumes function/endfunc emit no instructions - confirm in asm.S)
+
+function \type\()_h264_qpel8_h_lowpass_l2_neon_10 // 8-wide kernel: x0 = dst, x1 = src, x3 = second source, x2 = stride for all three, x12 = counter (4 per 2 rows)
+1: ld1 {v26.8H, v27.8H}, [x1], x2 // first row: 16 halfwords from src, then src += x2
+ ld1 {v16.8H, v17.8H}, [x1], x2 // second row: 16 halfwords from src, then src += x2
+ ld1 {v28.8H}, [x3], x2 // first row of the second source (8 halfwords), x3 += x2
+ ld1 {v29.8H}, [x3], x2 // second row of the second source, x3 += x2
+ subs x12, x12, #4 // count down; flags are consumed by b.ne at the bottom (assumes lowpass_8_10 leaves NZCV alone - its head is not visible here)
+ lowpass_8_10 v26, v27, v16, v17, v26, v27 // filter both rows; outputs are written to v26 and v27 (v27 is derived from the second row, v16/v17)
+ urhadd v26.8H, v26.8H, v28.8H // rounding average of filter output with the second source, first row
+ urhadd v27.8H, v27.8H, v29.8H // same for the second row
+ .ifc \type,avg
+ ld1 {v2.8H}, [x0], x2 // avg only: fetch the existing dst row, dst += x2
+ urhadd v26.8H, v26.8H, v2.8H // rounding average with the existing dst row
+ ld1 {v3.8H}, [x0] // existing next dst row (no post-increment)
+ urhadd v27.8H, v27.8H, v3.8H // rounding average for the second row
+ sub x0, x0, x2 // undo the post-increment so the stores below hit the same two rows
+ .endif
+ st1 {v26.8H}, [x0], x2 // store first output row, dst += x2
+ st1 {v27.8H}, [x0], x2 // store second output row, dst += x2
+ b.ne 1b // loop until x12 reached zero (flags from subs above)
+ ret
+endfunc
+.endm
+
+ h264_qpel_h_lowpass_l2_10 put // defines put_h264_qpel16_h_lowpass_l2_neon_10 and put_h264_qpel8_h_lowpass_l2_neon_10
+ h264_qpel_h_lowpass_l2_10 avg // defines the avg_ pair, which additionally averages with the existing destination via urhadd
+
+function put_h264_qpel16_v_lowpass_neon_packed_10 // 16x16 vertical lowpass on 16-bit samples, built from four calls to the 8x8 10-bit kernel
+ mov x4, x30 // bl overwrites x30; keep the caller's return address in x4
+ mov x2, #16 // stride handed to the kernel in x2: 8 samples * 2 bytes (was #8, the 8-bit value) - presumably the packed dst stride
+ bl put_h264_qpel8_v_lowpass_neon_10 // left column, rows 0..7 (was the 8-bit kernel, which cannot process .8H data)
+ sub x1, x1, x3, lsl #2 // kernel post-increments x1 over 12 of its 13 loaded rows; back up 4 so the net advance is 8 rows
+ bl put_h264_qpel8_v_lowpass_neon_10 // left column, rows 8..15
+ sub x1, x1, x3, lsl #4 // x1 is now 20 rows past the start: rewind 16 ...
+ sub x1, x1, x3, lsl #2 // ... plus 4 more, back to the first source row
+ add x1, x1, #16 // step 8 samples right = 16 bytes for 16-bit samples (was #8)
+ bl put_h264_qpel8_v_lowpass_neon_10 // right column, rows 0..7
+ sub x1, x1, x3, lsl #2 // net advance of 8 rows again
+ mov x30, x4 // restore return address so the kernel's ret returns to our caller
+ b put_h264_qpel8_v_lowpass_neon_10 // tail call: right column, rows 8..15
+endfunc
+
+.macro h264_qpel_v_lowpass_10 type
+function \type\()_h264_qpel16_v_lowpass_neon_10
+ mov x4, x30
+ bl \type\()_h264_qpel8_v_lowpass_neon_10
+ sub x1, x1, x3, lsl #2
+ bl \type\()_h264_qpel8_v_lowpass_neon_10
+ sub x0, x0, x2, lsl #4
+ add x0, x0, #16
+ sub x1, x1, x3, lsl #4
+ sub x1, x1, x3, lsl #2
+ add x1, x1, #16
+ bl \type\()_h264_qpel8_v_lowpass_neon_10
+ sub x1, x1, x3, lsl #2
+ mov x30, x4
+endfunc
+
+function \type\()_h264_qpel8_v_lowpass_neon_10
+ ld1 {v16.8H}, [x1], x3
+ ld1 {v18.8H}, [x1], x3
+ ld1 {v20.8H}, [x1], x3
+ ld1 {v22.8H}, [x1], x3
+ ld1 {v24.8H}, [x1], x3
+ ld1 {v26.8H}, [x1], x3
+ ld1 {v28.8H}, [x1], x3
+ ld1 {v30.8H}, [x1], x3
+ ld1 {v17.8H}, [x1], x3
+ ld1 {v19.8H}, [x1], x3
+ ld1 {v21.8H}, [x1], x3
+ ld1 {v23.8H}, [x1], x3
+ ld1 {v25.8H}, [x1]
+
+ transpose_8x8H v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
+ transpose_8x8H v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
+ lowpass_8_10 v16, v17, v18, v19, v16, v17
+ lowpass_8_10 v20, v21, v22, v23, v18, v19
+ lowpass_8_10 v24, v25, v26, v27, v20, v21
+ lowpass_8_10 v28, v29, v30, v31, v22, v23
+ transpose_8x8H v16, v17, v18, v19, v20, v21, v22, v23, v0, v1