On Sun, 4 Jun 2023, logan....@myais.com.cn wrote:

From: Logan Lyu <logan....@myais.com.cn>

Signed-off-by: Logan Lyu <logan....@myais.com.cn>
---
libavcodec/aarch64/Makefile               |   1 +
libavcodec/aarch64/hevcdsp_epel_neon.S    | 378 ++++++++++++++++++++++
libavcodec/aarch64/hevcdsp_init_aarch64.c |   7 +-
3 files changed, 385 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/aarch64/hevcdsp_epel_neon.S

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 216191640c..cb428b49e0 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -69,4 +69,5 @@ NEON-OBJS-$(CONFIG_HEVC_DECODER)        += aarch64/hevcdsp_deblock_neon.o      \
                                           aarch64/hevcdsp_idct_neon.o         \
                                           aarch64/hevcdsp_init_aarch64.o      \
                                           aarch64/hevcdsp_qpel_neon.o         \
+                                           aarch64/hevcdsp_epel_neon.o         \
                                           aarch64/hevcdsp_sao_neon.o
diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S b/libavcodec/aarch64/hevcdsp_epel_neon.S
new file mode 100644
index 0000000000..fe494dd843
--- /dev/null
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -0,0 +1,378 @@
+/* -*-arm64-*-
+ * vim: syntax=arm64asm
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#define MAX_PB_SIZE 64
+
+const epel_filters, align=4
+        .byte  0,  0,  0,  0
+        .byte -2, 58, 10, -2
+        .byte -4, 54, 16, -2
+        .byte -6, 46, 28, -4
+        .byte -4, 36, 36, -4
+        .byte -4, 28, 46, -6
+        .byte -2, 16, 54, -4
+        .byte -2, 10, 58, -2
+endconst
+
+#if HAVE_I8MM
+.macro EPEL_UNI_W_H_HEADER
+        ldr             x12, [sp]
+        sub             x2, x2, #1
+        movrel          x9, epel_filters
+        add             x9, x9, x12, lsl #2
+        ldr             w11, [x9]
+        dup             v28.4s, w11

Why not just do "ld1r {v28.4s}, [x9]" here instead, avoiding the indirection via GPRs?

Other than that, I think this mostly looks reasonable.

Btw, for any assembly patches like these, it would be appreciated if you can provide benchmarks from checkasm, e.g. "checkasm --test=hevc_pel --bench=put_hevc" (or maybe just "--bench") and extract the relevant lines for the functions that you've added/modified, and mention what system you've benchmarked it on. You get the most useful benchmarks for micro-tuning if you can enable userspace access to the timing registers and configure with --disable-linux-perf.

// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to