./ffmpeg -threads 1 -f lavfi -t 60 -i anoisesrc -af 'anlmdn' -f null -benchmark -
Test results on Snapdragon 845:

    Before:
        size=N/A time=00:01:00.00 bitrate=N/A speed=11.2x
        video:0kB audio:5625kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
        bench: utime=5.320s stime=0.010s rtime=5.358s
        bench: maxrss=14172kB

    After:
        size=N/A time=00:01:00.00 bitrate=N/A speed=15.4x
        video:0kB audio:5625kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
        bench: utime=3.870s stime=0.000s rtime=3.902s
        bench: maxrss=14036kB
---
 libavfilter/aarch64/Makefile         |   2 +
 libavfilter/aarch64/af_anlmdn_init.c |  31 ++++++++
 libavfilter/aarch64/af_anlmdn_neon.S | 112 +++++++++++++++++++++++++++
 libavfilter/af_anlmdn.c              |   3 +
 libavfilter/af_anlmdndsp.h           |   1 +
 5 files changed, 149 insertions(+)
 create mode 100644 libavfilter/aarch64/af_anlmdn_init.c
 create mode 100644 libavfilter/aarch64/af_anlmdn_neon.S

diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
index f52d7a4842..6c727f9859 100644
--- a/libavfilter/aarch64/Makefile
+++ b/libavfilter/aarch64/Makefile
@@ -1,5 +1,7 @@
 OBJS-$(CONFIG_NLMEANS_FILTER)                += aarch64/af_afir_init.o
+OBJS-$(CONFIG_ANLMDN_FILTER)                 += aarch64/af_anlmdn_init.o
 OBJS-$(CONFIG_NLMEANS_FILTER)                += aarch64/vf_nlmeans_init.o
 
 NEON-OBJS-$(CONFIG_NLMEANS_FILTER)           += aarch64/af_afir_neon.o
+NEON-OBJS-$(CONFIG_ANLMDN_FILTER)            += aarch64/af_anlmdn_neon.o
 NEON-OBJS-$(CONFIG_NLMEANS_FILTER)           += aarch64/vf_nlmeans_neon.o
diff --git a/libavfilter/aarch64/af_anlmdn_init.c b/libavfilter/aarch64/af_anlmdn_init.c
new file mode 100644
index 0000000000..e28a152e04
--- /dev/null
+++ b/libavfilter/aarch64/af_anlmdn_init.c
@@ -0,0 +1,31 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavfilter/af_anlmdndsp.h"
+
+float ff_compute_distance_ssd_neon(const float *f1, const float *f2,
+                                   ptrdiff_t len); /* defined in af_anlmdn_neon.S */
+
+av_cold void ff_anlmdn_init_aarch64(AudioNLMDNDSPContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+    /* Install the NEON routine only if have_neon() accepts the flags; else s is left untouched. */
+    if (have_neon(cpu_flags))
+        s->compute_distance_ssd = ff_compute_distance_ssd_neon;
+}
diff --git a/libavfilter/aarch64/af_anlmdn_neon.S b/libavfilter/aarch64/af_anlmdn_neon.S
new file mode 100644
index 0000000000..3ad985b476
--- /dev/null
+++ b/libavfilter/aarch64/af_anlmdn_neon.S
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2020 Zhao Zhili
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// float ff_compute_distance_ssd_neon(const float *f1, const float *f2, ptrdiff_t len);
+function ff_compute_distance_ssd_neon, export=1
+        fmov            s0, wzr                         // s0 = running sum, starts at 0.0
+        add             x3, x0, x2, lsl #2              // x3 = f1 + len (x0 = f1, x2 = len)
+        sub             x0, x0, x2, lsl #2              // x0 = f1 - len, first element read
+        sub             x1, x1, x2, lsl #2              // x1 = f2 - len, first element read
+        add             x3, x3, #4                      // x3 = f1 + len + 1, one past the last element
+
+        // process 32 pairs of data per loop
+        add             x4, x0, #128
+        cmp             x4, x3
+        b.hi            2f                              // unsigned compare: x4 and x3 are addresses
+1:      ld1             {v16.4S, v17.4S, v18.4S, v19.4S}, [x0], #64    // next 16 floats of f1
+        ld1             {v20.4S, v21.4S, v22.4S, v23.4S}, [x1], #64    // next 16 floats of f2
+        ld1             {v24.4S, v25.4S, v26.4S, v27.4S}, [x0], #64    // following 16 floats of f1
+        ld1             {v28.4S, v29.4S, v30.4S, v31.4S}, [x1], #64    // following 16 floats of f2
+
+        fsub            v16.4S, v16.4S, v20.4S
+
+        fsub            v17.4S, v17.4S, v21.4S
+        fmul            v16.4S, v16.4S, v16.4S          // v16 = first accumulator of squared differences
+
+        fsub            v18.4S, v18.4S, v22.4S
+        fmul            v17.4S, v17.4S, v17.4S          // v17 = second accumulator of squared differences
+
+        fsub            v19.4S, v19.4S, v23.4S
+        fmla            v16.4S, v18.4S, v18.4S
+
+        fsub            v24.4S, v24.4S, v28.4S
+        fmla            v17.4S, v19.4S, v19.4S
+
+        fsub            v25.4S, v25.4S, v29.4S
+        fmla            v16.4S, v24.4S, v24.4S
+
+        fsub            v26.4S, v26.4S, v30.4S
+        fmla            v17.4S, v25.4S, v25.4S
+
+        fsub            v27.4S, v27.4S, v31.4S
+        fmla            v16.4S, v26.4S, v26.4S
+
+        fmla            v17.4S, v27.4S, v27.4S
+
+        fadd            v1.4S, v16.4S, v17.4S           // merge both accumulators
+        faddp           v1.4S, v1.4S, v1.4S             // pairwise add: 4 lanes -> 2 distinct sums
+        faddp           s1, v1.2S                       // s1 = sum of all lanes
+        fadd            s0, s0, s1                      // fold this iteration into the running sum
+        add             x4, x0, #128
+        cmp             x4, x3
+        b.ls            1b                              // at least 32 elements remain
+
+        // process 16 pairs of data per loop
+2:      add             x4, x0, #64
+        cmp             x4, x3
+        b.hi            4f                              // fewer than 16 elements remain
+3:      ld1             {v16.4S, v17.4S, v18.4S, v19.4S}, [x0], #64    // next 16 floats of f1
+        ld1             {v20.4S, v21.4S, v22.4S, v23.4S}, [x1], #64    // next 16 floats of f2
+
+        fsub            v16.4S, v16.4S, v20.4S
+
+        fsub            v17.4S, v17.4S, v21.4S
+        fmul            v16.4S, v16.4S, v16.4S
+
+        fsub            v18.4S, v18.4S, v22.4S
+        fmul            v17.4S, v17.4S, v17.4S
+
+        fsub            v19.4S, v19.4S, v23.4S
+        fmla            v16.4S, v18.4S, v18.4S
+
+        fmla            v17.4S, v19.4S, v19.4S
+
+        fadd            v1.4S, v16.4S, v17.4S           // merge both accumulators
+        faddp           v1.4S, v1.4S, v1.4S
+        faddp           s1, v1.2S                       // s1 = sum of all lanes
+        fadd            s0, s0, s1
+        add             x4, x0, #64
+        cmp             x4, x3
+        b.ls            3b                              // at least 16 elements remain
+
+        // process 1 pair of data per loop
+4:      cmp             x0, x3
+        b.eq            6f                              // nothing left
+5:      ldr             s1, [x0], #4
+        ldr             s2, [x1], #4
+        fsub            s1, s1, s2
+        cmp             x0, x3                          // issued early; fmadd does not modify the flags
+        fmadd           s0, s1, s1, s0                  // s0 += s1 * s1
+        b.ne            5b
+6:      ret                                             // result is in s0
+
+endfunc
diff --git a/libavfilter/af_anlmdn.c b/libavfilter/af_anlmdn.c
index b8aef31c35..63bc1a1f2c 100644
--- a/libavfilter/af_anlmdn.c
+++ b/libavfilter/af_anlmdn.c
@@ -145,6 +145,9 @@ void ff_anlmdn_init(AudioNLMDNDSPContext *dsp)
 
     if (ARCH_X86)
         ff_anlmdn_init_x86(dsp);
+    if (ARCH_AARCH64) {
+        ff_anlmdn_init_aarch64(dsp);
+    }
 }
 
 static int config_output(AVFilterLink *outlink)
diff --git a/libavfilter/af_anlmdndsp.h b/libavfilter/af_anlmdndsp.h
index d8f5136cd8..f9d8a80c83 100644
--- a/libavfilter/af_anlmdndsp.h
+++ b/libavfilter/af_anlmdndsp.h
@@ -35,6 +35,7 @@ typedef struct AudioNLMDNDSPContext {
 } AudioNLMDNDSPContext;
 
 void ff_anlmdn_init(AudioNLMDNDSPContext *s);
+void ff_anlmdn_init_aarch64(AudioNLMDNDSPContext *s);
 void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s);
 
 #endif /* AVFILTER_ANLMDNDSP_H */
-- 
2.24.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".