[FFmpeg-devel] [PATCH] avfilter/af_anlmdn: add AArch64 SIMD for compute_distance_ssd
Zhao Zhili
quinkblack at foxmail.com
Fri Jan 24 11:15:16 EET 2020
./ffmpeg -threads 1 -f lavfi -t 60 -i anoisesrc -af 'anlmdn' -f null -benchmark -
Test results on Snapdragon 845:
Before:
size=N/A time=00:01:00.00 bitrate=N/A speed=11.2x
video:0kB audio:5625kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
bench: utime=5.320s stime=0.010s rtime=5.358s
bench: maxrss=14172kB
After:
size=N/A time=00:01:00.00 bitrate=N/A speed=15.4x
video:0kB audio:5625kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
bench: utime=3.870s stime=0.000s rtime=3.902s
bench: maxrss=14036kB
---
libavfilter/aarch64/Makefile | 2 +
libavfilter/aarch64/af_anlmdn_init.c | 31 ++++++++
libavfilter/aarch64/af_anlmdn_neon.S | 112 +++++++++++++++++++++++++++
libavfilter/af_anlmdn.c | 3 +
libavfilter/af_anlmdndsp.h | 1 +
5 files changed, 149 insertions(+)
create mode 100644 libavfilter/aarch64/af_anlmdn_init.c
create mode 100644 libavfilter/aarch64/af_anlmdn_neon.S
diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
index f52d7a4842..6c727f9859 100644
--- a/libavfilter/aarch64/Makefile
+++ b/libavfilter/aarch64/Makefile
@@ -1,5 +1,7 @@
OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/af_afir_init.o
+OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/af_anlmdn_init.o
OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o
NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/af_afir_neon.o
+NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/af_anlmdn_neon.o
NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o
diff --git a/libavfilter/aarch64/af_anlmdn_init.c b/libavfilter/aarch64/af_anlmdn_init.c
new file mode 100644
index 0000000000..e28a152e04
--- /dev/null
+++ b/libavfilter/aarch64/af_anlmdn_init.c
@@ -0,0 +1,31 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavfilter/af_anlmdndsp.h"
+
+float ff_compute_distance_ssd_neon(const float *f1, const float *f2,
+ ptrdiff_t len);
+
+av_cold void ff_anlmdn_init_aarch64(AudioNLMDNDSPContext *s)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_neon(cpu_flags))
+ s->compute_distance_ssd = ff_compute_distance_ssd_neon;
+}
diff --git a/libavfilter/aarch64/af_anlmdn_neon.S b/libavfilter/aarch64/af_anlmdn_neon.S
new file mode 100644
index 0000000000..3ad985b476
--- /dev/null
+++ b/libavfilter/aarch64/af_anlmdn_neon.S
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2020 Zhao Zhili
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// float ff_compute_distance_ssd_neon(const float *f1, const float *f2, ptrdiff_t len);
+function ff_compute_distance_ssd_neon, export=1
+ fmov s0, wzr
+ add x3, x0, x2, lsl #2 // end of f1
+ sub x0, x0, x2, lsl #2 // begin of f1
+ sub x1, x1, x2, lsl #2 // begin of f2
+ add x3, x3, #4 // end + 1 of f1
+
+ // process 32 pairs of data per loop
+ add x4, x0, #128
+ cmp x4, x3
+ b.gt 2f
+1: ld1 {v16.4S, v17.4S, v18.4S, v19.4S}, [x0], #64
+ ld1 {v20.4S, v21.4S, v22.4S, v23.4S}, [x1], #64
+ ld1 {v24.4S, v25.4S, v26.4S, v27.4S}, [x0], #64
+ ld1 {v28.4S, v29.4S, v30.4S, v31.4S}, [x1], #64
+
+ fsub v16.4S, v16.4S, v20.4S
+
+ fsub v17.4S, v17.4S, v21.4S
+ fmul v16.4S, v16.4S, v16.4S
+
+ fsub v18.4S, v18.4S, v22.4S
+ fmul v17.4S, v17.4S, v17.4S
+
+ fsub v19.4S, v19.4S, v23.4S
+ fmla v16.4S, v18.4S, v18.4S
+
+ fsub v24.4S, v24.4S, v28.4S
+ fmla v17.4S, v19.4S, v19.4S
+
+ fsub v25.4S, v25.4S, v29.4S
+ fmla v16.4S, v24.4S, v24.4S
+
+ fsub v26.4S, v26.4S, v30.4S
+ fmla v17.4S, v25.4S, v25.4S
+
+ fsub v27.4S, v27.4S, v31.4S
+ fmla v16.4S, v26.4S, v26.4S
+
+ fmla v17.4S, v27.4S, v27.4S
+
+ fadd v1.4S, v16.4S, v17.4S
+ faddp v1.4S, v1.4S, v1.4S
+ faddp s1, v1.2S
+ fadd s0, s0, s1
+ add x4, x0, #128
+ cmp x4, x3
+ b.le 1b
+
+ // process 16 pairs of data per loop
+2: add x4, x0, #64
+ cmp x4, x3
+ b.gt 4f
+3: ld1 {v16.4S, v17.4S, v18.4S, v19.4S}, [x0], #64
+ ld1 {v20.4S, v21.4S, v22.4S, v23.4S}, [x1], #64
+
+ fsub v16.4S, v16.4S, v20.4S
+
+ fsub v17.4S, v17.4S, v21.4S
+ fmul v16.4S, v16.4S, v16.4S
+
+ fsub v18.4S, v18.4S, v22.4S
+ fmul v17.4S, v17.4S, v17.4S
+
+ fsub v19.4S, v19.4S, v23.4S
+ fmla v16.4S, v18.4S, v18.4S
+
+ fmla v17.4S, v19.4S, v19.4S
+
+ fadd v1.4S, v16.4S, v17.4S
+ faddp v1.4S, v1.4S, v1.4S
+ faddp s1, v1.2S
+ fadd s0, s0, s1
+ add x4, x0, #64
+ cmp x4, x3
+ b.le 3b
+
+ // process 1 pair of data per loop
+4: cmp x0, x3
+ b.eq 6f
+5: ldr s1, [x0], #4
+ ldr s2, [x1], #4
+ fsub s1, s1, s2
+ cmp x0, x3
+ fmadd s0, s1, s1, s0
+ b.ne 5b
+6: ret
+
+endfunc
diff --git a/libavfilter/af_anlmdn.c b/libavfilter/af_anlmdn.c
index b8aef31c35..63bc1a1f2c 100644
--- a/libavfilter/af_anlmdn.c
+++ b/libavfilter/af_anlmdn.c
@@ -145,6 +145,9 @@ void ff_anlmdn_init(AudioNLMDNDSPContext *dsp)
if (ARCH_X86)
ff_anlmdn_init_x86(dsp);
+ if (ARCH_AARCH64) {
+ ff_anlmdn_init_aarch64(dsp);
+ }
}
static int config_output(AVFilterLink *outlink)
diff --git a/libavfilter/af_anlmdndsp.h b/libavfilter/af_anlmdndsp.h
index d8f5136cd8..f9d8a80c83 100644
--- a/libavfilter/af_anlmdndsp.h
+++ b/libavfilter/af_anlmdndsp.h
@@ -35,6 +35,7 @@ typedef struct AudioNLMDNDSPContext {
} AudioNLMDNDSPContext;
void ff_anlmdn_init(AudioNLMDNDSPContext *s);
+void ff_anlmdn_init_aarch64(AudioNLMDNDSPContext *s);
void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s);
#endif /* AVFILTER_ANLMDNDSP_H */
--
2.24.0
More information about the ffmpeg-devel
mailing list