[FFmpeg-devel] [PATCH 2/3] libavcodec: arm: Add a NEON implementation of pixblockdsp

Martin Storsjö martin at martin.st
Wed May 13 16:27:56 EEST 2020


                         Cortex A7     A8     A9    A53   A72
get_pixels_c:                144.7  146.0  143.0  137.7   69.0
get_pixels_armv6:            112.0  106.7   90.2   95.0   72.5
get_pixels_neon:              69.0   29.7   68.7   40.2   19.0
get_pixels_unaligned_c:      144.7  146.2  143.0  137.7   69.0
get_pixels_unaligned_neon:    77.0   36.5   72.5   48.5   19.0
diff_pixels_c:               376.7  319.7  265.5  307.7  148.0
diff_pixels_armv6:           179.0  159.5  205.5  139.0  142.0
diff_pixels_neon:             69.0   40.2   77.5   53.2   26.0
diff_pixels_unaligned_c:     376.7  319.7  265.5  307.7  148.0
diff_pixels_unaligned_neon:   85.0   54.5   93.5   66.7   26.0
---
 libavcodec/arm/Makefile               |  1 +
 libavcodec/arm/pixblockdsp_init_arm.c | 18 +++++++
 libavcodec/arm/pixblockdsp_neon.S     | 69 +++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)
 create mode 100644 libavcodec/arm/pixblockdsp_neon.S

diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index c99e8e1bd1..c6be814153 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -126,6 +126,7 @@ NEON-OBJS-$(CONFIG_IDCTDSP)            += arm/idctdsp_init_neon.o       \
 NEON-OBJS-$(CONFIG_MDCT)               += arm/mdct_neon.o               \
                                           arm/mdct_fixed_neon.o
 NEON-OBJS-$(CONFIG_MPEGVIDEO)          += arm/mpegvideo_neon.o
+NEON-OBJS-$(CONFIG_PIXBLOCKDSP)        += arm/pixblockdsp_neon.o
 NEON-OBJS-$(CONFIG_RDFT)               += arm/rdft_neon.o
 NEON-OBJS-$(CONFIG_VC1DSP)             += arm/vc1dsp_init_neon.o        \
                                           arm/vc1dsp_neon.o
diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c
index 59d2b49381..5481c0178c 100644
--- a/libavcodec/arm/pixblockdsp_init_arm.c
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@ -29,6 +29,15 @@ void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels,
 void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
                           const uint8_t *s2, ptrdiff_t stride);
 
+void ff_get_pixels_neon(int16_t *block, const uint8_t *pixels,
+                        ptrdiff_t stride); /* NEON, pixblockdsp_neon.S; pixel loads carry a 64-bit alignment hint */
+void ff_get_pixels_unaligned_neon(int16_t *block, const uint8_t *pixels,
+                                  ptrdiff_t stride); /* same, but pixel loads carry no alignment hint */
+void ff_diff_pixels_neon(int16_t *block, const uint8_t *s1,
+                         const uint8_t *s2, ptrdiff_t stride); /* NEON, pixblockdsp_neon.S; s1/s2 loads carry a 64-bit alignment hint */
+void ff_diff_pixels_unaligned_neon(int16_t *block, const uint8_t *s1,
+                                   const uint8_t *s2, ptrdiff_t stride); /* same, but s1/s2 loads carry no alignment hint */
+
 av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
                                      AVCodecContext *avctx,
                                      unsigned high_bit_depth)
@@ -40,4 +49,13 @@ av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
             c->get_pixels = ff_get_pixels_armv6;
         c->diff_pixels = ff_diff_pixels_armv6;
     }
+
+    if (have_neon(cpu_flags)) {
+        if (!high_bit_depth) {
+            c->get_pixels_unaligned = ff_get_pixels_unaligned_neon;
+            c->get_pixels = ff_get_pixels_neon;
+        }
+        c->diff_pixels_unaligned = ff_diff_pixels_unaligned_neon;
+        c->diff_pixels = ff_diff_pixels_neon;
+    }
 }
diff --git a/libavcodec/arm/pixblockdsp_neon.S b/libavcodec/arm/pixblockdsp_neon.S
new file mode 100644
index 0000000000..25674586ea
--- /dev/null
+++ b/libavcodec/arm/pixblockdsp_neon.S
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+.macro vld1_8 dst, src, incr, aligned                 /* load one D register from [src], then src += incr */
+.if \aligned                                          /* nonzero: attach a 64-bit alignment hint to the address */
+        vld1.8          {\dst}, [\src, :64], \incr
+.else                                                 /* zero: no alignment hint on the address */
+        vld1.8          {\dst}, [\src], \incr
+.endif
+.endm
+
+.macro get_pixels suffix, aligned                     /* emits ff_get_pixels<suffix>_neon; aligned selects the load hint */
+function ff_get_pixels\suffix\()_neon, export=1
+        mov             r3,  #8                       /* r0 = block, r1 = pixels, r2 = stride; r3 = rows left */
+1:
+        vld1_8          d0,  r1,  r2,  \aligned       /* d0 = 8 pixels of the first row, r1 += stride */
+        subs            r3,  r3,  #2                  /* two rows per iteration; sets the flags tested by bgt */
+        vld1_8          d2,  r1,  r2,  \aligned       /* d2 = 8 pixels of the second row, r1 += stride */
+        vmovl.u8        q0,  d0                       /* zero-extend 8 x u8 to 8 x 16 bit */
+        vmovl.u8        q1,  d2                       /* same for the second row; d2 is the low half of q1 */
+        vst1.16         {q0, q1}, [r0, :128]!         /* store 32 bytes with a 128-bit hint on block (both variants), r0 += 32 */
+        bgt             1b                            /* loop until r3 reaches 0: 4 iterations */
+
+        bx              lr
+endfunc
+.endm
+
+get_pixels , aligned=1                                /* ff_get_pixels_neon */
+get_pixels _unaligned, aligned=0                      /* ff_get_pixels_unaligned_neon */
+
+.macro diff_pixels suffix, aligned=0                  /* emits ff_diff_pixels<suffix>_neon; aligned selects the load hint */
+function ff_diff_pixels\suffix\()_neon, export=1
+        mov             r12, #8                       /* r0 = block, r1 = s1, r2 = s2, r3 = stride; r12 = rows left */
+1:
+        vld1_8          d0,  r1,  r3,  \aligned       /* d0 = 8 bytes of s1, first row, r1 += stride */
+        vld1_8          d1,  r2,  r3,  \aligned       /* d1 = 8 bytes of s2, first row, r2 += stride */
+        subs            r12, r12, #2                  /* two rows per iteration; sets the flags tested by bgt */
+        vld1_8          d2,  r1,  r3,  \aligned       /* d2 = 8 bytes of s1, second row */
+        vsubl.u8        q0,  d0,  d1                  /* q0 = s1 - s2 per byte, widened to 16 bit */
+        vld1_8          d3,  r2,  r3,  \aligned       /* d3 = 8 bytes of s2, second row */
+        vsubl.u8        q1,  d2,  d3                  /* q1 = s1 - s2 for the second row */
+        vst1.16         {q0, q1}, [r0]!               /* store 32 bytes, r0 += 32; NOTE(review): no :128 hint here although the get_pixels store has one - confirm block alignment */
+        bgt             1b                            /* loop until r12 reaches 0: 4 iterations */
+
+        bx              lr
+endfunc
+.endm
+
+diff_pixels , aligned=1                               /* ff_diff_pixels_neon */
+diff_pixels _unaligned, aligned=0                     /* ff_diff_pixels_unaligned_neon */
-- 
2.17.1



More information about the ffmpeg-devel mailing list