[FFmpeg-cvslog] avcodec/aarch64/mpegvideoencdsp: add dotprod implementation for pix_norm1

Ramiro Polla git at videolan.org
Mon Aug 26 13:54:25 EEST 2024


ffmpeg | branch: master | Ramiro Polla <ramiro.polla at gmail.com> | Wed Aug 21 16:55:52 2024 +0200| [8c203ea7c794e01efcf985ba8303f598a8180864] | committer: Ramiro Polla

avcodec/aarch64/mpegvideoencdsp: add dotprod implementation for pix_norm1

                      A55             A76
pix_norm1_c:        484.3           235.2
pix_norm1_neon:     193.8 ( 2.50x)   44.7 ( 5.26x)
pix_norm1_dotprod:   91.8 ( 5.28x)   21.2 (11.09x)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8c203ea7c794e01efcf985ba8303f598a8180864
---

 libavcodec/aarch64/mpegvideoencdsp_init.c | 10 ++++++++++
 libavcodec/aarch64/mpegvideoencdsp_neon.S | 28 ++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
index 7eb632ed1b..d0ce07e178 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_init.c
+++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
@@ -27,6 +27,10 @@
 int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
 int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
 
+#if HAVE_DOTPROD
+int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
+#endif
+
 av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
                                              AVCodecContext *avctx)
 {
@@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
         c->pix_sum   = ff_pix_sum16_neon;
         c->pix_norm1 = ff_pix_norm1_neon;
     }
+
+#if HAVE_DOTPROD
+    if (have_dotprod(cpu_flags)) {
+        c->pix_norm1 = ff_pix_norm1_neon_dotprod;
+    }
+#endif
 }
diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
index f562ee3eba..4944e7b7f4 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
+++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
@@ -66,3 +66,31 @@ function ff_pix_norm1_neon, export=1
 
         ret
 endfunc
+
+#if HAVE_DOTPROD
+ENABLE_DOTPROD
+
+function ff_pix_norm1_neon_dotprod, export=1
+// x0  const uint8_t *pix        (read 16 bytes per row, 16 rows)
+// x1  int line_size             (byte offset added to pix after each row)
+// returns in w0 the sum of the squares of all bytes read
+        sxtw            x1, w1                  // widen the 32-bit int line_size to 64 bits for use as a post-index offset
+        movi            v0.16b, #0              // v0 = four 32-bit partial sums, start at zero
+        mov             w2, #16                 // w2 = number of rows still to process
+
+1:
+        ld1             {v1.16b}, [x0], x1      // load one 16-byte row, then advance pix by line_size
+        ld1             {v2.16b}, [x0], x1      // load the following row the same way
+        udot            v0.4s, v1.16b, v1.16b   // dot product of the row with itself: each lane += squares of its 4 bytes
+        subs            w2, w2, #2              // two rows consumed per iteration; sets the flags tested by b.ne
+        udot            v0.4s, v2.16b, v2.16b   // accumulate the squares of the second row
+        b.ne            1b                      // loop until the row counter reaches zero
+
+        uaddlv          d0, v0.4s               // add the four 32-bit lanes into a single 64-bit sum
+        fmov            w0, s0                  // return the low 32 bits of that sum
+
+        ret
+endfunc
+
+DISABLE_DOTPROD
+#endif



More information about the ffmpeg-cvslog mailing list