[MPlayer-cvslog] r36499 - trunk/libaf/af_volume.c

reimar subversion at mplayerhq.hu
Wed Oct 30 19:45:49 CET 2013


Author: reimar
Date: Wed Oct 30 19:45:48 2013
New Revision: 36499

Log:
af_volume: add NEON optimization for common float case.

gcc is too stupid to use vmin/vmax on its own. The code it
generates instead interleaves the float operations with status
register reads, and that has simply horrible performance.

Modified:
   trunk/libaf/af_volume.c

Modified: trunk/libaf/af_volume.c
==============================================================================
--- trunk/libaf/af_volume.c	Tue Oct 29 20:24:16 2013	(r36498)
+++ trunk/libaf/af_volume.c	Wed Oct 30 19:45:48 2013	(r36499)
@@ -125,6 +125,27 @@ static av_always_inline void s16_inner_l
 static av_always_inline void float_inner_loop(float *data, int len, int offset, int step, float level, int softclip)
 {
   int i;
+#if HAVE_NEON
+  if (offset == 0 && step == 1 && !softclip && len >= 8)
+  {
+    __asm__(
+      "vmov.32 d2[0], %2\n\t"
+      "vdup.32 q8, %3\n\t"
+      "vneg.f32 q9, q8\n\t"
+"0:\n\t"
+      "vld1.32 {q0}, [%0]\n\t"
+      "vmul.f32 q0, q0, d2[0]\n\t"
+      "cmp %0, %1\n\t"
+      "vmin.f32 q0, q0, q8\n\t"
+      "vmax.f32 q0, q0, q9\n\t"
+      "vst1.32 {q0}, [%0]!\n\t"
+      "blo 0b\n\t"
+    : "+&r"(data)
+    : "r"(data + len - 7), "r"(level), "r"(0x3f800000)
+    : "cc", "q0", "d2", "q8", "q9", "memory");
+    len &= 3;
+  }
+#endif
   for (i = offset; i < len; i += step)
   {
     register float x = data[i];


More information about the MPlayer-cvslog mailing list