[MPlayer-cvslog] r36499 - trunk/libaf/af_volume.c
reimar
subversion at mplayerhq.hu
Wed Oct 30 19:45:49 CET 2013
Author: reimar
Date: Wed Oct 30 19:45:48 2013
New Revision: 36499
Log:
af_volume: add NEON optimization for common float case.
gcc fails to use vmin/vmax on its own, which leads to float
code interleaved with status register reads, and that simply
performs horribly.
Modified:
trunk/libaf/af_volume.c
Modified: trunk/libaf/af_volume.c
==============================================================================
--- trunk/libaf/af_volume.c Tue Oct 29 20:24:16 2013 (r36498)
+++ trunk/libaf/af_volume.c Wed Oct 30 19:45:48 2013 (r36499)
@@ -125,6 +125,27 @@ static av_always_inline void s16_inner_l
static av_always_inline void float_inner_loop(float *data, int len, int offset, int step, float level, int softclip)
{
int i;
+#if HAVE_NEON
+ if (offset == 0 && step == 1 && !softclip && len >= 8)
+ {
+ __asm__(
+ "vmov.32 d2[0], %2\n\t"
+ "vdup.32 q8, %3\n\t"
+ "vneg.f32 q9, q8\n\t"
+"0:\n\t"
+ "vld1.32 {q0}, [%0]\n\t"
+ "vmul.f32 q0, q0, d2[0]\n\t"
+ "cmp %0, %1\n\t"
+ "vmin.f32 q0, q0, q8\n\t"
+ "vmax.f32 q0, q0, q9\n\t"
+ "vst1.32 {q0}, [%0]!\n\t"
+ "blo 0b\n\t"
+ : "+&r"(data)
+ : "r"(data + len - 7), "r"(level), "r"(0x3f800000)
+ : "cc", "q0", "d2", "q8", "q9", "memory");
+ len &= 3;
+ }
+#endif
for (i = offset; i < len; i += step)
{
register float x = data[i];
More information about the MPlayer-cvslog mailing list