[FFmpeg-cvslog] r19637 - in trunk/libavcodec/arm: dsputil_neon.c dsputil_neon_s.S
mru
subversion
Fri Aug 14 03:02:06 CEST 2009
Author: mru
Date: Fri Aug 14 03:02:06 2009
New Revision: 19637
Log:
ARM: NEON optimised vorbis_inverse_coupling
12% faster Vorbis decoding on Cortex-A8.
Modified:
trunk/libavcodec/arm/dsputil_neon.c
trunk/libavcodec/arm/dsputil_neon_s.S
Modified: trunk/libavcodec/arm/dsputil_neon.c
==============================================================================
--- trunk/libavcodec/arm/dsputil_neon.c Thu Aug 13 20:47:13 2009 (r19636)
+++ trunk/libavcodec/arm/dsputil_neon.c Fri Aug 14 03:02:06 2009 (r19637)
@@ -161,6 +161,8 @@ void ff_vector_fmul_window_neon(float *d
void ff_float_to_int16_neon(int16_t *, const float *, long);
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
+void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
+
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
@@ -272,4 +274,7 @@ void ff_dsputil_init_neon(DSPContext *c,
c->float_to_int16 = ff_float_to_int16_neon;
c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
}
+
+ if (CONFIG_VORBIS_DECODER)
+ c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
}
Modified: trunk/libavcodec/arm/dsputil_neon_s.S
==============================================================================
--- trunk/libavcodec/arm/dsputil_neon_s.S Thu Aug 13 20:47:13 2009 (r19636)
+++ trunk/libavcodec/arm/dsputil_neon_s.S Fri Aug 14 03:02:06 2009 (r19637)
@@ -19,6 +19,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "config.h"
#include "asm.S"
preserve8
@@ -795,3 +796,66 @@ NOVFP ldr lr, [sp, #16]
vst1.64 {d22,d23},[ip,:128], r5
pop {r4,r5,pc}
.endfunc
+
+#if CONFIG_VORBIS_DECODER
+function ff_vorbis_inverse_coupling_neon, export=1
+ vmov.i32 q10, #1<<31
+ subs r2, r2, #4
+ tst r2, #4
+ mov r3, r0
+ mov r12, r1
+ beq 3f
+
+ vld1.32 {d24-d25},[r1,:128]!
+ vld1.32 {d22-d23},[r0,:128]!
+ vcle.s32 q8, q12, #0
+ vand q9, q11, q10
+ veor q12, q12, q9
+ vand q2, q12, q8
+ vbic q3, q12, q8
+ vadd.f32 q12, q11, q2
+ vsub.f32 q11, q11, q3
+1: vld1.32 {d2-d3}, [r1,:128]!
+ vld1.32 {d0-d1}, [r0,:128]!
+ vcle.s32 q8, q1, #0
+ vand q9, q0, q10
+ veor q1, q1, q9
+ vst1.32 {d24-d25},[r3, :128]!
+ vst1.32 {d22-d23},[r12,:128]!
+ vand q2, q1, q8
+ vbic q3, q1, q8
+ vadd.f32 q1, q0, q2
+ vsub.f32 q0, q0, q3
+ subs r2, r2, #8
+ ble 2f
+ vld1.32 {d24-d25},[r1,:128]!
+ vld1.32 {d22-d23},[r0,:128]!
+ vcle.s32 q8, q12, #0
+ vand q9, q11, q10
+ veor q12, q12, q9
+ vst1.32 {d2-d3}, [r3, :128]!
+ vst1.32 {d0-d1}, [r12,:128]!
+ vand q2, q12, q8
+ vbic q3, q12, q8
+ vadd.f32 q12, q11, q2
+ vsub.f32 q11, q11, q3
+ b 1b
+
+2: vst1.32 {d2-d3}, [r3, :128]!
+ vst1.32 {d0-d1}, [r12,:128]!
+ bxlt lr
+
+3: vld1.32 {d2-d3}, [r1,:128]
+ vld1.32 {d0-d1}, [r0,:128]
+ vcle.s32 q8, q1, #0
+ vand q9, q0, q10
+ veor q1, q1, q9
+ vand q2, q1, q8
+ vbic q3, q1, q8
+ vadd.f32 q1, q0, q2
+ vsub.f32 q0, q0, q3
+ vst1.32 {d2-d3}, [r0,:128]!
+ vst1.32 {d0-d1}, [r1,:128]!
+ bx lr
+ .endfunc
+#endif
More information about the ffmpeg-cvslog
mailing list