[FFmpeg-cvslog] libswresample: unaligned AVX/SSE4 float and int32 6ch pack
Michael Niedermayer
git at videolan.org
Sun May 13 19:33:09 CEST 2012
ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Sun May 13 19:20:47 2012 +0200| [24c0d1583ce6c7bb68fb40167a8b7bbaccac9e4a] | committer: Michael Niedermayer
libswresample: unaligned AVX/SSE4 float and int32 6ch pack
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=24c0d1583ce6c7bb68fb40167a8b7bbaccac9e4a
---
libswresample/x86/audio_convert.asm | 43 +++++++++++++++++++++++++---------
1 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index 1fa519a..cbff72c 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -213,18 +213,34 @@ cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
mov src5q, [srcq+5*gprsize]
mov srcq, [srcq]
mov dstq, [dstq]
+%ifidn %3, a
+ test dstq, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test srcq, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src2q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src3q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src4q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src5q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+%else
+pack_6ch_%2_to_%1_u_int %+ SUFFIX
+%endif
sub src1q, srcq
sub src2q, srcq
sub src3q, srcq
sub src4q, srcq
sub src5q, srcq
.loop:
- mova m0, [srcq ]
- mova m1, [srcq+src1q]
- mova m2, [srcq+src2q]
- mova m3, [srcq+src3q]
- mova m4, [srcq+src4q]
- mova m5, [srcq+src5q]
+ mov%3 m0, [srcq ]
+ mov%3 m1, [srcq+src1q]
+ mov%3 m2, [srcq+src2q]
+ mov%3 m3, [srcq+src3q]
+ mov%3 m4, [srcq+src4q]
+ mov%3 m5, [srcq+src5q]
%if cpuflag(sse4)
SBUTTERFLYPS 0, 1, 6
SBUTTERFLYPS 2, 3, 6
@@ -237,12 +253,12 @@ cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
movlhps m1, m3
movhlps m5, m3
- movaps [dstq ], m0
- movaps [dstq+16], m6
- movaps [dstq+32], m4
- movaps [dstq+48], m1
- movaps [dstq+64], m2
- movaps [dstq+80], m5
+ mov %+ %3 %+ ps [dstq ], m0
+ mov %+ %3 %+ ps [dstq+16], m6
+ mov %+ %3 %+ ps [dstq+32], m4
+ mov %+ %3 %+ ps [dstq+48], m1
+ mov %+ %3 %+ ps [dstq+64], m2
+ mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
SBUTTERFLY dq, 0, 1, 6
SBUTTERFLY dq, 2, 3, 6
@@ -268,11 +284,14 @@ cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
%endmacro
INIT_MMX mmx
+CONV_FLTP_TO_FLT_6CH float,float,u
CONV_FLTP_TO_FLT_6CH float,float,a
INIT_XMM sse4
+CONV_FLTP_TO_FLT_6CH float,float,u
CONV_FLTP_TO_FLT_6CH float,float,a
%if HAVE_AVX
INIT_XMM avx
+CONV_FLTP_TO_FLT_6CH float,float,u
CONV_FLTP_TO_FLT_6CH float,float,a
%endif
More information about the ffmpeg-cvslog mailing list