[FFmpeg-cvslog] swr/x86: 10l, missed some SSE2 instructions in code marked as SSE.

Michael Niedermayer git at videolan.org
Thu Jul 5 15:28:35 CEST 2012


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Thu Jul  5 15:17:39 2012 +0200| [c88e60af76ad6cf3b193a7f160256061b085125e] | committer: Michael Niedermayer

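swr/x86: 10l, missed some SSE2 instructions in code marked as SSE.

The affected conversion and 2-channel pack/unpack functions are now assembled as SSE2 and selected only when AV_CPU_FLAG_SSE2 is set.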

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c88e60af76ad6cf3b193a7f160256061b085125e
---

 libswresample/x86/audio_convert.asm |    3 +--
 libswresample/x86/swresample_x86.c  |   42 ++++++++++++++++-------------------
 2 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index 6fc86f0..fcf856f 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -377,7 +377,7 @@ CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
 PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
 PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
 
-INIT_XMM sse
+INIT_XMM sse2
 CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
 CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
 CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
@@ -401,7 +401,6 @@ UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
 UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
 UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
 
-INIT_XMM sse2
 CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c
index c9a99a5..71806c1 100644
--- a/libswresample/x86/swresample_x86.c
+++ b/libswresample/x86/swresample_x86.c
@@ -49,7 +49,7 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,
     }
 
 MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
-MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
+MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2)
 
     if(mm_flags & AV_CPU_FLAG_MMX) {
         if(channels == 6) {
@@ -58,28 +58,6 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
         }
     }
 
-    if(mm_flags & AV_CPU_FLAG_SSE) {
-        if(channels == 2) {
-            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
-                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
-                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
-                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
-                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse;
-
-            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
-                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
-                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
-                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
-                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse;
-        }
-    }
-
     if(mm_flags & AV_CPU_FLAG_SSE2) {
         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
             ac->simd_f =  ff_int32_to_float_a_sse2;
@@ -91,6 +69,24 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
             ac->simd_f =  ff_float_to_int16_a_sse2;
 
         if(channels == 2) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
+                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
+                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;
+
             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                 ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)



More information about the ffmpeg-cvslog mailing list