[Ffmpeg-devel] [PATCH] another vorbis optimization

Loren Merritt lorenm
Tue Aug 8 08:32:34 CEST 2006


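This patch makes Vorbis decoding another 6% faster by using SSE2 for the final
float-to-int16 sample conversion. The SSE2 path is currently selected at compile
time with #ifdef HAVE_SSE2; I am unsure of the cleanest way to integrate it with
run-time CPU detection.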

--Loren Merritt
-------------- next part --------------
Index: libavcodec/vorbis.c
===================================================================
--- libavcodec/vorbis.c	(revision 5954)
+++ libavcodec/vorbis.c	(working copy)
@@ -1472,7 +1472,11 @@
 }
 
 // Decode the audio packet using the functions above
+#ifdef HAVE_SSE2
+#define BIAS 0
+#else
 #define BIAS 385
+#endif
 
 static int vorbis_parse_audio_packet(vorbis_context *vc) {
     GetBitContext *gb=&vc->gb;
@@ -1695,6 +1699,24 @@
 
     AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len);
 
+#ifdef HAVE_SSE2
+    asm volatile(
+        "movd   %0, %%xmm7 \n\t"
+        "pshufd $0, %%xmm7, %%xmm7 \n\t"
+        ::"r"(15<<23)
+    );
+    for(i=0;i<len;i+=4) {
+        asm volatile(
+            "movdqa       %1, %%xmm0 \n\t"
+            "paddd    %%xmm7, %%xmm0 \n\t"
+            "cvtps2dq %%xmm0, %%xmm0 \n\t"
+            "packssdw %%xmm0, %%xmm0 \n\t"
+            "movq     %%xmm0, %0     \n\t"
+            :"=m"(((int16_t*)data)[i])
+            :"m"(vc->ret[i])
+        );
+    }
+#else
     for(i=0;i<len;++i) {
         int_fast32_t tmp= ((int32_t*)vc->ret)[i];
         if(tmp & 0xf0000){
@@ -1704,6 +1726,7 @@
         }
         ((int16_t*)data)[i]=tmp - 0x8000;
     }
+#endif
 
     *data_size=len*2;
 



More information about the ffmpeg-devel mailing list