[Ffmpeg-devel] [PATCH] another vorbis optimization
Loren Merritt
lorenm
Tue Aug 8 08:32:34 CEST 2006
This patch makes Vorbis decoding another 6% faster, but I am unsure of the
cleanest way to integrate it with run-time CPU detection.
--Loren Merritt
-------------- next part --------------
Index: libavcodec/vorbis.c
===================================================================
--- libavcodec/vorbis.c (revision 5954)
+++ libavcodec/vorbis.c (working copy)
@@ -1472,7 +1472,11 @@
}
// Decode the audio packet using the functions above
+#ifdef HAVE_SSE2
+#define BIAS 0
+#else
#define BIAS 385
+#endif
static int vorbis_parse_audio_packet(vorbis_context *vc) {
GetBitContext *gb=&vc->gb;
@@ -1695,6 +1699,24 @@
AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len);
+#ifdef HAVE_SSE2
+ asm volatile(
+ "movd %0, %%xmm7 \n\t"
+ "pshufd $0, %%xmm7, %%xmm7 \n\t"
+ ::"r"(15<<23)
+ );
+ for(i=0;i<len;i+=4) {
+ asm volatile(
+ "movdqa %1, %%xmm0 \n\t"
+ "paddd %%xmm7, %%xmm0 \n\t"
+ "cvtps2dq %%xmm0, %%xmm0 \n\t"
+ "packssdw %%xmm0, %%xmm0 \n\t"
+ "movq %%xmm0, %0 \n\t"
+ :"=m"(((int16_t*)data)[i])
+ :"m"(vc->ret[i])
+ );
+ }
+#else
for(i=0;i<len;++i) {
int_fast32_t tmp= ((int32_t*)vc->ret)[i];
if(tmp & 0xf0000){
@@ -1704,6 +1726,7 @@
}
((int16_t*)data)[i]=tmp - 0x8000;
}
+#endif
*data_size=len*2;
More information about the ffmpeg-devel
mailing list