[FFmpeg-cvslog] r10156 - trunk/libavcodec/i386/snowdsp_mmx.c
michael
subversion
Tue Aug 21 00:29:21 CEST 2007
Author: michael
Date: Tue Aug 21 00:29:21 2007
New Revision: 10156
Log:
and of course the unneeded double subtractions were blindly put in the
mmx code
this also makes the affected code 4% faster
Modified:
trunk/libavcodec/i386/snowdsp_mmx.c
Modified: trunk/libavcodec/i386/snowdsp_mmx.c
==============================================================================
--- trunk/libavcodec/i386/snowdsp_mmx.c (original)
+++ trunk/libavcodec/i386/snowdsp_mmx.c Tue Aug 21 00:29:21 2007
@@ -294,9 +294,10 @@ void ff_snow_horizontal_compose97i_mmx(D
DWTELEM * const ref = b+w2 - 1;
i = 1;
- b[0] = b[0] - (((-2 * ref[1] + W_BO) - 4 * b[0]) >> W_BS);
+ b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
asm volatile(
- "pslld $1, %%mm7 \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ "psrld $29, %%mm7 \n\t"
::);
for(; i<w_l-3; i+=4){
asm volatile(
@@ -304,22 +305,18 @@ void ff_snow_horizontal_compose97i_mmx(D
"movq 8(%1), %%mm4 \n\t"
"paddd 4(%1), %%mm0 \n\t"
"paddd 12(%1), %%mm4 \n\t"
- "movq %%mm7, %%mm1 \n\t"
- "movq %%mm7, %%mm5 \n\t"
- "psubd %%mm0, %%mm1 \n\t"
- "psubd %%mm4, %%mm5 \n\t"
- "movq (%0), %%mm0 \n\t"
- "movq 8(%0), %%mm4 \n\t"
- "pslld $2, %%mm0 \n\t"
- "pslld $2, %%mm4 \n\t"
- "psubd %%mm0, %%mm1 \n\t"
- "psubd %%mm4, %%mm5 \n\t"
- "psrad $4, %%mm1 \n\t"
- "psrad $4, %%mm5 \n\t"
- "movq (%0), %%mm0 \n\t"
- "movq 8(%0), %%mm4 \n\t"
- "psubd %%mm1, %%mm0 \n\t"
- "psubd %%mm5, %%mm4 \n\t"
+ "paddd %%mm7, %%mm0 \n\t"
+ "paddd %%mm7, %%mm4 \n\t"
+ "psrad $2, %%mm0 \n\t"
+ "psrad $2, %%mm4 \n\t"
+ "movq (%0), %%mm1 \n\t"
+ "movq 8(%0), %%mm5 \n\t"
+ "paddd %%mm1, %%mm0 \n\t"
+ "paddd %%mm5, %%mm4 \n\t"
+ "psrad $2, %%mm0 \n\t"
+ "psrad $2, %%mm4 \n\t"
+ "paddd %%mm1, %%mm0 \n\t"
+ "paddd %%mm5, %%mm4 \n\t"
"movq %%mm0, (%0) \n\t"
"movq %%mm4, 8(%0) \n\t"
:: "r"(&b[i]), "r"(&ref[i])
More information about the ffmpeg-cvslog
mailing list