[Mplayer-cvslog] CVS: main/postproc swscale.c,1.30,1.31
Michael Niedermayer
michael at mplayer.dev.hu
Tue Oct 30 23:24:40 CET 2001
Update of /cvsroot/mplayer/main/postproc
In directory mplayer:/var/tmp.root/cvs-serv1248/postproc
Modified Files:
swscale.c
Log Message:
C optimizations
bugfix
Index: swscale.c
===================================================================
RCS file: /cvsroot/mplayer/main/postproc/swscale.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -r1.30 -r1.31
--- swscale.c 30 Oct 2001 22:03:25 -0000 1.30
+++ swscale.c 30 Oct 2001 22:24:38 -0000 1.31
@@ -290,10 +290,10 @@
"movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
"movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "paddw %%mm2, %%mm3 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "psraw $5, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
- "psraw $5, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
+ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
+ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
+ "psrlw $5, %%mm3 \n\t"\
+ "psrlw $5, %%mm4 \n\t"\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
@@ -785,7 +785,6 @@
);
}
#else
-//FIXME unroll C loop and dont recalculate UV
asm volatile ("\n\t"::: "memory");
if(dstbpp==32)
@@ -898,8 +897,9 @@
yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp);
return;
}
-#ifdef HAVE_MMX
if( yalpha > 2048 ) buf0 = buf1;
+
+#ifdef HAVE_MMX
if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster
{
if(dstbpp == 32)
@@ -1013,48 +1013,99 @@
}
}
#else
-//FIXME unroll C loop and dont recalculate UV
+//FIXME write 2 versions (for even & odd lines)
asm volatile ("\n\t"::: "memory");
- if(dstbpp==32 || dstbpp==24)
+ if(dstbpp==32)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[buf0[i]>>7];
+ int Y1=yuvtab_2568[buf0[i]>>7];
+ int Y2=yuvtab_2568[buf0[i+1]>>7];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
- dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
- dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
- dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
- dest+=dstbpp>>3;
+
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
+ dest[4*i+0]=clip_table[((Y1 + Cb) >>13)];
+ dest[4*i+1]=clip_table[((Y1 + Cg) >>13)];
+ dest[4*i+2]=clip_table[((Y1 + Cr) >>13)];
+
+ dest[4*i+4]=clip_table[((Y2 + Cb) >>13)];
+ dest[4*i+5]=clip_table[((Y2 + Cg) >>13)];
+ dest[4*i+6]=clip_table[((Y2 + Cr) >>13)];
+ }
+ }
+ if(dstbpp==24)
+ {
+ for(i=0; i<dstw-1; i+=2){
+ // vertical linear interpolation && yuv2rgb in a single step:
+ int Y1=yuvtab_2568[buf0[i]>>7];
+ int Y2=yuvtab_2568[buf0[i+1]>>7];
+ int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
+ int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
+ dest[0]=clip_table[((Y1 + Cb) >>13)];
+ dest[1]=clip_table[((Y1 + Cg) >>13)];
+ dest[2]=clip_table[((Y1 + Cr) >>13)];
+
+ dest[3]=clip_table[((Y2 + Cb) >>13)];
+ dest[4]=clip_table[((Y2 + Cg) >>13)];
+ dest[5]=clip_table[((Y2 + Cr) >>13)];
+ dest+=6;
}
}
else if(dstbpp==16)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[buf0[i]>>7];
+ int Y1=yuvtab_2568[buf0[i]>>7];
+ int Y2=yuvtab_2568[buf0[i+1]>>7];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
((uint16_t*)dest)[i] =
- (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
- ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
- ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
+ (clip_table[(Y1 + Cb) >>13]>>3) |
+ ((clip_table[(Y1 + Cg) >>13]<<3)&0x07E0) |
+ ((clip_table[(Y1 + Cr) >>13]<<8)&0xF800);
+
+ ((uint16_t*)dest)[i+1] =
+ (clip_table[(Y2 + Cb) >>13]>>3) |
+ ((clip_table[(Y2 + Cg) >>13]<<3)&0x07E0) |
+ ((clip_table[(Y2 + Cr) >>13]<<8)&0xF800);
}
}
else if(dstbpp==15)
{
- for(i=0;i<dstw;i++){
+ for(i=0; i<dstw-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step:
- int Y=yuvtab_2568[buf0[i]>>7];
+ int Y1=yuvtab_2568[buf0[i]>>7];
+ int Y2=yuvtab_2568[buf0[i+1]>>7];
int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
+ int Cb= yuvtab_40cf[U];
+ int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U];
+ int Cr= yuvtab_3343[V];
+
((uint16_t*)dest)[i] =
- (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
- ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
- ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
+ (clip_table[(Y1 + Cb) >>13]>>3) |
+ ((clip_table[(Y1 + Cg) >>13]<<2)&0x03E0) |
+ ((clip_table[(Y1 + Cr) >>13]<<7)&0x7C00);
+ ((uint16_t*)dest)[i+1] =
+ (clip_table[(Y2 + Cb) >>13]>>3) |
+ ((clip_table[(Y2 + Cg) >>13]<<2)&0x03E0) |
+ ((clip_table[(Y2 + Cr) >>13]<<7)&0x7C00);
}
}
#endif
More information about the MPlayer-cvslog
mailing list