[MPlayer-dev-eng] [PATCH] altivec scaler performance enhancement

Alan Curry pacman at TheWorld.com
Fri Feb 10 04:12:14 CET 2006


Michael Niedermayer writes the following:
>
>Hi
>
>On Tue, Feb 07, 2006 at 05:50:56AM -0500, Alan Curry wrote:
>> It would be better, I think, to just build the vectors that you're actually
>> going to use. Or build them all just once and preserve them in the
>> SwsContext, since they are just expanded versions of the v{Lum,Chr}Filter
>> that is already in there.
>
>yes
>

This patch moves the v{Y,C}CoeffsBank vectors into the SwsContext and fills
them in just once, when the scaler is initialized, instead of allocating,
building, and freeing them on every call to the AltiVec output routine. In my
tests, it cuts CPU usage in half or better - and not just the scaler's CPU
usage, but the usage of the entire mplayer process! One of my tests went from
being a 94% CPU hog to 33%.

As with most of the other patches I've sent recently, this affects only
scaled YV12->RGB/BGR and (scaled or unscaled) YVU9->RGB/BGR conversions. To
see the difference, try playing anything with -vf scale=300:-2 -vo x11 and
watch the system load.

-------------- next part --------------
Index: postproc/swscale.c
===================================================================
RCS file: /cvsroot/mplayer/main/postproc/swscale.c,v
retrieving revision 1.164
diff -u -r1.164 swscale.c
--- postproc/swscale.c	9 Feb 2006 14:08:02 -0000	1.164
+++ postproc/swscale.c	10 Feb 2006 02:32:14 -0000
@@ -2110,6 +2110,25 @@
 				c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
 				(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
 				srcFilter->chrV, dstFilter->chrV, c->param);
+
+#ifdef HAVE_ALTIVEC
+		c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
+		c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH);
+
+		for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
+                  int j;
+		  short *p = (short *)&c->vYCoeffsBank[i];
+		  for (j=0;j<8;j++)
+		    p[j] = c->vLumFilter[i];
+		}
+
+		for (i=0;i<c->vChrFilterSize*c->dstH;i++) {
+                  int j;
+		  short *p = (short *)&c->vCCoeffsBank[i];
+		  for (j=0;j<8;j++)
+		    p[j] = c->vChrFilter[i];
+		}
+#endif
 	}
 
 	// Calculate Buffer Sizes so that they won't run out while handling these damn slices
@@ -2644,6 +2663,12 @@
 	c->hLumFilter = NULL;
 	if(c->hChrFilter) free(c->hChrFilter);
 	c->hChrFilter = NULL;
+#ifdef HAVE_ALTIVEC
+	if(c->vYCoeffsBank) free(c->vYCoeffsBank);
+	c->vYCoeffsBank = NULL;
+	if(c->vCCoeffsBank) free(c->vCCoeffsBank);
+	c->vCCoeffsBank = NULL;
+#endif
 
 	if(c->vLumFilterPos) free(c->vLumFilterPos);
 	c->vLumFilterPos = NULL;
Index: postproc/swscale_internal.h
===================================================================
RCS file: /cvsroot/mplayer/main/postproc/swscale_internal.h,v
retrieving revision 1.15
diff -u -r1.15 swscale_internal.h
--- postproc/swscale_internal.h	13 Jan 2006 00:23:32 -0000	1.15
+++ postproc/swscale_internal.h	10 Feb 2006 02:32:15 -0000
@@ -154,6 +154,7 @@
   vector signed short   CGV;
   vector signed short   OY;
   vector unsigned short CSHIFT;
+  vector signed short *vYCoeffsBank, *vCCoeffsBank;
 
 #endif
 
Index: postproc/yuv2rgb_altivec.c
===================================================================
RCS file: /cvsroot/mplayer/main/postproc/yuv2rgb_altivec.c,v
retrieving revision 1.9
diff -u -r1.9 yuv2rgb_altivec.c
--- postproc/yuv2rgb_altivec.c	9 Feb 2006 00:29:35 -0000	1.9
+++ postproc/yuv2rgb_altivec.c	10 Feb 2006 02:32:21 -0000
@@ -774,8 +774,6 @@
 		       uint8_t *dest, int dstW, int dstY)
 {
   int i,j;
-  short tmp __attribute__((aligned (16)));
-  int16_t *p;
   short *f;
   vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
   vector signed short R0,G0,B0,R1,G1,B1;
@@ -787,29 +785,10 @@
   vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0);
   unsigned long scratch[16] __attribute__ ((aligned (16)));
 
-  vector signed short *vYCoeffsBank, *vCCoeffsBank;
-
   vector signed short *YCoeffs, *CCoeffs;
 
-  vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
-  vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
-
-  for (i=0;i<lumFilterSize*c->dstH;i++) {
-    tmp = c->vLumFilter[i];
-    p = &vYCoeffsBank[i];
-    for (j=0;j<8;j++)
-      p[j] = tmp;
-  }
-
-  for (i=0;i<chrFilterSize*c->dstH;i++) {
-    tmp = c->vChrFilter[i];
-    p = &vCCoeffsBank[i];
-    for (j=0;j<8;j++)
-      p[j] = tmp;
-  }
-
-  YCoeffs = vYCoeffsBank+dstY*lumFilterSize;
-  CCoeffs = vCCoeffsBank+dstY*chrFilterSize;
+  YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
+  CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
 
   out = (vector unsigned char *)dest;
 
@@ -962,7 +941,4 @@
     memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
   }
 
-  if (vYCoeffsBank) free (vYCoeffsBank);
-  if (vCCoeffsBank) free (vCCoeffsBank);
-
 }


More information about the MPlayer-dev-eng mailing list