[MPlayer-dev-eng] altivec patch 3/5: 16 byte alignment

Alan Curry pacman at world.std.com
Tue Feb 7 11:53:29 CET 2006


This fixes a few places where AltiVec operations were used on memory that had
not been aligned on a 16-byte boundary. Some of those places (the pixbufs and
filters) could be changed to use 16-byte alignment only when AltiVec is enabled
and, in the case of the filters, to skip it for the vertical filters, where
it is not necessary. That seems like too much work to save a few bytes, so I
will leave it as is unless someone requests it.

-------------- next part --------------
diff -u postproc/yuv2rgb_altivec.c postproc/yuv2rgb_altivec.c
--- postproc/yuv2rgb_altivec.c	2006-02-07 04:17:40.000000000 -0500
+++ postproc/yuv2rgb_altivec.c	2006-02-07 04:10:57.000000000 -0500
@@ -68,6 +68,9 @@
 #include <inttypes.h>
 #include <assert.h>
 #include "config.h"
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
@@ -789,8 +792,8 @@
 
   vector signed short *YCoeffs, *CCoeffs;
 
-  vYCoeffsBank = malloc (sizeof (vector signed short)*lumFilterSize*c->dstH);
-  vCCoeffsBank = malloc (sizeof (vector signed short)*chrFilterSize*c->dstH);
+  vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
+  vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
 
   for (i=0;i<lumFilterSize*c->dstH;i++) {
     tmp = c->vLumFilter[i];
diff -u postproc/swscale.c postproc/swscale.c
--- postproc/swscale.c	2006-02-07 04:17:40.000000000 -0500
+++ postproc/swscale.c	2006-02-07 04:16:32.000000000 -0500
@@ -1166,7 +1166,8 @@
 	}
 
 	// Note the +1 is for the MMXscaler which reads over the end
-	*outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
+	/* align at 16 for AltiVec (needed by hScale_altivec_real) */
+	*outFilter= (int16_t*)memalign(16, *outFilterSize*(dstW+1)*sizeof(int16_t));
 	memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
 
 	/* Normalize & Store in outFilter */
@@ -2132,10 +2133,11 @@
 	c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
 	c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
 	//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
+	/* align at 16 bytes for AltiVec */
 	for(i=0; i<c->vLumBufSize; i++)
-		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
+		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(16, 4000);
 	for(i=0; i<c->vChrBufSize; i++)
-		c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
+		c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(16, 8000);
 
 	//try to avoid drawing green stuff between the right end and the stride end
 	for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);


More information about the MPlayer-dev-eng mailing list