[MPlayer-dev-eng] [RFC] emms/sfence in fast memcpy()

Reimar Döffinger Reimar.Doeffinger at stud.uni-karlsruhe.de
Mon May 28 00:40:21 CEST 2007


Hello,
On Sun, May 27, 2007 at 11:53:39PM +0200, Michael Niedermayer wrote:
> current fast memcpy (agp and normal) variants do emms and sfence at the
> end, this is slow and particularly annoying if the copy is called once per
> line, which it is in many cases
> I think that something should be done about that, though I am not
> volunteering to do the work
> 
> if we didn't do this silly redefinition of memcpy() then this would be easier ...
> an mp/av_memcpy() could simply leave the emms/sfence to the caller

I have already done most of the work to make all relevant places use
fast_memcpy explicitly, see attached patch.
In about 5 places it is obviously nonsense to use fast_memcpy; I would
replace these in a second patch. There are also two FIXMEs in
libswscale, since I have not yet decided how to fix these. Probably just
by adding a "#define memcpy fast_memcpy" there...
And could someone please explain what the point of that my_memcpy_pic is,
which exists (and, worse, is duplicated) in so many filters??

Greetings,
Reimar Döffinger
-------------- next part --------------
Index: libmpcodecs/vf_fspp.c
===================================================================
--- libmpcodecs/vf_fspp.c	(revision 23390)
+++ libmpcodecs/vf_fspp.c	(working copy)
@@ -421,15 +421,15 @@
     if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
     for(y=0; y<height; y++){
         int index= 8 + 8*stride + y*stride;
-        memcpy(p->src + index, src + y*src_stride, width);//this line can be avoided by using DR & user fr.buffers
+        fast_memcpy(p->src + index, src + y*src_stride, width);//this line can be avoided by using DR & user fr.buffers
         for(x=0; x<8; x++){ 
             p->src[index         - x - 1]= p->src[index +         x    ];
             p->src[index + width + x    ]= p->src[index + width - x - 1];
         }
     }
     for(y=0; y<8; y++){
-        memcpy(p->src + (      7-y)*stride, p->src + (      y+8)*stride, stride);
-        memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
+        fast_memcpy(p->src + (      7-y)*stride, p->src + (      y+8)*stride, stride);
+        fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
     }
     //FIXME (try edge emu)
 
@@ -456,8 +456,8 @@
 		    column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT
 		}
 	    row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1));
-	    memcpy(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling
-	    memcpy(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM));  
+	    fast_memcpy(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling
+	    fast_memcpy(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM));  
 	}
 	//
 	es=width+8-x0; //  8, ...      
@@ -532,7 +532,7 @@
     if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){
 	if(!vf->priv->non_b_qp)
 	    vf->priv->non_b_qp= malloc(mpi->qstride * ((mpi->h + 15) >> 4));
-	memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
+	fast_memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
     }
     if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
 	char *qp_tab= vf->priv->non_b_qp;
Index: libmpcodecs/vd_mpng.c
===================================================================
--- libmpcodecs/vd_mpng.c	(revision 23390)
+++ libmpcodecs/vd_mpng.c	(working copy)
@@ -58,7 +58,7 @@
 {
  char * p = pngstr->io_ptr;
  if(size>pngLength-pngPointer && pngLength>=pngPointer) size=pngLength-pngPointer;
- memcpy( buffer,(char *)&p[pngPointer],size );
+ fast_memcpy( buffer,(char *)&p[pngPointer],size );
  pngPointer+=size;
 }
 
Index: libmpcodecs/vf_filmdint.c
===================================================================
--- libmpcodecs/vf_filmdint.c	(revision 23390)
+++ libmpcodecs/vf_filmdint.c	(working copy)
@@ -97,7 +97,7 @@
 
     for(i=0; i<height; i++)
     {
-	memcpy(dst, src, bytesPerLine);
+	fast_memcpy(dst, src, bytesPerLine);
 	src+= srcStride;
 	dst+= dstStride;
     }
@@ -711,7 +711,7 @@
     long bos = b - a;
     long cos = c - a;
     if (field) {
-	memcpy(d, b, w);
+	fast_memcpy(d, b, w);
 	h--;
 	d += ds;
 	a += ss;
@@ -720,8 +720,8 @@
     cos += ss;
     while (h > 2) {
 	if (threshold >= 128) {
-	    memcpy(d, a, w);
-	    memcpy(d+ds, a+bos, w);
+	    fast_memcpy(d, a, w);
+	    fast_memcpy(d+ds, a+bos, w);
 	} else if (mmx2 == 1) {
 	    ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
 	} else
@@ -730,9 +730,9 @@
 	d += 2*ds;
 	a += 2*ss;
     }
-    memcpy(d, a, w);
+    fast_memcpy(d, a, w);
     if (h == 2)
-	memcpy(d+ds, a+bos, w);
+	fast_memcpy(d+ds, a+bos, w);
     return ret;
 }
 
Index: libmpcodecs/vf_kerndeint.c
===================================================================
--- libmpcodecs/vf_kerndeint.c	(revision 23390)
+++ libmpcodecs/vf_kerndeint.c	(working copy)
@@ -128,16 +128,16 @@
 		dstp = dstp_saved + (1-order) * dst_pitch;
 
 		for (y=0; y<h; y+=2) {
-			memcpy(dstp, srcp, w);
+			fast_memcpy(dstp, srcp, w);
 			srcp += 2*src_pitch;
 			dstp += 2*dst_pitch;
 		}
 
 		// Copy through the lines that will be missed below.
-		memcpy(dstp_saved + order*dst_pitch, srcp_saved + (1-order)*src_pitch, w);
-		memcpy(dstp_saved + (2+order)*dst_pitch, srcp_saved + (3-order)*src_pitch, w);
-		memcpy(dstp_saved + (h-2+order)*dst_pitch, srcp_saved + (h-1-order)*src_pitch, w);
-		memcpy(dstp_saved + (h-4+order)*dst_pitch, srcp_saved + (h-3-order)*src_pitch, w);
+		fast_memcpy(dstp_saved + order*dst_pitch, srcp_saved + (1-order)*src_pitch, w);
+		fast_memcpy(dstp_saved + (2+order)*dst_pitch, srcp_saved + (3-order)*src_pitch, w);
+		fast_memcpy(dstp_saved + (h-2+order)*dst_pitch, srcp_saved + (h-1-order)*src_pitch, w);
+		fast_memcpy(dstp_saved + (h-4+order)*dst_pitch, srcp_saved + (h-3-order)*src_pitch, w);
 		/* For the other field choose adaptively between using the previous field
 		   or the interpolant from the current field. */
 
@@ -270,7 +270,7 @@
 		srcp = mpi->planes[z];
 		dstp = pmpi->planes[z];
 		for (y=0; y<h; y++) {
-			memcpy(dstp, srcp, w);
+			fast_memcpy(dstp, srcp, w);
 			srcp += src_pitch;
 			dstp += psrc_pitch;
 		}
Index: libmpcodecs/ad_mpc.c
===================================================================
--- libmpcodecs/ad_mpc.c	(revision 23390)
+++ libmpcodecs/ad_mpc.c	(working copy)
@@ -54,7 +54,7 @@
   if (d->pos < d->header_len) {
     if (s > d->header_len - d->pos)
       s = d->header_len - d->pos;
-    memcpy(p, &d->header[d->pos], s);
+    fast_memcpy(p, &d->header[d->pos], s);
   } else
     s = 0;
   memset(&p[s], 0, size - s);
Index: libmpcodecs/vf_tinterlace.c
===================================================================
--- libmpcodecs/vf_tinterlace.c	(revision 23390)
+++ libmpcodecs/vf_tinterlace.c	(working copy)
@@ -43,7 +43,7 @@
 
 	for(i=0; i<height; i++)
 	{
-		memcpy(dst, src, bytesPerLine);
+		fast_memcpy(dst, src, bytesPerLine);
 		src+= srcStride;
 		dst+= dstStride;
 	}
Index: libmpcodecs/native/nuppelvideo.c
===================================================================
--- libmpcodecs/native/nuppelvideo.c	(revision 23390)
+++ libmpcodecs/native/nuppelvideo.c	(working copy)
@@ -67,7 +67,7 @@
 		switch(encodedh->comptype)
 		{
 		    case '0': /* raw YUV420 */
-			memcpy(decoded, encoded + 12, out_len);
+			fast_memcpy(decoded, encoded + 12, out_len);
 			break;
 		    case '1': /* RTJpeg */
 			RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded );
@@ -102,13 +103,13 @@
 			break;
 		    case 'L': /* copy last frame */
 #ifdef KEEP_BUFFER
-			memcpy ( decoded, previous_buffer, width*height*3/2);
+			fast_memcpy ( decoded, previous_buffer, width*height*3/2);
 #endif
 			break;
 		}
 
 #ifdef KEEP_BUFFER
-		memcpy(previous_buffer, decoded, width*height*3/2);
+		fast_memcpy(previous_buffer, decoded, width*height*3/2);
 #endif
 		break;
 	    }
Index: libmpcodecs/vf_noise.c
===================================================================
--- libmpcodecs/vf_noise.c	(revision 23390)
+++ libmpcodecs/vf_noise.c	(working copy)
@@ -278,12 +278,12 @@
 	{
 		if(src==dst) return;
 
-		if(dstStride==srcStride) memcpy(dst, src, srcStride*height);
+		if(dstStride==srcStride) fast_memcpy(dst, src, srcStride*height);
 		else
 		{
 			for(y=0; y<height; y++)
 			{
-				memcpy(dst, src, width);
+				fast_memcpy(dst, src, width);
 				dst+= dstStride;
 				src+= srcStride;
 			}
Index: libmpcodecs/vf_down3dright.c
===================================================================
--- libmpcodecs/vf_down3dright.c	(revision 23390)
+++ libmpcodecs/vf_down3dright.c	(working copy)
@@ -67,7 +67,7 @@
 					*t++ = *sR++;
 			}
 			if (p->scaleh == 1) {
-				memcpy(to + dst, to, dst);
+				fast_memcpy(to + dst, to, dst);
                                 to += dst;
 			}
 			to += dst;
Index: libmpcodecs/vf_phase.c
===================================================================
--- libmpcodecs/vf_phase.c	(revision 23390)
+++ libmpcodecs/vf_phase.c	(working copy)
@@ -45,8 +45,8 @@
 
    for(end=to+h*ts, buf=*bufp, top=1; to<end; from+=fs, to+=ts, buf+=w, top^=1)
       {
-      memcpy(to, mode==(top?BOTTOM_FIRST:TOP_FIRST)?buf:from, w);
-      memcpy(buf, from, w);
+      fast_memcpy(to, mode==(top?BOTTOM_FIRST:TOP_FIRST)?buf:from, w);
+      fast_memcpy(buf, from, w);
       }
    }
 
Index: libmpcodecs/vf_yvu9.c
===================================================================
--- libmpcodecs/vf_yvu9.c	(revision 23390)
+++ libmpcodecs/vf_yvu9.c	(working copy)
@@ -37,7 +37,7 @@
 	mpi->w, mpi->h);
 
     for(y=0;y<mpi->h;y++)
-	memcpy(dmpi->planes[0]+dmpi->stride[0]*y,
+	fast_memcpy(dmpi->planes[0]+dmpi->stride[0]*y,
 	       mpi->planes[0]+mpi->stride[0]*y,
 	       mpi->w);
 
Index: libmpcodecs/vf_uspp.c
===================================================================
--- libmpcodecs/vf_uspp.c	(revision 23390)
+++ libmpcodecs/vf_uspp.c	(working copy)
@@ -154,15 +154,15 @@
             continue; // HACK avoid crash for Y8 colourspace
         for(y=0; y<h; y++){
             int index= block + block*stride + y*stride;
-            memcpy(p->src[i] + index, src[i] + y*src_stride[i], w);
+            fast_memcpy(p->src[i] + index, src[i] + y*src_stride[i], w);
             for(x=0; x<block; x++){ 
                 p->src[i][index     - x - 1]= p->src[i][index +     x    ];
                 p->src[i][index + w + x    ]= p->src[i][index + w - x - 1];
             }
         }
         for(y=0; y<block; y++){
-            memcpy(p->src[i] + (  block-1-y)*stride, p->src[i] + (  y+block  )*stride, stride);
-            memcpy(p->src[i] + (h+block  +y)*stride, p->src[i] + (h-y+block-1)*stride, stride);
+            fast_memcpy(p->src[i] + (  block-1-y)*stride, p->src[i] + (  y+block  )*stride, stride);
+            fast_memcpy(p->src[i] + (h+block  +y)*stride, p->src[i] + (h-y+block-1)*stride, stride);
         }
 
         p->frame->linesize[i]= stride;
Index: libmpcodecs/vf_telecine.c
===================================================================
--- libmpcodecs/vf_telecine.c	(revision 23390)
+++ libmpcodecs/vf_telecine.c	(working copy)
@@ -22,7 +22,7 @@
 
 	for(i=0; i<height; i++)
 	{
-		memcpy(dst, src, bytesPerLine);
+		fast_memcpy(dst, src, bytesPerLine);
 		src+= srcStride;
 		dst+= dstStride;
 	}
Index: libmpcodecs/vf_tfields.c
===================================================================
--- libmpcodecs/vf_tfields.c	(revision 23390)
+++ libmpcodecs/vf_tfields.c	(working copy)
@@ -27,7 +27,7 @@
 
 	for(i=0; i<height; i++)
 	{
-		memcpy(dst, src, bytesPerLine);
+		fast_memcpy(dst, src, bytesPerLine);
 		src+= srcStride;
 		dst+= dstStride;
 	}
@@ -68,7 +68,7 @@
 	long crap1, crap2;
 	if (up) {
 		ssd = -ss;
-		memcpy(d, s, w);
+		fast_memcpy(d, s, w);
 		d += ds;
 		s += ss;
 	}
@@ -92,7 +92,7 @@
 		d += ds;
 		s += ss;
 	}
-	if (!up) memcpy(d, s, w);
+	if (!up) fast_memcpy(d, s, w);
 	asm volatile("emms \n\t" : : : "memory");
 }
 #endif
@@ -104,7 +104,7 @@
 	long crap1, crap2;
 	if (up) {
 		ssd = -ss;
-		memcpy(d, s, w);
+		fast_memcpy(d, s, w);
 		d += ds;
 		s += ss;
 	}
@@ -129,7 +129,7 @@
 		d += ds;
 		s += ss;
 	}
-	if (!up) memcpy(d, s, w);
+	if (!up) fast_memcpy(d, s, w);
 	asm volatile("emms \n\t" : : : "memory");
 }
 #endif
@@ -141,7 +141,7 @@
 	int crap1, crap2;
 	if (up) {
 		ssd = -ss;
-		memcpy(d, s, w);
+		fast_memcpy(d, s, w);
 		d += ds;
 		s += ss;
 	}
@@ -179,7 +179,7 @@
 		d += ds;
 		s += ss;
 	}
-	if (!up) memcpy(d, s, w);
+	if (!up) fast_memcpy(d, s, w);
 	asm volatile("emms \n\t" : : : "memory");
 }
 
@@ -193,7 +193,7 @@
 	int crap1, crap2;
 	if (up) {
 		ssd = -ss;
-		memcpy(d, s, w);
+		fast_memcpy(d, s, w);
 		d += ds; s += ss;
 	}
 	for (j=0; j<w; j++)
@@ -259,7 +259,7 @@
 	for (j=0; j<w; j++)
 		d[j] = (s[j+ssd] + 3*s[j])>>2;
 	d += ds; s += ss;
-	if (!up) memcpy(d, s, w);
+	if (!up) fast_memcpy(d, s, w);
 	asm volatile("emms \n\t" : : : "memory");
 }
 #endif
@@ -276,7 +276,7 @@
 	int i, j, ssd=ss;
 	if (up) {
 		ssd = -ss;
-		memcpy(d, s, w);
+		fast_memcpy(d, s, w);
 		d += ds;
 		s += ss;
 	}
@@ -286,7 +286,7 @@
 		d += ds;
 		s += ss;
 	}
-	if (!up) memcpy(d, s, w);
+	if (!up) fast_memcpy(d, s, w);
 }
 
 static void qpel_4tap_C(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
@@ -294,7 +294,7 @@
 	int i, j, ssd=ss;
 	if (up) {
 		ssd = -ss;
-		memcpy(d, s, w);
+		fast_memcpy(d, s, w);
 		d += ds; s += ss;
 	}
 	for (j=0; j<w; j++)
@@ -308,7 +308,7 @@
 	for (j=0; j<w; j++)
 		d[j] = (s[j+ssd] + 3*s[j] + 2)>>2;
 	d += ds; s += ss;
-	if (!up) memcpy(d, s, w);
+	if (!up) fast_memcpy(d, s, w);
 }
 
 static void (*qpel_li)(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up);
Index: libmpcodecs/vf_softpulldown.c
===================================================================
--- libmpcodecs/vf_softpulldown.c	(revision 23390)
+++ libmpcodecs/vf_softpulldown.c	(working copy)
@@ -24,7 +24,7 @@
 
 	for(i=0; i<height; i++)
 	{
-		memcpy(dst, src, bytesPerLine);
+		fast_memcpy(dst, src, bytesPerLine);
 		src+= srcStride;
 		dst+= dstStride;
 	}
Index: libmpcodecs/vf_spp.c
===================================================================
--- libmpcodecs/vf_spp.c	(revision 23390)
+++ libmpcodecs/vf_spp.c	(working copy)
@@ -380,15 +380,15 @@
 	if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
 	for(y=0; y<height; y++){
 		int index= 8 + 8*stride + y*stride;
-		memcpy(p->src + index, src + y*src_stride, width);
+		fast_memcpy(p->src + index, src + y*src_stride, width);
 		for(x=0; x<8; x++){ 
 			p->src[index         - x - 1]= p->src[index +         x    ];
 			p->src[index + width + x    ]= p->src[index + width - x - 1];
 		}
 	}
 	for(y=0; y<8; y++){
-		memcpy(p->src + (      7-y)*stride, p->src + (      y+8)*stride, stride);
-		memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
+		fast_memcpy(p->src + (      7-y)*stride, p->src + (      y+8)*stride, stride);
+		fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
 	}
 	//FIXME (try edge emu)
 
@@ -478,7 +478,7 @@
         if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){
             if(!vf->priv->non_b_qp)
                 vf->priv->non_b_qp= malloc(mpi->qstride * ((mpi->h + 15) >> 4));
-            memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
+            fast_memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
         }
 	if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
             char *qp_tab= vf->priv->non_b_qp;
Index: libmpcodecs/vf_il.c
===================================================================
--- libmpcodecs/vf_il.c	(revision 23390)
+++ libmpcodecs/vf_il.c	(working copy)
@@ -58,20 +58,20 @@
 	switch(interleave){
 	case -1:
 		for(y=0; y < m; y++){
-			memcpy(dst + dstStride* y     , src + srcStride*(y*2 + a), w);
-			memcpy(dst + dstStride*(y + m), src + srcStride*(y*2 + b), w);
+			fast_memcpy(dst + dstStride* y     , src + srcStride*(y*2 + a), w);
+			fast_memcpy(dst + dstStride*(y + m), src + srcStride*(y*2 + b), w);
 		}
 		break;
 	case 0:
 		for(y=0; y < m; y++){
-			memcpy(dst + dstStride* y*2   , src + srcStride*(y*2 + a), w);
-			memcpy(dst + dstStride*(y*2+1), src + srcStride*(y*2 + b), w);
+			fast_memcpy(dst + dstStride* y*2   , src + srcStride*(y*2 + a), w);
+			fast_memcpy(dst + dstStride*(y*2+1), src + srcStride*(y*2 + b), w);
 		}
 		break;
 	case 1:
 		for(y=0; y < m; y++){
-			memcpy(dst + dstStride*(y*2+a), src + srcStride* y     , w);
-			memcpy(dst + dstStride*(y*2+b), src + srcStride*(y + m), w);
+			fast_memcpy(dst + dstStride*(y*2+a), src + srcStride* y     , w);
+			fast_memcpy(dst + dstStride*(y*2+b), src + srcStride*(y + m), w);
 		}
 		break;
 	}
Index: libmpcodecs/vf_pp7.c
===================================================================
--- libmpcodecs/vf_pp7.c	(revision 23390)
+++ libmpcodecs/vf_pp7.c	(working copy)
@@ -290,15 +290,15 @@
     if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
     for(y=0; y<height; y++){
         int index= 8 + 8*stride + y*stride;
-        memcpy(p_src + index, src + y*src_stride, width);
+        fast_memcpy(p_src + index, src + y*src_stride, width);
         for(x=0; x<8; x++){ 
             p_src[index         - x - 1]= p_src[index +         x    ];
             p_src[index + width + x    ]= p_src[index + width - x - 1];
         }
     }
     for(y=0; y<8; y++){
-        memcpy(p_src + (       7-y)*stride, p_src + (       y+8)*stride, stride);
-        memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
+        fast_memcpy(p_src + (       7-y)*stride, p_src + (       y+8)*stride, stride);
+        fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
     }
     //FIXME (try edge emu)
 
Index: libmpcodecs/vf_pullup.c
===================================================================
--- libmpcodecs/vf_pullup.c	(revision 23390)
+++ libmpcodecs/vf_pullup.c	(working copy)
@@ -31,7 +31,7 @@
 
 	for(i=0; i<height; i++)
 	{
-		memcpy(dst, src, bytesPerLine);
+		fast_memcpy(dst, src, bytesPerLine);
 		src+= srcStride;
 		dst+= dstStride;
 	}
@@ -47,10 +47,10 @@
 
 	for(i=h>>1; i; i--)
 	{
-		memcpy(dst, src0, w);
+		fast_memcpy(dst, src0, w);
 		src0 += ss;
 		dst += ds;
-		memcpy(dst, src1, w);
+		fast_memcpy(dst, src1, w);
 		src1 += ss;
 		dst += ds;
 	}
@@ -153,8 +153,8 @@
 		}
 	}
 	if (mpi->qscale) {
-		memcpy(b->planes[3], mpi->qscale, c->w[3]);
-		memcpy(b->planes[3]+c->w[3], mpi->qscale, c->w[3]);
+		fast_memcpy(b->planes[3], mpi->qscale, c->w[3]);
+		fast_memcpy(b->planes[3]+c->w[3], mpi->qscale, c->w[3]);
 	}
 
 	p = mpi->fields & MP_IMGFIELD_TOP_FIRST ? 0 :
Index: libmpcodecs/vf_ivtc.c
===================================================================
--- libmpcodecs/vf_ivtc.c	(revision 23390)
+++ libmpcodecs/vf_ivtc.c	(working copy)
@@ -47,7 +47,7 @@
 
 	for(i=0; i<height; i++)
 	{
-		memcpy(dst, src, bytesPerLine);
+		fast_memcpy(dst, src, bytesPerLine);
 		src+= srcStride;
 		dst+= dstStride;
 	}
Index: libmpcodecs/vf_yadif.c
===================================================================
--- libmpcodecs/vf_yadif.c	(revision 23390)
+++ libmpcodecs/vf_yadif.c	(working copy)
@@ -62,7 +62,7 @@
 static void store_ref(struct vf_priv_s *p, uint8_t *src[3], int src_stride[3], int width, int height){
     int i;
 
-    memcpy (p->ref[3], p->ref[0], sizeof(uint8_t *)*3);
+    fast_memcpy (p->ref[3], p->ref[0], sizeof(uint8_t *)*3);
     memmove(p->ref[0], p->ref[1], sizeof(uint8_t *)*3*3);
 
     for(i=0; i<3; i++){
@@ -363,7 +363,7 @@
                 uint8_t *dst2= &dst[i][y*dst_stride[i]];
                 filter_line(p, dst2, prev, cur, next, w, refs, parity ^ tff);
             }else{
-                memcpy(&dst[i][y*dst_stride[i]], &p->ref[1][i][y*refs], w);
+                fast_memcpy(&dst[i][y*dst_stride[i]], &p->ref[1][i][y*refs], w);
             }
         }
     }
Index: libmpcodecs/vf_detc.c
===================================================================
--- libmpcodecs/vf_detc.c	(revision 23390)
+++ libmpcodecs/vf_detc.c	(working copy)
@@ -60,7 +60,7 @@
 
 	for(i=0; i<height; i++)
 	{
-		memcpy(dst, src, bytesPerLine);
+		fast_memcpy(dst, src, bytesPerLine);
 		src+= srcStride;
 		dst+= dstStride;
 	}
Index: libmpcodecs/vf_bmovl.c
===================================================================
--- libmpcodecs/vf_bmovl.c	(revision 23390)
+++ libmpcodecs/vf_bmovl.c	(working copy)
@@ -388,14 +388,14 @@
 
 	if(vf->priv->opaque) {	// Just copy buffer memory to screen
 		for( ypos=vf->priv->y1 ; ypos < vf->priv->y2 ; ypos++ ) {
-			memcpy( dmpi->planes[0] + (ypos*dmpi->stride[0]) + vf->priv->x1,
+			fast_memcpy( dmpi->planes[0] + (ypos*dmpi->stride[0]) + vf->priv->x1,
 			        vf->priv->bitmap.y + (ypos*vf->priv->w) + vf->priv->x1,
 					vf->priv->x2 - vf->priv->x1 );
 			if(ypos%2) {
-				memcpy( dmpi->planes[1] + ((ypos/2)*dmpi->stride[1]) + (vf->priv->x1/2),
+				fast_memcpy( dmpi->planes[1] + ((ypos/2)*dmpi->stride[1]) + (vf->priv->x1/2),
 				        vf->priv->bitmap.u + (((ypos/2)*(vf->priv->w)/2)) + (vf->priv->x1/2),
 				        (vf->priv->x2 - vf->priv->x1)/2 );
-				memcpy( dmpi->planes[2] + ((ypos/2)*dmpi->stride[2]) + (vf->priv->x1/2),
+				fast_memcpy( dmpi->planes[2] + ((ypos/2)*dmpi->stride[2]) + (vf->priv->x1/2),
 				        vf->priv->bitmap.v + (((ypos/2)*(vf->priv->w)/2)) + (vf->priv->x1/2),
 				        (vf->priv->x2 - vf->priv->x1)/2 );
 			}
Index: libmpcodecs/vf_unsharp.c
===================================================================
--- libmpcodecs/vf_unsharp.c	(revision 23390)
+++ libmpcodecs/vf_unsharp.c	(working copy)
@@ -85,10 +85,10 @@
 	if( src == dst )
 	    return;
 	if( dstStride == srcStride ) 
-	    memcpy( dst, src, srcStride*height );
+	    fast_memcpy( dst, src, srcStride*height );
 	else
 	    for( y=0; y<height; y++, dst+=dstStride, src+=srcStride )
-		memcpy( dst, src, width );
+		fast_memcpy( dst, src, width );
 	return;
     }
 
Index: libmpcodecs/vd_mtga.c
===================================================================
--- libmpcodecs/vd_mtga.c	(revision 23390)
+++ libmpcodecs/vd_mtga.c	(working copy)
@@ -110,20 +110,20 @@
 	    
 	    if (packet_header & 0x80) /* runlength encoded packet */
 	    {
-		memcpy(final, data, num_bytes);
+		fast_memcpy(final, data, num_bytes);
 		
 		// Note: this will be slow when DR to vram!
 		i=num_bytes;
 		while(2*i<=replen){
-		    memcpy(final+i,final,i);
+		    fast_memcpy(final+i,final,i);
 		    i*=2;
 		}
-		memcpy(final+i,final,replen-i);
+		fast_memcpy(final+i,final,replen-i);
 		data += num_bytes;
 	    }
 	    else /* raw packet */
 	    {
-		memcpy(final, data, replen);
+		fast_memcpy(final, data, replen);
 		data += replen;
 	    }
 	    
@@ -144,7 +144,7 @@
     for (row = info->start_row; (!info->origin && row) || (info->origin && row < info->height); row += info->increment)
     {
 	final = mpi->planes[0] + mpi->stride[0] * row;
-	memcpy(final, data, info->width * num_bytes);
+	fast_memcpy(final, data, info->width * num_bytes);
 	data += info->width * num_bytes;
     }
 
Index: stream/tvi_v4l.c
===================================================================
--- stream/tvi_v4l.c	(revision 23390)
+++ stream/tvi_v4l.c	(working copy)
@@ -1391,9 +1391,9 @@
 
     // YV12 uses VIDEO_PALETTE_YUV420P, but the planes are swapped
     if (priv->format == IMGFMT_YV12) {
-        memcpy(dest, source, priv->width * priv->height);
-        memcpy(dest+priv->width * priv->height*5/4, source+priv->width * priv->height, priv->width * priv->height/4);
-        memcpy(dest+priv->width * priv->height, source+priv->width * priv->height*5/4, priv->width * priv->height/4);
+        fast_memcpy(dest, source, priv->width * priv->height);
+        fast_memcpy(dest+priv->width * priv->height*5/4, source+priv->width * priv->height, priv->width * priv->height/4);
+        fast_memcpy(dest+priv->width * priv->height, source+priv->width * priv->height*5/4, priv->width * priv->height/4);
         return;
     }
 
@@ -1404,7 +1404,7 @@
     case VIDEO_PALETTE_RGB565:
         sptr = source + (priv->height-1)*priv->bytesperline;
         for (i = 0; i < priv->height; i++) {
-            memcpy(dest, sptr, priv->bytesperline);
+            fast_memcpy(dest, sptr, priv->bytesperline);
             dest += priv->bytesperline;
             sptr -= priv->bytesperline;
         }
@@ -1412,7 +1412,7 @@
     case VIDEO_PALETTE_UYVY:
     case VIDEO_PALETTE_YUV420P:
     default:
-        memcpy(dest, source, priv->bytesperline * priv->height);
+        fast_memcpy(dest, source, priv->bytesperline * priv->height);
     }
 
 }
@@ -1640,7 +1640,7 @@
 
     pthread_mutex_lock(&priv->video_buffer_mutex);
     interval = (double)priv->video_timebuffer[priv->video_head]*1e-6;
-    memcpy(buffer, priv->video_ringbuffer[priv->video_head], len);
+    fast_memcpy(buffer, priv->video_ringbuffer[priv->video_head], len);
     priv->video_cnt--;
     priv->video_head = (priv->video_head+1)%priv->video_buffer_size_current;
     pthread_mutex_unlock(&priv->video_buffer_mutex);
@@ -1741,7 +1741,7 @@
     while (priv->audio_head == priv->audio_tail) {
         usleep(10000);
     }
-    memcpy(buffer, priv->audio_ringbuffer+priv->audio_head*priv->audio_in.blocksize, len);
+    fast_memcpy(buffer, priv->audio_ringbuffer+priv->audio_head*priv->audio_in.blocksize, len);
     priv->audio_head = (priv->audio_head+1) % priv->audio_buffer_size;
     priv->audio_cnt--;
     priv->audio_sent_blocks_total++;
Index: libao2/ao_sdl.c
===================================================================
--- libao2/ao_sdl.c	(revision 23390)
+++ libao2/ao_sdl.c	(working copy)
@@ -88,10 +88,10 @@
   if (len > free) len = free;
   if (first_len > len) first_len = len;
   // till end of buffer
-  memcpy (&buffer[write_pos], data, first_len);
+  fast_memcpy (&buffer[write_pos], data, first_len);
   if (len > first_len) { // we have to wrap around
     // remaining part from beginning of buffer
-    memcpy (buffer, &data[first_len], len - first_len);
+    fast_memcpy (buffer, &data[first_len], len - first_len);
   }
   write_pos = (write_pos + len) % BUFFSIZE;
   return len;
@@ -106,14 +106,14 @@
 #ifdef USE_SDL_INTERNAL_MIXER
   SDL_MixAudio (data, &buffer[read_pos], first_len, volume);
 #else
-  memcpy (data, &buffer[read_pos], first_len);
+  fast_memcpy (data, &buffer[read_pos], first_len);
 #endif
   if (len > first_len) { // we have to wrap around
     // remaining part from beginning of buffer
 #ifdef USE_SDL_INTERNAL_MIXER
     SDL_MixAudio (&data[first_len], buffer, len - first_len, volume);
 #else
-    memcpy (&data[first_len], buffer, len - first_len);
+    fast_memcpy (&data[first_len], buffer, len - first_len);
 #endif
   }
   read_pos = (read_pos + len) % BUFFSIZE;
Index: libao2/ao_dsound.c
===================================================================
--- libao2/ao_dsound.c	(revision 23390)
+++ libao2/ao_dsound.c	(working copy)
@@ -199,7 +199,7 @@
     if(device_num==*device_index){
         mp_msg(MSGT_AO, MSGL_V,"<--");
         if(guid){
-            memcpy(&device,guid,sizeof(GUID));
+            fast_memcpy(&device,guid,sizeof(GUID));
         }
     }
     mp_msg(MSGT_AO, MSGL_V,"\n");
@@ -337,14 +337,14 @@
   	    numsamp = dwBytes1 / (ao_data.channels * sampsize);  // number of samples for each channel in this buffer
 
   	    for( i = 0; i < numsamp; i++ ) for( j = 0; j < ao_data.channels; j++ ) {
-  	        memcpy(lpvPtr1+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
+  	        fast_memcpy(lpvPtr1+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
   	    }
 
   	    if (NULL != lpvPtr2 )
   	    {
   	        numsamp = dwBytes2 / (ao_data.channels * sampsize);
   	        for( i = 0; i < numsamp; i++ ) for( j = 0; j < ao_data.channels; j++ ) {
-  	            memcpy(lpvPtr2+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+dwBytes1+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
+  	            fast_memcpy(lpvPtr2+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+dwBytes1+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
   	        }
   	    }
 
@@ -352,8 +352,8 @@
   	    if(write_offset>=buffer_size)write_offset=dwBytes2;
   	} else {
   	    // Write to pointers without reordering. 
-	memcpy(lpvPtr1,data,dwBytes1);
-    if (NULL != lpvPtr2 )memcpy(lpvPtr2,data+dwBytes1,dwBytes2);
+	fast_memcpy(lpvPtr1,data,dwBytes1);
+    if (NULL != lpvPtr2 )fast_memcpy(lpvPtr2,data+dwBytes1,dwBytes2);
 	write_offset+=dwBytes1+dwBytes2;
     if(write_offset>=buffer_size)write_offset=dwBytes2;
   	}
Index: libao2/ao_win32.c
===================================================================
--- libao2/ao_win32.c	(revision 23390)
+++ libao2/ao_win32.c	(working copy)
@@ -299,7 +299,7 @@
            waveOutUnprepareHeader(hWaveOut, current, sizeof(WAVEHDR));
 	x=BUFFER_SIZE-buf_write_pos;          
     if(x>len) x=len;                   
-    memcpy(current->lpData+buf_write_pos,data+len2,x); 
+    fast_memcpy(current->lpData+buf_write_pos,data+len2,x); 
     if(buf_write_pos==0)full_buffers++;
     len2+=x; len-=x;                 
 	buffered_bytes+=x; buf_write_pos+=x; 
Index: libao2/ao_jack.c
===================================================================
--- libao2/ao_jack.c	(revision 23390)
+++ libao2/ao_jack.c	(working copy)
@@ -111,10 +111,10 @@
   if (len > free) len = free;
   if (first_len > len) first_len = len;
   // till end of buffer
-  memcpy (&buffer[write_pos], data, first_len);
+  fast_memcpy (&buffer[write_pos], data, first_len);
   if (len > first_len) { // we have to wrap around
     // remaining part from beginning of buffer
-    memcpy (buffer, &data[first_len], len - first_len);
+    fast_memcpy (buffer, &data[first_len], len - first_len);
   }
   write_pos = (write_pos + len) % BUFFSIZE;
   return len;
Index: gui/mplayer/pb.c
===================================================================
--- gui/mplayer/pb.c	(revision 23390)
+++ gui/mplayer/pb.c	(working copy)
@@ -88,7 +88,7 @@
    
    vo_mouse_autohide=0;
 
-   memcpy( mplPBDrawBuffer,appMPlayer.bar.Bitmap.Image,appMPlayer.bar.Bitmap.ImageSize );
+   fast_memcpy( mplPBDrawBuffer,appMPlayer.bar.Bitmap.Image,appMPlayer.bar.Bitmap.ImageSize );
    Render( &appMPlayer.barWindow,appMPlayer.barItems,appMPlayer.NumberOfBarItems,mplPBDrawBuffer,appMPlayer.bar.Bitmap.ImageSize );
    wsConvert( &appMPlayer.barWindow,mplPBDrawBuffer,appMPlayer.bar.Bitmap.ImageSize );   
   }
Index: gui/mplayer/mw.c
===================================================================
--- gui/mplayer/mw.c	(revision 23390)
+++ gui/mplayer/mw.c	(working copy)
@@ -65,7 +65,7 @@
    btnModify( evSetMoviePosition,guiIntfStruct.Position );
    btnModify( evSetVolume,guiIntfStruct.Volume );
 
-   memcpy( mplDrawBuffer,appMPlayer.main.Bitmap.Image,appMPlayer.main.Bitmap.ImageSize );
+   fast_memcpy( mplDrawBuffer,appMPlayer.main.Bitmap.Image,appMPlayer.main.Bitmap.ImageSize );
    Render( &appMPlayer.mainWindow,appMPlayer.Items,appMPlayer.NumberOfItems,mplDrawBuffer,appMPlayer.main.Bitmap.ImageSize );
    mplMainRender=0;
   }
Index: mp3lib/sr1.c
===================================================================
--- mp3lib/sr1.c	(revision 23390)
+++ mp3lib/sr1.c	(working copy)
@@ -172,7 +172,7 @@
 //  if(backstep!=512 && backstep>fsizeold)
 //    printf("\rWarning! backstep (%d>%d)                                         \n",backstep,fsizeold);
   wordpointer = bsbuf + ssize - backstep;
-  if (backstep) memcpy(wordpointer,bsbufold+fsizeold-backstep,backstep);
+  if (backstep) fast_memcpy(wordpointer,bsbufold+fsizeold-backstep,backstep);
   bitindex = 0;
   bitsleft+=8*backstep;
 //  printf("Backstep %d  (bitsleft=%d)\n",backstep,bitsleft);
Index: libvo/vo_bl.c
===================================================================
--- libvo/vo_bl.c	(revision 23390)
+++ libvo/vo_bl.c	(working copy)
@@ -174,7 +174,7 @@
 	addr.sin_family = AF_INET;
 	addr.sin_port = htons(h->port);
 
-	memcpy(&addr.sin_addr.s_addr, dest->h_addr_list[0], dest->h_length);
+	fast_memcpy(&addr.sin_addr.s_addr, dest->h_addr_list[0], dest->h_length);
 
 	h->fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
 	if (h->fd < 0) {
@@ -283,7 +283,7 @@
 
 	if (prevpts >= 0) for (i = 0; i < no_bl_files; i++)
 		bl->write_frame(&bl_files[i], tmp, (vo_pts - prevpts)/90);
-	memcpy(tmp, image, bl->width*bl->height*bl->channels);
+	fast_memcpy(tmp, image, bl->width*bl->height*bl->channels);
 	prevpts = vo_pts;
 
 	for (i = 0; i < no_bl_hosts; i++) bl->send_frame(&bl_hosts[i]);
@@ -331,7 +331,7 @@
 	dst=image; /* + zr->off_y + zr->image_width*(y/zr->vdec)+x;*/
 	// copy Y:
 	for (i = 0; i < h; i++) {
-		memcpy(dst,src,w);
+		fast_memcpy(dst,src,w);
 		dst+=bl->width;
 		src+=stride[0];
 
Index: libvo/vo_3dfx.c
===================================================================
--- libvo/vo_3dfx.c	(revision 23390)
+++ libvo/vo_3dfx.c	(working copy)
@@ -238,10 +238,10 @@
 	for (j=0;j<jmax;j++) 
 	{
 		//XXX this should be hand-rolled 32 bit memcpy for safeness.
-		memcpy(fb_YUV->U + (uint32_t) VOODOO_YUV_STRIDE*  j       ,((uint8_t*) u) + uv_imax*  j       , uv_imax);
-		memcpy(fb_YUV->V + (uint32_t) VOODOO_YUV_STRIDE*  j       ,((uint8_t*) v) + uv_imax*  j       , uv_imax);
-		memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE* (j<<1)   ,((uint8_t*) y) + y_imax * (j<<1)   , y_imax);
-		memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE*((j<<1)+1),((uint8_t*) y) + y_imax *((j<<1)+1), y_imax);
+		fast_memcpy(fb_YUV->U + (uint32_t) VOODOO_YUV_STRIDE*  j       ,((uint8_t*) u) + uv_imax*  j       , uv_imax);
+		fast_memcpy(fb_YUV->V + (uint32_t) VOODOO_YUV_STRIDE*  j       ,((uint8_t*) v) + uv_imax*  j       , uv_imax);
+		fast_memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE* (j<<1)   ,((uint8_t*) y) + y_imax * (j<<1)   , y_imax);
+		fast_memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE*((j<<1)+1),((uint8_t*) y) + y_imax *((j<<1)+1), y_imax);
 	}
   LOG("video_out_3dfx: done planar dump\n");
 }
Index: libvo/vo_fbdev.c
===================================================================
--- libvo/vo_fbdev.c	(revision 23390)
+++ libvo/vo_fbdev.c	(working copy)
@@ -1075,7 +1075,7 @@
 
 	s = src[0];
 	while (h) {
-		memcpy(d, s, w * fb_pixel_size);
+		fast_memcpy(d, s, w * fb_pixel_size);
 		d += fb_line_len;
 		s += stride[0];
 		h--;
Index: libvo/vo_zr.c
===================================================================
--- libvo/vo_zr.c	(revision 23390)
+++ libvo/vo_zr.c	(working copy)
@@ -546,7 +546,7 @@
 		source = src[0] + 2*g->yoff*zr->vdec*zr->stride + 2*g->xoff;
 		dest = zr->image + 2*zr->off_y;
 		for (i = 0; i < g->height/zr->vdec; i++) {
-			memcpy(dest, source, zr->image_width*2);
+			fast_memcpy(dest, source, zr->image_width*2);
 			dest += 2*zr->image_width;
 			source += zr->vdec*zr->stride;
 		}
@@ -619,7 +619,7 @@
 		// copy Y:
 		for (i = 0; i < h; i++) {
 			if ((i + x)%zr->vdec == 0) {
-				memcpy(dst,src,w);
+				fast_memcpy(dst,src,w);
 				dst+=zr->image_width;
 			}
 			src+=stride[0];
@@ -633,8 +633,8 @@
 					zr->image_width/2+(x/2);
 			for (i = 0; i< h/2; i++) {
 				if ((i+x/2)%zr->vdec == 0) {
-					memcpy(dst1,src1,w/2);
-					memcpy(dst2,src2,w/2);
+					fast_memcpy(dst1,src1,w/2);
+					fast_memcpy(dst2,src2,w/2);
 					dst1+=zr->image_width/2;
 					dst2+=zr->image_width/2;
 				}
Index: libvo/vo_macosx.m
===================================================================
--- libvo/vo_macosx.m	(revision 23390)
+++ libvo/vo_macosx.m	(working copy)
@@ -203,7 +203,7 @@
 	{
 		case IMGFMT_BGR32:
 		case IMGFMT_RGB32:
-			memcpy(image_data, src[0], image_width*image_height*image_bytes);
+			fast_memcpy(image_data, src[0], image_width*image_height*image_bytes);
 			break;
 
 		case IMGFMT_YUY2:
Index: libvo/vo_fbdev2.c
===================================================================
--- libvo/vo_fbdev2.c	(revision 23390)
+++ libvo/vo_fbdev2.c	(working copy)
@@ -354,7 +354,7 @@
 	int i;
 
 	for (i = 0; i < h; i++) {
-		memcpy(dest, in, w * fb_pixel_size);
+		fast_memcpy(dest, in, w * fb_pixel_size);
 		dest += next;
 		in += stride[0];
 	}
@@ -371,7 +371,7 @@
 	int i, out_offset = 0, in_offset = 0;
 
 	for (i = 0; i < in_height; i++) {
-		memcpy(center + out_offset, next_frame + in_offset,
+		fast_memcpy(center + out_offset, next_frame + in_offset,
 				in_width * fb_pixel_size);
 		out_offset += fb_line_len;
 		in_offset += in_width * fb_pixel_size;
Index: libvo/vesa_lvo.c
===================================================================
--- libvo/vesa_lvo.c	(revision 23390)
+++ libvo/vesa_lvo.c	(working copy)
@@ -167,7 +167,7 @@
     dest = lvo_mem + bespitch * y + x;
     src = image[0];
     for(i=0;i<h;i++){
-        memcpy(dest,src,w);
+        fast_memcpy(dest,src,w);
         src+=stride[0];
         dest += bespitch;
     }
@@ -177,7 +177,7 @@
     dest = lvo_mem + bespitch*mga_vid_config.src_height + bespitch2 * y + x;
     src = image[1];
     for(i=0;i<h;i++){
-        memcpy(dest,src,w);
+        fast_memcpy(dest,src,w);
         src+=stride[1];
         dest += bespitch2;
     }
@@ -187,7 +187,7 @@
                    + bespitch2 * y + x;
     src = image[2];
     for(i=0;i<h;i++){
-        memcpy(dest,src,w);
+        fast_memcpy(dest,src,w);
         src+=stride[2];
         dest += bespitch2;
     }
@@ -207,7 +207,7 @@
 	bytpp = (image_bpp+7)/8;
 	dst = lvo_mem + (image_width * y + x)*bytpp;
 	/* vlvo_draw_slice_422(image,stride,w,h,x,y); just for speed */
-	memcpy(dst,image[0],mga_vid_config.frame_size);
+	fast_memcpy(dst,image[0],mga_vid_config.frame_size);
     }
  return 0;
 }
@@ -215,7 +215,7 @@
 uint32_t vlvo_draw_frame(uint8_t *image[])
 {
 /* Note it's very strange but sometime for YUY2 draw_frame is called */
-  memcpy(lvo_mem,image[0],mga_vid_config.frame_size);
+  fast_memcpy(lvo_mem,image[0],mga_vid_config.frame_size);
   if( mp_msg_test(MSGT_VO,MSGL_DBG2) ) {
     mp_msg(MSGT_VO,MSGL_DBG2, "vesa_lvo: vlvo_flip_page() was called\n");}
   return 0;
Index: libvo/vo_dxr2.c
===================================================================
--- libvo/vo_dxr2.c	(revision 23390)
+++ libvo/vo_dxr2.c	(working copy)
@@ -165,13 +165,13 @@
   }
   
   while (len>0) if ((dxr2bufpos+len) <= BUF_SIZE) {
-    memcpy(dxr2buf+dxr2bufpos, data, len);
+    fast_memcpy(dxr2buf+dxr2bufpos, data, len);
     dxr2bufpos+=len;
     len=0;
   } else {
     int copylen=BUF_SIZE-dxr2bufpos;
     if(copylen > 0) {
-      memcpy(dxr2buf+dxr2bufpos, data, copylen);
+      fast_memcpy(dxr2buf+dxr2bufpos, data, copylen);
       dxr2bufpos += copylen;
       data+=copylen;
       len-=copylen;
Index: libvo/vo_sdl.c
===================================================================
--- libvo/vo_sdl.c	(revision 23390)
+++ libvo/vo_sdl.c	(working copy)
@@ -1052,11 +1052,11 @@
 	    	mysrc+=priv->framePlaneYUY;
 		for(i = 0; i < priv->height; i++) {
 			mysrc-=priv->stridePlaneYUY;
-			memcpy (dst, mysrc, priv->stridePlaneYUY);
+			fast_memcpy (dst, mysrc, priv->stridePlaneYUY);
                 dst+=priv->overlay->pitches[0];
 		}
 	    }
-	    else memcpy (dst, src[0], priv->framePlaneYUY);
+	    else fast_memcpy (dst, src[0], priv->framePlaneYUY);
 	    SDL_OVR_UNLOCK
             break;
 	
@@ -1075,11 +1075,11 @@
 				mysrc+=priv->framePlaneRGB;
 				for(i = 0; i < priv->height; i++) {
 					mysrc-=priv->stridePlaneRGB;
-					memcpy (dst, mysrc, priv->stridePlaneRGB);
+					fast_memcpy (dst, mysrc, priv->stridePlaneRGB);
 					dst += priv->surface->pitch;
 				}
 			}
-			else memcpy (dst, src[0], priv->framePlaneRGB);
+			else fast_memcpy (dst, src[0], priv->framePlaneRGB);
 			SDL_SRF_UNLOCK(priv->surface)
 		} else {
 			SDL_SRF_LOCK(priv->rgbsurface, -1)
@@ -1088,11 +1088,11 @@
 				mysrc+=priv->framePlaneRGB;
 				for(i = 0; i < priv->height; i++) {
 					mysrc-=priv->stridePlaneRGB;
-					memcpy (dst, mysrc, priv->stridePlaneRGB);
+					fast_memcpy (dst, mysrc, priv->stridePlaneRGB);
 					dst += priv->rgbsurface->pitch;
 				}
 			}
-			else memcpy (dst, src[0], priv->framePlaneRGB);
+			else fast_memcpy (dst, src[0], priv->framePlaneRGB);
 			SDL_SRF_UNLOCK(priv->rgbsurface)
 		}
 		break;
Index: libvo/vo_vesa.c
===================================================================
--- libvo/vo_vesa.c	(revision 23390)
+++ libvo/vo_vesa.c	(working copy)
@@ -217,7 +217,7 @@
 	color = (r << shift_r) | (g << shift_g) | (b << shift_b);
 	offset = y * bpl + (x * pixel_size);
         if(!VALID_WIN_FRAME(offset)) __vbeSwitchBank(offset);
-	memcpy(VIDEO_PTR(offset), &color, pixel_size);
+	fast_memcpy(VIDEO_PTR(offset), &color, pixel_size);
 }
 
 /*
@@ -226,7 +226,7 @@
 */
 static void __vbeCopyBlockFast(unsigned long offset,uint8_t *image,unsigned long size)
 {
-  memcpy(&win.ptr[offset],image,size);
+  fast_memcpy(&win.ptr[offset],image,size);
 }
 
 static void __vbeCopyBlock(unsigned long offset,uint8_t *image,unsigned long size)
@@ -236,7 +236,7 @@
    {
 	if(!VALID_WIN_FRAME(offset)) __vbeSwitchBank(offset);
 	delta = min(size,win.high - offset);
-	memcpy(VIDEO_PTR(offset),&image[src_idx],delta);
+	fast_memcpy(VIDEO_PTR(offset),&image[src_idx],delta);
 	src_idx += delta;
 	offset += delta;
 	size -= delta;
@@ -649,7 +649,7 @@
 	  else          fs_mode = 1;
 	} 
 	if((err=vbeInit()) != VBE_OK) { PRINT_VBE_ERR("vbeInit",err); return -1; }
-	memcpy(vib.VESASignature,"VBE2",4);
+	fast_memcpy(vib.VESASignature,"VBE2",4);
 	if(!vib_set && (err=vbeGetControllerInfo(&vib)) != VBE_OK)
 	{
 	  PRINT_VBE_ERR("vbeGetControllerInfo",err);
Index: libvo/vo_zr2.c
===================================================================
--- libvo/vo_zr2.c	(revision 23390)
+++ libvo/vo_zr2.c	(working copy)
@@ -151,7 +151,7 @@
 	}
 
 	/* copy the jpeg image to the buffer which we acquired */
-	memcpy(p->buf + p->zrq.size*p->frame, mpi->planes[0], size);
+	fast_memcpy(p->buf + p->zrq.size*p->frame, mpi->planes[0], size);
 			
 	return VO_TRUE;
 }
@@ -394,7 +394,7 @@
 	 * We make configuration changes to a temporary params structure,
 	 * compare it with the old params structure and only apply the new
 	 * config if it is different from the old one. */
-	memcpy(&zptmp, &p->zp, sizeof(zptmp));
+	fast_memcpy(&zptmp, &p->zp, sizeof(zptmp));
 
 	/* translate the configuration to zoran understandable format */
 	zptmp.decimation = 0;
@@ -423,7 +423,7 @@
 
 	if (memcmp(&zptmp, &p->zp, sizeof(zptmp))) {
 		/* config differs, we must update */
-		memcpy(&p->zp, &zptmp, sizeof(zptmp));
+		fast_memcpy(&p->zp, &zptmp, sizeof(zptmp));
 		stop_playing(p);
 		if (ioctl(p->vdes, MJPIOC_S_PARAMS, &p->zp) < 0) {
 			ERROR("error writing display params to card\n");
Index: libvo/vo_yuv4mpeg.c
===================================================================
--- libvo/vo_yuv4mpeg.c	(revision 23390)
+++ libvo/vo_yuv4mpeg.c	(working copy)
@@ -175,9 +175,9 @@
 	
 	for (i=0; i<h; i +=2)
 	{
-		memcpy(rgb_line_buffer     , ptr + stride *  i   , stride);
-		memcpy(ptr + stride *  i   , ptr + stride * (i+1), stride);
-		memcpy(ptr + stride * (i+1), rgb_line_buffer     , stride);
+		fast_memcpy(rgb_line_buffer     , ptr + stride *  i   , stride);
+		fast_memcpy(ptr + stride *  i   , ptr + stride * (i+1), stride);
+		fast_memcpy(ptr + stride * (i+1), rgb_line_buffer     , stride);
 	}
 }
 
@@ -227,16 +227,16 @@
 	while(k_start < modv)
 	{
 		i = j = k_start;
-		memcpy(rgb_line_buffer, ptr + stride * i, stride);
+		fast_memcpy(rgb_line_buffer, ptr + stride * i, stride);
 
 		while (!line_state[j])
 		{
 			line_state[j] = 1;
 			i = j;
 			j = j * 2 % modv;
-			memcpy(ptr + stride * i, ptr + stride * j, stride);
+			fast_memcpy(ptr + stride * i, ptr + stride * j, stride);
 		}
-		memcpy(ptr + stride * i, rgb_line_buffer, stride);
+		fast_memcpy(ptr + stride * i, rgb_line_buffer, stride);
 		
 		while(k_start < modv && line_state[k_start])
 			k_start++;
@@ -377,7 +377,7 @@
 		dst = image_y + image_width * y + x;
 		for (i = 0; i < h; i++)
 		{
-			memcpy(dst, src, w);
+			fast_memcpy(dst, src, w);
 			src += stride[0];
 			dst += image_width;
 		}
@@ -390,8 +390,8 @@
 			uint8_t *dstv = image_v + imgstride * (y >> 1) + (x >> 1);
 			for (i = 0; i < h / 2; i++)
 			{
-				memcpy(dstu, src1 , w >> 1);
-				memcpy(dstv, src2, w >> 1);
+				fast_memcpy(dstu, src1 , w >> 1);
+				fast_memcpy(dstv, src2, w >> 1);
 				src1 += stride[1];
 				src2 += stride[2];
 				dstu += imgstride;
@@ -405,7 +405,7 @@
 			dst = rgb_buffer + (image_width * y + x) * 3;
 			for (i = 0; i < h; i++)
 			{
-				memcpy(dst, src, w * 3);
+				fast_memcpy(dst, src, w * 3);
 				src += stride[0];
 				dst += image_width * 3;
 			}
@@ -424,7 +424,7 @@
 
 		case IMGFMT_BGR24:
 		case IMGFMT_RGB24:
-			memcpy(rgb_buffer, src[0], image_width * image_height * 3);
+			fast_memcpy(rgb_buffer, src[0], image_width * image_height * 3);
 			break;
 	}
     return 0;
Index: libvo/vo_syncfb.c
===================================================================
--- libvo/vo_syncfb.c	(revision 23390)
+++ libvo/vo_syncfb.c	(working copy)
@@ -127,7 +127,7 @@
 
 	for(h=0; h < _config.src_height; h++)
 	{
-		memcpy(dest, y, _config.src_width);
+		fast_memcpy(dest, y, _config.src_width);
 		y += _config.src_width;
 		dest += bespitch;
 	}
@@ -161,7 +161,7 @@
 
 	for(h=0; h < 16; h++)
 	{
-		memcpy(dest, y, _config.src_width);
+		fast_memcpy(dest, y, _config.src_width);
 		y += _config.src_width;
 		dest += bespitch;
 	}
@@ -190,7 +190,7 @@
 	dest = frame_mem + bufinfo.offset + (bespitch * ypos);
 	for(h=0; h < ysize; h++)
 	{
-		memcpy(dest, y, xsize);
+		fast_memcpy(dest, y, xsize);
 		y += stride[0];
 		dest += bespitch;
 	}
@@ -201,7 +201,7 @@
 	dest = frame_mem + bufinfo.offset_p2 + (bespitch * ypos)/4;
 	for(h=0; h < ysize; h++)
 	{
-		memcpy(dest, cr, xsize);
+		fast_memcpy(dest, cr, xsize);
 		cr += stride[1];
 		dest += bespitch/2;
 	}
@@ -209,7 +209,7 @@
 	dest = frame_mem + bufinfo.offset_p3 + (bespitch * ypos)/4;
 	for(h=0; h < ysize; h++)
 	{
-		memcpy(dest, cb, xsize);
+		fast_memcpy(dest, cb, xsize);
 		cb += stride[2];
 		dest += bespitch/2;
 	}
Index: libvo/vo_quartz.c
===================================================================
--- libvo/vo_quartz.c	(revision 23390)
+++ libvo/vo_quartz.c	(working copy)
@@ -1022,7 +1022,7 @@
 	switch (image_format)
 	{
 		case IMGFMT_RGB32:
-			memcpy(image_data,src[0],image_size);
+			fast_memcpy(image_data,src[0],image_size);
 			return 0;
 			
 		case IMGFMT_UYVY:
Index: libvo/vo_svga.c
===================================================================
--- libvo/vo_svga.c	(revision 23390)
+++ libvo/vo_svga.c	(working copy)
@@ -206,7 +206,7 @@
     rgbplane=PageStore[0].vbase + (y*mode_stride) + (x*modeinfo->bytesperpixel);
     for(i=0;i<h;i++){
 //i'm afraid that memcpy is better optimized than memset;)
-      memcpy(rgbplane,zerobuf,w*modeinfo->bytesperpixel);
+      fast_memcpy(rgbplane,zerobuf,w*modeinfo->bytesperpixel);
 //    memset(rgbplane,0,w*modeinfo->bytesperpixel);
       rgbplane+=mode_stride;
     }
Index: libvo/vo_directx.c
===================================================================
--- libvo/vo_directx.c	(revision 23390)
+++ libvo/vo_directx.c	(working copy)
@@ -1179,7 +1187,7 @@
 
 static int draw_frame(uint8_t *src[])
 {
-  	memcpy( image, *src, dstride * image_height );
+  	fast_memcpy( image, *src, dstride * image_height );
 	return 0;
 }
 
@@ -1262,7 +1270,7 @@
 	}
 	else //packed
 	{
-        memcpy( image, mpi->planes[0], image_height * dstride);
+        fast_memcpy( image, mpi->planes[0], image_height * dstride);
 	}
 	return VO_TRUE;
 }
Index: libvo/vo_directfb2.c
===================================================================
--- libvo/vo_directfb2.c	(revision 23390)
+++ libvo/vo_directfb2.c	(working copy)
@@ -1233,7 +1233,7 @@
 	srcp = src[0];
 	
 	for (i=0;i<h;i++) {
-            memcpy(dst,srcp,p);
+            fast_memcpy(dst,srcp,p);
 	    dst += pitch;
 	    srcp += stride[0];
         }
@@ -1245,7 +1245,7 @@
     	    p = p/2;
 
             for (i=0;i<h/2;i++) {
-                memcpy(dst,srcp,p);
+                fast_memcpy(dst,srcp,p);
 		dst += pitch/2;
 	        srcp += stride[2];
     	    }
@@ -1254,7 +1254,7 @@
 	    srcp = src[1];
 	
     	    for (i=0;i<h/2;i++) {
-                memcpy(dst,srcp,p);
+                fast_memcpy(dst,srcp,p);
 		dst += pitch/2;
 	        srcp += stride[1];
     	    }
@@ -1266,7 +1266,7 @@
 	    p = p/2;
 
     	    for (i=0;i<h/2;i++) {
-                memcpy(dst,srcp,p);
+                fast_memcpy(dst,srcp,p);
 		dst += pitch/2;
 	        srcp += stride[1];
     	    }
@@ -1275,7 +1275,7 @@
 	    srcp = src[2];
 	
     	    for (i=0;i<h/2;i++) {
-                memcpy(dst,srcp,p);
+                fast_memcpy(dst,srcp,p);
 		dst += pitch/2;
 	        srcp += stride[2];
     	    }
@@ -1327,7 +1327,7 @@
 	src = mpi->planes[0]+mpi->y*mpi->stride[0]+mpi->x;
 	
 	for (i=0;i<mpi->h;i++) {
-            memcpy(dst+i*pitch,src+i*mpi->stride[0],p);
+            fast_memcpy(dst+i*pitch,src+i*mpi->stride[0],p);
         }
 
 	
@@ -1338,14 +1338,14 @@
 	    src = mpi->planes[2]+mpi->y*mpi->stride[2]+mpi->x/2;
 
             for (i=0;i<mpi->h/2;i++) {
-	        memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
+	        fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
     	    }
 	
     	    dst += pitch*height/4;
 	    src = mpi->planes[1]+mpi->y*mpi->stride[1]+mpi->x/2;
 	
     	    for (i=0;i<mpi->h/2;i++) {
-        	memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
+        	fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
     	    }
 
 	} else {
@@ -1355,14 +1355,14 @@
 	    src = mpi->planes[1]+mpi->y*mpi->stride[1]+mpi->x/2;
 
     	    for (i=0;i<mpi->h/2;i++) {
-        	memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
+        	fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
     	    }
 	
     	    dst += pitch*height/4;
 	    src = mpi->planes[2]+mpi->y*mpi->stride[2]+mpi->x/2;
 	
     	    for (i=0;i<mpi->h/2;i++) {
-        	memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
+        	fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
     	    }
 	
 	}
Index: libvo/fastmemcpy.h
===================================================================
--- libvo/fastmemcpy.h	(revision 23390)
+++ libvo/fastmemcpy.h	(working copy)
@@ -11,14 +11,15 @@
 
 extern void * fast_memcpy(void * to, const void * from, size_t len);
 extern void * mem2agpcpy(void * to, const void * from, size_t len);
-#define memcpy(a,b,c) fast_memcpy(a,b,c)
 
 #else /* HAVE_MMX/MMX2/3DNOW/SSE/SSE2 */
 #define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#define fast_memcpy(a,b,c) memcpy(a,b,c)
 #endif
 
 #else /* USE_FASTMEMCPY */
 #define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#define fast_memcpy(a,b,c) memcpy(a,b,c)
 #endif
 
 static inline void * mem2agpcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride)
@@ -62,13 +63,13 @@
 	    		srcStride = -srcStride;
 		}
 
-		memcpy(dst, src, srcStride*height);
+		fast_memcpy(dst, src, srcStride*height);
 	}
 	else
 	{
 		for(i=0; i<height; i++)
 		{
-			memcpy(dst, src, bytesPerLine);
+			fast_memcpy(dst, src, bytesPerLine);
 			src = (uint8_t*)src + srcStride;
 			dst = (uint8_t*)dst + dstStride;
 		}
Index: libvo/vo_dxr3.c
===================================================================
--- libvo/vo_dxr3.c	(revision 23390)
+++ libvo/vo_dxr3.c	(working copy)
@@ -727,7 +727,7 @@
 		    if ( !cleared )
 		     {
 		      spued->count=spubuf->count;
-		      memcpy( spued->data,spubuf->data,DATASIZE );
+		      fast_memcpy( spued->data,spubuf->data,DATASIZE );
 		      cleared=1;
 		     }
 		   }
@@ -1106,7 +1106,7 @@
     },*p;
 
     p = malloc(sizeof(m));
-    memcpy(p,m,sizeof(m));
+    fast_memcpy(p,m,sizeof(m));
     return p;
 }
 
Index: libswscale/swscale.c
===================================================================
--- libswscale/swscale.c	(revision 23390)
+++ libswscale/swscale.c	(working copy)
@@ -73,6 +73,7 @@
 #include "bswap.h"
 #include "rgb2rgb.h"
 #ifdef USE_FASTMEMCPY
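+// FIXME: fastmemcpy.h no longer redefines memcpy() as fast_memcpy(); decide how to handle that here (probably a local "#define memcpy fast_memcpy").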
 #include "libvo/fastmemcpy.h"
 #endif
 #include "libavcodec/opt.h"
Index: libswscale/rgb2rgb.c
===================================================================
--- libswscale/rgb2rgb.c	(revision 23390)
+++ libswscale/rgb2rgb.c	(working copy)
@@ -33,6 +33,7 @@
 #include "x86_cpu.h"
 #include "bswap.h"
 #ifdef USE_FASTMEMCPY
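+// FIXME: fastmemcpy.h no longer redefines memcpy() as fast_memcpy(); decide how to handle that here (probably a local "#define memcpy fast_memcpy").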
 #include "libvo/fastmemcpy.h"
 #endif
 
Index: libmenu/vf_menu.c
===================================================================
--- libmenu/vf_menu.c	(revision 23390)
+++ libmenu/vf_menu.c	(working copy)
@@ -146,8 +146,8 @@
 
   if(mpi->type == MP_IMGTYPE_TEMP && (!(mpi->flags&MP_IMGFLAG_PRESERVE)) ) {
     dmpi = vf_get_image(vf->next,mpi->imgfmt,mpi->type, mpi->flags, mpi->w, mpi->h);
-    memcpy(mpi->planes,dmpi->planes,MP_MAX_PLANES*sizeof(unsigned char*));
-    memcpy(mpi->stride,dmpi->stride,MP_MAX_PLANES*sizeof(unsigned int));
+    fast_memcpy(mpi->planes,dmpi->planes,MP_MAX_PLANES*sizeof(unsigned char*));
+    fast_memcpy(mpi->stride,dmpi->stride,MP_MAX_PLANES*sizeof(unsigned int));
     mpi->flags|=MP_IMGFLAG_DIRECT;
     mpi->priv=(void*)dmpi;
     return;
Index: libmpdemux/demux_asf.c
===================================================================
--- libmpdemux/demux_asf.c	(revision 23390)
+++ libmpdemux/demux_asf.c	(working copy)
@@ -56,12 +56,12 @@
 	//i+=asf_scrambling_h*asf_scrambling_w;
 	for(x=0;x<asf->scrambling_w;x++)
 	  for(y=0;y<asf->scrambling_h;y++){
-	    memcpy(dst+i,s2+(y*asf->scrambling_w+x)*asf->scrambling_b,asf->scrambling_b);
+	    fast_memcpy(dst+i,s2+(y*asf->scrambling_w+x)*asf->scrambling_b,asf->scrambling_b);
 		i+=asf->scrambling_b;
 	  }
 	s2+=asf->scrambling_h*asf->scrambling_w*asf->scrambling_b;
   }
-  //if(i<len) memcpy(dst+i,src+i,len-i);
+  //if(i<len) fast_memcpy(dst+i,src+i,len-i);
   free(*src);
   *src = dst;
 }
@@ -154,7 +154,7 @@
 {
   if(dp->len!=offs && offs!=-1) mp_msg(MSGT_DEMUX,MSGL_V,"warning! fragment.len=%d BUT next fragment offset=%d  \n",dp->len,offs);
   dp->buffer=realloc(dp->buffer,dp->len+len+FF_INPUT_BUFFER_PADDING_SIZE);
-  memcpy(dp->buffer+dp->len,data,len);
+  fast_memcpy(dp->buffer+dp->len,data,len);
   memset(dp->buffer+dp->len+len, 0, FF_INPUT_BUFFER_PADDING_SIZE);
   mp_dbg(MSGT_DEMUX,MSGL_DBG4,"data appended! %d+%d\n",dp->len,len);
   dp->len+=len;
@@ -224,7 +224,7 @@
         return 0;
       }
       dp=new_demux_packet(len);
-      memcpy(dp->buffer,data,len);
+      fast_memcpy(dp->buffer,data,len);
       if (asf->asf_is_dvr_ms)
         dp->pts=time*0.0000001f;
       else
Index: libmpdemux/demux_gif.c
===================================================================
--- libmpdemux/demux_gif.c	(revision 23390)
+++ libmpdemux/demux_gif.c	(working copy)
@@ -145,7 +145,7 @@
   dp = new_demux_packet(priv->w * priv->h);
   buf = calloc(gif->Image.Width, gif->Image.Height);
   if (priv->useref)
-    memcpy(dp->buffer, priv->refimg, priv->w * priv->h);
+    fast_memcpy(dp->buffer, priv->refimg, priv->w * priv->h);
   else
     memset(dp->buffer, gif->SBackGroundColor, priv->w * priv->h);
   
@@ -199,7 +199,7 @@
       memcpy_transp_pic(dest, buf, w, h, priv->w, gif->Image.Width,
                         transparency, transparent_col);
 
-    if (refmode == 1) memcpy(priv->refimg, dp->buffer, priv->w * priv->h);
+    if (refmode == 1) fast_memcpy(priv->refimg, dp->buffer, priv->w * priv->h);
     if (refmode == 2 && priv->useref) {
       dest = priv->refimg + priv->w * t + l;
       memset(buf, gif->SBackGroundColor, len);
Index: libmpdemux/demuxer.c
===================================================================
--- libmpdemux/demuxer.c	(revision 23390)
+++ libmpdemux/demuxer.c	(working copy)
@@ -429,7 +429,7 @@
     if(!ds_fill_buffer(ds)) return bytes;
   } else {
     if(x>len) x=len;
-    if(mem) memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
+    if(mem) fast_memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
     bytes+=x;len-=x;ds->buffer_pos+=x;
   }
 }
@@ -445,7 +445,7 @@
     if(!ds_fill_buffer(ds)) return bytes;
   } else {
     if(x>len) x=len;
-    if(mem) memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
+    if(mem) fast_memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
     bytes+=x;len-=x;ds->buffer_pos+=x;
     return bytes; // stop at end of package! (for correct timestamping)
   }
Index: libaf/af_format.c
===================================================================
--- libaf/af_format.c	(revision 23390)
+++ libaf/af_format.c	(working copy)
@@ -287,7 +287,7 @@
       if(c->bps != l->bps)
 	change_bps(c->audio,l->audio,len,c->bps,l->bps);
       else
-	memcpy(l->audio,c->audio,len*c->bps);
+	fast_memcpy(l->audio,c->audio,len*c->bps);
       break;
     }
   }
Index: mencoder.c
===================================================================
--- mencoder.c	(revision 23390)
+++ mencoder.c	(working copy)
@@ -338,10 +338,10 @@
 		    if(ret>0) sh_audio->a_out_buffer_len+=ret; else at_eof=1;
 		}
 		if(len>sh_audio->a_out_buffer_len) len=sh_audio->a_out_buffer_len;
-		memcpy(buffer+size,sh_audio->a_out_buffer,len);
+		fast_memcpy(buffer+size,sh_audio->a_out_buffer,len);
 		sh_audio->a_out_buffer_len-=len; size+=len;
 		if(sh_audio->a_out_buffer_len>0)
-		    memcpy(sh_audio->a_out_buffer,&sh_audio->a_out_buffer[len],sh_audio->a_out_buffer_len);
+		    fast_memcpy(sh_audio->a_out_buffer,&sh_audio->a_out_buffer[len],sh_audio->a_out_buffer_len);
     }
     return size;
 }
@@ -771,7 +771,7 @@
 	if (!curfile) {
 		if (sh_video->bih) {
 			mux_v->bih=malloc(sh_video->bih->biSize);
-			memcpy(mux_v->bih, sh_video->bih, sh_video->bih->biSize);
+			fast_memcpy(mux_v->bih, sh_video->bih, sh_video->bih->biSize);
 		}
     else
     {
@@ -941,7 +941,7 @@
     }
     if (sh_audio->wf){
 	mux_a->wf=malloc(sizeof(WAVEFORMATEX) + sh_audio->wf->cbSize);
-	memcpy(mux_a->wf, sh_audio->wf, sizeof(WAVEFORMATEX) + sh_audio->wf->cbSize);
+	fast_memcpy(mux_a->wf, sh_audio->wf, sizeof(WAVEFORMATEX) + sh_audio->wf->cbSize);
 	if(!sh_audio->i_bps) sh_audio->i_bps=mux_a->wf->nAvgBytesPerSec;
     } else {
 	mux_a->wf = malloc(sizeof(WAVEFORMATEX));
@@ -1222,7 +1222,7 @@
 	    mux_a->wf->nAvgBytesPerSec=0.5f+(double)mux_a->size/mux_a->timer; // avg bps (VBR)
 	if(mux_a->buffer_len>=len){
 	    mux_a->buffer_len-=len;
-	    memcpy(mux_a->buffer,mux_a->buffer+len,mux_a->buffer_len);
+	    fast_memcpy(mux_a->buffer,mux_a->buffer+len,mux_a->buffer_len);
 	}
 
 
@@ -1573,9 +1573,9 @@
     int i;
 
     for(i = 0; i < height/2; i++) {
-        memcpy(tmp, &src[i*width], width);
-        memcpy(&dst[i * width], &src[(height - i) * width], width);
-        memcpy(&dst[(height - i) * width], tmp, width);
+        fast_memcpy(tmp, &src[i*width], width);
+        fast_memcpy(&dst[i * width], &src[(height - i) * width], width);
+        fast_memcpy(&dst[(height - i) * width], tmp, width);
     }
 
     free(tmp);


More information about the MPlayer-dev-eng mailing list