[MPlayer-dev-eng] [RFC] emms/sfence in fast memcpy()
Reimar Döffinger
Reimar.Doeffinger at stud.uni-karlsruhe.de
Mon May 28 00:40:21 CEST 2007
Hello,
On Sun, May 27, 2007 at 11:53:39PM +0200, Michael Niedermayer wrote:
> current fast memcpy (agp and normal) variants do emms and sfence at the
> end, this is slow and particularly annoying if the copy is called once per
> line which it is in many cases
> i think that something should be done about that, though I am not
> volunteering to do the work
>
> if we didn't do this silly redefine memcpy() then this would be easier ...
> a mp/av_memcpy() could simply leave the emms/sfence to the caller
I have already done most of the work to make all relevant places use
fast_memcpy explicitly, see attached patch.
In about 5 places it is obviously nonsense to use fast_memcpy; I would
replace these in a second patch. There are also two FIXMEs in
libswscale, since I have not yet decided how to fix these. Probably just
by adding a "#define memcpy fast_memcpy" there...
And could someone please explain what the point is of that my_memcpy_pic
function that is present (and, worse, duplicated) in so many filters??
Greetings,
Reimar Döffinger
-------------- next part --------------
Index: libmpcodecs/vf_fspp.c
===================================================================
--- libmpcodecs/vf_fspp.c (revision 23390)
+++ libmpcodecs/vf_fspp.c (working copy)
@@ -421,15 +421,15 @@
if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
for(y=0; y<height; y++){
int index= 8 + 8*stride + y*stride;
- memcpy(p->src + index, src + y*src_stride, width);//this line can be avoided by using DR & user fr.buffers
+ fast_memcpy(p->src + index, src + y*src_stride, width);//this line can be avoided by using DR & user fr.buffers
for(x=0; x<8; x++){
p->src[index - x - 1]= p->src[index + x ];
p->src[index + width + x ]= p->src[index + width - x - 1];
}
}
for(y=0; y<8; y++){
- memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride);
- memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
+ fast_memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride);
+ fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
}
//FIXME (try edge emu)
@@ -456,8 +456,8 @@
column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT
}
row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1));
- memcpy(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling
- memcpy(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM));
+ fast_memcpy(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling
+ fast_memcpy(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM));
}
//
es=width+8-x0; // 8, ...
@@ -532,7 +532,7 @@
if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){
if(!vf->priv->non_b_qp)
vf->priv->non_b_qp= malloc(mpi->qstride * ((mpi->h + 15) >> 4));
- memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
+ fast_memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
}
if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
char *qp_tab= vf->priv->non_b_qp;
Index: libmpcodecs/vd_mpng.c
===================================================================
--- libmpcodecs/vd_mpng.c (revision 23390)
+++ libmpcodecs/vd_mpng.c (working copy)
@@ -58,7 +58,7 @@
{
char * p = pngstr->io_ptr;
if(size>pngLength-pngPointer && pngLength>=pngPointer) size=pngLength-pngPointer;
- memcpy( buffer,(char *)&p[pngPointer],size );
+ fast_memcpy( buffer,(char *)&p[pngPointer],size );
pngPointer+=size;
}
Index: libmpcodecs/vf_filmdint.c
===================================================================
--- libmpcodecs/vf_filmdint.c (revision 23390)
+++ libmpcodecs/vf_filmdint.c (working copy)
@@ -97,7 +97,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
@@ -711,7 +711,7 @@
long bos = b - a;
long cos = c - a;
if (field) {
- memcpy(d, b, w);
+ fast_memcpy(d, b, w);
h--;
d += ds;
a += ss;
@@ -720,8 +720,8 @@
cos += ss;
while (h > 2) {
if (threshold >= 128) {
- memcpy(d, a, w);
- memcpy(d+ds, a+bos, w);
+ fast_memcpy(d, a, w);
+ fast_memcpy(d+ds, a+bos, w);
} else if (mmx2 == 1) {
ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
} else
@@ -730,9 +730,9 @@
d += 2*ds;
a += 2*ss;
}
- memcpy(d, a, w);
+ fast_memcpy(d, a, w);
if (h == 2)
- memcpy(d+ds, a+bos, w);
+ fast_memcpy(d+ds, a+bos, w);
return ret;
}
Index: libmpcodecs/vf_kerndeint.c
===================================================================
--- libmpcodecs/vf_kerndeint.c (revision 23390)
+++ libmpcodecs/vf_kerndeint.c (working copy)
@@ -128,16 +128,16 @@
dstp = dstp_saved + (1-order) * dst_pitch;
for (y=0; y<h; y+=2) {
- memcpy(dstp, srcp, w);
+ fast_memcpy(dstp, srcp, w);
srcp += 2*src_pitch;
dstp += 2*dst_pitch;
}
// Copy through the lines that will be missed below.
- memcpy(dstp_saved + order*dst_pitch, srcp_saved + (1-order)*src_pitch, w);
- memcpy(dstp_saved + (2+order)*dst_pitch, srcp_saved + (3-order)*src_pitch, w);
- memcpy(dstp_saved + (h-2+order)*dst_pitch, srcp_saved + (h-1-order)*src_pitch, w);
- memcpy(dstp_saved + (h-4+order)*dst_pitch, srcp_saved + (h-3-order)*src_pitch, w);
+ fast_memcpy(dstp_saved + order*dst_pitch, srcp_saved + (1-order)*src_pitch, w);
+ fast_memcpy(dstp_saved + (2+order)*dst_pitch, srcp_saved + (3-order)*src_pitch, w);
+ fast_memcpy(dstp_saved + (h-2+order)*dst_pitch, srcp_saved + (h-1-order)*src_pitch, w);
+ fast_memcpy(dstp_saved + (h-4+order)*dst_pitch, srcp_saved + (h-3-order)*src_pitch, w);
/* For the other field choose adaptively between using the previous field
or the interpolant from the current field. */
@@ -270,7 +270,7 @@
srcp = mpi->planes[z];
dstp = pmpi->planes[z];
for (y=0; y<h; y++) {
- memcpy(dstp, srcp, w);
+ fast_memcpy(dstp, srcp, w);
srcp += src_pitch;
dstp += psrc_pitch;
}
Index: libmpcodecs/ad_mpc.c
===================================================================
--- libmpcodecs/ad_mpc.c (revision 23390)
+++ libmpcodecs/ad_mpc.c (working copy)
@@ -54,7 +54,7 @@
if (d->pos < d->header_len) {
if (s > d->header_len - d->pos)
s = d->header_len - d->pos;
- memcpy(p, &d->header[d->pos], s);
+ fast_memcpy(p, &d->header[d->pos], s);
} else
s = 0;
memset(&p[s], 0, size - s);
Index: libmpcodecs/vf_tinterlace.c
===================================================================
--- libmpcodecs/vf_tinterlace.c (revision 23390)
+++ libmpcodecs/vf_tinterlace.c (working copy)
@@ -43,7 +43,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
Index: libmpcodecs/native/nuppelvideo.c
===================================================================
--- libmpcodecs/native/nuppelvideo.c (revision 23390)
+++ libmpcodecs/native/nuppelvideo.c (working copy)
@@ -67,7 +67,7 @@
switch(encodedh->comptype)
{
case '0': /* raw YUV420 */
- memcpy(decoded, encoded + 12, out_len);
+ fast_memcpy(decoded, encoded + 12, out_len);
break;
case '1': /* RTJpeg */
RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded );
@@ -102,13 +103,13 @@
break;
case 'L': /* copy last frame */
#ifdef KEEP_BUFFER
- memcpy ( decoded, previous_buffer, width*height*3/2);
+ fast_memcpy ( decoded, previous_buffer, width*height*3/2);
#endif
break;
}
#ifdef KEEP_BUFFER
- memcpy(previous_buffer, decoded, width*height*3/2);
+ fast_memcpy(previous_buffer, decoded, width*height*3/2);
#endif
break;
}
Index: libmpcodecs/vf_noise.c
===================================================================
--- libmpcodecs/vf_noise.c (revision 23390)
+++ libmpcodecs/vf_noise.c (working copy)
@@ -278,12 +278,12 @@
{
if(src==dst) return;
- if(dstStride==srcStride) memcpy(dst, src, srcStride*height);
+ if(dstStride==srcStride) fast_memcpy(dst, src, srcStride*height);
else
{
for(y=0; y<height; y++)
{
- memcpy(dst, src, width);
+ fast_memcpy(dst, src, width);
dst+= dstStride;
src+= srcStride;
}
Index: libmpcodecs/vf_down3dright.c
===================================================================
--- libmpcodecs/vf_down3dright.c (revision 23390)
+++ libmpcodecs/vf_down3dright.c (working copy)
@@ -67,7 +67,7 @@
*t++ = *sR++;
}
if (p->scaleh == 1) {
- memcpy(to + dst, to, dst);
+ fast_memcpy(to + dst, to, dst);
to += dst;
}
to += dst;
Index: libmpcodecs/vf_phase.c
===================================================================
--- libmpcodecs/vf_phase.c (revision 23390)
+++ libmpcodecs/vf_phase.c (working copy)
@@ -45,8 +45,8 @@
for(end=to+h*ts, buf=*bufp, top=1; to<end; from+=fs, to+=ts, buf+=w, top^=1)
{
- memcpy(to, mode==(top?BOTTOM_FIRST:TOP_FIRST)?buf:from, w);
- memcpy(buf, from, w);
+ fast_memcpy(to, mode==(top?BOTTOM_FIRST:TOP_FIRST)?buf:from, w);
+ fast_memcpy(buf, from, w);
}
}
Index: libmpcodecs/vf_yvu9.c
===================================================================
--- libmpcodecs/vf_yvu9.c (revision 23390)
+++ libmpcodecs/vf_yvu9.c (working copy)
@@ -37,7 +37,7 @@
mpi->w, mpi->h);
for(y=0;y<mpi->h;y++)
- memcpy(dmpi->planes[0]+dmpi->stride[0]*y,
+ fast_memcpy(dmpi->planes[0]+dmpi->stride[0]*y,
mpi->planes[0]+mpi->stride[0]*y,
mpi->w);
Index: libmpcodecs/vf_uspp.c
===================================================================
--- libmpcodecs/vf_uspp.c (revision 23390)
+++ libmpcodecs/vf_uspp.c (working copy)
@@ -154,15 +154,15 @@
continue; // HACK avoid crash for Y8 colourspace
for(y=0; y<h; y++){
int index= block + block*stride + y*stride;
- memcpy(p->src[i] + index, src[i] + y*src_stride[i], w);
+ fast_memcpy(p->src[i] + index, src[i] + y*src_stride[i], w);
for(x=0; x<block; x++){
p->src[i][index - x - 1]= p->src[i][index + x ];
p->src[i][index + w + x ]= p->src[i][index + w - x - 1];
}
}
for(y=0; y<block; y++){
- memcpy(p->src[i] + ( block-1-y)*stride, p->src[i] + ( y+block )*stride, stride);
- memcpy(p->src[i] + (h+block +y)*stride, p->src[i] + (h-y+block-1)*stride, stride);
+ fast_memcpy(p->src[i] + ( block-1-y)*stride, p->src[i] + ( y+block )*stride, stride);
+ fast_memcpy(p->src[i] + (h+block +y)*stride, p->src[i] + (h-y+block-1)*stride, stride);
}
p->frame->linesize[i]= stride;
Index: libmpcodecs/vf_telecine.c
===================================================================
--- libmpcodecs/vf_telecine.c (revision 23390)
+++ libmpcodecs/vf_telecine.c (working copy)
@@ -22,7 +22,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
Index: libmpcodecs/vf_tfields.c
===================================================================
--- libmpcodecs/vf_tfields.c (revision 23390)
+++ libmpcodecs/vf_tfields.c (working copy)
@@ -27,7 +27,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
@@ -68,7 +68,7 @@
long crap1, crap2;
if (up) {
ssd = -ss;
- memcpy(d, s, w);
+ fast_memcpy(d, s, w);
d += ds;
s += ss;
}
@@ -92,7 +92,7 @@
d += ds;
s += ss;
}
- if (!up) memcpy(d, s, w);
+ if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory");
}
#endif
@@ -104,7 +104,7 @@
long crap1, crap2;
if (up) {
ssd = -ss;
- memcpy(d, s, w);
+ fast_memcpy(d, s, w);
d += ds;
s += ss;
}
@@ -129,7 +129,7 @@
d += ds;
s += ss;
}
- if (!up) memcpy(d, s, w);
+ if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory");
}
#endif
@@ -141,7 +141,7 @@
int crap1, crap2;
if (up) {
ssd = -ss;
- memcpy(d, s, w);
+ fast_memcpy(d, s, w);
d += ds;
s += ss;
}
@@ -179,7 +179,7 @@
d += ds;
s += ss;
}
- if (!up) memcpy(d, s, w);
+ if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory");
}
@@ -193,7 +193,7 @@
int crap1, crap2;
if (up) {
ssd = -ss;
- memcpy(d, s, w);
+ fast_memcpy(d, s, w);
d += ds; s += ss;
}
for (j=0; j<w; j++)
@@ -259,7 +259,7 @@
for (j=0; j<w; j++)
d[j] = (s[j+ssd] + 3*s[j])>>2;
d += ds; s += ss;
- if (!up) memcpy(d, s, w);
+ if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory");
}
#endif
@@ -276,7 +276,7 @@
int i, j, ssd=ss;
if (up) {
ssd = -ss;
- memcpy(d, s, w);
+ fast_memcpy(d, s, w);
d += ds;
s += ss;
}
@@ -286,7 +286,7 @@
d += ds;
s += ss;
}
- if (!up) memcpy(d, s, w);
+ if (!up) fast_memcpy(d, s, w);
}
static void qpel_4tap_C(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
@@ -294,7 +294,7 @@
int i, j, ssd=ss;
if (up) {
ssd = -ss;
- memcpy(d, s, w);
+ fast_memcpy(d, s, w);
d += ds; s += ss;
}
for (j=0; j<w; j++)
@@ -308,7 +308,7 @@
for (j=0; j<w; j++)
d[j] = (s[j+ssd] + 3*s[j] + 2)>>2;
d += ds; s += ss;
- if (!up) memcpy(d, s, w);
+ if (!up) fast_memcpy(d, s, w);
}
static void (*qpel_li)(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up);
Index: libmpcodecs/vf_softpulldown.c
===================================================================
--- libmpcodecs/vf_softpulldown.c (revision 23390)
+++ libmpcodecs/vf_softpulldown.c (working copy)
@@ -24,7 +24,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
Index: libmpcodecs/vf_spp.c
===================================================================
--- libmpcodecs/vf_spp.c (revision 23390)
+++ libmpcodecs/vf_spp.c (working copy)
@@ -380,15 +380,15 @@
if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
for(y=0; y<height; y++){
int index= 8 + 8*stride + y*stride;
- memcpy(p->src + index, src + y*src_stride, width);
+ fast_memcpy(p->src + index, src + y*src_stride, width);
for(x=0; x<8; x++){
p->src[index - x - 1]= p->src[index + x ];
p->src[index + width + x ]= p->src[index + width - x - 1];
}
}
for(y=0; y<8; y++){
- memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride);
- memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
+ fast_memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride);
+ fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
}
//FIXME (try edge emu)
@@ -478,7 +478,7 @@
if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){
if(!vf->priv->non_b_qp)
vf->priv->non_b_qp= malloc(mpi->qstride * ((mpi->h + 15) >> 4));
- memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
+ fast_memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));
}
if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
char *qp_tab= vf->priv->non_b_qp;
Index: libmpcodecs/vf_il.c
===================================================================
--- libmpcodecs/vf_il.c (revision 23390)
+++ libmpcodecs/vf_il.c (working copy)
@@ -58,20 +58,20 @@
switch(interleave){
case -1:
for(y=0; y < m; y++){
- memcpy(dst + dstStride* y , src + srcStride*(y*2 + a), w);
- memcpy(dst + dstStride*(y + m), src + srcStride*(y*2 + b), w);
+ fast_memcpy(dst + dstStride* y , src + srcStride*(y*2 + a), w);
+ fast_memcpy(dst + dstStride*(y + m), src + srcStride*(y*2 + b), w);
}
break;
case 0:
for(y=0; y < m; y++){
- memcpy(dst + dstStride* y*2 , src + srcStride*(y*2 + a), w);
- memcpy(dst + dstStride*(y*2+1), src + srcStride*(y*2 + b), w);
+ fast_memcpy(dst + dstStride* y*2 , src + srcStride*(y*2 + a), w);
+ fast_memcpy(dst + dstStride*(y*2+1), src + srcStride*(y*2 + b), w);
}
break;
case 1:
for(y=0; y < m; y++){
- memcpy(dst + dstStride*(y*2+a), src + srcStride* y , w);
- memcpy(dst + dstStride*(y*2+b), src + srcStride*(y + m), w);
+ fast_memcpy(dst + dstStride*(y*2+a), src + srcStride* y , w);
+ fast_memcpy(dst + dstStride*(y*2+b), src + srcStride*(y + m), w);
}
break;
}
Index: libmpcodecs/vf_pp7.c
===================================================================
--- libmpcodecs/vf_pp7.c (revision 23390)
+++ libmpcodecs/vf_pp7.c (working copy)
@@ -290,15 +290,15 @@
if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
for(y=0; y<height; y++){
int index= 8 + 8*stride + y*stride;
- memcpy(p_src + index, src + y*src_stride, width);
+ fast_memcpy(p_src + index, src + y*src_stride, width);
for(x=0; x<8; x++){
p_src[index - x - 1]= p_src[index + x ];
p_src[index + width + x ]= p_src[index + width - x - 1];
}
}
for(y=0; y<8; y++){
- memcpy(p_src + ( 7-y)*stride, p_src + ( y+8)*stride, stride);
- memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
+ fast_memcpy(p_src + ( 7-y)*stride, p_src + ( y+8)*stride, stride);
+ fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
}
//FIXME (try edge emu)
Index: libmpcodecs/vf_pullup.c
===================================================================
--- libmpcodecs/vf_pullup.c (revision 23390)
+++ libmpcodecs/vf_pullup.c (working copy)
@@ -31,7 +31,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
@@ -47,10 +47,10 @@
for(i=h>>1; i; i--)
{
- memcpy(dst, src0, w);
+ fast_memcpy(dst, src0, w);
src0 += ss;
dst += ds;
- memcpy(dst, src1, w);
+ fast_memcpy(dst, src1, w);
src1 += ss;
dst += ds;
}
@@ -153,8 +153,8 @@
}
}
if (mpi->qscale) {
- memcpy(b->planes[3], mpi->qscale, c->w[3]);
- memcpy(b->planes[3]+c->w[3], mpi->qscale, c->w[3]);
+ fast_memcpy(b->planes[3], mpi->qscale, c->w[3]);
+ fast_memcpy(b->planes[3]+c->w[3], mpi->qscale, c->w[3]);
}
p = mpi->fields & MP_IMGFIELD_TOP_FIRST ? 0 :
Index: libmpcodecs/vf_ivtc.c
===================================================================
--- libmpcodecs/vf_ivtc.c (revision 23390)
+++ libmpcodecs/vf_ivtc.c (working copy)
@@ -47,7 +47,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
Index: libmpcodecs/vf_yadif.c
===================================================================
--- libmpcodecs/vf_yadif.c (revision 23390)
+++ libmpcodecs/vf_yadif.c (working copy)
@@ -62,7 +62,7 @@
static void store_ref(struct vf_priv_s *p, uint8_t *src[3], int src_stride[3], int width, int height){
int i;
- memcpy (p->ref[3], p->ref[0], sizeof(uint8_t *)*3);
+ fast_memcpy (p->ref[3], p->ref[0], sizeof(uint8_t *)*3);
memmove(p->ref[0], p->ref[1], sizeof(uint8_t *)*3*3);
for(i=0; i<3; i++){
@@ -363,7 +363,7 @@
uint8_t *dst2= &dst[i][y*dst_stride[i]];
filter_line(p, dst2, prev, cur, next, w, refs, parity ^ tff);
}else{
- memcpy(&dst[i][y*dst_stride[i]], &p->ref[1][i][y*refs], w);
+ fast_memcpy(&dst[i][y*dst_stride[i]], &p->ref[1][i][y*refs], w);
}
}
}
Index: libmpcodecs/vf_detc.c
===================================================================
--- libmpcodecs/vf_detc.c (revision 23390)
+++ libmpcodecs/vf_detc.c (working copy)
@@ -60,7 +60,7 @@
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src+= srcStride;
dst+= dstStride;
}
Index: libmpcodecs/vf_bmovl.c
===================================================================
--- libmpcodecs/vf_bmovl.c (revision 23390)
+++ libmpcodecs/vf_bmovl.c (working copy)
@@ -388,14 +388,14 @@
if(vf->priv->opaque) { // Just copy buffer memory to screen
for( ypos=vf->priv->y1 ; ypos < vf->priv->y2 ; ypos++ ) {
- memcpy( dmpi->planes[0] + (ypos*dmpi->stride[0]) + vf->priv->x1,
+ fast_memcpy( dmpi->planes[0] + (ypos*dmpi->stride[0]) + vf->priv->x1,
vf->priv->bitmap.y + (ypos*vf->priv->w) + vf->priv->x1,
vf->priv->x2 - vf->priv->x1 );
if(ypos%2) {
- memcpy( dmpi->planes[1] + ((ypos/2)*dmpi->stride[1]) + (vf->priv->x1/2),
+ fast_memcpy( dmpi->planes[1] + ((ypos/2)*dmpi->stride[1]) + (vf->priv->x1/2),
vf->priv->bitmap.u + (((ypos/2)*(vf->priv->w)/2)) + (vf->priv->x1/2),
(vf->priv->x2 - vf->priv->x1)/2 );
- memcpy( dmpi->planes[2] + ((ypos/2)*dmpi->stride[2]) + (vf->priv->x1/2),
+ fast_memcpy( dmpi->planes[2] + ((ypos/2)*dmpi->stride[2]) + (vf->priv->x1/2),
vf->priv->bitmap.v + (((ypos/2)*(vf->priv->w)/2)) + (vf->priv->x1/2),
(vf->priv->x2 - vf->priv->x1)/2 );
}
Index: libmpcodecs/vf_unsharp.c
===================================================================
--- libmpcodecs/vf_unsharp.c (revision 23390)
+++ libmpcodecs/vf_unsharp.c (working copy)
@@ -85,10 +85,10 @@
if( src == dst )
return;
if( dstStride == srcStride )
- memcpy( dst, src, srcStride*height );
+ fast_memcpy( dst, src, srcStride*height );
else
for( y=0; y<height; y++, dst+=dstStride, src+=srcStride )
- memcpy( dst, src, width );
+ fast_memcpy( dst, src, width );
return;
}
Index: libmpcodecs/vd_mtga.c
===================================================================
--- libmpcodecs/vd_mtga.c (revision 23390)
+++ libmpcodecs/vd_mtga.c (working copy)
@@ -110,20 +110,20 @@
if (packet_header & 0x80) /* runlength encoded packet */
{
- memcpy(final, data, num_bytes);
+ fast_memcpy(final, data, num_bytes);
// Note: this will be slow when DR to vram!
i=num_bytes;
while(2*i<=replen){
- memcpy(final+i,final,i);
+ fast_memcpy(final+i,final,i);
i*=2;
}
- memcpy(final+i,final,replen-i);
+ fast_memcpy(final+i,final,replen-i);
data += num_bytes;
}
else /* raw packet */
{
- memcpy(final, data, replen);
+ fast_memcpy(final, data, replen);
data += replen;
}
@@ -144,7 +144,7 @@
for (row = info->start_row; (!info->origin && row) || (info->origin && row < info->height); row += info->increment)
{
final = mpi->planes[0] + mpi->stride[0] * row;
- memcpy(final, data, info->width * num_bytes);
+ fast_memcpy(final, data, info->width * num_bytes);
data += info->width * num_bytes;
}
Index: stream/tvi_v4l.c
===================================================================
--- stream/tvi_v4l.c (revision 23390)
+++ stream/tvi_v4l.c (working copy)
@@ -1391,9 +1391,9 @@
// YV12 uses VIDEO_PALETTE_YUV420P, but the planes are swapped
if (priv->format == IMGFMT_YV12) {
- memcpy(dest, source, priv->width * priv->height);
- memcpy(dest+priv->width * priv->height*5/4, source+priv->width * priv->height, priv->width * priv->height/4);
- memcpy(dest+priv->width * priv->height, source+priv->width * priv->height*5/4, priv->width * priv->height/4);
+ fast_memcpy(dest, source, priv->width * priv->height);
+ fast_memcpy(dest+priv->width * priv->height*5/4, source+priv->width * priv->height, priv->width * priv->height/4);
+ fast_memcpy(dest+priv->width * priv->height, source+priv->width * priv->height*5/4, priv->width * priv->height/4);
return;
}
@@ -1404,7 +1404,7 @@
case VIDEO_PALETTE_RGB565:
sptr = source + (priv->height-1)*priv->bytesperline;
for (i = 0; i < priv->height; i++) {
- memcpy(dest, sptr, priv->bytesperline);
+ fast_memcpy(dest, sptr, priv->bytesperline);
dest += priv->bytesperline;
sptr -= priv->bytesperline;
}
@@ -1412,7 +1412,7 @@
case VIDEO_PALETTE_UYVY:
case VIDEO_PALETTE_YUV420P:
default:
- memcpy(dest, source, priv->bytesperline * priv->height);
+ fast_memcpy(dest, source, priv->bytesperline * priv->height);
}
}
@@ -1640,7 +1640,7 @@
pthread_mutex_lock(&priv->video_buffer_mutex);
interval = (double)priv->video_timebuffer[priv->video_head]*1e-6;
- memcpy(buffer, priv->video_ringbuffer[priv->video_head], len);
+ fast_memcpy(buffer, priv->video_ringbuffer[priv->video_head], len);
priv->video_cnt--;
priv->video_head = (priv->video_head+1)%priv->video_buffer_size_current;
pthread_mutex_unlock(&priv->video_buffer_mutex);
@@ -1741,7 +1741,7 @@
while (priv->audio_head == priv->audio_tail) {
usleep(10000);
}
- memcpy(buffer, priv->audio_ringbuffer+priv->audio_head*priv->audio_in.blocksize, len);
+ fast_memcpy(buffer, priv->audio_ringbuffer+priv->audio_head*priv->audio_in.blocksize, len);
priv->audio_head = (priv->audio_head+1) % priv->audio_buffer_size;
priv->audio_cnt--;
priv->audio_sent_blocks_total++;
Index: libao2/ao_sdl.c
===================================================================
--- libao2/ao_sdl.c (revision 23390)
+++ libao2/ao_sdl.c (working copy)
@@ -88,10 +88,10 @@
if (len > free) len = free;
if (first_len > len) first_len = len;
// till end of buffer
- memcpy (&buffer[write_pos], data, first_len);
+ fast_memcpy (&buffer[write_pos], data, first_len);
if (len > first_len) { // we have to wrap around
// remaining part from beginning of buffer
- memcpy (buffer, &data[first_len], len - first_len);
+ fast_memcpy (buffer, &data[first_len], len - first_len);
}
write_pos = (write_pos + len) % BUFFSIZE;
return len;
@@ -106,14 +106,14 @@
#ifdef USE_SDL_INTERNAL_MIXER
SDL_MixAudio (data, &buffer[read_pos], first_len, volume);
#else
- memcpy (data, &buffer[read_pos], first_len);
+ fast_memcpy (data, &buffer[read_pos], first_len);
#endif
if (len > first_len) { // we have to wrap around
// remaining part from beginning of buffer
#ifdef USE_SDL_INTERNAL_MIXER
SDL_MixAudio (&data[first_len], buffer, len - first_len, volume);
#else
- memcpy (&data[first_len], buffer, len - first_len);
+ fast_memcpy (&data[first_len], buffer, len - first_len);
#endif
}
read_pos = (read_pos + len) % BUFFSIZE;
Index: libao2/ao_dsound.c
===================================================================
--- libao2/ao_dsound.c (revision 23390)
+++ libao2/ao_dsound.c (working copy)
@@ -199,7 +199,7 @@
if(device_num==*device_index){
mp_msg(MSGT_AO, MSGL_V,"<--");
if(guid){
- memcpy(&device,guid,sizeof(GUID));
+ fast_memcpy(&device,guid,sizeof(GUID));
}
}
mp_msg(MSGT_AO, MSGL_V,"\n");
@@ -337,14 +337,14 @@
numsamp = dwBytes1 / (ao_data.channels * sampsize); // number of samples for each channel in this buffer
for( i = 0; i < numsamp; i++ ) for( j = 0; j < ao_data.channels; j++ ) {
- memcpy(lpvPtr1+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
+ fast_memcpy(lpvPtr1+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
}
if (NULL != lpvPtr2 )
{
numsamp = dwBytes2 / (ao_data.channels * sampsize);
for( i = 0; i < numsamp; i++ ) for( j = 0; j < ao_data.channels; j++ ) {
- memcpy(lpvPtr2+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+dwBytes1+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
+ fast_memcpy(lpvPtr2+(i*ao_data.channels*sampsize)+(chantable[j]*sampsize),data+dwBytes1+(i*ao_data.channels*sampsize)+(j*sampsize),sampsize);
}
}
@@ -352,8 +352,8 @@
if(write_offset>=buffer_size)write_offset=dwBytes2;
} else {
// Write to pointers without reordering.
- memcpy(lpvPtr1,data,dwBytes1);
- if (NULL != lpvPtr2 )memcpy(lpvPtr2,data+dwBytes1,dwBytes2);
+ fast_memcpy(lpvPtr1,data,dwBytes1);
+ if (NULL != lpvPtr2 )fast_memcpy(lpvPtr2,data+dwBytes1,dwBytes2);
write_offset+=dwBytes1+dwBytes2;
if(write_offset>=buffer_size)write_offset=dwBytes2;
}
Index: libao2/ao_win32.c
===================================================================
--- libao2/ao_win32.c (revision 23390)
+++ libao2/ao_win32.c (working copy)
@@ -299,7 +299,7 @@
waveOutUnprepareHeader(hWaveOut, current, sizeof(WAVEHDR));
x=BUFFER_SIZE-buf_write_pos;
if(x>len) x=len;
- memcpy(current->lpData+buf_write_pos,data+len2,x);
+ fast_memcpy(current->lpData+buf_write_pos,data+len2,x);
if(buf_write_pos==0)full_buffers++;
len2+=x; len-=x;
buffered_bytes+=x; buf_write_pos+=x;
Index: libao2/ao_jack.c
===================================================================
--- libao2/ao_jack.c (revision 23390)
+++ libao2/ao_jack.c (working copy)
@@ -111,10 +111,10 @@
if (len > free) len = free;
if (first_len > len) first_len = len;
// till end of buffer
- memcpy (&buffer[write_pos], data, first_len);
+ fast_memcpy (&buffer[write_pos], data, first_len);
if (len > first_len) { // we have to wrap around
// remaining part from beginning of buffer
- memcpy (buffer, &data[first_len], len - first_len);
+ fast_memcpy (buffer, &data[first_len], len - first_len);
}
write_pos = (write_pos + len) % BUFFSIZE;
return len;
Index: gui/mplayer/pb.c
===================================================================
--- gui/mplayer/pb.c (revision 23390)
+++ gui/mplayer/pb.c (working copy)
@@ -88,7 +88,7 @@
vo_mouse_autohide=0;
- memcpy( mplPBDrawBuffer,appMPlayer.bar.Bitmap.Image,appMPlayer.bar.Bitmap.ImageSize );
+ fast_memcpy( mplPBDrawBuffer,appMPlayer.bar.Bitmap.Image,appMPlayer.bar.Bitmap.ImageSize );
Render( &appMPlayer.barWindow,appMPlayer.barItems,appMPlayer.NumberOfBarItems,mplPBDrawBuffer,appMPlayer.bar.Bitmap.ImageSize );
wsConvert( &appMPlayer.barWindow,mplPBDrawBuffer,appMPlayer.bar.Bitmap.ImageSize );
}
Index: gui/mplayer/mw.c
===================================================================
--- gui/mplayer/mw.c (revision 23390)
+++ gui/mplayer/mw.c (working copy)
@@ -65,7 +65,7 @@
btnModify( evSetMoviePosition,guiIntfStruct.Position );
btnModify( evSetVolume,guiIntfStruct.Volume );
- memcpy( mplDrawBuffer,appMPlayer.main.Bitmap.Image,appMPlayer.main.Bitmap.ImageSize );
+ fast_memcpy( mplDrawBuffer,appMPlayer.main.Bitmap.Image,appMPlayer.main.Bitmap.ImageSize );
Render( &appMPlayer.mainWindow,appMPlayer.Items,appMPlayer.NumberOfItems,mplDrawBuffer,appMPlayer.main.Bitmap.ImageSize );
mplMainRender=0;
}
Index: mp3lib/sr1.c
===================================================================
--- mp3lib/sr1.c (revision 23390)
+++ mp3lib/sr1.c (working copy)
@@ -172,7 +172,7 @@
// if(backstep!=512 && backstep>fsizeold)
// printf("\rWarning! backstep (%d>%d) \n",backstep,fsizeold);
wordpointer = bsbuf + ssize - backstep;
- if (backstep) memcpy(wordpointer,bsbufold+fsizeold-backstep,backstep);
+ if (backstep) fast_memcpy(wordpointer,bsbufold+fsizeold-backstep,backstep);
bitindex = 0;
bitsleft+=8*backstep;
// printf("Backstep %d (bitsleft=%d)\n",backstep,bitsleft);
Index: libvo/vo_bl.c
===================================================================
--- libvo/vo_bl.c (revision 23390)
+++ libvo/vo_bl.c (working copy)
@@ -174,7 +174,7 @@
addr.sin_family = AF_INET;
addr.sin_port = htons(h->port);
- memcpy(&addr.sin_addr.s_addr, dest->h_addr_list[0], dest->h_length);
+ fast_memcpy(&addr.sin_addr.s_addr, dest->h_addr_list[0], dest->h_length);
h->fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (h->fd < 0) {
@@ -283,7 +283,7 @@
if (prevpts >= 0) for (i = 0; i < no_bl_files; i++)
bl->write_frame(&bl_files[i], tmp, (vo_pts - prevpts)/90);
- memcpy(tmp, image, bl->width*bl->height*bl->channels);
+ fast_memcpy(tmp, image, bl->width*bl->height*bl->channels);
prevpts = vo_pts;
for (i = 0; i < no_bl_hosts; i++) bl->send_frame(&bl_hosts[i]);
@@ -331,7 +331,7 @@
dst=image; /* + zr->off_y + zr->image_width*(y/zr->vdec)+x;*/
// copy Y:
for (i = 0; i < h; i++) {
- memcpy(dst,src,w);
+ fast_memcpy(dst,src,w);
dst+=bl->width;
src+=stride[0];
Index: libvo/vo_3dfx.c
===================================================================
--- libvo/vo_3dfx.c (revision 23390)
+++ libvo/vo_3dfx.c (working copy)
@@ -238,10 +238,10 @@
for (j=0;j<jmax;j++)
{
//XXX this should be hand-rolled 32 bit memcpy for safeness.
- memcpy(fb_YUV->U + (uint32_t) VOODOO_YUV_STRIDE* j ,((uint8_t*) u) + uv_imax* j , uv_imax);
- memcpy(fb_YUV->V + (uint32_t) VOODOO_YUV_STRIDE* j ,((uint8_t*) v) + uv_imax* j , uv_imax);
- memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE* (j<<1) ,((uint8_t*) y) + y_imax * (j<<1) , y_imax);
- memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE*((j<<1)+1),((uint8_t*) y) + y_imax *((j<<1)+1), y_imax);
+ fast_memcpy(fb_YUV->U + (uint32_t) VOODOO_YUV_STRIDE* j ,((uint8_t*) u) + uv_imax* j , uv_imax);
+ fast_memcpy(fb_YUV->V + (uint32_t) VOODOO_YUV_STRIDE* j ,((uint8_t*) v) + uv_imax* j , uv_imax);
+ fast_memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE* (j<<1) ,((uint8_t*) y) + y_imax * (j<<1) , y_imax);
+ fast_memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE*((j<<1)+1),((uint8_t*) y) + y_imax *((j<<1)+1), y_imax);
}
LOG("video_out_3dfx: done planar dump\n");
}
Index: libvo/vo_fbdev.c
===================================================================
--- libvo/vo_fbdev.c (revision 23390)
+++ libvo/vo_fbdev.c (working copy)
@@ -1075,7 +1075,7 @@
s = src[0];
while (h) {
- memcpy(d, s, w * fb_pixel_size);
+ fast_memcpy(d, s, w * fb_pixel_size);
d += fb_line_len;
s += stride[0];
h--;
Index: libvo/vo_zr.c
===================================================================
--- libvo/vo_zr.c (revision 23390)
+++ libvo/vo_zr.c (working copy)
@@ -546,7 +546,7 @@
source = src[0] + 2*g->yoff*zr->vdec*zr->stride + 2*g->xoff;
dest = zr->image + 2*zr->off_y;
for (i = 0; i < g->height/zr->vdec; i++) {
- memcpy(dest, source, zr->image_width*2);
+ fast_memcpy(dest, source, zr->image_width*2);
dest += 2*zr->image_width;
source += zr->vdec*zr->stride;
}
@@ -619,7 +619,7 @@
// copy Y:
for (i = 0; i < h; i++) {
if ((i + x)%zr->vdec == 0) {
- memcpy(dst,src,w);
+ fast_memcpy(dst,src,w);
dst+=zr->image_width;
}
src+=stride[0];
@@ -633,8 +633,8 @@
zr->image_width/2+(x/2);
for (i = 0; i< h/2; i++) {
if ((i+x/2)%zr->vdec == 0) {
- memcpy(dst1,src1,w/2);
- memcpy(dst2,src2,w/2);
+ fast_memcpy(dst1,src1,w/2);
+ fast_memcpy(dst2,src2,w/2);
dst1+=zr->image_width/2;
dst2+=zr->image_width/2;
}
Index: libvo/vo_macosx.m
===================================================================
--- libvo/vo_macosx.m (revision 23390)
+++ libvo/vo_macosx.m (working copy)
@@ -203,7 +203,7 @@
{
case IMGFMT_BGR32:
case IMGFMT_RGB32:
- memcpy(image_data, src[0], image_width*image_height*image_bytes);
+ fast_memcpy(image_data, src[0], image_width*image_height*image_bytes);
break;
case IMGFMT_YUY2:
Index: libvo/vo_fbdev2.c
===================================================================
--- libvo/vo_fbdev2.c (revision 23390)
+++ libvo/vo_fbdev2.c (working copy)
@@ -354,7 +354,7 @@
int i;
for (i = 0; i < h; i++) {
- memcpy(dest, in, w * fb_pixel_size);
+ fast_memcpy(dest, in, w * fb_pixel_size);
dest += next;
in += stride[0];
}
@@ -371,7 +371,7 @@
int i, out_offset = 0, in_offset = 0;
for (i = 0; i < in_height; i++) {
- memcpy(center + out_offset, next_frame + in_offset,
+ fast_memcpy(center + out_offset, next_frame + in_offset,
in_width * fb_pixel_size);
out_offset += fb_line_len;
in_offset += in_width * fb_pixel_size;
Index: libvo/vesa_lvo.c
===================================================================
--- libvo/vesa_lvo.c (revision 23390)
+++ libvo/vesa_lvo.c (working copy)
@@ -167,7 +167,7 @@
dest = lvo_mem + bespitch * y + x;
src = image[0];
for(i=0;i<h;i++){
- memcpy(dest,src,w);
+ fast_memcpy(dest,src,w);
src+=stride[0];
dest += bespitch;
}
@@ -177,7 +177,7 @@
dest = lvo_mem + bespitch*mga_vid_config.src_height + bespitch2 * y + x;
src = image[1];
for(i=0;i<h;i++){
- memcpy(dest,src,w);
+ fast_memcpy(dest,src,w);
src+=stride[1];
dest += bespitch2;
}
@@ -187,7 +187,7 @@
+ bespitch2 * y + x;
src = image[2];
for(i=0;i<h;i++){
- memcpy(dest,src,w);
+ fast_memcpy(dest,src,w);
src+=stride[2];
dest += bespitch2;
}
@@ -207,7 +207,7 @@
bytpp = (image_bpp+7)/8;
dst = lvo_mem + (image_width * y + x)*bytpp;
/* vlvo_draw_slice_422(image,stride,w,h,x,y); just for speed */
- memcpy(dst,image[0],mga_vid_config.frame_size);
+ fast_memcpy(dst,image[0],mga_vid_config.frame_size);
}
return 0;
}
@@ -215,7 +215,7 @@
uint32_t vlvo_draw_frame(uint8_t *image[])
{
/* Note it's very strange but sometime for YUY2 draw_frame is called */
- memcpy(lvo_mem,image[0],mga_vid_config.frame_size);
+ fast_memcpy(lvo_mem,image[0],mga_vid_config.frame_size);
if( mp_msg_test(MSGT_VO,MSGL_DBG2) ) {
mp_msg(MSGT_VO,MSGL_DBG2, "vesa_lvo: vlvo_flip_page() was called\n");}
return 0;
Index: libvo/vo_dxr2.c
===================================================================
--- libvo/vo_dxr2.c (revision 23390)
+++ libvo/vo_dxr2.c (working copy)
@@ -165,13 +165,13 @@
}
while (len>0) if ((dxr2bufpos+len) <= BUF_SIZE) {
- memcpy(dxr2buf+dxr2bufpos, data, len);
+ fast_memcpy(dxr2buf+dxr2bufpos, data, len);
dxr2bufpos+=len;
len=0;
} else {
int copylen=BUF_SIZE-dxr2bufpos;
if(copylen > 0) {
- memcpy(dxr2buf+dxr2bufpos, data, copylen);
+ fast_memcpy(dxr2buf+dxr2bufpos, data, copylen);
dxr2bufpos += copylen;
data+=copylen;
len-=copylen;
Index: libvo/vo_sdl.c
===================================================================
--- libvo/vo_sdl.c (revision 23390)
+++ libvo/vo_sdl.c (working copy)
@@ -1052,11 +1052,11 @@
mysrc+=priv->framePlaneYUY;
for(i = 0; i < priv->height; i++) {
mysrc-=priv->stridePlaneYUY;
- memcpy (dst, mysrc, priv->stridePlaneYUY);
+ fast_memcpy (dst, mysrc, priv->stridePlaneYUY);
dst+=priv->overlay->pitches[0];
}
}
- else memcpy (dst, src[0], priv->framePlaneYUY);
+ else fast_memcpy (dst, src[0], priv->framePlaneYUY);
SDL_OVR_UNLOCK
break;
@@ -1075,11 +1075,11 @@
mysrc+=priv->framePlaneRGB;
for(i = 0; i < priv->height; i++) {
mysrc-=priv->stridePlaneRGB;
- memcpy (dst, mysrc, priv->stridePlaneRGB);
+ fast_memcpy (dst, mysrc, priv->stridePlaneRGB);
dst += priv->surface->pitch;
}
}
- else memcpy (dst, src[0], priv->framePlaneRGB);
+ else fast_memcpy (dst, src[0], priv->framePlaneRGB);
SDL_SRF_UNLOCK(priv->surface)
} else {
SDL_SRF_LOCK(priv->rgbsurface, -1)
@@ -1088,11 +1088,11 @@
mysrc+=priv->framePlaneRGB;
for(i = 0; i < priv->height; i++) {
mysrc-=priv->stridePlaneRGB;
- memcpy (dst, mysrc, priv->stridePlaneRGB);
+ fast_memcpy (dst, mysrc, priv->stridePlaneRGB);
dst += priv->rgbsurface->pitch;
}
}
- else memcpy (dst, src[0], priv->framePlaneRGB);
+ else fast_memcpy (dst, src[0], priv->framePlaneRGB);
SDL_SRF_UNLOCK(priv->rgbsurface)
}
break;
Index: libvo/vo_vesa.c
===================================================================
--- libvo/vo_vesa.c (revision 23390)
+++ libvo/vo_vesa.c (working copy)
@@ -217,7 +217,7 @@
color = (r << shift_r) | (g << shift_g) | (b << shift_b);
offset = y * bpl + (x * pixel_size);
if(!VALID_WIN_FRAME(offset)) __vbeSwitchBank(offset);
- memcpy(VIDEO_PTR(offset), &color, pixel_size);
+ fast_memcpy(VIDEO_PTR(offset), &color, pixel_size);
}
/*
@@ -226,7 +226,7 @@
*/
static void __vbeCopyBlockFast(unsigned long offset,uint8_t *image,unsigned long size)
{
- memcpy(&win.ptr[offset],image,size);
+ fast_memcpy(&win.ptr[offset],image,size);
}
static void __vbeCopyBlock(unsigned long offset,uint8_t *image,unsigned long size)
@@ -236,7 +236,7 @@
{
if(!VALID_WIN_FRAME(offset)) __vbeSwitchBank(offset);
delta = min(size,win.high - offset);
- memcpy(VIDEO_PTR(offset),&image[src_idx],delta);
+ fast_memcpy(VIDEO_PTR(offset),&image[src_idx],delta);
src_idx += delta;
offset += delta;
size -= delta;
@@ -649,7 +649,7 @@
else fs_mode = 1;
}
if((err=vbeInit()) != VBE_OK) { PRINT_VBE_ERR("vbeInit",err); return -1; }
- memcpy(vib.VESASignature,"VBE2",4);
+ fast_memcpy(vib.VESASignature,"VBE2",4);
if(!vib_set && (err=vbeGetControllerInfo(&vib)) != VBE_OK)
{
PRINT_VBE_ERR("vbeGetControllerInfo",err);
Index: libvo/vo_zr2.c
===================================================================
--- libvo/vo_zr2.c (revision 23390)
+++ libvo/vo_zr2.c (working copy)
@@ -151,7 +151,7 @@
}
/* copy the jpeg image to the buffer which we acquired */
- memcpy(p->buf + p->zrq.size*p->frame, mpi->planes[0], size);
+ fast_memcpy(p->buf + p->zrq.size*p->frame, mpi->planes[0], size);
return VO_TRUE;
}
@@ -394,7 +394,7 @@
* We make configuration changes to a temporary params structure,
* compare it with the old params structure and only apply the new
* config if it is different from the old one. */
- memcpy(&zptmp, &p->zp, sizeof(zptmp));
+ fast_memcpy(&zptmp, &p->zp, sizeof(zptmp));
/* translate the configuration to zoran understandable format */
zptmp.decimation = 0;
@@ -423,7 +423,7 @@
if (memcmp(&zptmp, &p->zp, sizeof(zptmp))) {
/* config differs, we must update */
- memcpy(&p->zp, &zptmp, sizeof(zptmp));
+ fast_memcpy(&p->zp, &zptmp, sizeof(zptmp));
stop_playing(p);
if (ioctl(p->vdes, MJPIOC_S_PARAMS, &p->zp) < 0) {
ERROR("error writing display params to card\n");
Index: libvo/vo_yuv4mpeg.c
===================================================================
--- libvo/vo_yuv4mpeg.c (revision 23390)
+++ libvo/vo_yuv4mpeg.c (working copy)
@@ -175,9 +175,9 @@
for (i=0; i<h; i +=2)
{
- memcpy(rgb_line_buffer , ptr + stride * i , stride);
- memcpy(ptr + stride * i , ptr + stride * (i+1), stride);
- memcpy(ptr + stride * (i+1), rgb_line_buffer , stride);
+ fast_memcpy(rgb_line_buffer , ptr + stride * i , stride);
+ fast_memcpy(ptr + stride * i , ptr + stride * (i+1), stride);
+ fast_memcpy(ptr + stride * (i+1), rgb_line_buffer , stride);
}
}
@@ -227,16 +227,16 @@
while(k_start < modv)
{
i = j = k_start;
- memcpy(rgb_line_buffer, ptr + stride * i, stride);
+ fast_memcpy(rgb_line_buffer, ptr + stride * i, stride);
while (!line_state[j])
{
line_state[j] = 1;
i = j;
j = j * 2 % modv;
- memcpy(ptr + stride * i, ptr + stride * j, stride);
+ fast_memcpy(ptr + stride * i, ptr + stride * j, stride);
}
- memcpy(ptr + stride * i, rgb_line_buffer, stride);
+ fast_memcpy(ptr + stride * i, rgb_line_buffer, stride);
while(k_start < modv && line_state[k_start])
k_start++;
@@ -377,7 +377,7 @@
dst = image_y + image_width * y + x;
for (i = 0; i < h; i++)
{
- memcpy(dst, src, w);
+ fast_memcpy(dst, src, w);
src += stride[0];
dst += image_width;
}
@@ -390,8 +390,8 @@
uint8_t *dstv = image_v + imgstride * (y >> 1) + (x >> 1);
for (i = 0; i < h / 2; i++)
{
- memcpy(dstu, src1 , w >> 1);
- memcpy(dstv, src2, w >> 1);
+ fast_memcpy(dstu, src1 , w >> 1);
+ fast_memcpy(dstv, src2, w >> 1);
src1 += stride[1];
src2 += stride[2];
dstu += imgstride;
@@ -405,7 +405,7 @@
dst = rgb_buffer + (image_width * y + x) * 3;
for (i = 0; i < h; i++)
{
- memcpy(dst, src, w * 3);
+ fast_memcpy(dst, src, w * 3);
src += stride[0];
dst += image_width * 3;
}
@@ -424,7 +424,7 @@
case IMGFMT_BGR24:
case IMGFMT_RGB24:
- memcpy(rgb_buffer, src[0], image_width * image_height * 3);
+ fast_memcpy(rgb_buffer, src[0], image_width * image_height * 3);
break;
}
return 0;
Index: libvo/vo_syncfb.c
===================================================================
--- libvo/vo_syncfb.c (revision 23390)
+++ libvo/vo_syncfb.c (working copy)
@@ -127,7 +127,7 @@
for(h=0; h < _config.src_height; h++)
{
- memcpy(dest, y, _config.src_width);
+ fast_memcpy(dest, y, _config.src_width);
y += _config.src_width;
dest += bespitch;
}
@@ -161,7 +161,7 @@
for(h=0; h < 16; h++)
{
- memcpy(dest, y, _config.src_width);
+ fast_memcpy(dest, y, _config.src_width);
y += _config.src_width;
dest += bespitch;
}
@@ -190,7 +190,7 @@
dest = frame_mem + bufinfo.offset + (bespitch * ypos);
for(h=0; h < ysize; h++)
{
- memcpy(dest, y, xsize);
+ fast_memcpy(dest, y, xsize);
y += stride[0];
dest += bespitch;
}
@@ -201,7 +201,7 @@
dest = frame_mem + bufinfo.offset_p2 + (bespitch * ypos)/4;
for(h=0; h < ysize; h++)
{
- memcpy(dest, cr, xsize);
+ fast_memcpy(dest, cr, xsize);
cr += stride[1];
dest += bespitch/2;
}
@@ -209,7 +209,7 @@
dest = frame_mem + bufinfo.offset_p3 + (bespitch * ypos)/4;
for(h=0; h < ysize; h++)
{
- memcpy(dest, cb, xsize);
+ fast_memcpy(dest, cb, xsize);
cb += stride[2];
dest += bespitch/2;
}
Index: libvo/vo_quartz.c
===================================================================
--- libvo/vo_quartz.c (revision 23390)
+++ libvo/vo_quartz.c (working copy)
@@ -1022,7 +1022,7 @@
switch (image_format)
{
case IMGFMT_RGB32:
- memcpy(image_data,src[0],image_size);
+ fast_memcpy(image_data,src[0],image_size);
return 0;
case IMGFMT_UYVY:
Index: libvo/vo_svga.c
===================================================================
--- libvo/vo_svga.c (revision 23390)
+++ libvo/vo_svga.c (working copy)
@@ -206,7 +206,7 @@
rgbplane=PageStore[0].vbase + (y*mode_stride) + (x*modeinfo->bytesperpixel);
for(i=0;i<h;i++){
//i'm afraid that memcpy is better optimized than memset;)
- memcpy(rgbplane,zerobuf,w*modeinfo->bytesperpixel);
+ fast_memcpy(rgbplane,zerobuf,w*modeinfo->bytesperpixel);
// memset(rgbplane,0,w*modeinfo->bytesperpixel);
rgbplane+=mode_stride;
}
Index: libvo/vo_directx.c
===================================================================
--- libvo/vo_directx.c (revision 23390)
+++ libvo/vo_directx.c (working copy)
@@ -1179,7 +1187,7 @@
static int draw_frame(uint8_t *src[])
{
- memcpy( image, *src, dstride * image_height );
+ fast_memcpy( image, *src, dstride * image_height );
return 0;
}
@@ -1262,7 +1270,7 @@
}
else //packed
{
- memcpy( image, mpi->planes[0], image_height * dstride);
+ fast_memcpy( image, mpi->planes[0], image_height * dstride);
}
return VO_TRUE;
}
Index: libvo/vo_directfb2.c
===================================================================
--- libvo/vo_directfb2.c (revision 23390)
+++ libvo/vo_directfb2.c (working copy)
@@ -1233,7 +1233,7 @@
srcp = src[0];
for (i=0;i<h;i++) {
- memcpy(dst,srcp,p);
+ fast_memcpy(dst,srcp,p);
dst += pitch;
srcp += stride[0];
}
@@ -1245,7 +1245,7 @@
p = p/2;
for (i=0;i<h/2;i++) {
- memcpy(dst,srcp,p);
+ fast_memcpy(dst,srcp,p);
dst += pitch/2;
srcp += stride[2];
}
@@ -1254,7 +1254,7 @@
srcp = src[1];
for (i=0;i<h/2;i++) {
- memcpy(dst,srcp,p);
+ fast_memcpy(dst,srcp,p);
dst += pitch/2;
srcp += stride[1];
}
@@ -1266,7 +1266,7 @@
p = p/2;
for (i=0;i<h/2;i++) {
- memcpy(dst,srcp,p);
+ fast_memcpy(dst,srcp,p);
dst += pitch/2;
srcp += stride[1];
}
@@ -1275,7 +1275,7 @@
srcp = src[2];
for (i=0;i<h/2;i++) {
- memcpy(dst,srcp,p);
+ fast_memcpy(dst,srcp,p);
dst += pitch/2;
srcp += stride[2];
}
@@ -1327,7 +1327,7 @@
src = mpi->planes[0]+mpi->y*mpi->stride[0]+mpi->x;
for (i=0;i<mpi->h;i++) {
- memcpy(dst+i*pitch,src+i*mpi->stride[0],p);
+ fast_memcpy(dst+i*pitch,src+i*mpi->stride[0],p);
}
@@ -1338,14 +1338,14 @@
src = mpi->planes[2]+mpi->y*mpi->stride[2]+mpi->x/2;
for (i=0;i<mpi->h/2;i++) {
- memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
+ fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
}
dst += pitch*height/4;
src = mpi->planes[1]+mpi->y*mpi->stride[1]+mpi->x/2;
for (i=0;i<mpi->h/2;i++) {
- memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
+ fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
}
} else {
@@ -1355,14 +1355,14 @@
src = mpi->planes[1]+mpi->y*mpi->stride[1]+mpi->x/2;
for (i=0;i<mpi->h/2;i++) {
- memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
+ fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[1],p);
}
dst += pitch*height/4;
src = mpi->planes[2]+mpi->y*mpi->stride[2]+mpi->x/2;
for (i=0;i<mpi->h/2;i++) {
- memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
+ fast_memcpy(dst+i*pitch/2,src+i*mpi->stride[2],p);
}
}
Index: libvo/fastmemcpy.h
===================================================================
--- libvo/fastmemcpy.h (revision 23390)
+++ libvo/fastmemcpy.h (working copy)
@@ -11,14 +11,15 @@
extern void * fast_memcpy(void * to, const void * from, size_t len);
extern void * mem2agpcpy(void * to, const void * from, size_t len);
-#define memcpy(a,b,c) fast_memcpy(a,b,c)
#else /* HAVE_MMX/MMX2/3DNOW/SSE/SSE2 */
#define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#define fast_memcpy(a,b,c) memcpy(a,b,c)
#endif
#else /* USE_FASTMEMCPY */
#define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#define fast_memcpy(a,b,c) memcpy(a,b,c)
#endif
static inline void * mem2agpcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride)
@@ -62,13 +63,13 @@
srcStride = -srcStride;
}
- memcpy(dst, src, srcStride*height);
+ fast_memcpy(dst, src, srcStride*height);
}
else
{
for(i=0; i<height; i++)
{
- memcpy(dst, src, bytesPerLine);
+ fast_memcpy(dst, src, bytesPerLine);
src = (uint8_t*)src + srcStride;
dst = (uint8_t*)dst + dstStride;
}
Index: libvo/vo_dxr3.c
===================================================================
--- libvo/vo_dxr3.c (revision 23390)
+++ libvo/vo_dxr3.c (working copy)
@@ -727,7 +727,7 @@
if ( !cleared )
{
spued->count=spubuf->count;
- memcpy( spued->data,spubuf->data,DATASIZE );
+ fast_memcpy( spued->data,spubuf->data,DATASIZE );
cleared=1;
}
}
@@ -1106,7 +1106,7 @@
},*p;
p = malloc(sizeof(m));
- memcpy(p,m,sizeof(m));
+ fast_memcpy(p,m,sizeof(m));
return p;
}
Index: libswscale/swscale.c
===================================================================
--- libswscale/swscale.c (revision 23390)
+++ libswscale/swscale.c (working copy)
@@ -73,6 +73,7 @@
#include "bswap.h"
#include "rgb2rgb.h"
#ifdef USE_FASTMEMCPY
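+// FIXME: fastmemcpy.h no longer redefines memcpy(); either call fast_memcpy() explicitly in this file or add a local "#define memcpy fast_memcpy"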
#include "libvo/fastmemcpy.h"
#endif
#include "libavcodec/opt.h"
Index: libswscale/rgb2rgb.c
===================================================================
--- libswscale/rgb2rgb.c (revision 23390)
+++ libswscale/rgb2rgb.c (working copy)
@@ -33,6 +33,7 @@
#include "x86_cpu.h"
#include "bswap.h"
#ifdef USE_FASTMEMCPY
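+// FIXME: fastmemcpy.h no longer redefines memcpy(); either call fast_memcpy() explicitly in this file or add a local "#define memcpy fast_memcpy"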
#include "libvo/fastmemcpy.h"
#endif
Index: libmenu/vf_menu.c
===================================================================
--- libmenu/vf_menu.c (revision 23390)
+++ libmenu/vf_menu.c (working copy)
@@ -146,8 +146,8 @@
if(mpi->type == MP_IMGTYPE_TEMP && (!(mpi->flags&MP_IMGFLAG_PRESERVE)) ) {
dmpi = vf_get_image(vf->next,mpi->imgfmt,mpi->type, mpi->flags, mpi->w, mpi->h);
- memcpy(mpi->planes,dmpi->planes,MP_MAX_PLANES*sizeof(unsigned char*));
- memcpy(mpi->stride,dmpi->stride,MP_MAX_PLANES*sizeof(unsigned int));
+ fast_memcpy(mpi->planes,dmpi->planes,MP_MAX_PLANES*sizeof(unsigned char*));
+ fast_memcpy(mpi->stride,dmpi->stride,MP_MAX_PLANES*sizeof(unsigned int));
mpi->flags|=MP_IMGFLAG_DIRECT;
mpi->priv=(void*)dmpi;
return;
Index: libmpdemux/demux_asf.c
===================================================================
--- libmpdemux/demux_asf.c (revision 23390)
+++ libmpdemux/demux_asf.c (working copy)
@@ -56,12 +56,12 @@
//i+=asf_scrambling_h*asf_scrambling_w;
for(x=0;x<asf->scrambling_w;x++)
for(y=0;y<asf->scrambling_h;y++){
- memcpy(dst+i,s2+(y*asf->scrambling_w+x)*asf->scrambling_b,asf->scrambling_b);
+ fast_memcpy(dst+i,s2+(y*asf->scrambling_w+x)*asf->scrambling_b,asf->scrambling_b);
i+=asf->scrambling_b;
}
s2+=asf->scrambling_h*asf->scrambling_w*asf->scrambling_b;
}
- //if(i<len) memcpy(dst+i,src+i,len-i);
+ //if(i<len) fast_memcpy(dst+i,src+i,len-i);
free(*src);
*src = dst;
}
@@ -154,7 +154,7 @@
{
if(dp->len!=offs && offs!=-1) mp_msg(MSGT_DEMUX,MSGL_V,"warning! fragment.len=%d BUT next fragment offset=%d \n",dp->len,offs);
dp->buffer=realloc(dp->buffer,dp->len+len+FF_INPUT_BUFFER_PADDING_SIZE);
- memcpy(dp->buffer+dp->len,data,len);
+ fast_memcpy(dp->buffer+dp->len,data,len);
memset(dp->buffer+dp->len+len, 0, FF_INPUT_BUFFER_PADDING_SIZE);
mp_dbg(MSGT_DEMUX,MSGL_DBG4,"data appended! %d+%d\n",dp->len,len);
dp->len+=len;
@@ -224,7 +224,7 @@
return 0;
}
dp=new_demux_packet(len);
- memcpy(dp->buffer,data,len);
+ fast_memcpy(dp->buffer,data,len);
if (asf->asf_is_dvr_ms)
dp->pts=time*0.0000001f;
else
Index: libmpdemux/demux_gif.c
===================================================================
--- libmpdemux/demux_gif.c (revision 23390)
+++ libmpdemux/demux_gif.c (working copy)
@@ -145,7 +145,7 @@
dp = new_demux_packet(priv->w * priv->h);
buf = calloc(gif->Image.Width, gif->Image.Height);
if (priv->useref)
- memcpy(dp->buffer, priv->refimg, priv->w * priv->h);
+ fast_memcpy(dp->buffer, priv->refimg, priv->w * priv->h);
else
memset(dp->buffer, gif->SBackGroundColor, priv->w * priv->h);
@@ -199,7 +199,7 @@
memcpy_transp_pic(dest, buf, w, h, priv->w, gif->Image.Width,
transparency, transparent_col);
- if (refmode == 1) memcpy(priv->refimg, dp->buffer, priv->w * priv->h);
+ if (refmode == 1) fast_memcpy(priv->refimg, dp->buffer, priv->w * priv->h);
if (refmode == 2 && priv->useref) {
dest = priv->refimg + priv->w * t + l;
memset(buf, gif->SBackGroundColor, len);
Index: libmpdemux/demuxer.c
===================================================================
--- libmpdemux/demuxer.c (revision 23390)
+++ libmpdemux/demuxer.c (working copy)
@@ -429,7 +429,7 @@
if(!ds_fill_buffer(ds)) return bytes;
} else {
if(x>len) x=len;
- if(mem) memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
+ if(mem) fast_memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
bytes+=x;len-=x;ds->buffer_pos+=x;
}
}
@@ -445,7 +445,7 @@
if(!ds_fill_buffer(ds)) return bytes;
} else {
if(x>len) x=len;
- if(mem) memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
+ if(mem) fast_memcpy(mem+bytes,&ds->buffer[ds->buffer_pos],x);
bytes+=x;len-=x;ds->buffer_pos+=x;
return bytes; // stop at end of package! (for correct timestamping)
}
Index: libaf/af_format.c
===================================================================
--- libaf/af_format.c (revision 23390)
+++ libaf/af_format.c (working copy)
@@ -287,7 +287,7 @@
if(c->bps != l->bps)
change_bps(c->audio,l->audio,len,c->bps,l->bps);
else
- memcpy(l->audio,c->audio,len*c->bps);
+ fast_memcpy(l->audio,c->audio,len*c->bps);
break;
}
}
Index: mencoder.c
===================================================================
--- mencoder.c (revision 23390)
+++ mencoder.c (working copy)
@@ -338,10 +338,10 @@
if(ret>0) sh_audio->a_out_buffer_len+=ret; else at_eof=1;
}
if(len>sh_audio->a_out_buffer_len) len=sh_audio->a_out_buffer_len;
- memcpy(buffer+size,sh_audio->a_out_buffer,len);
+ fast_memcpy(buffer+size,sh_audio->a_out_buffer,len);
sh_audio->a_out_buffer_len-=len; size+=len;
if(sh_audio->a_out_buffer_len>0)
- memcpy(sh_audio->a_out_buffer,&sh_audio->a_out_buffer[len],sh_audio->a_out_buffer_len);
+ fast_memcpy(sh_audio->a_out_buffer,&sh_audio->a_out_buffer[len],sh_audio->a_out_buffer_len);
}
return size;
}
@@ -771,7 +771,7 @@
if (!curfile) {
if (sh_video->bih) {
mux_v->bih=malloc(sh_video->bih->biSize);
- memcpy(mux_v->bih, sh_video->bih, sh_video->bih->biSize);
+ fast_memcpy(mux_v->bih, sh_video->bih, sh_video->bih->biSize);
}
else
{
@@ -941,7 +941,7 @@
}
if (sh_audio->wf){
mux_a->wf=malloc(sizeof(WAVEFORMATEX) + sh_audio->wf->cbSize);
- memcpy(mux_a->wf, sh_audio->wf, sizeof(WAVEFORMATEX) + sh_audio->wf->cbSize);
+ fast_memcpy(mux_a->wf, sh_audio->wf, sizeof(WAVEFORMATEX) + sh_audio->wf->cbSize);
if(!sh_audio->i_bps) sh_audio->i_bps=mux_a->wf->nAvgBytesPerSec;
} else {
mux_a->wf = malloc(sizeof(WAVEFORMATEX));
@@ -1222,7 +1222,7 @@
mux_a->wf->nAvgBytesPerSec=0.5f+(double)mux_a->size/mux_a->timer; // avg bps (VBR)
if(mux_a->buffer_len>=len){
mux_a->buffer_len-=len;
- memcpy(mux_a->buffer,mux_a->buffer+len,mux_a->buffer_len);
+ fast_memcpy(mux_a->buffer,mux_a->buffer+len,mux_a->buffer_len);
}
@@ -1573,9 +1573,9 @@
int i;
for(i = 0; i < height/2; i++) {
- memcpy(tmp, &src[i*width], width);
- memcpy(&dst[i * width], &src[(height - i) * width], width);
- memcpy(&dst[(height - i) * width], tmp, width);
+ fast_memcpy(tmp, &src[i*width], width);
+ fast_memcpy(&dst[i * width], &src[(height - i) * width], width);
+ fast_memcpy(&dst[(height - i) * width], tmp, width);
}
free(tmp);
More information about the MPlayer-dev-eng
mailing list