[Mplayer-cvslog] CVS: main/libfame ac_int.h,NONE,1.1 ac_mmx.h,NONE,1.1 dequantize_float.h,NONE,1.1 dequantize_mmx.h,NONE,1.1 fame_malloc.c,NONE,1.1 fame_malloc.h,NONE,1.1 fame_monitor.c,NONE,1.1 fame_monitor.h,NONE,1.1 fame_profile_stats.c,NONE,1.1 fame_profile_stats.h,NONE,1.1 fame_rate_1param.c,NONE,1.1 fame_rate_1param.h,NONE,1.1 fame_rate_simple.c,NONE,1.1 fame_rate_simple.h,NONE,1.1 half_sse.h,NONE,1.1 quantize_float.h,NONE,1.1 quantize_mmx.h,NONE,1.1 table_mmx_const.h,NONE,1.1 AUTHORS,1.1,1.2 Makefile,1.3,1.4 README,1.1,1.2 dct_float.h,1.1,1.2 dct_mmx.h,1.1,1.2 fame.c,1.1,1.2 fame.h,1.2,1.3 fame_bitbuffer.h,1.1,1.2 fame_decoder.h,1.1,1.2 fame_decoder_mpeg.c,1.1,1.2 fame_decoder_mpeg.h,1.1,1.2 fame_encoder.h,1.1,1.2 fame_encoder_mpeg.c,1.1,1.2 fame_encoder_mpeg.h,1.1,1.2 fame_motion.c,1.1,1.2 fame_motion.h,1.1,1.2 fame_motion_fourstep.c,1.1,1.2 fame_motion_none.c,1.1,1.2 fame_motion_pmvfast.c,1.2,1.3 fame_motion_pmvfast.h,1.1,1.2 fame_profile.h,1.1,1.2 fame_profile_mpeg.c,1.1,1.2 fame_profile_mpeg.h,1.1,1.2 fame_profile_mpeg1.c,1.1,1.2 fame_profile_mpeg4_shape.c,1.1,1.2 fame_profile_mpeg4_simple.c,1.1,1.2 fame_rate.c,1.1,1.2 fame_rate.h,1.1,1.2 fame_shape.c,1.1,1.2 fame_shape.h,1.1,1.2 fame_syntax.h,1.1,1.2 fame_syntax_mpeg1.c,1.1,1.2 fame_syntax_mpeg1.h,1.1,1.2 fame_syntax_mpeg4.c,1.1,1.2 fame_syntax_mpeg4.h,1.1,1.2 fame_version.h,1.1,1.2 fetch_float.h,1.1,1.2 fetch_mmx.h,1.1,1.2 half_int.h,1.1,1.2 half_mmx.h,1.1,1.2 idct_float.h,1.1,1.2 idct_mmx.h,1.1,1.2 mae_int.h,1.1,1.2 mae_mmx.h,1.1,1.2 mae_sse.h,1.1,1.2 pad_int.h,1.1,1.2 reconstruct_float.h,1.1,1.2 reconstruct_mmx.h,1.1,1.2 table_cae.h,1.1,1.2 table_cbp_mpeg1.h,1.1,1.2 table_cbp_mpeg4.h,1.1,1.2 table_clip_mpeg1.h,1.1,1.2 table_clip_mpeg4.h,1.1,1.2 table_dc_mpeg1.h,1.1,1.2 table_dc_mpeg4.h,1.1,1.2 table_mv.h,1.1,1.2 table_quant_mpeg1.h,1.1,1.2 table_quant_mpeg4.h,1.1,1.2 table_rlehuff_mpeg1.h,1.1,1.2 table_rlehuff_mpeg4.h,1.1,1.2 table_scale.h,1.1,1.2 table_zigzag_mpeg1.h,1.1,1.2 
table_zigzag_mpeg4.h,1.1,1.2 dequantise_float.h,1.1,NONE dequantise_mmx.h,1.1,NONE

Arpi of Ize arpi at mplayerhq.hu
Sat Jun 1 22:23:14 CEST 2002


Update of /cvsroot/mplayer/main/libfame
In directory mail:/var/tmp.root/cvs-serv13910

Modified Files:
	AUTHORS Makefile README dct_float.h dct_mmx.h fame.c fame.h 
	fame_bitbuffer.h fame_decoder.h fame_decoder_mpeg.c 
	fame_decoder_mpeg.h fame_encoder.h fame_encoder_mpeg.c 
	fame_encoder_mpeg.h fame_motion.c fame_motion.h 
	fame_motion_fourstep.c fame_motion_none.c 
	fame_motion_pmvfast.c fame_motion_pmvfast.h fame_profile.h 
	fame_profile_mpeg.c fame_profile_mpeg.h fame_profile_mpeg1.c 
	fame_profile_mpeg4_shape.c fame_profile_mpeg4_simple.c 
	fame_rate.c fame_rate.h fame_shape.c fame_shape.h 
	fame_syntax.h fame_syntax_mpeg1.c fame_syntax_mpeg1.h 
	fame_syntax_mpeg4.c fame_syntax_mpeg4.h fame_version.h 
	fetch_float.h fetch_mmx.h half_int.h half_mmx.h idct_float.h 
	idct_mmx.h mae_int.h mae_mmx.h mae_sse.h pad_int.h 
	reconstruct_float.h reconstruct_mmx.h table_cae.h 
	table_cbp_mpeg1.h table_cbp_mpeg4.h table_clip_mpeg1.h 
	table_clip_mpeg4.h table_dc_mpeg1.h table_dc_mpeg4.h 
	table_mv.h table_quant_mpeg1.h table_quant_mpeg4.h 
	table_rlehuff_mpeg1.h table_rlehuff_mpeg4.h table_scale.h 
	table_zigzag_mpeg1.h table_zigzag_mpeg4.h 
Added Files:
	ac_int.h ac_mmx.h dequantize_float.h dequantize_mmx.h 
	fame_malloc.c fame_malloc.h fame_monitor.c fame_monitor.h 
	fame_profile_stats.c fame_profile_stats.h fame_rate_1param.c 
	fame_rate_1param.h fame_rate_simple.c fame_rate_simple.h 
	half_sse.h quantize_float.h quantize_mmx.h table_mmx_const.h 
Removed Files:
	dequantise_float.h dequantise_mmx.h quantise_float.h 
	quantise_mmx.h 
Log Message:
libfame updated to version 0.9.0 (from 0.8.9)


--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* prediction macros */
/* Zero the 32 bytes starting at p.  Note that every macro in this group     */
/* already ends with ';', so each expands to a complete statement.           */
#define FASTCLEAR16(p) memset((p), 0, 32);
/* Copy 32 bytes from s to d (memcpy, so the two areas must not overlap).    */
#define FASTCOPY16(d, s) memcpy((d), (s), 32);
/* TODO: AC rescaling according to qscale */
/* Until the TODO above is done, both "scale" macros are plain 32-byte       */
/* copies from s to d, identical to FASTCOPY16.                              */
#define FASTSCALE8H(d, s) memcpy((d), (s), 32);
#define FASTSCALE8V(d, s) memcpy((d), (s), 32);

/* FASTAC8H(d, b): gather the border coefficients of block b into d.         */
/* The first 16 bytes of b are copied to the start of d, then elements       */
/* 8, 16, ..., 56 of b (read as short int) are stored in d[9]..d[15].        */
/* d[8] is left untouched.                                                   */
#define FASTAC8H(d, b) {						\
  short int *fastac_dst_ = (short int *)(d);				\
  short int *fastac_src_ = (short int *)(b);				\
  int fastac_k_;							\
  memcpy(fastac_dst_, fastac_src_, 16);					\
  for (fastac_k_ = 1; fastac_k_ < 8; fastac_k_++)			\
    fastac_dst_[8 + fastac_k_] = fastac_src_[8 * fastac_k_];		\
}

/* FASTAC8V(d, b): same data movement as the H variant in this header:       */
/* 16 bytes of b go to the start of d, and b[8], b[16], ..., b[56] (as       */
/* short int) go to d[9]..d[15].  d[8] is not written.                       */
#define FASTAC8V(d, b) {						\
  short int *fastac_out_ = (short int *)(d);				\
  short int *fastac_in_ = (short int *)(b);				\
  int fastac_row_ = 1;							\
  memcpy(fastac_out_, fastac_in_, 16);					\
  while (fastac_row_ <= 7) {						\
    fastac_out_[fastac_row_ + 8] = fastac_in_[fastac_row_ * 8];	\
    fastac_row_++;							\
  }									\
}

/* FASTSAD8H(v, p, b): add to v, for elements 1..7, the amount by which      */
/* subtracting p shrinks the magnitude of b:                                 */
/*   sum of abs(b[k]) - abs(b[k] - p[k])                                     */
/* The total is built in a local int and added to v in one step.             */
#define FASTSAD8H(v, p, b) {					\
  int fastsad_gain_ = 0;					\
  int fastsad_k_;						\
  for (fastsad_k_ = 1; fastsad_k_ <= 7; fastsad_k_++)		\
    fastsad_gain_ += abs(b[fastsad_k_]) -			\
                     abs(b[fastsad_k_] - p[fastsad_k_]);	\
  v += fastsad_gain_;						\
}
/* FASTSAD8V(v, p, b): like the H variant but over elements 9..15:           */
/* v grows by the sum of abs(b[k]) - abs(b[k] - p[k]).                       */
#define FASTSAD8V(v, p, b) {					\
  int fastsad_total_ = 0;					\
  int fastsad_idx_ = 9;						\
  while (fastsad_idx_ < 16) {					\
    fastsad_total_ += abs(b[fastsad_idx_]) -			\
                      abs(b[fastsad_idx_] - p[fastsad_idx_]);	\
    fastsad_idx_++;						\
  }								\
  v += fastsad_total_;						\
}
/* COPY8H(b, p): copy elements 1..7 of p to the same positions of b.         */
/* Element 0 is not touched.                                                 */
#define COPY8H(b, p)						\
{								\
  int copy8_k_;							\
  for (copy8_k_ = 1; copy8_k_ < 8; copy8_k_++)			\
    b[copy8_k_] = p[copy8_k_];					\
}
/* COPY8V(b, p): scatter p[9]..p[15] into b at a stride of eight elements:   */
/* b[8] = p[9], b[16] = p[10], ..., b[56] = p[15].                           */
#define COPY8V(b, p)						\
{								\
  int copy8_row_;						\
  for (copy8_row_ = 1; copy8_row_ <= 7; copy8_row_++)		\
    b[8 * copy8_row_] = p[8 + copy8_row_];			\
}
/* FASTDIFF8H(b, p): b[k] -= p[k] for k = 0..15, leaving element 8 alone.    */
#define FASTDIFF8H(b, p) {					\
  int fastdiff_k_;						\
  for (fastdiff_k_ = 0; fastdiff_k_ < 16; fastdiff_k_++) {	\
    /* don't need 8 */						\
    if (fastdiff_k_ == 8)					\
      continue;							\
    b[fastdiff_k_] -= p[fastdiff_k_];				\
  }								\
}
/* FASTSUM8H(b, p): b[k] += p[k] for k = 0..15, leaving element 8 alone.     */
#define FASTSUM8H(b, p) {					\
  int fastsum_k_;						\
  for (fastsum_k_ = 0; fastsum_k_ < 16; fastsum_k_++) {		\
    /* don't need 8 */						\
    if (fastsum_k_ != 8)					\
      b[fastsum_k_] += p[fastsum_k_];				\
  }								\
}

/* FASTDIFF8V(b, p): subtract p from b element by element over 0..7 and      */
/* 9..15.  Element 8 is skipped.                                             */
#define FASTDIFF8V(b, p) {					\
  int fastdiff_i_;						\
  for (fastdiff_i_ = 0; fastdiff_i_ <= 7; fastdiff_i_++)	\
    b[fastdiff_i_] -= p[fastdiff_i_];				\
  /* don't need 8 */						\
  for (fastdiff_i_ = 9; fastdiff_i_ <= 15; fastdiff_i_++)	\
    b[fastdiff_i_] -= p[fastdiff_i_];				\
}
/* FASTSUM8V(b, p): add p to b element by element over 0..7 and 9..15.       */
/* Element 8 is skipped.                                                     */
#define FASTSUM8V(b, p) {					\
  int fastsum_i_;						\
  for (fastsum_i_ = 0; fastsum_i_ <= 7; fastsum_i_++)		\
    b[fastsum_i_] += p[fastsum_i_];				\
  /* don't need 8 */						\
  for (fastsum_i_ = 9; fastsum_i_ <= 15; fastsum_i_++)		\
    b[fastsum_i_] += p[fastsum_i_];				\
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* Warning: blocks are transposed with MMX */
/* V MMX functions are functionally equivalent to H integer functions */
/* H MMX functions are functionally equivalent to V integer functions */

/* prediction macros */
/* Zero the 32 bytes starting at p.  Note that every macro in this group     */
/* already ends with ';', so each expands to a complete statement.           */
#define FASTCLEAR16(p) memset((p), 0, 32);
/* Copy 32 bytes from s to d (memcpy, so the two areas must not overlap).    */
#define FASTCOPY16(d, s) memcpy((d), (s), 32);
/* TODO: AC rescaling according to qscale */
/* Until the TODO above is done, both "scale" macros are plain 32-byte       */
/* copies from s to d, identical to FASTCOPY16.                              */
#define FASTSCALE8H(d, s) memcpy((d), (s), 32);
#define FASTSCALE8V(d, s) memcpy((d), (s), 32);

/* FASTAC8V(d, b), MMX variant.                                              */
/* The asm moves the first 16 bytes at b to d through mm0/mm1.  The C lines  */
/* then store elements 8, 16, ..., 56 of b (read as short int) into          */
/* d[9]..d[15].  d[8] is not written.                                        */
/* b and d are tied input/output operands of the asm, so both arguments must */
/* be modifiable lvalues.  mm0 and mm1 are overwritten without being listed  */
/* as clobbers, and no emms is issued here.                                  */
#define FASTAC8V(d, b) {			\
 asm("movq (%0), %%mm0\n"			\
     "movq 8(%0), %%mm1\n"			\
     "movq %%mm0, (%1)\n"			\
     "movq %%mm1, 8(%1)\n"			\
      : "=r"(b), "=r"(d)                        \
      : "0"(b), "1"(d)                          \
      : "memory"); 				\
  ((short int *)(d))[9] = ((short int *)(b))[8];	\
  ((short int *)(d))[10] = ((short int *)(b))[16];	\
  ((short int *)(d))[11] = ((short int *)(b))[24];	\
  ((short int *)(d))[12] = ((short int *)(b))[32];	\
  ((short int *)(d))[13] = ((short int *)(b))[40];	\
  ((short int *)(d))[14] = ((short int *)(b))[48];	\
  ((short int *)(d))[15] = ((short int *)(b))[56];	\
}

/* FASTAC8H(d, b), MMX variant.  The body is the same as FASTAC8V in this    */
/* header: 16 bytes are moved from b to d through mm0/mm1, then elements     */
/* 8, 16, ..., 56 of b (read as short int) are stored into d[9]..d[15].      */
/* d[8] is not written.  b and d must be modifiable lvalues because they are */
/* tied asm outputs.  mm0/mm1 are overwritten without a clobber entry.       */
#define FASTAC8H(d, b) {			\
 asm("movq (%0), %%mm0\n"			\
     "movq 8(%0), %%mm1\n"			\
     "movq %%mm0, (%1)\n"			\
     "movq %%mm1, 8(%1)\n"			\
      : "=r"(b), "=r"(d)                        \
      : "0"(b), "1"(d)                          \
      : "memory"); 				\
  ((short int *)(d))[9] = ((short int *)(b))[8];	\
  ((short int *)(d))[10] = ((short int *)(b))[16];	\
  ((short int *)(d))[11] = ((short int *)(b))[24];	\
  ((short int *)(d))[12] = ((short int *)(b))[32];	\
  ((short int *)(d))[13] = ((short int *)(b))[40];	\
  ((short int *)(d))[14] = ((short int *)(b))[48];	\
  ((short int *)(d))[15] = ((short int *)(b))[56];	\
}

/* blocks are transposed! */
/* FASTSAD8H(v, p, b), MMX variant.                                          */
/* Reads the 16 bytes at byte offsets 16..31 of b (%0) and of p (%1), i.e.   */
/* eight 16-bit words each, and for every word computes                      */
/*   |b| - |p - b|                                                           */
/* with wrapping 16-bit arithmetic.  Absolute values are formed as           */
/* (x ^ mask) - mask, where mask = 0xffff for negative x (pcmpgtw against a  */
/* zeroed register) and 0 otherwise.  The eight words are then added         */
/* together (paddw plus two shift-and-add steps), the low word is moved to   */
/* retval and added to v as a signed short.                                  */
/* Byte offset 16 corresponds to element 8 when elements are 2 bytes wide,   */
/* so this covers elements 8..15, whereas the integer FASTSAD8V covers       */
/* 9..15 -- presumably element 8 is expected to contribute nothing; verify   */
/* against the callers.                                                      */
/* b and p are tied asm outputs and must be modifiable lvalues.  mm0..mm7    */
/* are all overwritten without being listed as clobbers.                     */
#define FASTSAD8H(v, p, b) {			\
  long retval;		                        \
						\
  asm("movq 16(%0), %%mm0\n"			\
      "movq 16(%1), %%mm1\n"			\
      "movq 24(%0), %%mm2\n"			\
      "movq 24(%1), %%mm3\n"			\
      "pxor %%mm4, %%mm4\n"			\
      "pxor %%mm5, %%mm5\n"			\
      "pxor %%mm6, %%mm6\n"			\
      "pxor %%mm7, %%mm7\n"			\
      "psubw %%mm0, %%mm1\n"			\
      "psubw %%mm2, %%mm3\n"			\
      "pcmpgtw %%mm0, %%mm4\n"			\
      "pcmpgtw %%mm1, %%mm5\n"			\
      "pcmpgtw %%mm2, %%mm6\n"			\
      "pcmpgtw %%mm3, %%mm7\n"			\
      "pxor  %%mm4, %%mm0\n"			\
      "pxor  %%mm5, %%mm1\n"			\
      "pxor  %%mm6, %%mm2\n"			\
      "pxor  %%mm7, %%mm3\n"			\
      "psubw %%mm4, %%mm0\n"			\
      "psubw %%mm5, %%mm1\n"			\
      "psubw %%mm6, %%mm2\n"			\
      "psubw %%mm7, %%mm3\n"			\
      "psubw %%mm1, %%mm0\n"			\
      "psubw %%mm3, %%mm2\n"			\
      "paddw %%mm2, %%mm0\n"			\
      "movq %%mm0, %%mm1\n"			\
      "psrlq $0x20, %%mm1\n"			\
      "paddw %%mm1, %%mm0\n"			\
      "movq %%mm0, %%mm1\n"			\
      "psrlq $0x10, %%mm1\n"			\
      "paddw %%mm1, %%mm0\n"			\
      "movd %%mm0, %2\n"			\
      : "=r"(b), "=r"(p), "=r"(retval)		\
      : "0"(b), "1"(p), "2"(0)			\
      : "memory"); 				\
  v += (signed short) retval;                   \
}
/* FASTSAD8V(v, p, b), MMX variant.                                          */
/* Same computation as FASTSAD8H in this header, but on the 16 bytes at      */
/* byte offsets 2..17 of b (%0) and p (%1): for each of the eight 16-bit     */
/* words it forms |b| - |p - b| (wrapping), sums the eight results and adds  */
/* the low 16 bits of the sum to v as a signed short.                        */
/* With 2-byte elements these offsets are elements 1..8, whereas the integer */
/* FASTSAD8H covers 1..7 -- presumably element 8 is expected to contribute   */
/* nothing; verify against the callers.  The loads at offsets 2 and 10 are   */
/* not 8-byte aligned relative to b and p.                                   */
/* b and p are tied asm outputs and must be modifiable lvalues.  mm0..mm7    */
/* are all overwritten without being listed as clobbers.                     */
#define FASTSAD8V(v, p, b) {			\
  long retval;		                        \
						\
  asm("movq 2(%0), %%mm0\n"			\
      "movq 2(%1), %%mm1\n"			\
      "movq 10(%0), %%mm2\n"			\
      "movq 10(%1), %%mm3\n"			\
      "pxor %%mm4, %%mm4\n"			\
      "pxor %%mm5, %%mm5\n"			\
      "pxor %%mm6, %%mm6\n"			\
      "pxor %%mm7, %%mm7\n"			\
      "psubw %%mm0, %%mm1\n"			\
      "psubw %%mm2, %%mm3\n"			\
      "pcmpgtw %%mm0, %%mm4\n"			\
      "pcmpgtw %%mm1, %%mm5\n"			\
      "pcmpgtw %%mm2, %%mm6\n"			\
      "pcmpgtw %%mm3, %%mm7\n"			\
      "pxor  %%mm4, %%mm0\n"			\
      "pxor  %%mm5, %%mm1\n"			\
      "pxor  %%mm6, %%mm2\n"			\
      "pxor  %%mm7, %%mm3\n"			\
      "psubw %%mm4, %%mm0\n"			\
      "psubw %%mm5, %%mm1\n"			\
      "psubw %%mm6, %%mm2\n"			\
      "psubw %%mm7, %%mm3\n"			\
      "psubw %%mm1, %%mm0\n"			\
      "psubw %%mm3, %%mm2\n"			\
      "paddw %%mm2, %%mm0\n"			\
      "movq %%mm0, %%mm1\n"			\
      "psrlq $0x20, %%mm1\n"			\
      "paddw %%mm1, %%mm0\n"			\
      "movq %%mm0, %%mm1\n"			\
      "psrlq $0x10, %%mm1\n"			\
      "paddw %%mm1, %%mm0\n"			\
      "movd %%mm0, %2\n"			\
      : "=r"(b), "=r"(p), "=r"(retval)		\
      : "0"(b), "1"(p), "2"(0)			\
      : "memory"); 				\
  v += (signed short) retval;                   \
}
/* COPY8H(b, p), transposed-block variant: scatter p[9]..p[15] into b at a   */
/* stride of eight elements (b[8] = p[9], ..., b[56] = p[15]).               */
#define COPY8H(b, p)						\
{								\
  int copy8_row_;						\
  for (copy8_row_ = 1; copy8_row_ <= 7; copy8_row_++)		\
    b[8 * copy8_row_] = p[8 + copy8_row_];			\
}
/* COPY8V(b, p), transposed-block variant: copy elements 1..7 of p to the    */
/* same positions of b.  Element 0 is not touched.                           */
#define COPY8V(b, p)						\
{								\
  int copy8_k_;							\
  for (copy8_k_ = 1; copy8_k_ < 8; copy8_k_++)			\
    b[copy8_k_] = p[copy8_k_];					\
}

/* FASTDIFF8V(b, p), MMX variant.                                            */
/* Loads the 32 bytes at b into mm0..mm3, subtracts the 32 bytes at p word   */
/* by word (psubw, wrapping 16-bit) and writes the result back to b.         */
/* Unlike the integer macros, the whole 32-byte range is processed, so the   */
/* word at byte offset 16 is modified as well.                               */
/* b and p are tied asm outputs and must be modifiable lvalues.  mm0..mm3    */
/* are overwritten without being listed as clobbers.                         */
#define FASTDIFF8V(b, p) {			\
 asm("movq   (%0), %%mm0\n"			\
     "movq  8(%0), %%mm1\n"			\
     "movq 16(%0), %%mm2\n"			\
     "movq 24(%0), %%mm3\n"			\
     "psubw   (%1), %%mm0\n"			\
     "psubw  8(%1), %%mm1\n"			\
     "psubw 16(%1), %%mm2\n"			\
     "psubw 24(%1), %%mm3\n"			\
     "movq %%mm0,   (%0)\n"			\
     "movq %%mm1,  8(%0)\n"			\
     "movq %%mm2, 16(%0)\n"			\
     "movq %%mm3, 24(%0)\n"			\
      : "=r"(b), "=r"(p)                        \
      : "0"(b), "1"(p)                          \
      : "memory"); 				\
}

/* FASTSUM8V(b, p), MMX variant.                                             */
/* Loads the 32 bytes at b into mm0..mm3, adds the 32 bytes at p word by     */
/* word (paddw, wrapping 16-bit) and writes the result back to b.  The whole */
/* 32-byte range is processed, including the word at byte offset 16.         */
/* b and p are tied asm outputs and must be modifiable lvalues.  mm0..mm3    */
/* are overwritten without being listed as clobbers.                         */
#define FASTSUM8V(b, p) {			\
 asm("movq   (%0), %%mm0\n"			\
     "movq  8(%0), %%mm1\n"			\
     "movq 16(%0), %%mm2\n"			\
     "movq 24(%0), %%mm3\n"			\
     "paddw   (%1), %%mm0\n"			\
     "paddw  8(%1), %%mm1\n"			\
     "paddw 16(%1), %%mm2\n"			\
     "paddw 24(%1), %%mm3\n"			\
     "movq %%mm0,   (%0)\n"			\
     "movq %%mm1,  8(%0)\n"			\
     "movq %%mm2, 16(%0)\n"			\
     "movq %%mm3, 24(%0)\n"			\
      : "=r"(b), "=r"(p)                        \
      : "0"(b), "1"(p)                          \
      : "memory"); 				\
}


/* FASTDIFF8H(b, p), MMX variant.  The body is the same as FASTDIFF8V in     */
/* this header: the 32 bytes at b have the 32 bytes at p subtracted word by  */
/* word (psubw, wrapping 16-bit) and are stored back to b.                   */
/* b and p are tied asm outputs and must be modifiable lvalues.  mm0..mm3    */
/* are overwritten without being listed as clobbers.                         */
#define FASTDIFF8H(b, p) {			\
 asm("movq   (%0), %%mm0\n"			\
     "movq  8(%0), %%mm1\n"			\
     "movq 16(%0), %%mm2\n"			\
     "movq 24(%0), %%mm3\n"			\
     "psubw   (%1), %%mm0\n"			\
     "psubw  8(%1), %%mm1\n"			\
     "psubw 16(%1), %%mm2\n"			\
     "psubw 24(%1), %%mm3\n"			\
     "movq %%mm0,   (%0)\n"			\
     "movq %%mm1,  8(%0)\n"			\
     "movq %%mm2, 16(%0)\n"			\
     "movq %%mm3, 24(%0)\n"			\
      : "=r"(b), "=r"(p)                        \
      : "0"(b), "1"(p)                          \
      : "memory"); 				\
}

/* FASTSUM8H(b, p), MMX variant.  The body is the same as FASTSUM8V in this  */
/* header: the 32 bytes at p are added word by word (paddw, wrapping 16-bit) */
/* to the 32 bytes at b, and the result is stored back to b.                 */
/* b and p are tied asm outputs and must be modifiable lvalues.  mm0..mm3    */
/* are overwritten without being listed as clobbers.                         */
#define FASTSUM8H(b, p) {			\
 asm("movq   (%0), %%mm0\n"			\
     "movq  8(%0), %%mm1\n"			\
     "movq 16(%0), %%mm2\n"			\
     "movq 24(%0), %%mm3\n"			\
     "paddw   (%1), %%mm0\n"			\
     "paddw  8(%1), %%mm1\n"			\
     "paddw 16(%1), %%mm2\n"			\
     "paddw 24(%1), %%mm3\n"			\
     "movq %%mm0,   (%0)\n"			\
     "movq %%mm1,  8(%0)\n"			\
     "movq %%mm2, 16(%0)\n"			\
     "movq %%mm3, 24(%0)\n"			\
      : "=r"(b), "=r"(p)                        \
      : "0"(b), "1"(p)                          \
      : "memory"); 				\
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/********************** floating point dequantisation **************************/

/*  dequantize                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    Dequantize and premultiply a block                                     */
/*                                                                           */
/*  Arguments:                                                               */
/*    short *block: a 8x8 block of integer                                   */
/*    dct_t *cache: the resulting 8x8 prescaled block of dct_t               */
/*    dct_t *dqmatrix: 8x8 dequantisation matrix                             */
/*    dct_t *psmatrix: 8x8 prescale coefficients                             */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */
/*                                                                           */
/*  Notes :                                                                  */
/*    Unfortunately mismatch control is different in MPEG-1 and MPEG-4 :(    */

/* Intra dequantisation with block-wide ("global") mismatch control.         */
/* Coefficient 0 is block[0] * dqmatrix[0] truncated to int; every other     */
/* coefficient is additionally divided by 8.  All 64 integers are XORed      */
/* together; when the low bit of that XOR is clear, the low bit of           */
/* coefficient 63 is flipped.  Each entry of cache is finally multiplied by  */
/* the matching entry of psmatrix.  The mismatch argument is not used.       */
static void inline dequantize_intra_global(short *block,
					   dct_t *cache,
					   dct_t *dqmatrix,
					   dct_t *psmatrix,
					   dct_t *mismatch /* not used */)
{
  int pos;
  int coef;     /* most recently dequantized coefficient */
  int parity;   /* running XOR of every dequantized coefficient */

  /* DC term: no division by 8 */
  coef = (int) (block[0] * dqmatrix[0]);
  cache[0] = coef;
  parity = coef;

  /* AC terms */
  pos = 1;
  while(pos < 64) {
    coef = ((int) (block[pos] * dqmatrix[pos])) / 8;
    cache[pos] = coef;
    parity ^= coef;
    pos++;
  }

  /* mismatch control: coef still holds coefficient 63 at this point */
  if((parity & 1) == 0)
    cache[63] = coef ^ 1;

  /* prescale for iDCT */
  for(pos = 0; pos < 64; pos++)
    cache[pos] = cache[pos] * psmatrix[pos];
}

/* Intra dequantisation with per-coefficient ("local") mismatch control.     */
/* Coefficient 0 is block[0] * dqmatrix[0] truncated to int and is stored    */
/* as is.  Every other non-zero level is multiplied by its dqmatrix entry,   */
/* truncated, divided by 8 and then forced odd: positive levels use          */
/* (c - 1) | 1 and negative levels use c | 1, so an even value moves one     */
/* step toward zero.  A level of zero is stored as zero.  Each entry of      */
/* cache is finally multiplied by the matching entry of psmatrix.  The       */
/* mismatch argument is not used.                                            */
static void inline dequantize_intra_local(short *block,
					  dct_t *cache,
					  dct_t *dqmatrix,
					  dct_t *psmatrix,
					  dct_t *mismatch /* not used */)
{
  int i, c;

  /* dequantize: DC term, no division by 8 and no oddification */
  c = (int) (block[0] * dqmatrix[0]);
  cache[0] = c;

  for(i = 1; i < 64; i++) {
    if(block[i] == 0) {
      /* A zero level must stay zero.  Without this test the "c |= 1" */
      /* branch below would reconstruct every empty AC coefficient as */
      /* 1, unlike dequantize_inter_local in this file.               */
      cache[i] = 0;
      continue;
    }

    c = ((int) (block[i] * dqmatrix[i])) / 8;
    /* mismatch control: force the value odd */
    /* NOTE(review): a non-zero level whose product truncates to 0 still */
    /* yields -1 (level > 0) or +1 (level < 0) here, as before -- confirm */
    /* whether such inputs can occur.                                     */
    if(block[i] > 0)
      c = (c - 1) | 1;
    else
      c |= 1;
    cache[i] = c;
  }

  /* prescale for iDCT */
  for(i = 0; i < 64; i++)
    cache[i] *= psmatrix[i];
}

/*  dequantize_inter                                                         */
/*                                                                           */
/*  Description:                                                             */
/*    Dequantize and premultiply a block for inter blocks                    */
/*    These two steps have to be done separately for inter blocks :(         */
/*                                                                           */
/*  Arguments:                                                               */
/*    short *block: a 8x8 block of integer                                   */
/*    dct_t *cache: the resulting 8x8 prescaled block of dct_t               */
/*    dct_t *dqmatrix: 8x8 dequantisation matrix                             */
/*    dct_t *psmatrix: 8x8 prescale coefficients                             */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */
/*                                                                           */
/*  Notes :                                                                  */
/*    Unfortunately mismatch control is different in MPEG-1 and MPEG-4 :(    */

/* Inter dequantisation with block-wide ("global") mismatch control.         */
/* A positive level q becomes (2q + 1) * dqmatrix / 16, a negative level     */
/* becomes (2q - 1) * dqmatrix / 16 (both truncated to int before the        */
/* division) and a zero level stays 0.  All 64 integers are XORed together;  */
/* when the low bit of that XOR is clear, the low bit of coefficient 63 is   */
/* flipped.  Each entry of cache is finally multiplied by the matching entry */
/* of psmatrix.  The mismatch argument is not used.                          */
static void inline dequantize_inter_global(short *block,
					   dct_t *cache,
					   dct_t *dqmatrix,
					   dct_t *psmatrix,
					   dct_t *mismatch /* not used */)
{
  int pos;
  int coef = 0;     /* most recently dequantized coefficient */
  int parity = 0;   /* running XOR of every dequantized coefficient */

  /* dequantize */
  for(pos = 0; pos < 64; pos++) {
    int q = block[pos];

    if(q > 0)
      coef = ((int) ((2*q+1) * dqmatrix[pos])) / 16;
    else if(q < 0)
      coef = ((int) ((2*q-1) * dqmatrix[pos])) / 16;
    else
      coef = 0;

    parity ^= coef;
    cache[pos] = coef;
  }

  /* mismatch control: coef still holds coefficient 63 at this point */
  if((parity & 1) == 0)
    cache[63] = coef ^ 1;

  /* prescale for iDCT */
  for(pos = 0; pos < 64; pos++)
    cache[pos] = cache[pos] * psmatrix[pos];
}

/* Inter dequantisation with per-coefficient ("local") mismatch control.     */
/* A positive level q becomes (2q + 1) * dqmatrix / 16 and is then forced    */
/* odd with (c - 1) | 1.  A negative level becomes (2q - 1) * dqmatrix / 16  */
/* and is forced odd with c | 1.  A zero level stays 0.  Each entry of cache */
/* is finally multiplied by the matching entry of psmatrix.  The mismatch    */
/* argument is not used.                                                     */
static void inline dequantize_inter_local(short *block,
					  dct_t *cache,
					  dct_t *dqmatrix,
					  dct_t *psmatrix,
					  dct_t *mismatch /* not used */)
{
  int pos;

  /* dequantize */
  for(pos = 0; pos < 64; pos++) {
    int q = block[pos];
    int coef = 0;

    if(q > 0) {
      coef = ((int) ((2*q+1) * dqmatrix[pos])) / 16;
      /* mismatch control */
      coef = (coef - 1) | 1;
    } else if(q < 0) {
      coef = ((int) ((2*q-1) * dqmatrix[pos])) / 16;
      /* mismatch control */
      coef |= 1;
    }

    cache[pos] = coef;
  }

  /* prescale for iDCT */
  pos = 0;
  while(pos < 64) {
    cache[pos] = cache[pos] * psmatrix[pos];
    pos++;
  }
}


--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/********************** MMX accelerated dequantisation *************************/

/* DEQUANTIZE_PRESCALE_STEP(x)                                               */
/* Asm fragment: multiplies the eight words held in mm0/mm1 by the words at  */
/* 0x<x>0(%3) and 0x<x>8(%3), and stores the results at 0x<x>0(%2) and       */
/* 0x<x>8(%2).  In the asm statements visible here %3 is psmatrix and %2 is  */
/* cache.  Each signed 32-bit product is reduced to 16 bits as               */
/*   (high word << 4) + (((low word >> 11) + _mmx_1) >> 1)                   */
/* which is the product shifted right by 12 with rounding, assuming the      */
/* ASMSYM "_mmx_1" constant holds 1 in every word (it is defined outside     */
/* this file -- TODO confirm).  The final add saturates (paddsw).            */
/* Overwrites mm0, mm1, mm4 and mm5.                                         */
#define DEQUANTIZE_PRESCALE_STEP(x)			     \
    "movq %%mm0, %%mm4\n"             /* mm4 = mm0 */			 \
    "movq %%mm1, %%mm5\n"             /* mm5 = mm1 */			 \
    "pmulhw 0x" #x "0(%3), %%mm0\n"   /* premultiply for iDCT */	 \
    "pmulhw 0x" #x "8(%3), %%mm1\n"   /* premultiply for iDCT */	 \
    "pmullw 0x" #x "0(%3), %%mm4\n"   /* premultiply for iDCT */	 \
    "pmullw 0x" #x "8(%3), %%mm5\n"   /* premultiply for iDCT */	 \
    "psrlw $0x0b, %%mm4\n"            /* keep 5 bits */ \
    "psrlw $0x0b, %%mm5\n"            /* keep 5 bits */ \
    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */               \
    "paddw " ASMSYM "_mmx_1, %%mm5\n"   /* + 1 */               \
    "psrlw $0x01, %%mm4\n"            /* keep 4 bits rounded */ \
    "psrlw $0x01, %%mm5\n"            /* keep 4 bits rounded */ \
    "psllw $0x04, %%mm0\n"            /* multiply by 16 for iDCT */	 \
    "psllw $0x04, %%mm1\n"            /* multiply by 16 for iDCT */	 \
    "paddsw %%mm4, %%mm0\n"           /* add least significant part */	 \
    "paddsw %%mm5, %%mm1\n"           /* add least significant part */	 \
    "movq %%mm0, 0x" #x "0(%2)\n"     /* store in cache */		 \
    "movq %%mm1, 0x" #x "8(%2)\n"     /* store in cache */

/* DEQUANTIZE_GLOBAL_MISMATCH_CONTROL()                                      */
/* Asm fragment, expected right after the dequantisation step of the last    */
/* line.  On entry mm6 holds the word-wise XOR accumulated over the block,   */
/* mm1 holds the second half of the last line (last coefficient in the top   */
/* word) and mm7 is zero.                                                    */
/* The four words of mm6 are folded by XOR into the top word of mm4.         */
/* mm3 is built as 0x0001000000000000, the low bit of the top word.  A copy  */
/* of mm1 gets that bit flipped when the folded parity bit is clear, and     */
/* mm6 is set to (copy - mm1) shifted down so the difference for the last    */
/* coefficient ends up in the low word.                                      */
/* mm1 itself is not modified: the correction is only left in mm6 for the    */
/* caller to export.  Overwrites mm3, mm4, mm5 and mm6.                      */
#define DEQUANTIZE_GLOBAL_MISMATCH_CONTROL()				\
	"movq %%mm6, %%mm5\n"        /* copy mismatch */		\
	"psllq $0x20, %%mm5\n"       /* mm5 = higher 32 bits */		\
	"pxor %%mm6, %%mm5\n"        /* sum mismatch */			\
	"movq %%mm5, %%mm4\n"        /* copy mismatch */		\
	"psllq $0x10, %%mm5\n"       /* mm5 =  higher 16 bits */	\
	"movq %%mm1, %%mm6\n"        /* copy last line */		\
	"pxor %%mm5, %%mm4\n"        /* sum mismatch */			\
	"movq %%mm7, %%mm3\n"        /* mm3 = mm7 */			\
	"pcmpeqw %%mm7, %%mm3\n"     /* mm3 = 0xffffffffffffffff */	\
	"psllq $0x3f, %%mm3\n"       /* mm3 = 0x8000000000000000 */	\
	"psrlq $0x0f, %%mm3\n"       /* mm3 = 0x0001000000000000 */	\
	"pxor %%mm3, %%mm6\n"        /* temp last coeff ^= 1 */		\
	"pand %%mm3, %%mm4\n"        /* keep only lsb of mismatch */	\
	"pxor %%mm4, %%mm6\n"        /* temp last coeff  ^= !(mismatch&1) */ \
	"psubsw %%mm1, %%mm6\n"      /* mismatch = temp last coeff - last coeff */ \
	"psrlq $0x30, %%mm6\n"       /* retrieve mismatch in lower word */

/* MMX intra dequantisation with block-wide ("global") mismatch control.     */
/* For each of the eight 16-byte lines of block: multiply by the matching    */
/* line of dqmatrix (low 16 bits of each product), divide by 8 rounding      */
/* toward zero, XOR the result into mm6, then prescale with psmatrix and     */
/* store into cache (DEQUANTIZE_PRESCALE_STEP).  For the last line,          */
/* DEQUANTIZE_GLOBAL_MISMATCH_CONTROL runs between the two steps and leaves  */
/* the correction for the last coefficient in mm6; that value is read back   */
/* by a second asm statement and written to *mismatch shifted left by 12.    */
/* The correction is not applied to cache inside this function.              */
static void inline dequantize_intra_global(dct_t *block,
					   dct_t *cache,
					   dct_t *dqmatrix,
					   dct_t *psmatrix,
					   dct_t *mismatch)
{
  unsigned int m;

/* Dequantise line x: the sign mask of the levels (psraw 15) is used to add */
/* 7 to products of negative levels before the arithmetic shift by 3, which */
/* makes the division by 8 round toward zero.                               */
#define DEQUANTIZE_INTRA_GLOBAL_STEP(x)  				 \
	"movq 0x" #x "0(%0), %%mm0\n"     /* load 1st line 1st half */	\
	"movq 0x" #x "8(%0), %%mm1\n"     /* load 1st line 2nd half */	\
	"movq %%mm0, %%mm2\n"        /* mm2 = 1st line 1st half */	\
	"movq %%mm1, %%mm3\n"        /* mm3 = 1st line 2nd half */	\
	"psraw $0x0f, %%mm2\n"       /* mm2 = (sign(mm0) - 1) / 2 */	\
	"psraw $0x0f, %%mm3\n"       /* mm3 = (sign(mm1) - 1) / 2 */	\
	"pmullw 0x" #x "0(%1), %%mm0\n"   /* mm0=[0-3]*Q */		\
	"pmullw 0x" #x "8(%1), %%mm1\n"   /* mm1=[4-7]*Q */		\
    "psllw $0x03, %%mm2\n"            /* sign adjust before shift */	 \
    "psllw $0x03, %%mm3\n"            /* sign adjust before shift */	 \
    "psubw %%mm2, %%mm0\n"            /* sign adjust before shift */	 \
    "psubw %%mm3, %%mm1\n"            /* sign adjust before shift */	 \
    "psraw $0x03, %%mm2\n"            /* sign adjust before shift */	 \
    "psraw $0x03, %%mm3\n"            /* sign adjust before shift */	 \
    "paddw %%mm2, %%mm0\n"            /* sign adjust before shift */	 \
    "paddw %%mm3, %%mm1\n"            /* sign adjust before shift */	 \
	"psraw $0x03, %%mm0\n"       /* divide by 8 */			\
	"psraw $0x03, %%mm1\n"       /* divide by 8 */			\
	"pxor %%mm0, %%mm6\n"        /* accumulate mismatch */		\
	"pxor %%mm1, %%mm6\n"        /* accumulate mismatch */


  /* %0 = block, %1 = dqmatrix, %2 = cache, %3 = psmatrix.  The MMX */
  /* registers used are not listed as clobbers.                     */
  asm volatile ("pxor %%mm7, %%mm7\n"        /* mm7 = 0 */
		"pxor %%mm6, %%mm6\n"        /* mm6 = mismatch accumulator */
		DEQUANTIZE_INTRA_GLOBAL_STEP(0)
		DEQUANTIZE_PRESCALE_STEP(0)
		DEQUANTIZE_INTRA_GLOBAL_STEP(1)
		DEQUANTIZE_PRESCALE_STEP(1)
		DEQUANTIZE_INTRA_GLOBAL_STEP(2)
		DEQUANTIZE_PRESCALE_STEP(2)
		DEQUANTIZE_INTRA_GLOBAL_STEP(3)
		DEQUANTIZE_PRESCALE_STEP(3)
		DEQUANTIZE_INTRA_GLOBAL_STEP(4)
		DEQUANTIZE_PRESCALE_STEP(4)
		DEQUANTIZE_INTRA_GLOBAL_STEP(5)
		DEQUANTIZE_PRESCALE_STEP(5)
		DEQUANTIZE_INTRA_GLOBAL_STEP(6)
		DEQUANTIZE_PRESCALE_STEP(6)
		DEQUANTIZE_INTRA_GLOBAL_STEP(7)
		DEQUANTIZE_GLOBAL_MISMATCH_CONTROL()
		DEQUANTIZE_PRESCALE_STEP(7)
	        : "=r"(block), "=r"(dqmatrix), "=r"(cache), "=r"(psmatrix)
		: "0"(block), "1"(dqmatrix), "2"(cache), "3"(psmatrix)
		: "memory");

  /* Relies on mm6 still holding the value left by the asm statement above; */
  /* that dependency is not expressed through any operand.                  */
  asm volatile("movd %%mm6, %0\n"           /* export mismatch */
	       : "=r"(m)
	       : /* no input */
	       );
  /* only the bits that survive the conversion to dct_t are kept */
  *mismatch = (dct_t) (m<<12);
}

/* MMX intra dequantisation with per-coefficient ("local") mismatch control. */
/* For each of the eight 16-byte lines of block: multiply by the matching    */
/* line of dqmatrix, divide by 8 rounding toward zero, subtract 1 where the  */
/* level was not negative, OR with the _mmx_1 constant, and zero every word  */
/* whose product was zero.  The line is then prescaled with psmatrix and     */
/* stored into cache (DEQUANTIZE_PRESCALE_STEP).                             */
/* The mismatch argument is not used.  mm6 is cleared but not referenced by  */
/* the steps below.                                                          */
static void inline dequantize_intra_local(dct_t *block,
					  dct_t *cache,
					  dct_t *dqmatrix,
					  dct_t *psmatrix,
					  dct_t *mismatch /* not used */)
{
  /* coeff[i] = (2*level[i]*qscale*matrix[i])/16 */
  /* then coeff[i] = { coeff[i] + 1, if coeff[i] < 0 and coeff[i] is even */
  /*                 { coeff[i] - 1, if coeff[i] > 0 and coeff[i] is even */
  /*                 { coeff[i] otherwise                                 */
  /* implementation is                                                    */
  /* coeff[i] = (level[i]*qscale*matrix[i]+((level[i]<0)?7:0))>>3         */
  /* coeff[i] = (coeff[i]-((coeff[i]>0)?1:0))|1                           */
  /* and words whose product level*Q is zero are forced back to zero      */

#define DEQUANTIZE_INTRA_LOCAL_STEP(x)  				 \
    "movq 0x" #x "0(%0), %%mm0\n"     /* load 1st line 1st half */       \
    "movq 0x" #x "8(%0), %%mm1\n"     /* load 1st line 2nd half */	 \
    "movq %%mm0, %%mm2\n"             /* mm2 = 1st line 1st half */	 \
    "movq %%mm1, %%mm3\n"             /* mm3 = 1st line 2nd half */	 \
    "psraw $0x0f, %%mm2\n"            /* mm2 = (sign(mm0) - 1) / 2 */	 \
    "psraw $0x0f, %%mm3\n"            /* mm3 = (sign(mm1) - 1) / 2 */	 \
    "pmullw 0x" #x "0(%1), %%mm0\n"   /* mm0=[0-3]*Q */		         \
    "pmullw 0x" #x "8(%1), %%mm1\n"   /* mm1=[4-7]*Q */		         \
    "movq %%mm0, %%mm4\n"             /* mm4 = mm0 */       		 \
    "movq %%mm1, %%mm5\n"             /* mm5 = mm1 */			 \
    "pcmpeqw %%mm7, %%mm4\n"          /* mm4[0-3]=0xFF if mm4[0-3]==0 */ \
    "pcmpeqw %%mm7, %%mm5\n"          /* mm5[0-3]=0xFF if mm5[0-3]==0 */ \
    "pcmpeqw %%mm7, %%mm4\n"          /* mm4[0-3]=0xFF if mm0[0-3]!=0 */ \
    "pcmpeqw %%mm7, %%mm5\n"          /* mm5[0-3]=0xFF if mm1[0-3]!=0 */ \
    "psllw $0x03, %%mm2\n"            /* sign adjust before shift */	 \
    "psllw $0x03, %%mm3\n"            /* sign adjust before shift */	 \
    "psubw %%mm2, %%mm0\n"            /* sign adjust before shift */	 \
    "psubw %%mm3, %%mm1\n"            /* sign adjust before shift */	 \
    "psraw $0x03, %%mm2\n"            /* sign adjust before shift */	 \
    "psraw $0x03, %%mm3\n"            /* sign adjust before shift */	 \
    "paddw %%mm2, %%mm0\n"            /* sign adjust before shift */	 \
    "paddw %%mm3, %%mm1\n"            /* sign adjust before shift */	 \
    "psraw $0x03, %%mm0\n"            /* divide by 8 */                  \
    "psraw $0x03, %%mm1\n"            /* divide by 8 */			 \
    "pcmpeqw %%mm7, %%mm2\n"          /* invert sign */                  \
    "pcmpeqw %%mm7, %%mm3\n"          /* invert sign */                  \
    "paddw %%mm2, %%mm0\n"            /* sub 1 if >0 */                  \
    "paddw %%mm3, %%mm1\n"            /* sub 1 if >0 */                  \
    "por " ASMSYM "_mmx_1, %%mm0\n"   /* or 1 */               \
    "por " ASMSYM "_mmx_1, %%mm1\n"   /* or 1 */              \
    "pand %%mm4, %%mm0\n"             /* [0-3]=0 if [0-3] was zero */	 \
    "pand %%mm5, %%mm1\n"             /* [4-7]=0 if [4-7] was zero */

  /* %0 = block, %1 = dqmatrix, %2 = cache, %3 = psmatrix.  The MMX */
  /* registers used are not listed as clobbers.                     */
  asm volatile ("pxor %%mm7, %%mm7\n"        /* mm7 = 0 */
		"pxor %%mm6, %%mm6\n"        /* mm6 = mismatch accumulator */
		DEQUANTIZE_INTRA_LOCAL_STEP(0)
		DEQUANTIZE_PRESCALE_STEP(0)
		DEQUANTIZE_INTRA_LOCAL_STEP(1)
		DEQUANTIZE_PRESCALE_STEP(1)
		DEQUANTIZE_INTRA_LOCAL_STEP(2)
		DEQUANTIZE_PRESCALE_STEP(2)
		DEQUANTIZE_INTRA_LOCAL_STEP(3)
		DEQUANTIZE_PRESCALE_STEP(3)
		DEQUANTIZE_INTRA_LOCAL_STEP(4)
		DEQUANTIZE_PRESCALE_STEP(4)
		DEQUANTIZE_INTRA_LOCAL_STEP(5)
		DEQUANTIZE_PRESCALE_STEP(5)
		DEQUANTIZE_INTRA_LOCAL_STEP(6)
		DEQUANTIZE_PRESCALE_STEP(6)
		DEQUANTIZE_INTRA_LOCAL_STEP(7)
		DEQUANTIZE_PRESCALE_STEP(7)
	        : "=r"(block), "=r"(dqmatrix), "=r"(cache), "=r"(psmatrix)
		: "0"(block), "1"(dqmatrix), "2"(cache), "3"(psmatrix)
		: "memory");
}

/* dequantize_inter_global: MMX dequantization of one inter block, handled as
 * 8 rows of 8 16-bit coefficients (two movq loads per row).
 *
 * asm operands: %0 = block (quantized levels), %1 = dqmatrix,
 *               %2 = cache, %3 = psmatrix.
 * Per coefficient the dequant step computes
 *     sign(level) * (((2*|level| + (level != 0)) * dqmatrix[i]) >> 4)
 * and XORs every result into mm6.  The low 32 bits of mm6 are then read back
 * and folded into the caller-owned *mismatch accumulator; once that
 * accumulator passes MISMATCH_THRESHOLD in either direction, cache[63] is
 * nudged by one and the accumulator is pulled back by the threshold.
 *
 * DEQUANTIZE_PRESCALE_STEP and DEQUANTIZE_GLOBAL_MISMATCH_CONTROL are defined
 * outside this function; presumably the prescale step multiplies by psmatrix
 * and stores into cache -- TODO confirm against their definitions.
 */
static void inline dequantize_inter_global(dct_t *block,
					   dct_t *cache,
					   dct_t *dqmatrix,
					   dct_t *psmatrix,
					   dct_t *mismatch)
{
  unsigned int m; /* receives the low 32 bits of the mm6 accumulator */

#define DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(x)				     \
	"movq 0x" #x "0(%0), %%mm4\n"     /* mm4 = row x, coeffs 0-3 */	     \
	"pxor %%mm2, %%mm2\n"        /* mm2 = 0 (becomes sign mask of mm4) */	     \
        "movq %%mm4, %%mm0\n" /* mm0 = working copy of mm4 */ \
	"movq 0x" #x "8(%0), %%mm5\n"     /* mm5 = row x, coeffs 4-7 */	     \
	"pxor %%mm3, %%mm3\n"        /* mm3 = 0 (becomes sign mask of mm5) */	     \
        "movq %%mm5, %%mm1\n" /* mm1 = working copy of mm5 */ \
	"psllw $1, %%mm0\n"            /* mm0 = 2*mm0 */	     \
	"pcmpgtw %%mm4, %%mm2\n"       /* mm2 = (mm4<0)?0xffff:0x0000 */     \
	"psllw $1, %%mm1\n"            /* mm1 = 2*mm1 */	     \
	"pcmpgtw %%mm5, %%mm3\n"       /* mm3 = (mm5<0)?0xffff:0x0000 */     \
	"pxor %%mm2, %%mm0\n"          /* mm0 = 2*|mm0|-(mm0<0)*/	     \
	"pxor %%mm3, %%mm1\n"          /* mm1 = 2*|mm1|-(mm1<0)*/	     \
	"pcmpeqw %%mm7, %%mm4\n"       /* mm4 = (level==0)?0xffff:0x0000 */     \
	"pcmpeqw %%mm7, %%mm5\n"       /* mm5 = (level==0)?0xffff:0x0000 */     \
	"psubsw %%mm2, %%mm0\n"        /* mm0 = 2*|mm0| */	     \
	"psubsw %%mm3, %%mm1\n"        /* mm1 = 2*|mm1| */	     \
	"pcmpeqw %%mm7, %%mm4\n"       /* mm4 = (level==0)?0x0000:0xffff */     \
	"pcmpeqw %%mm7, %%mm5\n"       /* mm5 = (level==0)?0x0000:0xffff */     \
	"psubw %%mm4, %%mm0\n"         /* mm0 = 2*|mm0|+(mm0!=0) */     \
	"psubw %%mm5, %%mm1\n"         /* mm1 = 2*|mm1|+(mm1!=0) */     \
	"pmullw 0x" #x "0(%1), %%mm0\n" /* mm0=(2*|mm0|+1)*Q */ \
	"pmullw 0x" #x "8(%1), %%mm1\n" /* mm1=(2*|mm1|+1)*Q */ \
	"psraw $0x04, %%mm0\n"       /* divide by 16 */			     \
	"psraw $0x04, %%mm1\n"       /* divide by 16 */			     \
	"pxor %%mm2, %%mm0\n"       /* mm0 =(2*|mm0|+1)*Q*sign(mm0)-(mm0<0)*/ \
        "pxor %%mm3, %%mm1\n"       /* mm1 =(2*|mm1|+1)*Q*sign(mm1)-(mm1<0)*/ \
	"psubsw %%mm2, %%mm0\n"        /* mm0 =(2*|mm0|+1)*Q*sign(mm0) */     \
	"psubsw %%mm3, %%mm1\n"        /* mm1 =(2*|mm1|+1)*Q*sign(mm1) */     \
	"pxor %%mm0, %%mm6\n"        /* accumulate mismatch (XOR) */		     \
	"pxor %%mm1, %%mm6\n"        /* accumulate mismatch (XOR) */


  asm volatile ("pxor %%mm7, %%mm7\n"        /* mm7 = 0 */
		"pxor %%mm6, %%mm6\n"        /* mm6 = mismatch accumulator */
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(0)
		DEQUANTIZE_PRESCALE_STEP(0)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(1)
		DEQUANTIZE_PRESCALE_STEP(1)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(2)
		DEQUANTIZE_PRESCALE_STEP(2)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(3)
		DEQUANTIZE_PRESCALE_STEP(3)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(4)
		DEQUANTIZE_PRESCALE_STEP(4)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(5)
		DEQUANTIZE_PRESCALE_STEP(5)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(6)
		DEQUANTIZE_PRESCALE_STEP(6)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(7)
		DEQUANTIZE_GLOBAL_MISMATCH_CONTROL()
		/* WARNING: the mismatch correction is too small and would be */
		/* zeroed by the prescale step. That would cause artifacts in */
		/* the long term, since the last coefficient is very likely 0 */
		/* and should therefore be rounded up most of the time.       */
		/* So the mismatch is accumulated instead, until it becomes   */
		/* large enough to produce significant output after the iDCT; */
		/* the accumulator is reset when the block is coded intra.    */
		DEQUANTIZE_PRESCALE_STEP(7) 
	        : "=r"(block), "=r"(dqmatrix), "=r"(cache), "=r"(psmatrix)
		: "0"(block), "1"(dqmatrix), "2"(cache), "3"(psmatrix)
		: "memory");

  /* mm6 still holds the accumulator left by the previous asm statement */
  asm volatile("movd %%mm6, %0\n"           /* export mismatch */
	       : "=r"(m)
	       : /* no input */
	       );
  *mismatch += (dct_t) (m<<12);

  /* threshold is ((1 << 16)/(16*psmatrix[63]) * (1 << 12) + 0.5) = 26887 */
#define MISMATCH_THRESHOLD 26887

  if(*mismatch > MISMATCH_THRESHOLD) { /* after this threshold, prescaled mismatch is >= 1 */
    cache[63] ++; /* add mismatch */
    *mismatch -= MISMATCH_THRESHOLD;
  }
  /* same threshold on the negative side (literal equals -MISMATCH_THRESHOLD) */
  if(*mismatch < (-26887)) {
    cache[63] --; /* sub mismatch */
    *mismatch += MISMATCH_THRESHOLD;
  }
}

/* dequantize_inter_local: MMX dequantization of one inter block with the
 * per-coefficient ("local") oddification described below, handled as 8 rows
 * of 8 16-bit coefficients.
 *
 * asm operands: %0 = block (quantized levels), %1 = dqmatrix,
 *               %2 = cache, %3 = psmatrix.
 * The mismatch argument is accepted for signature compatibility with
 * dequantize_inter_global and is not referenced in this function body.
 * DEQUANTIZE_PRESCALE_STEP is defined outside this function; presumably it
 * scales by psmatrix and stores into cache -- TODO confirm.
 */
static void inline dequantize_inter_local(dct_t *block,
					  dct_t *cache,
					  dct_t *dqmatrix,
					  dct_t *psmatrix,
					  dct_t *mismatch /* not used */)
{
  /* coeff[i] = ((2*level[i]+sign(level[i]))*qscale*matrix[i])/16 */
  /* then coeff[i] = { coeff[i] + 1, if coeff[i] < 0 and coeff[i] is even */
  /*                 { coeff[i] - 1, if coeff[i] > 0 and coeff[i] is even */
  /*                 { coeff[i] otherwise                                 */
  /* TODO: check efficiency of new inter_global method on this */
#define DEQUANTIZE_INTER_LOCAL_STEP(x)					 \
    "movq 0x" #x "0(%0), %%mm0\n"     /* mm0 = row x, coeffs 0-3 */	 \
    "movq 0x" #x "8(%0), %%mm1\n"     /* mm1 = row x, coeffs 4-7 */	 \
    "movq %%mm0, %%mm2\n"             /* mm2 = copy of coeffs 0-3 */	 \
    "movq %%mm1, %%mm3\n"             /* mm3 = copy of coeffs 4-7 */	 \
    "psraw $0x0f, %%mm2\n"            /* mm2 = (sign(mm0) - 1) / 2 */	 \
    "psraw $0x0f, %%mm3\n"            /* mm3 = (sign(mm1) - 1) / 2 */	 \
    "paddsw %%mm2, %%mm0\n"           /* mm0 = [0-3]+(sign([0-3])-1)/2*/ \
    "paddsw %%mm3, %%mm1\n"           /* mm1 = [4-7]+(sign([4-7])-1)/2*/ \
    "paddsw %%mm0, %%mm0\n"           /* mm0 = 2*[0-3]+sign([0-3])-1 */	 \
    "paddsw %%mm1, %%mm1\n"           /* mm1 = 2*[4-7]+sign([4-7])-1 */	 \
    "pmullw 0x" #x "0(%1), %%mm0\n"   /* mm0=(2*[0-3]+sign([0-3])-1)*Q*/ \
    "pmullw 0x" #x "8(%1), %%mm1\n"   /* mm1=(2*[4-7]+sign([4-7])-1)*Q*/ \
    "movq %%mm0, %%mm4\n"             /* mm4 = mm0 */       		 \
    "movq %%mm1, %%mm5\n"             /* mm5 = mm1 */			 \
    "paddsw 0x" #x "0(%1), %%mm0\n"   /* mm0=(2*[0-3]+sign([0-3]))*Q*/ \
    "paddsw 0x" #x "8(%1), %%mm1\n"   /* mm1=(2*[4-7]+sign([4-7]))*Q*/ \
    "pcmpeqw %%mm7, %%mm4\n"          /* mm4[0-3]=0xFFFF if mm4[0-3]==0 */ \
    "pcmpeqw %%mm7, %%mm5\n"          /* mm5[4-7]=0xFFFF if mm5[4-7]==0 */ \
    "pcmpeqw %%mm7, %%mm4\n"          /* mm4[0-3]=0xFFFF if it was !=0 */ \
    "pcmpeqw %%mm7, %%mm5\n"          /* mm5[4-7]=0xFFFF if it was !=0 */ \
    "psllw $0x04, %%mm2\n"            /* sign adjust before shift */	 \
    "psllw $0x04, %%mm3\n"            /* sign adjust before shift */	 \
    "psubw %%mm2, %%mm0\n"            /* sign adjust before shift */	 \
    "psubw %%mm3, %%mm1\n"            /* sign adjust before shift */	 \
    "psraw $0x04, %%mm2\n"            /* sign adjust before shift */	 \
    "psraw $0x04, %%mm3\n"            /* sign adjust before shift */	 \
    "paddw %%mm2, %%mm0\n"            /* sign adjust before shift */	 \
    "paddw %%mm3, %%mm1\n"            /* sign adjust before shift */	 \
    "psraw $0x04, %%mm0\n"            /* divide by 16 */		 \
    "psraw $0x04, %%mm1\n"            /* divide by 16 */		 \
    "pcmpeqw %%mm7, %%mm2\n"          /* invert sign */                  \
    "pcmpeqw %%mm7, %%mm3\n"          /* invert sign */                  \
    "paddw %%mm2, %%mm0\n"            /* sub 1 if >0 */                  \
    "paddw %%mm3, %%mm1\n"            /* sub 1 if >0 */                  \
    "por " ASMSYM "_mmx_1, %%mm0\n"   /* or 1 */               \
    "por " ASMSYM "_mmx_1, %%mm1\n"   /* or 1 */               \
    "pand %%mm4, %%mm0\n"             /* [0-3]=0 if [0-3] was zero */	 \
    "pand %%mm5, %%mm1\n"             /* [4-7]=0 if [4-7] was zero */

  asm volatile ("pxor %%mm7, %%mm7\n"        /* mm7 = 0 */
		"pxor %%mm6, %%mm6\n"        /* mm6 cleared; the local step above does not touch it */
		DEQUANTIZE_INTER_LOCAL_STEP(0)
		DEQUANTIZE_PRESCALE_STEP(0)
		DEQUANTIZE_INTER_LOCAL_STEP(1)
		DEQUANTIZE_PRESCALE_STEP(1)
		DEQUANTIZE_INTER_LOCAL_STEP(2)
		DEQUANTIZE_PRESCALE_STEP(2)
		DEQUANTIZE_INTER_LOCAL_STEP(3)
		DEQUANTIZE_PRESCALE_STEP(3)
		DEQUANTIZE_INTER_LOCAL_STEP(4)
		DEQUANTIZE_PRESCALE_STEP(4)
		DEQUANTIZE_INTER_LOCAL_STEP(5)
		DEQUANTIZE_PRESCALE_STEP(5)
		DEQUANTIZE_INTER_LOCAL_STEP(6)
		DEQUANTIZE_PRESCALE_STEP(6)
		DEQUANTIZE_INTER_LOCAL_STEP(7)
		DEQUANTIZE_PRESCALE_STEP(7)
	        : "=r"(block), "=r"(dqmatrix), "=r"(cache), "=r"(psmatrix)
		: "0"(block), "1"(dqmatrix), "2"(cache), "3"(psmatrix)
		: "memory");
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**************************** malloc wrapper for alignment ***********************************/

#define ALIGN 32

#include <stdint.h>
#include <stdlib.h>

/* fame_malloc: allocate `size` bytes aligned on an ALIGN-byte boundary.
 *
 * Layout of the underlying malloc() block:
 *   byte 0 .. n-1 : unused padding                  --> ptr (raw block)
 *   byte n        : n, the distance back to ptr     --> aligned - 1
 *   byte n+1 ...  : user data, ALIGN-byte aligned   --> aligned
 * with 0 <= n <= ALIGN-1, so the offset always fits in one byte.
 *
 * Returns the aligned pointer, or NULL when the request overflows or the
 * underlying malloc() fails.  Release the block with fame_free(), never
 * with free().
 */
void* fame_malloc(size_t size)
{
  unsigned char *ptr, *aligned;
  uintptr_t misalign;

  /* size + ALIGN below must not wrap around */
  if (size > SIZE_MAX - ALIGN)
    return NULL;

  ptr = malloc(size + ALIGN);
  if (ptr == NULL)
    return NULL;

  /* uintptr_t keeps all address bits (unsigned int truncates on LP64);
   * the aligned pointer is derived from ptr by plain pointer arithmetic. */
  misalign = (uintptr_t) ptr & (ALIGN - 1);
  aligned = ptr + (ALIGN - misalign);

  /* bookkeeping byte read back by fame_free() */
  aligned[-1] = (unsigned char) ((ALIGN - 1) - misalign);

  return ((void*)aligned);
}



/* fame_free: release a block returned by fame_malloc().
 *
 * The byte just before `aligned` stores the distance from that byte back to
 * the start of the underlying malloc() block; that start is what is handed
 * to free().  A NULL argument is ignored, mirroring free(NULL).
 */
void fame_free(void* aligned)
{
  unsigned char *padding;

  if (aligned == NULL)
    return;

  /* arithmetic on void* is a compiler extension: go through unsigned char* */
  padding = (unsigned char *) aligned - 1;

  free(padding - (*padding));
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/******************** aligned memory allocation/freeing ***********************************/

#ifndef FAME_MALLOC_H
#define FAME_MALLOC_H

#include <stddef.h> /* size_t: keeps this header usable on its own */

/* Allocate `size` bytes on an aligned boundary (alignment is chosen in
 * fame_malloc.c).  The result must be released with fame_free(). */
void* fame_malloc(size_t size);

/* Release a block previously returned by fame_malloc(). */
void fame_free(void* ptr);

#endif

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include "fame.h"
#include "fame_malloc.h"
#include "fame_monitor.h"
#ifdef HAS_MMX
#include "mad_mmx.h"
#include "mae_mmx.h"
#include "fetch_mmx.h"
#else
#include "mad_int.h"
#include "mae_int.h"
#include "fetch_float.h"
#endif

/* Scene-change sensitivity: monitor_enter() scales this by                   */
/* keyframe/SCENE_CHANGE_MAXLENGTH to get the activity jump that forces an   */
/* 'I' frame.                                                                */
#define SCENE_CHANGE_THRESHOLD 10
/* Start value of the monitor's keyframe countdown: set by monitor_init(),   */
/* restored on every 'I' frame and decremented on every other frame.         */
#define SCENE_CHANGE_MAXLENGTH 300


/* Forward declarations of the monitor methods installed by the constructor  */
/* below. Note that monitor_leave is the only one not declared static.       */
static void monitor_init(fame_monitor_t *monitor,
			 int (* retrieve_cb)(fame_frame_statistics_t *stats),
			 unsigned int mb_width,
			 unsigned int mb_height,
			 unsigned int total_frames,
			 unsigned int flags);
static void monitor_close(fame_monitor_t *monitor);
static void monitor_enter(fame_monitor_t *monitor,
			  unsigned int frame_number,
			  fame_yuv_t **ref,
			  fame_yuv_t *frame,
			  unsigned char *shape,
			  char *coding);
fame_frame_statistics_t * monitor_leave(fame_monitor_t *monitor,
					unsigned int spent,
					unsigned int quant_scale);

/* Constructor of the statistics monitor object: names the object, installs  */
/* the four monitor methods defined in this file and clears the flags.       */
FAME_CONSTRUCTOR(fame_monitor_t)
{
  /* object identification */
  FAME_OBJECT(this)->name = "statistics monitoring";

  /* no monitoring option selected until init() is called */
  FAME_MONITOR(this)->flags = 0;

  /* method table, in call order over a frame's lifetime */
  FAME_MONITOR(this)->init  = monitor_init;
  FAME_MONITOR(this)->enter = monitor_enter;
  FAME_MONITOR(this)->leave = monitor_leave;
  FAME_MONITOR(this)->close = monitor_close;

  return this;
}

/* activity                                                                  */
/*                                                                           */
/* Description:                                                              */
/*    Sums, over every 8x8 block of the luminance plane, the value produced  */
/*    by mad_withoutmask() for that block.                                   */
/*                                                                           */
/* Arguments:                                                                */
/*    fame_yuv_t *frame: frame to compute activity for (y plane, pitch p)    */
/*    unsigned char *shape: shape of the given frame (unused, TODO)          */
/*    unsigned int mb_width, mb_height: frame size in macroblocks; the       */
/*       plane is walked as 2*mb_width by 2*mb_height blocks of 8x8 pixels   */
/*                                                                           */
/* Return value:                                                             */
/*    activity of the frame.                                                 */

unsigned int activity(fame_yuv_t *frame,
		      unsigned char *shape,
		      unsigned int mb_width,
		      unsigned int mb_height)
{ 
  unsigned int bx, by;
  unsigned int a;
  int p, row_skip;
  unsigned long m;
  unsigned char *input;

  (void) shape; /* not used yet */

  a = 0;
  p = frame->p;
  input = frame->y;

  /* The inner loop moves input 16*mb_width bytes to the right; going down  */
  /* 8 lines therefore needs 8*p minus that distance. The previous          */
  /* "8*p - p" was only correct when the pitch equals 16*mb_width;          */
  /* activity2() already uses the general form.                             */
  row_skip = (p << 3) - (int) (mb_width << 4);

  for(by = 0; by < mb_height*2; by++) {
    for(bx = 0; bx < mb_width*2; bx++) {
      mad_withoutmask(input, p, &m);
      a+=m;
      input+=8;
    }
    input += row_skip;
  }
  return a;
}

/* activity2                                                                 */
/*                                                                           */
/* Description:                                                              */
/*    Sums MAE8x8_withoutmask() over every co-located pair of 8x8 luminance  */
/*    blocks of a reference frame and the current frame.                     */
/*                                                                           */
/* Arguments:                                                                */
/*    fame_yuv_t *ref: reference frame (y plane, pitch p)                    */
/*    fame_yuv_t *frame: current frame (y plane, pitch p)                    */
/*    unsigned char *shape: not referenced                                   */
/*    unsigned int mb_width, mb_height: frame size in macroblocks            */
/*                                                                           */
/* Return value:                                                             */
/*    the accumulated block error.                                           */

unsigned int activity2(fame_yuv_t *ref,
		       fame_yuv_t *frame,
		       unsigned char *shape,
		       unsigned int mb_width,
		       unsigned int mb_height)
{ 
  const unsigned int blocks_per_row = mb_width*2;
  const unsigned int block_rows = mb_height*2;
  unsigned int row, col;
  unsigned long block_error;
  int total = 0;
  int cur_pitch = frame->p;
  int ref_pitch = ref->p;
  unsigned char *cur = frame->y;
  unsigned char *prev = ref->y;

  for(row = 0; row < block_rows; row++) {
    for(col = 0; col < blocks_per_row; col++) {
      /* only the current frame's pitch is handed to the block metric */
      block_error = MAE8x8_withoutmask(prev, cur, NULL, cur_pitch);
      total += block_error;
      cur  += 8;
      prev += 8;
    }
    /* rewind to the left edge and step 8 lines down in each plane */
    cur  += (cur_pitch << 3) - (mb_width << 4);
    prev += (ref_pitch << 3) - (mb_width << 4);
  }
  return total;
}

/*  monitor_init                                                             */
/*                                                                           */
/*  Description:                                                             */
/*    Initialise statistics monitor.                                         */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_monitor_t *monitor: statistics monitoring                         */
/*    retrieve_cb: callback called once per frame to get initial statistics  */
/*                 information from the program; when non-NULL it forces     */
/*                 FAME_MONITOR_LOAD_STATS.                                  */
/*    unsigned int mb_width, mb_height: frame size in macroblocks            */
/*    unsigned int total_frames: number of per-frame records to load         */
/*    unsigned int flags: flags to setup monitoring.                         */
/*                                                                           */
/*  Return value:                                                            */
/*    None. If the statistics buffer cannot be allocated,                    */
/*    current_frame_stats is left NULL.                                      */

static void monitor_init(fame_monitor_t *monitor,
			 int (* retrieve_cb)(fame_frame_statistics_t *stats),
			 unsigned int mb_width,
			 unsigned int mb_height,
			 unsigned int total_frames,
			 unsigned int flags)
{
  unsigned int i; /* same type as total_frames: no signed/unsigned compare */

  monitor->retrieve_stats_callback = retrieve_cb;
  monitor->mb_width = mb_width;
  monitor->mb_height = mb_height;
  monitor->old_activity = 0;
  monitor->keyframe = SCENE_CHANGE_MAXLENGTH;
  monitor->flags = flags;

  if (monitor->retrieve_stats_callback)
    monitor->flags |= FAME_MONITOR_LOAD_STATS;

  if (monitor->flags & FAME_MONITOR_LOAD_STATS) 
    {
      monitor->global_stats.total_frames = total_frames;
      monitor->frame_stats_list = 
	(fame_frame_statistics_t *) fame_malloc(total_frames*sizeof(fame_frame_statistics_t));

      /* only touch the list if it was actually allocated */
      if (monitor->frame_stats_list && monitor->retrieve_stats_callback) {
	/* the sums below use +=, so they must start from a known value */
	monitor->global_stats.target_rate = 0;
	monitor->global_stats.actual_rate = 0;
	monitor->global_stats.mean_spatial_activity = 0;

	for (i=0; i<total_frames; i++) {
	  monitor->retrieve_stats_callback(&(monitor->frame_stats_list[i]));
	  monitor->global_stats.target_rate += 
	    monitor->frame_stats_list[i].target_bits;
	  monitor->global_stats.actual_rate +=
	    monitor->frame_stats_list[i].actual_bits;
	  monitor->global_stats.mean_spatial_activity +=
	    monitor->frame_stats_list[i].spatial_activity;
	}
      }
      monitor->current_frame_stats = monitor->frame_stats_list;
    }
  else
    {
      /* no preloaded statistics: one record is reused for every frame */
      monitor->current_frame_stats = 
	(fame_frame_statistics_t *) fame_malloc(sizeof(fame_frame_statistics_t));
      monitor->global_stats.total_frames = 0;
      monitor->frame_stats_list = NULL;
    }

}


/*  monitor_close                                                            */
/*                                                                           */
/*  Description:                                                             */
/*    Release statistics monitoring: frees whichever statistics buffer       */
/*    monitor_init allocated and clears both statistics pointers.            */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_monitor_t *monitor: statistics monitoring                         */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void monitor_close(fame_monitor_t *monitor)
{
  /* bit test ('&'), not logical AND: any other flag bit must not select    */
  /* the preloaded-list branch.                                             */
  if (monitor->flags & FAME_MONITOR_LOAD_STATS) 
    {
      /* current_frame_stats points into this list; it is not freed apart */
      if (monitor->frame_stats_list) fame_free(monitor->frame_stats_list);
    }
  else
    {
      if (monitor->current_frame_stats) fame_free(monitor->current_frame_stats);
    }

  /* do not leave dangling pointers behind */
  monitor->frame_stats_list = NULL;
  monitor->current_frame_stats = NULL;
}

/*  monitor_enter                                                            */
/*                                                                           */
/*  Description:                                                             */
/*    Prepare for a new frame: measure its activity (unless statistics were  */
/*    preloaded), choose 'I' or 'P' coding for it and update the inter frame */
/*    counter.                                                               */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_monitor_t *monitor: statistics monitoring                         */
/*    unsigned int frame_number: the current frame number                    */
/*    fame_yuv_t **ref: reference frames; only ref[0] is used                */
/*    fame_yuv_t *frame: the current frame                                   */
/*    unsigned char *shape: passed through to activity2()                    */
/*    char *coding: in/out; a requested 'A' is replaced by the monitor's     */
/*                  own choice when statistics are available                 */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void monitor_enter(struct _fame_monitor_t_ *monitor,
			  unsigned int frame_number,
			  fame_yuv_t **ref,
			  fame_yuv_t *frame,
			  unsigned char *shape,
			  char *coding)
{
  fame_frame_statistics_t *stats = monitor->current_frame_stats;
  int threshold;

  /* Everything that reads or writes the per-frame record is skipped when   */
  /* there is none; the coding decision used to be stored through a NULL    */
  /* pointer in that case.                                                  */
  if (stats)
  {
    if (!(monitor->flags & FAME_MONITOR_LOAD_STATS))
    {
      stats->frame_number = frame_number;
      stats->spatial_activity = 
	activity2(ref[0],
		  frame, 
		  shape,
		  monitor->mb_width,
		  monitor->mb_height);
    }

    /* scene change detection */
    /* the decay term (keyframe) is here to avoid very long sequences of    */
    /* inter frames that would result in artifacts due to the DCT/iDCT lack */
    /* of accuracy and that would also limit random access.                 */
    threshold = monitor->keyframe*SCENE_CHANGE_THRESHOLD/SCENE_CHANGE_MAXLENGTH;
    if ((frame_number == 0) ||
	(stats->spatial_activity > 
	 (monitor->old_activity + threshold*monitor->mb_width*monitor->mb_height*256))) {
      stats->coding = 'I';
    }
    else stats->coding = 'P';

    if (*coding == 'A')
      *coding = stats->coding;
  }

  /* update inter frame counter */
  if(*coding == 'I')
    monitor->keyframe = SCENE_CHANGE_MAXLENGTH;
  else
    if(monitor->keyframe > 0) monitor->keyframe--;
}

/*  monitor_leave                                                            */
/*                                                                           */
/*  Description:                                                             */
/*    Finish estimating a frame: record what was spent on it, remember its   */
/*    activity for the next frame and, when a statistics list is in use,     */
/*    step to the next record.                                               */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_monitor_t *monitor: statistics monitoring                         */
/*    unsigned int spent: stored as the frame's actual_bits                  */
/*    unsigned int quant_scale: stored as the frame's quant_scale            */
/*                                                                           */
/*  Return value:                                                            */
/*    The record of the frame just finished, or NULL if there is none.       */

fame_frame_statistics_t * monitor_leave(fame_monitor_t *monitor,
					 unsigned int spent,
					 unsigned int quant_scale)
{
  fame_frame_statistics_t *finished = monitor->current_frame_stats;

  if (finished != NULL) {
    finished->actual_bits = spent;
    finished->quant_scale = quant_scale;
    monitor->old_activity = finished->spatial_activity;

    /* NOTE(review): '<=' lets the cursor advance for frame_number equal to */
    /* total_frames as well; confirm against the frame numbering used by    */
    /* the retrieve callback.                                               */
    /* TODO: update global_stats */
    if (monitor->frame_stats_list != NULL &&
	finished->frame_number <= monitor->global_stats.total_frames)
      monitor->current_frame_stats = finished + 1;
  }

#ifdef HAS_MMX
  /* restore floating point context */
  asm("emms");
#endif

  return finished;
}

























































































































--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001  Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef __FAME_MONITOR_H
#define __FAME_MONITOR_H

#include "fame.h"

/* Monitor flag bit: per-frame statistics are kept in a preallocated list of
 * total_frames records (filled through the retrieve callback when one is
 * given) instead of a single reused record. */
#define FAME_MONITOR_LOAD_STATS 1


/* Activity of the luminance plane of `frame`, accumulated over its 8x8
 * blocks; mb_width/mb_height give the frame size in macroblocks. */
unsigned int activity(fame_yuv_t *frame,
		      unsigned char *shape,
		      unsigned int mb_width,
		      unsigned int mb_height);

/* Block-wise luminance error between `ref` and `frame`, accumulated over
 * all 8x8 blocks; mb_width/mb_height give the frame size in macroblocks. */
unsigned int activity2(fame_yuv_t *ref,
		       fame_yuv_t *frame,
		       unsigned char *shape,
		       unsigned int mb_width,
		       unsigned int mb_height);

/* Statistics monitor object: a method table followed by the monitor state. */
typedef struct _fame_monitor_t_ {
  FAME_EXTENDS(fame_object_t);

  /* set up the monitor and allocate its statistics storage */
  void (* init)(struct _fame_monitor_t_ *monitor,
		int (* retrieve_cb)(fame_frame_statistics_t *stats),
		unsigned int mb_width,
		unsigned int mb_height,
		unsigned int total_frames,
		unsigned int flags);
  /* release the statistics storage */
  void (* close)(struct _fame_monitor_t_ *monitor);
  /* called before a frame is processed; may rewrite *coding */
  void (* enter)(struct _fame_monitor_t_ *monitor,
		 unsigned int frame_number,
		 fame_yuv_t **ref,
		 fame_yuv_t *frame,
		 unsigned char *shape,
		 char *coding);
  /* called after a frame is processed; returns that frame's record */
  fame_frame_statistics_t* (* leave)(struct _fame_monitor_t_ *monitor,
					unsigned int spent,
					unsigned int quant_scale);

  fame_global_statistics_t global_stats;        /* totals over the sequence */
  fame_frame_statistics_t *current_frame_stats; /* record of the frame in progress */
  fame_frame_statistics_t *frame_stats_list;    /* preloaded records, or NULL */
  int (* retrieve_stats_callback)(fame_frame_statistics_t *frame_stats);
  int keyframe;              /* countdown since the last 'I' frame */
  unsigned int mb_width;     /* frame width in macroblocks */
  unsigned int mb_height;    /* frame height in macroblocks */
  unsigned int old_activity; /* spatial_activity of the previous frame */
  unsigned int flags;        /* FAME_MONITOR_* bits */
} fame_monitor_t;

/* View an object pointer as a fame_monitor_t pointer.  The argument is
 * parenthesized so that an expression such as FAME_MONITOR(p + n) is cast
 * as a whole rather than cast first and offset afterwards. */
#define FAME_MONITOR(x) ((fame_monitor_t *) (x))

/* Constructor of the statistics monitor object (defined in fame_monitor.c). */
extern FAME_CONSTRUCTOR(fame_monitor_t);

#endif

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "fame.h"
#include "fame_malloc.h"
#include "fame_profile.h"
#include "fame_profile_stats.h"
#include "fame_monitor.h"

/* Forward declarations of the profile methods installed by the constructor  */
/* below.                                                                    */
static void profile_stats_init(fame_profile_t *profile,
			      fame_context_t *context,
			      fame_parameters_t *params,
			      unsigned char *buffer,
			      unsigned int size);
static void profile_stats_enter(fame_profile_t *profile,
				fame_yuv_t *yuv,
				unsigned char *shape);
static int profile_stats_encode(fame_profile_t *profile);
static void profile_stats_leave(fame_profile_t *profile,
				fame_frame_statistics_t *stats);
static int profile_stats_close(fame_profile_t *profile);

/* Constructor of the statistics-only profile: names the object, installs    */
/* the five profile methods defined in this file and clears the flags that   */
/* are later handed to the monitor.                                          */
FAME_CONSTRUCTOR(fame_profile_stats_t)
{
  /* object identification */
  FAME_OBJECT(this)->name = "Stats profile";

  /* no monitor option requested by default */
  this->monitor_flags = 0;

  /* method table */
  FAME_PROFILE(this)->close  = profile_stats_close;
  FAME_PROFILE(this)->leave  = profile_stats_leave;
  FAME_PROFILE(this)->encode = profile_stats_encode;
  FAME_PROFILE(this)->enter  = profile_stats_enter;
  FAME_PROFILE(this)->init   = profile_stats_init;

  return this;
}

/*  profile_stats_init                                                       */
/*                                                                           */
/*  Description:                                                             */
/*    Initialize the profile: copy the sequence parameters, allocate the two */
/*    reference frames and the shape buffer, and start the monitor.          */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_profile_t *profile: the profile to initialize                     */
/*    fame_context_t *context: context the "monitor" object is fetched from  */
/*    fame_parameters_t *params: the parameters for initialization           */
/*    unsigned char *buffer: the buffer to output data (unused here)         */
/*    unsigned int size: the size of the output buffer (unused here)         */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void profile_stats_init(fame_profile_t *profile,
			      fame_context_t *context,
			      fame_parameters_t *params,
			      unsigned char *buffer,
			      unsigned int size)
{
  fame_profile_stats_t *profile_stats = FAME_PROFILE_STATS(profile);
  int i;

  (void) buffer; /* this profile produces no bitstream */
  (void) size;

  profile_stats->width = params->width;
  profile_stats->height = params->height;
  profile_stats->coding = strdup(params->coding);
  profile_stats->total_frames = params->total_frames;
  profile_stats->frame_number = 0;


  /* Get the components */
  profile_stats->monitor =
    (fame_monitor_t *) fame_get_object(context, "monitor");

  /* Allocate reference frames: one Y plane followed by U and V (4:2:0) */
  for(i = 0; i < 2; i++) {
    fame_yuv_t *frame = (fame_yuv_t *) fame_malloc(sizeof(fame_yuv_t));

    frame->y = 
      (unsigned char *) fame_malloc(profile_stats->width*profile_stats->height*12/8);
    /* The planes are compared on the very first frame, before anything */
    /* has been copied into the older one: give them a defined content. */
    if(frame->y)
      memset(frame->y, 0, profile_stats->width*profile_stats->height*12/8);
    frame->u = 
      frame->y + profile_stats->width*profile_stats->height;
    frame->v = 
      frame->u + profile_stats->width*profile_stats->height/4;

    /* The monitor reads the pitch of both frames, and the frames are   */
    /* filled as one contiguous width*height copy: the pitch is the     */
    /* width. It used to be left uninitialized.                         */
    frame->w = profile_stats->width;
    frame->h = profile_stats->height;
    frame->p = profile_stats->width;

    profile_stats->ref[i] = frame;
  }
  profile_stats->current = 0;

  /* Allocate reconstructed shape */
  profile_stats->ref_shape = (unsigned char *) fame_malloc(profile_stats->width*profile_stats->height);

  /* Initialize statistics monitoring */
  if(profile_stats->monitor && profile_stats->monitor->init)
    profile_stats->monitor->init(profile_stats->monitor,
				 params->retrieve_cb,
				 (profile_stats->width >> 4),
				 (profile_stats->height >> 4),
				 profile_stats->total_frames,
				 profile_stats->monitor_flags);  
}



/*  profile_stats_enter                                                      */
/*                                                                           */
/*  Description:                                                             */
/*    Take a new input frame: copy its luminance into the current reference  */
/*    slot, let the monitor analyse it against the other slot, then swap     */
/*    the two slots.                                                         */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_profile_t *profile: the profile handle                            */
/*    fame_yuv_t *yuv: the input frame                                       */
/*    unsigned char *shape: not referenced                                   */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void profile_stats_enter(fame_profile_t *profile,
				fame_yuv_t *yuv,
				unsigned char *shape)
{
  fame_profile_stats_t *profile_stats = FAME_PROFILE_STATS(profile);
  fame_yuv_t *incoming = profile_stats->ref[profile_stats->current];
  size_t pattern_length;
  char coding;

  (void) shape;
  
  /* Store the new frame (luminance only) in the current slot */
  incoming->w = yuv->w;
  incoming->h = yuv->h;
  memcpy(incoming->y, yuv->y, profile_stats->width*profile_stats->height);


  /* Update stats and choose coding mode */
  /* The coding pattern is cycled through frame by frame. An empty or      */
  /* missing pattern used to divide by zero (or call strlen(NULL)); it now */
  /* falls back to 'A', which the monitor replaces with its own decision.  */
  pattern_length = profile_stats->coding ? strlen(profile_stats->coding) : 0;
  if(pattern_length > 0)
    coding = profile_stats->coding[profile_stats->frame_number % pattern_length];
  else
    coding = 'A';
  
  /* remember which record belongs to this frame before the monitor moves on */
  if (profile_stats->monitor && profile_stats->monitor->current_frame_stats)
    profile_stats->frame_stats = profile_stats->monitor->current_frame_stats;
  else
    profile_stats->frame_stats = NULL;
  if(profile_stats->monitor && profile_stats->monitor->enter)
      profile_stats->monitor->enter(profile_stats->monitor,
				    profile_stats->frame_number,
				    &(profile_stats->ref[1-profile_stats->current]),
				    incoming,
				    profile_stats->ref_shape,
				    &coding);
  

  /* Increment frame number */
  profile_stats->frame_number++;

  /* nothing is encoded by this profile: report 0 bits and quant scale 0 */
  if(profile_stats->monitor && profile_stats->monitor->leave)
    profile_stats->monitor->leave(profile_stats->monitor, 0, 0);

  /* Rotate reference frame */
  profile_stats->current = (profile_stats->current==1)?0:1;
}


/* profile_stats_encode: this profile only gathers statistics, so there is   */
/* nothing to encode; always returns 0.                                      */
static int profile_stats_encode(fame_profile_t *profile)
{
  (void) profile;

  return(0);
}



/*  profile_stats_leave                                                      */
/*                                                                           */
/*  Description:                                                             */
/*    Hand the statistics of the frame just processed back to the caller.    */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_profile_t * profile: the profile handle                           */
/*    fame_frame_statistics_t * stats: where to copy the statistics; may be  */
/*       NULL, in which case nothing is copied                               */
/*                                                                           */
/*  Return value:                                                            */
/*    None. *stats is left untouched when no statistics were recorded for    */
/*    the frame.                                                             */

static void profile_stats_leave(fame_profile_t *profile,
			       fame_frame_statistics_t *stats)
{
  /* profile_stats_enter() stores NULL here when the monitor has no record */
  fame_frame_statistics_t *recorded = FAME_PROFILE_STATS(profile)->frame_stats;

  if(stats && recorded)
    *stats = *recorded;
}

/*  profile_stats_close                                                      */
/*                                                                           */
/*  Description:                                                             */
/*    Close the monitor and release the reference shape and both reference   */
/*    frames.                                                                */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_profile_t * profile: the profile handle returned by fame_open     */
/*                                                                           */
/*  Return value:                                                            */
/*    int : always 0, this profile writes nothing to the buffer              */

static int profile_stats_close(fame_profile_t *profile)
{
  fame_profile_stats_t *self = FAME_PROFILE_STATS(profile);
  int i;

  /* Shut down statistics monitoring if the monitor has a close hook */
  if(self->monitor != NULL && self->monitor->close != NULL)
    self->monitor->close(self->monitor);

  /* The reference shape is optional */
  if(self->ref_shape != NULL)
    fame_free(self->ref_shape);

  /* Both reference frames: luminance plane first, then the frame itself */
  for(i = 0; i < 2; i++) {
    fame_free(self->ref[i]->y);
    fame_free(self->ref[i]);
  }

  return 0;
}








--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef __FAME_PROFILE_STATS_H
#define __FAME_PROFILE_STATS_H

#include "fame.h"
#include "fame_profile.h"
#include "fame_monitor.h"

/* State of the statistics-gathering profile. */
typedef struct _fame_profile_stats_t_ {
  FAME_EXTENDS(fame_profile_t);
  /* protected data */
  int width;                  /* width*height bytes of luminance are copied per frame */
  int height;
  char *coding;               /* coding pattern, indexed by frame_number modulo its length */
  unsigned int total_frames;
  int frame_number;           /* incremented once per entered frame */
  unsigned char *ref_shape;   /* passed to the monitor; released on close when non-NULL */
  fame_yuv_t *ref[2];         /* two frames used alternately, see current */
  int current;                /* index (0 or 1) in ref[] receiving the incoming frame */
  unsigned int monitor_flags;
  fame_monitor_t *monitor;    /* optional, may be NULL */
  fame_frame_statistics_t *frame_stats; /* monitor's current frame statistics, or NULL */
} fame_profile_stats_t;

/* Cast a profile handle to the statistics profile. The argument is         */
/* parenthesised so that the cast applies to the whole expression passed.   */
#define FAME_PROFILE_STATS(x) ((fame_profile_stats_t *) (x))

extern FAME_CONSTRUCTOR(fame_profile_stats_t);

#endif









--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2002 Yannick Vignon

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "fame.h"
#include "fame_rate_1param.h"
#include "fame_monitor.h"

/* Rate model used by this estimator:                                        */
/*   coeff = quant^model_power * rate / act                                  */
/* quant_model() solves it for the quantiser scale, coeff_model() for the    */
/* model coefficient. Every macro argument is parenthesised so expressions   */
/* can be passed safely.                                                     */
#define model_power 1.7
/* x to the power y, x must be strictly positive */
#define power(x,y) (exp((y)*log(x)))
#define quant_model(coeff, rate, act) (exp(1.0/model_power*log((coeff)*(act)/(rate))))
#define coeff_model(quant, rate, act) (exp(model_power*log(quant))*(rate)/(act))

/* Forward declarations of the init/enter/leave overrides that the */
/* constructor installs on the base rate object.                    */
static void rate_init(fame_rate_t *rate,
		      int mb_width,
		      int mb_height,
		      int bitrate,
		      char *coding,
		      fame_frame_statistics_t *stats_list,
		      fame_global_statistics_t *global_stats,
		      unsigned int flags);
static void rate_enter(fame_rate_t *rate,
		       fame_yuv_t **ref,
		       fame_yuv_t *current,
		       unsigned char *shape,
		       char coding,
		       fame_frame_statistics_t *frame_stats);
static void rate_leave(fame_rate_t *rate,
		       int spent);

/* Build a one-parameter rate estimator: run the base constructor, keep */
/* the base init/enter/leave handlers so the overrides can chain to     */
/* them, then install the overrides defined in this file.               */
FAME_CONSTRUCTOR(fame_rate_1param_t)
{
  fame_rate_t_constructor(FAME_RATE(this));

  /* remember the inherited handlers */
  this->FAME_OVERLOADED(init) = FAME_RATE(this)->init;
  this->FAME_OVERLOADED(enter) = FAME_RATE(this)->enter;
  this->FAME_OVERLOADED(leave) = FAME_RATE(this)->leave;

  /* replace them with this estimator's versions */
  FAME_RATE(this)->init = rate_init;
  FAME_RATE(this)->enter = rate_enter;
  FAME_RATE(this)->leave = rate_leave;

  FAME_OBJECT(this)->name = "one parameter rate estimation";
  FAME_RATE(this)->flags = 0xffffffff;

  return this;
}

/*  rate_init                                                                */
/*                                                                           */
/*  Description:                                                             */
/*    Initialise rate estimation. Chains to the inherited init, then         */
/*    derives either the second-pass global factors (from the recorded       */
/*    statistics) or the per-frame I/P bit allotments (from the coding       */
/*    pattern), and resets the model coefficients and their history.         */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_rate_t *rate: the rate estimation                                 */
/*    int mb_width: width in macroblocks                                     */
/*    int mb_height: height in macroblocks                                   */
/*    int bitrate: bit budget of one frame                                   */
/*    char *coding: coding pattern ('I', 'P' and 'A' frames are counted)     */
/*    fame_frame_statistics_t *stats_list: per-frame statistics, read only   */
/*                                         in second-pass mode               */
/*    fame_global_statistics_t *global_stats: global statistics, read only   */
/*                                            in second-pass mode            */
/*    unsigned int flags: forwarded to the inherited init                    */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void rate_init(fame_rate_t *rate,
		      int mb_width,
		      int mb_height,
		      int bitrate,
		      char *coding,
		      fame_frame_statistics_t *stats_list,
		      fame_global_statistics_t *global_stats,
		      unsigned int flags)
{
  fame_rate_1param_t *self = FAME_RATE_1PARAM(rate);
  int ni, np;
  float ratio;
  int i;
  float factor;
  const char *c;

#ifdef HAS_MMX
  asm("emms");
#endif

  self->FAME_OVERLOADED(init)(rate,
			      mb_width,
			      mb_height,
			      bitrate,
			      coding,
			      stats_list,
			      global_stats,
			      flags);

  ni = np = 0;
  if (rate->flags & FAME_RATE_SECOND_PASS) {
    double budget; /* bits for the whole sequence */
    float weight;  /* I/P weighting of the global factor */

    rate->stats_list = stats_list;
    factor = 0;
    rate->total_frames= global_stats->total_frames;
    for (i=0; i < global_stats->total_frames; i++) {
      /* skip frames without activity: log(0) is not finite */
      if(stats_list[i].spatial_activity)
	factor += exp(FAME_RATE_2PASS_POWER*log(stats_list[i].spatial_activity));
      if(stats_list[i].coding == 'I') ni++;
      if(stats_list[i].coding == 'P') np++;
    }
    ratio = 1; /* change to allocate more bits for scene changes */

    /* The product is done in floating point: as an integer product it */
    /* overflows for long sequences.                                   */
    budget = (double) bitrate * (double) global_stats->total_frames;
    /* No I or P frame recorded: use a neutral weight instead of 0/0 */
    weight = (ni + np) ? (np + ratio*ni) / (float)(ni + np) : 1;

    self->global_factor_P = (factor/budget) * weight;
    self->global_factor_I = self->global_factor_P / ratio;
  } 
  else {
    /* Walk the pattern once; no strlen() call per iteration */
    if (coding) {
      for(c = coding; *c; c++) {
	switch(*c) {
	case 'I': ni++; break;
	case 'P': np++; break;
	case 'A': np++; break;
	}
      }
    }
    ratio = 1;
    if (ni + np)
      self->P_bits = (double) bitrate * (np + ni) / (np + ratio * ni);
    else
      self->P_bits = bitrate; /* pattern without I/P/A frames: avoid x/0 */
    self->I_bits = ratio * self->P_bits;
  }
  
  /* Initial model coefficients, used until the history holds samples */
  rate->coeff1 = 1.5;
  rate->coeff2 = 2.75;

  self->coeff_index = 0;
  self->coeff_I_index = 0;
  self->I_number = 0;
  self->P_number = 0;
}


/*  rate_enter                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    Prepare for a new frame: credit the bit budget of the frame, chain to  */
/*    the inherited enter, measure the frame activity, average the recent    */
/*    model coefficients and derive the quantisation scale (kept in 2..31).  */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_rate_t *rate: the rate estimation                                 */
/*    fame_yuv_t **ref: the reference frames (half-pel)                      */
/*    fame_yuv_t *current: the current frame                                 */
/*    unsigned char *shape: the current shape                                */
/*    char coding: requested coding of the frame ('I' or 'P')                */
/*    fame_frame_statistics_t *frame_stats: recorded statistics of the       */
/*                                          frame, may be NULL               */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void rate_enter(struct _fame_rate_t_ *rate,
		       fame_yuv_t **ref,
		       fame_yuv_t *current,
		       unsigned char *shape,
		       char coding,
		       fame_frame_statistics_t *frame_stats)
{
  int avail, old_scale, old_coding;
  int i, window;
  float coeff1;

#ifdef HAS_MMX
  asm("emms");
#endif
  
  /* Update number of available bits */
  if (rate->flags & FAME_RATE_SECOND_PASS) {
    if (rate->available > 0) 
      avail = rate->available/4;
    else avail = 5*rate->available/6;
    /* Without recorded statistics there is nothing to credit */
    if (frame_stats) {
      switch(coding) {
      case 'I':
	rate->available += exp(FAME_RATE_2PASS_POWER*log(frame_stats->spatial_activity))/FAME_RATE_1PARAM(rate)->global_factor_I;
	break; /* was missing: I frames also received the P budget */
      case 'P':
	rate->available += exp(FAME_RATE_2PASS_POWER*log(frame_stats->spatial_activity))/FAME_RATE_1PARAM(rate)->global_factor_P;
	break;
      }
    }
  }
  else {
    avail = rate->available/2;
    switch(coding) {
    case 'I': rate->available += FAME_RATE_1PARAM(rate)->I_bits; break;
    case 'P': rate->available += FAME_RATE_1PARAM(rate)->P_bits; break;
    };
  }
  /* Part of the balance is held back while the scale is computed; */
  /* it is given back at the end of this function.                 */
  rate->available -= avail;

  old_coding = rate->coding;

  /* Common tasks */
  FAME_RATE_1PARAM(rate)->FAME_OVERLOADED(enter)(rate,
						 ref,
						 current,
						 shape,
						 coding,
						 frame_stats);


  /* compute frame activity */
  switch (rate->coding) {
  case 'I':
    FAME_RATE_1PARAM(rate)->activity = activity(rate->current,
						rate->shape,
						rate->mb_width,
						rate->mb_height);
#ifdef HAS_MMX
  asm("emms");
#endif
    break;
  case 'P':
    if (frame_stats) 
      FAME_RATE_1PARAM(rate)->activity = frame_stats->spatial_activity;
    else FAME_RATE_1PARAM(rate)->activity = activity2(rate->ref[0],
						      rate->current,
						      rate->shape,
						      rate->mb_width,
						      rate->mb_height);
    break;
  }



  /* Update model parameter */
  /* TODO: update window size based on activity change */
  /* NOTE(review): rate->coeff1 / rate->coeff2 receive the window SUM,   */
  /* not the average computed below, and are read back as the fallback   */
  /* when the window is empty - confirm this is intended.                */

  coeff1 = 0;
  window = 0;
  switch (rate->coding) {
  case 'I' :
    window = fame_min(FAME_RATE_WINDOW_SIZE, 
		      FAME_RATE_1PARAM(rate)->I_number); 
    for(i=0; i<window; i++) 
      coeff1 += FAME_RATE_1PARAM(rate)->old_I_coeff1[i];
    if (window == 0) coeff1 = rate->coeff2;
    rate->coeff2 = coeff1;
    /* an I frame restarts the P coefficient history */
    FAME_RATE_1PARAM(rate)->P_number = 0;
    break;
  case 'P' :
    window = fame_min(FAME_RATE_WINDOW_SIZE, 
		      FAME_RATE_1PARAM(rate)->P_number);
    for(i=0; i<window; i++) 
      coeff1 += FAME_RATE_1PARAM(rate)->old_coeff1[i];
    if (window == 0) coeff1 = rate->coeff1;
    rate->coeff1 = coeff1;
    break;
  }
  if (window != 0) 
    coeff1 = coeff1/window;


  /* Compute quantization scale */
  old_scale = rate->global_scale;
  if (rate->available > 0) {
     rate->global_scale =quant_model(coeff1,
				     rate->available,
				     FAME_RATE_1PARAM(rate)->activity);
  }
  else
    rate->global_scale = 31; 
  
  if(rate->coding == old_coding) {
    /* adaptive quant scale variation: the fewer samples in the window, */
    /* the larger the step allowed from the previous scale              */
    if(rate->global_scale > old_scale+FAME_RATE_WINDOW_SIZE-window+2)
      rate->global_scale = old_scale+FAME_RATE_WINDOW_SIZE-window+2;
    if(rate->global_scale < old_scale-FAME_RATE_WINDOW_SIZE+window-2)
      rate->global_scale = old_scale-FAME_RATE_WINDOW_SIZE+window-2;
  }
  if( rate->global_scale < 2)  rate->global_scale = 2;
  if( rate->global_scale > 31)  rate->global_scale = 31;

  /* give back the bits held back above */
  rate->available += avail;  
}

/*  rate_leave                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    Finish estimating a frame: chain to the inherited leave, then record   */
/*    the model coefficient observed for this frame in the circular history  */
/*    matching its coding type.                                              */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_rate_t *rate: the rate estimation                                 */
/*    int spent: number of bits spent on the frame                           */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void rate_leave(fame_rate_t *rate, int spent)
{  
  fame_rate_1param_t *self = FAME_RATE_1PARAM(rate);

#ifdef HAS_MMX
  asm("emms");
#endif

  self->FAME_OVERLOADED(leave)(rate,
			       spent);

  /* coeff_model divides by the activity: a frame without activity would */
  /* store an infinite or NaN coefficient and spoil the next averages,   */
  /* so such a frame contributes no sample.                              */
  if (self->activity <= 0)
    return;

  switch(rate->coding) {
  case 'I' :  
    self->old_I_coeff1[self->coeff_I_index] =
      coeff_model(rate->global_scale,
		  spent,
		  self->activity);

    /* advance the circular write position */
    self->coeff_I_index++;
    if (self->coeff_I_index >= FAME_RATE_WINDOW_SIZE) 
      self->coeff_I_index = 0;
    self->I_number++;
    break;
  case 'P': 
    self->old_coeff1[self->coeff_index] =
      coeff_model(rate->global_scale,
		  spent,
		  self->activity);

    /* advance the circular write position */
    self->coeff_index++;
    if (self->coeff_index >= FAME_RATE_WINDOW_SIZE) 
      self->coeff_index = 0;
    self->P_number++;
    break;
  }
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001  Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef __FAME_RATE_1PARAM_H
#define __FAME_RATE_1PARAM_H

#include "fame.h"
#include "fame_rate.h"
#include "fame_monitor.h"

/* Number of past model coefficients kept (and averaged) per coding type */
#define FAME_RATE_WINDOW_SIZE 5
/* Exponent applied to the spatial activity when sharing bits in second pass */
#define FAME_RATE_2PASS_POWER 0.5

/* One-parameter rate estimator: a fame_rate_t extended with the saved   */
/* base handlers and the history of observed model coefficients.         */
typedef struct _fame_rate_1param_t_ {
  FAME_EXTENDS(fame_rate_t);
  /* base-class handlers saved by the constructor; the overrides call them */
  void (* FAME_OVERLOADED(init))(struct _fame_rate_t_ *rate,
				 int mb_width,
				 int mb_height,
				 int bitrate,
				 char *coding,
				 fame_frame_statistics_t *stats_list,
				 fame_global_statistics_t *global_stats,
				 unsigned int flags);
  void (* FAME_OVERLOADED(enter))(struct _fame_rate_t_ *rate,
				  fame_yuv_t **ref,
				  fame_yuv_t *current,
				  unsigned char *shape,
				  char coding,
				  fame_frame_statistics_t *frame_stats);
  void (* FAME_OVERLOADED(leave))(struct _fame_rate_t_ *rate,
				  int spent);

  
  int I_bits, P_bits;      /* bits credited per I / P frame outside second pass */
  int activity;            /* activity measured for the current frame */
  float global_factor_I;   /* second-pass divisors of activity^FAME_RATE_2PASS_POWER */
  float global_factor_P;
  float I_coeff1;          /* NOTE(review): appears unused by fame_rate_1param.c - confirm */
  float old_coeff1[FAME_RATE_WINDOW_SIZE];   /* circular history of P-frame coefficients */
  float old_I_coeff1[FAME_RATE_WINDOW_SIZE]; /* circular history of I-frame coefficients */
  int coeff_index;         /* next write position in old_coeff1 */
  int coeff_I_index;       /* next write position in old_I_coeff1 */
  int I_number;            /* I-frame samples recorded so far */
  int P_number;            /* P-frame samples recorded since the last I frame */
} fame_rate_1param_t;

/* Cast a rate object to the one-parameter estimator. The argument is       */
/* parenthesised so that the cast applies to the whole expression passed.   */
#define FAME_RATE_1PARAM(x) ((fame_rate_1param_t *) (x))

extern FAME_CONSTRUCTOR(fame_rate_1param_t);

#endif







--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2002 Yannick Vignon

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "fame.h"
#include "fame_rate_simple.h"
#include "fame_monitor.h"

/* Linear rate model used by this estimator:                                 */
/*   coeff = quant * rate / act                                              */
/* quant_model() solves it for the quantiser scale, coeff_model() for the    */
/* model coefficient. Every macro argument is parenthesised so expressions   */
/* can be passed safely.                                                     */
/* x to the power y, x must be strictly positive */
#define power(x,y) (exp((y)*log(x)))
#define quant_model(coeff, rate, act) ((coeff)*(act)/(rate))
#define coeff_model(quant, rate, act) ((quant)*(rate)/(act))

/* Forward declarations of the init/enter/leave overrides that the */
/* constructor installs on the base rate object.                    */
static void rate_init(fame_rate_t *rate,
		      int mb_width,
		      int mb_height,
		      int bitrate,
		      char *coding,
		      fame_frame_statistics_t *stats_list,
		      fame_global_statistics_t *global_stats,
		      unsigned int flags);
static void rate_enter(fame_rate_t *rate,
		       fame_yuv_t **ref,
		       fame_yuv_t *current,
		       unsigned char *shape,
		       char coding,
		       fame_frame_statistics_t *frame_stats);
static void rate_leave(fame_rate_t *rate,
		       int spent);

/* Build a simple rate estimator: run the base constructor, keep the    */
/* base init/enter/leave handlers so the overrides can chain to them,   */
/* then install the overrides defined in this file.                     */
FAME_CONSTRUCTOR(fame_rate_simple_t)
{
  fame_rate_t_constructor(FAME_RATE(this));

  /* remember the inherited handlers */
  this->FAME_OVERLOADED(init) = FAME_RATE(this)->init;
  this->FAME_OVERLOADED(enter) = FAME_RATE(this)->enter;
  this->FAME_OVERLOADED(leave) = FAME_RATE(this)->leave;

  /* replace them with this estimator's versions */
  FAME_RATE(this)->init = rate_init;
  FAME_RATE(this)->enter = rate_enter;
  FAME_RATE(this)->leave = rate_leave;

  FAME_OBJECT(this)->name = "simple rate estimation";
  FAME_RATE(this)->flags = 0xffffffff;

  return this;
}

/*  rate_init                                                                */
/*                                                                           */
/*  Description:                                                             */
/*    Initialise rate estimation. Chains to the inherited init, derives the  */
/*    per-frame I/P bit allotments from the coding pattern and sets the      */
/*    initial model coefficients.                                            */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_rate_t *rate: the rate estimation                                 */
/*    int mb_width: width in macroblocks                                     */
/*    int mb_height: height in macroblocks                                   */
/*    int bitrate: bit budget of one frame                                   */
/*    char *coding: coding pattern ('I', 'P' and 'A' frames are counted)     */
/*    stats_list, global_stats, flags: forwarded to the inherited init       */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void rate_init(fame_rate_t *rate,
		      int mb_width,
		      int mb_height,
		      int bitrate,
		      char *coding,
		      fame_frame_statistics_t *stats_list,
		      fame_global_statistics_t *global_stats,
		      unsigned int flags)
{
  fame_rate_simple_t *self = FAME_RATE_SIMPLE(rate);
  int ni, np;
  int ratio;
  const char *c;

#ifdef HAS_MMX
  asm("emms");
#endif

  self->FAME_OVERLOADED(init)(rate,
			      mb_width,
			      mb_height,
			      bitrate,
			      coding,
			      stats_list,
			      global_stats,
			      flags);

  /* Count the frame types; the pattern is walked once, without calling */
  /* strlen() on every iteration.                                       */
  ni = np = 0;
  if (coding) {
    for(c = coding; *c; c++) {
      switch(*c) {
      case 'I': ni++; break;
      case 'P': np++; break;
      case 'A': np++; break;
      }
    }
  }

  ratio = 1; /* weight of an I frame relative to a P frame */
  if (np + ratio * ni > 0)
    /* widened product: bitrate * frame count may not fit in an int */
    self->P_bits = (int) ((long long) bitrate * (np + ni) / (np + ratio * ni));
  else
    self->P_bits = bitrate; /* no I/P/A frame in the pattern: avoid x/0 */
  self->I_bits = ratio * self->P_bits;
 
  rate->coeff1 = 1/6.0;
  self->I_coeff1 = 1;
}


/*  rate_enter                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    Prepare for a new frame: credit its bit allotment, chain to the        */
/*    inherited enter, obtain the frame activity and derive the              */
/*    quantisation scale from the linear model.                              */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_rate_t *rate: the rate estimation                                 */
/*    fame_yuv_t **ref: the reference frames (half-pel)                      */
/*    fame_yuv_t *current: the current frame                                 */
/*    unsigned char *shape: the current shape                                */
/*    char coding: requested coding of the frame ('I' or 'P')                */
/*    fame_frame_statistics_t *frame_stats: recorded statistics of the       */
/*                                          frame, may be NULL               */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void rate_enter(struct _fame_rate_t_ *rate,
		       fame_yuv_t **ref,
		       fame_yuv_t *current,
		       unsigned char *shape,
		       char coding,
		       fame_frame_statistics_t *frame_stats)
{
  int previous_scale;

#ifdef HAS_MMX
  asm("emms");
#endif

  /* Credit the bits granted to this kind of frame */
  if (coding == 'I')
    rate->available += FAME_RATE_SIMPLE(rate)->I_bits;
  else if (coding == 'P')
    rate->available += FAME_RATE_SIMPLE(rate)->P_bits;

  /* Let the base class do the common work */
  FAME_RATE_SIMPLE(rate)->FAME_OVERLOADED(enter)(rate, ref, current, shape,
						coding, frame_stats);

  /* Activity: taken from the recorded statistics when there are some, */
  /* measured against the reference frame otherwise                    */
  if (frame_stats != NULL) {
    FAME_RATE_SIMPLE(rate)->activity = frame_stats->spatial_activity;
  } else {
    FAME_RATE_SIMPLE(rate)->activity = activity2(rate->ref[0],
						 rate->current,
						 rate->shape,
						 rate->mb_width,
						 rate->mb_height);
  }

  /* Quantisation scale: coarsest value when the budget is exhausted, */
  /* model prediction otherwise                                       */
  previous_scale = rate->global_scale;
  if (!(rate->available > 0)) {
    rate->global_scale = 31;
  } else if (coding == 'I') {
    rate->global_scale = quant_model(FAME_RATE_SIMPLE(rate)->I_coeff1,
				     rate->available,
				     FAME_RATE_SIMPLE(rate)->activity);
  } else if (coding == 'P') {
    rate->global_scale = quant_model(rate->coeff1,
				     rate->available,
				     FAME_RATE_SIMPLE(rate)->activity);
  }

  /* Clamp to 2..31, then smooth with the scale of the previous frame */
  if (rate->global_scale < 2)
    rate->global_scale = 2;
  if (rate->global_scale > 31)
    rate->global_scale = 31;
  rate->global_scale = (rate->global_scale + previous_scale)/2;
}

/*  rate_leave                                                               */
/*                                                                           */
/*  Description:                                                             */
/*    Finish estimating a frame: chain to the inherited leave, then update   */
/*    the model coefficient of the frame's coding type from the bits         */
/*    actually spent.                                                        */
/*                                                                           */
/*  Arguments:                                                               */
/*    fame_rate_t *rate: the rate estimation                                 */
/*    int spent: number of bits spent on the frame                           */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void rate_leave(fame_rate_t *rate, int spent)
{  
  fame_rate_simple_t *self = FAME_RATE_SIMPLE(rate);
  float coeff;

#ifdef HAS_MMX
  asm("emms");
#endif

  self->FAME_OVERLOADED(leave)(rate,
			       spent);

  /* The model divides by the activity: keep the previous coefficient */
  /* when the frame has none instead of dividing by zero.             */
  if (self->activity <= 0)
    return;

  /* Evaluate in floating point: the coefficient is a float that may be */
  /* well below 1 and must not be truncated by an integer division.     */
  coeff = coeff_model((double) rate->global_scale,
		      spent,
		      self->activity);

  switch(rate->coding) {
  case 'I' :  
    self->I_coeff1 = coeff;
    break;
  case 'P': 
    rate->coeff1 = coeff;
    break;
  }
}
















--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001  Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef __FAME_RATE_SIMPLE_H
#define __FAME_RATE_SIMPLE_H

#include "fame.h"
#include "fame_rate.h"
#include "fame_monitor.h"

/* NOTE(review): not referenced by fame_rate_simple.c - confirm it is needed here */
#define FAME_RATE_2PASS_POWER 0.5

/* Simple rate estimator: a fame_rate_t extended with the saved base     */
/* handlers and the state of the linear rate model.                      */
typedef struct _fame_rate_simple_t_ {
  FAME_EXTENDS(fame_rate_t);
  /* base-class handlers saved by the constructor; the overrides call them */
  void (* FAME_OVERLOADED(init))(struct _fame_rate_t_ *rate,
				 int mb_width,
				 int mb_height,
				 int bitrate,
				 char *coding,
				 fame_frame_statistics_t *stats_list,
				 fame_global_statistics_t *global_stats,
				 unsigned int flags);
  void (* FAME_OVERLOADED(enter))(struct _fame_rate_t_ *rate,
				  fame_yuv_t **ref,
				  fame_yuv_t *current,
				  unsigned char *shape,
				  char coding,
				  fame_frame_statistics_t *frame_stats);
  void (* FAME_OVERLOADED(leave))(struct _fame_rate_t_ *rate,
				  int spent);

  
  int I_bits, P_bits;  /* bits credited to rate->available per I / P frame */
  int activity;        /* activity obtained for the current frame */
  float I_coeff1;      /* model coefficient for I frames (P frames use rate->coeff1) */
} fame_rate_simple_t;

/* Cast a rate object to the simple estimator. The argument is              */
/* parenthesised so that the cast applies to the whole expression passed.   */
#define FAME_RATE_SIMPLE(x) ((fame_rate_simple_t *) (x))

extern FAME_CONSTRUCTOR(fame_rate_simple_t);

#endif







--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier
                            Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**************************** half-pixel interpolation ***********************/

/* Constant operands referenced by symbol name from the inline assembly: */
/* four 16-bit ones (added with paddw) and eight byte ones (pand mask).  */
static short const _mmx_one[] = { 1, 1, 1, 1 };
static unsigned char const _mmx_one_byte[] = {1,1,1,1,1,1,1,1};


/* Interpolate 8 consecutive pixels at half-pel positions using 16-bit    */
/* arithmetic. With r = rounding, a = ref[0][x], b = ref[0][x+1],         */
/* c = ref[0][x+pitch] and d = ref[0][x+pitch+1], the code stores         */
/*   ref[1][x] = (a + b + 1 - r) >> 1                                     */
/*   ref[2][x] = (a + c + 1 - r) >> 1                                     */
/*   ref[3][x] = (a + b + c + d + 2 - r) >> 2                             */
/* It reads 9 bytes on the current line and on the next one. Pointers are */
/* loaded with 32-bit moves (movl).                                       */
static void inline mmx_interpolate_signed(unsigned char **ref,
					  int pitch,
					  int rounding)
{
  int dummy = 0;
  short _mmx_rc[4]; /* rounding control */
  register short *rc = _mmx_rc;

  /* one copy of (1 - rounding) per 16-bit lane */
  _mmx_rc[0] = 1 - rounding;
  _mmx_rc[1] = 1 - rounding;
  _mmx_rc[2] = 1 - rounding;
  _mmx_rc[3] = 1 - rounding;

  /* operands: %0 = ref, %1 = pitch, %2 = rc, %3 = scratch pointer */
  asm volatile ("movl (%0), %3\n"          /* %3 = ref[0] */
		"pxor %%mm7, %%mm7\n"      /* mm7 = 0 */
		"movq (%3), %%mm0\n"       /* mm0 = [ref] */
		"movq 1(%3), %%mm1\n"      /* mm1 = [ref+1] */
		"movq %%mm0, %%mm2\n"      /* mm2 = mm0 */
		"movq %%mm1, %%mm3\n"      /* mm3 = mm1 */
		"punpcklbw %%mm7, %%mm0\n" /* mm0 = ref[0-3]w */
		"punpcklbw %%mm7, %%mm1\n" /* mm1 = ref[1-4]w */
		"punpckhbw %%mm7, %%mm2\n" /* mm2 = ref[4-7]w */
		"punpckhbw %%mm7, %%mm3\n" /* mm3 = ref[5-8]w */
		"movq %%mm0, %%mm4\n"      /* save ref[0-3] */
		"movq %%mm2, %%mm5\n"      /* save ref[4-7] */
		"paddw %%mm1, %%mm0\n"     /* mm0 = ref[0-3] + ref[1-4] */
		"paddw %%mm3, %%mm2\n"     /* mm2 = ref[4-7] + ref[5-8] */
		"paddw (%2), %%mm0\n"      /* add 1 - rounding */
		"paddw (%2), %%mm2\n"      /* add 1 - rounding */
		"psrlw $1, %%mm2\n"        /* divide by 2 */
		"psrlw $1, %%mm0\n"        /* divide by 2 */
		"packuswb %%mm2, %%mm0\n"  /* pack to byte and saturate */
		"movq (%3, %1), %%mm1\n"   /* mm1 = [ref+1 line] */
		"movq 1(%3, %1), %%mm3\n"  /* mm3 = [ref+1 line+1] */
		"movq %%mm1, %%mm2\n"      /* mm2 = mm1 */
		"movq %%mm3, %%mm6\n"      /* mm6 = mm3 */
		"punpcklbw %%mm7, %%mm1\n" /* mm1 = ref[0-3+1line] */
		"punpcklbw %%mm7, %%mm3\n" /* mm3 = ref[1-4+1line] */
		"punpckhbw %%mm7, %%mm2\n" /* mm2 = ref[4-7+1line] */
		"punpckhbw %%mm7, %%mm6\n" /* mm6 = ref[5-8+1line] */
		"paddw %%mm1, %%mm4\n"     /* mm4 = ref[0-3]+ref[0-3+1l] */
		"paddw %%mm2, %%mm5\n"     /* mm5 = ref[4-7]+ref[4-7+1l] */
		"paddw (%2), %%mm4\n"      /* add 1 - rounding */
		"paddw (%2), %%mm5\n"      /* add 1 - rounding */
		"paddw %%mm4, %%mm3\n"     /* mm3 = ref00+ref10+ref11+1-r 0-3*/
		"paddw %%mm5, %%mm6\n"     /* mm6 = ref00+ref10+ref11+1-r 4-7*/
		"psrlw $1, %%mm4\n"        /* divide by 2 */
		"psrlw $1, %%mm5\n"        /* divide by 2 */
		"paddw " ASMSYM "_mmx_one, %%mm3\n"  /* add 1 */
		"paddw " ASMSYM "_mmx_one, %%mm6\n"  /* add 1 */
		"packuswb %%mm5, %%mm4\n"  /* pack to byte and saturate */
		"movq 1(%3), %%mm1\n"      /* mm1 = [ref+1] */
		"movq %%mm1, %%mm2\n"      /* mm2 = mm1 */
		"punpcklbw %%mm7, %%mm1\n" /* mm1 = ref[1-4] */
		"punpckhbw %%mm7, %%mm2\n" /* mm2 = ref[5-8] */
		"paddw %%mm1, %%mm3\n"     /* mm3 = all ref +2-r 0-3*/
		"paddw %%mm2, %%mm6\n"     /* mm6 = all ref +2-r 4-7*/
		"psrlw $2, %%mm3\n"        /* divide by 4 */
		"psrlw $2, %%mm6\n"        /* divide by 4 */
		"packuswb %%mm6, %%mm3\n"  /* pack to byte and saturate */
		"movl 4(%0), %3\n"         /* %3 = ref[1] */
		"movq %%mm0, (%3)\n"       /* store in frame */
		"movl 8(%0), %3\n"         /* %3 = ref[2] */
		"movq %%mm4, (%3)\n"       /* store in frame */
		"movl 12(%0), %3\n"        /* %3 = ref[3] */
		"movq %%mm3, (%3)\n"       /* store in frame */
		: "=r"(ref), "=r"(pitch), "=r"(rc), "=r"(dummy)
		: "0"(ref), "1"(pitch), "2"(rc), "3"(dummy)
		: "memory");
}


/* Interpolate 8 consecutive pixels at half-pel positions with byte        */
/* averages (pavgb). With a = ref[0][x], b = ref[0][x+1],                  */
/* c = ref[0][x+pitch] and d = ref[0][x+pitch+1], the code stores          */
/*   ref[1][x] = pavgb(a, b)                                               */
/*   ref[2][x] = pavgb(c, a)                                               */
/*   ref[3][x] = pavgb(pavgb(d, c), pavgb(a, b)) + (((a & b) ^ (c & d)) & 1) */
/* The rounding argument is copied to _mmx_rc, but the instruction         */
/* sequence never reads operand %2, so it has no effect on the result.     */
/* Pointers are loaded with 32-bit moves (movl).                           */
static void inline mmx_interpolate_unsigned(unsigned char **ref,
					    int pitch,
					    int rounding)
{
  int dummy = 0;
  short _mmx_rc[4]; /* rounding control */
  register short *rc = _mmx_rc;

  _mmx_rc[0] = 1 - rounding;
  _mmx_rc[1] = 1 - rounding;
  _mmx_rc[2] = 1 - rounding;
  _mmx_rc[3] = 1 - rounding;

  /* operands: %0 = ref, %1 = pitch, %2 = rc (unused), %3 = scratch pointer */
  asm volatile (
		"movl (%0), %3\n"          /* %3 = ref[0] */

		"prefetcht0 8(%3)\n"
		"prefetcht0 8(%3, %1)\n"

		"movq (%3), %%mm0\n"       /* mm0 = [ref] */
		"movq 1(%3), %%mm1\n"      /* mm1 = [ref+1] */
		"movq %%mm0, %%mm5\n"
		"pavgb %%mm1, %%mm5\n"     /* mm5 = avg([ref], [ref+1]) */

		"movq (%3, %1), %%mm2\n"   /* mm2 = [ref+1 line] */
		"movq 1(%3, %1), %%mm3\n"  /* mm3 = [ref+1 line+1] */
		"movq %%mm2, %%mm6\n"
		"pavgb %%mm0, %%mm6\n"     /* mm6 = avg([ref+1 line], [ref]) */

		"movq %%mm3, %%mm7\n"
		"pavgb %%mm2, %%mm7\n"     /* mm7 = avg of the two next-line pixels */
		"pavgb %%mm5, %%mm7\n"     /* mm7 = avg of both line averages */
		"pand %%mm1, %%mm0\n"
		"pand %%mm3, %%mm2\n"
		"pxor %%mm2, %%mm0\n"
		/* symbol prefixed with ASMSYM, like _mmx_one in the signed variant */
		"pand " ASMSYM "_mmx_one_byte, %%mm0\n" /* keep bit 0 of each byte */
		"paddb %%mm0, %%mm7\n"     /* add the correction bit */

		"movl 4(%0), %3\n"         /* %3 = ref[1] */
		"movq %%mm5, (%3)\n"       /* store in frame */
		"movl 8(%0), %3\n"         /* %3 = ref[2] */
		"movq %%mm6, (%3)\n"       /* store in frame */
		"movl 12(%0), %3\n"        /* %3 = ref[3] */
		"movq %%mm7, (%3)\n"       /* store in frame */
		: "=r"(ref), "=r"(pitch), "=r"(rc), "=r"(dummy)
		: "0"(ref), "1"(pitch), "2"(rc), "3"(dummy)
		: "memory");
}

/*  half_interpolate                                                         */
/*                                                                           */
/*  Description:                                                             */
/*    Compute half-pel resolution frames from reference frame: ref[0] is     */
/*    read, ref[1], ref[2] and ref[3] are written, 8 pixels at a time.       */
/*                                                                           */
/*  Arguments:                                                               */
/*    int width: width of the luminance plane, also used as its pitch        */
/*    int height: height of the luminance plane                              */
/*    fame_yuv_t **ref: the four frames (source and three interpolations)    */
/*    int rounding: rounding control handed to the interpolation routines    */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline half_interpolate(int width,
				    int height,
				    fame_yuv_t **ref,
				    int rounding)
{
  unsigned char *planes[4];
  int row, col, k;
  int groups, rows, pitch;

  /* Note: reference frame is allocated a little larger */
  /*       to allow overflow when doing interpolation   */
  /*       and thus avoid a special case for borders.   */
  /* Note: interpolation is done for Cr and Cb components   */
  /*       too, as it is used when computing the difference */
  /*       in motion compensation, even if it's not used    */
  /*       in motion estimation.                            */

  /* Y component: full resolution, byte-average routine */
  for(k = 0; k < 4; k++)
    planes[k] = ref[k]->y;
  groups = width >> 3;
  rows = height;
  pitch = width;

  for(row = 0; row < rows; row++)
    for(col = 0; col < groups; col++) {
      mmx_interpolate_unsigned(planes, pitch, rounding);
      for(k = 0; k < 4; k++)
	planes[k] += 8;
    }

  /* U component: half resolution in both directions, 16-bit routine */
  for(k = 0; k < 4; k++)
    planes[k] = ref[k]->u;
  groups = width >> 4;
  rows = height >> 1;
  pitch = width >> 1;

  for(row = 0; row < rows; row++)
    for(col = 0; col < groups; col++) {
      mmx_interpolate_signed(planes, pitch, rounding);
      for(k = 0; k < 4; k++)
	planes[k] += 8;
    }

  /* V component: same geometry as U */
  for(k = 0; k < 4; k++)
    planes[k] = ref[k]->v;
  groups = width >> 4;
  rows = height >> 1;
  pitch = width >> 1;

  for(row = 0; row < rows; row++)
    for(col = 0; col < groups; col++) {
      mmx_interpolate_signed(planes, pitch, rounding);
      for(k = 0; k < 4; k++)
	planes[k] += 8;
    }
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/********************** floating point quantisation **************************/

/*  quantize                                                                 */
/*                                                                           */
/*  Description:                                                             */
/*    Postmultiply and quantize a block. Products that do not fit in a       */
/*    16-bit short saturate to -32768/32767 instead of overflowing.          */
/*                                                                           */
/*  Arguments:                                                               */
/*    short *block: the resulting 8x8 block of integer                       */
/*    dct_t *cache: a 8x8 block to quantize                                  */
/*    dct_t *qmatrix: 8x8 coefficients including postscale and quantisation  */
/*    dct_t *round: 8x8 matrix for correct rounding                          */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline quantize(short *block,
			    dct_t *cache,
			    dct_t *qmatrix,
			    dct_t *round)
{
  int i;

  /* TODO: adjust quantization to avoid overflow at high quality */
  /* (until then, out-of-range results are saturated below)      */

  /* rounding term carrying the sign of a; zero when a is zero */
#define sign0round(a, b) (((a)<0) ? -(b) : (((a)>0) ? (b)  : 0) )

  for(i = 0; i < 64; i++) {
    double scaled = (cache[i]+sign0round(cache[i], round[i]))*qmatrix[i];

    /* Converting a floating value whose integral part does not fit in a */
    /* short is undefined behaviour, so clamp before the cast.           */
    if(scaled > 32767.0)
      block[i] = 32767;
    else if(scaled < -32768.0)
      block[i] = -32768;
    else
      block[i] = (short) scaled; /* truncates toward zero, as before */
  }
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/********************** MMX accelerated quantization *************************/

/*  quantize                                                                 */
/*                                                                           */
/*  Description:                                                             */
/*    Quantize an 8x8 block of 16-bit coefficients with MMX. For each word:  */
/*    take the absolute value, add round[i], keep the high 16 bits of the    */
/*    signed product with qmatrix[i] (pmulhw), then restore the sign.        */
/*                                                                           */
/*  Arguments:                                                               */
/*    dct_t *block: destination of the 64 quantized coefficients             */
/*    dct_t *cache: the 64 coefficients to quantize                          */
/*    dct_t *qmatrix: 64 multipliers applied with pmulhw                     */
/*    dct_t *round: 64 rounding terms added to the magnitudes                */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline quantize(dct_t *block,
			    dct_t *cache,
			    dct_t *qmatrix,
			    dct_t *round)
{
/* Reference formula from the floating point version:                         */
/*block[i] = (short) ((cache[i]+sign0round(cache[i], round[i]))*qmatrix[i]); */

/* QUANTIZE_STEP(x,y) processes two 16-byte rows (8 words each) selected by  */
/* the hex digits x and y: byte offsets 0x<x>0, 0x<x>8, 0x<y>0 and 0x<y>8.   */
/* Operands: %0 = cache, %1 = qmatrix, %2 = block, %3 = round.               */
/* mm2/mm3/mm6/mm7 hold the sign masks (0xffff where the input is negative); */
/* x ^ mask followed by "- mask" negates exactly the negative words.         */
#define QUANTIZE_STEP(x,y) \
                "movq 0x" #x "0(%0), %%mm0\n"	/* mm0 = words 0-3 of row x */ \
		"pxor %%mm2, %%mm2\n"		/* mm2 = 0             */ \
		"pxor %%mm6, %%mm6\n"		/* mm6 = 0             */ \
                "movq 0x" #y "0(%0), %%mm4\n"	/* mm4 = words 0-3 of row y */ \
		"pcmpgtw %%mm0, %%mm2\n"	/* mm2 = (mm0<0)?0xffff:0 */ \
		"pxor %%mm3, %%mm3\n"		/* mm3 = 0             */ \
		"movq 0x" #x "8(%0), %%mm1\n"	/* mm1 = words 4-7 of row x */ \
		"pcmpgtw %%mm4, %%mm6\n"	/* mm6 = (mm4<0)?0xffff:0 */ \
		"pxor %%mm7, %%mm7\n"		/* mm7 = 0             */ \
		"movq 0x" #y "8(%0), %%mm5\n"	/* mm5 = words 4-7 of row y */ \
		"pcmpgtw %%mm1, %%mm3\n"	/* mm3 = (mm1<0)?0xffff:0 */ \
		"pcmpgtw %%mm5, %%mm7\n"	/* mm7 = (mm5<0)?0xffff:0 */ \
                "pxor %%mm2, %%mm0\n"	        /* mm0 = |mm0|-(mm0<0)    */ \
		"pxor %%mm3, %%mm1\n"	        /* mm1 = |mm1|-(mm1<0)    */ \
                "paddw 0x" #x "0(%3), %%mm0\n"	/* mm0 += rounding        */ \
                "pxor %%mm6, %%mm4\n"	        /* mm4 = |mm4|-(mm4<0)    */ \
		"paddw 0x" #x "8(%3), %%mm1\n"	/* mm1 += rounding        */ \
		"pxor %%mm7, %%mm5\n"	        /* mm5 = |mm5|-(mm5<0)    */ \
                "paddw 0x" #y "0(%3), %%mm4\n"	/* mm4 += rounding        */ \
                "psubw %%mm2, %%mm0\n"	        /* mm0 = |data| + rounding */ \
		"paddw 0x" #y "8(%3), %%mm5\n"	/* mm5 += rounding        */ \
		"psubw %%mm3, %%mm1\n"	        /* mm1 = |data| + rounding */ \
		"pmulhw 0x" #x "0(%1), %%mm0\n"	/* mm0 = high word of mm0*qmatrix */	\
                "psubw %%mm6, %%mm4\n"	        /* mm4 = |data| + rounding */ \
		"pmulhw 0x" #x "8(%1), %%mm1\n"	/* mm1 = high word of mm1*qmatrix */	\
		"psubw %%mm7, %%mm5\n"	        /* mm5 = |data| + rounding */ \
		"pmulhw 0x" #y "0(%1), %%mm4\n"	/* mm4 = high word of mm4*qmatrix */	\
                "pxor %%mm2, %%mm0\n"	        /* mm0 ^= sign mask      */ \
		"pmulhw 0x" #y "8(%1), %%mm5\n"	/* mm5 = high word of mm5*qmatrix */	\
                "psubw %%mm2, %%mm0\n"	        /* mm0 -= sign mask: sign restored */ \
		"pxor %%mm3, %%mm1\n"	        /* mm1 ^= sign mask      */ \
		"movq %%mm0, 0x" #x "0(%2)\n"	/* store words 0-3 of row x */ \
		"psubw %%mm3, %%mm1\n"	        /* mm1 -= sign mask: sign restored */ \
                "pxor %%mm6, %%mm4\n"	        /* mm4 ^= sign mask      */ \
		"movq %%mm1, 0x" #x "8(%2)\n"   /* store words 4-7 of row x */ \
                "psubw %%mm6, %%mm4\n"	        /* mm4 -= sign mask: sign restored */ \
		"pxor %%mm7, %%mm5\n"	        /* mm5 ^= sign mask      */ \
		"movq %%mm4, 0x" #y "0(%2)\n"	/* store words 0-3 of row y */ \
		"psubw %%mm7, %%mm5\n"	        /* mm5 -= sign mask: sign restored */ \
		"movq %%mm5, 0x" #y "8(%2)\n"   /* store words 4-7 of row y */


  /* Four steps of two rows each cover byte offsets 0x00-0x7f (64 words).   */
  /* NOTE(review): only "memory" is declared as clobbered although mm0-mm7  */
  /* are overwritten, and no emms is issued here; presumably the callers    */
  /* account for both - confirm.                                            */
  asm volatile (
		QUANTIZE_STEP(0,1)
		QUANTIZE_STEP(2,3)
		QUANTIZE_STEP(4,5)
		QUANTIZE_STEP(6,7)
		: "=r"(cache), "=r"(qmatrix), "=r"(block), "=r"(round)
		: "0"(cache), "1"(qmatrix), "2"(block), "3"(round)
		: "memory");
}

--- NEW FILE ---
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* Constant operands for the MMX code. FAME_ALIGNED is defined elsewhere. */

/* 128 replicated in four shorts */
FAME_ALIGNED short const _mmx_128[] = { 128, 128, 128, 128 };
/* 128 replicated in eight chars */
/* NOTE(review): 128 does not fit a signed char; where plain char is signed */
/* the stored value relies on implementation-defined conversion - confirm.  */
FAME_ALIGNED char const _mmx_128c[] = { 128, 128, 128, 128, 128, 128, 128, 128 };
/* 1 replicated in four shorts */
FAME_ALIGNED short const _mmx_1[] = { 1, 1, 1, 1 };

/* AA&N coefficients, as fixed-point shorts: (short)(value * 2^bits + .5) */
/* DCT1 = 2*cos(4pi/16)                 = 1.414213562, 14 fractional bits */
/* DCT2 = 2*(cos(2pi/16)-cos(6pi/16))   = 1.082392200, 14 fractional bits */
/* DCT3 = 2*(cos(2pi/16)+cos(6pi/16))   = 2.613125925, 13 fractional bits */
/* DCT4 = 2*cos(6pi/16)                 = 0.765366865, 15 fractional bits */

#define DCT1 ((short) (1.414213562 * (double)(1 << 14) + .5)) /* 14-bit */
#define DCT2 ((short) (1.082392200 * (double)(1 << 14) + .5)) /* 14-bit */
#define DCT3 ((short) (2.613125925 * (double)(1 << 13) + .5)) /* 13-bit */
#define DCT4 ((short) (0.765366865 * (double)(1 << 15) + .5)) /* 15-bit */

/* Each row holds one coefficient replicated four times. Row order is      */
/* -DCT2, DCT1, DCT4, DCT3. The dct_mmx.h assembly addresses the rows as   */
/* _mmx_cos, _mmx_cos+8, _mmx_cos+16 and _mmx_cos+24, so neither the row   */
/* order nor the row size may change.                                      */
FAME_ALIGNED short const _mmx_cos[] = {
  -DCT2, -DCT2, -DCT2, -DCT2,
   DCT1,  DCT1,  DCT1,  DCT1,
   DCT4,  DCT4,  DCT4,  DCT4,
   DCT3,  DCT3,  DCT3,  DCT3
};

/* Same coefficients in the order DCT2, DCT1, DCT3, DCT4, all positive.    */
/* Presumably the operand table of the inverse DCT - confirm against       */
/* idct_mmx.h.                                                             */
FAME_ALIGNED short const _mmx_icos[] = {
  DCT2, DCT2, DCT2, DCT2,
  DCT1, DCT1, DCT1, DCT1,
  DCT3, DCT3, DCT3, DCT3,
  DCT4, DCT4, DCT4, DCT4
};

Index: AUTHORS
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/AUTHORS,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- AUTHORS	13 Mar 2002 01:14:34 -0000	1.1
+++ AUTHORS	1 Jun 2002 20:23:10 -0000	1.2
@@ -11,4 +11,6 @@
 	shape coding
 Thomas Cougnard <thomas.cougnard at enst-bretagne.fr>
 	MPEG-4 high level syntax
+Yannick Vignon <ye.vignon at enst-bretagne.fr>
+	bitrate control
 

Index: Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/Makefile,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- Makefile	17 Mar 2002 14:54:49 -0000	1.3
+++ Makefile	1 Jun 2002 20:23:10 -0000	1.4
@@ -3,9 +3,11 @@
 
 include ../config.mak
 
-SRCS	= cpuflags.c fame.c fame_decoder_mpeg.c fame_encoder_mpeg.c fame_motion.c fame_motion_fourstep.c fame_motion_none.c fame_motion_pmvfast.c fame_profile_mpeg.c fame_profile_mpeg1.c fame_profile_mpeg4_shape.c fame_profile_mpeg4_simple.c fame_rate.c fame_shape.c fame_syntax_mpeg1.c fame_syntax_mpeg4.c
+# cpuflags.c fame.c fame_decoder_mpeg.c fame_encoder_mpeg.c fame_motion.c fame_motion_fourstep.c fame_motion_none.c fame_motion_pmvfast.c fame_profile_mpeg.c fame_profile_mpeg1.c fame_profile_mpeg4_shape.c fame_profile_mpeg4_simple.c fame_rate.c fame_shape.c fame_syntax_mpeg1.c fame_syntax_mpeg4.c
+SRCS	= cpuflags.c fame.c fame_decoder_mpeg.c fame_encoder_mpeg.c fame_malloc.c fame_monitor.c fame_motion.c fame_motion_fourstep.c fame_motion_none.c fame_motion_pmvfast.c fame_profile_mpeg.c fame_profile_mpeg1.c fame_profile_mpeg4_shape.c fame_profile_mpeg4_simple.c fame_profile_stats.c fame_rate.c fame_rate_1param.c fame_rate_simple.c fame_shape.c fame_syntax_mpeg1.c fame_syntax_mpeg4.c
+
 OBJS	= $(SRCS:.c=.o)
-CFLAGS  = -fexpensive-optimizations -funroll-loops $(OPTFLAGS) -DPACKAGE=\"libfame\" -DVERSION=\"0.8.9\" -I.
+CFLAGS  = -fexpensive-optimizations -funroll-loops $(OPTFLAGS) -DPACKAGE=\"libfame\" -DVERSION=\"0.9.0\" -I.
 
 ifeq ($(TARGET_MMX),yes)
 CFLAGS += -DHAS_MMX

Index: README
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/README,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- README	13 Mar 2002 01:14:34 -0000	1.1
+++ README	1 Jun 2002 20:23:10 -0000	1.2
@@ -1,4 +1,4 @@
-libFAME version 0.8.9
+libFAME version 0.9.0
 
 DESCRIPTION
 ===========

Index: dct_float.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/dct_float.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- dct_float.h	13 Mar 2002 01:14:34 -0000	1.1
+++ dct_float.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -27,7 +27,7 @@
 /*  the one of FFT, but a bit more complex.                                  */
 /*  Here is a scheme of the algorithm:                                       */
 /*                                                                           */
-/* c0 \-------(+) c00 \--/---(+) c10 ----\/--(+) c20 ...                     */
+/* c0 \------/(+) c00 \--/---(+) c10 ----\/--(+) c20 ...                     */
 /* c1 \\----//(+) c01 \\//---(+) c11 ----/\--(-) c21 ...                     */
 /* c2 \\\--///(+) c02 //\\---(-) c12 -/------(+) c22 ...                     */
 /* c3 \\\\////(+) c03 /--\---(-) c13 /---------------...                     */
@@ -51,10 +51,14 @@
 
 
 /* AA&N coefficients */
-#define  DCT1 0.70711
-#define  DCT2 0.54120
-#define  DCT3 1.30658
-#define  DCT4 0.38268
+/* cos(4pi/16) */
+/* cos(2pi/16)-cos(6pi/16) */
+/* cos(2pi/16)+cos(6pi/16) */
+/* cos(6pi/16) */
+#define  DCT1 0.707106781
+#define  DCT2 0.541196100
+#define  DCT3 1.306562965
+#define  DCT4 0.382683432
 
 /*  dct_aan_row                                                              */
 /*                                                                           */

Index: dct_mmx.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/dct_mmx.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- dct_mmx.h	13 Mar 2002 01:14:34 -0000	1.1
+++ dct_mmx.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -20,28 +20,11 @@
 /* Warning: Didn't check the DCT was IEEE compliant. It is probably not.     */
 /* TODO: Write an IEEE compliant DCT/iDCT                                    */
 
-/* DCT flags */
-/* define to have a more accurate but slower DCT */
-/* #define precision */
-/* define to adjust DCT computation according to the sign */
-#define signbit
-
-/* AA&N coefficients */
-#define COS2  ((short) (0.541196100 * (double)(1 << 15) + .5)) /* 15-bit */
-#define COS4  ((short) (0.707106781 * (double)(1 << 15) + .5)) /* 15-bit */
-#define COS6  ((short) (0.382683433 * (double)(1 << 16) + .5)) /* 16-bit */
-#define COS8  ((short) (0.306562965 * (double)(1 << 16) + .5)) /* 16-bit */
-
-static short const _mmx_cos[] = {
-  -COS2, -COS2, -COS2, -COS2,
-   COS4,  COS4,  COS4,  COS4,
-   COS6,  COS6,  COS6,  COS6,
-   COS8,  COS8,  COS8,  COS8
-};
+#define precision
 
 static void inline dct_aan_pass(dct_t *cache)
 {
-  register unsigned short const *mmx_cos = _mmx_cos;
+  //  register unsigned short const *mmx_cos = _mmx_cos;
   asm volatile (
       ""                                     /* STEP 1 */
       "movq 0x00(%0),     %%mm0\n"           /* load line 0 */
@@ -81,69 +64,58 @@
       "movq %%mm4, %%mm5\n"                  /*  v14 -> mm5 */
       "paddsw %%mm1, %%mm5\n"                /*  v14 + v16 -> mm5 */
 #ifdef precision
-      "psllw $0x02, %%mm5\n"                 /* precision(va0) += 2 bit */
+      "psllw $0x01, %%mm5\n"                 /* precision(va0) += 1 bit */
 #endif
-      "pmulhw 16(%1), %%mm5\n"               /* (v14+v16)*COS6 -> mm5 (va0) */
-      "movq %%mm5, %%mm3\n"                  /* mm5->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm5) -> mm3 */
-      "psubsw %%mm3, %%mm5\n"                /* adjust multiply */
+    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
+//      "pmulhw 16(%1), %%mm5\n"               /* (v14+v16)*COS6 -> mm5 (va0) */
+      "pmulhw " ASMSYM "_mmx_cos+16, %%mm5\n" /* (v14+v16)*COS6 -> mm5 (va0) */
       ""                                     /* STEP 4 */
 #ifdef precision
-      "psllw $0x03, %%mm6\n"                 /* precision(v22) += 2 bit */
+      "psllw $0x02, %%mm6\n"                 /* precision(v22) += 1 bit */
 #else
       "psllw $0x01, %%mm6\n"                 /* */
 #endif
-      "pmulhw  8(%1), %%mm6\n"               /* 2*v22*A1/2 -> mm6 (v32)*/
-      "movq %%mm6, %%mm3\n"                  /* mm6->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm6) -> mm3 */
-      "psubsw %%mm3, %%mm6\n"                /* adjust multiply */
+    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
+      //      "pmulhw  8(%1), %%mm6\n"               /* 2*v22*COS4/2 -> mm6 (v32)*/
+      "pmulhw " ASMSYM "_mmx_cos+8, %%mm6\n" /* 2*v22*COS4/2 -> mm6 (v32)*/
 #ifdef precision
-      "psllw $0x03, %%mm2\n"                 /* precision(v15) += 2 bit */
+      "psllw $0x02, %%mm2\n"                 /* precision(v15) += 1 bit */
 #else
       "psllw $0x01, %%mm2\n"                 /* */
 #endif
-      "pmulhw  8(%1), %%mm2\n"               /* 2*v15*A3/2 -> mm2 (v35) */
-      "movq %%mm2, %%mm3\n"                  /* mm2->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm2) -> mm3 */
-      "psubsw %%mm3, %%mm2\n"                /* adjust multiply */
+    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
+      //      "pmulhw  8(%1), %%mm2\n"               /* 2*v15*COS4/2 -> mm2 (v35) */
+      "pmulhw " ASMSYM "_mmx_cos+8, %%mm2\n" /* 2*v15*COS4/2 -> mm2 (v35) */
 #ifdef precision
-      "psllw $0x03, %%mm4\n"                 /* precision(v14) += 2 bit */
+      "psllw $0x02, %%mm4\n"                 /* precision(v14) += 1 bit */
 #else
       "psllw $0x01, %%mm4\n"                 /* */
 #endif
-      "pmulhw  0(%1), %%mm4\n"               /* 2 * v14 * -A2/2 -> mm4 */
-      "movq %%mm4, %%mm3\n"                  /* mm2->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm2) -> mm3 */
-      "psubsw %%mm3, %%mm4\n"                /* adjust multiply */
-      "psubsw %%mm5, %%mm4\n"                /* v14 * -A2 - va0 -> mm4 (v34) */
+    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
+      //      "pmulhw  0(%1), %%mm4\n"               /* 2 * v14 * -COS2/2 -> mm4 */
+      "pmulhw " ASMSYM "_mmx_cos, %%mm4\n"   /* 2 * v14 * -COS2/2 -> mm4 */
+      "psubsw %%mm5, %%mm4\n"                /* v14*-COS2 - va0 -> mm4 (v34) */
 #ifdef precision
-      "psllw $0x02, %%mm1\n"                 /* precision(v16) += 2 bit */
+      "psllw $0x01, %%mm1\n"                 /* precision(v16) += 1 bit */
 #endif
       "psubsw %%mm1, %%mm5\n"                /* va0 - v16 -> mm5 */
-      "pmulhw 24(%1), %%mm1\n"               /* v16 * (A4 - 1) -> mm1 */
-      "movq %%mm1, %%mm3\n"                  /* mm2->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm2) -> mm3 */
-      "psubsw %%mm3, %%mm1\n"                /* adjust multiply */
-      "psubsw %%mm5, %%mm1\n"                /* v16 * A4 - va0 -> mm1 (v36) */
+    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
+      //      "pmulhw 24(%1), %%mm1\n"               /* v16 * (COS8 - 1) -> mm1 */
+      "pmulhw " ASMSYM "_mmx_cos+24, %%mm1\n" /* v16 * (COS8 - 1) -> mm1 */
+      "psubsw %%mm5, %%mm1\n"                /* v16 * COS8 - va0 -> mm1 (v36)*/
       ""                                     /* STEP 5 */
       "movq 0x70(%0), %%mm0\n"               /* retrieve v07 -> mm0 */
 #ifdef precision
-      "psllw $0x02, %%mm7\n"                 /* precision(v13) += 2 bit */
-      "psllw $0x02, %%mm0\n"                 /* precision(v07) += 2 bit */
+      "psllw $0x01, %%mm7\n"                 /* precision(v13) += 1 bit */
+      "psllw $0x01, %%mm0\n"                 /* precision(v07) += 1 bit */
 #endif
       "movq %%mm6, %%mm3\n"                  /* v32 -> mm3 */
       "paddsw %%mm7, %%mm6\n"                /* v13 + v32 -> mm6 (v42) */
       "psubsw %%mm3, %%mm7\n"                /* v13 - v32 -> mm7 (v43) */
 #ifdef precision
-      "psraw $0x02, %%mm6\n"                 /* precision(v42) -= 2 bit */
-      "psraw $0x02, %%mm7\n"                 /* precision(v43) -= 2 bit */
+      "psraw $0x01, %%mm6\n"                 /* precision(v42) -= 1 bit */
+      "psraw $0x01, %%mm7\n"                 /* precision(v43) -= 1 bit */
 #endif
-      "movq %%mm6, %%mm3\n"                  /* mm2->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm2) -> mm3 */
-      "psubsw %%mm3, %%mm6\n"                /* adjust multiply */
-      "movq %%mm7, %%mm3\n"                  /* mm2->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm2) -> mm3 */
-      "psubsw %%mm3, %%mm7\n"                /* adjust multiply */
       "movq %%mm6, 0x20(%0)\n"               /* store line 2 */
       "movq %%mm7, 0x60(%0)\n"               /* store line 6 */
       "movq %%mm2, %%mm5\n"                  /* v35 -> mm5 */
@@ -157,29 +129,17 @@
       "paddsw %%mm2, %%mm1\n"                /* v45 + v36 -> mm1 (v55) */
       "psubsw %%mm5, %%mm2\n"                /* v45 - v36 -> mm2 (v56) */
 #ifdef precision
-      "psraw $0x02, %%mm4\n"                 /* precision(v54) -= 2 bit */
-      "psraw $0x02, %%mm0\n"                 /* precision(v57) -= 2 bit */
-      "psraw $0x02, %%mm1\n"                 /* precision(v55) -= 2 bit */
-      "psraw $0x02, %%mm2\n"                 /* precision(v56) -= 2 bit */
-#endif
-      "movq %%mm4, %%mm3\n"                  /* mm4->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm4) -> mm3 */
-      "psubsw %%mm3, %%mm4\n"                /* adjust multiply */
-      "movq %%mm0, %%mm3\n"                  /* mm0->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm0) -> mm3 */
-      "psubsw %%mm3, %%mm0\n"                /* adjust multiply */
-      "movq %%mm1, %%mm3\n"                  /* mm1->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm1) -> mm3 */
-      "psubsw %%mm3, %%mm1\n"                /* adjust multiply */
-      "movq %%mm2, %%mm3\n"                  /* mm2->mm3 */
-      "psraw $0x0f, %%mm3\n"                 /* sign(mm2) -> mm3 */
-      "psubsw %%mm3, %%mm2\n"                /* adjust multiply */
+      "psraw $0x01, %%mm4\n"                 /* precision(v54) -= 1 bit */
+      "psraw $0x01, %%mm0\n"                 /* precision(v57) -= 1 bit */
+      "psraw $0x01, %%mm1\n"                 /* precision(v55) -= 1 bit */
+      "psraw $0x01, %%mm2\n"                 /* precision(v56) -= 1 bit */
+#endif
       "movq %%mm1, 0x10(%0)\n"               /* store line 1 */
       "movq %%mm0, 0x30(%0)\n"               /* store line 3 */
       "movq %%mm4, 0x50(%0)\n"               /* store line 5 */
       "movq %%mm2, 0x70(%0)\n"               /* store line 7 */
-      : "=r"(cache), "=r"(mmx_cos)
-      : "0"(cache), "1"(mmx_cos)
+      : "=r"(cache)/*, "=r"(mmx_cos)*/
+      : "0"(cache)/*, "1"(mmx_cos)*/
       : "memory");
 }
 

Index: fame.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -22,10 +22,12 @@
 #include <string.h>
 
 #include "fame.h"
+#include "fame_malloc.h"
 #include "cpuflags.h"
 #include "fame_profile_mpeg1.h"
 #include "fame_profile_mpeg4_simple.h"
 #include "fame_profile_mpeg4_shape.h"
+#include "fame_profile_stats.h"
 #include "fame_syntax.h"
 #include "fame_syntax_mpeg1.h"
 #include "fame_syntax_mpeg4.h"
@@ -37,6 +39,13 @@
 #include "fame_encoder_mpeg.h"
 #include "fame_decoder_mpeg.h"
 #include "fame_rate.h"
+#include "fame_rate_simple.h"
+#include "fame_rate_1param.h"
+#include "fame_monitor.h"
+
+#if defined(HAS_MMX)
+#include "table_mmx_const.h"
+#endif
 
 /* version information */
 const unsigned int libfame_major_version = LIBFAME_MAJOR_VERSION,
@@ -49,6 +58,7 @@
   fame_profile_mpeg1_t *profile_mpeg1;
   fame_profile_mpeg4_simple_t *profile_mpeg4_simple;
   fame_profile_mpeg4_shape_t *profile_mpeg4_shape;
+  fame_profile_stats_t *profile_stats;
   fame_encoder_mpeg_t *encoder_mpeg;
   fame_decoder_mpeg_t *decoder_mpeg;
   fame_motion_none_t *motion_none;
@@ -58,6 +68,13 @@
   fame_syntax_mpeg4_t *syntax_mpeg4;
   fame_shape_t *shape;
   fame_rate_t *rate;
+  fame_rate_simple_t *rate_simple;
+  fame_rate_1param_t *rate_1param;
+  fame_monitor_t *monitor;
+  /* for DEPRECATED fame_encode_frame */
+  int fame_encode_frame_first_call;
+  int slices_per_frame;
+  fame_frame_statistics_t stats;
 };
 
 /*  fame_open                                                                */
@@ -76,14 +93,15 @@
   fame_context_t *context;
   
   /* Initialize context */
-  context = (fame_context_t *) malloc(sizeof(fame_context_t));
+  context = (fame_context_t *) fame_malloc(sizeof(fame_context_t));
     
   /* Build built_in object list */
   context->type_list = NULL;
-  context->priv = (struct _fame_private_t_ *) malloc(sizeof(struct _fame_private_t_));
+  context->priv = (struct _fame_private_t_ *) fame_malloc(sizeof(struct _fame_private_t_));
   context->priv->profile_mpeg1 = FAME_NEW(fame_profile_mpeg1_t);
   context->priv->profile_mpeg4_simple = FAME_NEW(fame_profile_mpeg4_simple_t);
   context->priv->profile_mpeg4_shape = FAME_NEW(fame_profile_mpeg4_shape_t);
+  context->priv->profile_stats = FAME_NEW(fame_profile_stats_t);
   context->priv->encoder_mpeg = FAME_NEW(fame_encoder_mpeg_t);
   context->priv->decoder_mpeg = FAME_NEW(fame_decoder_mpeg_t);
   context->priv->motion_none = FAME_NEW(fame_motion_none_t);
@@ -93,6 +111,9 @@
   context->priv->syntax_mpeg4 = FAME_NEW(fame_syntax_mpeg4_t);
   context->priv->shape = FAME_NEW(fame_shape_t);
   context->priv->rate = FAME_NEW(fame_rate_t);
+  context->priv->rate_simple = FAME_NEW(fame_rate_simple_t);
+  context->priv->rate_1param = FAME_NEW(fame_rate_1param_t);
+  context->priv->monitor = FAME_NEW(fame_monitor_t);
 
   /* built-in profiles */
   fame_register(context, "profile", FAME_OBJECT(context->priv->profile_mpeg1));
@@ -100,6 +121,7 @@
   fame_register(context, "profile/mpeg4", FAME_OBJECT(context->priv->profile_mpeg4_simple));
   fame_register(context, "profile/mpeg4/simple", FAME_OBJECT(context->priv->profile_mpeg4_simple));
   fame_register(context, "profile/mpeg4/shape", FAME_OBJECT(context->priv->profile_mpeg4_shape));
+  fame_register(context, "profile/stats", FAME_OBJECT(context->priv->profile_stats));
   /* built-in encoders */
   fame_register(context, "encoder", FAME_OBJECT(context->priv->encoder_mpeg));
   fame_register(context, "encoder/mpeg", FAME_OBJECT(context->priv->encoder_mpeg));
@@ -118,7 +140,10 @@
   /* built-in shape coders */
   fame_register(context, "shape", FAME_OBJECT(context->priv->shape));
   /* built-in rate controllers */
-  fame_register(context, "rate", FAME_OBJECT(context->priv->rate));
+  fame_register(context, "rate", FAME_OBJECT(context->priv->rate_1param));
+  fame_register(context, "rate/simple", FAME_OBJECT(context->priv->rate_simple));
+  fame_register(context, "rate/1param", FAME_OBJECT(context->priv->rate_1param));
+  fame_register(context, "monitor", FAME_OBJECT(context->priv->monitor));
 
   return(context);
 }
@@ -144,7 +169,7 @@
 
   if(fame_get_object(context, type))
     fame_unregister(context, type);
-  context->type_list = (fame_list_t *) malloc(sizeof(fame_list_t));
+  context->type_list = (fame_list_t *) fame_malloc(sizeof(fame_list_t));
   context->type_list->next = next;
   context->type_list->type = type;
   context->type_list->item = object;
@@ -172,7 +197,7 @@
 	context->type_list = list->next;
       else
 	last->next = list->next;
-      free(list);
+      fame_free(list);
       return;
     }
     last = list;
@@ -229,7 +254,7 @@
 
   /* Print information message */
   if(p->verbose) {
-    FAME_INFO("libfame %s Copyright (C) 2000-2001 Vivien Chappelier\n",
+    FAME_INFO("libfame %s Copyright (C) 2000-2002 Vivien Chappelier\n",
 	      LIBFAME_VERSION);
     FAME_INFO("This library is provided under the terms of the LGPL. "
 	      "See COPYING for details\n");
@@ -242,13 +267,17 @@
     FAME_FATAL("could not find 'profile'\n");
 
   if(p->verbose) {
-    FAME_INFO("%s %dx%d @ %d/%d fps %s sequence %d%% quality\n",
+    FAME_INFO("%s %dx%d @ %.2f fps %d%% quality ",
 	      context->profile->name,
 	      p->width, p->height,
-	      p->frame_rate_num,
-	      p->frame_rate_den,
-	      p->coding,
-	      p->quality);
+	      (float)p->frame_rate_num/(float)p->frame_rate_den,
+	      p->quality); 
+    if(p->search_range)
+      FAME_INFO("%d pixel search range\n", p->search_range);
+    else
+      FAME_INFO("adaptive search range\n");
+
+    FAME_INFO("%s coding sequence\n", p->coding);
   }
 
   FAME_PROFILE(context->profile)->init(FAME_PROFILE(context->profile), context, p, buffer, size);
@@ -267,12 +296,16 @@
   if(p->verbose)
     FAME_INFO("Using floating point arithmetic\n");
 #endif
+
+  /* for DEPRECATED fame_encode_frame */
+  context->priv->fame_encode_frame_first_call = 1;
+  context->priv->slices_per_frame = p->slices_per_frame;
 }
 
-/*  fame_encode_frame                                                        */
+/*  fame_start_frame                                                         */
 /*                                                                           */
 /*  Description:                                                             */
-/*    Encode a single frame.                                                 */
+/*    Start encoding a frame.                                                */
 /*                                                                           */
 /*  Arguments:                                                               */
 /*    fame_context_t * context: the context handle returned by fame_open     */
@@ -280,13 +313,47 @@
 /*    unsigned char * mask: the input mask (0 = transparent, 255 = opaque)   */
 /*                                                                           */
 /*  Return value:                                                            */
-/*    int : the number of bytes written to buffer                            */
+/*    None.                                                                  */
 
-int fame_encode_frame(fame_context_t *context,
+void fame_start_frame(fame_context_t *context,
 		      fame_yuv_t *yuv,
 		      unsigned char *mask)
 {
-  return(FAME_PROFILE(context->profile)->encode(FAME_PROFILE(context->profile), yuv, mask));
+  FAME_PROFILE(context->profile)->enter(FAME_PROFILE(context->profile), yuv, mask);
+}
+
+/*  fame_encode_slice                                                        */
+/*                                                                           */
+/*  Description:                                                             */
+/*    Encode a slice of a frame.                                             */
+/*                                                                           */
+/*  Arguments:                                                               */
+/*    fame_context_t * context: the context handle returned by fame_open     */
+/*                                                                           */
+/*  Return value:                                                            */
+/*    int : the number of bytes written to buffer                            */
+
+int fame_encode_slice(fame_context_t *context)
+{
+  return(FAME_PROFILE(context->profile)->encode(FAME_PROFILE(context->profile)));
+}
+
+/*  fame_end_frame                                                           */
+/*                                                                           */
+/*  Description:                                                             */
+/*    Finish encoding of a frame.                                            */
+/*                                                                           */
+/*  Arguments:                                                               */
+/*    fame_context_t * context: the context handle returned by fame_open     */
+/*    fame_frame_statistics_t * stats: information about the encoding        */
+/*                                                                           */
+/*  Return value:                                                            */
+/*    None.                                                                  */
+
+void fame_end_frame(fame_context_t *context,
+		    fame_frame_statistics_t *stats)
+{
+  FAME_PROFILE(context->profile)->leave(FAME_PROFILE(context->profile), stats);
 }
 
 /*  fame_close                                                               */
@@ -314,14 +381,15 @@
     while(l->next != NULL) {
       p = l;
       l = l->next;
-      free(p);
+      fame_free(p);
     }
-    free(l);
+    fame_free(l);
   }
   
   FAME_DELETE(context->priv->profile_mpeg1);
   FAME_DELETE(context->priv->profile_mpeg4_simple);
   FAME_DELETE(context->priv->profile_mpeg4_shape);
+  FAME_DELETE(context->priv->profile_stats);
   FAME_DELETE(context->priv->encoder_mpeg);
   FAME_DELETE(context->priv->decoder_mpeg);
   FAME_DELETE(context->priv->motion_none);
@@ -331,11 +399,91 @@
   FAME_DELETE(context->priv->syntax_mpeg4);
   FAME_DELETE(context->priv->shape);
   FAME_DELETE(context->priv->rate);
-  free(context->priv);
+  FAME_DELETE(context->priv->rate_simple);
+  FAME_DELETE(context->priv->rate_1param);
+  FAME_DELETE(context->priv->monitor);
+
+  fame_free(context->priv);
  
-  free(context);
+  fame_free(context);
 
   return(bytes_written);
 }
 
+/* DEPRECATED: whole-frame wrapper over fame_start_frame/encode_slice/end_frame */
+int fame_encode_frame(fame_context_t *context,
+		      fame_yuv_t *yuv,
+		      unsigned char *mask)
+{
+  if(context->priv->fame_encode_frame_first_call) { /* warn only once */
+    context->priv->fame_encode_frame_first_call = 0;
+    fprintf(stderr,
+	    "usage of fame_encode_frame is deprecated\n"
+	    "please use fame_start_frame, fame_encode_slice\n"
+	    "and fame_end_frame functions instead\n");
+  }
+  if(context->priv->slices_per_frame != 1) {
+    fprintf(stderr,
+	    "fame_encode_frame doesn't work when slices_per_frame != 1\n");
+    memset(&context->priv->stats, 0, sizeof(context->priv->stats)); /* zeroed stats make the return below 0 */
+    return(context->priv->stats.actual_bits/8);
+  }    
+  /* single-slice case: run the three-step API once */
+  fame_start_frame(context, yuv, mask); 
+  fame_encode_slice(context);
+  fame_end_frame(context, &context->priv->stats);
+
+  return(context->priv->stats.actual_bits/8); /* actual_bits / 8, presumably the frame size in bytes */
+}
+
+#if !defined(__GNUC__)
+
+#include <stdarg.h>
+
+/* va_* based error management by Petter Reinholdtsen */
+int FAME_INFO(const char *format, ...) /* printf-style message to stderr */
+{
+  va_list va;
+  va_start(va, format);
+  vfprintf(stderr, format, va);
+  va_end(va);
+  return(0); /* declared int: always hand the caller a defined value */
+}
 
+int FAME_WARNING(const char *format, ...) /* "Warning: " + printf-style message to stderr */
+{
+  va_list va;
+  fprintf(stderr, "Warning: ");
+  va_start(va, format);
+  vfprintf(stderr, format, va);
+  va_end(va);
+  return(0); /* declared int: always hand the caller a defined value */
+}
+int FAME_ERROR(const char *format, ...) /* "Error: " + printf-style message to stderr */
+{
+  va_list va;
+  fprintf(stderr, "Error: ");
+  va_start(va, format);
+  vfprintf(stderr, format, va);
+  va_end(va);
+  return(0); /* declared int: always hand the caller a defined value */
+}
+int FAME_FATAL(const char *format, ...) /* "Fatal: " + printf-style message to stderr, then exit */
+{
+  va_list va;
+  fprintf(stderr, "Fatal: ");
+  va_start(va, format);
+  vfprintf(stderr, format, va);
+  va_end(va);
+  exit(-1); /* terminates the process, so control never reaches the end of this int function */
+}
+
+#endif /* not __GNUC__ */
+
+
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ <= 95 && __GNUC_PATCHLEVEL__ <= 3)
+/* gcc bug?? workaround */
+void __fame_dummy_call(int q)
+{
+}
+#endif

Index: fame.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- fame.h	27 Apr 2002 22:42:25 -0000	1.2
+++ fame.h	1 Jun 2002 20:23:10 -0000	1.3
@@ -48,6 +48,18 @@
 # endif
 #endif
 
+/* Alignment */
+#if defined(__GNUC__)
+#ifdef WIN32
+#define FAME_ALIGNMENT 16
+#else
+#define FAME_ALIGNMENT 32
+#endif
+#define FAME_ALIGNED __attribute__ ((__aligned__(FAME_ALIGNMENT)))
+#else
+#define FAME_ALIGNED
+#endif
+
 /* Error management */
 #if defined(__GNUC__)
 #define FAME_INFO(format, args...) \
@@ -58,13 +70,20 @@
       fprintf(stderr, "Error: " format, ##args)
 #define FAME_FATAL(format, args...) \
       { fprintf(stderr, "Fatal: " format, ##args); exit(-1); }
+#else /* not __GNUC__ */
+/* No vararg macros */
+int FAME_INFO(const char *format, ...);
+int FAME_WARNING(const char *format, ...);
+int FAME_ERROR(const char *format, ...);
+int FAME_FATAL(const char *format, ...);
+
 #endif
 
 #ifndef fame_min
-#define fame_min(X,Y) ((X) < (Y) ? (X) : (Y))
+#define fame_min(X,Y) (((X) < (Y)) ? (X) : (Y))
 #endif
 #ifndef fame_max
-#define fame_max(X,Y) ((X) > (Y) ? (X) : (Y))
+#define fame_max(X,Y) (((X) > (Y)) ? (X) : (Y))
 #endif
 
 /* object management */
@@ -75,7 +94,7 @@
 #define FAME_OVERLOADED(x) super_ ## x
 
 typedef struct _fame_yuv_t_ {
-  unsigned int w, h;
+  unsigned int w, h, p;
   unsigned char *y;
   unsigned char *u;
   unsigned char *v;
@@ -117,6 +136,8 @@
 typedef enum { fame_mismatch_local, fame_mismatch_global } fame_mismatch_t;
 
 typedef struct _fame_context_t_ fame_context_t;
+typedef struct _fame_frame_statistics_t_ fame_frame_statistics_t;
+typedef struct _fame_global_statistics_t_ fame_global_statistics_t;
 typedef struct _fame_parameters_t_ fame_parameters_t;
 
 /******************************* object type *********************************/
@@ -140,6 +161,26 @@
   struct _fame_private_t_ *priv;
 };
 
+/******************************** statistics *********************************/
+
+struct _fame_frame_statistics_t_ {
+  unsigned int frame_number;
+  char coding;
+  signed int target_bits;
+  unsigned int actual_bits;
+  unsigned int spatial_activity;
+  unsigned int quant_scale;
+};
+
+
+struct _fame_global_statistics_t_ {
+  unsigned int total_frames;
+  unsigned int target_rate;
+  unsigned int actual_rate;
+  unsigned int mean_spatial_activity;
+};
+
+
 /******************************** parameters *********************************/
 
 struct _fame_parameters_t_ {
@@ -147,7 +188,7 @@
   int height;                       /* height of the video sequence */
   char const *coding;               /* coding sequence */
   int quality;                      /* video quality */
-  int bitrate;
+  int bitrate;                      /* video bitrate (0=VBR)*/
   int slices_per_frame;             /* number of slices per frame */
   unsigned int frames_per_sequence; /* number of frames per sequence */
   int frame_rate_num;               /* numerator of frames per second */
@@ -156,6 +197,8 @@
   unsigned int search_range;        /* motion estimation search range */
   unsigned char verbose;            /* verbosity */
   char const *profile;              /* profile name */
+  unsigned int total_frames;        /* total number of frames */
+  int (* retrieve_cb)(fame_frame_statistics_t *stats);
 };
 
 #define FAME_PARAMETERS_INITIALIZER {		                             \
@@ -169,8 +212,11 @@
   25,                                   /* 25 frames/second */               \
   1,                                    /* /1 */                             \
   100,                                  /* original shape */                 \
-  16,                                   /* 16 pixel wide search range */     \
-  1                                     /* verbose mode */                   \
+  0,                                    /* adaptive search range */          \
+  1,                                    /* verbose mode */                   \
+  "mpeg4",                              /* profile name */                   \
+  0,                                    /* number of frames */               \
+  NULL                                  /* stats retrieval callback */       \
 }
 
 /***************************** function prototypes ***************************/
@@ -196,10 +242,21 @@
 			       unsigned char *buffer,
 			       unsigned int size);
 
-extern DECLSPEC int fame_encode_frame(fame_context_t *context,
+extern DECLSPEC void fame_start_frame(fame_context_t *context,
 				      fame_yuv_t *yuv,
-				      unsigned char *shape);
+				      unsigned char *mask);
+  
+extern DECLSPEC int fame_encode_slice(fame_context_t *context);
+
+extern DECLSPEC void fame_end_frame(fame_context_t *context,
+				    fame_frame_statistics_t *stats);
+
 extern DECLSPEC int fame_close(fame_context_t *context);
+
+/* DEPRECATED */
+extern DECLSPEC int fame_encode_frame(fame_context_t *context,
+				      fame_yuv_t *yuv,
+				      unsigned char *mask);
 
 #ifdef __cplusplus
 }

Index: fame_bitbuffer.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_bitbuffer.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_bitbuffer.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_bitbuffer.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -31,7 +31,7 @@
 /* Note: This code will work on >i486 only as i386 doesn't support the bswap */
 /* instruction. Alternatives (using xchg for example) could be written for   */
 /* i386.                                                                     */
-#define bitbuffer_write(bb, c, l)					\
+#define fast_bitbuffer_write(data, shift, c, l)				\
 {									\
   int d;								\
 									\
@@ -46,20 +46,20 @@
       "lea   (%0, %%ecx, 4), %0\n" /* data += (ecx>32) */		\
       "andl $31, %1\n"             /* mask shift */			\
       "orl %3, (%0)\n"             /* put last 32 bits */		\
-      : "=r"((bb)->data), "=r"((bb)->shift), "=a"(d), "=d"(d), "=c"(d)	\
-      : "0"((bb)->data), "1"((bb)->shift), "2"((unsigned long) c), "3"(0), "c"((unsigned long) l)	\
+      : "=r"(data), "=r"(shift), "=a"(d), "=d"(d), "=c"(d)	\
+      : "0"(data), "1"(shift), "2"((unsigned long) c), "3"(0), "c"((unsigned long) l) \
       : "memory");							\
 }
 #else
-#define bitbuffer_write(bb, d, size)                    \
+#define fast_bitbuffer_write(data, shift, d, size)               \
 {							\
   /* assume size != 0 */				\
   unsigned char * ptr;					\
   unsigned char left;					\
   unsigned long c;					\
   							\
-  ptr = (bb)->data + ((bb)->shift >> 3); 	        \
-  left = 8 - ((bb)->shift & 7);			        \
+  ptr = data + ((shift) >> 3); 	        \
+  left = 8 - ((shift) & 7);			        \
 							\
   /* left align */					\
   c = (((unsigned long) (d)) << (32 - size));           \
@@ -74,11 +74,13 @@
   c <<= 8;						\
   *ptr++ |= (c >> 24);					\
   							\
-  (bb)->shift += (size);				\
-  (bb)->data += ((((bb)->shift) >> 5) << 2);	        \
-  (bb)->shift &= 31;				        \
+  shift += (size);				\
+  data += (((shift) >> 5) << 2);	        \
+  shift &= 31;				        \
 }
 #endif
+
+#define bitbuffer_write(bb, c, l) fast_bitbuffer_write((bb)->data, (bb)->shift, c, l)
 
 #define bitbuffer_init(bb, d, size)			\
 {						        \

Index: fame_decoder.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_decoder.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_decoder.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_decoder.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -27,6 +27,8 @@
 				int height,
 				unsigned char *intra_quantisation_table,
 				unsigned char *inter_quantisation_table,
+				unsigned char *intra_dc_y_scale_table,
+				unsigned char *intra_dc_c_scale_table,
 				fame_mismatch_t mismatch_type);
   void (* enter)               (struct _fame_decoder_t_ *decoder,
 				fame_yuv_t **past_ref,
@@ -34,14 +36,11 @@
 				fame_yuv_t **future_ref,
 				fame_yuv_t *yuv,
 				unsigned char *shape);
-  void (* set_quantisation)    (struct _fame_decoder_t_ *decoder,
-				unsigned char quant_scale,
-				unsigned char intra_y_scale,
-				unsigned char intra_c_scale);
   void (* reconstruct_intra_mb)(struct _fame_decoder_t_ *decoder,
 				short x,
 				short y,
 				short *blocks[6],
+				unsigned char q,
 				fame_bab_t bab_type);
   void (* reconstruct_inter_mb)(struct _fame_decoder_t_ *decoder,
 				short x,
@@ -50,8 +49,10 @@
 				fame_motion_vector_t *forward,
 				fame_motion_vector_t *backward,
 				fame_motion_coding_t motion_coding,
+				unsigned char q,
 				fame_bab_t bab_type);
   void (* pad)                 (struct _fame_decoder_t_ *decoder,
+				unsigned char *bab_map,
 				fame_box_t *box);
   void (* interpolate)         (struct _fame_decoder_t_ *decoder,
 				int rounding);

Index: fame_decoder_mpeg.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_decoder_mpeg.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_decoder_mpeg.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_decoder_mpeg.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "fame.h"
+#include "fame_malloc.h"
 #include "fame_decoder.h"
 #include "fame_decoder_mpeg.h"
 #include "table_scale.h"
@@ -29,7 +30,7 @@
 #define arch_leave_state() asm("emms")
 #include "transpose_mmx.h"
 #include "idct_mmx.h"
-#include "dequantise_mmx.h"
+#include "dequantize_mmx.h"
 #include "reconstruct_mmx.h"
 #include "pad_int.h" /* TODO */
 #include "half_mmx.h"
@@ -37,7 +38,7 @@
 #define arch_enter_state() 
 #define arch_leave_state() 
 #include "idct_float.h"
-#include "dequantise_float.h"
+#include "dequantize_float.h"
 #include "reconstruct_float.h"
 #include "pad_int.h"
 #include "half_int.h"
@@ -48,6 +49,8 @@
 		      int height,
 		      unsigned char *intra_quantisation_table,
 		      unsigned char *inter_quantisation_table,
+		      unsigned char *intra_dc_y_scale_table,
+		      unsigned char *intra_dc_c_scale_table,
 		      fame_mismatch_t mismatch_type);
 static void mpeg_enter(fame_decoder_t *decoder,
 			fame_yuv_t **past_ref,
@@ -55,24 +58,23 @@
 			fame_yuv_t **future_ref,
 			fame_yuv_t *yuv,
 			unsigned char *shape);
-static void mpeg_set_quantisation(fame_decoder_t *decoder,
-				  unsigned char quant_scale,
-				  unsigned char intra_y_scale,
-				  unsigned char intra_c_scale);
 static void mpeg_reconstruct_intra_mb(fame_decoder_t *decoder,
-				       short x,
-				       short y,
-				       short *blocks[6],
-				       fame_bab_t bab_type);
+				      short x,
+				      short y,
+				      short *blocks[6],
+				      unsigned char q,
+				      fame_bab_t bab_type);
 static void mpeg_reconstruct_inter_mb(fame_decoder_t *decoder,
-				       short x,
-				       short y,
-				       short *blocks[6],
-				       fame_motion_vector_t *forward,
-				       fame_motion_vector_t *backward,
-				       fame_motion_coding_t motion_coding,
-				       fame_bab_t bab_type);
+				      short x,
+				      short y,
+				      short *blocks[6],
+				      fame_motion_vector_t *forward,
+				      fame_motion_vector_t *backward,
+				      fame_motion_coding_t motion_coding,
+				      unsigned char q,
+				      fame_bab_t bab_type);
 static void mpeg_pad(fame_decoder_t *decoder,
+		     unsigned char *bab_map,
 		     fame_box_t *box);
 static void mpeg_interpolate(fame_decoder_t *decoder, int rounding);
 static void mpeg_leave(fame_decoder_t *decoder);
@@ -83,7 +85,6 @@
   FAME_OBJECT(this)->name = "MPEG decoder";
   FAME_DECODER(this)->init = mpeg_init;
   FAME_DECODER(this)->enter = mpeg_enter;
-  FAME_DECODER(this)->set_quantisation = mpeg_set_quantisation;
   FAME_DECODER(this)->reconstruct_intra_mb = mpeg_reconstruct_intra_mb;
   FAME_DECODER(this)->reconstruct_inter_mb = mpeg_reconstruct_inter_mb;
   FAME_DECODER(this)->pad = mpeg_pad;
@@ -104,6 +105,8 @@
 /*    int height: height of the frame                                        */
 /*    unsigned char *intra_quantisation_table: quantisation matrix for intra */
 /*    unsigned char *inter_quantisation_table: quantisation matrix for inter */
+/*    unsigned char *intra_dc_y_scale_table: quantisation table for DC of Y  */
+/*    unsigned char *intra_dc_c_scale_table: quantisation table for DC of C  */
 /*    fame_mismatch_t mismatch_type: type of mismatch control                */
 /*                                                                           */
 /*  Return value:                                                            */
@@ -112,22 +115,69 @@
 static void mpeg_init(fame_decoder_t *decoder,
 		      int width,
 		      int height,
-		      unsigned char *intra_quantisation_table,
-		      unsigned char *inter_quantisation_table,
+		      unsigned char *iqtable,
+		      unsigned char *niqtable,
+		      unsigned char *intra_dc_y_scale_table,
+		      unsigned char *intra_dc_c_scale_table,
 		      fame_mismatch_t mismatch_type)
 {
   fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
+  int i, q;
 
   /* set width and height */
   decoder_mpeg->width = width;
   decoder_mpeg->height = height;
-
-  /* allocate padded shape buffer */
-  decoder_mpeg->padded = (unsigned char *) malloc(decoder_mpeg->width*
-						   decoder_mpeg->height);
-  decoder_mpeg->intra_quantisation_table = intra_quantisation_table;
-  decoder_mpeg->inter_quantisation_table = inter_quantisation_table;
   decoder_mpeg->mismatch = mismatch_type;
+#ifdef HAS_MMX
+  if(mismatch_type == fame_mismatch_global)
+    for(i = 0; i < 6; i++) {
+      decoder_mpeg->mismatch_accumulator[i] = 
+	(dct_t *) fame_malloc((decoder_mpeg->width>>3)*
+			      (decoder_mpeg->height>>3)*sizeof(dct_t));
+      memset(decoder_mpeg->mismatch_accumulator[i], 0,
+	     (decoder_mpeg->width>>3)*
+	     (decoder_mpeg->height>>3)*sizeof(dct_t));
+    }
+#endif
+    
+  /* compute quantization matrixes */
+  for(q = 1; q < 32; q++) {
+    /* compute the intra quantisation and dequantisation DC scaler */
+#ifdef HAS_MMX
+    asm("emms");
+    decoder_mpeg->yidqmatrixes[q][0] =
+      (short) (intra_dc_y_scale_table[q] << 3);
+    decoder_mpeg->cidqmatrixes[q][0] =
+      (short) (intra_dc_c_scale_table[q] << 3);
+#else
+    decoder_mpeg->yidqmatrixes[q][0] = intra_dc_y_scale_table[q];
+    decoder_mpeg->cidqmatrixes[q][0] = intra_dc_c_scale_table[q];
+#endif
+
+    /* compute the intra quantisation and dequantisation matrix */
+    for(i = 1; i < 64; i++)
+    {
+#ifdef HAS_MMX
+      decoder_mpeg->yidqmatrixes[q][i] = decoder_mpeg->cidqmatrixes[q][i] =
+	(short) q*iqtable[i];
+#else
+      decoder_mpeg->yidqmatrixes[q][i] = decoder_mpeg->cidqmatrixes[q][i] =
+	q*iqtable[i];
+#endif
+    }
+
+    /* compute the inter quantisation and dequantisation matrix */
+    for(i = 0; i < 64; i++)
+    {
+#ifdef HAS_MMX
+      decoder_mpeg->nidqmatrixes[q][i] = (short) q*niqtable[i];
+      decoder_mpeg->psmatrix[i] = (short) ((double)(1UL << 16) * prescale[i] + 0.5);
+#else
+      decoder_mpeg->nidqmatrixes[q][i] = q*niqtable[i];
+      decoder_mpeg->psmatrix[i] = prescale[i];
+#endif
+    }		     
+  }
 }
 
 /*  mpeg_enter                                                               */
@@ -164,79 +214,42 @@
   arch_enter_state();
 }
 
-/*  mpeg_set_quantisation                                                    */
+/*  mpeg_pad_mb                                                              */
 /*                                                                           */
 /*  Description:                                                             */
-/*    Updates the matrices used for quantisation.                            */
+/*    Perform repetitive padding for motion estimation on a border block.    */
 /*                                                                           */
 /*  Arguments:                                                               */
 /*    fame_decoder_t *decoder: the decoder                                   */
-/*    unsigned char quant_scale: quantisation scale                          */
-/*    unsigned char intra_y_scale: intra DC scaler for Y component           */
-/*    unsigned char intra_c_scale: intra DC scaler for U and V components    */
+/*    short x: the x location of the macroblock in macroblock units          */
+/*    short y: the y location of the macroblock in macroblock units          */
+/*                                                                           */
+/*                                                                           */
 /*  Return value:                                                            */
 /*    None.                                                                  */
 
-static void mpeg_set_quantisation(fame_decoder_t *decoder,
-				  unsigned char quant_scale,
-				  unsigned char intra_y_scale,
-				  unsigned char intra_c_scale)
+static void mpeg_pad_mb(fame_decoder_t *decoder,
+			short x,
+			short y)
 {
   fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
-  unsigned char *iqtable = decoder_mpeg->intra_quantisation_table;
-  unsigned char *niqtable = decoder_mpeg->inter_quantisation_table;
-  dct_t *yiqmatrix = decoder_mpeg->yiqmatrix;
-  dct_t *ciqmatrix = decoder_mpeg->ciqmatrix;
-  dct_t *niqmatrix = decoder_mpeg->niqmatrix;
-  dct_t *yidqmatrix = decoder_mpeg->yidqmatrix;
-  dct_t *cidqmatrix = decoder_mpeg->cidqmatrix;
-  dct_t *nidqmatrix = decoder_mpeg->nidqmatrix;
-  dct_t *psmatrix = decoder_mpeg->psmatrix;
-  int i;
-
-  /* compute the intra quantisation and dequantisation DC scaler */
-#ifdef HAS_MMX
-  asm("emms");
-  yiqmatrix[0] = (short) ((double)(1UL<<16)*postscale[0]/intra_y_scale); /*16*/
-  yidqmatrix[0] =  (short) (intra_y_scale << 3);
-  ciqmatrix[0] = (short) ((double)(1UL<<16)*postscale[0]/intra_c_scale); /*16*/
-  cidqmatrix[0] =  (short) (intra_c_scale << 3);
-#else
-  yiqmatrix[0] = postscale[0] / intra_y_scale;
-  yidqmatrix[0] =  intra_y_scale;
-  ciqmatrix[0] = postscale[0] / intra_c_scale;
-  cidqmatrix[0] =  intra_c_scale;
-#endif
+  int spitch, rpitch;
+  unsigned char *shape, *Y, *U, *V;
 
-  /* compute the intra quantisation and dequantisation matrix */
-  for(i = 1; i < 64; i++)
-  {
-#ifdef HAS_MMX
-    yiqmatrix[i] = ciqmatrix[i] =
-      (short) ((double)(1UL<<19)*postscale[i] / (quant_scale*iqtable[i]));
-    yidqmatrix[i] = cidqmatrix[i] = (short) quant_scale*iqtable[i];
-#else
-    yiqmatrix[i] = ciqmatrix[i] =
-      8.0 * postscale[i] / (quant_scale * iqtable[i]);
-    yidqmatrix[i] = cidqmatrix[i] = quant_scale*iqtable[i];
-#endif
-  }
+  /* Make offsets to blocks */
+  spitch = decoder_mpeg->input->p;
+  shape = decoder_mpeg->shape + (y << 4) * spitch + (x << 4);
 
-  /* compute the inter quantisation and dequantisation matrix */
-  for(i = 0; i < 64; i++)
-  {
-#ifdef HAS_MMX
-    niqmatrix[i] = (short) ((double)(1UL<<19)*postscale[i]/
-			   (quant_scale*niqtable[i]));
-    nidqmatrix[i] = (short) quant_scale*niqtable[i];
-    psmatrix[i] = (short) ((double)(1UL << 16) * prescale[i]);
-#else
-    niqmatrix[i] = 8.0 * postscale[i] / (quant_scale * niqtable[i]);
-    nidqmatrix[i] = quant_scale*niqtable[i];
-    psmatrix[i] = prescale[i];
-#endif
-  }		     
-}  
+  /* Make offsets to blocks */
+  rpitch = decoder_mpeg->new_ref[0]->p;
+  Y = decoder_mpeg->new_ref[0]->y + (y << 4) * rpitch + (x << 4); /* Y */
+  U = decoder_mpeg->new_ref[0]->u + (y << 3) * (rpitch >> 1) + (x << 3); /*Cb*/
+  V = decoder_mpeg->new_ref[0]->v + (y << 3) * (rpitch >> 1) + (x << 3); /*Cr*/
+
+  repetitive_fill_Y(Y, shape, rpitch, spitch);
+  repetitive_fill_C(U, shape, rpitch, spitch);
+  repetitive_fill_C(V, shape, rpitch, spitch);
+}
 
 /*  mpeg_reconstruct_intra_mb                                                */
 /*                                                                           */
@@ -249,81 +262,116 @@
 /*    short x: the x location of the macroblock in macroblock units          */
 /*    short y: the y location of the macroblock in macroblock units          */
 /*    short *blocks[6]:  the DCT coded blocks                                */
+/*    unsigned char q: the quantizer scale for this block                    */
 /*    fame_bab_t bab_type: binary alpha block type                           */
 /*                                                                           */
 /*  Return value:                                                            */
 /*    None.                                                                  */
 
 static void mpeg_reconstruct_intra_mb(fame_decoder_t *decoder,
-				       short x,
-				       short y,
-				       short *blocks[6],
-				       fame_bab_t bab_type)
+				      short x,
+				      short y,
+				      short *blocks[6],
+				      unsigned char q,
+				      fame_bab_t bab_type)
 {
   fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
-  unsigned long offset[6];
-  int i, pitch;
+  unsigned long offset0,offset1,offset2,offset3,offset4,offset5;
+  int pitch;
+  void (* dequantize)(short *block,
+		      dct_t *cache,
+		      dct_t *dqmatrix,
+		      dct_t *psmatrix,
+		      dct_t *mismatch);
+  void (* idct_)(dct_t *block);
+  void (* reconstruct_)(unsigned char *plane,
+			dct_t *block,
+			int pitch);
 
-  pitch = decoder_mpeg->width;
+  pitch = decoder_mpeg->new_ref[0]->p;
 
   /* Make offsets to blocks */
-  offset[0] = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
-  offset[1] = offset[0] + 8;                       /* Y(0,1) */
-  offset[2] = offset[0] + (pitch << 3);            /* Y(1,0) */
-  offset[3] = offset[2] + 8;                       /* Y(1,1) */
-  offset[4] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb     */
-  offset[5] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cr     */
+  offset0 = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
+  offset1 = offset0 + 8;                       /* Y(0,1) */
+  offset2 = offset0 + (pitch << 3);            /* Y(1,0) */
+  offset3 = offset2 + 8;                       /* Y(1,1) */
+  offset4 = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb     */
+  offset5 = (y << 3) * (pitch >> 1) + (x << 3);  /* Cr     */
+
+  if(decoder_mpeg->mismatch == fame_mismatch_local)
+    dequantize = dequantize_intra_local;
+  else
+    dequantize = dequantize_intra_global;
+  idct_ = idct;
+  reconstruct_ = reconstruct;
 
   /* Reconstruct blocks */
-  for(i = 0; i < 4; i++) { /* Y */
-    if(decoder_mpeg->mismatch == fame_mismatch_local) {
-      dequantise_intra_local(blocks[i],
-			     decoder_mpeg->tmpblock,
-			     decoder_mpeg->yidqmatrix,
-			     decoder_mpeg->psmatrix);
-    } else {
-      dequantise_intra_global(blocks[i],
-			      decoder_mpeg->tmpblock,
-			      decoder_mpeg->yidqmatrix,
-			      decoder_mpeg->psmatrix);
-    }
-    idct(decoder_mpeg->tmpblock);
-    reconstruct_Y(decoder_mpeg->new_ref[0]->y + offset[i],
-		  decoder_mpeg->tmpblock,
-		  pitch);
-  }
+  /* Y(0,0) */
+  dequantize(blocks[0],
+	     decoder_mpeg->tmpblock,
+	     decoder_mpeg->yidqmatrixes[q],
+	     decoder_mpeg->psmatrix,
+	     &decoder_mpeg->mismatch_accumulator[0][y*(pitch>>3)+x]);
+  idct_(decoder_mpeg->tmpblock);
+  reconstruct_(decoder_mpeg->new_ref[0]->y + offset0,
+	       decoder_mpeg->tmpblock,
+	       pitch);
+
+  dequantize(blocks[1],
+	     decoder_mpeg->tmpblock,
+	     decoder_mpeg->yidqmatrixes[q],
+	     decoder_mpeg->psmatrix,
+	     &decoder_mpeg->mismatch_accumulator[1][y*(pitch>>3)+x]);
+  idct_(decoder_mpeg->tmpblock);
+  reconstruct_(decoder_mpeg->new_ref[0]->y + offset1,
+	       decoder_mpeg->tmpblock,
+	       pitch);
+
+  dequantize(blocks[2],
+	     decoder_mpeg->tmpblock,
+	     decoder_mpeg->yidqmatrixes[q],
+	     decoder_mpeg->psmatrix,
+	     &decoder_mpeg->mismatch_accumulator[2][y*(pitch>>3)+x]);
+  idct_(decoder_mpeg->tmpblock);
+  reconstruct_(decoder_mpeg->new_ref[0]->y + offset2,
+	       decoder_mpeg->tmpblock,
+	       pitch);
+
+  dequantize(blocks[3],
+	     decoder_mpeg->tmpblock,
+	     decoder_mpeg->yidqmatrixes[q],
+	     decoder_mpeg->psmatrix,
+	     &decoder_mpeg->mismatch_accumulator[3][y*(pitch>>3)+x]);
+  idct_(decoder_mpeg->tmpblock);
+  reconstruct_(decoder_mpeg->new_ref[0]->y + offset3,
+	       decoder_mpeg->tmpblock,
+	       pitch);
+  
   /* U */
-  if(decoder_mpeg->mismatch == fame_mismatch_local) {
-    dequantise_intra_local(blocks[4],
-			   decoder_mpeg->tmpblock,
-			   decoder_mpeg->cidqmatrix,
-			   decoder_mpeg->psmatrix);
-  } else {
-    dequantise_intra_global(blocks[4],
-			   decoder_mpeg->tmpblock,
-			   decoder_mpeg->cidqmatrix,
-			   decoder_mpeg->psmatrix);
-  }
-  idct(decoder_mpeg->tmpblock);
-  reconstruct_C(decoder_mpeg->new_ref[0]->u + offset[4],
-		decoder_mpeg->tmpblock,
-		pitch >> 1);
+  dequantize(blocks[4],
+	     decoder_mpeg->tmpblock,
+	     decoder_mpeg->cidqmatrixes[q],
+	     decoder_mpeg->psmatrix,
+	     &decoder_mpeg->mismatch_accumulator[4][y*(pitch>>3)+x]);
+  idct_(decoder_mpeg->tmpblock);
+  reconstruct_(decoder_mpeg->new_ref[0]->u + offset4,
+	       decoder_mpeg->tmpblock,
+	       pitch >> 1);
+  
   /* V */
-  if(decoder_mpeg->mismatch == fame_mismatch_local) {
-    dequantise_intra_local(blocks[5],
-			   decoder_mpeg->tmpblock,
-			   decoder_mpeg->cidqmatrix,
-			   decoder_mpeg->psmatrix);
-  } else {
-    dequantise_intra_global(blocks[5],
-			    decoder_mpeg->tmpblock,
-			    decoder_mpeg->cidqmatrix,
-			    decoder_mpeg->psmatrix);
-  }
-  idct(decoder_mpeg->tmpblock);
-  reconstruct_C(decoder_mpeg->new_ref[0]->v + offset[5],
-		decoder_mpeg->tmpblock,
-		pitch >> 1);
+  dequantize(blocks[5],
+	     decoder_mpeg->tmpblock,
+	     decoder_mpeg->cidqmatrixes[q],
+	     decoder_mpeg->psmatrix,
+	     &decoder_mpeg->mismatch_accumulator[5][y*(pitch>>3)+x]);
+  idct_(decoder_mpeg->tmpblock);
+  reconstruct_(decoder_mpeg->new_ref[0]->v + offset5,
+	       decoder_mpeg->tmpblock,
+	       pitch >> 1);
+
+  /* fill the block if needed */
+  if(bab_type >= bab_border_16x16)
+    mpeg_pad_mb(decoder, x, y);
 }
 
 /*  mpeg_reconstruct_inter_mb                                               */
@@ -339,6 +387,7 @@
 /*    short *blocks[6]:  the DCT coded blocks                                */
 /*    fame_motion_vector_t *forward: forward motion vectors                  */
 /*    fame_motion_vector_t *backward: backward motion vectors                */
+/*    unsigned char q: the quantizer scale for this block                    */
 /*    fame_bab_t bab_type: binary alpha block type                           */
 /*                                                                           */
 /*  Return value:                                                            */
@@ -351,6 +400,7 @@
 				      fame_motion_vector_t *forward,
 				      fame_motion_vector_t *backward,
 				      fame_motion_coding_t motion_coding,
+				      unsigned char q,
 				      fame_bab_t bab_type)
 {
   fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
@@ -359,8 +409,13 @@
   int coded[6];
   signed long residual[6];
   int i, j, pitch;
+  void (* dequantize)(short *block,
+		      dct_t *cache,
+		      dct_t *dqmatrix,
+		      dct_t *psmatrix,
+		      dct_t *mismatch);
 
-  pitch = decoder_mpeg->width;
+  pitch = decoder_mpeg->new_ref[0]->p;
 
   /* Make offsets to blocks */
   offset[0] = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
@@ -393,24 +448,20 @@
       }
   }
 
+ if(decoder_mpeg->mismatch == fame_mismatch_local)
+    dequantize = dequantize_inter_local;
+  else
+    dequantize = dequantize_inter_global;
+
   /* Reconstruct blocks */
   for(i = 0; i < 4; i++) { /* Y */
     if(coded[i]) {
-
-      if(decoder_mpeg->mismatch == fame_mismatch_local) {
-	dequantise_inter_local(blocks[i],
-			       decoder_mpeg->tmpblock,
-			       decoder_mpeg->nidqmatrix,
-			       decoder_mpeg->psmatrix);
-      } else {
-	dequantise_inter_global(blocks[i],
-				decoder_mpeg->tmpblock,
-				decoder_mpeg->nidqmatrix,
-				decoder_mpeg->psmatrix);
-      }
-
+      dequantize(blocks[i],
+		 decoder_mpeg->tmpblock,
+		 decoder_mpeg->nidqmatrixes[q],
+		 decoder_mpeg->psmatrix,
+		 &decoder_mpeg->mismatch_accumulator[i][y*(pitch>>3)+x]);
       idct(decoder_mpeg->tmpblock);
-
       sum(decoder_mpeg->new_ref[0]->y + offset[i],
 	  decoder_mpeg->future_ref[residual[i]]->y + offset[i] + motion[i],
 	  &forward[i].error,
@@ -426,17 +477,11 @@
   
   /* U */
   if(coded[4]) {
-    if(decoder_mpeg->mismatch == fame_mismatch_local) {
-      dequantise_inter_local(blocks[4],
-			     decoder_mpeg->tmpblock,
-			     decoder_mpeg->nidqmatrix,
-			     decoder_mpeg->psmatrix);
-    } else {
-      dequantise_inter_global(blocks[4],
-			      decoder_mpeg->tmpblock,
-			      decoder_mpeg->nidqmatrix,
-			      decoder_mpeg->psmatrix);
-    }
+    dequantize(blocks[4],
+	       decoder_mpeg->tmpblock,
+	       decoder_mpeg->nidqmatrixes[q],
+	       decoder_mpeg->psmatrix,
+	       &decoder_mpeg->mismatch_accumulator[4][y*(pitch>>3)+x]);
     idct(decoder_mpeg->tmpblock);
     sum(decoder_mpeg->new_ref[0]->u + offset[4],
 	decoder_mpeg->future_ref[residual[4]]->u + offset[4] + motion[4],
@@ -452,17 +497,11 @@
   
   /* V */
   if(coded[5]) {
-    if(decoder_mpeg->mismatch == fame_mismatch_local) {
-      dequantise_inter_local(blocks[5],
-			     decoder_mpeg->tmpblock,
-			     decoder_mpeg->nidqmatrix,
-			     decoder_mpeg->psmatrix);
-    } else {
-      dequantise_inter_global(blocks[5],
-			      decoder_mpeg->tmpblock,
-			      decoder_mpeg->nidqmatrix,
-			      decoder_mpeg->psmatrix);
-    }
+    dequantize(blocks[5],
+	       decoder_mpeg->tmpblock,
+	       decoder_mpeg->nidqmatrixes[q],
+	       decoder_mpeg->psmatrix,
+	       &decoder_mpeg->mismatch_accumulator[5][y*(pitch>>3)+x]);
     idct(decoder_mpeg->tmpblock);
     sum(decoder_mpeg->new_ref[0]->v + offset[5],
 	decoder_mpeg->future_ref[residual[5]]->v + offset[5] + motion[5],
@@ -471,36 +510,56 @@
 	pitch >> 1);
   } else {
     move(decoder_mpeg->new_ref[0]->v + offset[5],
-	 decoder_mpeg->future_ref[residual[4]]->v + offset[5] + motion[5],
+	 decoder_mpeg->future_ref[residual[5]]->v + offset[5] + motion[5],
 	 pitch >> 1);
     forward[5].error = 0;
   }
+
+  /* fill the block if needed */
+  if(bab_type >= bab_border_16x16)
+    mpeg_pad_mb(decoder, x, y);
 }
 
 /*  mpeg_pad                                                                 */
 /*                                                                           */
 /*  Description:                                                             */
-/*    Perform repetitive padding of arbitrary shape for motion estimation.   */
+/*    Perform extended padding for motion estimation.                        */
 /*                                                                           */
 /*  Arguments:                                                               */
 /*    fame_decoder_t *decoder: the decoder                                   */
+/*    unsigned char *bab_map: binary alpha block type map                    */
 /*    fame_box_t box: bounding box                                           */
 /*                                                                           */
 /*  Return value:                                                            */
 /*    None.                                                                  */
 
 static void mpeg_pad(fame_decoder_t *decoder,
+		     unsigned char *bab_map,
 		     fame_box_t *box)
 {
   fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
-  if(decoder_mpeg->shape) {
-    pad(decoder_mpeg->width,
+  int i;
+  void (* pad)(int i,
+	       int width,
+	       int height,
+	       fame_yuv_t **frame,
+	       unsigned char *shape,  /* not used */
+	       unsigned char *bab_map, /* not used */
+	       fame_box_t *box);
+
+  if(decoder_mpeg->shape)
+    pad = extended_pad_withmask;
+  else
+    pad = extended_pad_withoutmask;
+
+  for(i = 0; i < 4; i++)
+    pad(i,
+	decoder_mpeg->width,
 	decoder_mpeg->height,
-	decoder_mpeg->new_ref[0],
+	decoder_mpeg->new_ref,
 	decoder_mpeg->shape,
-	decoder_mpeg->padded,
+	bab_map,
 	box);
-  }
 }
 
 
@@ -556,6 +615,13 @@
 {
   fame_decoder_mpeg_t *decoder_mpeg = FAME_DECODER_MPEG(decoder);
 
-  /* free shape padding buffer */
-  free(decoder_mpeg->padded);
+#ifdef HAS_MMX
+  /* free mismatch accumulator */
+  { 
+    int i;
+    if(decoder_mpeg->mismatch == fame_mismatch_global)
+      for(i = 0; i < 6; i++)
+	fame_free(decoder_mpeg->mismatch_accumulator[i]); 
+  }
+#endif
 }

Index: fame_decoder_mpeg.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_decoder_mpeg.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_decoder_mpeg.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_decoder_mpeg.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -25,15 +25,10 @@
 
 typedef struct _fame_decoder_mpeg_t_ {
   FAME_EXTENDS(fame_decoder_t);
-  unsigned char *intra_quantisation_table; /* intra quantisation table */
-  unsigned char *inter_quantisation_table; /* inter quantisation table */
-  dct_t yiqmatrix[64];                     /* Y intra quantisation matrix    */
-  dct_t yidqmatrix[64];                    /* Y intra dequantisation matrix  */
-  dct_t ciqmatrix[64];                     /* C intra quantisation matrix    */
-  dct_t cidqmatrix[64];                    /* C intra dequantisation matrix  */
-  dct_t niqmatrix[64];                    /* non-intra quantisation matrix   */
-  dct_t nidqmatrix[64];                   /* non-intra dequantisation matrix */
-  dct_t psmatrix[64];                     /* prescale matrix                 */
+  dct_t yidqmatrixes[32][64];           /* Y intra dequantisation matrixes */
+  dct_t cidqmatrixes[32][64];           /* C intra dequantisation matrixes */
+  dct_t nidqmatrixes[32][64];           /* non-intra dequantisation matrixes */
+  dct_t psmatrix[64];                   /* prescale matrix                   */
   dct_t tmpblock[64];                     /* temporary block                 */
   short blocks[6][64];                    /* DCT, quantised blocks           */
   int width;                              /* width of frames                 */
@@ -45,6 +40,8 @@
   unsigned char *shape;                   /* shape mask                      */
   unsigned char *padded;                  /* buffer for shape padding        */
   fame_mismatch_t mismatch;               /* mismatch type for dequantisation*/
+  dct_t *mismatch_accumulator[6];         /* mismatch accumulator for each block */
+  int rounding;
 } fame_decoder_mpeg_t;
 
 #define FAME_DECODER_MPEG(x) ((fame_decoder_mpeg_t *) x)

Index: fame_encoder.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_encoder.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_encoder.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_encoder.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -29,6 +29,8 @@
 				int height,
 				unsigned char *intra_quantisation_table,
 				unsigned char *inter_quantisation_table,
+				unsigned char *intra_dc_y_scale_table,
+				unsigned char *intra_dc_c_scale_table,
 				fame_mismatch_t mismatch_type);
   void (* enter)               (struct _fame_encoder_t_ *encoder,
 				fame_yuv_t **past_ref,
@@ -36,14 +38,11 @@
 				fame_yuv_t **future_ref,
 				fame_yuv_t *yuv,
 				unsigned char *shape);
-  void (* set_quantisation)    (struct _fame_encoder_t_ *encoder,
-				unsigned char quant_scale,
-				unsigned char intra_y_scale,
-				unsigned char intra_c_scale);
   void (* encode_intra_mb)     (struct _fame_encoder_t_ *encoder,
 				short x,
 				short y,
 				short *blocks[6],
+				unsigned char q,
 				fame_bab_t bab_type);
   void (* encode_inter_mb)     (struct _fame_encoder_t_ *encoder,
 				short x,
@@ -52,6 +51,7 @@
 				fame_motion_vector_t *forward,
 				fame_motion_vector_t *backward,
 				fame_motion_coding_t motion_coding,
+				unsigned char q,
 				fame_bab_t bab_type);
   void (* leave)               (struct _fame_encoder_t_ *encoder);
   void (* close)               (struct _fame_encoder_t_ *encoder);

Index: fame_encoder_mpeg.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_encoder_mpeg.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_encoder_mpeg.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_encoder_mpeg.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <math.h>
 #include "fame.h"
 #include "fame_encoder.h"
 #include "fame_encoder_mpeg.h"
@@ -29,21 +30,28 @@
 #define arch_leave_state() asm("emms")
 #include "transpose_mmx.h"
 #include "dct_mmx.h"
-#include "quantise_mmx.h"
+#include "quantize_mmx.h"
 #include "fetch_mmx.h"
 #else
 #define arch_enter_state() 
 #define arch_leave_state() 
 #include "dct_float.h"
-#include "quantise_float.h"
+#include "quantize_float.h"
 #include "fetch_float.h"
 #endif
 
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ <= 95 && __GNUC_PATCHLEVEL__ <= 3)
+/* gcc bug?? workaround */
+extern void __fame_dummy_call(int q);
+#endif
+
 static void mpeg_init(fame_encoder_t *encoder,
 		      int width,
 		      int height,
 		      unsigned char *intra_quantisation_table,
 		      unsigned char *inter_quantisation_table,
+		      unsigned char *intra_dc_y_scale_table,
+		      unsigned char *intra_dc_c_scale_table,
 		      fame_mismatch_t mismatch_type);
 static void mpeg_enter(fame_encoder_t *encoder,
 			fame_yuv_t **past_ref,
@@ -51,23 +59,21 @@
 			fame_yuv_t **future_ref,
 			fame_yuv_t *yuv,
 			unsigned char *shape);
-static void mpeg_set_quantisation(fame_encoder_t *encoder,
-				  unsigned char quant_scale,
-				  unsigned char intra_y_scale,
-				  unsigned char intra_c_scale);
 static void mpeg_encode_intra_mb(fame_encoder_t *encoder,
-				  short x,
-				  short y,
-				  short *blocks[6],
-				  fame_bab_t bab_type);
+				 short x,
+				 short y,
+				 short *blocks[6],
+				 unsigned char q,
+				 fame_bab_t bab_type);
 static void mpeg_encode_inter_mb(fame_encoder_t *encoder,
-				  short x,
-				  short y,
-				  short *blocks[6],
-				  fame_motion_vector_t *forward,
-				  fame_motion_vector_t *backward,
-				  fame_motion_coding_t motion_coding,
-				  fame_bab_t bab_type);
+				 short x,
+				 short y,
+				 short *blocks[6],
+				 fame_motion_vector_t *forward,
+				 fame_motion_vector_t *backward,
+				 fame_motion_coding_t motion_coding,
+				 unsigned char q,
+				 fame_bab_t bab_type);
 static void mpeg_leave(fame_encoder_t *encoder);
 static void mpeg_close(fame_encoder_t *encoder);
 
@@ -76,7 +82,6 @@
   FAME_OBJECT(this)->name = "MPEG encoder";
   FAME_ENCODER(this)->init = mpeg_init;
   FAME_ENCODER(this)->enter = mpeg_enter;
-  FAME_ENCODER(this)->set_quantisation = mpeg_set_quantisation;
   FAME_ENCODER(this)->encode_intra_mb = mpeg_encode_intra_mb;
   FAME_ENCODER(this)->encode_inter_mb = mpeg_encode_inter_mb;
   FAME_ENCODER(this)->leave = mpeg_leave;
@@ -95,6 +100,8 @@
 /*    int height: height of the frame                                        */
 /*    unsigned char *intra_quantisation_table: quantisation matrix for intra */
 /*    unsigned char *inter_quantisation_table: quantisation matrix for inter */
+/*    unsigned char *intra_dc_y_scale_table: quantisation table for DC of Y  */
+/*    unsigned char *intra_dc_c_scale_table: quantisation table for DC of C  */
 /*    fame_mismatch_t mismatch_type: type of mismatch control                */
 /*                                                                           */
 /*  Return value:                                                            */
@@ -103,11 +110,14 @@
 static void mpeg_init(fame_encoder_t *encoder,
 		      int width,
 		      int height,
-		      unsigned char *intra_quantisation_table,
-		      unsigned char *inter_quantisation_table,
+		      unsigned char *iqtable,
+		      unsigned char *niqtable,
+		      unsigned char *intra_dc_y_scale_table,
+		      unsigned char *intra_dc_c_scale_table,
 		      fame_mismatch_t mismatch_type)
 {
   fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder);
+  int i, q;
 
   /* set width and height */
   encoder_mpeg->width = width;
@@ -116,9 +126,70 @@
   /* allocate padded shape buffer */
   encoder_mpeg->padded = (unsigned char *) malloc(encoder_mpeg->width*
 						   encoder_mpeg->height);
-  encoder_mpeg->intra_quantisation_table = intra_quantisation_table;
-  encoder_mpeg->inter_quantisation_table = inter_quantisation_table;
   encoder_mpeg->mismatch = mismatch_type;
+
+  /* compute quantization matrixes */
+  for(q = 1; q < 32; q++) {
+    /* compute the intra quantisation DC scaler and its rounding term */
+#ifdef HAS_MMX
+    asm("emms");
+    encoder_mpeg->yiqmatrixes[q][0] = 
+      (dct_t) ((double)(1UL<<16)*postscale[0]/intra_dc_y_scale_table[q]);
+    encoder_mpeg->ciqmatrixes[q][0] =
+      (dct_t) ((double)(1UL<<16)*postscale[0]/intra_dc_c_scale_table[q]);
+    encoder_mpeg->yiqround[q][0] = 
+      (dct_t) ((double)intra_dc_y_scale_table[q]/(2*postscale[0])+0.5);
+    encoder_mpeg->ciqround[q][0] = 
+      (dct_t) ((double)intra_dc_c_scale_table[q]/(2*postscale[0])+0.5);
+#else
+    encoder_mpeg->yiqmatrixes[q][0] = postscale[0] / intra_dc_y_scale_table[q];
+    encoder_mpeg->ciqmatrixes[q][0] = postscale[0] / intra_dc_c_scale_table[q];
+    encoder_mpeg->yiqround[q][0] = ((dct_t) intra_dc_y_scale_table[q])/(2*postscale[0]);
+    encoder_mpeg->ciqround[q][0] = ((dct_t) intra_dc_c_scale_table[q])/(2*postscale[0]);
+#endif
+
+    /* compute the intra quantisation matrix and rounding terms (AC) */
+    for(i = 1; i < 64; i++)
+    {
+#ifdef HAS_MMX
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ <= 95 && __GNUC_PATCHLEVEL__ <= 3)
+//#error Your GCC is too old, and may produce bad code for libfame.
+      /* possible gcc (<= 2.95.3) bug here: try commenting/uncommenting */
+      /* the following line (or an earlier asm directive may be wrong). */
+      /* the dummy call forces an unoptimized access to q */
+      __fame_dummy_call(q);
+#endif
+      encoder_mpeg->yiqmatrixes[q][i] = encoder_mpeg->ciqmatrixes[q][i] =
+	(dct_t) ((double)(1UL<<19)*postscale[i] / (q*iqtable[i]));
+      encoder_mpeg->yiqround[q][i] = encoder_mpeg->ciqround[q][i] = 
+	(dct_t) ((double)((1+(6*q+3)/4) * iqtable[i]) / (4 * 8.0 * postscale[i]) + 0.5);
+#else
+      encoder_mpeg->yiqmatrixes[q][i] = encoder_mpeg->ciqmatrixes[q][i] =
+	8.0 * postscale[i] / (q * iqtable[i]);
+      /* mpeg-4 rounding gives better rate-distortion results */
+      /* than mpeg-1 except maybe for q = 1 (need more tests) */
+      encoder_mpeg->yiqround[q][i] = encoder_mpeg->ciqround[q][i] = 
+	((dct_t) (1+(6*q+3)/4) * iqtable[i]) / (4 * 8.0 * postscale[i]);
+#endif
+    }
+
+    /* compute the inter quantisation matrix and rounding terms */
+    for(i = 0; i < 64; i++)
+    {
+#ifdef HAS_MMX
+      encoder_mpeg->niqmatrixes[q][i] =
+	(dct_t) ((double)(1UL<<19)*postscale[i]/(q*niqtable[i]));
+      encoder_mpeg->niqround[q][i] =
+	(dct_t) ((double)niqtable[i] / (4 * 8.0 * postscale[i]) + 0.5);
+
+#else
+      encoder_mpeg->niqmatrixes[q][i] =
+	8.0 * postscale[i] / (q * niqtable[i]);
+      encoder_mpeg->niqround[q][i] = 
+	(dct_t) niqtable[i] / (4 * 8.0 * postscale[i]);
+#endif
+    }		     
+  }
 }
 
 /*  mpeg_enter                                                               */
@@ -154,83 +225,6 @@
   encoder_mpeg->shape = shape;
   arch_enter_state();
 }
-
-/*  mpeg_set_quantisation                                                    */
-/*                                                                           */
-/*  Description:                                                             */
-/*    Updates the matrices used for quantisation.                            */
-/*                                                                           */
-/*  Arguments:                                                               */
-/*    fame_encoder_t *encoder: the encoder                                   */
-/*    unsigned char quant_scale: quantisation scale                          */
-/*    unsigned char intra_y_scale: intra DC scaler for Y component           */
-/*    unsigned char intra_c_scale: intra DC scaler for U and V components    */
-/*  Return value:                                                            */
-/*    None.                                                                  */
-
-static void mpeg_set_quantisation(fame_encoder_t *encoder,
-				  unsigned char quant_scale,
-				  unsigned char intra_y_scale,
-				  unsigned char intra_c_scale)
-{
-  fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder);
-  unsigned char *iqtable = encoder_mpeg->intra_quantisation_table;
-  unsigned char *niqtable = encoder_mpeg->inter_quantisation_table;
-  dct_t *yiqmatrix = encoder_mpeg->yiqmatrix;
-  dct_t *ciqmatrix = encoder_mpeg->ciqmatrix;
-  dct_t *niqmatrix = encoder_mpeg->niqmatrix;
-  dct_t *yidqmatrix = encoder_mpeg->yidqmatrix;
-  dct_t *cidqmatrix = encoder_mpeg->cidqmatrix;
-  dct_t *nidqmatrix = encoder_mpeg->nidqmatrix;
-  dct_t *psmatrix = encoder_mpeg->psmatrix;
-  int i;
-
-  /* initialize quantisation matrices */
-  encoder_mpeg->quant_scale = quant_scale;
-
-  /* compute the intra quantisation and dequantisation DC scaler */
-#ifdef HAS_MMX
-  asm("emms");
-  yiqmatrix[0] = (short) ((double)(1UL<<16)*postscale[0]/intra_y_scale); /*16*/
-  yidqmatrix[0] =  (short) (intra_y_scale << 3);
-  ciqmatrix[0] = (short) ((double)(1UL<<16)*postscale[0]/intra_c_scale); /*16*/
-  cidqmatrix[0] =  (short) (intra_c_scale << 3);
-#else
-  yiqmatrix[0] = postscale[0] / intra_y_scale;
-  yidqmatrix[0] =  intra_y_scale;
-  ciqmatrix[0] = postscale[0] / intra_c_scale;
-  cidqmatrix[0] =  intra_c_scale;
-#endif
-
-  /* compute the intra quantisation and dequantisation matrix */
-  for(i = 1; i < 64; i++)
-  {
-#ifdef HAS_MMX
-    yiqmatrix[i] = ciqmatrix[i] =
-      (short) ((double)(1UL<<19)*postscale[i] / (quant_scale*iqtable[i]));
-    yidqmatrix[i] = cidqmatrix[i] = (short) quant_scale*iqtable[i];
-#else
-    yiqmatrix[i] = ciqmatrix[i] =
-      8.0 * postscale[i] / (quant_scale * iqtable[i]);
-    yidqmatrix[i] = cidqmatrix[i] = quant_scale*iqtable[i];
-#endif
-  }
-
-  /* compute the inter quantisation and dequantisation matrix */
-  for(i = 0; i < 64; i++)
-  {
-#ifdef HAS_MMX
-    niqmatrix[i] = (short) ((double)(1UL<<19)*postscale[i]/
-			   (quant_scale*niqtable[i]));
-    nidqmatrix[i] = (short) quant_scale*niqtable[i];
-    psmatrix[i] = (short) ((double)(1UL << 16) * prescale[i]);
-#else
-    niqmatrix[i] = 8.0 * postscale[i] / (quant_scale * niqtable[i]);
-    nidqmatrix[i] = quant_scale*niqtable[i];
-    psmatrix[i] = prescale[i];
-#endif
-  }		     
-}
   
 /*  mpeg_encode_intra_mb                                                    */
 /*                                                                           */
@@ -243,92 +237,125 @@
 /*    short x: the x location of the macroblock in macroblock units          */
 /*    short y: the y location of the macroblock in macroblock units          */
 /*    short *blocks[6]: the DCT coded blocks                                 */
+/*    unsigned char q: the quantizer scale for this macroblock               */
 /*    fame_bab_t bab_type: binary alpha block type                           */
 /*                                                                           */
 /*  Return value:                                                            */
 /*    None.                                                                  */
-  
+
 static void mpeg_encode_intra_mb(fame_encoder_t *encoder,
-				  short x,
-				  short y,
-				  short *blocks[6],
-				  fame_bab_t bab_type)
+				 short x,
+				 short y,
+				 short *blocks[6],
+				 unsigned char q,
+				 fame_bab_t bab_type)
 {
   fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder);
-  unsigned long offset[6];
+  unsigned long offset0, offset1, offset2, offset3, offset4, offset5;
   int i, pitch;
+  void (* prefetch_Y)(unsigned char *input,
+		      dct_t *output,
+		      unsigned char *shape,
+		      int pitch);
+  void (* prefetch_C)(unsigned char *input,
+		      dct_t *output,
+		      unsigned char *shape,
+		      int pitch);
+  void (* dct_)(dct_t *block);
+  void (* quantize_)(short *block, dct_t *qblock, dct_t *matrix, dct_t *round);
 
-  pitch = encoder_mpeg->width;
+  pitch = encoder_mpeg->input->p;
 
   /* Make offsets to blocks */
-  offset[0] = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
-  offset[1] = offset[0] + 8;                       /* Y(0,1) */
-  offset[2] = offset[0] + (pitch << 3);            /* Y(1,0) */
-  offset[3] = offset[2] + 8;                       /* Y(1,1) */
-  offset[4] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb     */
-  offset[5] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cr     */
+  offset0 = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
+  offset1 = offset0 + 8;                       /* Y(0,1) */
+  offset2 = offset0 + (pitch << 3);            /* Y(1,0) */
+  offset3 = offset2 + 8;                       /* Y(1,1) */
+  offset4 = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb     */
+  offset5 = (y << 3) * (pitch >> 1) + (x << 3);  /* Cr     */
 
   /* Encode blocks */
   for(i = 0; i < 6; i++)
     blocks[i] = encoder_mpeg->blocks[i];
 
-  if(bab_type != bab_all_coded) 
+  if(bab_type != bab_all_coded)
   {
-    for(i = 0; i < 4; i++) { /* Y */
-      prefetch_Y_withmask(encoder_mpeg->input->y + offset[i],
-			  encoder_mpeg->tmpblock,
-			  encoder_mpeg->shape + offset[i],
-			  pitch);
-      dct(encoder_mpeg->tmpblock);
-      quantise(encoder_mpeg->blocks[i],
-	       encoder_mpeg->tmpblock,
-	       encoder_mpeg->yiqmatrix);
-    }
-    /* U */
-    prefetch_C_withmask(encoder_mpeg->input->u + offset[4],
-			encoder_mpeg->tmpblock,
-			encoder_mpeg->shape + offset[0], /*top left corner of mb*/
-			pitch >> 1);
-    dct(encoder_mpeg->tmpblock);
-    quantise(encoder_mpeg->blocks[4],
+    prefetch_Y = prefetch_Y_withmask;
+    prefetch_C = prefetch_C_withmask;
+  }
+  else 
+  {
+    prefetch_Y = prefetch_withoutmask;
+    prefetch_C = prefetch_withoutmask;
+  }
+  dct_ = dct;
+  quantize_ = quantize;
+
+  /* Y (0,0) */
+  prefetch_Y(encoder_mpeg->input->y + offset0,
 	     encoder_mpeg->tmpblock,
-	     encoder_mpeg->ciqmatrix);
-    /* V */
-    prefetch_C_withmask(encoder_mpeg->input->v + offset[5],
-			encoder_mpeg->tmpblock,
-			encoder_mpeg->shape + offset[0], /*top left corner of mb*/
-			pitch >> 1);
-    dct(encoder_mpeg->tmpblock);
-    quantise(encoder_mpeg->blocks[5],
+	     encoder_mpeg->shape + offset0,
+	     pitch);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[0],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->yiqmatrixes[q],
+	    encoder_mpeg->yiqround[q]);
+
+  /* Y (0,1) */
+  prefetch_Y(encoder_mpeg->input->y + offset1,
 	     encoder_mpeg->tmpblock,
-	     encoder_mpeg->ciqmatrix);
-  } else {
-    for(i = 0; i < 4; i++) { /* Y */
-      prefetch_Y_withoutmask(encoder_mpeg->input->y + offset[i],
-			     encoder_mpeg->tmpblock,
-			     pitch);
-      dct(encoder_mpeg->tmpblock);
-      quantise(encoder_mpeg->blocks[i],
-	       encoder_mpeg->tmpblock,
-	       encoder_mpeg->yiqmatrix);
-    }
-    /* U */
-    prefetch_C_withoutmask(encoder_mpeg->input->u + offset[4],
-			   encoder_mpeg->tmpblock,
-			   pitch >> 1);
-    dct(encoder_mpeg->tmpblock);
-    quantise(encoder_mpeg->blocks[4],
+	     encoder_mpeg->shape + offset1,
+	     pitch);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[1],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->yiqmatrixes[q],
+	    encoder_mpeg->yiqround[q]);
+
+  /* Y (1,0) */
+  prefetch_Y(encoder_mpeg->input->y + offset2,
 	     encoder_mpeg->tmpblock,
-	     encoder_mpeg->ciqmatrix);
-    /* V */
-    prefetch_C_withoutmask(encoder_mpeg->input->v + offset[5],
-			   encoder_mpeg->tmpblock,
-			   pitch >> 1);
-    dct(encoder_mpeg->tmpblock);
-    quantise(encoder_mpeg->blocks[5],
+	     encoder_mpeg->shape + offset2,
+	     pitch);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[2],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->yiqmatrixes[q],
+	    encoder_mpeg->yiqround[q]);
+
+  /* Y (1,1) */
+  prefetch_Y(encoder_mpeg->input->y + offset3,
 	     encoder_mpeg->tmpblock,
-	     encoder_mpeg->ciqmatrix);
-  }
+	     encoder_mpeg->shape + offset3,
+	     pitch);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[3],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->yiqmatrixes[q],
+	    encoder_mpeg->yiqround[q]);
+  
+  /* U */
+  prefetch_C(encoder_mpeg->input->u + offset4,
+	     encoder_mpeg->tmpblock,
+	     encoder_mpeg->shape + offset0, /* top left corner of mb */
+	     pitch >> 1);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[4],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->ciqmatrixes[q],
+	    encoder_mpeg->ciqround[q]);
+
+  /* V */
+  prefetch_C(encoder_mpeg->input->v + offset5,
+	     encoder_mpeg->tmpblock,
+	     encoder_mpeg->shape + offset0, /* top left corner of mb */
+	     pitch >> 1);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[5],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->ciqmatrixes[q],
+	    encoder_mpeg->ciqround[q]);
 }
 
 /*  mpeg_encode_inter_mb                                                    */
@@ -345,6 +372,7 @@
 /*    fame_bab_t bab_type: binary alpha block type                           */
 /*    fame_motion_vector_t *forward: forward motion vectors                  */
 /*    fame_motion_vector_t *backward: backward motion vectors                */
+/*    unsigned char q: the quantizer scale for this macroblock               */
 /*                                                                           */
 /*  Return value:                                                            */
 /*    None.                                                                  */
@@ -356,80 +384,149 @@
 				 fame_motion_vector_t *forward,
 				 fame_motion_vector_t *backward,
 				 fame_motion_coding_t motion_coding,
+				 unsigned char q,
 				 fame_bab_t bab_type)
 {
   fame_encoder_mpeg_t *encoder_mpeg = FAME_ENCODER_MPEG(encoder);
-  unsigned long offset[6];
-  signed long motion[6];
-  signed long residual[6];
+  unsigned long offset0, offset1, offset2, offset3, offset4, offset5;
+  signed long motion0, motion1, motion2, motion3, motion4, motion5;
+  signed long residual0, residual1, residual2, residual3, residual4, residual5;
   int i, pitch;
-
-  pitch = encoder_mpeg->width;
+  void (* diff_)(unsigned char *input,
+		 unsigned char *ref,
+		 dct_t *output,
+		 int ipitch,
+		 int rpitch);
+  void (* dct_)(dct_t *block);
+  void (* quantize_)(short *block, dct_t *qblock, dct_t *matrix, dct_t *round);
 
   /* Make offsets to blocks */
-  offset[0] = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
-  offset[1] = offset[0] + 8;                       /* Y(0,1) */
-  offset[2] = offset[0] + (pitch << 3);            /* Y(1,0) */
-  offset[3] = offset[2] + 8;                       /* Y(1,1) */
-  offset[4] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb     */
-  offset[5] = (y << 3) * (pitch >> 1) + (x << 3);  /* Cr     */
+  pitch = encoder_mpeg->input->p;
+  offset0 = (y << 4) * pitch + (x << 4);         /* Y(0,0) */
+  offset1 = offset0 + 8;                       /* Y(0,1) */
+  offset2 = offset0 + (pitch << 3);            /* Y(1,0) */
+  offset3 = offset2 + 8;                       /* Y(1,1) */
+  offset4 = (y << 3) * (pitch >> 1) + (x << 3);  /* Cb     */
+  offset5 = (y << 3) * (pitch >> 1) + (x << 3);  /* Cr     */
 
   /* Compute motion offsets (motion is half-pixel coded) */
-  for(i = 0; i < 4; i++) {
-    /* full-pel motion */
-    motion[i] = (forward[i].dy >> 1) * pitch + (forward[i].dx >> 1);
-    /* half-pel motion */
-    residual[i] = ((forward[i].dy & 1) << 1) | (forward[i].dx & 1);
-  }
-  for(i = 4; i < 6; i++) {
-    /* full-pel motion */
-    motion[i] = (forward[i].dy >> 1) * (pitch >> 1) + (forward[i].dx >> 1);
-    /* half-pel motion */
-    residual[i] = ((forward[i].dy & 1) << 1) | (forward[i].dx & 1);
-  }
+  /* half-pel motion */
+  residual0 = ((forward[0].dy & 1) << 1) | (forward[0].dx & 1);
+  residual1 = ((forward[1].dy & 1) << 1) | (forward[1].dx & 1);
+  residual2 = ((forward[2].dy & 1) << 1) | (forward[2].dx & 1);
+  residual3 = ((forward[3].dy & 1) << 1) | (forward[3].dx & 1);
+  residual4 = ((forward[4].dy & 1) << 1) | (forward[4].dx & 1);
+  residual5 = ((forward[5].dy & 1) << 1) | (forward[5].dx & 1);
+  /* full-pel motion */
+  pitch = encoder_mpeg->future_ref[residual0]->p;
+  motion0 = ((y<<4)+(forward[0].dy>>1)  )*pitch+(forward[0].dx>>1)+(x<<4)  ;
+  pitch = encoder_mpeg->future_ref[residual1]->p;
+  motion1 = ((y<<4)+(forward[1].dy>>1)  )*pitch+(forward[1].dx>>1)+(x<<4)+8;
+  pitch = encoder_mpeg->future_ref[residual2]->p;
+  motion2 = ((y<<4)+(forward[2].dy>>1)+8)*pitch+(forward[2].dx>>1)+(x<<4)  ;
+  pitch = encoder_mpeg->future_ref[residual3]->p;
+  motion3 = ((y<<4)+(forward[3].dy>>1)+8)*pitch+(forward[3].dx>>1)+(x<<4)+8;
+  pitch = encoder_mpeg->future_ref[residual4]->p;
+  motion4 = ((y<<3)+(forward[4].dy>>1))*(pitch>>1)+(forward[4].dx>>1)+(x<<3);
+  pitch = encoder_mpeg->future_ref[residual5]->p;
+  motion5 = ((y<<3)+(forward[5].dy>>1))*(pitch>>1)+(forward[5].dx>>1)+(x<<3);
 
   /* Encode blocks */
+  pitch = encoder_mpeg->input->p;
   for(i = 0; i < 6; i++)
     blocks[i] = encoder_mpeg->blocks[i];
 
-  /* might sound strange but yes, this is future_ref in case */
-  /* of P frames */
-  for(i = 0; i < 4; i++) {
-    /* Y */
-    if(forward[i].error < encoder_mpeg->quant_scale*16)
-      blocks[i] = NULL;
-    else {
-      diff(encoder_mpeg->input->y + offset[i],
-	   encoder_mpeg->future_ref[residual[i]]->y + offset[i] + motion[i],
-	   encoder_mpeg->tmpblock,
-	   pitch);
-      dct(encoder_mpeg->tmpblock);
-      quantise(encoder_mpeg->blocks[i],
-	       encoder_mpeg->tmpblock,
-	       encoder_mpeg->niqmatrix);
-    }
+  diff_ = diff;
+  dct_ = dct;
+  quantize_ = quantize;
+
+  /* Y */
+  if(forward[0].error < encoder_mpeg->quant_scale*16)
+    blocks[0] = NULL;
+  else {
+    diff_(encoder_mpeg->input->y + offset0,
+	  encoder_mpeg->future_ref[residual0]->y + motion0,
+	  encoder_mpeg->tmpblock,
+	  pitch,
+	  pitch+32);
+    dct_(encoder_mpeg->tmpblock);
+    quantize_(encoder_mpeg->blocks[0],
+	     encoder_mpeg->tmpblock,
+	     encoder_mpeg->niqmatrixes[q],
+	     encoder_mpeg->niqround[q]);
+  }
+
+  if(forward[1].error < encoder_mpeg->quant_scale*16)
+    blocks[1] = NULL;
+  else {
+    diff_(encoder_mpeg->input->y + offset1,
+	  encoder_mpeg->future_ref[residual1]->y + motion1,
+	  encoder_mpeg->tmpblock,
+	  pitch,
+	  pitch+32);
+
+    dct_(encoder_mpeg->tmpblock);
+    quantize_(encoder_mpeg->blocks[1],
+	      encoder_mpeg->tmpblock,
+	      encoder_mpeg->niqmatrixes[q],
+	      encoder_mpeg->niqround[q]);
   }
+
+  if(forward[2].error < encoder_mpeg->quant_scale*16)
+    blocks[2] = NULL;
+  else {
+    diff_(encoder_mpeg->input->y + offset2,
+	  encoder_mpeg->future_ref[residual2]->y + motion2,
+	  encoder_mpeg->tmpblock,
+	  pitch,
+	  pitch+32);
+    dct_(encoder_mpeg->tmpblock);
+    quantize_(encoder_mpeg->blocks[2],
+	      encoder_mpeg->tmpblock,
+	      encoder_mpeg->niqmatrixes[q],
+	      encoder_mpeg->niqround[q]);
+  }
+
+  if(forward[3].error < encoder_mpeg->quant_scale*16)
+    blocks[3] = NULL;
+  else  {
+    diff_(encoder_mpeg->input->y + offset3,
+	  encoder_mpeg->future_ref[residual3]->y + motion3,
+	  encoder_mpeg->tmpblock,
+	  pitch,
+	  pitch+32);
+    dct_(encoder_mpeg->tmpblock);
+    quantize_(encoder_mpeg->blocks[3],
+	      encoder_mpeg->tmpblock,
+	      encoder_mpeg->niqmatrixes[q],
+	      encoder_mpeg->niqround[q]);
+  }
+
   /* U */
   /* TODO: skip block with error < quant_scale*16 */
-  diff(encoder_mpeg->input->u + offset[4],
-       encoder_mpeg->future_ref[residual[4]]->u + offset[4] + motion[4],
-       encoder_mpeg->tmpblock,
-       pitch >> 1);
-  dct(encoder_mpeg->tmpblock);
-  quantise(encoder_mpeg->blocks[4],
-	   encoder_mpeg->tmpblock,
-	   encoder_mpeg->niqmatrix);
+  diff_(encoder_mpeg->input->u + offset4,
+	encoder_mpeg->future_ref[residual4]->u + motion4,
+	encoder_mpeg->tmpblock,
+	pitch >> 1,
+	(pitch+32) >> 1);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[4],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->niqmatrixes[q],
+	    encoder_mpeg->niqround[q]);
 
   /* V */
   /* TODO: skip block with error < quant_scale*16 */
-  diff(encoder_mpeg->input->v + offset[5],
-       encoder_mpeg->future_ref[residual[5]]->v + offset[5] + motion[5],
-       encoder_mpeg->tmpblock,
-       pitch >> 1);
-  dct(encoder_mpeg->tmpblock);
-  quantise(encoder_mpeg->blocks[5],
-	   encoder_mpeg->tmpblock,
-	   encoder_mpeg->niqmatrix);
+  diff_(encoder_mpeg->input->v + offset5,
+	encoder_mpeg->future_ref[residual5]->v + motion5,
+	encoder_mpeg->tmpblock,
+	pitch >> 1,
+	(pitch+32) >> 1);
+  dct_(encoder_mpeg->tmpblock);
+  quantize_(encoder_mpeg->blocks[5],
+	    encoder_mpeg->tmpblock,
+	    encoder_mpeg->niqmatrixes[q],
+	    encoder_mpeg->niqround[q]);
 }
 
 /*  mpeg_leave                                                              */

Index: fame_encoder_mpeg.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_encoder_mpeg.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_encoder_mpeg.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_encoder_mpeg.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -25,27 +25,24 @@
 
 typedef struct _fame_encoder_mpeg_t_ {
   FAME_EXTENDS(fame_encoder_t);
-  unsigned char quant_scale;               /* quantisation scale */
-  unsigned char *intra_quantisation_table; /* intra quantisation table */
-  unsigned char *inter_quantisation_table; /* inter quantisation table */
-  dct_t yiqmatrix[64];                     /* Y intra quantisation matrix    */
-  dct_t yidqmatrix[64];                    /* Y intra dequantisation matrix  */
-  dct_t ciqmatrix[64];                     /* C intra quantisation matrix    */
-  dct_t cidqmatrix[64];                    /* C intra dequantisation matrix  */
-  dct_t niqmatrix[64];                    /* non-intra quantisation matrix   */
-  dct_t nidqmatrix[64];                   /* non-intra dequantisation matrix */
-  dct_t psmatrix[64];                     /* prescale matrix                 */
-  dct_t tmpblock[64];                     /* temporary block                 */
-  short blocks[6][64];                    /* DCT, quantised blocks           */
-  int width;                              /* width of frames                 */
-  int height;                             /* height of frames                */
-  fame_yuv_t *input;                      /* input frame                     */
-  fame_yuv_t **past_ref;                  /* past reference frame            */
-  fame_yuv_t **new_ref;                   /* reconstructed reference frame   */
-  fame_yuv_t **future_ref;                /* future reference frame          */
-  unsigned char *shape;                   /* shape mask                      */
-  unsigned char *padded;                  /* buffer for shape padding        */
-  fame_mismatch_t mismatch;               /* mismatch type for dequantisation*/
+  unsigned char quant_scale;               /* quantization scale */
+  dct_t yiqmatrixes[32][64];            /* Y intra quantization matrixes     */
+  dct_t ciqmatrixes[32][64];            /* C intra quantization matrixes     */
+  dct_t niqmatrixes[32][64];            /* non-intra quantization matrixes   */
+  dct_t yiqround[32][64];               /* Y intra quantization rounding     */
+  dct_t ciqround[32][64];               /* C intra quantization rounding     */
+  dct_t niqround[32][64];               /* non-intra quantization rounding   */
+  dct_t tmpblock[64];                   /* temporary block                   */
+  short blocks[6][64];                  /* DCT, quantized blocks             */
+  int width;                            /* width of frames                   */
+  int height;                           /* height of frames                  */
+  fame_yuv_t *input;                    /* input frame                       */
+  fame_yuv_t **past_ref;                /* past reference frame              */
+  fame_yuv_t **new_ref;                 /* reconstructed reference frame     */
+  fame_yuv_t **future_ref;              /* future reference frame            */
+  unsigned char *shape;                 /* shape mask                        */
+  unsigned char *padded;                /* buffer for shape padding          */
+  fame_mismatch_t mismatch;             /* mismatch type for dequantization  */
 } fame_encoder_mpeg_t;
 
 #define FAME_ENCODER_MPEG(x) ((fame_encoder_mpeg_t *) x)

Index: fame_motion.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_motion.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_motion.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_motion.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -114,6 +114,9 @@
   motion->ref = ref;
   motion->current = current;
   motion->search_range = search_range;
+  for(motion-> fcode = 1, search_range = motion->search_range;
+      search_range > 16;
+      motion->fcode++, search_range >>= 1);
   motion->shape = shape;
 
   if(shape == NULL)

Index: fame_motion.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_motion.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_motion.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_motion.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -22,8 +22,10 @@
 
 #include "fame.h"
 
-#define FAME_MOTION_SUBPEL_SEARCH 1 /* support subpixel search */
-#define FAME_MOTION_BLOCK_SEARCH  2 /* support 8x8 block search */
+#define FAME_MOTION_SUBPEL_SEARCH 1        /* support subpixel search */
+#define FAME_MOTION_BLOCK_SEARCH  2        /* support 8x8 block search */
+#define FAME_MOTION_UNRESTRICTED_SEARCH  4 /* support out of frame MVs */
+#define FAME_MOTION_FLIP_ROUNDING  8       /* support rounding control */
 
 typedef unsigned int (*compute_error_t) (unsigned char *ref,
 					 unsigned char *input,
@@ -46,7 +48,8 @@
   fame_motion_coding_t (* estimation)(struct _fame_motion_t_ *motion,
 				      int mb_x,
 				      int mb_y,
-				      fame_motion_vector_t *vectors);
+				      fame_motion_vector_t *vectors,
+				      unsigned char quant);
   void (* leave)(struct _fame_motion_t_ *motion);
 
   int mb_width;
@@ -55,6 +58,7 @@
   fame_yuv_t *current;
   unsigned char *shape;
   int search_range;
+  int fcode;
   unsigned int flags;
   compute_error_t MAE8x8;
 } fame_motion_t;

Index: fame_motion_fourstep.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_motion_fourstep.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_motion_fourstep.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_motion_fourstep.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -27,11 +27,13 @@
 static fame_motion_coding_t fourstep_estimation(fame_motion_t *motion,
 						int mb_x,
 						int mb_y,
-						fame_motion_vector_t *vectors);
+						fame_motion_vector_t *vectors,
+						unsigned char quant);
 static void find_vector(fame_yuv_t **ref,
 			unsigned char *current,
 			unsigned char *shape,
 			int offset[4],
+			int edged_offset[4],
 			int x,
 			int y,
 			int width,
@@ -40,12 +42,14 @@
 			int search_range,
 			int step_count,
 			compute_error_t eval_error,
-			fame_motion_vector_t *mv);
+			fame_motion_vector_t *mv,
+			int unrestricted);
 
 static void find_subvector(fame_yuv_t **ref,
 			   unsigned char *current,
 			   unsigned char *shape,
 			   int offset,
+			   int edged_offset,
 			   int x,
 			   int y,
 			   int width,
@@ -54,31 +58,36 @@
 			   int search_range,
 			   int step_count,
 			   compute_error_t eval_error,
-			   fame_motion_vector_t *mv);
+			   fame_motion_vector_t *mv,
+			   int unrestricted);
 
 static void find_half_vector(fame_yuv_t **ref,
 			     unsigned char *current,
 			     unsigned char *shape,
 			     int offset[4],
+			     int edged_offset[4],
 			     int x,
 			     int y,
 			     int width,
 			     int height,
 			     int pitch,
 			     compute_error_t eval_error,
-			     fame_motion_vector_t *mv);
+			     fame_motion_vector_t *mv,
+			     int unrestricted);
 
 static void find_half_subvector(fame_yuv_t **ref,
 				unsigned char *current,
 				unsigned char *shape,
 				int offset,
+				int edged_offset,
 				int x,
 				int y,
 				int width,
 				int height,
 				int pitch,
 				compute_error_t eval_error,
-				fame_motion_vector_t *mv);
+				fame_motion_vector_t *mv,
+				int unrestricted);
 
 FAME_CONSTRUCTOR(fame_motion_fourstep_t)
 {
@@ -223,13 +232,16 @@
 static fame_motion_coding_t fourstep_estimation(fame_motion_t *motion,
 						int mb_x,
 						int mb_y,
-						fame_motion_vector_t *vectors)
+						fame_motion_vector_t *vectors,
+						unsigned char quant)
 {
   int pitch;
   unsigned char *current;
   unsigned char *shape;
+  int unrestricted;
   int x, y, width, height;
   int offset[4];
+  int edged_offset[4];
   int count;
   int sad_inter, sad_inter4v, mad_inter;
   fame_motion_vector_t subvectors[4];
@@ -239,21 +251,33 @@
   y = mb_y << 4;
   width = motion->mb_width << 4;
   height = motion->mb_height << 4;
-  pitch = width;
   current = motion->current->y;
+  pitch = motion->current->p;
   shape = motion->shape;
+  unrestricted = (motion->flags & FAME_MOTION_UNRESTRICTED_SEARCH)?1:0;
 
   /* saturate prediction to borders */
-  if((x<<1)+vectors[0].dx<0) vectors[0].dx = (-x)<<1;
-  if((y<<1)+vectors[0].dy<0) vectors[0].dy = (-y)<<1;
-  if((x<<1)+vectors[0].dx>((width-16)<<1)) vectors[0].dx = (width-16-x)<<1;
-  if((y<<1)+vectors[0].dy>((height-16)<<1)) vectors[0].dy = (height-16-y)<<1;
+  if(unrestricted) {
+    if((x<<1)+vectors[0].dx<(-16)<<1) vectors[0].dx = (-x-16)<<1;
+    if((y<<1)+vectors[0].dy<(-16)<<1) vectors[0].dy = (-y-16)<<1;
+    if((x<<1)+vectors[0].dx>(width<<1)) vectors[0].dx = (width-x)<<1;
+    if((y<<1)+vectors[0].dy>(height<<1)) vectors[0].dy = (height-y)<<1;
+  } else {
+    if((x<<1)+vectors[0].dx<0) vectors[0].dx = (-x)<<1;
+    if((y<<1)+vectors[0].dy<0) vectors[0].dy = (-y)<<1;
+    if((x<<1)+vectors[0].dx>((width-16)<<1)) vectors[0].dx = (width-16-x)<<1;
+    if((y<<1)+vectors[0].dy>((height-16)<<1)) vectors[0].dy = (height-16-y)<<1;
+  }
   
   /* compute zero motion MAD and number of pixels in shape */
-  offset[0] = y * width + x;
-  offset[1] = y * width + x+8;
-  offset[2] = (y+8) * width + x;
-  offset[3] = (y+8) * width + x+8;
+  offset[0] = y * pitch + x;
+  offset[1] = y * pitch + x+8;
+  offset[2] = (y+8) * pitch + x;
+  offset[3] = (y+8) * pitch + x+8;
+  edged_offset[0] = y * (pitch+32) + x;
+  edged_offset[1] = y * (pitch+32) + x+8;
+  edged_offset[2] = (y+8) * (pitch+32) + x;
+  edged_offset[3] = (y+8) * (pitch+32) + x+8;
     
   if(motion->shape) {
     vectors[0].count = mad_withmask(current+offset[0], shape+offset[0], pitch, &vectors[0].deviation);
@@ -268,11 +292,12 @@
   }
     
   /* integer sample 4-step search */
-  find_vector(motion->ref, current, shape, offset,
+  find_vector(motion->ref, current, shape, offset, edged_offset,
 	      x, y, width, height, pitch,
 	      motion->search_range, 4,
 	      motion->MAE8x8,
-	      vectors);
+	      vectors,
+	      unrestricted);
 
   if(motion->flags & FAME_MOTION_BLOCK_SEARCH) {
     /* subvector search */
@@ -281,16 +306,20 @@
       subvectors[k].dy = vectors[k].dy;
       subvectors[k].error = vectors[k].error;
       /* integer sample 2-step search */
-      find_subvector(motion->ref, current, shape, offset[k],
+      find_subvector(motion->ref, current, shape,
+		     offset[k], edged_offset[k],
 		     x, y, width, height, pitch,
 		     motion->search_range, 2,
 		     motion->MAE8x8,
-		     &subvectors[k]);
+		     &subvectors[k],
+		     unrestricted);
       /* half sample search */
-      find_half_subvector(motion->ref, current, shape, offset[k],
+      find_half_subvector(motion->ref, current, shape,
+			  offset[k], edged_offset[k],
 			  x, y, width, height, pitch,
 			  motion->MAE8x8,
-			  &subvectors[k]);
+			  &subvectors[k],
+			  unrestricted);
     }
   }
 
@@ -298,10 +327,11 @@
   /* we do this step before intra/inter decision since in our case */
   /* we've already computed subpel planes for all the image and thus */
   /* the overhead cost for estimating subpel vector is relatively small */
-  find_half_vector(motion->ref, current, shape, offset,
+  find_half_vector(motion->ref, current, shape, offset, edged_offset,
 		   x, y, width, height, pitch,
 		   motion->MAE8x8,
-		   vectors);
+		   vectors,
+		   unrestricted);
 
   sad_inter = vectors[0].error + vectors[1].error + 
               vectors[2].error + vectors[3].error;
@@ -312,7 +342,6 @@
   count = vectors[0].count + vectors[1].count + vectors[2].count + vectors[3].count;
 
   /* inter4v/inter mode decision */
-#if 0 // TEMP
   if((motion->flags & FAME_MOTION_BLOCK_SEARCH) &&
      sad_inter4v + ((count>>1)+1) < sad_inter) {
     /* inter4v prediction */
@@ -323,7 +352,6 @@
       vectors[k].error = subvectors[k].error;
     }
   }
-#endif
 
   /* intra/inter mode decision */
   if(mad_inter + count + count < sad_inter)
@@ -364,6 +392,7 @@
 			unsigned char *current,
 			unsigned char *shape,
 			int offset[4],
+			int edged_offset[4],
 			int x,
 			int y,
 			int width,
@@ -372,7 +401,8 @@
 			int search_range,
 			int step_count,
 			compute_error_t eval_error,
-			fame_motion_vector_t *mv)
+			fame_motion_vector_t *mv,
+			int unrestricted)
 {
   int i;
   int counter_fourstep;
@@ -402,27 +432,27 @@
 
   step = 1 << (step_count-1);
 
-  motion = (mv[0].dx >> 1) + (mv[0].dy >> 1) * pitch;
+  motion = (mv[0].dx >> 1) + (mv[0].dy >> 1) * (pitch+32);
   residual = (mv[0].dx & 1) + ((mv[0].dy & 1) << 1);
 
   /* initial step */
   mv[0].error = 
-    eval_error(ref[residual]->y+offset[0]+motion,
+    eval_error(ref[residual]->y+edged_offset[0]+motion,
 	       current+offset[0],
 	       shape+offset[0],
 	       pitch) - (mv[0].count >> 1);
   mv[1].error = 
-    eval_error(ref[residual]->y+offset[1]+motion,
+    eval_error(ref[residual]->y+edged_offset[1]+motion,
 	       current+offset[1],
 	       shape+offset[1],
 	       pitch) - (mv[1].count >> 1);
   mv[2].error = 
-    eval_error(ref[residual]->y+offset[2]+motion,
+    eval_error(ref[residual]->y+edged_offset[2]+motion,
 	       current+offset[2],
 	       shape+offset[2],
 	       pitch) - (mv[2].count >> 1);
   mv[3].error = 
-    eval_error(ref[residual]->y+offset[3]+motion,
+    eval_error(ref[residual]->y+edged_offset[3]+motion,
 	       current+offset[3],
 	       shape+offset[3],
 	       pitch) - (mv[3].count >> 1);
@@ -435,11 +465,18 @@
 
   while(step)
   {
-    ptr_stepx[-1] = fame_min(search_range+mv[0].dx,fame_min((x<<1)+mv[0].dx, step<<1))>>1;
-    ptr_stepx[+1] = fame_min(search_range-mv[0].dx,fame_min(((width-x-16)<<1)-mv[0].dx, step<<1))>>1;
-    ptr_stepy[-1] = fame_min(search_range+mv[0].dy,fame_min((y<<1)+mv[0].dy, step<<1))>>1;
-    ptr_stepy[+1] = fame_min(search_range-mv[0].dy,fame_min(((height-y-16)<<1)-mv[0].dy, step<<1))>>1;
-    
+    if(unrestricted) {
+      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv[0].dx,fame_min(((x+16)<<1)+mv[0].dx, step<<1))>>1;
+      ptr_stepx[+1] = fame_min(((search_range-1)<<1)-mv[0].dx,fame_min(((width-x)<<1)-mv[0].dx, step<<1))>>1;
+      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv[0].dy,fame_min(((y+16)<<1)+mv[0].dy, step<<1))>>1;
+      ptr_stepy[+1] = fame_min(((search_range-1)<<1)-mv[0].dy,fame_min(((height-y)<<1)-mv[0].dy, step<<1))>>1;
+    } else {
+      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv[0].dx,fame_min((x<<1)+mv[0].dx, step<<1))>>1;
+      ptr_stepx[+1] = fame_min(((search_range-1)<<1)-mv[0].dx,fame_min(((width-x-16)<<1)-mv[0].dx, step<<1))>>1;
+      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv[0].dy,fame_min((y<<1)+mv[0].dy, step<<1))>>1;
+      ptr_stepy[+1] = fame_min(((search_range-1)<<1)-mv[0].dy,fame_min(((height-y-16)<<1)-mv[0].dy, step<<1))>>1;
+    }
+
     /* update the step */
     if(last_motion == NULL_MOTION || counter_fourstep == 0) 
     {
@@ -460,26 +497,26 @@
 	ptr_stepy[current_td->directions[i].dy];
       
       test[0].error =
-	eval_error(ref[residual]->y+offset[0]+motion+
-		   (test[0].dx+test[0].dy*pitch),
+	eval_error(ref[residual]->y+edged_offset[0]+motion+
+		   (test[0].dx+test[0].dy*(pitch+32)),
 		   current+offset[0],
 		   shape+offset[0],
 		   pitch);
       test[1].error =
-	eval_error(ref[residual]->y+offset[1]+motion+
-		   (test[0].dx+test[0].dy*pitch),
+	eval_error(ref[residual]->y+edged_offset[1]+motion+
+		   (test[0].dx+test[0].dy*(pitch+32)),
 		   current+offset[1],
 		   shape+offset[1],
 		   pitch);
       test[2].error =
-	eval_error(ref[residual]->y+offset[2]+motion+
-		   (test[0].dx+test[0].dy*pitch),
+	eval_error(ref[residual]->y+edged_offset[2]+motion+
+		   (test[0].dx+test[0].dy*(pitch+32)),
 		   current+offset[2],
 		   shape+offset[2],
 		   pitch);
       test[3].error =
-	eval_error(ref[residual]->y+offset[3]+motion+
-		   (test[0].dx+test[0].dy*pitch),
+	eval_error(ref[residual]->y+edged_offset[3]+motion+
+		   (test[0].dx+test[0].dy*(pitch+32)),
 		   current+offset[3],
 		   shape+offset[3],
 		   pitch);
@@ -508,7 +545,7 @@
       mv[0].dx += rel[0].dx << 1;
       mv[0].dy += rel[0].dy << 1;	
       current_td = &(td[last_motion]);
-      motion = (mv[0].dx >> 1) + (mv[0].dy >> 1) * pitch;
+      motion = (mv[0].dx >> 1) + (mv[0].dy >> 1) * (pitch+32);
     }
     else
     {
@@ -524,6 +561,7 @@
 			   unsigned char *current,
 			   unsigned char *shape,
 			   int offset,
+			   int edged_offset,
 			   int x,
 			   int y,
 			   int width,
@@ -532,7 +570,8 @@
 			   int search_range,
 			   int step_count,
 			   compute_error_t eval_error,
-			   fame_motion_vector_t *mv)
+			   fame_motion_vector_t *mv,
+			   int unrestricted)
 {
   int i;
   int counter_fourstep;
@@ -555,7 +594,7 @@
   counter_fourstep = step_count;
   rel.error = INFINITE_ERROR / 4;
   step = 1 << (step_count - 1);
-  motion = (mv->dx >> 1) + (mv->dy >> 1) * pitch;
+  motion = (mv->dx >> 1) + (mv->dy >> 1) * (pitch+32);
   residual = (mv->dx & 1) + ((mv->dy & 1) << 1);
 
   /* initial step */
@@ -568,11 +607,18 @@
 
   while(step)
   {
-    ptr_stepx[-1] = fame_min(search_range+mv->dx,fame_min((x<<1)+mv->dx, step<<1))>>1;
-    ptr_stepx[1]  = fame_min(search_range-mv->dx,fame_min(((width-x-16)<<1)-mv->dx, step<<1))>>1;
-    ptr_stepy[-1] = fame_min(search_range+mv->dy,fame_min((y<<1)+mv->dy, step<<1))>>1;
-    ptr_stepy[1]  = fame_min(search_range-mv->dy,fame_min(((height-y-16)<<1)-mv->dy, step<<1))>>1;
-    
+    if(unrestricted) {
+      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv->dx,fame_min(((x+16)<<1)+mv->dx, step<<1))>>1;
+      ptr_stepx[1]  = fame_min(((search_range-1)<<1)-mv->dx,fame_min(((width-x)<<1)-mv->dx, step<<1))>>1;
+      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv->dy,fame_min(((y+16)<<1)+mv->dy, step<<1))>>1;
+      ptr_stepy[1]  = fame_min(((search_range-1)<<1)-mv->dy,fame_min(((height-y)<<1)-mv->dy, step<<1))>>1;
+    } else {
+      ptr_stepx[-1] = fame_min(((search_range-1)<<1)+mv->dx,fame_min((x<<1)+mv->dx, step<<1))>>1;
+      ptr_stepx[1]  = fame_min(((search_range-1)<<1)-mv->dx,fame_min(((width-x-16)<<1)-mv->dx, step<<1))>>1;
+      ptr_stepy[-1] = fame_min(((search_range-1)<<1)+mv->dy,fame_min((y<<1)+mv->dy, step<<1))>>1;
+      ptr_stepy[1]  = fame_min(((search_range-1)<<1)-mv->dy,fame_min(((height-y-16)<<1)-mv->dy, step<<1))>>1;
+    }
+
     /* update the step */
     if(last_motion==NULL_MOTION || counter_fourstep==0) 
     {
@@ -593,8 +639,8 @@
 	ptr_stepy[current_td->directions[i].dy];
       
       test.error =
-	eval_error(ref[residual]->y+offset+motion+
-		   (test.dx+test.dy*pitch),
+	eval_error(ref[residual]->y+edged_offset+motion+
+		   (test.dx+test.dy*(pitch+32)),
 		   current+offset,
 		   shape+offset,
 		   pitch);
@@ -615,7 +661,7 @@
       mv->dx += rel.dx << 1;
       mv->dy += rel.dy << 1;	
       current_td = &(td[last_motion]);
-      motion = (mv->dx >> 1) + (mv->dy >> 1) * pitch;
+      motion = (mv->dx >> 1) + (mv->dy >> 1) * (pitch+32);
     }
     else
     {
@@ -655,32 +701,36 @@
 			     unsigned char *current,
 			     unsigned char *shape,
 			     int offset[4],
+			     int edged_offset[4],
 			     int x,
 			     int y,
 			     int width,
 			     int height,
 			     int pitch,
 			     compute_error_t eval,
-			     fame_motion_vector_t *mv)
+			     fame_motion_vector_t *mv,
+			     int unrestricted)
 {
   int xh, yh;
   int e[4], best[4];
   int m, r;
   int i;
+  int edge;
 
   xh = yh = 0;
   best[0] = mv[0].error;
   best[1] = mv[1].error;
   best[2] = mv[2].error;
   best[3] = mv[3].error;
+  edge = unrestricted << 4;
 
-  if((y << 1) + mv[0].dy > 0) {
-    if((x << 1) + mv[0].dx > 0) {
+  if(((y+edge) << 1) + mv[0].dy > 0) {
+    if(((x+edge) << 1) + mv[0].dx > 0) {
       /* -0.5 -0.5 */
       for(i = 0; i < 4; i++) {
-	m = ((mv[i].dx-1)>>1) + ((mv[i].dy-1)>>1) * pitch;
+	m = ((mv[i].dx-1)>>1) + ((mv[i].dy-1)>>1) * (pitch+32);
 	r = ((mv[i].dx-1) & 1) + (((mv[i].dy-1) & 1) << 1);
-	e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+	e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		    shape + offset[i], pitch);
       }
       if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -691,9 +741,9 @@
     }
     /* 0 -0.5 */
     for(i = 0; i < 4; i++) {
-      m = ((mv[i].dx)>>1) + ((mv[i].dy-1)>>1) * pitch;
+      m = ((mv[i].dx)>>1) + ((mv[i].dy-1)>>1) * (pitch+32);
       r = ((mv[i].dx) & 1) + (((mv[i].dy-1) & 1) << 1);
-      e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+      e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		  shape + offset[i], pitch);
     }
     if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -701,12 +751,12 @@
       xh =  0;
       yh = -1;
     }
-    if((x << 1) + mv->dx < ((width-16) << 1)) {
+    if(((x-edge) << 1) + mv->dx < ((width-16) << 1)) {
       /* +0.5 -0.5 */
       for(i = 0; i < 4; i++) {
-	m = ((mv[i].dx+1)>>1) + ((mv[i].dy-1)>>1) * pitch;
+	m = ((mv[i].dx+1)>>1) + ((mv[i].dy-1)>>1) * (pitch+32);
 	r = ((mv[i].dx+1) & 1) + (((mv[i].dy-1) & 1) << 1);
-	e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+	e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		    shape + offset[i], pitch);
       }
       if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -717,12 +767,12 @@
     }
   }
   
-  if((x << 1) + mv->dx > 0) {
+  if(((x+edge) << 1) + mv->dx > 0) {
     /* -0.5 0 */
     for(i = 0; i < 4; i++) {
-      m = ((mv[i].dx-1)>>1) + ((mv[i].dy)>>1) * pitch;
+      m = ((mv[i].dx-1)>>1) + ((mv[i].dy)>>1) * (pitch+32);
       r = ((mv[i].dx-1) & 1) + (((mv[i].dy) & 1) << 1);
-      e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+      e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		  shape + offset[i], pitch);
     }
     if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -731,12 +781,12 @@
       yh =  0;
     }
   }
-  if((x << 1) + mv->dx < ((width-16) << 1)) {
+  if(((x-edge) << 1) + mv->dx < ((width-16) << 1)) {
     /* +0.5 0 */
     for(i = 0; i < 4; i++) {
-      m = ((mv[i].dx+1)>>1) + ((mv[i].dy)>>1) * pitch;
+      m = ((mv[i].dx+1)>>1) + ((mv[i].dy)>>1) * (pitch+32);
       r = ((mv[i].dx+1) & 1) + (((mv[i].dy) & 1) << 1);
-      e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+      e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		  shape + offset[i], pitch);
     }
     if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -746,13 +796,13 @@
     }
   }
   
-  if((y << 1) + mv->dy < ((height-16) << 1)) {
-    if((x << 1) + mv->dx > 0) {
+  if(((y-edge) << 1) + mv->dy < ((height-16) << 1)) {
+    if(((x+edge) << 1) + mv->dx > 0) {
       /* -0.5 +0.5 */
       for(i = 0; i < 4; i++) {
-	m = ((mv[i].dx-1)>>1) + ((mv[i].dy+1)>>1) * pitch;
+	m = ((mv[i].dx-1)>>1) + ((mv[i].dy+1)>>1) * (pitch+32);
 	r = ((mv[i].dx-1) & 1) + (((mv[i].dy+1) & 1) << 1);
-	e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+	e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		    shape + offset[i], pitch);
 	}
       if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -763,9 +813,9 @@
     }
     /* 0 +0.5 */
     for(i = 0; i < 4; i++) {
-      m = ((mv[i].dx)>>1) + ((mv[i].dy+1)>>1) * pitch;
+      m = ((mv[i].dx)>>1) + ((mv[i].dy+1)>>1) * (pitch+32);
       r = ((mv[i].dx) & 1) + (((mv[i].dy+1) & 1) << 1);
-      e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+      e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		  shape + offset[i], pitch);
     }
     if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -773,12 +823,12 @@
       xh =  0;
       yh = +1;
     }
-    if((x << 1) + mv->dx < ((width-16) << 1)) {
+    if(((x-edge) << 1) + mv->dx < ((width-16) << 1)) {
       /* +0.5 +0.5 */
       for(i = 0; i < 4; i++) {
-	m = ((mv[i].dx+1)>>1) + ((mv[i].dy+1)>>1) * pitch;
+	m = ((mv[i].dx+1)>>1) + ((mv[i].dy+1)>>1) * (pitch+32);
 	r = ((mv[i].dx+1) & 1) + (((mv[i].dy+1) & 1) << 1);
-	e[i] = eval(ref[r]->y+offset[i]+m, current + offset[i],
+	e[i] = eval(ref[r]->y+edged_offset[i]+m, current + offset[i],
 		    shape + offset[i], pitch);
       }
       if(e[0] + e[1] + e[2] + e[3] < best[0] + best[1] + best[2] + best[3]) {
@@ -807,27 +857,31 @@
 				unsigned char *current,
 				unsigned char *shape,
 				int offset,
+				int edged_offset,
 				int x,
 				int y,
 				int width,
 				int height,
 				int pitch,
 				compute_error_t eval,
-				fame_motion_vector_t *mv)
+				fame_motion_vector_t *mv,
+				int unrestricted)
 {
   int xh, yh;
   int e, best;
   int m, r;
+  int edge;
 
   xh = yh = 0;
   best = mv->error;
+  edge = unrestricted << 4;
 
-  if((y << 1) + mv->dy > 0) {
-    if((x << 1) + mv->dx > 0) {
+  if(((y+edge) << 1) + mv->dy > 0) {
+    if(((x+edge) << 1) + mv->dx > 0) {
       /* -0.5 -0.5 */
-      m = ((mv->dx-1)>>1) + ((mv->dy-1)>>1) * pitch;
+      m = ((mv->dx-1)>>1) + ((mv->dy-1)>>1) * (pitch+32);
       r = ((mv->dx-1) & 1) + (((mv->dy-1) & 1) << 1);
-      e = eval(ref[r]->y+offset+m, current + offset,
+      e = eval(ref[r]->y+edged_offset+m, current + offset,
 	       shape + offset, pitch);
       if(e < best) {
 	best = e;
@@ -836,20 +890,20 @@
       }
     }
     /* 0 -0.5 */
-    m = ((mv->dx)>>1) + ((mv->dy-1)>>1) * pitch;
+    m = ((mv->dx)>>1) + ((mv->dy-1)>>1) * (pitch+32);
     r = ((mv->dx) & 1) + (((mv->dy-1) & 1) << 1);
-    e = eval(ref[r]->y+offset+m, current + offset,
+    e = eval(ref[r]->y+edged_offset+m, current + offset,
 	     shape + offset, pitch);
     if(e < best) {
       best = e;
       xh =  0;
       yh = -1;
     }
-    if((x << 1) + mv->dx < ((width-16) << 1)) {
+    if(((x-edge) << 1) + mv->dx < ((width-16) << 1)) {
       /* +0.5 -0.5 */
-      m = ((mv->dx+1)>>1) + ((mv->dy-1)>>1) * pitch;
+      m = ((mv->dx+1)>>1) + ((mv->dy-1)>>1) * (pitch+32);
       r = ((mv->dx+1) & 1) + (((mv->dy-1) & 1) << 1);
-      e = eval(ref[r]->y+offset+m, current + offset,
+      e = eval(ref[r]->y+edged_offset+m, current + offset,
 	       shape + offset, pitch);
       if(e < best) {
 	best = e;
@@ -859,11 +913,11 @@
     }
   }
     
-  if((x << 1) + mv->dx > 0) {
+  if(((x+edge) << 1) + mv->dx > 0) {
     /* -0.5 0 */
-    m = ((mv->dx-1)>>1) + ((mv->dy)>>1) * pitch;
+    m = ((mv->dx-1)>>1) + ((mv->dy)>>1) * (pitch+32);
     r = ((mv->dx-1) & 1) + (((mv->dy) & 1) << 1);
-    e = eval(ref[r]->y+offset+m, current + offset,
+    e = eval(ref[r]->y+edged_offset+m, current + offset,
 	     shape + offset, pitch);
     if(e < best) {
       best = e;
@@ -871,11 +925,11 @@
       yh =  0;
     }
   }
-  if((x << 1) + mv->dx < ((width-16) << 1)) {
+  if(((x-edge) << 1) + mv->dx < ((width-16) << 1)) {
     /* +0.5 0 */
-    m = ((mv->dx+1)>>1) + ((mv->dy)>>1) * pitch;
+    m = ((mv->dx+1)>>1) + ((mv->dy)>>1) * (pitch+32);
     r = ((mv->dx+1) & 1) + (((mv->dy) & 1) << 1);
-    e = eval(ref[r]->y+offset+m, current + offset,
+    e = eval(ref[r]->y+edged_offset+m, current + offset,
 	     shape + offset, pitch);
     if(e < best) {
       best = e;
@@ -884,12 +938,12 @@
     }
   }
     
-  if((y << 1) + mv->dy < ((height-16) << 1)) {
-    if((x << 1) + mv->dx > 0) {
+  if(((y-edge) << 1) + mv->dy < ((height-16) << 1)) {
+    if(((x+edge) << 1) + mv->dx > 0) {
       /* -0.5 +0.5 */
-      m = ((mv->dx-1)>>1) + ((mv->dy+1)>>1) * pitch;
+      m = ((mv->dx-1)>>1) + ((mv->dy+1)>>1) * (pitch+32);
       r = ((mv->dx-1) & 1) + (((mv->dy+1) & 1) << 1);
-      e = eval(ref[r]->y+offset+m, current + offset,
+      e = eval(ref[r]->y+edged_offset+m, current + offset,
 	       shape + offset, pitch);
       if(e < best) {
 	best = e;
@@ -898,20 +952,20 @@
       }
     }
     /* 0 +0.5 */
-    m = ((mv->dx)>>1) + ((mv->dy+1)>>1) * pitch;
+    m = ((mv->dx)>>1) + ((mv->dy+1)>>1) * (pitch+32);
     r = ((mv->dx) & 1) + (((mv->dy+1) & 1) << 1);
-    e = eval(ref[r]->y+offset+m, current + offset,
+    e = eval(ref[r]->y+edged_offset+m, current + offset,
 	     shape + offset, pitch);
     if(e < best) {
       best = e;
       xh =  0;
       yh = +1;
     }
-    if((x << 1) + mv->dx < ((width-16) << 1)) {
+    if(((x-edge) << 1) + mv->dx < ((width-16) << 1)) {
       /* +0.5 +0.5 */
-      m = ((mv->dx+1)>>1) + ((mv->dy+1)>>1) * pitch;
+      m = ((mv->dx+1)>>1) + ((mv->dy+1)>>1) * (pitch+32);
       r = ((mv->dx+1) & 1) + (((mv->dy+1) & 1) << 1);
-      e = eval(ref[r]->y+offset+m, current + offset,
+      e = eval(ref[r]->y+edged_offset+m, current + offset,
 	       shape + offset, pitch);
       if(e < best) {
 	best = e;

Index: fame_motion_none.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_motion_none.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_motion_none.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_motion_none.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -27,7 +27,8 @@
 static fame_motion_coding_t none_estimation(fame_motion_t *motion,
 					    int mb_x,
 					    int mb_y,
-					    fame_motion_vector_t *vectors);
+					    fame_motion_vector_t *vectors,
+					    unsigned char quant);
 
 static void find_vector(fame_yuv_t **ref,
 			unsigned char *current,
@@ -105,9 +106,10 @@
 /*    fame_motion_coding_t: intra or inter coded.                            */
 
 static fame_motion_coding_t none_estimation(fame_motion_t *motion,
-						int mb_x,
-						int mb_y,
-						fame_motion_vector_t *vectors)
+					    int mb_x,
+					    int mb_y,
+					    fame_motion_vector_t *vectors,
+					    unsigned char quant)
 {
   int pitch;
   unsigned char *current;
@@ -123,8 +125,8 @@
   y = mb_y << 4;
   width = motion->mb_width << 4;
   height = motion->mb_height << 4;
-  pitch = width;
   current = motion->current->y;
+  pitch = motion->current->p;
   shape = motion->shape;
 
   /* saturate prediction to borders */
@@ -134,10 +136,10 @@
   if((y<<1)+vectors[0].dy>((height-16)<<1)) vectors[0].dy = (height-16-y)<<1;
   
   /* compute zero motion MAD and number of pixels in shape */
-  offset[0] = y * width + x;
-  offset[1] = y * width + x+8;
-  offset[2] = (y+8) * width + x;
-  offset[3] = (y+8) * width + x+8;
+  offset[0] = y * pitch + x;
+  offset[1] = y * pitch + x+8;
+  offset[2] = (y+8) * pitch + x;
+  offset[3] = (y+8) * pitch + x+8;
     
   if(motion->shape) {
     vectors[0].count = mad_withmask(current+offset[0], shape+offset[0], pitch, &vectors[0].deviation);
@@ -254,33 +256,36 @@
 			compute_error_t eval_error,
 			fame_motion_vector_t *mv)
 {
-  int motion;
+  int motion0, motion1, motion2, motion3;
   int residual;
 
   mv[3].dx = mv[2].dx = mv[1].dx = mv[0].dx;
   mv[3].dy = mv[2].dy = mv[1].dy = mv[0].dy;
 
-  motion = (mv[0].dx >> 1) + (mv[0].dy >> 1) * pitch;
   residual = (mv[0].dx & 1) + ((mv[0].dy & 1) << 1);
+  motion0 = ((mv[0].dy >> 1) + y) * (pitch + 32) + (mv[0].dx >> 1) + x;
+  motion1 = ((mv[0].dy >> 1) + y) * (pitch + 32) + (mv[0].dx >> 1) + x + 8;
+  motion2 = ((mv[0].dy >> 1) + y + 8) * (pitch + 32) + (mv[0].dx >> 1) + x;
+  motion3 = ((mv[0].dy >> 1) + y + 8) * (pitch + 32) + (mv[0].dx >> 1) + x + 8;
 
   /* initial step */
   mv[0].error = 
-    eval_error(ref[residual]->y+offset[0]+motion,
+    eval_error(ref[residual]->y+motion0,
 	       current+offset[0],
 	       shape+offset[0],
 	       pitch) - (mv[0].count >> 1);
   mv[1].error = 
-    eval_error(ref[residual]->y+offset[1]+motion,
+    eval_error(ref[residual]->y+motion1,
 	       current+offset[1],
 	       shape+offset[1],
 	       pitch) - (mv[1].count >> 1);
   mv[2].error = 
-    eval_error(ref[residual]->y+offset[2]+motion,
+    eval_error(ref[residual]->y+motion2,
 	       current+offset[2],
 	       shape+offset[2],
 	       pitch) - (mv[2].count >> 1);
   mv[3].error = 
-    eval_error(ref[residual]->y+offset[3]+motion,
+    eval_error(ref[residual]->y+motion3,
 	       current+offset[3],
 	       shape+offset[3],
 	       pitch) - (mv[3].count >> 1);

Index: fame_motion_pmvfast.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_motion_pmvfast.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- fame_motion_pmvfast.c	23 Mar 2002 15:57:25 -0000	1.2
+++ fame_motion_pmvfast.c	1 Jun 2002 20:23:10 -0000	1.3
@@ -20,13 +20,67 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "fame.h"
+#include "fame_malloc.h"
 #include "fame_motion.h"
 #include "fame_motion_pmvfast.h"
+#ifdef HAS_MMX
+#include "mad_mmx.h"
+#else
 #include "mad_int.h"
+#endif
[...1097 lines suppressed...]
-  /*           depends on the covariance (in a first approximation)       */
-  /*        -> If (1) < (2) - 2*N : Choose INTRA                          */
-  /*           Substract 2*N to favour INTER mode when there is no        */
-  /*           significant difference                                     */
-  if(mad_inter + count + count < sad_inter) {
-#if DEBUG
-    fprintf(debug_log, "Coding = intra\n");
-#endif
-    return(motion_intra);
-  } else {
 #if DEBUG
-    fprintf(debug_log, "Coding = inter\n");
+  fprintf(debug_log, "Coding = inter\n");
 #endif
-    return(motion_inter);
-  }
+  return(motion_inter);
 }
 
 /* End of motion_pmvfast.c */

Index: fame_motion_pmvfast.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_motion_pmvfast.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_motion_pmvfast.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_motion_pmvfast.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -45,7 +45,7 @@
 				  int search_range);
   void (* FAME_OVERLOADED(leave))(fame_motion_t *motion);
 
-  fame_motion_vector_t *vectors;
+  fame_motion_vector_t *vectors[2];
 } fame_motion_pmvfast_t;
 
 extern FAME_CONSTRUCTOR(fame_motion_pmvfast_t);

Index: fame_profile.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_profile.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_profile.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_profile.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -31,9 +31,12 @@
 		fame_parameters_t *params,
 		unsigned char *buffer,
 		unsigned int size);
-  int (* encode)(struct _fame_profile_t_ *profile,
+  void (* enter)(struct _fame_profile_t_ *profile,
 		 fame_yuv_t *yuv,
-		 unsigned char *shape);
+		 unsigned char *shape);		 
+  int (* encode)(struct _fame_profile_t_ *profile);
+  void (* leave)(struct _fame_profile_t_ *profile,
+		 fame_frame_statistics_t *stats);
   int (* close)(struct _fame_profile_t_ *profile);
   fame_profile_private_data_t *data;
 } fame_profile_t;

Index: fame_profile_mpeg.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_profile_mpeg.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_profile_mpeg.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_profile_mpeg.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -20,8 +20,10 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <math.h>
 
 #include "fame.h"
+#include "fame_malloc.h"
 #include "fame_profile.h"
 #include "fame_profile_mpeg.h"
 #include "fame_encoder.h"
@@ -29,22 +31,28 @@
 #include "fame_motion.h"
 #include "fame_syntax.h"
 #include "fame_shape.h"
+#include "fame_monitor.h"
 
 static void profile_mpeg_init(fame_profile_t *profile,
 			      fame_context_t *context,
 			      fame_parameters_t *params,
 			      unsigned char *buffer,
 			      unsigned int size);
-static int profile_mpeg_encode(fame_profile_t *profile,
+static void profile_mpeg_enter(fame_profile_t *profile,
 			       fame_yuv_t *yuv,
 			       unsigned char *shape);
+static int profile_mpeg_encode(fame_profile_t *profile);
+static void profile_mpeg_leave(fame_profile_t *profile,
+			       fame_frame_statistics_t *stats);
 static int profile_mpeg_close(fame_profile_t *profile);
 
 FAME_CONSTRUCTOR(fame_profile_mpeg_t)
 {
   FAME_OBJECT(this)->name = "MPEG profile";
   FAME_PROFILE(this)->init = profile_mpeg_init;
+  FAME_PROFILE(this)->enter = profile_mpeg_enter;
   FAME_PROFILE(this)->encode = profile_mpeg_encode;
+  FAME_PROFILE(this)->leave = profile_mpeg_leave;
   FAME_PROFILE(this)->close = profile_mpeg_close;
   this->encoder_flags = 0;
   this->decoder_flags = 0;
@@ -52,6 +60,7 @@
   this->syntax_flags = 0;
   this->shape_flags = 0;
   this->rate_flags = 0;
+  this->monitor_flags = 0;
   return(this);
 }
 
@@ -76,6 +85,7 @@
 			      unsigned int size)
 {
   fame_profile_mpeg_t *profile_mpeg = FAME_PROFILE_MPEG(profile);
+  int i, j;
 
   profile_mpeg->width = params->width;
   profile_mpeg->height = params->height;
@@ -85,21 +95,27 @@
   profile_mpeg->slices_per_frame = params->slices_per_frame;
   profile_mpeg->frames_per_gop = strlen(profile_mpeg->coding);
   profile_mpeg->frames_per_sequence = params->frames_per_sequence;
-  profile_mpeg->mbs_per_slice = ((((params->height + 15) >> 4) + 
-			     profile_mpeg->slices_per_frame - 1) / 
-			    profile_mpeg->slices_per_frame);
+  profile_mpeg->total_frames = params->total_frames;
+  profile_mpeg->lines_per_slice = ((((params->height + 15) >> 4) + 
+				    profile_mpeg->slices_per_frame - 1) / 
+				   profile_mpeg->slices_per_frame);
   profile_mpeg->slice_number = 0;
   profile_mpeg->frame_number = 0;
   profile_mpeg->gop_number = 0;
   profile_mpeg->fps_num = params->frame_rate_num;
   profile_mpeg->fps_den = params->frame_rate_den;
   profile_mpeg->alpha_th = 255*(100 - params->shape_quality)/100;
-  profile_mpeg->search_range = params->search_range;
-  profile_mpeg->intra_y_scale = 0;
-  profile_mpeg->intra_c_scale = 0;
+  if(params->search_range) {
+    profile_mpeg->search_range = params->search_range;
+    profile_mpeg->search_range_adaptive = 0;
+  } else {
+    profile_mpeg->search_range = 32;
+    profile_mpeg->search_range_adaptive = 1;
+  }
   profile_mpeg->intra_matrix = NULL;
   profile_mpeg->inter_matrix = NULL;
   profile_mpeg->verbose = params->verbose;
+  profile_mpeg->rounding = 0;
 
   /* Get the components */
   profile_mpeg->decoder = 
@@ -114,6 +130,8 @@
     (fame_shape_t *) fame_get_object(context, "shape");
   profile_mpeg->rate = 
     (fame_rate_t *) fame_get_object(context, "rate");
+  profile_mpeg->monitor =
+    (fame_monitor_t *) fame_get_object(context, "monitor");
 
   /* VBR/CBR coding */
   if(profile_mpeg->bitrate == 0)
@@ -126,115 +144,87 @@
   profile_mpeg->dirty = 0;
 
   /* Allocate reference frame ring */
-  profile_mpeg->ref[0][0] = 
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[0][0]->y = 
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8+
-			     (profile_mpeg->width>>1)+1);/*for interpolation*/
-  profile_mpeg->ref[0][0]->u = 
-    profile_mpeg->ref[0][0]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[0][0]->v = 
-    profile_mpeg->ref[0][0]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->ref[0][1] = 
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[0][1]->y = 
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8);
-  profile_mpeg->ref[0][1]->u = 
-    profile_mpeg->ref[0][1]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[0][1]->v =
-    profile_mpeg->ref[0][1]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->ref[0][2] =
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[0][2]->y =
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8);
-  profile_mpeg->ref[0][2]->u =
-    profile_mpeg->ref[0][2]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[0][2]->v =
-    profile_mpeg->ref[0][2]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->ref[0][3] = 
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[0][3]->y =
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8);
-  profile_mpeg->ref[0][3]->u =
-    profile_mpeg->ref[0][3]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[0][3]->v =
-    profile_mpeg->ref[0][3]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->ref[1][0] =
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[1][0]->y =
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8+
-			     (profile_mpeg->width>>1)+1);/*for interpolation*/
-  profile_mpeg->ref[1][0]->u =
-    profile_mpeg->ref[1][0]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[1][0]->v =
-    profile_mpeg->ref[1][0]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->ref[1][1] =
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[1][1]->y =
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8);
-  profile_mpeg->ref[1][1]->u =
-    profile_mpeg->ref[1][1]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[1][1]->v =
-    profile_mpeg->ref[1][1]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->ref[1][2] =
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[1][2]->y =
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8);
-  profile_mpeg->ref[1][2]->u =
-    profile_mpeg->ref[1][2]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[1][2]->v =
-    profile_mpeg->ref[1][2]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->ref[1][3] =
-    (fame_yuv_t *) malloc(sizeof(fame_yuv_t));
-  profile_mpeg->ref[1][3]->y =
-    (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height*12/8);
-  profile_mpeg->ref[1][3]->u =
-    profile_mpeg->ref[1][3]->y + profile_mpeg->width*profile_mpeg->height;
-  profile_mpeg->ref[1][3]->v =
-    profile_mpeg->ref[1][3]->u + profile_mpeg->width*profile_mpeg->height/4;
-  profile_mpeg->past = profile_mpeg->current = profile_mpeg->future = 0;
-
-  /* Allocate reconstructed shape */
-  if(profile_mpeg->shape)
-    profile_mpeg->ref_shape = (unsigned char *) malloc(profile_mpeg->width*profile_mpeg->height);
-  else
+  for(j = 0; j < 2; j++) /* 2 references */
+    for(i = 0; i < 4; i++) { /* 4 planes per reference (interpolation) */
+      profile_mpeg->ref[j][i] = 
+	(fame_yuv_t *) fame_malloc(sizeof(fame_yuv_t));
+      profile_mpeg->ref[j][i]->w = profile_mpeg->width;
+      profile_mpeg->ref[j][i]->h = profile_mpeg->height;
+      profile_mpeg->ref[j][i]->p = profile_mpeg->width+32;
+
+      profile_mpeg->ref[j][i]->y = 
+	(unsigned char *) fame_malloc((profile_mpeg->width+32)*
+				      (profile_mpeg->height+32)*12/8);
+      profile_mpeg->ref[j][i]->u = 
+	profile_mpeg->ref[j][i]->y +
+	(profile_mpeg->width+32)*(profile_mpeg->height+32);
+      profile_mpeg->ref[j][i]->v = 
+	profile_mpeg->ref[j][i]->u +
+	(profile_mpeg->width+32)*(profile_mpeg->height+32)/4;
+      /* add offset to beginning of picture (padding) */
+      profile_mpeg->ref[j][i]->y += 16*(profile_mpeg->width+32)+16;
+      profile_mpeg->ref[j][i]->u += 8*(profile_mpeg->width+32)/2+8;
+      profile_mpeg->ref[j][i]->v += 8*(profile_mpeg->width+32)/2+8;
+    }
+  
+  /* Allocate reconstructed shape and BAB map */
+  if(profile_mpeg->shape) {
+    profile_mpeg->ref_shape = 
+      (unsigned char *) fame_malloc(profile_mpeg->width*profile_mpeg->height);
+    profile_mpeg->bab_map = 
+      (unsigned char *) fame_malloc(((profile_mpeg->width >> 4) + 2)*
+				    ((profile_mpeg->height >> 4) + 2));
+  } else {
     profile_mpeg->ref_shape = NULL;
+    profile_mpeg->bab_map = NULL;
+  }
+
+  /* Initialize reference pointers */
+  profile_mpeg->past = 1;
+  profile_mpeg->future = 0;
+  profile_mpeg->current = 1;
+
 
   /* Initialize motion estimation */
   if(profile_mpeg->motion && profile_mpeg->motion->init)
     profile_mpeg->motion->init(profile_mpeg->motion, 
-				(profile_mpeg->width >> 4),
-				(profile_mpeg->height >> 4),
-				FAME_PROFILE_MPEG(profile)->motion_flags);
+			       (profile_mpeg->width >> 4),
+			       (profile_mpeg->height >> 4),
+			       FAME_PROFILE_MPEG(profile)->motion_flags);
   
   /* Initialize the syntax */
   if(profile_mpeg->syntax && profile_mpeg->syntax->init)
     profile_mpeg->syntax->init(profile_mpeg->syntax,
 				(profile_mpeg->width >> 4),
 				(profile_mpeg->height >> 4),
-				profile_mpeg->search_range,
 				&profile_mpeg->intra_matrix,
 				&profile_mpeg->inter_matrix,
+				profile_mpeg->intra_dc_y_scale_table,
+				profile_mpeg->intra_dc_c_scale_table,
 				&profile_mpeg->mismatch,
 				FAME_PROFILE_MPEG(profile)->syntax_flags);
   
   /* Initialize the encoder */
   if(profile_mpeg->encoder && profile_mpeg->encoder->init)
     profile_mpeg->encoder->init(profile_mpeg->encoder,
-				 profile_mpeg->width,
-				 profile_mpeg->height,
-				 profile_mpeg->intra_matrix,
-				 profile_mpeg->inter_matrix,
-				 profile_mpeg->mismatch);
-
+				profile_mpeg->width,
+				profile_mpeg->height,
+				profile_mpeg->intra_matrix,
+				profile_mpeg->inter_matrix,
+				profile_mpeg->intra_dc_y_scale_table,
+				profile_mpeg->intra_dc_c_scale_table,
+				profile_mpeg->mismatch);
+  
   /* Initialize the decoder */
   if(profile_mpeg->decoder && profile_mpeg->decoder->init)
     profile_mpeg->decoder->init(profile_mpeg->decoder,
-				 profile_mpeg->width,
-				 profile_mpeg->height,
-				 profile_mpeg->intra_matrix,
-				 profile_mpeg->inter_matrix,
-				 profile_mpeg->mismatch);
+				profile_mpeg->width,
+				profile_mpeg->height,
+				profile_mpeg->intra_matrix,
+				profile_mpeg->inter_matrix,
+				profile_mpeg->intra_dc_y_scale_table,
+				profile_mpeg->intra_dc_c_scale_table,
+				profile_mpeg->mismatch);
 
   /* Initialize the shape coder */
   if(profile_mpeg->shape && profile_mpeg->shape->init)
@@ -242,6 +232,14 @@
 			       (profile_mpeg->width >> 4),
 			       (profile_mpeg->height >> 4),
 			       FAME_PROFILE_MPEG(profile)->shape_flags);
+  /* Initialize statistics monitoring */
+  if(profile_mpeg->monitor && profile_mpeg->monitor->init)
+    profile_mpeg->monitor->init(profile_mpeg->monitor,
+				params->retrieve_cb,
+				(profile_mpeg->width >> 4),
+				(profile_mpeg->height >> 4),
+				profile_mpeg->total_frames,
+				FAME_PROFILE_MPEG(profile)->monitor_flags);  
 
   /* Initialize rate control */
   if(profile_mpeg->rate && profile_mpeg->rate->init)
@@ -251,13 +249,15 @@
 			     profile_mpeg->bitrate/
 			     profile_mpeg->fps_num*profile_mpeg->fps_den,
 			     profile_mpeg->coding,
+			     profile_mpeg->monitor->frame_stats_list,
+			     &(profile_mpeg->monitor->global_stats),
 			     FAME_PROFILE_MPEG(profile)->rate_flags);
 }
 
-/*  profile_mpeg_encode                                                      */
+/*  profile_mpeg_enter                                                       */
 /*                                                                           */
 /*  Description:                                                             */
-/*    Encode a single frame.                                                 */
+/*    Start encoding a picture.                                              */
 /*                                                                           */
 /*  Arguments:                                                               */
 /*    fame_profile_t * profile: the profile handle returned by fame_open     */
@@ -265,29 +265,45 @@
 /*    unsigned char * mask: the input mask (0 = transparent, 255 = opaque)   */
 /*                                                                           */
 /*  Return value:                                                            */
-/*    int : the number of bytes written to buffer                            */
+/*    None.                                                                  */
 
-static int profile_mpeg_encode(fame_profile_t *profile,
+static void profile_mpeg_enter(fame_profile_t *profile,
 			       fame_yuv_t *yuv,
 			       unsigned char *shape)
 {
   fame_profile_mpeg_t *profile_mpeg = FAME_PROFILE_MPEG(profile);
-  int x, y;
-  /* the 4 Y and 2 C blocks in a macroblock */
-  short * blocks[6]; 
-  /* the binary alpha block */
-  unsigned char *bab;
-  fame_bab_t bab_type;
-  fame_motion_coding_t motion_coding;
-  char coding, next;
-  fame_motion_vector_t forward[6];
-  fame_box_t bounding_box;
-  unsigned char pattern;
-  int intra, inter;
 
+  /* Update stats and choose coding mode */
+  profile_mpeg->current_coding = profile_mpeg->coding[profile_mpeg->frame_number % strlen(profile_mpeg->coding)];
+  profile_mpeg->next_coding = profile_mpeg->coding[(profile_mpeg->frame_number + 1) % strlen(profile_mpeg->coding)];
+
+  if (profile_mpeg->monitor && profile_mpeg->monitor->current_frame_stats)
+    profile_mpeg->frame_stats = profile_mpeg->monitor->current_frame_stats;
+  else
+    profile_mpeg->frame_stats = NULL;
+
+  /* Clear BAB map */
+  if(profile_mpeg->bab_map)
+    memset(profile_mpeg->bab_map, bab_not_coded,
+	   ((profile_mpeg->width >> 4) + 2)*((profile_mpeg->height >> 4) + 2));
+  
+  /* input pitch = input width if not set */
+  if(yuv->p == 0) yuv->p = yuv->w; 
+
+  /* Initialize statistics */
+  if(profile_mpeg->monitor && profile_mpeg->monitor->enter)
+    profile_mpeg->monitor->enter(profile_mpeg->monitor,
+				 profile_mpeg->frame_number,
+				 profile_mpeg->ref[profile_mpeg->future],
+				 yuv,
+				 shape,
+				 &profile_mpeg->current_coding);
+  
   /* Initialize syntax buffer */
   if(profile_mpeg->syntax && profile_mpeg->syntax->use)
-    profile_mpeg->syntax->use(profile_mpeg->syntax, profile_mpeg->buffer, profile_mpeg->dirty);
+    profile_mpeg->syntax->use(profile_mpeg->syntax,
+			      profile_mpeg->buffer,
+			      profile_mpeg->dirty);
 
   /* Generate sequence */
   if(profile_mpeg->frame_number % profile_mpeg->frames_per_sequence == 0)
@@ -307,91 +323,132 @@
       profile_mpeg->syntax->start_GOP(profile_mpeg->syntax, profile_mpeg->frame_number);
 
   /* TODO: find bounding box */
-  bounding_box.x = 0;
-  bounding_box.y = 0;
-  bounding_box.w = profile_mpeg->width;
-  bounding_box.h = profile_mpeg->height;
+  profile_mpeg->bounding_box.x = 0;
+  profile_mpeg->bounding_box.y = 0;
+  profile_mpeg->bounding_box.w = profile_mpeg->width;
+  profile_mpeg->bounding_box.h = profile_mpeg->height;
+
+  /* Reset rounding control */
+  if(profile_mpeg->current_coding == 'I')
+    profile_mpeg->rounding = 0;
 
   /* Generate picture */
-  coding = profile_mpeg->coding[profile_mpeg->frame_number % strlen(profile_mpeg->coding)];
-  next = profile_mpeg->coding[(profile_mpeg->frame_number + 1) % strlen(profile_mpeg->coding)];
   if(profile_mpeg->syntax && profile_mpeg->syntax->start_picture)
     profile_mpeg->syntax->start_picture(profile_mpeg->syntax,
-					 coding,
-					 profile_mpeg->frame_number%profile_mpeg->frames_per_gop,
-					 &bounding_box);
+					profile_mpeg->current_coding,
+					profile_mpeg->frame_number%profile_mpeg->frames_per_gop,
+					&profile_mpeg->bounding_box,
+					profile_mpeg->rounding,
+					profile_mpeg->search_range);
 
   /* Enter the encoder */
   if(profile_mpeg->encoder && profile_mpeg->encoder->enter)
     profile_mpeg->encoder->enter(profile_mpeg->encoder,
-				  profile_mpeg->ref[profile_mpeg->past],
-				  profile_mpeg->ref[profile_mpeg->current],
-				  profile_mpeg->ref[profile_mpeg->future],
-				  yuv,
-				  profile_mpeg->ref_shape);
+				 profile_mpeg->ref[profile_mpeg->past],
+				 profile_mpeg->ref[profile_mpeg->current],
+				 profile_mpeg->ref[profile_mpeg->future],
+				 yuv,
+				 profile_mpeg->ref_shape);
   
   if(profile_mpeg->decoder && profile_mpeg->decoder->enter)
     profile_mpeg->decoder->enter(profile_mpeg->decoder,
-				  profile_mpeg->ref[profile_mpeg->past],
-				  profile_mpeg->ref[profile_mpeg->current],
-				  profile_mpeg->ref[profile_mpeg->future],
-				  yuv,
-				  profile_mpeg->ref_shape);
+				 profile_mpeg->ref[profile_mpeg->past],
+				 profile_mpeg->ref[profile_mpeg->current],
+				 profile_mpeg->ref[profile_mpeg->future],
+				 yuv,
+				 profile_mpeg->ref_shape);
 
   if(profile_mpeg->shape && profile_mpeg->shape->enter)
     profile_mpeg->shape->enter(profile_mpeg->shape,
-				shape,
-				profile_mpeg->ref_shape,
-				profile_mpeg->alpha_th);
-  
+			       shape,
+			       profile_mpeg->ref_shape,
+			       profile_mpeg->alpha_th);
+
   if(profile_mpeg->motion && profile_mpeg->motion->enter)
     profile_mpeg->motion->enter(profile_mpeg->motion,
-				 profile_mpeg->ref[profile_mpeg->future],
-				 yuv,
-				 profile_mpeg->ref_shape,
-				 profile_mpeg->search_range);
+				profile_mpeg->ref[profile_mpeg->future],
+				yuv,
+				profile_mpeg->ref_shape,
+				profile_mpeg->search_range);
 
   if(profile_mpeg->rate && profile_mpeg->rate->enter)
     profile_mpeg->rate->enter(profile_mpeg->rate,
 			      profile_mpeg->ref[profile_mpeg->future],
 			      yuv,
 			      profile_mpeg->ref_shape,
-			      coding);			      
+			      profile_mpeg->current_coding,
+			      profile_mpeg->frame_stats);			      
 
   /* estimate quantiser scale for frame */
   if(profile_mpeg->rate && profile_mpeg->rate->global_estimation)
     profile_mpeg->quant_scale =
       profile_mpeg->rate->global_estimation(profile_mpeg->rate);
 
+  /* initialize block count */
+  profile_mpeg->intra = profile_mpeg->inter = 0;
+
+  /* initialize slice offset */
+  profile_mpeg->slice_start = 0;
+  profile_mpeg->total = 0;
+}
+
+/*  profile_mpeg_encode                                                      */
+/*                                                                           */
+/*  Description:                                                             */
+/*    Encode a single slice.                                                 */
+/*                                                                           */
+/*  Arguments:                                                               */
+/*    fame_profile_t * profile: the profile handle returned by fame_open     */
+/*                                                                           */
+/*  Return value:                                                            */
+/*    int : the number of bytes written to buffer                            */
+
+static int profile_mpeg_encode(fame_profile_t *profile)
+{
+  fame_profile_mpeg_t *profile_mpeg = FAME_PROFILE_MPEG(profile);
+  int x, y;
+  int slice_end;
+  /* the 4 Y and 2 C blocks in a macroblock */
+  short * blocks[6]; 
+  /* the binary alpha block */
+  unsigned char *bab;
+  fame_bab_t bab_type;
+  fame_motion_coding_t motion_coding;
+  fame_motion_vector_t forward[6];
+  unsigned char pattern;
+  int mv_count = 0;
+  int mv_norm = 0;
+
+  /* Test for end of frame */
+  if(profile_mpeg->slice_start >= (profile_mpeg->height >> 4))
+    return(0);
+
+  /* Clear syntax buffer */
+  if(profile_mpeg->slice_start != 0) {
+    if(profile_mpeg->syntax && profile_mpeg->syntax->use)
+      profile_mpeg->syntax->use(profile_mpeg->syntax,
+				profile_mpeg->buffer,
+				profile_mpeg->dirty);
+    profile_mpeg->dirty = 0;
+  }
+
   bab_type = bab_all_coded;
   motion_coding = motion_intra;
   pattern = 0x0f; /* all blocks coded */
-  
-  intra = inter = 0;
-  for (y = 0; y < (profile_mpeg->height >> 4); y++) 
-  {
-    /* Generate slice */
-    if(y % profile_mpeg->mbs_per_slice == 0) {
-      if(profile_mpeg->syntax && profile_mpeg->syntax->start_slice)
-	profile_mpeg->syntax->start_slice(profile_mpeg->syntax,
-					   y,
-					   profile_mpeg->mbs_per_slice*
-					   (profile_mpeg->width>>4),
-					   profile_mpeg->quant_scale,
-					   &profile_mpeg->intra_y_scale,
-					   &profile_mpeg->intra_c_scale);
-      if(profile_mpeg->encoder && profile_mpeg->encoder->set_quantisation)
-	profile_mpeg->encoder->set_quantisation(profile_mpeg->encoder,
-						 profile_mpeg->quant_scale,
-						 profile_mpeg->intra_y_scale,
-						 profile_mpeg->intra_c_scale);
-      if(profile_mpeg->decoder && profile_mpeg->decoder->set_quantisation)
-	profile_mpeg->decoder->set_quantisation(profile_mpeg->decoder,
-						 profile_mpeg->quant_scale,
-						 profile_mpeg->intra_y_scale,
-						 profile_mpeg->intra_c_scale);
-    }
 
+  /* Generate slice */
+  if(profile_mpeg->syntax && profile_mpeg->syntax->start_slice)
+    profile_mpeg->syntax->start_slice(profile_mpeg->syntax,
+				      profile_mpeg->slice_start,
+				      profile_mpeg->lines_per_slice*
+				      (profile_mpeg->width>>4),
+				      profile_mpeg->quant_scale);
+
+  slice_end = fame_min(profile_mpeg->height >> 4,
+		       profile_mpeg->slice_start+profile_mpeg->lines_per_slice);
+
+  /* Encode macroblocks */
+  for (y = profile_mpeg->slice_start; y < slice_end; y++) 
     for (x = 0; x < (profile_mpeg->width >> 4); x++)
     {
       if(profile_mpeg->shape && profile_mpeg->shape->encode_intra_shape)
@@ -399,83 +456,178 @@
 							    x, y, &bab, &pattern);
 
       /* compensate motion */
-      if(coding == 'P') {
+      if(profile_mpeg->current_coding == 'P') {
 	if(profile_mpeg->syntax && profile_mpeg->syntax->predict_vector)
 	  profile_mpeg->syntax->predict_vector(profile_mpeg->syntax,
 						x, y, 0, forward);
 	if(profile_mpeg->motion && profile_mpeg->motion->estimation)
-	  motion_coding = profile_mpeg->motion->estimation(profile_mpeg->motion, x, y, forward);
+	  motion_coding = profile_mpeg->motion->estimation(profile_mpeg->motion, x, y, forward, profile_mpeg->quant_scale);
 
 	/* U and V vectors */
 	if(profile_mpeg->syntax && profile_mpeg->syntax->compute_chrominance_vectors)
-	  profile_mpeg->syntax->compute_chrominance_vectors(profile_mpeg->syntax, forward);
+	  profile_mpeg->syntax->compute_chrominance_vectors(profile_mpeg->syntax, forward, pattern);
       } else
 	motion_coding = motion_intra;
 
       if(motion_coding == motion_intra) {
-	intra++;
+	profile_mpeg->intra++;
 	/* Code intra macroblock */
-	if(profile_mpeg->encoder && profile_mpeg->encoder->encode_intra_mb)
+	if(profile_mpeg->encoder &&
+	   profile_mpeg->encoder->encode_intra_mb &&
+	   bab_type != bab_not_coded)
 	  profile_mpeg->encoder->encode_intra_mb(profile_mpeg->encoder,
-						  x, y, blocks,
-						  bab_type);
+						 x, y, blocks,
+						 profile_mpeg->quant_scale,
+						 bab_type);
+	if(profile_mpeg->next_coding != 'I')
+	  if(profile_mpeg->decoder && 
+	     profile_mpeg->decoder->reconstruct_intra_mb &&
+	     bab_type != bab_not_coded)
+	    profile_mpeg->decoder->reconstruct_intra_mb(profile_mpeg->decoder,
+							x, y, blocks,
+							profile_mpeg->quant_scale,
+							bab_type);
 
-	if(next != 'I')
-	  if(profile_mpeg->decoder && profile_mpeg->decoder->reconstruct_intra_mb)
-	    profile_mpeg->decoder->reconstruct_intra_mb(profile_mpeg->decoder, x, y, 
-							 blocks, bab_type);
-	
 	/* Write macroblock */
-	if(profile_mpeg->syntax && profile_mpeg->syntax->write_intra_mb)
+	if(profile_mpeg->syntax &&
+	   profile_mpeg->syntax->write_intra_mb)
 	  profile_mpeg->syntax->write_intra_mb(profile_mpeg->syntax,
-						x, y, blocks,
-						bab, bab_type, pattern);
+					       x, y, blocks,
+					       bab, profile_mpeg->bab_map,
+					       bab_type, 0, pattern);
       } else {
-	inter++;
+	profile_mpeg->inter++;
 	/* TODO: check for coded in syntax macroblock depending on error   */
 	/* of motion estimation, for inter blocks only.                    */
-	
+	if(profile_mpeg->search_range_adaptive) {
+	  if(forward[0].dx == forward[1].dx &&
+	     forward[0].dx == forward[2].dx &&
+	     forward[0].dx == forward[3].dx &&
+	     forward[0].dy == forward[1].dy &&
+	     forward[0].dy == forward[2].dy &&
+	     forward[0].dy == forward[3].dy) {
+	    mv_count++;
+	    mv_norm += forward[0].dx*forward[0].dx + forward[0].dy*forward[0].dy;
+	  } else {
+	    mv_count += 4;
+	    mv_norm += forward[0].dx*forward[0].dx + forward[0].dy*forward[0].dy;
+	    mv_norm += forward[1].dx*forward[1].dx + forward[1].dy*forward[1].dy;
+	    mv_norm += forward[2].dx*forward[2].dx + forward[2].dy*forward[2].dy;
+	    mv_norm += forward[3].dx*forward[3].dx + forward[3].dy*forward[3].dy;
+	  }
+	}
+
 	/* Code inter macroblock */
-	if(profile_mpeg->encoder && profile_mpeg->encoder->encode_inter_mb)
+	if(profile_mpeg->encoder &&
+	   profile_mpeg->encoder->encode_inter_mb &&
+	   bab_type != bab_not_coded)
 	  profile_mpeg->encoder->encode_inter_mb(profile_mpeg->encoder,
-						  x, y, blocks,
-						  forward, NULL, motion_coding,
-						  bab_type);
+						 x, y, blocks,
+						 forward, NULL, motion_coding,
+						 profile_mpeg->quant_scale,
+						 bab_type);
 
-	if(next != 'I')
-	  if(profile_mpeg->decoder && profile_mpeg->decoder->reconstruct_inter_mb)
+	if(profile_mpeg->next_coding != 'I')
+	  if(profile_mpeg->decoder &&
+	     profile_mpeg->decoder->reconstruct_inter_mb &&
+	     bab_type != bab_not_coded)
 	    profile_mpeg->decoder->reconstruct_inter_mb(profile_mpeg->decoder, 
-							 x, y, blocks,
-							 forward, NULL, motion_coding,
-							 bab_type);
+							x, y, blocks,
+							forward, NULL, motion_coding,
+							profile_mpeg->quant_scale,
+							bab_type);
 	/* Write macroblock */
-	if(profile_mpeg->syntax && profile_mpeg->syntax->write_inter_mb)
-	  profile_mpeg->syntax->write_inter_mb(profile_mpeg->syntax, x, y, blocks,
-						bab, bab_type, pattern,
-						forward, NULL, motion_coding);
+	if(profile_mpeg->syntax &&
+	   profile_mpeg->syntax->write_inter_mb)
+	  profile_mpeg->syntax->write_inter_mb(profile_mpeg->syntax,
+					       x, y, blocks,
+					       bab, profile_mpeg->bab_map,
+					       bab_type, 0, pattern,
+					       forward, NULL, motion_coding);
       }
     }
+
+  if(mv_count) {
+    /* adapt search range according to MV standard deviation */
+    mv_norm /= mv_count;
+#ifdef HAS_MMX
+    asm("emms");
+#endif
+    mv_norm = (int) sqrt(mv_norm);
+
+    if(profile_mpeg->search_range < 3*mv_norm && 
+       profile_mpeg->search_range < 1024) {
+      profile_mpeg->search_range <<= 1;
+    } else if(profile_mpeg->search_range > 6*mv_norm &&
+	      profile_mpeg->search_range > 16) {
+      profile_mpeg->search_range >>= 1;
+    }
   }
 
-  /* Pad and interpolate for half-pel estimation/compensation */
-  if(next != 'I')
-    if(profile_mpeg->decoder && profile_mpeg->decoder->pad)
-      profile_mpeg->decoder->pad(profile_mpeg->decoder, &bounding_box);
+  /* End of slice */
+  if(profile_mpeg->syntax && profile_mpeg->syntax->end_slice)
+    profile_mpeg->syntax->end_slice(profile_mpeg->syntax);
+  
+  /* Return the number of bytes encoded */
+  if(profile_mpeg->syntax && profile_mpeg->syntax->flush)
+    profile_mpeg->dirty = profile_mpeg->syntax->flush(profile_mpeg->syntax);
+  else
+    profile_mpeg->dirty = 0;
 
-  if(next != 'I' && (profile_mpeg->motion->flags & FAME_MOTION_SUBPEL_SEARCH))
+  profile_mpeg->total += profile_mpeg->dirty;
+
+  profile_mpeg->slice_start = slice_end;
+
+  return(profile_mpeg->dirty);
+}
+
+/*  profile_mpeg_leave                                                       */
+/*                                                                           */
+/*  Description:                                                             */
+/*    Finish encoding a picture.                                             */
+/*                                                                           */
+/*  Arguments:                                                               */
+/*    fame_profile_t * profile: the profile handle returned by fame_open     */
+/*    fame_frame_statistics_t * stats: information about the encoding        */
+/*                                                                           */
+/*  Return value:                                                            */
+/*    None.                                                                  */
+static void profile_mpeg_leave(fame_profile_t *profile,
+			       fame_frame_statistics_t *stats)
+{
+  fame_profile_mpeg_t *profile_mpeg = FAME_PROFILE_MPEG(profile);
+
+  /* Pad and interpolate for half-pel estimation/compensation */
+  if(profile_mpeg->motion->flags & FAME_MOTION_FLIP_ROUNDING)
+    profile_mpeg->rounding ^= 1;
+  if(profile_mpeg->next_coding != 'I' &&
+     (profile_mpeg->motion->flags & FAME_MOTION_SUBPEL_SEARCH))
     if(profile_mpeg->decoder && profile_mpeg->decoder->interpolate)
-      profile_mpeg->decoder->interpolate(profile_mpeg->decoder, 0);
+      profile_mpeg->decoder->interpolate(profile_mpeg->decoder, profile_mpeg->rounding);
+
+  
+  if(profile_mpeg->next_coding != 'I' &&
+     (profile_mpeg->shape ||
+      (profile_mpeg->motion->flags & FAME_MOTION_UNRESTRICTED_SEARCH)))
+    if(profile_mpeg->decoder && profile_mpeg->decoder->pad)
+      profile_mpeg->decoder->pad(profile_mpeg->decoder,
+				 profile_mpeg->bab_map,
+				 &profile_mpeg->bounding_box);
 
+#undef DEBUG_WRITE_RECONSTRUCTED_FRAMES
 #ifdef DEBUG_WRITE_RECONSTRUCTED_FRAMES
   /* Write reconstructed frame to standard output */
-  fwrite(profile_mpeg->ref[profile_mpeg->current][0]->y,
-	 profile_mpeg->width*profile_mpeg->height,
+  fwrite(profile_mpeg->ref[profile_mpeg->current][1]->y -
+	 (16*(profile_mpeg->width+32)+16),
+	 (profile_mpeg->width+32)*(profile_mpeg->height+32),
 	 1, stdout);
-  fwrite(profile_mpeg->ref[profile_mpeg->current][0]->u,
-	 profile_mpeg->width*profile_mpeg->height/4,
+  fwrite(profile_mpeg->ref[profile_mpeg->current][1]->u -
+	 (8*(profile_mpeg->width+32)/2+8),
+	 (profile_mpeg->width+32)*(profile_mpeg->height+32)/4,
 	 1, stdout);
-  fwrite(profile_mpeg->ref[profile_mpeg->current][0]->v,
-	 profile_mpeg->width*profile_mpeg->height/4,
+  fwrite(profile_mpeg->ref[profile_mpeg->current][1]->v -
+	 (8*(profile_mpeg->width+32)/2+8),
+	 (profile_mpeg->width+32)*(profile_mpeg->height+32)/4,
 	 1, stdout);
 #endif
 
@@ -492,7 +644,7 @@
     profile_mpeg->motion->leave(profile_mpeg->motion);
 
   /* Rotate reference ring */
-  switch(coding) {
+  switch(profile_mpeg->current_coding) {
     case 'I':
     case 'P':
       profile_mpeg->past = profile_mpeg->future;
@@ -508,31 +660,37 @@
   /* Increment frame number */
   profile_mpeg->frame_number++;
 
-  /* End of picture */
-  if(profile_mpeg->syntax && profile_mpeg->syntax->end_picture)
-    profile_mpeg->syntax->end_picture(profile_mpeg->syntax);
-
-  /* Return the number of bytes encoded */
-  if(profile_mpeg->syntax && profile_mpeg->syntax->flush)
-    profile_mpeg->dirty = profile_mpeg->syntax->flush(profile_mpeg->syntax);
-  else
-    profile_mpeg->dirty = 0;
-
+  /* Update rate control */
   if(profile_mpeg->rate && profile_mpeg->rate->leave)
-    profile_mpeg->rate->leave(profile_mpeg->rate, profile_mpeg->dirty * 8);
+    profile_mpeg->rate->leave(profile_mpeg->rate, profile_mpeg->total * 8);
 
   /* Show picture info */
-  if(profile_mpeg->verbose)
-    FAME_INFO("inter/intra %3d%% %dkbits/s quality %d%% %c frame #%d\033[K\r",
-	      100*inter/(intra+inter), 
-	      (profile_mpeg->dirty * profile_mpeg->fps_num * 8) / 
+  if(profile_mpeg->verbose) {
+    if(profile_mpeg->intra+profile_mpeg->inter) {
+      FAME_INFO("inter/intra %3d%% ",
+		100*profile_mpeg->inter/
+		(profile_mpeg->intra+profile_mpeg->inter));
+    }
+    FAME_INFO("%dkbits/s quality %d%% range %d %c frame #%d\033[K\r",
+	      (profile_mpeg->total * profile_mpeg->fps_num * 8) / 
 	      (profile_mpeg->fps_den * 1000),
 	      (31 - profile_mpeg->quant_scale) * 100 / 30,
-	      coding, profile_mpeg->frame_number);
-  return(profile_mpeg->dirty);
+	      profile_mpeg->search_range,
+	      profile_mpeg->current_coding,
+	      profile_mpeg->frame_number);
+  }
+
+  if(profile_mpeg->monitor && profile_mpeg->monitor->leave)
+    profile_mpeg->monitor->leave(profile_mpeg->monitor, 
+				 profile_mpeg->total * 8,
+				 profile_mpeg->quant_scale);
+
+  if(stats)
+    *stats = *profile_mpeg->frame_stats;
 }
 
-/*  profile_mpeg_close                                                */
+
+/*  profile_mpeg_close                                                       */
 /*                                                                           */
 /*  Description:                                                             */
 /*    Flush remaining encoded data and cleanup everything.                   */
@@ -546,6 +704,7 @@
 static int profile_mpeg_close(fame_profile_t *profile)
 {
   fame_profile_mpeg_t *profile_mpeg = FAME_PROFILE_MPEG(profile);
+  int i, j;
   int bytes_written;
 
   /* Initialize syntax buffer */
@@ -586,27 +745,25 @@
   if(profile_mpeg->rate && profile_mpeg->rate->close)
     profile_mpeg->rate->close(profile_mpeg->rate);
 
+  /* Release statistics monitoring */
+  if(profile_mpeg->monitor && profile_mpeg->monitor->close)
+    profile_mpeg->monitor->close(profile_mpeg->monitor);
+
   /* Free reference shape */
   if(profile_mpeg->ref_shape)
-    free(profile_mpeg->ref_shape);
+    fame_free(profile_mpeg->ref_shape);
+
+  /* Free BAB map */
+  if(profile_mpeg->bab_map)
+    fame_free(profile_mpeg->bab_map);
 
   /* Free reference ring */
-  free(profile_mpeg->ref[0][0]->y);
-  free(profile_mpeg->ref[0][0]);
-  free(profile_mpeg->ref[0][1]->y);
-  free(profile_mpeg->ref[0][1]);
-  free(profile_mpeg->ref[0][2]->y);
-  free(profile_mpeg->ref[0][2]);
-  free(profile_mpeg->ref[0][3]->y);
-  free(profile_mpeg->ref[0][3]);
-  free(profile_mpeg->ref[1][0]->y);
-  free(profile_mpeg->ref[1][0]);
-  free(profile_mpeg->ref[1][1]->y);
-  free(profile_mpeg->ref[1][1]);
-  free(profile_mpeg->ref[1][2]->y);
-  free(profile_mpeg->ref[1][2]);
-  free(profile_mpeg->ref[1][3]->y);
-  free(profile_mpeg->ref[1][3]);
+  for(j = 0; j < 2; j++) /* 2 references */
+    for(i = 0; i < 4; i++) { /* 4 planes per reference (interpolation) */
+      /* remove offset */
+      profile_mpeg->ref[j][i]->y -= 16*(profile_mpeg->width+32)+16;
+      fame_free(profile_mpeg->ref[j][i]->y);
+    }
 
   /* Print newline for picture codes */
   if(profile_mpeg->verbose)

Index: fame_profile_mpeg.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_profile_mpeg.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_profile_mpeg.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_profile_mpeg.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -28,6 +28,7 @@
 #include "fame_syntax.h"
 #include "fame_shape.h"
 #include "fame_rate.h"
+#include "fame_monitor.h"
 
 typedef struct _fame_profile_mpeg_t_ {
   FAME_EXTENDS(fame_profile_t);
@@ -40,7 +41,8 @@
   int slices_per_frame;
   int frames_per_gop;
   unsigned int frames_per_sequence;
-  int mbs_per_slice;
+  unsigned int total_frames;
+  int lines_per_slice;
   int slice_number;
   int frame_number;
   int gop_number;
@@ -48,30 +50,41 @@
   int fps_den;
   int alpha_th;
   unsigned int search_range;
-  unsigned char intra_y_scale;
-  unsigned char intra_c_scale;
+  unsigned int search_range_adaptive;
+  unsigned char intra_dc_y_scale_table[32];
+  unsigned char intra_dc_c_scale_table[32];
   unsigned char *intra_matrix;
   unsigned char *inter_matrix;
+  int rounding;
   fame_mismatch_t mismatch;
   unsigned char verbose;
   unsigned char *ref_shape;
+  unsigned char *bab_map;
   fame_yuv_t *ref[2][4];
   unsigned int past, current, future;
   unsigned char *buffer;
   unsigned int size;
   unsigned int dirty;
+  unsigned int total;
   unsigned int decoder_flags;
   unsigned int encoder_flags;
   unsigned int motion_flags;
   unsigned int syntax_flags;
   unsigned int shape_flags;
   unsigned int rate_flags;
+  unsigned int monitor_flags;
   fame_decoder_t *decoder;
   fame_encoder_t *encoder;
   fame_motion_t *motion;
   fame_syntax_t *syntax;
   fame_shape_t *shape;
   fame_rate_t *rate;
+  fame_monitor_t *monitor;
+  char current_coding, next_coding;
+  int intra, inter;
+  fame_frame_statistics_t *frame_stats;
+  int slice_start;
+  fame_box_t bounding_box;
 } fame_profile_mpeg_t;
 
 #define FAME_PROFILE_MPEG(x) ((fame_profile_mpeg_t *) x)

Index: fame_profile_mpeg1.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_profile_mpeg1.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_profile_mpeg1.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_profile_mpeg1.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -76,7 +76,7 @@
 
   FAME_PROFILE_MPEG1(profile)->FAME_OVERLOADED(init)(profile, context, params, buffer, size);
 
-  if(profile_mpeg->encoder == NULL)
+  if(profile_mpeg->decoder == NULL)
     FAME_ERROR("Could not find decoder object");
   if(profile_mpeg->encoder == NULL)
     FAME_ERROR("Could not find encoder object");

Index: fame_profile_mpeg4_shape.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_profile_mpeg4_shape.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_profile_mpeg4_shape.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_profile_mpeg4_shape.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -74,7 +74,10 @@
   fame_register(context, "shape",  fame_get_object(context, "shape"));
   FAME_PROFILE_MPEG(profile)->motion_flags =
     FAME_MOTION_SUBPEL_SEARCH | 
-    FAME_MOTION_BLOCK_SEARCH;
+    FAME_MOTION_UNRESTRICTED_SEARCH | 
+    FAME_MOTION_BLOCK_SEARCH |
+    FAME_MOTION_FLIP_ROUNDING;
+
   FAME_PROFILE_MPEG(profile)->shape_flags =
     (params->shape_quality == 100) ? FAME_SHAPE_LOSSLESS : 0;
   FAME_PROFILE_MPEG(profile)->syntax_flags =
@@ -83,7 +86,7 @@
 
   FAME_PROFILE_MPEG4_SHAPE(profile)->FAME_OVERLOADED(init)(profile, context, params, buffer, size);
 
-  if(profile_mpeg->encoder == NULL)
+  if(profile_mpeg->decoder == NULL)
     FAME_ERROR("Could not find decoder object");
   if(profile_mpeg->encoder == NULL)
     FAME_ERROR("Could not find encoder object");

Index: fame_profile_mpeg4_simple.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_profile_mpeg4_simple.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_profile_mpeg4_simple.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_profile_mpeg4_simple.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -72,19 +72,17 @@
   fame_register(context, "motion", fame_get_object(context, "motion"));
   fame_register(context, "syntax", fame_get_object(context, "syntax/mpeg4"));
   fame_register(context, "shape", NULL);
-  /* TEMP : disable 8x8 search, as motion coding overhead is too large in most cases */
+
   /* TODO : adaptive motion range using slices */
-#if 0
   FAME_PROFILE_MPEG(profile)->motion_flags =
     FAME_MOTION_SUBPEL_SEARCH | 
-    FAME_MOTION_BLOCK_SEARCH;
-#else
-  FAME_PROFILE_MPEG(profile)->motion_flags =
-    FAME_MOTION_SUBPEL_SEARCH;
-#endif
+    FAME_MOTION_UNRESTRICTED_SEARCH | 
+    FAME_MOTION_BLOCK_SEARCH |
+    FAME_MOTION_FLIP_ROUNDING;
+
   FAME_PROFILE_MPEG4_SIMPLE(profile)->FAME_OVERLOADED(init)(profile, context, params, buffer, size);
 
-  if(profile_mpeg->encoder == NULL)
+  if(profile_mpeg->decoder == NULL)
     FAME_ERROR("Could not find decoder object");
   if(profile_mpeg->encoder == NULL)
     FAME_ERROR("Could not find encoder object");

Index: fame_rate.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_rate.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_rate.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_rate.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -21,24 +21,23 @@
 #include <stdlib.h>
 #include "fame.h"
 #include "fame_rate.h"
-#ifdef HAS_MMX
-#include "mad_mmx.h"
-#else
-#include "mad_int.h"
-#endif
+#include "fame_monitor.h"
 
 static void rate_init(fame_rate_t *rate,
 		      int mb_width,
 		      int mb_height,
 		      int bitrate,
 		      char *coding,
+		      fame_frame_statistics_t *stats_list,
+		      fame_global_statistics_t *global_stats,
 		      unsigned int flags);
 static void rate_close(fame_rate_t *rate);
 static void rate_enter(fame_rate_t *rate,
 		       fame_yuv_t **ref,
 		       fame_yuv_t *current,
 		       unsigned char *shape,
-		       char coding);
+		       char coding,
+		       fame_frame_statistics_t *frame_stats);
 static int rate_global_estimation(fame_rate_t *rate);
 static int rate_local_estimation(fame_rate_t *rate,
 				 int mb_x, int mb_y,
@@ -77,30 +76,21 @@
 		      int mb_height,
 		      int bitrate,
 		      char *coding,
+		      fame_frame_statistics_t *stats_list,
+		      fame_global_statistics_t *global_stats,
 		      unsigned int flags)
 {
-  int ni, np;
-  int ratio;
-  int i;
-
   rate->mb_width = mb_width;
   rate->mb_height = mb_height;
-  rate->flags &= flags;
+  rate->bitrate = bitrate;
   rate->available = 0;
+  rate->spent = 0;
   rate->global_scale = 8;
-  ni = np = 0;
-  for(i = 0; i < strlen(coding); i++) {
-    switch(coding[i]) {
-      case 'I': ni++; break;
-      case 'P': np++; break;
-    }
-  }
-  ratio = 1;
-  rate->P_bits = bitrate * (np + ni) / (np + ratio * ni);
-  rate->I_bits = ratio * rate->P_bits;
-  rate->quant_step = 16;
-  rate->I_factor = (3 << 8);
-  rate->P_factor = (6 << 8);
+  rate->flags &= flags;
+  if (stats_list) 
+    rate->flags |= FAME_RATE_SECOND_PASS;
+  else
+    rate->flags &= ~FAME_RATE_SECOND_PASS;
 }
 
 /*  rate_close                                                               */
@@ -136,37 +126,18 @@
 		       fame_yuv_t **ref,
 		       fame_yuv_t *current,
 		       unsigned char *shape,
-		       char coding)
+		       char coding,
+		       fame_frame_statistics_t *frame_stats)
 {
   rate->ref = ref;
   rate->current = current;
   rate->shape = shape;
   rate->coding = coding;
-  /* estimate activity */
-  { 
-    int bx, by;
-    int a, p;
-    unsigned long m;
-    unsigned char *input;
-      
-    a = 0;
-    p = rate->mb_width*16;
-    input = rate->current->y;
-    for(by = 0; by < rate->mb_height*2; by++) {
-      for(bx = 0; bx < rate->mb_width*2; bx++) {
-	mad_withoutmask(input, p, &m);
-	a+=m;
-	input+=8;
-      }
-      input += (p << 3) - p;
-    }
-    rate->activity = a;
-  }
 
-  switch(coding) {
-    case 'I': rate->available += rate->I_bits; break;
-    case 'P': rate->available += rate->P_bits; break;
-  };
+  if (frame_stats) 
+    {
+      frame_stats->target_bits = rate->available;      
+    }
 }
 
 /*  rate_leave                                                               */
@@ -181,36 +152,14 @@
 /*    Rate.                                                                  */
 
 static void rate_leave(fame_rate_t *rate, int spent)
-{
-  switch(rate->coding) {
-    case 'I' : rate->I_factor = (rate->activity << 8)/(rate->global_scale*spent); break;
-    case 'P' : rate->P_factor = (rate->activity << 8)/(rate->global_scale*spent); break;
-  }
+{  
   rate->spent = spent;
   rate->available -= spent;
 }
 
 static int rate_global_estimation(fame_rate_t *rate)
 {
-  int quant_scale;
-  unsigned int factor;
-
-  switch(rate->coding) {
-    case 'I': factor = rate->I_factor; break;
-    case 'P': factor = rate->P_factor; break;
-    default: factor = 0;
-  }
-  if(rate->available > 0 && factor > 0) 
-    quant_scale = (rate->activity << 8)/(factor*rate->available);
-  else
-    quant_scale = 31;
- 
-  if(quant_scale < rate->global_scale/2) quant_scale = rate->global_scale/2;
-  if(quant_scale > rate->global_scale*2) quant_scale = rate->global_scale*2;
-  if(quant_scale < 2) quant_scale = 2;
-  if(quant_scale > 31) quant_scale = 31;
-  rate->global_scale = quant_scale;
-  return(quant_scale);
+  return(rate->global_scale);
 }
 
 static int rate_local_estimation(fame_rate_t *rate,
@@ -219,3 +168,15 @@
 {
   return(rate->global_scale);
 }
+
+
+
+
+
+
+
+
+
+
+
+

Index: fame_rate.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_rate.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_rate.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_rate.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -21,6 +21,9 @@
 #define __FAME_RATE_H
 
 #include "fame.h"
+#include "fame_monitor.h"
+
+#define FAME_RATE_SECOND_PASS 1
 
 typedef struct _fame_rate_t_ {
   FAME_EXTENDS(fame_object_t);
@@ -30,13 +33,16 @@
 		int mb_height,
 		int bitrate,
 		char *coding,
+		fame_frame_statistics_t *stats_list,
+		fame_global_statistics_t *global_stats,
 		unsigned int flags);
   void (* close)(struct _fame_rate_t_ *rate);
   void (* enter)(struct _fame_rate_t_ *rate,
 		 fame_yuv_t **ref,
 		 fame_yuv_t *current,
 		 unsigned char *shape,
-		 char coding);
+		 char coding,
+		 fame_frame_statistics_t *frame_stats);
   int (* global_estimation)(struct _fame_rate_t_ *rate);
   int (* local_estimation)(struct _fame_rate_t_ *rate,
 			   int mb_x, int mb_y,
@@ -52,13 +58,11 @@
   char coding;
   int bitrate;
   int available;
-  int global_scale;
-  int I_bits, P_bits;
-  int activity;
-  int activity_diff;
   int spent;
-  int I_factor, P_factor;
-  int quant_step;
+  int global_scale;
+  float coeff1, coeff2;
+  int total_frames;
+  fame_frame_statistics_t *stats_list;
   unsigned int flags;
 } fame_rate_t;
 

Index: fame_shape.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_shape.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_shape.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_shape.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -143,6 +143,7 @@
 {
   shape->mb_width = mb_width;
   shape->mb_height = mb_height;
+  shape->pitch = (shape->mb_width << 4);
   shape->flags = flags;
 }
 
@@ -198,7 +199,7 @@
 
   *bab = NULL;
   m_alpha_th = (shape->alpha_th>>4);
-  pitch = (shape->mb_width << 4);
+  pitch = shape->pitch;
   pitch4 = pitch<<2;
   offset = (mb_y << 4) * pitch + (mb_x << 4);
 

Index: fame_shape.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_shape.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_shape.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_shape.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -44,6 +44,7 @@
 
   int mb_width;
   int mb_height;
+  int pitch;
   unsigned char *input;
   unsigned char *recon;
   unsigned char bab16x16[20][20];

Index: fame_syntax.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_syntax.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_syntax.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_syntax.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -30,9 +30,10 @@
   void (* init)            (struct _fame_syntax_t_ *syntax,
 			    int mb_width,
 			    int mb_height,
-			    unsigned int search_range,
 			    unsigned char **intra_default_matrix,
 			    unsigned char **inter_default_matrix,
+			    unsigned char *intra_dc_y_scale_table,
+			    unsigned char *intra_dc_c_scale_table,
 			    fame_mismatch_t *mismatch_type,
 			    unsigned int flags);
   void (* use)             (struct _fame_syntax_t_ *syntax,
@@ -51,14 +52,14 @@
   void (* start_picture)   (struct _fame_syntax_t_ *syntax,
 			    char frame_type,
 			    int frame_number,
-			    fame_box_t *box);
+			    fame_box_t *box,
+			    int rounding_control,
+			    int search_range);
   void (* start_slice)     (struct _fame_syntax_t_ *syntax,
 			    int vpos,
 			    int length,
-			    unsigned char qscale,
-			    unsigned char *intra_y_scale,
-			    unsigned char *intra_c_scale);
-  void (* end_picture)     (struct _fame_syntax_t_ *syntax);
+			    unsigned char qscale);
+  void (* end_slice)       (struct _fame_syntax_t_ *syntax);
   void (* end_sequence)    (struct _fame_syntax_t_ *syntax);
   void (* predict_vector)  (struct _fame_syntax_t_ *syntax,
 			    int mb_x,
@@ -66,20 +67,25 @@
 			    int k,
 			    fame_motion_vector_t *mv);
   void (* compute_chrominance_vectors)(struct _fame_syntax_t_ *syntax,
-				       struct _fame_motion_vector_t_ *vectors);
+				       struct _fame_motion_vector_t_ *vectors,
+				       unsigned char pattern);
   void (* write_intra_mb)  (struct _fame_syntax_t_ *syntax,
 			    int mb_x,
 			    int mb_y,
 			    short *blocks[6],
 			    unsigned char *bab,
+			    unsigned char *bab_map,
 			    fame_bab_t bab_type,
+			    unsigned char dquant,
 			    unsigned char pattern);
   void (* write_inter_mb)  (struct _fame_syntax_t_ *syntax,
 			    int mb_x,
 			    int mb_y,
 			    short *blocks[6],
 			    unsigned char *bab,
+			    unsigned char *bab_map,
 			    fame_bab_t bab_type,
+			    unsigned char dquant,
 			    unsigned char pattern,
 			    fame_motion_vector_t *forward,
 			    fame_motion_vector_t *backward,

Index: fame_syntax_mpeg1.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_syntax_mpeg1.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_syntax_mpeg1.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_syntax_mpeg1.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "fame.h"
+#include "fame_malloc.h"
 #include "fame_syntax.h"
 #include "fame_bitbuffer.h"
 #include "fame_syntax_mpeg1.h"
@@ -47,9 +48,10 @@
 static void mpeg1_init(fame_syntax_t *syntax, 
 		       int mb_width,
 		       int mb_height,
-		       unsigned int search_range,
  		       unsigned char **intra_default_matrix,
 		       unsigned char **inter_default_matrix,
+		       unsigned char *intra_dc_y_scale_table,
+		       unsigned char *intra_dc_c_scale_table,
 		       fame_mismatch_t *mismatch_type,
 		       unsigned int flags);
 static void mpeg1_close(fame_syntax_t *syntax);
@@ -69,14 +71,14 @@
 static void mpeg1_start_picture(fame_syntax_t *syntax,
 				char frame_type,
 				int frame_number,
-				fame_box_t *box);
+				fame_box_t *box,
+				int rounding_control,
+				int search_range);
 static void mpeg1_start_slice(fame_syntax_t *syntax,
 			      int vpos,
 			      int length,
-			      unsigned char qscale,
-			      unsigned char *intra_y_scale,
-			      unsigned char *intra_c_scale);
-static void mpeg1_end_picture(fame_syntax_t *syntax);
+			      unsigned char qscale);
+static void mpeg1_end_slice(fame_syntax_t *syntax);
 static void mpeg1_end_sequence(fame_syntax_t *syntax);
 static void mpeg1_predict_vector(fame_syntax_t *syntax,
 				 int mb_x,
@@ -84,20 +86,25 @@
 				 int k,
 				 fame_motion_vector_t *mv);
 static void mpeg1_compute_chrominance_vectors(fame_syntax_t *syntax,
-					      fame_motion_vector_t *vectors);
+					      fame_motion_vector_t *vectors,
+					      unsigned char pattern);
 static void mpeg1_write_intra_mb(fame_syntax_t *syntax,
 				 int mb_x,
 				 int mb_y,
 				 short *blocks[6],
 				 unsigned char *bab,
+				 unsigned char *bab_map,
 				 fame_bab_t bab_type,
+				 unsigned char dquant,
 				 unsigned char pattern);
 static void mpeg1_write_inter_mb(fame_syntax_t *syntax,
 				 int mb_x,
 				 int mb_y,
 				 short *blocks[6],
 				 unsigned char *bab,
+				 unsigned char *bab_map,
 				 fame_bab_t bab_type,
+				 unsigned char dquant,
 				 unsigned char pattern,
 				 fame_motion_vector_t *forward,
 				 fame_motion_vector_t *backward,
@@ -113,7 +120,7 @@
   FAME_SYNTAX(this)->start_GOP = mpeg1_start_GOP;
   FAME_SYNTAX(this)->start_picture = mpeg1_start_picture;
   FAME_SYNTAX(this)->start_slice = mpeg1_start_slice;
-  FAME_SYNTAX(this)->end_picture = mpeg1_end_picture;
+  FAME_SYNTAX(this)->end_slice = mpeg1_end_slice;
   FAME_SYNTAX(this)->end_sequence = mpeg1_end_sequence;
   FAME_SYNTAX(this)->predict_vector = mpeg1_predict_vector;
   FAME_SYNTAX(this)->compute_chrominance_vectors = mpeg1_compute_chrominance_vectors;
@@ -159,28 +166,16 @@
 static void mpeg1_init(fame_syntax_t *syntax,
 		       int mb_width,
 		       int mb_height,
-		       unsigned int search_range,
  		       unsigned char **intra_matrix,
 		       unsigned char **inter_matrix,
+		       unsigned char *intra_dc_y_scale_table,
+		       unsigned char *intra_dc_c_scale_table,
 		       fame_mismatch_t *mismatch_type,
 		       unsigned int flags)
 {
   fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
+  int qscale;
 
-  if(search_range < 8)
-    syntax_mpeg1->f_code = 1;
-  else if(search_range < 16)
-    syntax_mpeg1->f_code = 2;
-  else if(search_range < 32)
-    syntax_mpeg1->f_code = 3;
-  else if(search_range < 64)
-    syntax_mpeg1->f_code = 4;
-  else if(search_range < 128)
-    syntax_mpeg1->f_code = 5;
-  else if(search_range < 256)
-    syntax_mpeg1->f_code = 6;
-  else
-    syntax_mpeg1->f_code = 7;
   syntax_mpeg1->mb_width = mb_width;
   syntax_mpeg1->mb_height = mb_height;
 
@@ -188,11 +183,12 @@
   syntax_mpeg1->fps_den =  1;
 
   /* initialize vlc_table */
-  syntax_mpeg1->vlc_table = (fame_vlc_t *) malloc(64*511*sizeof(fame_vlc_t));
+  syntax_mpeg1->vlc_table = (fame_vlc_t *) fame_malloc(64*511*sizeof(fame_vlc_t));
   mpeg1_init_vlc_table(syntax_mpeg1->vlc_table);
   /* center vlc_table */
   syntax_mpeg1->vlc_table += 64*255;
 
+  /* fill in quantization tables */
   if(*intra_matrix) {
     /* TODO: flag to write matrix to bitstream */
     /* TEMP: use default matrix instead */
@@ -211,6 +207,13 @@
     *inter_matrix = mpeg1_inter_quantisation_table;
   }
 
+  /* fill in intra DC quantizer values */
+  for(qscale = 0; qscale < 32; qscale++) {
+    /* MPEG-1 uses linear quantization */
+    intra_dc_y_scale_table[qscale] = 8;
+    intra_dc_c_scale_table[qscale] = 8;
+  }
+
   /* MPEG-1 uses local mismatch control */
   *mismatch_type = fame_mismatch_local;
 }
@@ -220,7 +223,7 @@
   fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
 
   syntax_mpeg1->vlc_table -= 64*255;  /* uncenter vlc_table */
-  free(syntax_mpeg1->vlc_table);
+  fame_free(syntax_mpeg1->vlc_table);
 }
 
 static void mpeg1_use(fame_syntax_t *syntax,
@@ -327,10 +330,31 @@
   bitbuffer_write(&syntax_mpeg1->buffer, 0, 5);
 }
 
-static void mpeg1_start_picture(fame_syntax_t *syntax, char frame_type, int frame_number, fame_box_t *box)
+static void mpeg1_start_picture(fame_syntax_t *syntax,
+				char frame_type,
+				int frame_number,
+				fame_box_t *box,
+				int rounding_control,
+				int search_range)
 {
   fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
 
+  /* compute fcode */
+  if(search_range < 8)
+    syntax_mpeg1->f_code = 1;
+  else if(search_range < 16)
+    syntax_mpeg1->f_code = 2;
+  else if(search_range < 32)
+    syntax_mpeg1->f_code = 3;
+  else if(search_range < 64)
+    syntax_mpeg1->f_code = 4;
+  else if(search_range < 128)
+    syntax_mpeg1->f_code = 5;
+  else if(search_range < 256)
+    syntax_mpeg1->f_code = 6;
+  else
+    syntax_mpeg1->f_code = 7;
+
   switch(frame_type) {
     case 'I':
       syntax_mpeg1->frame_type = frame_type_I;
@@ -381,21 +405,16 @@
 static void mpeg1_start_slice(fame_syntax_t *syntax,
 			      int vpos,
 			      int length,
-			      unsigned char qscale,
-			      unsigned char *intra_y_scale,
-			      unsigned char *intra_c_scale)
+			      unsigned char qscale)
 {
   fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
 
-  /* MPEG-1 uses linear quantization */
-  *intra_y_scale = 8;
-  *intra_c_scale = 8;
-
   /* reset the predictors to their original values */
   syntax_mpeg1->y_dc_pred = 128;
-  syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 0;
+  syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 128;
   syntax_mpeg1->mv_pred.dx = syntax_mpeg1->mv_pred.dy = 0;
-  syntax_mpeg1->prev_mb_addr = -1;
+  syntax_mpeg1->slice_start = syntax_mpeg1->prev_mb_addr = 
+    vpos * syntax_mpeg1->mb_width - 1;
   syntax_mpeg1->slice_length = length;
   syntax_mpeg1->previous_coding = motion_intra;
   /* new slice starting at vpos      */
@@ -406,7 +425,7 @@
   bitbuffer_write(&syntax_mpeg1->buffer, 0, 1);
 }
 
-static void mpeg1_end_picture(fame_syntax_t *syntax)
+static void mpeg1_end_slice(fame_syntax_t *syntax)
 {
   fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
 
@@ -437,20 +456,21 @@
 {
   fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
   short v;
+  fame_bitbuffer_t * const buffer = &syntax_mpeg1->buffer;
+  unsigned char * data = buffer->data;
+  unsigned long shift = buffer->shift;
 
   /* encode DC coefficient */
   v = block[0] - *pred;
 
-  v = mpeg1_table_clip[2048+v];
+  v = mpeg1_table_clip[v];
   *pred += v;
-  bitbuffer_write(&syntax_mpeg1->buffer,
-		  table[v+255].code,
-		  table[v+255].length);
+  fast_bitbuffer_write(data, shift, table[v+255].code, table[v+255].length);
 
   /* encode AC coefficients */
 #if defined(HAS_BSWAP)
   {
-    unsigned long dummy1, dummy2, dummy3, dummy4;
+    unsigned long dummy1, dummy2;
 
     /* Note:
         movsx mpeg1_table_clip+4096(, %%eax ,2), %%eax
@@ -460,63 +480,75 @@
        because the first instruction failed on a PIII!! (wrong sign extension)
        whereas it worked well on my P75 :)
     */
+    /* OK, a bit of explanation for a couple of tricks:
+         The DC value of block is already coded and stored in v, so we can reuse its slot to store something.
+	 We add one index to the zigzag table so that after coding block[63] we go to index 0. There
+	 we need to escape the zero-counting loop (1), which we ensure by putting a non-zero value in
+	 the DC coefficient. Then we can test for index == 0 to exit.
+	 Now this non-zero value is a bit special :)
+	 In order to have one more 'half' register, we store the sp value (the 16 least significant bits
+	 of the 32-bit register esp) *plus one* in the DC coefficient. Since the stack is aligned to an
+	 address that is a multiple of 4 bytes (at least), we are sure that sp != 0xffff and thus sp+1
+	 will never be zero. We then retrieve sp at the end, since it is needed by the 'pop' instructions.
+     */
 
-    block[0] = 1; /* non-zero value to stop the rle loop */
     __asm__ __volatile__ ("pushl %%ebx\n"             /* save ebx            */
 			  "pushl %%ebp\n"             /* save stack pointer  */
+			  "inc %%sp\n"                /* make sure sp != 0   */
+			  "movw %%sp, (%%edx)\n"      /* store sp+1 in DC ;) */
 			  "movl %%esi, %%ebp\n"       /* ebp = vlc_table     */
-			  "xorl %%ecx, %%ecx\n"       /* ecx = 0             */
+			  "xorl %%eax, %%eax\n"       /* eax = 0             */
 			  "movl $" ASMSYM "mpeg1_zigzag_table+1, %%esi\n" /*esi = zigzag*/
 			  "lea 1(%%esi), %%ebx\n"     /* ebx = zigzag_table+1*/
 			  "neg %%ebx\n"               /* ebx = -(esi+1)      */
 			  ".p2align 4,,7\n"           /* align for jump      */
-			  "0: xorl %%eax, %%eax\n"    /* eax = 0             */
-			  "1: movb (%%esi), %%cl\n"   /* ecx = index in block*/
+			  "0: xorw %%sp, %%sp\n"      /* sp = 0              */
+			  "1: movb (%%esi), %%al\n"   /* eax = index in block*/
 			  "incl %%esi\n"              /* (faster than lodsb) */
-			  "addw (%%edx, %%ecx, 2), %%ax\n" /* eax = unzig    */
+			  "addw (%%edx, %%eax, 2), %%sp\n" /* sp = unzig     */
 			  "jz 1b\n"                   /* coeff == 0 then loop*/
-			  "orl %%ecx, %%ecx\n"        /* index == 0 then quit*/
+			  "orl %%eax, %%eax\n"        /* index == 0 then quit*/
 			  "jz 2f\n"                   /* (faster than jcxz)  */
-			  "movsx %%ax, %%eax\n"       /* extend sign         */
-			  "movw " ASMSYM "mpeg1_table_clip+4096(, %%eax ,2), %%ax\n" /*clip*/
+			  "movsx %%sp, %%eax\n"       /* extend sign         */
+			  "movw " ASMSYM "mpeg1_table_clip_data+4096(, %%eax ,2), %%ax\n" /*clip*/
 			  "movsx %%ax, %%eax\n"       /* extend sign         */
 			  "addl %%esi, %%ebx\n"       /* ebx = run           */
 			  "shll $7, %%eax\n"          /* eax *= 128(indexing)*/
 			  "lea (%%eax, %%ebx, 2), %%eax\n" /*eax = 2 * offset*/
 			  "lea (%%ebp, %%eax, 4), %%ebx\n" /* ebx = &vlc     */
 			  "movl (%%ebx), %%eax\n"     /* eax = code          */
-			  "movl 4(%%ebx), %%ecx\n"    /* ecx = length        */
-			  "addl 8(%%edi), %%ecx\n"    /* ecx = shift + length*/
+			  "addl 4(%%ebx), %%ecx\n"    /* ecx = shift+=length */
 			  "xorl %%ebx, %%ebx\n"       /* ebx = 0             */
 			  "shrd %%cl, %%eax, %%ebx\n" /* adjust code to fit  */
-			  "movl %%ecx, 8(%%edi)\n"    /* shift += length     */
 			  "shr %%cl, %%eax\n"         /* adjust code to fit  */
 			  "bswap %%eax\n"      /* reverse byte order of code */
-			  "movl 4(%%edi), %%ecx\n"    /* ecx = data          */
 			  "bswap %%ebx\n"      /* reverse byte order of code */
-			  "or %%eax, (%%ecx)\n"       /* put first 32 bits   */
-			  "movl 8(%%edi), %%eax\n"    /* eax = shift + length*/
+			  "or %%eax, (%%edi)\n"       /* put first 32 bits   */
+			  "movl %%ecx, %%eax\n"       /* eax = shift + length*/
 			  "shrl $5, %%eax\n"          /* get dword increment */
-			  "andl $31, 8(%%edi)\n"      /* mask shift          */
-			  "lea   (%%ecx, %%eax, 4), %%ecx\n"/* data+=(ecx>32)*/
-			  "orl %%ebx, (%%ecx)\n"      /* put last 32 bits    */
-			  "movl %%ecx, 4(%%edi)\n"    /* save data           */
-			  "xorl %%ecx, %%ecx\n"       /* ecx = 0             */
+			  "andl $31, %%ecx\n"         /* mask shift          */
+			  "lea   (%%edi, %%eax, 4), %%edi\n"/* data+=(ecx>32)*/
+			  "orl %%ebx, (%%edi)\n"      /* put last 32 bits    */
+			  "xorl %%eax, %%eax\n"       /* eax = 0             */
 			  "lea 1(%%esi), %%ebx\n"     /* ebx = esi + 1 (last)*/
 			  "neg %%ebx\n"               /* ebx = -(esi + 1)    */
 			  "jmp 0b\n"                  /* loop                */
-			  "2: popl %%ebp\n"           /* reload stack pointer*/
+			  "2:\n"
+			  "movw (%%edx), %%sp\n"      /* retrieve sp+1       */
+			  "dec %%sp\n"                /* restore esp */
+			  "popl %%ebp\n"              /* reload stack pointer*/
 			  "popl %%ebx\n"              /* reload ebx          */
-			  : "=c"(dummy1),
-			    "=a"(dummy2),
+			  : "=c"(shift),
+			    "=a"(dummy1),
 			    "=d"(block),
-			    "=D"(dummy3),
-			    "=S"(dummy4)
+			    "=D"(data),
+			    "=S"(dummy2)
 			  : "d"(block),
-			    "D"(&syntax_mpeg1->buffer),
+			    "c"(shift),
+			    "D"(data),
                             "S"(syntax_mpeg1->vlc_table)
 			  : "memory");
-    block[0] = v;
+    block[0] = v; /* restore DC value */
   }
 #else
   {
@@ -532,8 +564,8 @@
       /* count zeroes */
       if(v != 0) {
 	/* write code */
-	vlc = syntax_mpeg1->vlc_table + (mpeg1_table_clip[2048+v] << 6) + i - last;
-	bitbuffer_write(&syntax_mpeg1->buffer, vlc->code, vlc->length);
+	vlc = syntax_mpeg1->vlc_table + (mpeg1_table_clip[v] << 6) + i - last;
+	fast_bitbuffer_write(data, shift, vlc->code, vlc->length);
 
 	/* reset zeroes count */
 	last = i+1;
@@ -543,7 +575,10 @@
 #endif /* HAS_BSWAP */
 
   /* mark end of block */
-  bitbuffer_write(&syntax_mpeg1->buffer, 2, 2);
+  fast_bitbuffer_write(data, shift, 2, 2);
+
+  buffer->data = data;
+  buffer->shift = shift;
 }
 
 static void mpeg1_write_intra_mb(fame_syntax_t *syntax,
@@ -551,7 +586,9 @@
 				 int mb_y,
 				 short *blocks[6],
 				 unsigned char *bab,
+				 unsigned char *bab_map,
 				 fame_bab_t bab_type,
+				 unsigned char dquant,
 				 unsigned char pattern)
 {
   fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
@@ -572,10 +609,18 @@
 
   switch(syntax_mpeg1->frame_type) {
     case frame_type_I:
-      bitbuffer_write(&syntax_mpeg1->buffer, 1, 1); /* macroblock type   */
+      if(dquant) {
+	bitbuffer_write(&syntax_mpeg1->buffer, 1, 2); /* intra and dquant*/
+      } else {
+	bitbuffer_write(&syntax_mpeg1->buffer, 1, 1); /* intra coded */
+      }
     break;
     case frame_type_P:
-      bitbuffer_write(&syntax_mpeg1->buffer, 3, 5); /* intra coded */
+      if(dquant) {
+	bitbuffer_write(&syntax_mpeg1->buffer, 1, 6); /* intra and dquant */
+      } else {
+	bitbuffer_write(&syntax_mpeg1->buffer, 3, 5); /* intra coded */
+      }
     break;
   }
 
@@ -596,15 +641,18 @@
   short i, v;
   unsigned long last;
   fame_vlc_t const *vlc;
+  fame_bitbuffer_t * const buffer = &syntax_mpeg1->buffer;
+  unsigned char * data = buffer->data;
+  unsigned long shift = buffer->shift;
 
   /* TODO: optimized loop if HAS_BSWAP */
   /* encode DC coefficient */
-  v = mpeg1_table_clip[2048+block[0]];
+  v = mpeg1_table_clip[block[0]];
   if(v == 1) {
-    bitbuffer_write(&syntax_mpeg1->buffer, 2, 2);
+    fast_bitbuffer_write(data, shift, 2, 2);
     i = 1;
   } else if(v == -1) {
-    bitbuffer_write(&syntax_mpeg1->buffer, 3, 2);
+    fast_bitbuffer_write(data, shift, 3, 2);
     i = 1;
   } else {
     i = 0;
@@ -619,8 +667,8 @@
     /* count zeroes */
     if(v != 0) {
       /* write code */
-      vlc = syntax_mpeg1->vlc_table + (mpeg1_table_clip[2048+v] << 6) + i - last;
-      bitbuffer_write(&syntax_mpeg1->buffer, vlc->code, vlc->length);
+      vlc = syntax_mpeg1->vlc_table + (mpeg1_table_clip[v] << 6) + i - last;
+      fast_bitbuffer_write(data, shift, vlc->code, vlc->length);
       
       /* reset zeroes count */
       last = i+1;
@@ -628,7 +676,10 @@
   }
 
   /* mark end of block */
-  bitbuffer_write(&syntax_mpeg1->buffer, 2, 2);
+  fast_bitbuffer_write(data, shift, 2, 2);
+
+  buffer->data = data;
+  buffer->shift = shift;
 }
 
 static void inline mpeg1_write_vector(fame_syntax_t *syntax, short delta)
@@ -648,17 +699,19 @@
     length = 8 << f_code;
     f_code--;
 
+    if(delta >= length) {
+      delta = delta - length - length;
+    }
+
+    if(delta < -length) {
+      delta = delta + length + length;
+    }
+
     if(delta > 0) {
-      if(delta >= length) {
-	delta = delta - length - length;
-      }
       delta--;
       residual = delta & ((1 << f_code) - 1);
       code = ((delta - residual) >> f_code) + 1;
     } else {
-      if(delta < -length) {
-	delta = delta + length + length;
-      }
       delta = -delta;
       delta--;
       residual = delta & ((1 << f_code) - 1);
@@ -680,7 +733,9 @@
 				 int mb_y,
 				 short *blocks[6],
 				 unsigned char *bab,
+				 unsigned char *bab_map,
 				 fame_bab_t bab_type,
+				 unsigned char dquant,
 				 unsigned char pattern,
 				 fame_motion_vector_t *forward,
 				 fame_motion_vector_t *backward,
@@ -731,11 +786,11 @@
     cbp |= 0x01;
 
   if(!cbp && !motion_forward && !motion_backward && 
-     syntax_mpeg1->prev_mb_addr != -1 &&
+     syntax_mpeg1->prev_mb_addr != syntax_mpeg1->slice_start &&
      (syntax_mpeg1->prev_mb_addr + incr) != (syntax_mpeg1->slice_length - 1)) {
     /* reset the DC predictors */
     syntax_mpeg1->y_dc_pred = 128;
-    syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 0;
+    syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 128;
     /* reset the motion predictors */
     syntax_mpeg1->mv_pred.dx = syntax_mpeg1->mv_pred.dy = 0;
     /* skip macroblock */
@@ -756,12 +811,20 @@
     case frame_type_P:
       motion_backward = 0;
       if(!motion_forward) {
-	bitbuffer_write(&syntax_mpeg1->buffer, 1, 2); /* no motion, pattern */
+	if(dquant) {
+	  bitbuffer_write(&syntax_mpeg1->buffer, 1, 5); /*no motion,pat,dq   */
+	} else {
+	  bitbuffer_write(&syntax_mpeg1->buffer, 1, 2); /*no motion,pat,nodq */
+	}
       } else {
 	if(!cbp) {
 	  bitbuffer_write(&syntax_mpeg1->buffer, 1, 3); /* motion,no pattern */
 	} else {
-	  bitbuffer_write(&syntax_mpeg1->buffer, 1, 1); /* motion,pattern */
+	  if(dquant) {
+	    bitbuffer_write(&syntax_mpeg1->buffer, 2, 5); /* motion,pat,dq */
+	  } else {
+	    bitbuffer_write(&syntax_mpeg1->buffer, 1, 1); /* motion,pat,no dq*/
+	  }
 	}
       }
     break;
@@ -799,11 +862,12 @@
 
   /* reset the predictors to their original values */
   syntax_mpeg1->y_dc_pred = 128;
-  syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 0;
+  syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 128;
 }
 
 static void mpeg1_compute_chrominance_vectors(fame_syntax_t *syntax,
-					      fame_motion_vector_t *vectors)
+					      fame_motion_vector_t *vectors,
+					      unsigned char pattern)
 {
   int x, y;
 

Index: fame_syntax_mpeg1.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_syntax_mpeg1.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_syntax_mpeg1.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_syntax_mpeg1.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -36,6 +36,7 @@
   frame_type_t frame_type;
   unsigned char f_code;
   unsigned int prev_mb_addr;
+  unsigned int slice_start;
   unsigned int slice_length;
   int mb_width, mb_height;
   fame_motion_coding_t previous_coding;

Index: fame_syntax_mpeg4.c
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_syntax_mpeg4.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_syntax_mpeg4.c	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_syntax_mpeg4.c	1 Jun 2002 20:23:10 -0000	1.2
@@ -22,6 +22,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "fame.h"
+#include "fame_malloc.h"
 #include "fame_syntax.h"
 #include "fame_bitbuffer.h"
 #include "fame_syntax_mpeg4.h"
@@ -33,251 +34,11 @@
 #include "table_cbp_mpeg4.h"
 #include "table_mv.h"
 #include "table_clip_mpeg4.h"
[...1887 lines suppressed...]
+      else      vectors[4].dx = -((((-x) >> 3) << 1) + rounding_8[(-x) & 7]);
+      if(y > 0) vectors[4].dy = ((y >> 3) << 1) + rounding_8[y & 7];
+      else      vectors[4].dy = -((((-y) >> 3) << 1) + rounding_8[(-y) & 7]);
+      break;
+    case 3:
+      if(x > 0) vectors[4].dx = ((x / 12) << 1) + rounding_12[x % 12];
+      else      vectors[4].dx = -((((-x) / 12) << 1) + rounding_12[(-x) % 12]);
+      if(y > 0) vectors[4].dy = ((y / 12) << 1) + rounding_12[y % 12];
+      else      vectors[4].dy = -((((-y) / 12) << 1) + rounding_12[(-y) % 12]);
+      break;
+    case 4:
+      if(x > 0) vectors[4].dx = ((x >> 4) << 1) + rounding_16[x & 15];
+      else      vectors[4].dx = -((((-x) >> 4) << 1) + rounding_16[(-x) & 15]);
+      if(y > 0) vectors[4].dy = ((y >> 4) << 1) + rounding_16[y & 15];
+      else      vectors[4].dy = -((((-y) >> 4) << 1) + rounding_16[(-y) & 15]);
+      break;
+  }
   vectors[5].dx = vectors[4].dx;
   vectors[5].dy = vectors[4].dy;
 }

Index: fame_syntax_mpeg4.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_syntax_mpeg4.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_syntax_mpeg4.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_syntax_mpeg4.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -56,12 +56,11 @@
   fame_motion_vector_t *mv_pred; /* current motion predictors */
   fame_syntax_cae_t *cae_h;   /* horizontal context arithmetic encoder */
   fame_syntax_cae_t *cae_v;   /* vertical context arithmetic encoder */
-  unsigned char *prev_babs;   /* previously encoded bab types */
   fame_vlc_t *intra_table;    /* variable length table for AC coeffs coding */
   fame_vlc_t *inter_table;    /* variable length table for AC coeffs coding */
-  unsigned char y_dc_scaler;  /* intra DC scaler for the Y component */
-  unsigned char c_dc_scaler;  /* intra DC scaler for the C component */
-  short *symbol;              /* arithmetic symbol table */
+  unsigned char y_dc_scaler[32]; /* intra DC scaler for the Y component */
+  unsigned char c_dc_scaler[32]; /* intra DC scaler for the C component */
+  int *symbol;              /* arithmetic symbol table */
   char profile_and_level_indication;   /* Video Object Sequence */
   char is_visual_object_identifier;    /* Visual Object */
   char visual_object_verid;

Index: fame_version.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fame_version.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fame_version.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fame_version.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -22,9 +22,9 @@
 
 #ifndef LIBFAME_MAJOR_VERSION
 #define LIBFAME_MAJOR_VERSION (0)
-#define LIBFAME_MINOR_VERSION (8)
-#define LIBFAME_MICRO_VERSION (9)
-#define LIBFAME_VERSION "0.8.9"
+#define LIBFAME_MINOR_VERSION (9)
+#define LIBFAME_MICRO_VERSION (0)
+#define LIBFAME_VERSION "0.9.0"
 #endif
 
 extern const unsigned int libfame_major_version,

Index: fetch_float.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fetch_float.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fetch_float.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fetch_float.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -1,3 +1,4 @@
+
 /*
     libfame - Fast Assembly MPEG Encoder Library
     Copyright (C) 2000-2001 Vivien Chappelier
@@ -31,9 +32,10 @@
 /*  Return value:                                                            */
 /*    None.                                                                  */
 
-static void inline prefetch_Y_withoutmask(unsigned char *input,
-					  dct_t *output,
-					  int pitch)
+static void inline prefetch_withoutmask(unsigned char *input,
+					dct_t *output,
+					unsigned char *mask /* not used */, 
+					int pitch)
 {
   int i, j;
   
@@ -44,33 +46,6 @@
   }
 }
 
-/*  prefetch_C                                                               */
-/*                                                                           */
-/*  Description:                                                             */
-/*    Get a 8x8 block of data from the source image C plane,                 */
-/*    substracting 128 to translate the value in [-128,+127]                 */
-/*                                                                           */
-/*  Arguments:                                                               */
-/*    unsigned char *input: the source image, plane C                        */
-/*    dct_t *output: the output 8x8 block                                    */
-/*    int pitch: the number of bytes to the next line in the input plane     */
-/*                                                                           */
-/*  Return value:                                                            */
-/*    None.                                                                  */
-
-static void inline prefetch_C_withoutmask(unsigned char *input,
-					  dct_t *output,
-					  int pitch)
-{
-  int i, j;
-
-  for(i = 0; i < 8; i++) {
-    for(j = 0; j < 8; j++)
-      *output++ = (dct_t) input[j] - 128.0;
-    input += pitch;
-  }
-}
-
 /*  prefetch_Y_shape                                                         */
 /*                                                                           */
 /*  Description:                                                             */
@@ -170,7 +145,6 @@
 /*                                                                           */
 /*  Description:                                                             */
 /*    Get a 8x8 block of data from the source image C plane,                 */
-/*    substracting 128 to translate the value in [-128,+127]                 */
 /*    and perform LPE padding of arbitrary shape border block                */
 /*                                                                           */
 /*  Arguments:                                                               */
@@ -212,7 +186,6 @@
   }
 
   if(n) s /= n;
-  else s = 128;
 
   p = input;
   m = mask;
@@ -222,9 +195,9 @@
 	   m[(j<<1)+1] |
 	   m[(j<<1)+(pitch<<1)] |
 	   m[(j<<1)+(pitch<<1)+1]))
-	*output++ = (dct_t) s - 128.0;
+	*output++ = (dct_t) s;
       else
-	*output++ = (dct_t) p[j] - 128.0;
+	*output++ = (dct_t) p[j];
     }
     p += pitch;
     m += (pitch << 2);
@@ -323,14 +296,15 @@
 static void inline diff(unsigned char *input,
 		        unsigned char *ref,
 			dct_t *output,
-			int pitch)
+			int ipitch,
+			int rpitch)
 {
   int i, j;
 
   for(i = 0; i < 8; i++) {
     for(j = 0; j < 8; j++)
       *output++ = (dct_t) input[j] - (dct_t) ref[j];
-    input += pitch;
-    ref += pitch;
+    input += ipitch;
+    ref += rpitch;
   }
 }

Index: fetch_mmx.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/fetch_mmx.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fetch_mmx.h	13 Mar 2002 01:14:34 -0000	1.1
+++ fetch_mmx.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,12 +18,10 @@
 */
 /*********************** fetch data from current picture *********************/
 
-static short const _mmx_128[] = { 128, 128, 128, 128 };
-static char const _mmx_128c[] = { 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static void inline prefetch_Y_withoutmask(unsigned char *input,
-					  dct_t *output,
-					  int pitch)
+static void inline prefetch_withoutmask(unsigned char *input,
+					dct_t *output,
+					unsigned char *mask, /* unused */
+					int pitch)
 {
   int dummy;
   
@@ -98,8 +96,8 @@
   int dummy1, dummy2;
   int mean, count;
 
-    /* compute mean of visible pixels */
-    asm volatile (
+  /* compute mean of visible pixels */
+  asm volatile (
 	"pxor %%mm7, %%mm7\n"      /* mm7 = zero        */
 	"pxor %%mm6, %%mm6\n"      /* mm6 = mask    accumulator */
 	"pxor %%mm5, %%mm5\n"      /* mm5 = texture accumulator */
@@ -203,7 +201,7 @@
 	: "0"(input), "1"(pitch), "2"(mask)
 	: "memory");
 
-    asm volatile (/* line accumulation */
+  asm volatile (/* line accumulation */
 	"movq %%mm6, %%mm0\n"       /* copy column pixel counts */
 	"psrlq $32, %%mm6\n"        /* move high dword low */
 	"paddb %%mm6, %%mm0\n"      /* sum column counts */
@@ -224,9 +222,9 @@
 	: "=a" (count), "=c" (mean)
 	);
 
-    if(count) mean /= count;
+  if(count) mean /= count;
 
-    asm volatile (/* replicate mean for padding */
+  asm volatile (/* replicate mean for padding */
 	"movd %%ecx, %%mm6\n"       /* mm6 will hold mean value */
 	"punpcklbw %%mm6, %%mm6\n"  /* replicate mean to dword */
 	"punpcklwd %%mm6, %%mm6\n"  /* replicate mean to dword */
@@ -234,131 +232,48 @@
 	:
 	: "c" (mean));
 
-    /* fetch and fill empty pixels with mean value */
-    asm volatile (/* 1st pass */
-	"movq (%0),     %%mm0\n"   /* load 1st texture line */
-	"movq (%0, %2), %%mm2\n"   /* load 2nd texture line */
-	"movq (%3),     %%mm1\n"   /* load 1st mask line */
-	"movq (%3, %2), %%mm3\n"   /* load 2nd mask line */
-	"pcmpgtb %%mm7, %%mm1\n"   /* saturate 1st mask line */
-	"pcmpgtb %%mm7, %%mm3\n"   /* saturate 2nd mask line */
-	"pand %%mm1, %%mm0\n"      /* mask 1st texture line */
-	"pand %%mm3, %%mm2\n"      /* mask 2nd texture line */
-	"pcmpeqb %%mm7, %%mm1\n"   /* invert mask */
-	"pcmpeqb %%mm7, %%mm3\n"   /* invert mask */
-	"movq %%mm6, %%mm4\n"      /* load mean value */
-	"movq %%mm6, %%mm5\n"      /* load mean value */
-	"pand %%mm1, %%mm4\n"      /* mask mean for 1st texture line */
-	"pand %%mm3, %%mm5\n"      /* mask mean for 2nd texture line */
-	"por %%mm4, %%mm0\n"       /* join texture and mean */
-	"por %%mm5, %%mm2\n"       /* join texture and mean */
-	"movq %%mm0, %%mm1\n"      /* copy 1st texture line */
-	"movq %%mm2, %%mm3\n"      /* copy 2nd texture line */
-	"punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
-	"punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-	"movq %%mm0, 0x00(%1)\n"   /* store texture */
-	"movq %%mm1, 0x08(%1)\n"   /* store texture */
-	"movq %%mm2, 0x10(%1)\n"   /* store texture */
-	"movq %%mm3, 0x18(%1)\n"   /* store texture */
-	"addl %2, %0\n"            /* move one texture line down */
-	"addl %2, %0\n"            /* move one texture line down */
-	"addl %2, %3\n"            /* move one mask line down */
-	"addl %2, %3\n"            /* move one mask line down */
-	/* 2nd pass */
-	"movq (%0),     %%mm0\n"   /* load 3rd texture line */
-	"movq (%0, %2), %%mm2\n"   /* load 4th texture line */
-	"movq (%3),     %%mm1\n"   /* load 3rd mask line */
-	"movq (%3, %2), %%mm3\n"   /* load 4th mask line */
-	"pcmpgtb %%mm7, %%mm1\n"   /* saturate 3rd mask line */
-	"pcmpgtb %%mm7, %%mm3\n"   /* saturate 4th mask line */
-	"pand %%mm1, %%mm0\n"      /* mask 3rd texture line */
-	"pand %%mm3, %%mm2\n"      /* mask 4th texture line */
-	"pcmpeqb %%mm7, %%mm1\n"   /* invert mask */
-	"pcmpeqb %%mm7, %%mm3\n"   /* invert mask */
-	"movq %%mm6, %%mm4\n"      /* load mean value */
-	"movq %%mm6, %%mm5\n"      /* load mean value */
-	"pand %%mm1, %%mm4\n"      /* mask mean for 3rd texture line */
-	"pand %%mm3, %%mm5\n"      /* mask mean for 4th texture line */
-	"por %%mm4, %%mm0\n"       /* join texture and mean */
-	"por %%mm5, %%mm2\n"       /* join texture and mean */
-	"movq %%mm0, %%mm1\n"      /* copy 3rd texture line */
-	"movq %%mm2, %%mm3\n"      /* copy 4th texture line */
-	"punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
-	"punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-	"movq %%mm0, 0x20(%1)\n"   /* store texture */
-	"movq %%mm1, 0x28(%1)\n"   /* store texture */
-	"movq %%mm2, 0x30(%1)\n"   /* store texture */
-	"movq %%mm3, 0x38(%1)\n"   /* store texture */
-	"addl %2, %0\n"            /* move one texture line down */
-	"addl %2, %0\n"            /* move one texture line down */
-	"addl %2, %3\n"            /* move one mask line down */
-	"addl %2, %3\n"            /* move one mask line down */
-	/* 3rd pass */
-	"movq (%0),     %%mm0\n"   /* load 5th texture line */
-	"movq (%0, %2), %%mm2\n"   /* load 6th texture line */
-	"movq (%3),     %%mm1\n"   /* load 4th mask line */
-	"movq (%3, %2), %%mm3\n"   /* load 6th mask line */
-	"pcmpgtb %%mm7, %%mm1\n"   /* saturate 5th mask line */
-	"pcmpgtb %%mm7, %%mm3\n"   /* saturate 6th mask line */
-	"pand %%mm1, %%mm0\n"      /* mask 5th texture line */
-	"pand %%mm3, %%mm2\n"      /* mask 6th texture line */
-	"pcmpeqb %%mm7, %%mm1\n"   /* invert mask */
-	"pcmpeqb %%mm7, %%mm3\n"   /* invert mask */
-	"movq %%mm6, %%mm4\n"      /* load mean value */
-	"movq %%mm6, %%mm5\n"      /* load mean value */
-	"pand %%mm1, %%mm4\n"      /* mask mean for 5th texture line */
-	"pand %%mm3, %%mm5\n"      /* mask mean for 6th texture line */
-	"por %%mm4, %%mm0\n"       /* join texture and mean */
-	"por %%mm5, %%mm2\n"       /* join texture and mean */
-	"movq %%mm0, %%mm1\n"      /* copy 5th texture line */
-	"movq %%mm2, %%mm3\n"      /* copy 6th texture line */
-	"punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
-	"punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-	"movq %%mm0, 0x40(%1)\n"   /* store texture */
-	"movq %%mm1, 0x48(%1)\n"   /* store texture */
-	"movq %%mm2, 0x50(%1)\n"   /* store texture */
-	"movq %%mm3, 0x58(%1)\n"   /* store texture */
-	"addl %2, %0\n"            /* move one texture line down */
-	"addl %2, %0\n"            /* move one texture line down */
-	"addl %2, %3\n"            /* move one mask line down */
-	"addl %2, %3\n"            /* move one mask line down */
-	/* 4th pass */
-	"movq (%0),     %%mm0\n"   /* load 7th texture line */
-	"movq (%0, %2), %%mm2\n"   /* load 8th texture line */
-	"movq (%3),     %%mm1\n"   /* load 7th mask line */
-	"movq (%3, %2), %%mm3\n"   /* load 8th mask line */
-	"pcmpgtb %%mm7, %%mm1\n"   /* saturate 7th mask line */
-	"pcmpgtb %%mm7, %%mm3\n"   /* saturate 8th mask line */
-	"pand %%mm1, %%mm0\n"      /* mask 7th texture line */
-	"pand %%mm3, %%mm2\n"      /* mask 8th texture line */
-	"pcmpeqb %%mm7, %%mm1\n"   /* invert mask */
-	"pcmpeqb %%mm7, %%mm3\n"   /* invert mask */
-	"movq %%mm6, %%mm4\n"      /* load mean value */
-	"movq %%mm6, %%mm5\n"      /* load mean value */
-	"pand %%mm1, %%mm4\n"      /* mask mean for 7th texture line */
-	"pand %%mm3, %%mm5\n"      /* mask mean for 8th texture line */
-	"por %%mm4, %%mm0\n"       /* join texture and mean */
-	"por %%mm5, %%mm2\n"       /* join texture and mean */
-	"movq %%mm0, %%mm1\n"      /* copy 7th texture line */
-	"movq %%mm2, %%mm3\n"      /* copy 8th texture line */
-	"punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
-	"punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
-	"punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-	"movq %%mm0, 0x60(%1)\n"   /* store texture */
-	"movq %%mm1, 0x68(%1)\n"   /* store texture */
-	"movq %%mm2, 0x70(%1)\n"   /* store texture */
-	"movq %%mm3, 0x78(%1)\n"   /* store texture */
-	: "=r"(dummy1), "=r"(output), "=r"(pitch), "=r"(dummy2)
-	: "0"(input), "1"(output), "2"(pitch), "3"(mask)
-	: "memory");
-    /* TODO: bilinear filtering */
+  /* fetch and fill empty pixels with mean value */
+#define PREFETCH_Y_MASK_STEP(x, y)					\
+    "movq (%0),     %%mm0\n"   /* load 1st texture line */		\
+    "movq (%0, %2), %%mm2\n"   /* load 2nd texture line */		\
+    "movq (%3),     %%mm1\n"   /* load 1st mask line */			\
+    "movq (%3, %2), %%mm3\n"   /* load 2nd mask line */			\
+    "pcmpgtb %%mm7, %%mm1\n"   /* saturate 1st mask line */		\
+    "pcmpgtb %%mm7, %%mm3\n"   /* saturate 2nd mask line */		\
+    "pand %%mm1, %%mm0\n"      /* mask 1st texture line */		\
+    "pand %%mm3, %%mm2\n"      /* mask 2nd texture line */		\
+    "pcmpeqb %%mm7, %%mm1\n"   /* invert mask */			\
+    "pcmpeqb %%mm7, %%mm3\n"   /* invert mask */			\
+    "movq %%mm6, %%mm4\n"      /* load mean value */			\
+    "movq %%mm6, %%mm5\n"      /* load mean value */			\
+    "pand %%mm1, %%mm4\n"      /* mask mean for 1st texture line */	\
+    "pand %%mm3, %%mm5\n"      /* mask mean for 2nd texture line */	\
+    "por %%mm4, %%mm0\n"       /* join texture and mean */		\
+    "por %%mm5, %%mm2\n"       /* join texture and mean */		\
+    "movq %%mm0, %%mm1\n"      /* copy 1st texture line */		\
+    "movq %%mm2, %%mm3\n"      /* copy 2nd texture line */		\
+    "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */		\
+    "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */		\
+    "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */		\
+    "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */		\
+    "movq %%mm0, 0x" #x "0(%1)\n"   /* store texture */			\
+    "movq %%mm1, 0x" #x "8(%1)\n"   /* store texture */			\
+    "movq %%mm2, 0x" #y "0(%1)\n"   /* store texture */			\
+    "movq %%mm3, 0x" #y "8(%1)\n"   /* store texture */			\
+    "addl %2, %0\n"            /* move one texture line down */		\
+    "addl %2, %0\n"            /* move one texture line down */		\
+    "addl %2, %3\n"            /* move one mask line down */		\
+    "addl %2, %3\n"            /* move one mask line down */
+  
+  asm volatile (PREFETCH_Y_MASK_STEP(0, 1)
+		PREFETCH_Y_MASK_STEP(2, 3)
+		PREFETCH_Y_MASK_STEP(4, 5)
+		PREFETCH_Y_MASK_STEP(6, 7)
+		: "=r"(dummy1), "=r"(output), "=r"(pitch), "=r"(dummy2)
+		: "0"(input), "1"(output), "2"(pitch), "3"(mask)
+		: "memory");
+
+  /* TODO: bilinear filtering */
 }
 
 static void inline prefetch_C_withmask(unsigned char *input,
@@ -366,7 +281,6 @@
 				       unsigned char *mask,
 				       int pitch)
 {
-  register unsigned short const *mmx_128 = _mmx_128;
   int dummy1, dummy2;
   int mean, count;
 
@@ -582,19 +496,19 @@
       );
 
   if(count) mean /= count;
-  else mean = 128;
 
-  asm volatile (/* replicate mean for padding */
+  /* replicate mean for padding */
+  asm volatile (
       "movd %%ecx, %%mm6\n"       /* mm6 will hold mean value */
       "punpcklbw %%mm6, %%mm6\n"  /* replicate mean to dword */
       "punpcklwd %%mm6, %%mm6\n"  /* replicate mean to dword */
       "punpckldq %%mm6, %%mm6\n"  /* replicate mean to qword */
-      "movq (%0), %%mm5\n"        /* load 128 value for later */
-      : "=r"(mmx_128)
-      : "c" (mean), "0"(mmx_128));
+      :
+      : "c" (mean));
 
   /* fetch and fill empty pixels with mean value */
-  asm volatile (/* 1st pass */
+  asm volatile (
+      /* 1st pass */
       "movq (%3),     %%mm0\n"   /* load 1st mask line up left part */
       "movq (%3, %2, 2), %%mm1\n"   /* load 1st mask line down left part */
       "por %%mm0, %%mm1\n"       /* conservative subsample */
@@ -639,10 +553,6 @@
       "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
       "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
       "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-      "psubsw %%mm5, %%mm0\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm1\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm2\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm3\n"    /* adjust color to signed */
       "movq %%mm0, 0x00(%1)\n"   /* store texture */
       "movq %%mm1, 0x08(%1)\n"   /* store texture */
       "movq %%mm2, 0x10(%1)\n"   /* store texture */
@@ -694,10 +604,6 @@
       "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
       "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
       "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-      "psubsw %%mm5, %%mm0\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm1\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm2\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm3\n"    /* adjust color to signed */
       "movq %%mm0, 0x20(%1)\n"   /* store texture */
       "movq %%mm1, 0x28(%1)\n"   /* store texture */
       "movq %%mm2, 0x30(%1)\n"   /* store texture */
@@ -749,10 +655,6 @@
       "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
       "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
       "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-      "psubsw %%mm5, %%mm0\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm1\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm2\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm3\n"    /* adjust color to signed */
       "movq %%mm0, 0x40(%1)\n"   /* store texture */
       "movq %%mm1, 0x48(%1)\n"   /* store texture */
       "movq %%mm2, 0x50(%1)\n"   /* store texture */
@@ -804,10 +706,6 @@
       "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */
       "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */
       "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */
-      "psubsw %%mm5, %%mm0\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm1\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm2\n"    /* adjust color to signed */
-      "psubsw %%mm5, %%mm3\n"    /* adjust color to signed */
       "movq %%mm0, 0x60(%1)\n"   /* store texture */
       "movq %%mm1, 0x68(%1)\n"   /* store texture */
       "movq %%mm2, 0x70(%1)\n"   /* store texture */
@@ -820,96 +718,11 @@
   /* TODO: bilinear filtering */
 }
 
-static void inline prefetch_C_withoutmask(unsigned char *input,
-					  dct_t *output,
-					  int pitch)
-{
-  register unsigned short const *mmx_128 = _mmx_128;
-  int dummy;
-
-  asm volatile ("pxor %%mm7, %%mm7\n"
-		"movq (%3), %%mm6\n"
-		"movq (%0),     %%mm0\n"
-		"movq (%0, %2), %%mm2\n"
-		"movq %%mm0, %%mm1\n"
-		"movq %%mm2, %%mm3\n"
-		"punpcklbw %%mm7, %%mm0\n"
-		"punpckhbw %%mm7, %%mm1\n"
-		"punpcklbw %%mm7, %%mm2\n"
-		"punpckhbw %%mm7, %%mm3\n"
-		"addl %2, %0\n"
-		"addl %2, %0\n"
-		"psubsw %%mm6, %%mm0\n"
-		"psubsw %%mm6, %%mm1\n"
-		"psubsw %%mm6, %%mm2\n"
-		"psubsw %%mm6, %%mm3\n"
-		"movq %%mm0, 0x00(%1)\n"
-		"movq %%mm1, 0x08(%1)\n"
-		"movq %%mm2, 0x10(%1)\n"
-		"movq %%mm3, 0x18(%1)\n"
-		"movq (%0),     %%mm0\n"
-		"movq (%0, %2), %%mm2\n"
-		"movq %%mm0, %%mm1\n"
-		"movq %%mm2, %%mm3\n"
-		"punpcklbw %%mm7, %%mm0\n"
-		"punpckhbw %%mm7, %%mm1\n"
-		"punpcklbw %%mm7, %%mm2\n"
-		"punpckhbw %%mm7, %%mm3\n"
-		"addl %2, %0\n"
-		"addl %2, %0\n"
-		"psubsw %%mm6, %%mm0\n"
-		"psubsw %%mm6, %%mm1\n"
-		"psubsw %%mm6, %%mm2\n"
-		"psubsw %%mm6, %%mm3\n"
-		"movq %%mm0, 0x20(%1)\n"
-		"movq %%mm1, 0x28(%1)\n"
-		"movq %%mm2, 0x30(%1)\n"
-		"movq %%mm3, 0x38(%1)\n"
-		"movq (%0),     %%mm0\n"
-		"movq (%0, %2), %%mm2\n"
-		"movq %%mm0, %%mm1\n"
-		"movq %%mm2, %%mm3\n"
-		"punpcklbw %%mm7, %%mm0\n"
-		"punpckhbw %%mm7, %%mm1\n"
-		"punpcklbw %%mm7, %%mm2\n"
-		"punpckhbw %%mm7, %%mm3\n"
-		"addl %2, %0\n"
-		"addl %2, %0\n"
-		"psubsw %%mm6, %%mm0\n"
-		"psubsw %%mm6, %%mm1\n"
-		"psubsw %%mm6, %%mm2\n"
-		"psubsw %%mm6, %%mm3\n"
-		"movq %%mm0, 0x40(%1)\n"
-		"movq %%mm1, 0x48(%1)\n"
-		"movq %%mm2, 0x50(%1)\n"
-		"movq %%mm3, 0x58(%1)\n"
-		"movq (%0),     %%mm0\n"
-		"movq (%0, %2), %%mm2\n"
-		"movq %%mm0, %%mm1\n"
-		"movq %%mm2, %%mm3\n"
-		"punpcklbw %%mm7, %%mm0\n"
-		"punpckhbw %%mm7, %%mm1\n"
-		"punpcklbw %%mm7, %%mm2\n"
-		"punpckhbw %%mm7, %%mm3\n"
-		"addl %2, %0\n"
-		"addl %2, %0\n"
-		"psubsw %%mm6, %%mm0\n"
-		"psubsw %%mm6, %%mm1\n"
-		"psubsw %%mm6, %%mm2\n"
-		"psubsw %%mm6, %%mm3\n"
-		"movq %%mm0, 0x60(%1)\n"
-		"movq %%mm1, 0x68(%1)\n"
-		"movq %%mm2, 0x70(%1)\n"
-		"movq %%mm3, 0x78(%1)\n"
-		: "=r"(dummy), "=r"(output), "=r"(pitch), "=r"(mmx_128)
-		: "0"(input), "1"(output), "2"(pitch), "3"(mmx_128)
-		: "memory");
-}
-
 static void inline diff(unsigned char *input,
 		        unsigned char *ref,
 			dct_t *output,
-			int pitch)
+			int ipitch,
+			int rpitch)
 {
   int dummy1, dummy2;
 
@@ -928,7 +741,7 @@
     "movq %%mm0, 0x" #x "0(%1)\n"		\
     "movq %%mm1, 0x" #x "8(%1)\n"		\
     "addl %2, %0\n"				\
-    "addl %2, %3\n"
+    "addl %4, %3\n"
 
   asm volatile ("pxor %%mm7, %%mm7\n"
 		DIFF_STEP(0)
@@ -939,8 +752,8 @@
 		DIFF_STEP(5)
 		DIFF_STEP(6)
 		DIFF_STEP(7)
-		: "=r"(dummy1), "=r"(output), "=r"(pitch), "=r"(dummy2)
-		: "0"(input), "1"(output), "2"(pitch), "3"(ref)
+		: "=r"(dummy1), "=r"(output), "=r"(ipitch), "=r"(dummy2), "=r"(rpitch)
+		: "0"(input), "1"(output), "2"(ipitch), "3"(ref), "4"(rpitch)
 		: "memory");
 }
 

Index: half_int.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/half_int.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- half_int.h	13 Mar 2002 01:14:34 -0000	1.1
+++ half_int.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -53,7 +53,7 @@
   ref01 = ref[1]->y;
   ref10 = ref[2]->y;
   ref11 = ref[3]->y;
-  pitch = width;
+  pitch = ref[0]->p;
 
   for(y = 0; y < h; y++) {
     for(x = 0; x < w; x++) {
@@ -70,7 +70,17 @@
       ref10++;
       ref11++;
     }
+    /* correct borders */
+    ref01[-1] = ref00[-1];
+    ref11[-1] = ref10[-1];
+    ref00 += pitch - w;
+    ref01 += pitch - w;
+    ref10 += pitch - w;
+    ref11 += pitch - w;
   }
+  /* correct borders */
+  memcpy(ref10 - pitch, ref00-pitch, w); 
+  memcpy(ref11 - pitch, ref01-pitch, w);
 
   /* U component */
   w = width >> 1;
@@ -79,7 +89,7 @@
   ref01 = ref[1]->u;
   ref10 = ref[2]->u;
   ref11 = ref[3]->u;
-  pitch = width >> 1;
+  pitch = ref[0]->p >> 1;
 
   for(y = 0; y < h; y++) {
     for(x = 0; x < w; x++) {
@@ -96,7 +106,17 @@
       ref10++;
       ref11++;
     }
+    /* correct borders */
+    ref01[-1] = ref00[-1];
+    ref11[-1] = ref10[-1];
+    ref00 += pitch - w;
+    ref01 += pitch - w;
+    ref10 += pitch - w;
+    ref11 += pitch - w;
   }
+  /* correct borders */
+  memcpy(ref10 - pitch, ref00-pitch, w); 
+  memcpy(ref11 - pitch, ref01-pitch, w);
 
   /* V component */
   w = width >> 1;
@@ -105,7 +125,7 @@
   ref01 = ref[1]->v;
   ref10 = ref[2]->v;
   ref11 = ref[3]->v;
-  pitch = width >> 1;
+  pitch = ref[0]->p >> 1;
 
   for(y = 0; y < h; y++) {
     for(x = 0; x < w; x++) {
@@ -122,5 +142,15 @@
       ref10++;
       ref11++;
     }
+    /* correct borders */
+    ref01[-1] = ref00[-1];
+    ref11[-1] = ref10[-1];
+    ref00 += pitch - w;
+    ref01 += pitch - w;
+    ref10 += pitch - w;
+    ref11 += pitch - w;
   }
+  /* correct borders */
+  memcpy(ref10 - pitch, ref00-pitch, w); 
+  memcpy(ref11 - pitch, ref01-pitch, w);
 }

Index: half_mmx.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/half_mmx.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- half_mmx.h	13 Mar 2002 01:14:34 -0000	1.1
+++ half_mmx.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -126,7 +126,7 @@
   planes[1] = ref[1]->y;
   planes[2] = ref[2]->y;
   planes[3] = ref[3]->y;
-  pitch = width;
+  pitch = ref[0]->p;
 
   for(y = 0; y < h; y++) {
     for(x = 0; x < w; x++) {
@@ -136,7 +136,18 @@
       planes[2]+=8;
       planes[3]+=8;
     }
+    /* correct borders */
+    planes[1][-1] = planes[0][-1];
+    planes[3][-1] = planes[2][-1];
+
+    planes[0] += pitch - (w<<3);
+    planes[1] += pitch - (w<<3);
+    planes[2] += pitch - (w<<3);
+    planes[3] += pitch - (w<<3);
   }
+  /* correct borders */
+  memcpy(planes[2] - pitch, planes[0]-pitch, w<<3); 
+  memcpy(planes[3] - pitch, planes[1]-pitch, w<<3);
 
   /* U component */
   w = width >> 4;
@@ -145,7 +156,7 @@
   planes[1] = ref[1]->u;
   planes[2] = ref[2]->u;
   planes[3] = ref[3]->u;
-  pitch = width >> 1;
+  pitch = ref[0]->p >> 1;
 
   for(y = 0; y < h; y++) {
     for(x = 0; x < w; x++) {
@@ -155,7 +166,18 @@
       planes[2]+=8;
       planes[3]+=8;
     }
+    /* correct borders */
+    planes[1][-1] = planes[0][-1];
+    planes[3][-1] = planes[2][-1];
+
+    planes[0] += pitch - (w<<3);
+    planes[1] += pitch - (w<<3);
+    planes[2] += pitch - (w<<3);
+    planes[3] += pitch - (w<<3);
   }
+  /* correct borders */
+  memcpy(planes[2] - pitch, planes[0]-pitch, w<<3); 
+  memcpy(planes[3] - pitch, planes[1]-pitch, w<<3); 
 
   /* V component */
   w = width >> 4;
@@ -164,7 +186,7 @@
   planes[1] = ref[1]->v;
   planes[2] = ref[2]->v;
   planes[3] = ref[3]->v;
-  pitch = width >> 1;
+  pitch = ref[0]->p >> 1;
 
   for(y = 0; y < h; y++) {
     for(x = 0; x < w; x++) {
@@ -174,5 +196,16 @@
       planes[2]+=8;
       planes[3]+=8;
     }
+    /* correct borders */
+    planes[1][-1] = planes[0][-1];
+    planes[3][-1] = planes[2][-1];
+
+    planes[0] += pitch - (w<<3);
+    planes[1] += pitch - (w<<3);
+    planes[2] += pitch - (w<<3);
+    planes[3] += pitch - (w<<3);
   }
+  /* correct borders */
+  memcpy(planes[2] - pitch, planes[0]-pitch, w<<3); 
+  memcpy(planes[3] - pitch, planes[1]-pitch, w<<3); 
 }

Index: idct_float.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/idct_float.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- idct_float.h	13 Mar 2002 01:14:34 -0000	1.1
+++ idct_float.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -196,7 +196,6 @@
 /*                                                                           */
 /*  Return value:                                                            */
 /*    None.                                                                  */
-
 static void inline idct(dct_t *block)
 {
    int i;

Index: idct_mmx.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/idct_mmx.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- idct_mmx.h	13 Mar 2002 01:14:34 -0000	1.1
+++ idct_mmx.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,21 +18,10 @@
 */
 /*************************** MMX accelerated iDCT ****************************/
 
-#define ICOS2 ((short) (1.082392200 * (double)(1 << 14) + .5)) /* 14-bit */
-#define ICOS4 ((short) (1.414213562 * (double)(1 << 14) + .5)) /* 14-bit */
-#define ICOS6 ((short) (2.613125925 * (double)(1 << 13) + .5)) /* 13-bit */
-#define ICOS8 ((short) (0.765366865 * (double)(1 << 15) + .5)) /* 15-bit */
-
-short const _mmx_icos[] = {
-  ICOS2, ICOS2, ICOS2, ICOS2,
-  ICOS4, ICOS4, ICOS4, ICOS4,
-  ICOS6, ICOS6, ICOS6, ICOS6,
-  ICOS8, ICOS8, ICOS8, ICOS8
-};
-
 static void inline idct_aan_pass(dct_t * block)
 {
-  register unsigned short const *mmx_icos = _mmx_icos;
+  //  register unsigned short const *mmx_icos = _mmx_icos;
+
   asm volatile (
       /*
 	STEP 1
@@ -76,12 +65,9 @@
 	block[row*8+6] = v45;           - v71, v11, v44, v65, v24 -
       */
       "psllw $0x02, %%mm5\n"               /* adjust v22 for multiply      */
-      "pmulhw 8(%1), %%mm5\n"              /* 4*v15*ICOS4/4 -> mm5 (v23)   */
-#ifdef signbit
-      "movq %%mm5, %%mm6\n"                /* mm5->mm6                     */
-      "psraw $0x0f, %%mm6\n"               /* sign(mm5) -> mm6             */
-      "psubsw %%mm6, %%mm5\n"              /* adjust multiply              */
-#endif
+      "paddw " ASMSYM "_mmx_1, %%mm5\n"   /* + 1 for rounding */
+      //      "pmulhw 8(%1), %%mm5\n"              /* 4*v15*ICOS4/4 -> mm5 (v23)   */
+      "pmulhw " ASMSYM "_mmx_icos+8, %%mm5\n" /* 4*v15*ICOS4/4 -> mm5 (v23)*/
       "psubsw %%mm4, %%mm5\n"              /* v23 - v62 -> mm5 (v24)       */
       "movq %%mm3, %%mm6\n"                /* v44 -> mm6                   */
       "paddsw %%mm5, %%mm6\n"              /* v44 + v24 -> mm6 (v45)       */
@@ -139,35 +125,25 @@
 	block[row*8+4] += v55;          - -
       */
       "psllw $0x02, %%mm0\n"               /* adjust v12 for multiply      */
-      "pmulhw 8(%1), %%mm0\n"              /* 4*v12*ICOS4/4 -> mm0 (v13)   */
-#ifdef signbit
-      "movq %%mm0, %%mm7\n"                /* mm0->mm7                     */
-      "psraw $0x0f, %%mm7\n"               /* sign(mm0) -> mm7             */
-      "psubsw %%mm7, %%mm0\n"              /* adjust multiply              */
-#endif
+      "paddw " ASMSYM "_mmx_1, %%mm0\n"   /* + 1 for rounding */
+      //      "pmulhw 8(%1), %%mm0\n"              /* 4*v12*ICOS4/4 -> mm0 (v13)   */
+      "pmulhw " ASMSYM "_mmx_icos+8, %%mm0\n" /* 4*v12*ICOS4/4 -> mm0 (v13)  */
       "movq   %%mm2, %%mm6\n"              /* v51 -> mm6                   */
       "psubsw %%mm1, %%mm6\n"              /* v51 - v71 -> mm6 (va2)       */
       "psllw $0x03, %%mm2\n"               /* adjust v51 for multiply      */
-      "pmulhw 16(%1), %%mm2\n"             /* 8*v51*ICOS6/8 -> mm2 (v53)   */
-#ifdef signbit
-      "movq %%mm2, %%mm7\n"                /* mm2->mm7                     */
-      "psraw $0x0f, %%mm7\n"               /* sign(mm2) -> mm7             */
-      "psubsw %%mm7, %%mm2\n"              /* adjust multiply              */
-#endif
+      "paddw " ASMSYM "_mmx_1, %%mm2\n"   /* + 1 for rounding */
+      /* should add another one here but it seems to look better without */
+      //      "pmulhw 16(%1), %%mm2\n"             /* 8*v51*ICOS6/8 -> mm2 (v53)   */
+      "pmulhw " ASMSYM "_mmx_icos+16, %%mm2\n" /* 8*v51*ICOS6/8 -> mm2 (v53) */
       "psllw $0x02, %%mm1\n"               /* adjust v71 for multiply      */
-      "pmulhw 0(%1), %%mm1\n"              /* 4*v71*ICOS2/4 -> mm1 (v73)   */
-#ifdef signbit
-      "movq %%mm1, %%mm7\n"                /* mm1->mm7                     */
-      "psraw $0x0f, %%mm7\n"               /* sign(mm1) -> mm7             */
-      "psubsw %%mm7, %%mm1\n"              /* adjust multiply              */
-#endif
+      "paddw " ASMSYM "_mmx_1, %%mm1\n"   /* + 1 for rounding */
+      /* should add another one here but it seems to look better without */
+      //      "pmulhw 0(%1), %%mm1\n"              /* 4*v71*ICOS2/4 -> mm1 (v73)   */
+      "pmulhw " ASMSYM "_mmx_icos, %%mm1\n" /* 4*v71*ICOS2/4 -> mm1 (v73)   */
       "psllw $0x01, %%mm6\n"               /* adjust va2 for multiply      */
-      "pmulhw 24(%1), %%mm6\n"             /* 2*v12*ICOS8/2 -> mm6 (va3)   */
-#ifdef signbit
-      "movq %%mm6, %%mm7\n"                /* mm6->mm7                     */
-      "psraw $0x0f, %%mm7\n"               /* sign(mm6) -> mm7             */
-      "psubsw %%mm7, %%mm6\n"              /* adjust multiply              */
-#endif
+      "paddw " ASMSYM "_mmx_1, %%mm6\n"   /* + 1 for rounding */
+      //      "pmulhw 24(%1), %%mm6\n"             /* 2*v12*ICOS8/2 -> mm6 (va3)   */
+      "pmulhw " ASMSYM "_mmx_icos+24, %%mm6\n" /* 2*v12*ICOS8/2 -> mm6 (va3) */
       "psubsw %%mm6, %%mm2\n"              /* v53 - va3 -> mm2 (v54)       */
       "psubsw %%mm6, %%mm1\n"              /* v73 - va3 -> mm1 (v74)       */
       "psubsw %%mm3, %%mm1\n"              /* v74 - v32 -> mm3 (v75)       */
@@ -191,8 +167,8 @@
       "paddsw %%mm0, %%mm7\n"              /* v65 + v55 -> mm7             */
       "movq %%mm6, 0x30(%0)\n"             /* mm6 -> line 3                */
       "movq %%mm7, 0x40(%0)\n"             /* mm7 -> line 4                */
-      : "=r"(block), "=r"(mmx_icos)
-      : "0"(block), "1"(mmx_icos)
+      : "=r"(block)/*, "=r"(mmx_icos)*/
+      : "0"(block)/*, "1"(mmx_icos)*/
       : "memory");
 }
 

Index: mae_int.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/mae_int.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- mae_int.h	13 Mar 2002 01:14:34 -0000	1.1
+++ mae_int.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -33,7 +33,7 @@
       if(shape[j])
 	error += (unsigned int) abs(input[j] - ref[j]);
 
-    ref += pitch;
+    ref += pitch + 32; /* edge */
     input += pitch;
     shape += pitch;
   }
@@ -55,7 +55,7 @@
     for(j=0; j<8; j++)
       error += (unsigned int) abs(input[j] - ref[j]);
 
-    ref += pitch;
+    ref += pitch + 32; /* edge */
     input += pitch;
   }
         

Index: mae_mmx.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/mae_mmx.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- mae_mmx.h	13 Mar 2002 01:14:34 -0000	1.1
+++ mae_mmx.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -35,6 +35,7 @@
     "addl %2, %0\n"				\
     "addl %2, %1\n"				\
     "addl %2, %4\n"				\
+    "addl $32, %1\n"				\
     "psubusb %%mm1, %%mm0\n"			\
     "psubusb %%mm2, %%mm1\n"			\
     "por %%mm1, %%mm0\n"			\
@@ -80,24 +81,35 @@
 #define MAE_STEP_NOMASK()			\
     "movq (%0), %%mm0\n"			\
     "movq (%1), %%mm1\n"			\
-    "movq %%mm0, %%mm2\n"			\
+    "addl %2, %1\n"				\
     "addl %2, %0\n"				\
+    "addl $32, %1\n"				\
+    "movq (%0), %%mm2\n"			\
+    "movq %%mm0, %%mm4\n"			\
+    "movq (%1), %%mm3\n"			\
+    "movq %%mm2, %%mm5\n"			\
     "addl %2, %1\n"				\
+    "addl %2, %0\n"				\
+    "addl $32, %1\n"				\
     "psubusb %%mm1, %%mm0\n"			\
-    "psubusb %%mm2, %%mm1\n"			\
+    "psubusb %%mm3, %%mm2\n"			\
+    "psubusb %%mm4, %%mm1\n"			\
+    "psubusb %%mm5, %%mm3\n"			\
     "por %%mm1, %%mm0\n"			\
-    "movq %%mm0, %%mm2\n"			\
+    "por %%mm3, %%mm2\n"			\
+    "movq %%mm0, %%mm4\n"			\
+    "movq %%mm2, %%mm5\n"			\
     "punpcklbw %%mm7, %%mm0\n"			\
-    "punpckhbw %%mm7, %%mm2\n"			\
-    "paddw %%mm0, %%mm6\n"			\
-    "paddw %%mm2, %%mm6\n"
+    "punpcklbw %%mm7, %%mm2\n"			\
+    "punpckhbw %%mm7, %%mm4\n"			\
+    "punpckhbw %%mm7, %%mm5\n"			\
+    "paddw %%mm0, %%mm4\n"			\
+    "paddw %%mm2, %%mm5\n"                      \
+    "paddw %%mm4, %%mm6\n"			\
+    "paddw %%mm5, %%mm6\n"
 
   asm volatile ("pxor %%mm7, %%mm7\n"
 		"pxor %%mm6, %%mm6\n" 
-		MAE_STEP_NOMASK()
-		MAE_STEP_NOMASK()
-		MAE_STEP_NOMASK()
-		MAE_STEP_NOMASK()
 		MAE_STEP_NOMASK()
 		MAE_STEP_NOMASK()
 		MAE_STEP_NOMASK()

Index: mae_sse.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/mae_sse.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- mae_sse.h	13 Mar 2002 01:14:34 -0000	1.1
+++ mae_sse.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -1,59 +1,100 @@
-static unsigned int mmxext_absdiff(unsigned char *ref, unsigned char *input, int pitch)
+/*
+    libfame - Fast Assembly MPEG Encoder Library
+    Copyright (C) 2000-2001 Vivien Chappelier
+                            Damien Vincent
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+static unsigned int MAE8x8_withmask(unsigned char *ref,
+				    unsigned char *input,
+				    unsigned char *shape,
+				    int pitch)
 {
   int dummy;
   unsigned long retval;
 
-  asm volatile ("pxor %%mm6, %%mm6\n"
+#define MAE_STEP_MASK() 			\
+    "movq (%0), %%mm0\n"  			\
+    "movq (%1), %%mm1\n"			\
+    "movq (%4), %%mm5\n"			\
+    "pcmpgtb %%mm7, %%mm5\n"			\
+    "movq %%mm0, %%mm2\n"			\
+    "addl %2, %0\n"				\
+    "addl %2, %1\n"				\
+    "addl %2, %4\n"				\
+    "psubusb %%mm1, %%mm0\n"			\
+    "psubusb %%mm2, %%mm1\n"			\
+    "por %%mm1, %%mm0\n"			\
+    "pand %%mm5,%%mm0\n"			\
+    "movq %%mm0, %%mm2\n"			\
+    "punpcklbw %%mm7, %%mm0\n"			\
+    "punpckhbw %%mm7, %%mm2\n"			\
+    "paddw %%mm0, %%mm6\n"			\
+    "paddw %%mm2, %%mm6\n"			\
+    
+  asm volatile ("pxor %%mm7, %%mm7\n"
+		"pxor %%mm6, %%mm6\n" 
+		MAE_STEP_MASK()
+		MAE_STEP_MASK()
+		MAE_STEP_MASK()
+		MAE_STEP_MASK()
+		MAE_STEP_MASK()
+		MAE_STEP_MASK()
+		MAE_STEP_MASK()
+		MAE_STEP_MASK()
+		"movq %%mm6, %%mm7\n"
+		"psrlq $0x20, %%mm7\n"
+		"paddw %%mm7, %%mm6\n"
+		"movq %%mm6, %%mm7\n"
+		"psrlq $0x10, %%mm7\n"
+		"paddw %%mm7, %%mm6\n"
+		"movd %%mm6, %3\n"
+		: "=r"(dummy), "=r"(ref), "=r"(pitch), "=r"(retval), "=r"(shape)
+		: "0"(input), "1"(ref), "2"(pitch), "3"(0), "4"(shape)
+		: "memory");
+
+  return (retval&65535);
+}
 
-		"movq (%0), %%mm0\n"      // 1st line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
-		"movq (%0), %%mm0\n"      // 2nd line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
-		"movq (%0), %%mm0\n"      // 3rd line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
-		"movq (%0), %%mm0\n"      // 4th line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
-		"movq (%0), %%mm0\n"      // 5th line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
-		"movq (%0), %%mm0\n"      // 6th line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
-		"movq (%0), %%mm0\n"      // 7th line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
-		"movq (%0), %%mm0\n"      // 8th line
-		"movq (%1), %%mm1\n"
-		"addl %2, %0\n"
-		"addl %2, %1\n"
-		"psadbw %%mm1, %%mm0\n"
-		"paddw %%mm0, %%mm6\n"
 
+static unsigned int MAE8x8_withoutmask(unsigned char *ref,
+				       unsigned char *input,
+				       unsigned char *shape,
+				       int pitch)
+{
+  int dummy;
+  unsigned long retval;
+
+#define MAE_STEP_NOMASK()			\
+    "movq (%0), %%mm0\n"                        \
+    "movq (%1), %%mm1\n"			\
+    "addl %2, %0\n"				\
+    "addl %2, %1\n"				\
+    "psadbw %%mm1, %%mm0\n"			\
+    "paddw %%mm0, %%mm6\n"
+
+  asm volatile ("pxor %%mm6, %%mm6\n"
+		MAE_STEP_NOMASK()
+		MAE_STEP_NOMASK()
+		MAE_STEP_NOMASK()
+		MAE_STEP_NOMASK()
+		MAE_STEP_NOMASK()
+		MAE_STEP_NOMASK()
+		MAE_STEP_NOMASK()
+		MAE_STEP_NOMASK()
 		"movd %%mm6, %3\n"
 		: "=r"(dummy), "=r"(ref), "=r"(pitch), "=r"(retval)
 		: "0"(input), "1"(ref), "2"(pitch), "3"(retval)

Index: pad_int.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/pad_int.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- pad_int.h	13 Mar 2002 01:14:34 -0000	1.1
+++ pad_int.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,220 +18,422 @@
 */
 /**************************** motion estimation shape padding ****************/
 
-/*  pad                                                                      */
+static void repetitive_fill_Y(unsigned char *plane,
+			      unsigned char *shape,
+			      int rpitch,
+			      int spitch)
+{
+  int i, j, k;
+  int p, last;
+  int inside;
+  int row[16];
+  unsigned char *l;
+
+  /* TODO: MMX version */
+  /* TODO: int32 version */
+
+  /* horizontal padding */
+  last = 0;
+  for(j = 0; j < 16; j++) {
+    p = -1;
+    inside = 1;
+    row[j] = 1;
+    for(i = 0; i < 16; i++) {
+      if(inside && !shape[i]) { /* edge down */
+	if(i) p = plane[i-1];
+	inside = 0;
+	last = i;
+      } else if(!inside && shape[i]) { /* edge up */
+	inside = 1;
+	if(p < 0) p = plane[i];
+	else p = (p+plane[i]+1)>>1;
+	memset(plane+last, p, i-last); /* fill */
+      }
+    }
+
+    if(!inside) {
+      if(!last)	row[j] = 0; /* empty row */
+      else memset(plane+last, p, 16-last); /* fill */
+    }
+    plane += rpitch;
+    shape += spitch;
+  }
+
+  plane -= rpitch << 4; /* restore plane pointer */
+
+  /* vertical padding */
+  last = 0;
+  l = NULL;
+  inside = 1;
+  for(i = 0; i < 16; i++) {
+    if(inside && !row[i]) { /* edge down */
+      if(i) l = plane+(i-1)*rpitch;
+      inside = 0;
+      last = i;
+    } else if(!inside && row[i]) { /* edge up */
+      inside = 1;
+      if(l == NULL) l = plane+i*rpitch;
+      else {
+	for(k = 0; k < 16; k++) /* average */
+	  plane[last*rpitch+k] = (unsigned char)
+	    (((unsigned short)l[k]+
+	      (unsigned short)plane[i*rpitch+k]+1)>>1);
+	l = plane+last*rpitch;
+	last ++; /* already filled */
+      }
+      for(k = last; k < i; k++)
+	memcpy(plane+k*rpitch, l, 16); /* fill */
+    }
+  }
+
+  if(!inside) {
+    if(last)
+      for(k = last; k < 16; k++)
+	memcpy(plane+k*rpitch, l, 16); /* fill */
+  }
+}
+
+static void repetitive_fill_C(unsigned char *plane,
+			      unsigned char *shape,
+			      int rpitch,
+			      int spitch)
+{
+  int i, j, k;
+  int p, last;
+  int inside;
+  int row[8];
+  unsigned char *l;
+
+  /* TODO: MMX version */
+  /* TODO: int32 version */
+
+  rpitch >>= 1;
+
+  /* horizontal padding */
+  last = 0;
+  for(j = 0; j < 8; j++) {
+    p = -1;
+    inside = 1;
+    row[j] = 1;
+    for(i = 0; i < 8; i++) {
+      if(inside && !(shape[(i<<1)] | shape[(i<<1)+1] |
+		     shape[(i<<1)+spitch] | shape[(i<<1)+spitch+1])) {
+	/* edge down */
+	if(i) p = plane[i-1];
+	inside = 0;
+	last = i;
+      } else if(!inside && (shape[(i<<1)] | shape[(i<<1)+1] |
+			    shape[(i<<1)+spitch] | shape[(i<<1)+spitch+1])) {
+	/* edge up */
+	inside = 1;
+	if(p < 0) p = plane[i];
+	else p = (p+plane[i]+1)>>1;
+	memset(plane+last, p, i-last); /* fill */
+      }
+    }
+
+    if(!inside) {
+      if(!last)	row[j] = 0; /* empty row */
+      else memset(plane+last, p, 8-last); /* fill */
+    }
+    plane += rpitch;
+    shape += spitch << 1;
+  }
+
+  plane -= rpitch << 3; /* restore plane pointer */
+
+  /* vertical padding */
+  last = 0;
+  l = NULL;
+  inside = 1;
+  for(i = 0; i < 8; i++) {
+    if(inside && !row[i]) { /* edge down */
+      if(i) l = plane+(i-1)*rpitch;
+      inside = 0;
+      last = i;
+    } else if(!inside && row[i]) { /* edge up */
+      inside = 1;
+      if(l == NULL) l = plane+i*rpitch;
+      else {
+	for(k = 0; k < 8; k++) /* mean */
+	  plane[last*rpitch+k] = (unsigned char)
+	    (((unsigned short)l[k]+
+	      (unsigned short)plane[i*rpitch+k]+1)>>1);
+	l = plane+last*rpitch;
+	last ++; /* already filled */
+      }
+      for(k = last; k < i; k++)
+	memcpy(plane+k*rpitch, l, 8); /* fill */
+    }
+  }
+
+  if(!inside) {
+    if(last)
+      for(k = last; k < 8; k++)
+	memcpy(plane+k*rpitch, l, 8); /* fill */
+  }
+}
+
+/*  extended_pad_withmask                                                    */
 /*                                                                           */
 /*  Description:                                                             */
-/*    Perform repetitive padding of arbitrary shape for motion estimation.   */
+/*    Perform extended padding of arbitrary shape for motion estimation.     */
 /*                                                                           */
 /*  Arguments:                                                               */
+/*    int i: reference number                                                */
 /*    int width: width of the frame                                          */
 /*    int height: height of the frame                                        */
 /*    fame_yuv_t frame: the frame to pad                                     */
 /*    unsigned char *shape: shape of the frame                               */
 /*    unsigned char *padded: temporary shape buffer                          */
-/*    fame_encoder_t *encoder: the encoder                                   */
+/*    unsigned char *bab_map: binary alpha block type map                    */
 /*    fame_box_t box: bounding box                                           */
 /*                                                                           */
 /*  Return value:                                                            */
 /*    None.                                                                  */
 
-static void inline pad(int width,
-		       int height,
-		       fame_yuv_t *frame,
-		       unsigned char *shape,
-		       unsigned char *padded,
-		       fame_box_t *box)
+static void inline extended_pad_withmask(int i,
+					 int width,
+					 int height,
+					 fame_yuv_t **frame,
+					 unsigned char *shape,
+					 unsigned char *bab_map,
+					 fame_box_t *box)
 {
-  int x, y, r1, r2, e;
-  unsigned char *s, *sp;
-  unsigned char *Y, *U, *V;
-  unsigned char p1, p2;
-  unsigned char ps;
-  unsigned char p1u, p2u;
-  unsigned char p1v, p2v;
-  int bx, by, bw, bh;
+  int x, y, k;
   int pitch;
+  unsigned char *Y, *U, *V;
+  unsigned char *Yh, *Uh, *Vh;
+  unsigned char *Yv, *Uv, *Vv;
+  unsigned char *d, *s, *b;
 
+  width += 15;  /* roundup */
+  height += 15; /* roundup */
+  width >>= 4;  /* convert to macroblock unit */
+  height >>= 4; /* convert to macroblock unit */
+  width += 2;   /* add 1 for border on both sides */
+  height += 2;  /* add 1 for border on both sides */
   pitch = width;
-
-  memcpy(padded, shape, width*height);
-
-  bx = box->x;
-  by = box->y;
-  bw = box->w;
-  bh = box->h;
-
-  /* horizontal padding Y */
-  s = shape + by*pitch;
-  sp = padded + by*pitch;
-  Y = frame->y + by*pitch;
-  for(y = by; y < bh; y++)
-  {
-    x = bx;
-    while(x < bw)
-    {
-      p1 = p2 = 128;
-      ps = 0;
-      e = fame_min(x+32, bw);
-      r2 = x;
-      for(r1 = x; r1 < e; r1++)
-	if(s[r1]) {
-	  p1 = Y[r1];
-	  p2 = p1;
-	  ps = 255;
-	  for(r2 = r1 + 1; r2 < bw; r2++)
-	    if(s[r2]) {
-	      p2 = (unsigned char) (((short) p1 + (short) Y[r2]) >> 1);
-	      break;
-	    }
-	  if(p1 == p2)
-	    r2 = fame_min((r1 & (~15))+32, r2);
-	  break;
+ 
+  if(i) {
+    Y = frame[i]->y;
+    U = frame[i]->u;
+    V = frame[i]->v;
+    Yh = frame[i&2]->y;
+    Uh = frame[i&2]->u;
+    Vh = frame[i&2]->v;
+    Yv = frame[i&1]->y;
+    Uv = frame[i&1]->u;
+    Vv = frame[i&1]->v;
+    b = bab_map + 1 + pitch; /* first block inside the bounding box */
+    for(y = 1; y < height-1; y ++) {
+      for(x = 1; x < width-1; x ++) {
+	if((i&1) && b[0] >= bab_border_16x16 && b[1] == bab_not_coded) {
+	  /* fix rightmost half sample (= int sample) :  */
+	  /* ... x X x X x 0                             */
+	  /*             ^--- = X+0/2 = X/2, should be X */
+	  d = Y + 15; s = Yh + 15;
+	  for(k = 0; k < 16; k++, d += (pitch<<4), s += (pitch<<4)) *d = *s;
+	  d = U + 7; s = Uh + 7;
+	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3)) *d = *s;
+	  d = V + 7; s = Vh + 7;
+	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3)) *d = *s;
 	}
-      for(; x < r1; x++) {
-	Y[x] = p1;
-	/* compatibility with extended padding */
-	if((x & (~15)) == (r1 & (~15)) &&
-	   (sp < padded+16*pitch || sp[-16*pitch] == 0))
-	  sp[x] = ps;
-      }
-      for(; x < r2; x++) {
-	Y[x] = p2;
-	sp[x] = ps;
-      }
-    }
-    s += pitch;
-    sp += pitch;
-    Y += pitch;
-  }
-
-  /* vertical padding Y */
-  s = padded + bx;
-  Y = frame->y + bx;
-  for(x = bx; x < bw; x++)
-  {
-    y = by;
-    while(y < bh)
-    {
-      p1 = p2 = 128;
-      e = fame_min(y+32, bh);
-      r2 = y;
-      for(r1 = y; r1 < e; r1++)
-	if(s[r1*pitch]) {
-	  p1 = Y[r1*pitch];
-	  p2 = p1;
-	  for(r2 = r1 + 1; r2 < bh; r2++)
-	    if(s[r2*pitch]) {
-	      p2 = (unsigned char) (((short) p1 + (short) Y[r2*pitch]) >> 1);
-	      break;
-	    }
-	  if(p1 == p2)
-	    r2 = fame_min((r1 & (~15))+32, r2);
-	  break;
+	if((i&2) && b[0] >= bab_border_16x16 && b[pitch] == bab_not_coded) {
+	  /* fix bottommost half sample (= int sample) : */
+	  /*          ...                                */
+	  /*           X                                 */
+	  /*           x<-- = X+0/2 = X/2, should be X   */
+	  /*           0                                 */
+	  d = Y + (pitch<<8) - (pitch<<4); s = Yv + (pitch<<8) - (pitch<<4);
+	  memcpy(d, s, 16);
+	  d = U + (pitch<<6) - (pitch<<3); s = Uv + (pitch<<6) - (pitch<<3);
+	  memcpy(d, s, 8);
+	  d = V + (pitch<<6) - (pitch<<3); s = Vv + (pitch<<6) - (pitch<<3);
+	  memcpy(d, s, 8);
 	}
-      for(; y < r1; y++)
-	Y[y*pitch] = p1;
-      for(; y < r2; y++)
-	Y[y*pitch] = p2;
+	b++;
+	Y += 16;
+	U += 8;
+	V += 8;
+	Yh += 16;
+	Uh += 8;
+	Vh += 8;
+	Yv += 16;
+	Uv += 8;
+	Vv += 8;
+      }
+      b += 2; /* borders */
+      Y += (pitch << 8) - ((width-2) << 4);
+      U += (pitch << 6) - ((width-2) << 3);
+      V += (pitch << 6) - ((width-2) << 3);
+      Yh += (pitch << 8) - ((width-2) << 4);
+      Uh += (pitch << 6) - ((width-2) << 3);
+      Vh += (pitch << 6) - ((width-2) << 3);
+      Yv += (pitch << 8) - ((width-2) << 4);
+      Uv += (pitch << 6) - ((width-2) << 3);
+      Vv += (pitch << 6) - ((width-2) << 3);
     }
-    s ++;
-    Y ++;
   }
-  
 
+  Y = frame[i]->y - (pitch << 8) - 16;
+  U = frame[i]->u - (pitch << 6) - 8;
+  V = frame[i]->v - (pitch << 6) - 8;
+  Yh = frame[i&2]->y - (pitch << 8) - 16;
+  Uh = frame[i&2]->u - (pitch << 6) - 8;
+  Vh = frame[i&2]->v - (pitch << 6) - 8;
+  Yv = frame[i&1]->y - (pitch << 8) - 16;
+  Uv = frame[i&1]->u - (pitch << 6) - 8;
+  Vv = frame[i&1]->v - (pitch << 6) - 8;
 
-  bx >>= 1;
-  by >>= 1;
-  bw >>= 1;
-  bh >>= 1;
-
-  /* horizontal padding Cr, Cb */
-  s = shape + (by<<1)*pitch;
-  U = frame->u + by*(pitch>>1);
-  V = frame->v + by*(pitch>>1);
-  for(y = by; y < bh; y++)
-  {
-    x = bx;
-    while(x < bw)
-    {
-      p1u = p2u = 128;
-      p1v = p2v = 128;
-      e = fame_min(x+16, bw);
-      r2 = x;
-      for(r1 = x; r1 < e; r1++)
-	if(s[(r1 << 1)] | s[(r1 << 1) + 1] |
-	   s[pitch + (r1 << 1)] | s[pitch + (r1 << 1) + 1]) {
-	  p1u = U[r1];
-	  p1v = V[r1];
-	  p2u = p1u;
-	  p2v = p1v;
-	  for(r2 = r1 + 1; r2 < bw; r2++)
-	    if(s[(r2 << 1)] | s[(r2 << 1) + 1] |
-	       s[pitch + (r2 << 1)] | s[pitch + (r2 << 1) + 1]) {
-	      p2u = (unsigned char) (((short) p1u + (short) U[r2]) >> 1);
-	      p2v = (unsigned char) (((short) p1v + (short) V[r2]) >> 1);
-	      break;
-	    }
-	  if(p1u == p2u && p1v == p2v)
-	    r2 = fame_min((r1 & (~7))+16, r2);
-	  break;
+  for(y = 0; y < height; y ++) {
+    for(x = 0; x < width; x ++) {
+      if(*bab_map == bab_not_coded) {
+	if(x > 0 && bab_map[-1] >= bab_all_coded) {
+	  /* pad from left */
+	  d = Y; s = Yh-1;
+	  for(k = 0; k < 16; k++, d += (pitch<<4), s += (pitch<<4))
+	    memset(d, *s, 16);
+	  d = U; s = Uh-1;
+	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
+	    memset(d, *s, 8);
+	  d = V; s = Vh-1;
+	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
+	    memset(d, *s, 8);
 	}
-      for(; x < r1; x++) {
-	U[x] = p1u;
-	V[x] = p1v;
-      }
-      for(; x < r2; x++) {
-	U[x] = p2u;
-	V[x] = p2v;
-      }
-    }
-    s += pitch << 1;
-    U += pitch >> 1;
-    V += pitch >> 1;
-  }
-
-  /* vertical padding Cr, Cb */
-  s = padded + (bx<<1);
-  U = frame->u + bx;
-  V = frame->v + bx;
-  for(x = bx; x < bw; x++)
-  {
-    y = by;
-    while(y < bh)
-    {
-      p1u = p2u = 128;
-      p1v = p2v = 128;
-      e = fame_min(y+16, bh);
-      r2 = y;
-      for(r1 = y; r1 < e; r1++)
-	if(s[(r1 << 1)*pitch] | s[(r1 << 1)*pitch + pitch] |
-	   s[(r1 << 1)*pitch + 1] | s[(r1 << 1)*pitch + pitch + 1]) {
-	  p1u = U[r1*(pitch >> 1)];
-	  p1v = V[r1*(pitch >> 1)];
-	  p2u = p1u;
-	  p2v = p1v;
-	  for(r2 = r1 + 1; r2 < bh; r2++)
-	    if(s[(r2 << 1)*pitch] | s[(r2 << 1)*pitch + pitch] |
-	       s[(r2 << 1)*pitch + 1] | s[(r2 << 1)*pitch + pitch + 1]) {
-	      p2u = (unsigned char) (((short)p1u+(short)U[r2*(pitch>>1)])>>1);
-	      p2v = (unsigned char) (((short)p1v+(short)V[r2*(pitch>>1)])>>1);
-	      break;
-	    }
-	  if(p1u == p2u && p1v == p2v)
-	    r2 = fame_min((r1 & (~7))+16, r2);
-	  break;
+	else if(y > 0 && bab_map[-pitch] >= bab_all_coded) {
+	  /* pad from above */
+	  d = Y; s = Yv - (pitch << 4); /* Y */
+	  for(k = 0; k < 16; k++, d += (pitch<<4)) memcpy(d, s, 16);
+	  d = U; s = Uv - (pitch << 3); /* U */
+	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
+	  d = V; s = Vv - (pitch << 3); /* V */
+	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
+	} 
+	else if(x < width-1 && bab_map[1] >= bab_all_coded) {
+	  /* pad from right */
+	  d = Y; s = Yh+16;
+	  for(k = 0; k < 16; k++, d += (pitch<<4), s += (pitch<<4))
+	    memset(d, *s, 16);
+	  d = U; s = Uh+8;
+	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
+	    memset(d, *s, 8);
+	  d = V; s = Vh+8;
+	  for(k = 0; k < 8; k++, d += (pitch<<3), s += (pitch<<3))
+	    memset(d, *s, 8);
+	}
+	else if(y < height-1 && bab_map[pitch] >= bab_all_coded) {
+	  /* pad from below */
+	  d = Y; s = Yv + (pitch << 8); /* Y */
+	  for(k = 0; k < 16; k++, d += (pitch<<4)) memcpy(d, s, 16);
+	  d = U; s = Uv + (pitch << 6); /* U */
+	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
+	  d = V; s = Vv + (pitch << 6); /* V */
+	  for(k = 0; k < 8; k++, d += (pitch<<3)) memcpy(d, s, 8);
+	} 
+	else
+	{
+	  /* pad with grey */
+	  d = Y; for(k = 0; k < 16; k++, d += (pitch<<4)) memset(d, 128, 16);
+	  d = U; for(k = 0; k < 8; k++, d += (pitch<<3)) memset(d, 128, 8);
+	  d = V; for(k = 0; k < 8; k++, d += (pitch<<3)) memset(d, 128, 8);
 	}
-      for(; y < r1; y++) {
-	U[y*(pitch >> 1)] = p1u;
-	V[y*(pitch >> 1)] = p1v;
-      }
-      for(; y < r2; y++) {
-	U[y*(pitch >> 1)] = p2u;
-	V[y*(pitch >> 1)] = p2v;
       }
+      bab_map++;
+      Y += 16;
+      U += 8;
+      V += 8;
+      Yh += 16;
+      Uh += 8;
+      Vh += 8;
+      Yv += 16;
+      Uv += 8;
+      Vv += 8;
     }
-    s += 2;
-    U ++;
-    V ++;
+    Y += (pitch << 8) - (width << 4);
+    U += (pitch << 6) - (width << 3);
+    V += (pitch << 6) - (width << 3);
+    Yh += (pitch << 8) - (width << 4);
+    Uh += (pitch << 6) - (width << 3);
+    Vh += (pitch << 6) - (width << 3);
+    Yv += (pitch << 8) - (width << 4);
+    Uv += (pitch << 6) - (width << 3);
+    Vv += (pitch << 6) - (width << 3);
   }
+}
+
+
+/*  extended_pad_withoutmask                                                 */
+/*                                                                           */
+/*  Description:                                                             */
+/*    Perform extended padding of rectangular video for motion estimation.   */
+/*                                                                           */
+/*  Arguments:                                                               */
+/*    int i: reference number                                                */
+/*    int width: width of the frame                                          */
+/*    int height: height of the frame                                        */
+/*    fame_yuv_t **frame: frame array; frame[i] is padded                    */
+/*      (edge pixels are read from frame[i&2] and frame[i&1])               */
+/*    unsigned char *shape: shape of the frame (not used)                    */
+/*    unsigned char *bab_map: binary alpha block type map (not used)         */
+/*    fame_box_t *box: bounding box                                          */
+/*                                                                           */
+/*  Return value:                                                            */
+/*    None.                                                                  */
+
+static void inline extended_pad_withoutmask(int i,
+				   int width,
+				   int height,
+				   fame_yuv_t **frame,
+				   unsigned char *shape,   /* not used */
+				   unsigned char *bab_map, /* not used */
+				   fame_box_t *box)
+{
+  int y, w, h, wr, hr, p, e;
+  unsigned char *s, *d;
+
+#define extended_pad_component(comp)		\
+{						\
+  /* pad horizontally for this component */	\
+  d = frame[i]->comp;				\
+  s = frame[i&2]->comp;				\
+  for(y = 0; y < h; y++) {			\
+    memset(d-e, s[0], e);			\
+    memset(d+w, s[w-1], wr - w + e);		\
+    d += p;					\
+    s += p;					\
+  }						\
+  /* pad vertically for this component */	\
+  s = frame[i&1]->comp;				\
+  d = frame[i]->comp - p;			\
+  for(y = 0;y < e; y++) {			\
+    memcpy(d - e, s - e, wr + 2*e);		\
+    d -= p;					\
+  }						\
+  s = frame[i&1]->comp + (h-1)*p;		\
+  d = frame[i]->comp + h*p;			\
+  for(y = h; y < hr+e; y++) {			\
+    memcpy(d - e, s - e, wr + 2*e);		\
+    d += p;					\
+  }						\
+}
+
+  e = 16;
+  w = frame[i]->w;
+  h = frame[i]->h;
+  wr = (w+15)&(~15); /* round to the next 16-pixel boundary */
+  hr = (h+15)&(~15); /* round to the next 16-pixel boundary */
+  p = frame[i]->p;
+  
+  extended_pad_component(y);
+
+  p >>= 1;
+  h >>= 1;
+  w >>= 1;
+  hr >>= 1;
+  wr >>= 1;
+  e >>= 1;
+
+  extended_pad_component(u);
+  extended_pad_component(v);  
 }

Index: reconstruct_float.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/reconstruct_float.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- reconstruct_float.h	13 Mar 2002 01:14:34 -0000	1.1
+++ reconstruct_float.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,10 +18,10 @@
 */
 /*************************** reconstruct reference frame *********************/
 
-/*  reconstruct_Y                                                            */
+/*  reconstruct                                                            */
 /*                                                                           */
 /*  Description:                                                             */
-/*    Reconstruct a Y block in reference plane.                              */
+/*    Reconstruct a block in reference plane.                              */
 /*                                                                           */
 /*  Arguments:                                                               */
 /*    unsigned char *plane: the reference plane                              */
@@ -34,9 +34,9 @@
 /*  Return value:                                                            */
 /*    None.                                                                  */
 
-static void inline reconstruct_Y(unsigned char *plane,
-			         dct_t *block,
-				 int pitch)
+static void inline reconstruct(unsigned char *plane,
+			       dct_t *block,
+			       int pitch)
 {
   int i, j;
   int v;
@@ -45,8 +45,8 @@
   {
     for(j = 0; j < 8; j++)
     {
-      v = (int) block[(i<<3)+j];
-      if(v < 0) v = 0;
+      v = 0;
+      if(block[(i<<3)+j] > 0) v = (int) (block[(i<<3)+j] + 0.5);
       if(v > 255) v = 255;
       plane[j] = (unsigned char) v;
     }
@@ -54,40 +54,6 @@
   }
 }
 
-/*  reconstruct_C                                                            */
-/*                                                                           */
-/*  Description:                                                             */
-/*    Reconstruct a C block in reference plane.                              */
-/*                                                                           */
-/*  Arguments:                                                               */
-/*    unsigned char *plane: the reference plane                              */
-/*    short width: width of the plane                                        */
-/*    short height: height of the plane                                      */
-/*    dct_t *block: the block to reconstruct                                 */
-/*    short x: x position of the block in pixel units.                       */
-/*    short y: y position of the block in pixel units.                       */
-/*                                                                           */
-/*  Return value:                                                            */
-/*    None.                                                                  */
-
-
-static void inline reconstruct_C(unsigned char *plane,
-				 dct_t *block,
-				 int pitch)
-{
-  int i, j;
-  int v;
-
-  for(i = 0; i < 8; i++) {
-    for(j = 0; j < 8; j++) {
-      v = (int) block[(i<<3)+j] + 128.0;
-      if(v < 0) v = 0;
-      if(v > 255) v = 255;
-      plane[j] = (unsigned char) v;
-    }
-    plane += pitch;
-  }
-}
 
 /*  sum                                                                      */
 /*                                                                           */

Index: reconstruct_mmx.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/reconstruct_mmx.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- reconstruct_mmx.h	13 Mar 2002 01:14:34 -0000	1.1
+++ reconstruct_mmx.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,12 +18,9 @@
 */
 /*************************** reconstruct reference frame *********************/
 
-short const _mmx_128[] = { 128, 128, 128, 128 };
-char const _mmx_128c[] = { 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static void inline reconstruct_Y(unsigned char *plane,
-				 dct_t *block,
-				 int pitch)
+static void inline reconstruct(unsigned char *plane,
+			       dct_t *block,
+			       int pitch)
 {
   int dummy;
 
@@ -87,81 +84,6 @@
 		: "memory");
 }
 
-static void inline reconstruct_C(unsigned char *plane,
-				 dct_t *block,
-				 int pitch)
-{
-  int dummy;
-  register unsigned char const *mmx_128c = _mmx_128c;
-
-  asm volatile ("movq (%3), %%mm4\n"
-		"movq 0x00(%1), %%mm0\n"
-		"movq 0x08(%1), %%mm1\n"
-		"movq 0x10(%1), %%mm2\n"
-		"movq 0x18(%1), %%mm3\n"
-		"psraw $0x04, %%mm0\n"
-		"psraw $0x04, %%mm1\n"
-		"psraw $0x04, %%mm2\n"
-		"psraw $0x04, %%mm3\n"
-		"packsswb %%mm1, %%mm0\n"
-		"packsswb %%mm3, %%mm2\n"
-		"paddb %%mm4, %%mm0\n"
-		"paddb %%mm4, %%mm2\n"
-		"movq %%mm0, (%0)\n"
-		"movq %%mm2, (%0, %2)\n"
-		"addl %2, %0\n"
-		"addl %2, %0\n"
-		"movq 0x20(%1), %%mm0\n"
-		"movq 0x28(%1), %%mm1\n"
-		"movq 0x30(%1), %%mm2\n"
-		"movq 0x38(%1), %%mm3\n"
-		"psraw $0x04, %%mm0\n"
-		"psraw $0x04, %%mm1\n"
-		"psraw $0x04, %%mm2\n"
-		"psraw $0x04, %%mm3\n"
-		"packsswb %%mm1, %%mm0\n"
-		"packsswb %%mm3, %%mm2\n"
-		"paddb %%mm4, %%mm0\n"
-		"paddb %%mm4, %%mm2\n"
-		"movq %%mm0, (%0)\n"
-		"movq %%mm2, (%0, %2)\n"
-		"addl %2, %0\n"
-		"addl %2, %0\n"
-		"movq 0x40(%1), %%mm0\n"
-		"movq 0x48(%1), %%mm1\n"
-		"movq 0x50(%1), %%mm2\n"
-		"movq 0x58(%1), %%mm3\n"
-		"psraw $0x04, %%mm0\n"
-		"psraw $0x04, %%mm1\n"
-		"psraw $0x04, %%mm2\n"
-		"psraw $0x04, %%mm3\n"
-		"packsswb %%mm1, %%mm0\n"
-		"packsswb %%mm3, %%mm2\n"
-		"paddb %%mm4, %%mm0\n"
-		"paddb %%mm4, %%mm2\n"
-		"movq %%mm0, (%0)\n"
-		"movq %%mm2, (%0, %2)\n"
-		"addl %2, %0\n"
-		"addl %2, %0\n"
-		"movq 0x60(%1), %%mm0\n"
-		"movq 0x68(%1), %%mm1\n"
-		"movq 0x70(%1), %%mm2\n"
-		"movq 0x78(%1), %%mm3\n"
-		"psraw $0x04, %%mm0\n"
-		"psraw $0x04, %%mm1\n"
-		"psraw $0x04, %%mm2\n"
-		"psraw $0x04, %%mm3\n"
-		"packsswb %%mm1, %%mm0\n"
-		"packsswb %%mm3, %%mm2\n"
-		"paddb %%mm4, %%mm0\n"
-		"paddb %%mm4, %%mm2\n"
-		"movq %%mm0, (%0)\n"
-		"movq %%mm2, (%0, %2)\n"
-		: "=r"(dummy), "=r"(block), "=r"(pitch), "=r"(mmx_128c)
-		: "0"(plane), "1"(block), "2"(pitch), "3"(mmx_128c)
-		: "memory");
-}
-
 static void inline sum(unsigned char *plane,
 		       unsigned char *ref,
 		       unsigned int *sum,
@@ -191,8 +113,8 @@
     "psrlw $0x0c, %%mm4\n"          /* keep only 'fixed point' part */	\
     "paddsw %%mm3, %%mm0\n"         /* add fixed point to number */	\
     "paddsw %%mm4, %%mm1\n"         /* add fixed point to number */	\
-    "psraw $0x04, %%mm0\n"          /* mm0 /= 64 (rounded) */		\
-    "psraw $0x04, %%mm1\n"          /* mm1 /= 64 (rounded) */		\
+    "psraw $0x04, %%mm0\n"          /* mm0 /= 16 (rounded) */		\
+    "psraw $0x04, %%mm1\n"          /* mm1 /= 16 (rounded) */		\
     "por %%mm0, %%mm6\n"            /* accumulate sum */		\
     "por %%mm1, %%mm6\n"            /* accumulate sum */		\
     "paddw %%mm0, %%mm2\n"          /* add to ref */			\
@@ -229,7 +151,7 @@
 			unsigned char *ref,
 			int pitch)
 {
-  int dummy;
+  int dummy1, dummy2;
 
 #define MOVE_STEP() 					\
   "movq (%2), %%mm0\n"      /* mm0 = [ref] */		\
@@ -245,7 +167,7 @@
 		MOVE_STEP()
 		MOVE_STEP()
 		MOVE_STEP()
-		: "=r"(dummy), "=r"(pitch), "=r"(ref)
+		: "=r"(dummy1), "=r"(pitch), "=r"(dummy2)
 		: "0"(plane), "1"(pitch), "2"(ref)
 		: "memory");
 }

Index: table_cae.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_cae.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_cae.h	13 Mar 2002 01:14:34 -0000	1.1
+++ table_cae.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -1,4 +1,6 @@
-static unsigned short const cae_intra_prob[1024] = {
+#include "fame.h"
+
+FAME_ALIGNED static unsigned short const cae_intra_prob[1024] = {
   0xfef3, 0x4054, 0xfdeb, 0x45f8, 0xfc3d, 0x216c, 0xfafc, 0x1615, 
   0x9cee, 0x0f5c, 0x745d, 0x0115, 0xb060, 0x0474, 0x8000, 0x07fb, 
   0x1193, 0x0050, 0x199a, 0x0478, 0x5249, 0x01d1, 0x8000, 0x031f, 
@@ -129,7 +131,7 @@
   0xef17, 0x1ba5, 0xc47b, 0x05ea, 0xea8f, 0x0a4a, 0xb598, 0x00eb
 };
 
-static unsigned short const cae_inter_prob[512] = {
+FAME_ALIGNED static unsigned short const cae_inter_prob[512] = {
   0xfffc, 0xf5fa, 0xfe7c, 0xd555, 0xf406, 0x2000, 0xf471, 0x22e9, 
   0xffc8, 0xfb4f, 0xfeab, 0xd000, 0xff2a, 0xf436, 0xf5ab, 0x95a8, 
   0xfe33, 0xd09d, 0xf9ec, 0xe666, 0x8000, 0x8000, 0xd555, 0x8000, 

Index: table_cbp_mpeg1.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_cbp_mpeg1.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_cbp_mpeg1.h	13 Mar 2002 01:14:34 -0000	1.1
+++ table_cbp_mpeg1.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -19,8 +19,7 @@
 
 #include "fame.h"
 
-
-static const fame_vlc_t mb_addr_inc[34] = {
+FAME_ALIGNED static const fame_vlc_t mb_addr_inc[34] = {
   { 0x1, 1},
   { 0x3, 3},
   { 0x2, 3},
@@ -57,7 +56,7 @@
   { 0x08, 11}
 };
 
-static const fame_vlc_t mb_pattern_table[64] = {
+FAME_ALIGNED static const fame_vlc_t mb_pattern_table[64] = {
   { 0x01,  9 },
   { 0x0b,  5 },
   { 0x09,  5 },

Index: table_cbp_mpeg4.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_cbp_mpeg4.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_cbp_mpeg4.h	13 Mar 2002 01:14:34 -0000	1.1
+++ table_cbp_mpeg4.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -19,35 +19,56 @@
 
 #include "fame.h"
 
-static const fame_vlc_t mcbpc_I[4] = {
+FAME_ALIGNED static const fame_vlc_t mcbpc_I[4] = {
   { 1, 1 },
   { 1, 3 },
   { 2, 3 },
-  { 3, 3 }
+  { 3, 3 },
 };
 
-static const fame_vlc_t mcbpc_P_intra[4] = {
+FAME_ALIGNED static const fame_vlc_t mcbpc_I_dq[4] = {
+  { 1, 4 },
+  { 1, 6 },
+  { 2, 6 },
+  { 3, 6 }
+};
+
+FAME_ALIGNED static const fame_vlc_t mcbpc_P_intra[4] = {
   { 3, 5 },
   { 4, 8 },
   { 3, 8 },
   { 3, 7 }
 };
 
-static const fame_vlc_t mcbpc_P_inter[4] = {
+FAME_ALIGNED static const fame_vlc_t mcbpc_P_intra_dq[4] = {
+  { 4, 6 },
+  { 4, 9 },
+  { 3, 9 },
+  { 2, 9 }
+};
+
+FAME_ALIGNED static const fame_vlc_t mcbpc_P_inter[4] = {
   { 1, 1 },
   { 3, 4 },
   { 2, 4 },
   { 5, 6 }
 };
 
-static const fame_vlc_t mcbpc_P_inter4v[4] = {
+FAME_ALIGNED static const fame_vlc_t mcbpc_P_inter_dq[4] = {
+  { 0, 12 },
+  { 1, 12 },
+  { 2, 12 },
+  { 3, 12 }
+};
+
+FAME_ALIGNED static const fame_vlc_t mcbpc_P_inter4v[4] = {
   { 2, 3 },
   { 5, 7 },
   { 4, 7 },
   { 5, 8 }
 };
 
-static const fame_vlc_t cbpy_four[16] = {
+FAME_ALIGNED static const fame_vlc_t cbpy_four[16] = {
   { 0x03, 4 },
   { 0x05, 5 },
   { 0x04, 5 },
@@ -66,7 +87,7 @@
   { 0x03, 2 }
 };
 
-static const fame_vlc_t cbpy_three[8] = {
+FAME_ALIGNED static const fame_vlc_t cbpy_three[8] = {
   { 0x03, 3 },
   { 0x01, 6 },
   { 0x01, 5 },
@@ -77,19 +98,19 @@
   { 0x01, 1 }
 };
 
-static const fame_vlc_t cbpy_two[4] = {
+FAME_ALIGNED static const fame_vlc_t cbpy_two[4] = {
   { 0x01, 4 },
   { 0x01, 3 },
   { 0x01, 2 },
   { 0x01, 1 }
 };
 
-static const fame_vlc_t cbpy_one[2] = {
+FAME_ALIGNED static const fame_vlc_t cbpy_one[2] = {
   { 0x01, 2 },
   { 0x01, 1 },
 };
 
-static const fame_vlc_t *cbpy[4] = {
+FAME_ALIGNED static const fame_vlc_t *cbpy[4] = {
   cbpy_one,
   cbpy_two,
   cbpy_three,

Index: table_clip_mpeg1.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_clip_mpeg1.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_clip_mpeg1.h	13 Mar 2002 01:14:34 -0000	1.1
+++ table_clip_mpeg1.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -17,7 +17,7 @@
     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 
-static short mpeg1_table_clip[4096] = {
+FAME_ALIGNED static short mpeg1_table_clip_data[4096] = {
   -255, -255, -255, -255, -255, -255, -255, -255, 
   -255, -255, -255, -255, -255, -255, -255, -255, 
   -255, -255, -255, -255, -255, -255, -255, -255, 
@@ -531,3 +531,5 @@
   255, 255, 255, 255, 255, 255, 255, 255, 
   255, 255, 255, 255, 255, 255, 255, 255
 };
+
+static short const * const mpeg1_table_clip = mpeg1_table_clip_data + 2048;

Index: table_clip_mpeg4.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_clip_mpeg4.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_clip_mpeg4.h	13 Mar 2002 01:14:34 -0000	1.1
+++ table_clip_mpeg4.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -17,7 +17,7 @@
     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 
-static short mpeg4_table_clip[4096] = {
+FAME_ALIGNED static short const mpeg4_table_clip_data[4096] = {
   -255, -255, -255, -255, -255, -255, -255, -255, 
   -255, -255, -255, -255, -255, -255, -255, -255, 
   -255, -255, -255, -255, -255, -255, -255, -255, 
@@ -531,3 +531,5 @@
   255, 255, 255, 255, 255, 255, 255, 255, 
   255, 255, 255, 255, 255, 255, 255, 255
 };
+
+static short const * const mpeg4_table_clip = mpeg4_table_clip_data + 2048;

Index: table_dc_mpeg1.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_dc_mpeg1.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_dc_mpeg1.h	13 Mar 2002 01:14:34 -0000	1.1
+++ table_dc_mpeg1.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -19,7 +19,7 @@
 
 #include "fame.h"
 
-static fame_vlc_t encode_ydc_table[511] = 
+FAME_ALIGNED static fame_vlc_t encode_ydc_table[511] = 
 {
   {0x7e00, 15},
   {0x7e01, 15},
@@ -534,7 +534,7 @@
   {0x7eff, 15}
 };
 
-static fame_vlc_t encode_cdc_table[511] = 
+FAME_ALIGNED static fame_vlc_t encode_cdc_table[511] = 
 {
   {0x0fe00, 16},
   {0x0fe01, 16},

Index: table_dc_mpeg4.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_dc_mpeg4.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_dc_mpeg4.h	13 Mar 2002 01:14:34 -0000	1.1
+++ table_dc_mpeg4.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -19,7 +19,7 @@
 
 #include "fame.h"
 
-static const fame_vlc_t encode_ydc_table[511] = {
+FAME_ALIGNED static const fame_vlc_t encode_ydc_table[511] = {
 	{ 0x100, 15 },
 	{ 0x101, 15 },
 	{ 0x102, 15 },
@@ -533,7 +533,7 @@
 	{ 0x1ff, 15 },
 };
 
-static const fame_vlc_t encode_cdc_table[511] = {
+FAME_ALIGNED static const fame_vlc_t encode_cdc_table[511] = {
 	{ 0x100, 16 },
 	{ 0x101, 16 },
 	{ 0x102, 16 },

Index: table_mv.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_mv.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_mv.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_mv.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -1,4 +1,4 @@
-static const fame_vlc_t mb_motion_table[65] = {
+FAME_ALIGNED static const fame_vlc_t mb_motion_table[65] = {
   { 0x05, 13 }, /* -32  */
   { 0x07, 13 }, /* -31  */
   { 0x05, 12 }, /* -30  */

Index: table_quant_mpeg1.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_quant_mpeg1.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_quant_mpeg1.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_quant_mpeg1.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,7 +18,19 @@
 */
 
 /* The default MPEG-1 intra quantization table */
-static unsigned char mpeg1_intra_quantisation_table[64] = {
+#if defined(HAS_MMX)
+/* transposed table (used when HAS_MMX is defined) */
+FAME_ALIGNED static unsigned char mpeg1_intra_quantisation_table[64] = {
+      8,16,19,22,22,26,26,27,
+     16,16,22,22,26,27,27,29,
+     19,22,26,26,27,29,29,35,
+     22,24,27,27,29,32,34,38,
+     26,27,29,29,32,35,38,46,
+     27,29,34,34,35,40,46,56,
+     29,34,34,37,40,48,56,69,
+     34,37,38,40,48,58,69,83 };
+#else
+FAME_ALIGNED static unsigned char mpeg1_intra_quantisation_table[64] = {
       8,16,19,22,26,27,29,34,
      16,16,22,24,27,29,34,37,
      19,22,26,27,29,34,34,38,
@@ -27,9 +39,11 @@
      26,27,29,32,35,40,48,58,
      26,27,29,34,38,46,56,69,
      27,29,35,38,46,56,69,83 };
+#endif
 
 /* The default MPEG-1 inter quantization table */
-static unsigned char mpeg1_inter_quantisation_table[64] = {
+/* table is symmetric */
+FAME_ALIGNED static unsigned char mpeg1_inter_quantisation_table[64] = {
      16,16,16,16,16,16,16,16,
      16,16,16,16,16,16,16,16,
      16,16,16,16,16,16,16,16,

Index: table_quant_mpeg4.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_quant_mpeg4.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_quant_mpeg4.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_quant_mpeg4.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,7 +18,19 @@
 */
 
 /* The default MPEG-4 intra quantization table */
-static unsigned char mpeg4_intra_quantisation_table[64] = {
+#if defined(HAS_MMX)
+/* transposed table (used when HAS_MMX is defined) */
+FAME_ALIGNED static unsigned char mpeg4_intra_quantisation_table[64] = {
+      8,17,20,21,22,23,25,27,
+     17,18,21,22,23,24,26,28,
+     18,19,22,23,24,26,28,30,
+     19,21,23,24,26,28,30,32,
+     21,23,24,26,28,30,32,35,
+     23,25,26,28,30,32,35,38,
+     25,27,28,30,32,35,38,41,
+     27,28,30,32,35,38,41,45 };
+#else
+FAME_ALIGNED static unsigned char mpeg4_intra_quantisation_table[64] = {
       8,17,18,19,21,23,25,27,
      17,18,19,21,23,25,27,28,
      20,21,22,23,24,26,28,30,
@@ -27,9 +39,11 @@
      23,24,26,28,30,32,35,38,
      25,26,28,30,32,35,38,41,
      27,28,30,32,35,38,41,45 };
+#endif
 
 /* The default MPEG-4 inter quantisation table */
-static unsigned char mpeg4_inter_quantisation_table[64] = {
+/* table is symmetric */
+FAME_ALIGNED static unsigned char mpeg4_inter_quantisation_table[64] = {
      16,17,18,19,20,21,22,23,
      17,18,19,20,21,22,23,24,
      18,19,20,21,22,23,24,25,

Index: table_rlehuff_mpeg1.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_rlehuff_mpeg1.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_rlehuff_mpeg1.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_rlehuff_mpeg1.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -19,7 +19,7 @@
 
 #define HUFFMAXRUN 32
 
-static fame_vlc_t rlehuff_table[511] =
+FAME_ALIGNED static fame_vlc_t rlehuff_table[511] =
 {
   { 0x8001, 16},
   { 0x8002, 16},
@@ -534,7 +534,7 @@
   { 0x00FF, 16}
 };
                
-static unsigned long huff_table0[] =
+FAME_ALIGNED static unsigned long huff_table0[] =
                 { 0x21, 0x23, 0x25, 0x27, 0x29, 0x2b, 0x2d, 0x2f,
                   0x31, 0x21, 0x23, 0x25, 0x27, 0x29, 0x2b, 0x2d,
 		  0x2f, 0x31, 0x33, 0x35, 0x37, 0x39, 0x3b, 0x3d,
@@ -546,7 +546,7 @@
 		  0x2e, 0x2c, 0x2a, 0x28, 0x26, 0x24, 0x22, 0x20,
 		  0x30, 0x2e, 0x2c, 0x2a, 0x28, 0x26, 0x24, 0x22,
 		  0x20 };
-static unsigned long huff_bits0[] = { 16, 16, 16, 16, 16, 16, 16, 16,
+FAME_ALIGNED static unsigned long huff_bits0[] = { 16, 16, 16, 16, 16, 16, 16, 16,
 		 16, 15, 15, 15, 15, 15, 15, 15,
 		 15, 15, 15, 15, 15, 15, 15, 15,
 		 15, 14, 14, 14, 14, 13, 13, 13,
@@ -558,112 +558,112 @@
 		 16, 16, 16, 16, 16, 16, 16, 16,
 		 16 };
 
-static unsigned long huff_table1[] = { 0x21, 0x23, 0x25, 0x27, 0x33, 0x35, 0x37, 0x39,
+FAME_ALIGNED static unsigned long huff_table1[] = { 0x21, 0x23, 0x25, 0x27, 0x33, 0x35, 0x37, 0x39,
 		  0x3b, 0x3d, 0x3f, 0x2b, 0x2d, 0x37, 0x19, 0x4b,
 		  0x0d, 0x07, 0x00, 0x06, 0x0c, 0x4a, 0x18, 0x36,
 		  0x2c, 0x2a, 0x3e, 0x3c, 0x3a, 0x38, 0x36, 0x34,
 		  0x32, 0x26, 0x24, 0x22, 0x20 };
-static unsigned long huff_bits1[] = { 17, 17, 17, 17, 16, 16, 16, 16,
+FAME_ALIGNED static unsigned long huff_bits1[] = { 17, 17, 17, 17, 16, 16, 16, 16,
 		 16, 16, 16, 14, 14, 13, 11,  9,
 		  7,  4,  0,  4,  7,  9, 11, 13,
 		 14, 14, 16, 16, 16, 16, 16, 16,
 		 16, 17, 17, 17, 17 };
 
-static unsigned long huff_table2[] = { 0x29, 0x29, 0x17, 0x09, 0x0b, 0x00, 0x0a, 0x08,
+FAME_ALIGNED static unsigned long huff_table2[] = { 0x29, 0x29, 0x17, 0x09, 0x0b, 0x00, 0x0a, 0x08,
 		  0x16, 0x28, 0x28 };
-static unsigned long huff_bits2[] = { 14, 13, 11,  8,  5,  0,  5,  8,
+FAME_ALIGNED static unsigned long huff_bits2[] = { 14, 13, 11,  8,  5,  0,  5,  8,
 		 11, 13, 14 }; 
 
-static unsigned long huff_table3[] = { 0x27, 0x39, 0x49, 0x0f, 0x00, 0x0e, 0x48, 0x38,
+FAME_ALIGNED static unsigned long huff_table3[] = { 0x27, 0x39, 0x49, 0x0f, 0x00, 0x0e, 0x48, 0x38,
 		  0x26 };
-static unsigned long huff_bits3[] = { 14, 13, 9, 6, 0, 6, 9, 13,
+FAME_ALIGNED static unsigned long huff_bits3[] = { 14, 13, 9, 6, 0, 6, 9, 13,
 		 14 };
 
-static unsigned long huff_table4[] = { 0x25, 0x1f, 0x0d, 0x00, 0x0c, 0x1e, 0x24 };
-static unsigned long huff_bits4[] = { 13, 11, 6, 0, 6, 11, 13 };
+FAME_ALIGNED static unsigned long huff_table4[] = { 0x25, 0x1f, 0x0d, 0x00, 0x0c, 0x1e, 0x24 };
+FAME_ALIGNED static unsigned long huff_bits4[] = { 13, 11, 6, 0, 6, 11, 13 };
 
-static unsigned long huff_table5[] = { 0x25, 0x13, 0x0f, 0x00, 0x0e, 0x12, 0x24 };
-static unsigned long huff_bits5[] = { 14, 11, 7, 0, 7, 11, 14 };
+FAME_ALIGNED static unsigned long huff_table5[] = { 0x25, 0x13, 0x0f, 0x00, 0x0e, 0x12, 0x24 };
+FAME_ALIGNED static unsigned long huff_bits5[] = { 14, 11, 7, 0, 7, 11, 14 };
 
-static unsigned long huff_table6[] = { 0x29, 0x3d, 0x0b, 0x00, 0x0a, 0x3c, 0x28 };
-static unsigned long huff_bits6[] = { 17, 13, 7, 0, 7, 13, 17 };
+FAME_ALIGNED static unsigned long huff_table6[] = { 0x29, 0x3d, 0x0b, 0x00, 0x0a, 0x3c, 0x28 };
+FAME_ALIGNED static unsigned long huff_bits6[] = { 17, 13, 7, 0, 7, 13, 17 };
 
-static unsigned long huff_table7[] = { 0x2b, 0x09, 0x00, 0x08, 0x2a };
-static unsigned long huff_bits7[] = { 13, 7, 0, 7, 13 };
+FAME_ALIGNED static unsigned long huff_table7[] = { 0x2b, 0x09, 0x00, 0x08, 0x2a };
+FAME_ALIGNED static unsigned long huff_bits7[] = { 13, 7, 0, 7, 13 };
 
-static unsigned long huff_table8[] = { 0x23, 0x0f, 0x00, 0x0e, 0x22 };
-static unsigned long huff_bits8[] = {13, 8, 0, 8, 13 };
+FAME_ALIGNED static unsigned long huff_table8[] = { 0x23, 0x0f, 0x00, 0x0e, 0x22 };
+FAME_ALIGNED static unsigned long huff_bits8[] = {13, 8, 0, 8, 13 };
 
-static unsigned long huff_table9[] = {0x23, 0x0b, 0x00, 0x0a, 0x22 };
-static unsigned long huff_bits9[] = {14, 8, 0, 8, 14 };
+FAME_ALIGNED static unsigned long huff_table9[] = {0x23, 0x0b, 0x00, 0x0a, 0x22 };
+FAME_ALIGNED static unsigned long huff_bits9[] = {14, 8, 0, 8, 14 };
 
-static unsigned long huff_table10[] = {0x21, 0x4f, 0x00, 0x4e, 0x20 };
-static unsigned long huff_bits10[] = {14, 9, 0, 9, 14 };
+FAME_ALIGNED static unsigned long huff_table10[] = {0x21, 0x4f, 0x00, 0x4e, 0x20 };
+FAME_ALIGNED static unsigned long huff_bits10[] = {14, 9, 0, 9, 14 };
 
-static unsigned long huff_table11[] = {0x35, 0x47, 0x00, 0x46, 0x34 };
-static unsigned long huff_bits11[] = {17, 9, 0, 9, 17 };
+FAME_ALIGNED static unsigned long huff_table11[] = {0x35, 0x47, 0x00, 0x46, 0x34 };
+FAME_ALIGNED static unsigned long huff_bits11[] = {17, 9, 0, 9, 17 };
 
-static unsigned long huff_table12[] = {0x33, 0x45, 0x00, 0x44, 0x32 };
-static unsigned long huff_bits12[] = {17, 9, 0, 9, 17 };
+FAME_ALIGNED static unsigned long huff_table12[] = {0x33, 0x45, 0x00, 0x44, 0x32 };
+FAME_ALIGNED static unsigned long huff_bits12[] = {17, 9, 0, 9, 17 };
 
-static unsigned long huff_table13[] = {0x31, 0x41, 0x00, 0x40, 0x30 };
-static unsigned long huff_bits13[] = {17, 9, 0, 9, 17 };
+FAME_ALIGNED static unsigned long huff_table13[] = {0x31, 0x41, 0x00, 0x40, 0x30 };
+FAME_ALIGNED static unsigned long huff_bits13[] = {17, 9, 0, 9, 17 };
 
-static unsigned long huff_table14[] = {0x2f, 0x1d, 0x00, 0x1c, 0x2e };
-static unsigned long huff_bits14[] = {17, 11, 0, 11, 17 };
+FAME_ALIGNED static unsigned long huff_table14[] = {0x2f, 0x1d, 0x00, 0x1c, 0x2e };
+FAME_ALIGNED static unsigned long huff_bits14[] = {17, 11, 0, 11, 17 };
 
-static unsigned long huff_table15[] = {0x2d, 0x1b, 0x00, 0x1a, 0x2c };
-static unsigned long huff_bits15[] = {17, 11, 0, 11, 17 };
+FAME_ALIGNED static unsigned long huff_table15[] = {0x2d, 0x1b, 0x00, 0x1a, 0x2c };
+FAME_ALIGNED static unsigned long huff_bits15[] = {17, 11, 0, 11, 17 };
 
-static unsigned long huff_table16[] = {0x2b, 0x11, 0x00, 0x10, 0x2a };
-static unsigned long huff_bits16[] = {17, 11, 0, 11, 17 }; 
+FAME_ALIGNED static unsigned long huff_table16[] = {0x2b, 0x11, 0x00, 0x10, 0x2a };
+FAME_ALIGNED static unsigned long huff_bits16[] = {17, 11, 0, 11, 17 }; 
 
-static unsigned long huff_table17[] = {0x3f, 0x00, 0x3e };
-static unsigned long huff_bits17[] = {13, 0, 13 };
+FAME_ALIGNED static unsigned long huff_table17[] = {0x3f, 0x00, 0x3e };
+FAME_ALIGNED static unsigned long huff_bits17[] = {13, 0, 13 };
 
-static unsigned long huff_table18[] = {0x35, 0x00, 0x34 };
-static unsigned long huff_bits18[] = {13, 0, 13 };
+FAME_ALIGNED static unsigned long huff_table18[] = {0x35, 0x00, 0x34 };
+FAME_ALIGNED static unsigned long huff_bits18[] = {13, 0, 13 };
 
-static unsigned long huff_table19[] = {0x33, 0x00, 0x32 };
-static unsigned long huff_bits19[] = {13, 0, 13 };
+FAME_ALIGNED static unsigned long huff_table19[] = {0x33, 0x00, 0x32 };
+FAME_ALIGNED static unsigned long huff_bits19[] = {13, 0, 13 };
 
-static unsigned long huff_table20[] = {0x2f, 0x00, 0x2e };
-static unsigned long huff_bits20[] = {13, 0, 13 };
+FAME_ALIGNED static unsigned long huff_table20[] = {0x2f, 0x00, 0x2e };
+FAME_ALIGNED static unsigned long huff_bits20[] = {13, 0, 13 };
 
-static unsigned long huff_table21[] = {0x2d, 0x00, 0x2c };
-static unsigned long huff_bits21[] = {13, 0, 13 };
+FAME_ALIGNED static unsigned long huff_table21[] = {0x2d, 0x00, 0x2c };
+FAME_ALIGNED static unsigned long huff_bits21[] = {13, 0, 13 };
 
-static unsigned long huff_table22[] = {0x3f, 0x00, 0x3e };
-static unsigned long huff_bits22[] = {14, 0, 14 };
+FAME_ALIGNED static unsigned long huff_table22[] = {0x3f, 0x00, 0x3e };
+FAME_ALIGNED static unsigned long huff_bits22[] = {14, 0, 14 };
 
-static unsigned long huff_table23[] = {0x3d, 0x00, 0x3c };
-static unsigned long huff_bits23[] = {14, 0, 14 }; 
+FAME_ALIGNED static unsigned long huff_table23[] = {0x3d, 0x00, 0x3c };
+FAME_ALIGNED static unsigned long huff_bits23[] = {14, 0, 14 }; 
 
-static unsigned long huff_table24[] = {0x3b, 0x00, 0x3a };
-static unsigned long huff_bits24[] = {14, 0, 14 };
+FAME_ALIGNED static unsigned long huff_table24[] = {0x3b, 0x00, 0x3a };
+FAME_ALIGNED static unsigned long huff_bits24[] = {14, 0, 14 };
 
-static unsigned long huff_table25[] = {0x39, 0x00, 0x38 };
-static unsigned long huff_bits25[] = {14, 0, 14 };
+FAME_ALIGNED static unsigned long huff_table25[] = {0x39, 0x00, 0x38 };
+FAME_ALIGNED static unsigned long huff_bits25[] = {14, 0, 14 };
 
-static unsigned long huff_table26[] = {0x37, 0x00, 0x36 };
-static unsigned long huff_bits26[] = {14, 0, 14 };
+FAME_ALIGNED static unsigned long huff_table26[] = {0x37, 0x00, 0x36 };
+FAME_ALIGNED static unsigned long huff_bits26[] = {14, 0, 14 };
 
-static unsigned long huff_table27[] = {0x3f, 0x00, 0x3e };
-static unsigned long huff_bits27[] = {17, 0, 17 };
+FAME_ALIGNED static unsigned long huff_table27[] = {0x3f, 0x00, 0x3e };
+FAME_ALIGNED static unsigned long huff_bits27[] = {17, 0, 17 };
 
-static unsigned long huff_table28[] = {0x3d, 0x00, 0x3c };
-static unsigned long huff_bits28[] = {17, 0, 17 };
+FAME_ALIGNED static unsigned long huff_table28[] = {0x3d, 0x00, 0x3c };
+FAME_ALIGNED static unsigned long huff_bits28[] = {17, 0, 17 };
 
-static unsigned long huff_table29[] = {0x3b, 0x00, 0x3a };
-static unsigned long huff_bits29[] = {17, 0, 17 };
+FAME_ALIGNED static unsigned long huff_table29[] = {0x3b, 0x00, 0x3a };
+FAME_ALIGNED static unsigned long huff_bits29[] = {17, 0, 17 };
 
-static unsigned long huff_table30[] = {0x39, 0x00, 0x38 };
-static unsigned long huff_bits30[] = {17, 0, 17 };
+FAME_ALIGNED static unsigned long huff_table30[] = {0x39, 0x00, 0x38 };
+FAME_ALIGNED static unsigned long huff_bits30[] = {17, 0, 17 };
 
-static unsigned long huff_table31[] = {0x37, 0x00, 0x36 };
-static unsigned long huff_bits31[] = {17, 0, 17 };
+FAME_ALIGNED static unsigned long huff_table31[] = {0x37, 0x00, 0x36 };
+FAME_ALIGNED static unsigned long huff_bits31[] = {17, 0, 17 };
 
-static unsigned long * const huff_table[] = {
+FAME_ALIGNED static unsigned long * const huff_table[] = {
    huff_table0,
    huff_table1,
    huff_table2,
@@ -698,7 +698,7 @@
    huff_table31
 };
                 
-static unsigned long * const huff_bits[] = {
+FAME_ALIGNED static unsigned long * const huff_bits[] = {
    huff_bits0,
    huff_bits1,
    huff_bits2,
@@ -733,7 +733,7 @@
    huff_bits31
 };
 
-static short rlehuff_max_level[32] = 
+FAME_ALIGNED static short rlehuff_max_level[32] = 
 {41,19, 6, 5, 4, 4, 4, 3, 3, 3,
   3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

Index: table_rlehuff_mpeg4.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_rlehuff_mpeg4.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_rlehuff_mpeg4.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_rlehuff_mpeg4.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -21,7 +21,7 @@
 /* intra VLC table when not last coefficient                                 */
 /*****************************************************************************/
 
-static fame_vlc_t rlehuff_intra_notlast_run_0[2*27+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_0[2*27+1] = 
 {
   { 0xa5, 13}, /* -27 */
   { 0xa3, 13}, /* -26 */
@@ -80,7 +80,7 @@
   { 0xa4, 13}, /*  27 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_1[2*10+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_1[2*10+1] = 
 {
   { 0xab, 13}, /* -10 */
   { 0xa7, 13}, /*  -9 */
@@ -105,7 +105,7 @@
   { 0xaa, 13}, /*  10 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_2[2*5+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_2[2*5+1] = 
 {
   { 0xad, 13}, /*  -5 */
   { 0x19, 11}, /*  -4 */
@@ -120,7 +120,7 @@
   { 0xac, 13}, /*   5 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_3[2*4+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_3[2*4+1] = 
 {
   { 0x17, 11}, /*  -4 */
   { 0x3b, 10}, /*  -3 */
@@ -133,7 +133,7 @@
   { 0x16, 11}, /*   4 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_4[2*3+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_4[2*3+1] = 
 {
   { 0x15, 11}, /*  -3 */
   { 0x45, 10}, /*  -2 */
@@ -144,7 +144,7 @@
   { 0x14, 11}, /*   3 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_5[2*3+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_5[2*3+1] = 
 {
   { 0x11, 11}, /*  -3 */
   { 0x39, 10}, /*  -2 */
@@ -155,7 +155,7 @@
   { 0x10, 11}, /*   3 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_6[2*3+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_6[2*3+1] = 
 {
   { 0xa9, 13}, /*  -3 */
   { 0x37, 10}, /*  -2 */
@@ -166,7 +166,7 @@
   { 0xa8, 13}, /*   3 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_7[2*3+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_7[2*3+1] = 
 {
   { 0xaf, 13}, /*  -3 */
   { 0x35, 10}, /*  -2 */
@@ -177,7 +177,7 @@
   { 0xae, 13}, /*   3 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_8[2*2+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_8[2*2+1] = 
 {
   { 0x13, 11}, /*  -2 */
   { 0x33,  9}, /*  -1 */
@@ -186,7 +186,7 @@
   { 0x12, 11}, /*   2 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_9[2*2+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_9[2*2+1] = 
 {
   { 0x47, 12}, /*  -2 */
   { 0x31,  9}, /*  -1 */
@@ -195,42 +195,42 @@
   { 0x46, 12}, /*   2 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_10[1*2+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_10[1*2+1] = 
 {
   { 0x2f,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2e,  9}, /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_11[1*2+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_11[1*2+1] = 
 {
   { 0x33, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x32, 10}, /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_12[1*2+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_12[1*2+1] = 
 {
   { 0x31, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x30, 10}, /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_13[1*2+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_13[1*2+1] = 
 {
   { 0x0f, 11}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x0e, 11}, /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_notlast_run_14[1*2+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_notlast_run_14[1*2+1] = 
 {
   { 0xb1, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xb0, 13}, /*   1 */
 };
 
-static fame_vlc_t *rlehuff_intra_notlast_vlc[15] = {
+FAME_ALIGNED static fame_vlc_t *rlehuff_intra_notlast_vlc[15] = {
   rlehuff_intra_notlast_run_0 + 27,
   rlehuff_intra_notlast_run_1 + 10,
   rlehuff_intra_notlast_run_2 + 5,
@@ -252,7 +252,7 @@
 /* inter VLC table when not last coefficient                                 */
 /*****************************************************************************/
 
-static fame_vlc_t rlehuff_inter_notlast_run_0[2*12+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_0[2*12+1] = 
 {
   { 0x41, 12}, /* -12 */ 
   { 0x0d, 12}, /* -11 */ 
@@ -281,7 +281,7 @@
   { 0x40, 12}  /*  12 */ 
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_1[2*6+1] = 
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_1[2*6+1] = 
 {
   { 0xa1, 13}, /*  -6 */
   { 0x43, 12}, /*  -5 */
@@ -298,7 +298,7 @@
   { 0xa0, 13}  /*   6 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_2[2*4+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_2[2*4+1] = {
   { 0xa3, 13}, /*  -4 */
   { 0x1d, 11}, /*  -3 */
   { 0x3b,  9}, /*  -2 */
@@ -310,7 +310,7 @@
   { 0xa2, 13}  /*   4 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_3[2*3+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_3[2*3+1] = {
   { 0x1b, 11}, /*  -3 */
   { 0x47, 10}, /*  -2 */
   { 0x1b,  6}, /*  -1 */
@@ -320,7 +320,7 @@
   { 0x1a, 11}  /*   3 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_4[2*3+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_4[2*3+1] = {
   { 0xa5, 13}, /*  -3 */
   { 0x45, 10}, /*  -2 */
   { 0x19,  6}, /*  -1 */
@@ -330,7 +330,7 @@
   { 0xa4, 13}  /*   3 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_5[2*3+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_5[2*3+1] = {
   { 0xa7, 13}, /*  -3 */
   { 0x19, 11}, /*  -2 */
   { 0x17,  6}, /*  -1 */
@@ -340,7 +340,7 @@
   { 0xa6, 13}  /*   3 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_6[2*3+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_6[2*3+1] = {
   { 0xa9, 13}, /*  -3 */
   { 0x17, 11}, /*  -2 */
   { 0x27,  7}, /*  -1 */
@@ -350,7 +350,7 @@
   { 0xa8, 13}  /*   3 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_7[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_7[2*2+1] = {
   { 0x15, 11}, /*  -2 */
   { 0x25,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -358,7 +358,7 @@
   { 0x14, 11}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_8[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_8[2*2+1] = {
   { 0x13, 11}, /*  -2 */
   { 0x23,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -366,7 +366,7 @@
   { 0x12, 11}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_9[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_9[2*2+1] = {
   { 0x11, 11}, /*  -2 */
   { 0x21,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -374,7 +374,7 @@
   { 0x10, 11}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_10[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_10[2*2+1] = {
   { 0xab, 13}, /*  -2 */
   { 0x2d,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -382,103 +382,103 @@
   { 0xaa, 13}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_11[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_11[2*1+1] = {
   { 0x2b,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2a,  8}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_12[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_12[2*1+1] = {
   { 0x29,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x28,  8}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_13[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_13[2*1+1] = {
   { 0x39,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x38,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_14[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_14[2*1+1] = {
   { 0x37,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x36,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_15[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_15[2*1+1] = {
   { 0x43, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x42, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_16[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_16[2*1+1] = {
   { 0x41, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x40, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_17[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_17[2*1+1] = {
   { 0x3f, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x3e, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_18[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_18[2*1+1] = {
   { 0x3d, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x3c, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_19[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_19[2*1+1] = {
   { 0x3b, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x3a, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_20[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_20[2*1+1] = {
   { 0x39, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x38, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_21[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_21[2*1+1] = {
   { 0x37, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x36, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_22[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_22[2*1+1] = {
   { 0x35, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x34, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_23[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_23[2*1+1] = {
   { 0x45, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x44, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_24[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_24[2*1+1] = {
   { 0x47, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x46, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_25[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_25[2*1+1] = {
   { 0xad, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xac, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_notlast_run_26[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_notlast_run_26[2*1+1] = {
   { 0xaf, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xae, 13}  /*   1 */
 };
 
-static fame_vlc_t *rlehuff_inter_notlast_vlc[27] = {
+FAME_ALIGNED static fame_vlc_t *rlehuff_inter_notlast_vlc[27] = {
   rlehuff_inter_notlast_run_0 + 12,
   rlehuff_inter_notlast_run_1 + 6,
   rlehuff_inter_notlast_run_2 + 4,
@@ -512,7 +512,7 @@
 /* intra VLC table when last coefficient                                     */
 /*****************************************************************************/
 
-static fame_vlc_t rlehuff_intra_last_run_0[2*8+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_0[2*8+1] = {
   { 0xb3, 13}, /*  -8 */
   { 0x09, 12}, /*  -7 */
   { 0x0b, 12}, /*  -6 */
@@ -532,7 +532,7 @@
   { 0xb2, 13}  /*   8 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_1[2*3+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_1[2*3+1] = {
   { 0x0b, 11}, /*  -3 */
   { 0x2d, 10}, /*  -2 */
   { 0x1f,  7}, /*  -1 */
@@ -542,7 +542,7 @@
   { 0x0a, 11}  /*   3 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_2[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_2[2*2+1] = {
   { 0x09, 11}, /*  -2 */
   { 0x1d,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -550,7 +550,7 @@
   { 0x08, 11}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_3[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_3[2*2+1] = {
   { 0x49, 12}, /*  -2 */
   { 0x23,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -558,7 +558,7 @@
   { 0x48, 12}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_4[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_4[2*2+1] = {
   { 0x4b, 12}, /*  -2 */
   { 0x21,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -566,7 +566,7 @@
   { 0x4a, 12}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_5[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_5[2*2+1] = {
   { 0xb5, 13}, /*  -2 */
   { 0x27,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -574,7 +574,7 @@
   { 0xb4, 13}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_6[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_6[2*2+1] = {
   { 0xb7, 13}, /*  -2 */
   { 0x2b,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -582,91 +582,91 @@
   { 0xb6, 13}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_7[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_7[2*1+1] = {
   { 0x29,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x28,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_8[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_8[2*1+1] = {
   { 0x27,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x26,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_9[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_9[2*1+1] = {
   { 0x35,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x34,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_10[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_10[2*1+1] = {
   { 0x2b, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2a, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_11[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_11[2*1+1] = {
   { 0x29, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x28, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_12[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_12[2*1+1] = {
   { 0x27, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x26, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_13[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_13[2*1+1] = {
   { 0x25, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x24, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_14[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_14[2*1+1] = {
   { 0x23, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x22, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_15[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_15[2*1+1] = {
   { 0x4d, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x4c, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_16[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_16[2*1+1] = {
   { 0x4f, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x4e, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_17[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_17[2*1+1] = {
   { 0xb9, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xb8, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_18[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_18[2*1+1] = {
   { 0xbb, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xba, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_19[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_19[2*1+1] = {
   { 0xbd, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xbc, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_intra_last_run_20[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_intra_last_run_20[2*1+1] = {
   { 0xbf, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xbe, 13}  /*   1 */
 };
 
-static fame_vlc_t *rlehuff_intra_last_vlc[21] = {
+FAME_ALIGNED static fame_vlc_t *rlehuff_intra_last_vlc[21] = {
   rlehuff_intra_last_run_0 + 8,
   rlehuff_intra_last_run_1 + 3,
   rlehuff_intra_last_run_2 + 2,
@@ -695,7 +695,7 @@
 /* inter VLC table when last coefficient                                     */
 /*****************************************************************************/
 
-static fame_vlc_t rlehuff_inter_last_run_0[2*3+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_0[2*3+1] = {
   { 0x0b, 12}, /*  -3 */
   { 0x33, 10}, /*  -2 */
   { 0x0f,  5}, /*  -1 */
@@ -705,7 +705,7 @@
   { 0x0a, 12}  /*   3 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_1[2*2+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_1[2*2+1] = {
   { 0x09, 12}, /*  -2 */
   { 0x1f,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
@@ -713,241 +713,241 @@
   { 0x08, 12}  /*   2 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_2[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_2[2*1+1] = {
   { 0x1d,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x1c,  7}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_3[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_3[2*1+1] = {
   { 0x1b,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x1a,  7}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_4[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_4[2*1+1] = {
   { 0x19,  7}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x18,  7}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_5[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_5[2*1+1] = {
   { 0x27,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x26,  8}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_6[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_6[2*1+1] = {
   { 0x25,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x24,  8}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_7[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_7[2*1+1] = {
   { 0x23,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x22,  8}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_8[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_8[2*1+1] = {
   { 0x21,  8}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x20,  8}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_9[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_9[2*1+1] = {
   { 0x35,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x34,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_10[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_10[2*1+1] = {
   { 0x33,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x32,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_11[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_11[2*1+1] = {
   { 0x31,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x30,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_12[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_12[2*1+1] = {
   { 0x2f,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2e,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_13[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_13[2*1+1] = {
   { 0x2d,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2c,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_14[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_14[2*1+1] = {
   { 0x2b,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2a,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_15[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_15[2*1+1] = {
   { 0x29,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x28,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_16[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_16[2*1+1] = {
   { 0x27,  9}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x26,  9}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_17[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_17[2*1+1] = {
   { 0x31, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x30, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_18[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_18[2*1+1] = {
   { 0x2f, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2e, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_19[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_19[2*1+1] = {
   { 0x2d, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2c, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_20[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_20[2*1+1] = {
   { 0x2b, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x2a, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_21[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_21[2*1+1] = {
   { 0x29, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x28, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_22[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_22[2*1+1] = {
   { 0x27, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x26, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_23[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_23[2*1+1] = {
   { 0x25, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x24, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_24[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_24[2*1+1] = {
   { 0x23, 10}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x22, 10}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_25[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_25[2*1+1] = {
   { 0x0f, 11}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x0e, 11}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_26[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_26[2*1+1] = {
   { 0x0d, 11}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x0c, 11}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_27[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_27[2*1+1] = {
   { 0x0b, 11}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x0a, 11}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_28[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_28[2*1+1] = {
   { 0x09, 11}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x08, 11}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_29[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_29[2*1+1] = {
   { 0x49, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x48, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_30[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_30[2*1+1] = {
   { 0x4b, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x4a, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_31[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_31[2*1+1] = {
   { 0x4d, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x4c, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_32[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_32[2*1+1] = {
   { 0x4f, 12}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0x4e, 12}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_33[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_33[2*1+1] = {
   { 0xb1, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xb0, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_34[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_34[2*1+1] = {
   { 0xb3, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xb2, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_35[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_35[2*1+1] = {
   { 0xb5, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xb4, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_36[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_36[2*1+1] = {
   { 0xb7, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xb6, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_37[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_37[2*1+1] = {
   { 0xb9, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xb8, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_38[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_38[2*1+1] = {
   { 0xbb, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xba, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_39[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_39[2*1+1] = {
   { 0xbd, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xbc, 13}  /*   1 */
 };
 
-static fame_vlc_t rlehuff_inter_last_run_40[2*1+1] = {
+FAME_ALIGNED static fame_vlc_t rlehuff_inter_last_run_40[2*1+1] = {
   { 0xbf, 13}, /*  -1 */
   { 0x00,  0}, /*   0 */
   { 0xbe, 13}  /*   1 */
 };
 
-static fame_vlc_t *rlehuff_inter_last_vlc[41] = {
+FAME_ALIGNED static fame_vlc_t *rlehuff_inter_last_vlc[41] = {
   rlehuff_inter_last_run_0 + 3,
   rlehuff_inter_last_run_1 + 2,
   rlehuff_inter_last_run_2 + 1,
@@ -995,31 +995,31 @@
 /* Maximum run & level                                                       */
 /*****************************************************************************/
 
-static char rlehuff_intra_max_level[64] = 
+FAME_ALIGNED static char rlehuff_intra_max_level[64] = 
 {27,10, 5, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-static char rlehuff_intra_max_level_last[64] = 
+FAME_ALIGNED static char rlehuff_intra_max_level_last[64] = 
 { 8, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-static char rlehuff_inter_max_level[64] = 
+FAME_ALIGNED static char rlehuff_inter_max_level[64] = 
 {12, 6, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-static char rlehuff_inter_max_level_last[64] = 
+FAME_ALIGNED static char rlehuff_inter_max_level_last[64] = 
 { 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-static char rlehuff_intra_max_run[256] = 
+FAME_ALIGNED static char rlehuff_intra_max_run[256] = 
 { 0,14, 9, 7, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1037,7 +1037,7 @@
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
-static char rlehuff_intra_max_run_last[256] = 
+FAME_ALIGNED static char rlehuff_intra_max_run_last[256] = 
 { 0,20, 6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1055,7 +1055,7 @@
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
-static char rlehuff_inter_max_run[256] = 
+FAME_ALIGNED static char rlehuff_inter_max_run[256] = 
 { 0,26,10, 6, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 ,0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1073,7 +1073,7 @@
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
-static char rlehuff_inter_max_run_last[256] = 
+FAME_ALIGNED static char rlehuff_inter_max_run_last[256] = 
 { 0,40, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

Index: table_scale.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_scale.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_scale.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_scale.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -18,8 +18,18 @@
 */
 /**************** prescale and postscale factors for AA&N  DCT/iDCT **********/
 
+/* For float AA&N IDCT method, divisors are equal to quantization
+ * coefficients scaled by scalefactor[row]*scalefactor[col], where
+ *   scalefactor[0] = 1
+ *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+ * We apply a further scale factor of 8.
+ * What's actually stored is 1/divisor so that the inner loop can
+ * use a multiplication rather than a division.
+ */
+
 /* Postscale coefficients */
-static float const postscale[64] = {
+/* postscale[i][j] = 1/(8*scalefactor[i]*scalefactor[j]) */
+FAME_ALIGNED static float const postscale[64] = {
   0.125000, 0.090120, 0.095671, 0.106304,
   0.125000, 0.159095, 0.230970, 0.453064,
   0.090120, 0.064973, 0.068975, 0.076641,
@@ -35,11 +45,12 @@
   0.230970, 0.166520, 0.176777, 0.196424,
   0.230970, 0.293969, 0.426777, 0.837153,
   0.453064, 0.326641, 0.346760, 0.385299,
-  0.453064, 0.576641, 0.837153, 1.000000
+  0.453064, 0.576641, 0.837153, 1.642134
 };
 
 /* Prescale coefficients */
-static float const prescale[64] = {
+/* prescale[i][j] = scalefactor[i]*scalefactor[j]/8 */
+FAME_ALIGNED static float const prescale[64] = {
   0.125000000, 0.173379981, 0.163320371, 0.146984450,
   0.125000000, 0.098211870, 0.067649512, 0.034487422,
   0.173379981, 0.240484941, 0.226531862, 0.203873289,

Index: table_zigzag_mpeg1.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_zigzag_mpeg1.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_zigzag_mpeg1.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_zigzag_mpeg1.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -22,7 +22,7 @@
 
 /* zigzaging table */
 /* Note: cannot be made static due to bug with inline assembly */
-unsigned char mpeg1_zigzag_table[] = {
+FAME_ALIGNED unsigned char mpeg1_zigzag_table[] = {
    0,  8,  1,  2,  9, 16, 24, 17,
   10,  3,  4, 11, 18, 25, 32, 40,
   33, 26, 19, 12,  5,  6, 13, 20,
@@ -36,7 +36,7 @@
 #else
 /* zigzaging table */
 /* Note: cannot be made static due to bug with inline assembly */
-unsigned char mpeg1_zigzag_table[] = {
+FAME_ALIGNED unsigned char mpeg1_zigzag_table[] = {
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,

Index: table_zigzag_mpeg4.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfame/table_zigzag_mpeg4.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- table_zigzag_mpeg4.h	13 Mar 2002 01:14:35 -0000	1.1
+++ table_zigzag_mpeg4.h	1 Jun 2002 20:23:10 -0000	1.2
@@ -21,7 +21,7 @@
 /* use transposed tables to avoid transpose operation after DCT/iDCT */
 
 /* zigzaging table */
-unsigned char mpeg4_zigzag_table[] = {
+FAME_ALIGNED unsigned char mpeg4_zigzag_table[] = {
    0,  8,  1,  2,  9, 16, 24, 17,
   10,  3,  4, 11, 18, 25, 32, 40,
   33, 26, 19, 12,  5,  6, 13, 20,
@@ -33,7 +33,7 @@
    0
 };
 
-unsigned char mpeg4_zigzag_alternate_horizontal_table[] = {
+FAME_ALIGNED unsigned char mpeg4_zigzag_alternate_horizontal_table[] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
@@ -45,7 +45,7 @@
     0
 };
 
-unsigned char mpeg4_zigzag_alternate_vertical_table[] = {
+FAME_ALIGNED unsigned char mpeg4_zigzag_alternate_vertical_table[] = {
    0,  1,  2,  3,  8,  9, 16, 17,
   10, 11,  4,  5,  6,  7, 15, 14,
   13, 12, 19, 18, 24, 25, 32, 33,
@@ -60,7 +60,7 @@
 #else
 
 /* zigzaging table */
-unsigned char mpeg4_zigzag_table[] = {
+FAME_ALIGNED unsigned char mpeg4_zigzag_table[] = {
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
@@ -72,7 +72,7 @@
    0
 };
 
-unsigned char mpeg4_zigzag_alternate_horizontal_table[] = {
+FAME_ALIGNED unsigned char mpeg4_zigzag_alternate_horizontal_table[] = {
    0,  1,  2,  3,  8,  9, 16, 17,
   10, 11,  4,  5,  6,  7, 15, 14,
   13, 12, 19, 18, 24, 25, 32, 33,
@@ -84,7 +84,7 @@
    0  
 };
 
-unsigned char mpeg4_zigzag_alternate_vertical_table[] = {
+FAME_ALIGNED unsigned char mpeg4_zigzag_alternate_vertical_table[] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,

--- dequantise_float.h DELETED ---

--- dequantise_mmx.h DELETED ---

--- quantise_float.h DELETED ---

--- quantise_mmx.h DELETED ---




More information about the MPlayer-cvslog mailing list