[Mplayer-cvslog] CVS: main/libmp1e/audio audio.h,NONE,1.1 fft.c,NONE,1.1 filter.c,NONE,1.1 filter_mmx.s,NONE,1.1 libaudio.h,NONE,1.1 mp2.c,NONE,1.1 mpeg.h,NONE,1.1 psycho.c,NONE,1.1 tables.c,NONE,1.1

David Holm mswitch at mplayer.dev.hu
Wed Dec 5 00:55:57 CET 2001


Update of /cvsroot/mplayer/main/libmp1e/audio
In directory mplayer:/var/tmp.root/cvs-serv18232

Added Files:
	audio.h fft.c filter.c filter_mmx.s libaudio.h mp2.c mpeg.h 
	psycho.c tables.c 
Log Message:


--- NEW FILE ---
/*
 *  MPEG-1 Real Time Encoder
 *
 *  Copyright (C) 1999-2000 Michael H. Schimek
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* $Id: audio.h,v 1.1 2001/12/04 23:55:53 mswitch Exp $ */

#include <stdint.h>
#include <pthread.h>
#include "../common/fifo.h"
#include "../common/bstream.h"
#include "../common/sync.h"

#include "libaudio.h"
#include "mpeg.h"

#define BLKSIZE 1024
#define HBLKSIZE 513
#define CBANDS 63
#define e_save_new e_save_oldest

typedef struct mp2_context {
	/* Buffers */

	unsigned char		wrap[(SAMPLES_PER_FRAME + 512 - 32) * 4];

	int			sb_samples[2][3][12][SBLIMIT];	// channel, scale group, sample
	char			bit_alloc[2][SBLIMIT];
	struct {
		unsigned char		mant, exp;
	}			scalar[2][3][SBLIMIT];		// channel, scale group
	union {
		unsigned int		scf[2][SBLIMIT];	// scaling factors
		mmx_t			fb_temp[17];		// filterbank temp
	}			sf;
	char			scfsi[2][SBLIMIT];		// scale factor selector information
	float			mnr[2][SBLIMIT];
	int			sblimit;
	int			sum_nbal;

	struct bs_rec		out;

	/* Tables */

	unsigned char		sb_group[SBLIMIT];
	unsigned char		bits[4][MAX_BA_INDICES];
	struct {
		unsigned char		quant, shift;
	}			qs[4][MAX_BA_INDICES];
	unsigned char		nbal[SBLIMIT];
	unsigned short		bit_incr[8][MAX_BA_INDICES];
	float			mnr_incr[8][SBLIMIT];
	unsigned char		steps[4];
	unsigned char		packed[4];

	/* Psycho buffers */

	FLOAT			h_save[2][3][BLKSIZE];
	FLOAT			e_save[2][2][HBLKSIZE];
	struct {
		FLOAT			e, c;
	}			grouped[CBANDS];
	FLOAT			nb[HBLKSIZE];
	FLOAT			sum_energy[SBLIMIT];

	/* Psycho tables */

	char			partition[HBLKSIZE];		// frequency line to cband mapping
	struct {
		char			off, cnt;
	}			s_limits[CBANDS];
	FLOAT			s_packed[2048];			// packed spreading function
	FLOAT			absthres[HBLKSIZE];		// absolute thresholds, linear
	FLOAT			xnorm[CBANDS];
	FLOAT			p1[CBANDS];
	FLOAT			p2[CBANDS];
	FLOAT			p3[CBANDS - 20];
	FLOAT			p4[CBANDS - 20];

	int			ch;
	FLOAT *			e_save_old;
	FLOAT *			e_save_oldest;
	FLOAT *			h_save_new;
	FLOAT *			h_save_old;
	FLOAT *			h_save_oldest;

	int			psycho_loops;

	/* Input */

	consumer		cons;
	buffer *		ibuf;
	unsigned int		i16, e16, incr;
	double			nominal_time_elapsed;
	double			nominal_sample_period;
	synchr_stream 		sstr;
	uint32_t		format_sign;
	bool			format_scale;

	/* Output */

	fifo *			fifo;
	producer		prod;

	unsigned int		header_template;
	double			coded_frame_period;
	int			sampling_freq;
	int			bits_per_frame;
	int			spf_rest, spf_lag;

	/* Options */

	rte_codec		codec;

	int			mpeg_version;
	int			sampling_freq_code;
	int			bit_rate_code;
	int			audio_mode;

} mp2_context;

/* psycho.c */

extern void		mp1e_mp2_psycho_init(mp2_context *, int sampling_freq);
extern void		mp1e_mp2_psycho(mp2_context *, short *, float *, int);

/* fft.c */

extern void		mp1e_mp2_fft_init(int test);
extern void		mp1e_mp2_fft_step_1(short *, FLOAT *);
extern void		mp1e_mp2_fft_step_2(short *, FLOAT *);

/* filter.c */

extern void		mp1e_mp2_subband_filter_init(int test);

extern void		mp1e_mp2_mmx_window_mono(short *z, mmx_t *) __attribute__ ((regparm (2)));
extern void		mp1e_mp2_mmx_window_left(short *z, mmx_t *) __attribute__ ((regparm (2)));
extern void		mp1e_mp2_mmx_window_right(short *z, mmx_t *) __attribute__ ((regparm (2)));
extern void		mp1e_mp2_mmx_filterbank(int *, mmx_t *) __attribute__ ((regparm (2)));

static inline void
mmx_filter_mono(mp2_context *mp2, short *p, int *samples)
{
	int j;

	mp2->sf.fb_temp[0].d[0] = 0L;
	mp2->sf.fb_temp[0].d[1] = -1L;
	mp2->sf.fb_temp[1].d[0] = 32768L;
	mp2->sf.fb_temp[1].d[1] = 32768L;

	for (j = 0; j < 3 * SCALE_BLOCK; j++, p += 32, samples += 32) {
		mp1e_mp2_mmx_window_mono(p, mp2->sf.fb_temp);
		mp1e_mp2_mmx_filterbank(samples, mp2->sf.fb_temp);
	}

	emms();
}

static inline void
mmx_filter_stereo(mp2_context *mp2, short *p, int *samples)
{
	int j;

	mp2->sf.fb_temp[0].d[0] = 0L;
	mp2->sf.fb_temp[0].d[1] = -1L;
	mp2->sf.fb_temp[1].d[0] = 32768L;
	mp2->sf.fb_temp[1].d[1] = 32768L;

	for (j = 0; j < 3 * SCALE_BLOCK; j++, p += 32 * 2, samples += 32) {
		/*
		 *  Subband window code could be optimized,
		 *  I've just adapted the mono version.
		 */
		mp1e_mp2_mmx_window_left(p, mp2->sf.fb_temp);
		mp1e_mp2_mmx_filterbank(samples, mp2->sf.fb_temp);

		mp1e_mp2_mmx_window_right(p, mp2->sf.fb_temp);
		mp1e_mp2_mmx_filterbank(samples + 3 * SCALE_BLOCK * 32, mp2->sf.fb_temp);
	}

	emms();
}

--- NEW FILE ---
/*
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

[...2736 lines suppressed...]
		printv(1, " passed\n");
}

void
mp1e_mp2_fft_init(int test)
{
	fftw_compute_twiddle();

	init_hann_table();

	if (test)
		fft_test();
}








--- NEW FILE ---
/*
 *  MPEG Real Time Encoder
 *  MPEG-1 Audio Layer II Subband Filter Bank
 *
 *  Copyright (C) 1999-2001 Michael H. Schimek
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* $Id: filter.c,v 1.1 2001/12/04 23:55:53 mswitch Exp $ */

#include <stdio.h>
#include <stdlib.h>
#include "../common/math.h"
#include "audio.h"

/* Tables */

short mp1e_mp2_fb_window_coeff[512] __attribute__ ((aligned (CACHE_LINE)));
short mp1e_mp2_fb_filter_coeff[352] __attribute__ ((aligned (CACHE_LINE)));

static void
reference_filter(short *s, double *d)
{
	double z[512], y[64];
        int i, j;

    	for (i = 0; i < 512; i++)
		z[i] = ((double) s[511 - i]) / 32768.0 * C[i];

	for (i = 0; i < 64; i++)
		for (j = 0, y[i] = 0; j < 8; j++)
			y[i] += z[i + 64 * j];

	for (i = 0; i < 32; i++)
		for (j = 0, d[i] = 0; j < 64; j++)
			d[i] += cos((double)((2 * i + 1) * (16 - j)) * M_PI / 64.0) * y[j];
}

/*
 *  Compare output of the MMX and reference subband filter
 */
static void
filter_test(void)
{
	static const char *nmode[] = { "mono", "left", "right" };
	static const char *nsrc[] = { "random", "dc min", "dc max (overflow)", " sinus" };
	short *s, src[1024];
	int i, j, k, sb2[64], step;
	mmx_t temp[17];
	double sb1[64];
	double d, dmax[32], dsum[32];
	double err_avgs[4], err_maxs[4];
	const int repeat = 10000;
	int mode;

	for (k = 0; k < 4; k++)
		err_avgs[k] = err_maxs[k] = -1;

	for (mode = 0; mode < 3; mode++) {
		temp[0].uq = 0xFFFFFFFF00000000LL;
		temp[1]    = MMXRD(32768L);

		step = (mode == 0) ? 1 : 2;

		srand(0x76543210);

		for (k = 0; k < 4; k++) {
			double err_avg, err_max;

			printv(1, "\n");

			for (i = 0; i < 32; i++)
				dmax[i] = dsum[i] = 0.0;

			for (j = 0; j < repeat; j++) {
				printv(1, "\rAudio filterbank %s test, %s channel ... %u %%",
				       nsrc[k], nmode[mode], j * 100 / repeat);

				s = src + ((mode == 2) ? 1 : 0);
				for (i = 0; i < 512; s += step, i++)
					switch (k) {
					case 0:
						*s = rand();
						break;
					case 1:
						*s = 1;
						break;
					case 2:
						*s = -32768;
						break;
					case 3:
						*s = 32767 * sin(M_PI * ((double) i / j));
						break;
					}

				switch (mode) {
				case 0:
					mp1e_mp2_mmx_window_mono(src, temp);
					break;
				case 1:
					mp1e_mp2_mmx_window_left(src, temp);
					break;
				case 2:
					mp1e_mp2_mmx_window_right(src, temp);
					break;
				}
			
				mp1e_mp2_mmx_filterbank(sb2, temp);

				emms();

				if (mode == 1)
					for (i = 0; i < 512; i++)
						src[i] = src[i * 2];
				else if (mode == 2)
					for (i = 0; i < 512; i++)
						src[i] = src[i * 2 + 1];

				reference_filter(src, sb1);

				for (i = 0; i < 32; i++) {
					d = fabs(sb1[i] - ((double) sb2[i]) / (double)(1L << 30));
					if (d > dmax[0])
						dmax[0] = d;
					dsum[0] += d;
				}
			}

			err_avg = dsum[0] / (32.0 * repeat);
			err_max = dmax[0];

			printv(1, " - passed");
			printv(1, ", avg %22.12f^-1 max %22.12f^-1",
			       1.0 / err_avg, 1.0 / err_max);

			if (mode == 0) {
				err_avgs[k] = err_avg;
				err_maxs[k] = err_max;
			} else if (fabs(err_avgs[k] - err_avg) > 1e-12
				   || fabs(err_maxs[k] - err_max) > 1e-12) {
				printv(1, " - failed, mono != left != right\n");
				exit(EXIT_FAILURE);
			}

			if (err_avg > 0.00001 || err_max > 0.00005) {
				printv(1, " - failed, avg %12.2f^-1 max %12.2f^-1\n",
				       1.0 / err_avg, 1.0 / err_max);
				exit(EXIT_FAILURE);
			}

			printv(1, " - passed");
			printv(2, ", avg %12.2f^-1 max %12.2f^-1",
			       1.0 / err_avg, 1.0 / err_max);
		}
	}

	printv(1, "\n");
}

void
mp1e_mp2_subband_filter_init(int test)
{
	int i, j, k, l;

	for (k = 31; k >= 0; k -= 4)
	    for (i = 0; i < 4; i++)
		for (j = 0; j < 8; j += 2)
		    for (l = 0; l < 2; l++) {
			mp1e_mp2_fb_window_coeff[(31 - k) * 16 + i * 2 + j * 4 + l] =
			    lroundn((double)(1 << 19)
				* C[(16 + (k - i)) + 64 * (7 - j - l)]);
			mp1e_mp2_fb_window_coeff[32 + (31 - k) * 16 + i * 2 + j * 4 + l] =
			    lroundn((double)(1 << 19)
				* C[((16 - (k + 1 - (3 - i))) & 63)
				    + 64 * (7 - j - l)]
				* ((k - (3 - i) + 48) > 63 ? -1.0 : +1.0));
		    }

	mp1e_mp2_fb_filter_coeff[0] = 0;
	mp1e_mp2_fb_filter_coeff[1] = +lroundn(32768.0 * sqrt(2.0) / 2.0);
	mp1e_mp2_fb_filter_coeff[2] = 0;
	mp1e_mp2_fb_filter_coeff[3] = -lroundn(32768.0 * sqrt(2.0) / 2.0);

	for (k = 16, l = 4; k > 1; k >>= 1)
	    for (i = 0; i < 32 / k; i++) {
		for (j = 16 + k / 2; j < 48; j += k) {
		    mp1e_mp2_fb_filter_coeff[l++] = lroundn(
			cos((double)((2 * i + 1) * (16 - j)) * M_PI / 64.0) * 32768.0);
	    }
	}

	for (i = 8; i < 24; i += 8)
	    for (j = 2; j < 4; j++) {
		int temp = mp1e_mp2_fb_filter_coeff[i + j];

		mp1e_mp2_fb_filter_coeff[i + j] = mp1e_mp2_fb_filter_coeff[i + j + 2];
		mp1e_mp2_fb_filter_coeff[i + j + 2] = temp;
	    }

	if (test)
		filter_test();
}

--- NEW FILE ---
#
#  MPEG-1 Real Time Encoder
# 
#  Copyright (C) 1999-2000 Michael H. Schimek
# 
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2 as
#  published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
# 
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

# $Id: filter_mmx.s,v 1.1 2001/12/04 23:55:53 mswitch Exp $

	.text
	.align		16
	.globl		mp1e_mp2_mmx_filterbank

mp1e_mp2_mmx_filterbank:

	pushl		%ebx;
	pushl		%ecx;				leal		64(%edx),%ebx;
	movq		(%ebx),%mm0;			movl		$mp1e_mp2_fb_filter_coeff,%ecx;
	movq		8(%ebx),%mm1;			movq		%mm0,%mm2;	// 07 06 05 04
	movq		16(%ebx),%mm6;			punpcklwd	%mm1,%mm0;	// 05 01 04 00
	movq		24(%ebx),%mm7;			punpckhwd	%mm1,%mm2;	// 07 03 06 02
	movq		%mm0,%mm4;			punpcklwd	%mm2,%mm4;	// 06 04 02 00
	movq		%mm6,%mm3;			punpckhwd	%mm2,%mm0;	// 07 05 03 01
	movq		40(%ebx),%mm1;			punpcklwd	%mm7,%mm6;
	movq		%mm0,32(%edx);			punpckhwd	%mm7,%mm3;
	movq		%mm6,%mm5;			punpcklwd	%mm3,%mm5;
	movq		32(%ebx),%mm0;			punpckhwd	%mm3,%mm6;
	movq		%mm0,%mm2;			punpcklwd	%mm1,%mm0;
	movq		%mm6,32+8(%edx);		punpckhwd	%mm1,%mm2;
	movq		%mm0,%mm6;			punpcklwd	%mm2,%mm6;
	movq		48(%ebx),%mm3;			punpckhwd	%mm2,%mm0;
	movq		56(%ebx),%mm1;			movq		%mm3,%mm2;
	movq		%mm0,32+16(%edx);		punpcklwd	%mm1,%mm3;
	movq		%mm3,%mm7;			punpckhwd	%mm1,%mm2;
	movq		%mm4,%mm0;			punpcklwd	%mm2,%mm7;
	punpckhwd	%mm2,%mm3;			punpcklwd	%mm5,%mm0;	// 10 02 08 00
	movq		%mm3,32+24(%edx);		punpckhwd	%mm5,%mm4;	// 14 06 12 04
	movq		%mm0,%mm5;			punpcklwd	%mm4,%mm0;	// 12 08 04 00
	movq		%mm6,%mm1;			punpckhwd	%mm4,%mm5;	// 14 10 06 02 =
	punpcklwd	%mm7,%mm1;			punpckhwd	%mm7,%mm6;	// 26 18 24 16; 30 22 28 20
	movq		%mm1,%mm7;			punpcklwd	%mm6,%mm1;	// 28 24 20 16
	movq		%mm5,64(%eax);			punpckhwd	%mm6,%mm7;	// 30 26 22 18 =
	movq		%mm0,%mm2;			punpcklwd	%mm1,%mm0;	// 20 04 16 00 =
	movq		%mm7,72(%eax);			punpckhwd	%mm1,%mm2;	// 28 12 24 08 =
	movq		%mm0,%mm6;			punpckldq	%mm0,%mm0;	// 16 00 16 00
	pmaddwd		(%ecx),%mm0;			punpckhwd	%mm2,%mm6;	// 28 20 12 04 =
	movq		%mm6,%mm4;			punpckldq	%mm2,%mm2;	// 28 20 12  4; 24 08 24 08
	pmaddwd		8(%ecx),%mm2;			punpckldq	%mm4,%mm4;	// 12  4 12  4
	paddd		16(%edx),%mm0;			punpckhdq	%mm6,%mm6;	// 28 20 28 20
	movq		24+16(%ecx),%mm3;		movq		%mm0,%mm1;
	paddd		%mm2,%mm0;			psubd		%mm2,%mm1;
	movq		16(%ecx),%mm5;			psrad		$3,%mm0;
	movq		8+16(%ecx),%mm7;		pmaddwd		%mm6,%mm3;
	movq		16+16(%ecx),%mm2;		pmaddwd		%mm4,%mm5;
	pmaddwd		%mm6,%mm7;			movq		%mm1,%mm6;
	pmaddwd		%mm4,%mm2;			psllq		$32,%mm6;
	movq		%mm0,%mm4;			psrlq		$32,%mm1;
	por		%mm6,%mm1;			paddd		%mm7,%mm5;	// 1 0
	paddd		%mm3,%mm2;			psrad		$3,%mm1;
	psrad		$3,%mm2;			psrad		$3,%mm5;
	paddd		%mm5,%mm0;			movq		%mm1,%mm3;
	movq		%mm0,(%eax);			paddd		%mm2,%mm1;
	movq		32+16(%ecx),%mm0;		psubd		%mm5,%mm4;	// 5 4
	movq		%mm1,8(%eax);			psubd		%mm2,%mm3;	// 7 6
	movq		%mm3,%mm6;			punpckldq	%mm4,%mm6;	// 4 6
	movq		16+32+16(%ecx),%mm2;		punpckhdq	%mm4,%mm3;	// 5 7
	movq		8+32+16(%ecx),%mm1;		movq		%mm3,%mm4;
	movq		64(%eax),%mm5;			punpckldq	%mm6,%mm3;	// 6 7
	movq		72(%eax),%mm7;			punpckhdq	%mm6,%mm4;	// 4 5
	movq		%mm3,16(%eax);			pmaddwd		%mm5,%mm0;	// 14 10  6  2
	movq		%mm4,24(%eax);			pmaddwd		%mm7,%mm1;	// 30 26 22 18
	movq		24+32+16(%ecx),%mm3;		pmaddwd		%mm5,%mm2;
	movq		0+80(%ecx),%mm4;		pmaddwd		%mm7,%mm3;	// 0 0
	movq		8+80(%ecx),%mm6;		pmaddwd		%mm5,%mm4;	// 14 10 06 02
	paddd		%mm1,%mm0;			pmaddwd		%mm7,%mm6;	// 30 26 22 18
	paddd		%mm3,%mm2;			movq		%mm0,%mm1;	// 1 1
	movq		(%eax),%mm3;			punpckldq	%mm2,%mm1;	// 1 0
	paddd		%mm4,%mm6;	    		punpckhdq	%mm2,%mm0;	// 1 0
	movq		16+80(%ecx),%mm2;		psrad		$3,%mm1;
	pmaddwd		%mm5,%mm2;			psrad		$3,%mm0;
	movq		%mm3,%mm4;			paddd		%mm0,%mm1;
	movq		24+80(%ecx),%mm0;		paddd		%mm1,%mm3;
	pmaddwd		%mm7,%mm0;			psubd		%mm1,%mm4;	// 0 0
	movq		%mm3,(%eax);			psrad		$3,%mm6;
	movq		24+112(%ecx),%mm3;		movq		%mm6,%mm1;	// 1 1
	pmaddwd		%mm7,%mm3;			paddd		%mm0,%mm2;	// 0 0
	movq		16+112(%ecx),%mm0;		psrad		$3,%mm2;
	pmaddwd		%mm5,%mm0;			punpckldq	%mm2,%mm1;	// 1 0
	punpckhdq	%mm2,%mm6;			leal		32+16(%ecx),%ecx; // 1 0
	movq		8(%eax),%mm2;			paddd		%mm6,%mm1;
	paddd		%mm3,%mm0;			movq		%mm2,%mm3;
	psubd		%mm1,%mm2;			paddd		%mm1,%mm3;
	movq		%mm2,%mm6;			punpckldq	%mm4,%mm2;	// 0 2
	movq		8+64(%ecx),%mm1;		punpckhdq	%mm4,%mm6;	// 1 3
	pmaddwd		%mm7,%mm1;			movq		%mm6,%mm4;
	movq		%mm3,8(%eax);			punpckldq	%mm2,%mm6;	// 0 1
	movq		64(%ecx),%mm3;			punpckhdq	%mm2,%mm4;	// 2 3
	movq		16(%eax),%mm2;			pmaddwd		%mm5,%mm3;
	movq		%mm4,56(%eax);			movq		%mm2,%mm4;
	movq		%mm6,48(%eax);			leal		30*4(%eax),%ebx;
	movq		8+96(%ecx),%mm6;		paddd		%mm1,%mm3;
	movq		%mm3,%mm1;			punpckhdq	%mm0,%mm3;
	pmaddwd		%mm7,%mm6;			punpckldq	%mm0,%mm1;
	movq		96(%ecx),%mm0;			psrad		$3,%mm1;
	pmaddwd		%mm5,%mm0;			psrad		$3,%mm3;
	pmaddwd		24+96(%ecx),%mm7;		paddd		%mm3,%mm1;
	pmaddwd		16+96(%ecx),%mm5;		paddd		%mm1,%mm2;
	paddd		%mm6,%mm0;			psubd		%mm1,%mm4;
	movq		%mm2,16(%eax);			psrad		$3,%mm0;
	paddd		%mm7,%mm5;			psrad		$3,%mm5;
	movq		24(%eax),%mm7;			movq		%mm0,%mm3;
	movq		%mm7,%mm6;			punpckhdq	%mm5,%mm0;
	leal		128(%ecx),%ecx;			punpckldq	%mm5,%mm3;
	movq		32+8(%edx),%mm1;		paddd		%mm0,%mm3;
	paddd		%mm3,%mm7;			psubd		%mm3,%mm6;
	movq		%mm6,%mm2;			punpckldq	%mm4,%mm6;	// 0 2
	movq		%mm7,24(%eax);			punpckhdq	%mm4,%mm2;	// 1 3
	movq		%mm2,%mm4;			punpckldq	%mm6,%mm2;	// 0 1
	movq		32(%edx),%mm0;			punpckhdq	%mm6,%mm4;	// 2 3
	movq		%mm2,32(%eax);			movq		%mm0,%mm2;
	movq		%mm4,40(%eax);			jmp		2f;

	.align		16
2:	
	pmaddwd		(%ecx),%mm0;			movq		%mm1,%mm3;
	pmaddwd		8(%ecx),%mm1;			movq		%mm3,%mm5;
	pmaddwd		32(%ecx),%mm2;			leal		16(%eax),%eax;
	pmaddwd		40(%ecx),%mm3;			leal		-16(%ebx),%ebx;
	movq		32+16(%edx),%mm4;		paddd		%mm1,%mm0;
	movq		32+24(%edx),%mm1;		movq		%mm4,%mm6;
	pmaddwd		16(%ecx),%mm4;			movq		%mm1,%mm7;
	pmaddwd		24(%ecx),%mm1;			paddd		%mm3,%mm2;
	pmaddwd		48(%ecx),%mm6;			psrad		$3,%mm0;
	pmaddwd		56(%ecx),%mm7;			psrad		$3,%mm2;
	paddd		%mm1,%mm4;			movq		%mm0,%mm3;
	movq		-16(%eax),%mm1;			psrad		$3,%mm4;
	paddd		%mm7,%mm6;			movq		%mm5,%mm7;
	pmaddwd		8+64(%ecx),%mm5;		psrad		$3,%mm6;
	pmaddwd		40+64(%ecx),%mm7;		punpckhdq	%mm2,%mm0;
	punpckldq	%mm2,%mm3;			movq		%mm4,%mm2;
	paddd		%mm0,%mm3;			punpckhdq	%mm6,%mm4;
	paddd		%mm4,%mm3;			punpckldq	%mm6,%mm2;
	movq		32+24(%edx),%mm4;		paddd		%mm2,%mm3;
	movq		32(%edx),%mm2;			movq		%mm1,%mm0;
	movq		%mm4,%mm6;			psubd		%mm3,%mm0;
	pmaddwd		24+64(%ecx),%mm4;		paddd		%mm3,%mm1;
	movq		%mm1,-16(%eax);			movq		%mm2,%mm1;
	pmaddwd		64(%ecx),%mm1;			cmpl		%ebx,%eax;
	pmaddwd		32+64(%ecx),%mm2;		leal		32*4(%ecx),%ecx;
	movd		%mm0,20(%ebx);			psrlq		$32,%mm0;
	movq		32+16(%edx),%mm3;		paddd		%mm5,%mm1;
	movd		%mm0,16(%ebx);			movq		%mm3,%mm0;
	pmaddwd		56-64(%ecx),%mm6;		paddd		%mm7,%mm2;
	pmaddwd		16-64(%ecx),%mm0;		psrad		$3,%mm1;
	pmaddwd		48-64(%ecx),%mm3;		psrad		$3,%mm2;
	movq		%mm1,%mm7;			punpckhdq	%mm2,%mm1;
	paddd		%mm4,%mm0;			punpckldq	%mm2,%mm7;
	movq		-8(%eax),%mm4;			paddd		%mm6,%mm3;
	movq		%mm0,%mm2;			punpckhdq	%mm3,%mm0;
	paddd		%mm1,%mm7;			punpckldq	%mm3,%mm2;
	psrad		$3,%mm2;			movq		%mm4,%mm3;
	psrad		$3,%mm0;			paddd		%mm2,%mm7;
	movq		32+8(%edx),%mm1;		paddd		%mm0,%mm7;
	paddd		%mm7,%mm4;			psubd		%mm7,%mm3;
	movq		32(%edx),%mm0;			movq		%mm3,%mm6;
	movq		%mm4,-8(%eax);			psrlq		$32,%mm6;
	movd		%mm3,12(%ebx);			movq		%mm0,%mm2;
	movd		%mm6,8(%ebx);			jle		2b
	popl		%ecx;
	popl		%ebx;
	ret

# void
# mp1e_mp2_mmx_window_mono(short *z [eax], mmx_t *temp [edx])
	
	.text
	.align		16
	.globl		mp1e_mp2_mmx_window_mono

mp1e_mp2_mmx_window_mono:

	pushl %edi;					leal 32(%eax),%eax;			// read z[0], z[1], ...
	pushl %esi;					movl $0,16(%edx);			// .so.ud[0] shift-out
	pushl %ecx;					leal 130(%edx),%edi;			// .y[31-2+4]
	pushl %ebx;					movl $mp1e_mp2_fb_window_coeff,%esi;
	movl $8,%ecx;					movl $28*2,%ebx;

	.align		16
1:
	movq		128*0(%eax),%mm0;		andl		$127,%ebx;	// a3 a2 a1 a0
	movq		128*1(%eax),%mm1;		movq		%mm0,%mm4;	// b3 b2 b1 b0;
	movq		128*2(%eax),%mm2;		punpcklwd	%mm1,%mm0;	// c3 c2 c1 c0; b1 a1 b0 a0
	movq		128*3(%eax),%mm3;		movq		%mm2,%mm5;	// d3 d2 d1 d0;
	pmaddwd		(%esi),%mm0;			punpcklwd	%mm3,%mm2;	// 1 0 ba; d1 c1 d0 c0
	pmaddwd		16(%esi),%mm2;			punpckhwd	%mm1,%mm4;	// 1 0 dc; b3 a3 b2 a2
	movq		128*4(%eax),%mm1;		punpckhwd	%mm3,%mm5;	// e3 e2 e1 e0; d3 c3 d2 c2
	movq		128*5(%eax),%mm3;		movq		%mm1,%mm6;	// f3 f2 f1 f0;
	pmaddwd		8(%esi),%mm4;			punpcklwd	%mm3,%mm1;	// 3 2 ba; f1 e1 f0 e0
	pmaddwd		24(%esi),%mm5;			punpckhwd	%mm3,%mm6;	// 3 2 dc; f3 e3 f2 e2
	movq		128*6(%eax),%mm3;		paddd		%mm2,%mm0;	// ; 1 0 dcba
	movq		128*7(%eax),%mm7;		movq		%mm3,%mm2;
	pmaddwd		32(%esi),%mm1;			punpcklwd	%mm7,%mm3;
	pmaddwd		40(%esi),%mm6;			punpckhwd	%mm7,%mm2;
	pmaddwd		48(%esi),%mm3;			paddd		%mm5,%mm4;	// ; 3 2 dcba
	pmaddwd		56(%esi),%mm2;			paddd		%mm1,%mm0;	// ; 1 0 fedcba
	movq		128*0-64(%eax,%ebx),%mm1;	paddd		%mm6,%mm4;	// A3 A2 A1 A0; 3 2 fedcba;
	movq		128*1-64(%eax,%ebx),%mm5;	paddd		%mm3,%mm0;	// B3 B2 B1 B0; 1 0 hgfedcba =
	movq		128*2-64(%eax,%ebx),%mm3;	movq		%mm1,%mm6;	// C3 C2 C1 C0;
	movq		128*3-64(%eax,%ebx),%mm7;	punpcklwd	%mm5,%mm1;	// D3 D2 D1 D0; B1 A1 B0 A0
	movq		%mm0,24(%edx);			movq		%mm3,%mm0;
	pmaddwd		64(%esi),%mm1;			punpcklwd	%mm7,%mm3;	// 1' 0' ba; D1 C1 D0 C0
	pmaddwd		64+16(%esi),%mm3;		punpckhwd	%mm5,%mm6;	// 1' 0' dc; B3 A3 B2 A2
	movq		128*4-64(%eax,%ebx),%mm5;	punpckhwd	%mm7,%mm0;	// E3 E2 E1 E0; D3 C3 D2 C2
	movq		128*5-64(%eax,%ebx),%mm7;	paddd		%mm2,%mm4;	// F3 F2 F1 F0; 3 2 hgfedcba =
	pmaddwd		64+8(%esi),%mm6;		paddd		%mm3,%mm1;	// 3' 2' ba; 1' 0' dcba 
	pmaddwd		64+24(%esi),%mm0;		movq		%mm5,%mm2;	// 3' 2' dc;
	movq		128*6-64(%eax,%ebx),%mm3;	punpcklwd	%mm7,%mm5;	// ; F1 E1 F0 E0
	pmaddwd		64+32(%esi),%mm5;   		punpckhwd	%mm7,%mm2;	// 1' 0' fe; F3 E3 F2 E2 
	movq		128*7-64(%eax,%ebx),%mm7;	paddd		%mm0,%mm6;	// ; 3' 2' dcba 
	pmaddwd		64+40(%esi),%mm2;		movq		%mm3,%mm0;	// 3' 2' fe;
	paddd		%mm5,%mm1;			punpcklwd	%mm7,%mm3;	// 1' 0' fedcba;  
	pmaddwd		64+48(%esi),%mm3;		punpckhwd	%mm7,%mm0;	// 1' 0' hg;
	pmaddwd		64+56(%esi),%mm0;		paddd		%mm2,%mm6;	// 3' 2' hg; 3' 2' fedcba
	movq		24(%edx),%mm5;			addl		$8,%eax;	// 1 0 hgfedcba =
	movq		16(%edx),%mm7;			paddd		%mm3,%mm1;	// so 0; 1' 0' hgfedcba =
	pxor		%mm5,%mm7;			paddd		%mm0,%mm6;	// c^B A; 3' 2' hgfedcba =
	pand		(%edx),%mm5;			subl		$8,%edi;	// B 0
	pxor		%mm5,%mm7;			pxor		%mm4,%mm5;	// so A; B^D C
	pand		(%edx),%mm4;			paddd		%mm7,%mm6;	// D 0; 31 32
	paddd		8(%edx),%mm6;			pxor		%mm4,%mm5;	// B C
	movq		%mm4,16(%edx);			paddd		%mm5,%mm1;	// so 0; 29 30
	paddd		8(%edx),%mm1;			psrad		$16,%mm6;
	psrad		$16,%mm1;			subl		$16,%ebx;
	packssdw	%mm6,%mm1;			decl		%ecx;
	leal		128(%esi),%esi;			punpckhdq	%mm4,%mm4;
	movq		%mm1,(%edi);			jnz		1b;
	psrad		$1,%mm4;			popl		%ebx;		// (#0 * (1.0 * exp2(15))) >> 16
	movq		%mm4,16(%edx);			popl		%ecx;
	popl		%esi;
	popl		%edi;
	ret

	.text
	.align		16
	.globl		mp1e_mp2_mmx_window_left

mp1e_mp2_mmx_window_left:

	pushl %edi;					leal 64(%eax),%eax;			// read z[0], z[2], ...
	pushl %esi;					movl $0,16(%edx);			// .so.ud[0] shift-out
	pushl %ecx;					leal 130(%edx),%edi;			// .y[31-2+4]
	pushl %ebx;					movl $mp1e_mp2_fb_window_coeff,%esi;
	movl $8,%ecx;					movl $28*4,%ebx;

	.align		16
1:
	movq		256*0(%eax),%mm0;		andl		$255,%ebx;	// xx a1 xx a0
	movq		256*1(%eax),%mm1;		movq		%mm0,%mm2;	// xx b1 xx b0
	movq		256*2(%eax),%mm3;		punpcklwd	%mm1,%mm0;	// xx c1 xx c0; xx xx b0 a0
	movq		256*3(%eax),%mm4;		punpckhwd	%mm1,%mm2;	// xx d1 xx d0; xx xx b1 a1
	movq		%mm3,%mm5;			punpckldq	%mm2,%mm0;	// b1 a1 b0 a0
	pmaddwd		(%esi),%mm0;			punpcklwd	%mm4,%mm3;	// 1 0 ba; xx xx d0 c0
	movq		256*4(%eax),%mm1;		punpckhwd	%mm4,%mm5;	// xx e1 xx e0; xx xx d1 c1
	movq		256*5(%eax),%mm2;		punpckldq	%mm5,%mm3;	// xx f1 xx f0; d1 c1 d0 c0
	movq		%mm1,%mm6;			punpcklwd	%mm2,%mm1;	// xx xx f2 e2
	pmaddwd		16(%esi),%mm3;			punpckhwd	%mm2,%mm6;	// 1 0 dc; xx xx f3 e3
	movq		256*6(%eax),%mm7;		punpckldq	%mm6,%mm1;	// xx g1 xx g0; f3 e3 f2 e2
	pmaddwd		32(%esi),%mm1;			movq		%mm7,%mm5;	// 1 0 fe
	movq		256*7(%eax),%mm6;		paddd		%mm3,%mm0;	// xx h1 xx h0
	movq		256*0+8(%eax),%mm4;		punpcklwd	%mm6,%mm7;	// xx a3 xx a2; xx xx h2 g2
	paddd		%mm1,%mm0;			punpckhwd	%mm6,%mm5;	// xx xx h3 g3
	movq		256*1+8(%eax),%mm1;		punpckldq	%mm5,%mm7;	// xx b3 xx b2; h3 g3 h2 g2
	pmaddwd		48(%esi),%mm7;			movq		%mm4,%mm6;	// 1 0 hg
	movq		256*2+8(%eax),%mm5;		punpcklwd	%mm1,%mm4;	// xx c3 xx c2; xx xx b2 a2
	movq		256*3+8(%eax),%mm3;		punpckhwd	%mm1,%mm6;	// xx d3 xx d2; xx xx b3 a3
	paddd		%mm7,%mm0;			punpckldq	%mm6,%mm4;	// b3 a3 b2 a2
	movq		%mm5,%mm7;			punpcklwd	%mm3,%mm5;	// xx xx d2 c2
	pmaddwd		8(%esi),%mm4;			punpckhwd	%mm3,%mm7;	// 3 2 ba; xx xx d3 c3
	movq		256*4+8(%eax),%mm2;		punpckldq	%mm7,%mm5;	// xx e3 xx e2; d3 c3 d2 c2
	movq		256*5+8(%eax),%mm1;		movq		%mm2,%mm6;	// xx f3 xx f2
	pmaddwd		24(%esi),%mm5;			punpcklwd	%mm1,%mm2;	// 3 2 dc; xx xx f2 e2
	movq		256*6+8(%eax),%mm7;		punpckhwd	%mm1,%mm6;	// xx g3 xx g2; xx xx f3 e3
	movq		256*7+8(%eax),%mm3;		punpckldq	%mm6,%mm2;	// xx h3 xx h2; f3 e3 f2 e2
	pmaddwd		40(%esi),%mm2;			paddd		%mm5,%mm4;	// 3 2 fe
	movq		256*0-128(%eax,%ebx),%mm1;	movq		%mm7,%mm5;	// xx A1 xx A0
	movq		256*1-128(%eax,%ebx),%mm6;	punpcklwd	%mm3,%mm5;	// xx B1 xx B0; xx xx h2 g2
	paddd		%mm2,%mm4;			punpckhwd	%mm3,%mm7;	// xx xx h3 g3
	movq		%mm1,%mm2;			punpckldq	%mm7,%mm5;	// h3 g3 h2 g2
	pmaddwd		56(%esi),%mm5;			punpcklwd	%mm6,%mm1;	// 3 2 hg; xx xx B0 A0
	movq		256*2-128(%eax,%ebx),%mm3;	punpckhwd	%mm6,%mm2;	// xx C1 xx C0; xx xx B1 A1
	movq		256*3-128(%eax,%ebx),%mm7;	punpckldq	%mm2,%mm1;	// xx D1 xx D0; B1 A1 B0 A0
	pmaddwd		64+0(%esi),%mm1;		paddd		%mm5,%mm4;	// 1 0 BA; mm4 = 3 2 hgfedcba
	movq		%mm3,%mm6;			punpcklwd	%mm7,%mm3;	// xx xx D0 C0
	movq		256*4-128(%eax,%ebx),%mm2;	punpckhwd	%mm7,%mm6;	// xx E1 xx E0; xx xx D1 C1
	movq		256*5-128(%eax,%ebx),%mm5;	punpckldq	%mm6,%mm3;	// xx F1 xx F0; D1 C1 D0 C0
	movq		%mm0,24(%edx);			movq		%mm2,%mm6;	
	pmaddwd		64+16(%esi),%mm3;		punpcklwd	%mm5,%mm2;	// 1 0 DC; xx xx F0 E0
	movq		256*6-128(%eax,%ebx),%mm7;	punpckhwd	%mm5,%mm6;	// xx xx F1 E1
	movq		256*7-128(%eax,%ebx),%mm5;	punpckldq	%mm6,%mm2;	// xx E1 xx E0; F1 E1 F0 E0
	pmaddwd		64+32(%esi),%mm2;		movq		%mm7,%mm6;	// xx F1 xx F0
	paddd		%mm3,%mm1;			punpcklwd	%mm5,%mm7;	// 1 0 FE; xx xx F0 E0
	movq		256*0-128+8(%eax,%ebx),%mm0;	punpckhwd	%mm5,%mm6;	// xx A1 xx A0; xx xx F1 E1
	paddd		%mm2,%mm1;			punpckldq	%mm6,%mm7;	// F1 E1 F0 E0
	movq		256*1-128+8(%eax,%ebx),%mm3;	movq		%mm0,%mm6;	// xx B1 xx B0
	pmaddwd		64+48(%esi),%mm7;		punpcklwd	%mm3,%mm0;	// 1 0 FE; xx xx B0 A0
	movq		256*2-128+8(%eax,%ebx),%mm2;	punpckhwd	%mm3,%mm6;	// xx C1 xx C0; xx xx B1 A1
	movq		256*3-128+8(%eax,%ebx),%mm5;	punpckldq	%mm6,%mm0;	// xx D1 xx D0; B1 A1 B0 A0
	pmaddwd		64+8(%esi),%mm0;		paddd		%mm7,%mm1;	// 1 0 BA; mm1 = 1' 0' hgfedcba
	movq		%mm2,%mm7;			punpcklwd	%mm5,%mm2;	// xx xx D0 C0
	movq		256*4-128+8(%eax,%ebx),%mm3;	punpckhwd	%mm5,%mm7;	// xx E1 xx E0; xx xx D1 C1
	movq		256*5-128+8(%eax,%ebx),%mm5;	punpckldq	%mm7,%mm2;	// xx F1 xx F0; D1 C1 D0 C0
	movq		%mm3,%mm7;			punpcklwd	%mm5,%mm3;	// xx xx F0 E0
	pmaddwd		64+24(%esi),%mm2;		punpckhwd	%mm5,%mm7;	// 1 0 DC; xx xx F1 E1
	movq		256*6-128+8(%eax,%ebx),%mm6;	punpckldq	%mm7,%mm3;	// xx E1 xx E0; F1 E1 F0 E0
	movq		256*7-128+8(%eax,%ebx),%mm5;	movq		%mm6,%mm7;	// xx F1 xx F0
	pmaddwd		64+40(%esi),%mm3;		punpcklwd	%mm5,%mm6;	// 1 0 FE; xx xx F0 E0
	paddd		%mm2,%mm0;			punpckhwd	%mm5,%mm7;	// xx xx F1 E1
	movq		24(%edx),%mm5;			punpckldq	%mm7,%mm6;	// F1 E1 F0 E0; mm5 = 1 0 hgfedcba
	pmaddwd		64+56(%esi),%mm6;		paddd		%mm3,%mm0;	// 1 0 FE
	movq		16(%edx),%mm7;			addl		$16,%eax;	// so
	pxor		%mm5,%mm7;			subl		$8,%edi;
	pand		(%edx),%mm5;			paddd		%mm6,%mm0;	// B 0; mm0 = 3' 2' hgfedcba
	pxor		%mm5,%mm7;			pxor		%mm4,%mm5;	// so A; B^D C
	pand		(%edx),%mm4;			paddd		%mm7,%mm0;	// D 0; 31 32
	paddd		8(%edx),%mm0;			pxor		%mm4,%mm5;	// B C
	movq		%mm4,16(%edx);			paddd		%mm5,%mm1;	// so 0; 29 30
	paddd		8(%edx),%mm1;			psrad		$16,%mm0;
	psrad		$16,%mm1;			subl		$32,%ebx;
	packssdw	%mm0,%mm1;			decl		%ecx;
	leal		128(%esi),%esi;			punpckhdq	%mm4,%mm4;
	movq		%mm1,(%edi);			jnz		1b;
	psrad		$1,%mm4;			popl		%ebx;		// (#0 * (1.0 * exp2(15))) >> 16
	movq		%mm4,16(%edx);			popl		%ecx;
	popl		%esi;
	popl		%edi;
	ret

	.text
	.align		16
	.globl		mp1e_mp2_mmx_window_right

mp1e_mp2_mmx_window_right:

	pushl %edi;					leal 64(%eax),%eax;			// read z[1], z[3], ...
	pushl %esi;					movl $0,16(%edx);			// .so.ud[0] shift-out
	pushl %ecx;					leal 130(%edx),%edi;			// .y[31-2+4]
	pushl %ebx;					movl $mp1e_mp2_fb_window_coeff,%esi;
	movl $8,%ecx;					movl $28*4,%ebx;

	.align		16
1:
	movq		256*0(%eax),%mm0;		andl		$255,%ebx;	// a1 xx a0 xx
	movq		256*1(%eax),%mm1;		movq		%mm0,%mm2;	// b1 xx b0 xx
	movq		256*2(%eax),%mm3;		punpcklwd	%mm1,%mm0;	// c1 xx c0 xx; b0 a0 xx xx
	movq		256*3(%eax),%mm4;		punpckhwd	%mm1,%mm2;	// d1 xx d0 xx; b1 a1 xx xx
	movq		%mm3,%mm5;			punpckhdq	%mm2,%mm0;	// b1 a1 b0 a0
	pmaddwd		(%esi),%mm0;			punpcklwd	%mm4,%mm3;	// 1 0 ba; d0 c0 xx xx
	movq		256*4(%eax),%mm1;		punpckhwd	%mm4,%mm5;	// e1 xx e0 xx; d1 c1 xx xx
	movq		256*5(%eax),%mm2;		punpckhdq	%mm5,%mm3;	// f1 xx f0 xx; d1 c1 d0 c0
	movq		%mm1,%mm6;			punpcklwd	%mm2,%mm1;	// f2 e2 xx xx
	pmaddwd		16(%esi),%mm3;			punpckhwd	%mm2,%mm6;	// 1 0 dc; f3 e3 xx xx
	movq		256*6(%eax),%mm7;		punpckhdq	%mm6,%mm1;	// g1 xx g0 xx; f3 e3 f2 e2
	pmaddwd		32(%esi),%mm1;			movq		%mm7,%mm5;	// 1 0 fe
	movq		256*7(%eax),%mm6;		paddd		%mm3,%mm0;	// h1 xx h0 xx
	movq		256*0+8(%eax),%mm4;		punpcklwd	%mm6,%mm7;	// a3 xx a2 xx; h2 g2 xx xx
	paddd		%mm1,%mm0;			punpckhwd	%mm6,%mm5;	// h3 g3 xx xx
	movq		256*1+8(%eax),%mm1;		punpckhdq	%mm5,%mm7;	// b3 xx b2 xx; h3 g3 h2 g2
	pmaddwd		48(%esi),%mm7;			movq		%mm4,%mm6;	// 1 0 hg
	movq		256*2+8(%eax),%mm5;		punpcklwd	%mm1,%mm4;	// c3 xx c2 xx; b2 a2 xx xx
	movq		256*3+8(%eax),%mm3;		punpckhwd	%mm1,%mm6;	// d3 xx d2 xx; b3 a3 xx xx
	paddd		%mm7,%mm0;			punpckhdq	%mm6,%mm4;	// b3 a3 b2 a2
	movq		%mm5,%mm7;			punpcklwd	%mm3,%mm5;	// d2 c2 xx xx
	pmaddwd		8(%esi),%mm4;			punpckhwd	%mm3,%mm7;	// 3 2 ba; d3 c3 xx xx
	movq		256*4+8(%eax),%mm2;		punpckhdq	%mm7,%mm5;	// e3 xx e2 xx; d3 c3 d2 c2
	movq		256*5+8(%eax),%mm1;		movq		%mm2,%mm6;	// f3 xx f2 xx
	pmaddwd		24(%esi),%mm5;			punpcklwd	%mm1,%mm2;	// 3 2 dc; f2 e2 xx xx
	movq		256*6+8(%eax),%mm7;		punpckhwd	%mm1,%mm6;	// g3 xx g2 xx; f3 e3 xx xx
	movq		256*7+8(%eax),%mm3;		punpckhdq	%mm6,%mm2;	// h3 xx h2 xx; f3 e3 f2 e2
	pmaddwd		40(%esi),%mm2;			paddd		%mm5,%mm4;	// 3 2 fe
	movq		256*0-128(%eax,%ebx),%mm1;	movq		%mm7,%mm5;	// xx A1 xx A0
	movq		256*1-128(%eax,%ebx),%mm6;	punpcklwd	%mm3,%mm5;	// xx B1 xx B0; xx xx h2 g2
	paddd		%mm2,%mm4;			punpckhwd	%mm3,%mm7;	// xx xx h3 g3
	movq		%mm1,%mm2;			punpckhdq	%mm7,%mm5;	// h3 g3 h2 g2
	pmaddwd		56(%esi),%mm5;			punpcklwd	%mm6,%mm1;	// 3 2 hg; xx xx B0 A0
	movq		256*2-128(%eax,%ebx),%mm3;	punpckhwd	%mm6,%mm2;	// xx C1 xx C0; xx xx B1 A1
	movq		256*3-128(%eax,%ebx),%mm7;	punpckhdq	%mm2,%mm1;	// xx D1 xx D0; B1 A1 B0 A0
	pmaddwd		64+0(%esi),%mm1;		paddd		%mm5,%mm4;	// 1 0 BA; mm4 = 3 2 hgfedcba
	movq		%mm3,%mm6;			punpcklwd	%mm7,%mm3;	// xx xx D0 C0
	movq		256*4-128(%eax,%ebx),%mm2;	punpckhwd	%mm7,%mm6;	// xx E1 xx E0; xx xx D1 C1
	movq		256*5-128(%eax,%ebx),%mm5;	punpckhdq	%mm6,%mm3;	// xx F1 xx F0; D1 C1 D0 C0
	movq		%mm0,24(%edx);			movq		%mm2,%mm6;	
	pmaddwd		64+16(%esi),%mm3;		punpcklwd	%mm5,%mm2;	// 1 0 DC; xx xx F0 E0
	movq		256*6-128(%eax,%ebx),%mm7;	punpckhwd	%mm5,%mm6;	// xx xx F1 E1
	movq		256*7-128(%eax,%ebx),%mm5;	punpckhdq	%mm6,%mm2;	// xx E1 xx E0; F1 E1 F0 E0
	pmaddwd		64+32(%esi),%mm2;		movq		%mm7,%mm6;	// xx F1 xx F0
	paddd		%mm3,%mm1;			punpcklwd	%mm5,%mm7;	// 1 0 FE; xx xx F0 E0
	movq		256*0-128+8(%eax,%ebx),%mm0;	punpckhwd	%mm5,%mm6;	// xx A1 xx A0; xx xx F1 E1
	paddd		%mm2,%mm1;			punpckhdq	%mm6,%mm7;	// F1 E1 F0 E0
	movq		256*1-128+8(%eax,%ebx),%mm3;	movq		%mm0,%mm6;	// xx B1 xx B0
	pmaddwd		64+48(%esi),%mm7;		punpcklwd	%mm3,%mm0;	// 1 0 FE; xx xx B0 A0
	movq		256*2-128+8(%eax,%ebx),%mm2;	punpckhwd	%mm3,%mm6;	// xx C1 xx C0; xx xx B1 A1
	movq		256*3-128+8(%eax,%ebx),%mm5;	punpckhdq	%mm6,%mm0;	// xx D1 xx D0; B1 A1 B0 A0
	pmaddwd		64+8(%esi),%mm0;		paddd		%mm7,%mm1;	// 1 0 BA; mm1 = 1' 0' hgfedcba
	movq		%mm2,%mm7;			punpcklwd	%mm5,%mm2;	// xx xx D0 C0
	movq		256*4-128+8(%eax,%ebx),%mm3;	punpckhwd	%mm5,%mm7;	// xx E1 xx E0; xx xx D1 C1
	movq		256*5-128+8(%eax,%ebx),%mm5;	punpckhdq	%mm7,%mm2;	// xx F1 xx F0; D1 C1 D0 C0
	movq		%mm3,%mm7;			punpcklwd	%mm5,%mm3;	// xx xx F0 E0
	pmaddwd		64+24(%esi),%mm2;		punpckhwd	%mm5,%mm7;	// 1 0 DC; xx xx F1 E1
	movq		256*6-128+8(%eax,%ebx),%mm6;	punpckhdq	%mm7,%mm3;	// xx E1 xx E0; F1 E1 F0 E0
	movq		256*7-128+8(%eax,%ebx),%mm5;	movq		%mm6,%mm7;	// xx F1 xx F0
	pmaddwd		64+40(%esi),%mm3;		punpcklwd	%mm5,%mm6;	// 1 0 FE; xx xx F0 E0
	paddd		%mm2,%mm0;			punpckhwd	%mm5,%mm7;	// xx xx F1 E1
	movq		24(%edx),%mm5;			punpckhdq	%mm7,%mm6;	// F1 E1 F0 E0; mm5 = 1 0 hgfedcba
	pmaddwd		64+56(%esi),%mm6;		paddd		%mm3,%mm0;	// 1 0 FE
	movq		16(%edx),%mm7;			addl		$16,%eax;	// so
	pxor		%mm5,%mm7;			subl		$8,%edi;
	pand		(%edx),%mm5;			paddd		%mm6,%mm0;	// B 0; mm0 = 3' 2' hgfedcba
	pxor		%mm5,%mm7;			pxor		%mm4,%mm5;	// so A; B^D C
	pand		(%edx),%mm4;			paddd		%mm7,%mm0;	// D 0; 31 32
	paddd		8(%edx),%mm0;			pxor		%mm4,%mm5;	// B C
	movq		%mm4,16(%edx);			paddd		%mm5,%mm1;	// so 0; 29 30
	paddd		8(%edx),%mm1;			psrad		$16,%mm0;
	psrad		$16,%mm1;			subl		$32,%ebx;
	packssdw	%mm0,%mm1;			decl		%ecx;
	leal		128(%esi),%esi;			punpckhdq	%mm4,%mm4;
	movq		%mm1,(%edi);			jnz		1b;
	psrad		$1,%mm4;			popl		%ebx;		// (#0 * (1.0 * exp2(15))) >> 16
	movq		%mm4,16(%edx);			popl		%ecx;
	popl		%esi;
	popl		%edi;
	ret

--- NEW FILE ---
/*
 *  MPEG-1 Real Time Encoder
 *
 *  Copyright (C) 1999-2000 Michael H. Schimek
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* $Id: libaudio.h,v 1.1 2001/12/04 23:55:53 mswitch Exp $ */

#include "../common/fifo.h"
#include "../systems/libsystems.h"

#include "../rtepriv.h"

// XXX remove
struct pcm_context {
	fifo		fifo;
	producer	producer;

	rte_sndfmt      format;
	int		sampling_rate;
	bool		stereo;
};

/* mp2.c */

extern rte_codec_class	mp1e_mpeg1_layer2_codec;
extern rte_codec_class	mp1e_mpeg2_layer2_codec;

extern void		mp1e_mp2_module_init(int test);

/* preliminary */
extern void		mp1e_mp2_init(rte_codec *, unsigned int,
				      fifo *cap_fifo, multiplexer *);
//extern void *		mp1e_mp2_thread(void *foo);

/* historic */
extern void		audio_parameters(int *sampling_freq, int *bit_rate);
//extern long long	audio_frame_count;
//extern long long	audio_frames_dropped;

/* oss.c (mp1e) */

extern void		mix_init(void);
extern char *		mix_sources(void);

/* misc (mp1e) */

extern fifo *		open_pcm_oss(char *dev_name, int sampling_rate, bool stereo);
extern fifo *		open_pcm_alsa(char *dev_name, int sampling_rate, bool stereo);
extern fifo *		open_pcm_esd(char *, int sampling_rate, bool stereo);
extern fifo *		open_pcm_afl(char *name, int, bool);

--- NEW FILE ---
/*
 *  MPEG Real Time Encoder
 *  MPEG-1 Audio Layer II
 *  MPEG-2 Audio Layer II Low Frequency Extensions
 *
 *  Copyright (C) 1999-2001 Michael H. Schimek
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...1280 lines suppressed...]
	.option_set	= option_set,
	.option_print	= option_print,

	.parameters	= parameters,

	.mainloop	= mainloop,
};

void
mp1e_mp2_module_init(int test)
{
	assert(sizeof(mpeg1_options) == sizeof(mpeg2_options));
	memcpy(mpeg2_options, mpeg1_options, sizeof(mpeg2_options));

	mpeg2_options[0].menu.num = (int *) &bit_rate_value[MPEG_VERSION_2][1];
	mpeg2_options[1].menu.num = (int *) &sampling_freq_value[MPEG_VERSION_2][0];

	mp1e_mp2_subband_filter_init(test);
	mp1e_mp2_fft_init(test);
}

--- NEW FILE ---
/*
 *  MPEG Real Time Encoder
 *  MPEG-1/2 Audio Layer II Definitions
 *
 *  Copyright (C) 1999-2000 Michael H. Schimek
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* $Id: mpeg.h,v 1.1 2001/12/04 23:55:53 mswitch Exp $ */

#define MPEG_VERSION_1           	3 /* ISO/IEC 11172-3 */
#define MPEG_VERSION_2			2 /* ISO/IEC 13818-3 */
#define MPEG_VERSION_2_5		0 /* Fraunhofer extension, not supported */
#define MPEG_VERSIONS			4

#define LAYER_II			2

#define AUDIO_MODE_STEREO		0
#define AUDIO_MODE_JOINT_STEREO		1
#define AUDIO_MODE_DUAL_CHANNEL		2
#define AUDIO_MODE_MONO			3

#define TABLES				5
#define SBLIMIT				32
#define MAX_BA_INDICES			16
#define NUM_SG				8

#define GRANULE				96
#define SCALE_BLOCK			12
#define BITS_PER_SLOT			8
#define SAMPLES_PER_FRAME		1152
#define HEADER_BITS			32

struct absthr_rec {
	int			line;	/* fft higher line */
	float			thr;	/* absolute threshold (dB) */
};

extern const int		bit_rate_value[MPEG_VERSIONS][16];
extern const int		sampling_freq_value[MPEG_VERSIONS][4];
extern const unsigned char	subband_group[TABLES][SBLIMIT];
extern const unsigned char	bits_table[NUM_SG][MAX_BA_INDICES];
extern const unsigned int	steps_table[NUM_SG][MAX_BA_INDICES];
extern const unsigned char	quant_table[NUM_SG][MAX_BA_INDICES];
extern const unsigned char	pack_table[NUM_SG];

extern const float		SNR[18];
extern const double		C[512];
extern const struct absthr_rec	absthr[6][134];

--- NEW FILE ---
/*
 *  MPEG Real Time Encoder
 *  MPEG-1 Audio Layer II Psychoacoustic Analysis Model 2
 *
 *  Copyright (C) 1999-2000 Michael H. Schimek
 *  Copyright (C) 1996 ISO MPEG Audio Subgroup Software Simulation Group
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* $Id: psycho.c,v 1.1 2001/12/04 23:55:53 mswitch Exp $ */

#include "../common/log.h"
#include "../common/mmx.h"
#include "../common/math.h"
#include "../common/profile.h"
#include "audio.h"

/* Tables */

static float		static_snr[SBLIMIT] __attribute__ ((aligned (CACHE_LINE)));

/* Initialization */

static void
create_absthres(mp2_context *mp2, int sampling_freq)
{
	int table, i, higher, lower = 0;

	for (table = 0; table < 6; table++)
		if (absthr[table][0].thr == sampling_freq)
			break;

	ASSERT("create threshold table for sampling rate %d Hz",
		table < 6, sampling_freq);

	printv(2, "Psychoacoustic threshold table #%d\n", table);

	for (i = 1; i <= absthr[table][0].line; i++)
		for (higher = absthr[table][i].line; lower < higher; lower++)
			mp2->absthres[lower] = pow(10.0, (absthr[table][i].thr + 41.837375) * 0.1);

	while (lower < HBLKSIZE)
		mp2->absthres[lower++] = pow(10.0, (96.0 + 41.837375) * 0.1);

	// DUMP(mp2->absthres, 0, HBLKSIZE);
}

void
mp1e_mp2_psycho_init(mp2_context *mp2, int sampling_freq)
{
	static const float crit_band[27] = {
		0, 100, 200, 300, 400, 510, 630, 770,
        	920, 1080, 1270, 1480, 1720, 2000, 2320, 2700,
        	3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000,
        	15500, 25000, 30000
	};
	static const float bmax[27] = {
		20.0, 20.0, 20.0, 20.0, 20.0, 17.0, 15.0,
    	        10.0,  7.0,  4.4,  4.5,  4.5,  4.5,  4.5,
        	 4.5,  4.5,  4.5,  4.5,  4.5,  4.5,  4.5,
        	 4.5,  4.5,  4.5,  3.5,  3.5,  3.5
	};
	double bval[CBANDS], temp[HBLKSIZE];
	double temp1, temp2, freq_mult, bval_lo;
	FLOAT *s_pp = mp2->s_packed, s[CBANDS][CBANDS];
	int numlines[CBANDS];
	int i, j, k, b;

	mp2->e_save_old    = mp2->e_save[0][1];
	mp2->e_save_oldest = mp2->e_save[0][0];
	mp2->h_save_new    = mp2->h_save[0][2];
	mp2->h_save_old    = mp2->h_save[0][1];
	mp2->h_save_oldest = mp2->h_save[0][0];

	create_absthres(mp2, sampling_freq);

	/* Compute fft frequency multiplicand */

	freq_mult = (double) sampling_freq / BLKSIZE;

	/* Calculate fft frequency, then bark value of each line */
	
	for (i = 0; i < HBLKSIZE; i++) { /* 513 */
		temp1 = i * freq_mult;
		for (j = 1; temp1 > crit_band[j]; j++);
		temp[i] = j - 1 + (temp1 - crit_band[j - 1]) / (crit_band[j] - crit_band[j - 1]);
	}

	mp2->partition[0] = 0;
	temp2 = 1; /* counter of the # of frequency lines in each partition */
	bval[0] = temp[0];
	bval_lo = temp[0];

	for (i = 1; i < HBLKSIZE; i++) {
		if ((temp[i] - bval_lo) > 0.33) {
			mp2->partition[i] = mp2->partition[i - 1] + 1;
			bval[(int) mp2->partition[i - 1]] = bval[(int) mp2->partition[i - 1]] / temp2;
			bval[(int) mp2->partition[i]] = temp[i];
    			bval_lo = temp[i];
    			numlines[(int) mp2->partition[i - 1]] = temp2;
			temp2 = 1;
    		} else {
    			mp2->partition[i] = mp2->partition[i - 1];
	    		bval[(int) mp2->partition[i]] += temp[i];
			temp2++;
		}
	}

	numlines[(int) mp2->partition[i - 1]] = temp2;
	bval[(int) mp2->partition[i - 1]] = bval[(int) mp2->partition[i - 1]] / temp2;

	/*
	 *  Compute the spreading function, s[j][i], the value of the
	 *  spreading function, centered at band j, for band i
	 */
	for (j = 0; j < CBANDS; j++) {
		for (i = 0; i < CBANDS; i++) {
			double temp1, temp2, temp3;

			temp1 = (bval[i] - bval[j]) * 1.05;

			if (temp1 >= 0.5 && temp1 <= 2.5) {
				temp2 = temp1 - 0.5;
				temp2 = 8.0 * (temp2 * temp2 - 2.0 * temp2);
			} else
				temp2 = 0;

			temp1 += 0.474;
			temp3 = 15.811389 + 7.5 * temp1
				- 17.5 * sqrt((double)(1.0 + temp1 * temp1));

			if (temp3 <= -100.0)
				s[i][j] = 0.0;
			else
				s[i][j] = pow(10.0, (temp2 + temp3) / 10.0);
		}
	}

	for (b = 0; b < CBANDS; b++) {
		double NMT, TMN, minval, bc, rnorm;

		/* Noise Masking Tone value (in dB) for all partitions */
		NMT = 5.5;

		/* Tone Masking Noise value (in dB) for this partition */
		TMN = MAX(15.5 + bval[b], 24.5);

		/* SNR lower limit */
		minval = bmax[(int)(bval[b] + 0.5)];

		bc = (TMN - NMT) * 0.1;

		if (b < 20) {
			mp2->p1[b] = pow(10.0, -(minval - NMT) / (TMN - NMT)) / 2.0;
			mp2->p2[b] = pow(MIN(0.5, mp2->p1[b]) * 2.0, bc);
		} else {
			mp2->p4[b - 20] = bc;
			mp2->p3[b - 20] = pow(10.0, (minval - NMT) / bc);
			mp2->p2[b] = pow(mp2->p3[b - 20] * 2.0, bc);
			mp2->p1[b] = pow(0.05 * 2.0, bc);
		}

		/* Calculate normalization factor for the net spreading function */

		for (i = 0, rnorm = 0.0; i < CBANDS; i++)
			rnorm += s[b][i];

		mp2->xnorm[b] = pow(10.0, -NMT * 0.1) / (rnorm * numlines[b]);

		/* Pack spreading function */

	        for (k = 0; s[b][k] == 0.0; k++);
			mp2->s_limits[b].off = k;
	        for (j = 0; s[b][k + j] != 0.0 && (k + j) < CBANDS; j++)
		        *s_pp++ = s[b][k + j];
	        mp2->s_limits[b].cnt = j;
	}

	for (i = 0; i < 3; i++)
		for (j = 0; j < HBLKSIZE; j++) {
			mp2->h_save[0][i][j] = 1.0; /* force oldest, old phi = 0.0 */
			mp2->e_save[0][i & 1][j] = 1.0; /* should be 0.0, see below */
		}

	for (j = 0; j < SBLIMIT; j++)
		static_snr[j] = pow(1.005, 1024 + j * j - 64 * j);

	// DUMP(static_snr, 0, SBLIMIT);
}

/*
 *  Psychoacoustic analysis
 *
 *  This code is equivalent to the psychoacoustic model 2 in the
 *  MPEG-2 Audio Simulation Software Distribution 10, just
 *  a bit :) optimized for speed.
 *
 *  Doesn't return SNR in dB but linear values (pow(10.0, SNR * 0.1)),
 *  the mnr_incr calculation and bit allocation has been modified accordingly.
 */
void
mp1e_mp2_psycho(mp2_context *mp2, short *buffer, float *snr, int step)
{
	int i, j;

	if (!mp2->psycho_loops) {
		memcpy(snr, static_snr, sizeof(static_snr));
		return;
	}

	for (i = 0; i < mp2->psycho_loops;) {
		/*
		 *  Filterbank 3 * 12 * 32 samples: 0..1151 (look ahead 480 samples)
		 *  Pass #1: 0..1023
		 *  Pass #2: 576..1599 (sic)
	         */

		// DUMP(buffer, 0, 16);

		pr_start(30, "FFT & Hann window");

		if (step == 1)
			mp1e_mp2_fft_step_1(buffer, mp2->h_save_new);
		else
			mp1e_mp2_fft_step_2(buffer, mp2->h_save_new);

		pr_end(30);


		pr_start(31, "Psy/1");

		/*
		 *  Calculate the grouped, energy-weighted unpredictability measure,
		 *  grouped[].c, and the grouped energy, grouped[].e
		 *
		 *  Optimize here
                 */

		memset(mp2->grouped, 0, sizeof(mp2->grouped));

		{
			double energy, e_sqrt, r_prime, temp3;
			double r0 = mp2->h_save_new[0];
		        double r2 = mp2->h_save_oldest[0];

			mp2->sum_energy[0] = mp2->grouped[0].e = energy = r0 * r0;

			r_prime = 2.0 * mp2->e_save_old[0] - mp2->e_save_oldest[0];
			mp2->e_save_new[0] = e_sqrt = sqrt(energy);

			if ((temp3 = e_sqrt + fabs(r_prime)) != 0.0) {
				if (r2 < 0.0)
					r_prime = -r_prime;	
				mp2->grouped[0].c = energy * fabs(r0 - r_prime) / temp3;
			}
		}

		for (j = 1; j < HBLKSIZE - 1; j++) {
	    		int pt = mp2->partition[j];
			int j4 = j >> 4;
			double energy, e_sqrt, r_prime, temp3;
			double i0 = mp2->h_save_new[BLKSIZE - j];
			double i1 = mp2->h_save_old[BLKSIZE - j];
			double i2 = mp2->h_save_oldest[BLKSIZE - j];
			double r0 = mp2->h_save_new[j];
			double r1 = mp2->h_save_old[j];
			double r2 = mp2->h_save_oldest[j];

			energy = r0 * r0 + i0 * i0;
			mp2->grouped[pt].e += energy;
			if (j & 15)
				mp2->sum_energy[j4] += energy;
			else {
				mp2->sum_energy[j4 - 1] += energy;
				mp2->sum_energy[j4] = energy;
			}

			r_prime = 2.0 * mp2->e_save_old[j] - mp2->e_save_oldest[j];
			e_sqrt = sqrt(energy);

			temp3 = e_sqrt + fabs(r_prime);

			if (temp3 == 0.0)
				; /* mp2->grouped[pt].c += energy * 0.0; */
			else {
				double c1, s1, c2, s2, ll;
/* original code
				double phi = atan2(i0, r0);
				double phi_prime = 2.0 * atan2(i1, r1) - atan2(i2, r2);

				c1 = e_sqrt * cos(phi) - r_prime * cos(phi_prime);
				s1 = e_sqrt * sin(phi) - r_prime * sin(phi_prime);

				mp2->grouped[pt].c += energy * sqrt(c1 * c1 + s1 * s1) / temp3;
*/
				c2 = r1 * r1;
				s2 = i1 * i1;

				c1 = r1 * i1 * 2.0;
				s1 = c2 - s2;
				ll = c2 + s2;

				c2 = s1 * r2 + c1 * i2;
				s2 = c1 * r2 - s1 * i2;

				r_prime /= ll * mp2->e_save_oldest[j];
				/*
				 *  This should be r_prime /= ll * sqrt(r2 * r2 + i2 * i2),
				 *  initializing e_save to all zeroes. By initializing to all
				 *  ones we can omit one sqrt(). The +1 error in temp3
				 *  (frame #1 only) is negligible.
				 */

				c1 = r0 - r_prime * c2;
				s1 = i0 - r_prime * s2;

				mp2->grouped[pt].c += energy * sqrt(c1 * c1 + s1 * s1) / temp3;
			}

			mp2->e_save_new[j] = e_sqrt;
		}

#if 0 // a shortcut we don't take

		for (j = 206; j < HBLKSIZE - 1; j++) {
	    		int pt = mp2->partition[j], j4 = j >> 4;
			double i0 = mp2->h_save_new[BLKSIZE - j];
			double r0 = mp2->h_save_new[j];

			mp2->grouped[pt].e += energy = r0 * r0 + i0 * i0;
			mp2->grouped[pt].c += energy * 0.3;
			if (j & 15)
				mp2->sum_energy[j4] += energy;
			else {
				mp2->sum_energy[j4 - 1] += energy;
				mp2->sum_energy[j4] = energy;
			}
		}
#endif
		{
			double energy, e_sqrt, r_prime, temp3;
		        double r0 = mp2->h_save_new[HBLKSIZE - 1];
			double r2 = mp2->h_save_oldest[HBLKSIZE - 1];

			mp2->grouped[CBANDS - 1].e += energy = r0 * r0;
			mp2->sum_energy[SBLIMIT - 1] += energy;

			r_prime = 2.0 * mp2->e_save_old[HBLKSIZE - 1] - mp2->e_save_oldest[HBLKSIZE - 1];
			mp2->e_save_new[HBLKSIZE - 1] = e_sqrt = sqrt(energy);

			if ((temp3 = e_sqrt + fabs(r_prime)) != 0.0) {
				if (r2 < 0.0)
					r_prime = -r_prime;
				mp2->grouped[CBANDS - 1].c += energy * fabs(r0 - r_prime) / temp3;
			}
		}

		// DUMP(mp2->grouped_e, 0, CBANDS);    
		// DUMP(mp2->grouped_c, 0, CBANDS);

		pr_end(31);
		pr_start(32, "Psy/2");

		{
			FLOAT *s_pp = mp2->s_packed;

			for (j = 0; j < 20; j++) {
				double ecb = 0.0, cb = 0.0;
				int k, cnt = mp2->s_limits[j].cnt, off = mp2->s_limits[j].off;

				/*
				 *  Convolve the grouped energy-weighted unpredictability measure
				 *  and the grouped energy with the spreading function, s[j][k]
				 */
				for (k = 0; k < cnt; k++) {
				    	double ss = *s_pp++;
					ecb += ss * mp2->grouped[off + k].e;
    				    	cb += ss * mp2->grouped[off + k].c;
				}

				if (ecb == 0.0)
					mp2->nb[j] = 0.0;
				else {
					cb /= ecb;

					/*
					 *  Calculate the required SNR for each of the
					 *  frequency partitions
					 */
					ecb *= mp2->xnorm[j];

			    		if (cb < 0.05)
						/* pow(0.05 * 2.0, 1.9); */
						mp2->nb[j] = ecb * 0.0125892541179416766333743;
					else if (cb > 0.5 || cb > mp2->p1[j])
						mp2->nb[j] = ecb * mp2->p2[j];
					else
						mp2->nb[j] = ecb * pow(cb * 2.0, 1.9);
				}
			}

			for (; j < CBANDS; j++) { /* 63 */
				double ecb = 0.0, cb = 0.0;
				int k, cnt = mp2->s_limits[j].cnt, off = mp2->s_limits[j].off;

				for (k = 0; k < cnt; k++) {
					double ss = *s_pp++;
					ecb += ss * mp2->grouped[off + k].e;
        				cb += ss * mp2->grouped[off + k].c;
				}

				if (ecb == 0.0)
					mp2->nb[j] = 0.0;
				else {
					cb /= ecb;
					ecb *= mp2->xnorm[j];

					if (cb < 0.05)
						mp2->nb[j] = ecb * mp2->p1[j];
					else if (cb > 0.5)
						/* not exactly, but the error is negligible */
						mp2->nb[j] = ecb; 
					else if (cb > mp2->p3[j - 20])
						mp2->nb[j] = ecb * pow(cb * 2.0, mp2->p4[j - 20]);
					else
						mp2->nb[j] = ecb * mp2->p2[j];
				}
			}

			// DUMP(mp2->nb, 0, CBANDS);
		}

		// DUMP(mp2->sum_energy, 0, SBLIMIT);

		pr_end(32);
		pr_start(33, "Psy/3");

		/*
		 *  Calculate the permissible noise energy level in each of the frequency
		 *  partitions and translate the 512 threshold values to the 32 filter
		 *  bands of the coder
		 */

		if (i == 0) {
			for (j = 0; j < 13; j++) {
				double minthres = 60802371420160.0;
				int k;

				for (k = 0; k < 17; k++) {
					double fthr = MAX(mp2->absthres[j * 16 + k],
							  mp2->nb[(int) mp2->partition[j * 16 + k]]);

					if (minthres > fthr)
						minthres = fthr;
				}

				snr[j] = mp2->sum_energy[j] / (minthres * 17.0);
			}

			for (j = 13; j < mp2->sblimit; j ++) {
    				double minthres = 0.0;
				int k;

				for (k = 0; k < 17; k++)
					minthres += MAX(mp2->absthres[j * 16 + k],
							mp2->nb[(int) mp2->partition[j * 16 + k]]);

				snr[j] = mp2->sum_energy[j] / minthres;
			}
		} else {
			for (j = 0; j < 13; j++) {
				double t, minthres = 60802371420160.0;
				int k;

				for (k = 0; k < 17; k++) {
					double fthr = MAX(mp2->absthres[j * 16 + k],
							  mp2->nb[(int) mp2->partition[j * 16 + k]]);

					if (minthres > fthr)
						minthres = fthr;
				}

				t = mp2->sum_energy[j] / (minthres * 17.0);
				if (t > snr[j]) snr[j] = t;
			}

			for (j = 13; j < mp2->sblimit; j++) {
    				double t, minthres = 0.0;
				int k;

				for (k = 0; k < 17; k++)
					minthres += MAX(mp2->absthres[j * 16 + k],
							mp2->nb[(int) mp2->partition[j * 16 + k]]);

				t = mp2->sum_energy[j] / minthres;
				if (t > snr[j]) snr[j] = t;
			}
		}

		// DUMP(snr, 0, SBLIMIT);

		pr_end(33);

		buffer += 576;

		{
			FLOAT *t;

			t = mp2->e_save_oldest;
			mp2->e_save_oldest = mp2->e_save_old;
			mp2->e_save_old = t;

			t = mp2->h_save_oldest;
			mp2->h_save_oldest = mp2->h_save_old;
			mp2->h_save_old = mp2->h_save_new;
			mp2->h_save_new = t;
		}

		/************************************************************
		 **                                                        **
		 **                     Add one to i                       **
		 **							   **
		 ************************************************************/

					 i = i + 1;
	}

	if (step == 2) {
		if (mp2->ch) {
			mp2->h_save_oldest -= sizeof(mp2->h_save[0]) / sizeof(FLOAT);
			mp2->h_save_old    -= sizeof(mp2->h_save[0]) / sizeof(FLOAT);
			mp2->h_save_new    -= sizeof(mp2->h_save[0]) / sizeof(FLOAT);
			mp2->e_save_oldest -= sizeof(mp2->e_save[0]) / sizeof(FLOAT);
			mp2->e_save_old    -= sizeof(mp2->e_save[0]) / sizeof(FLOAT);
		} else {
			mp2->h_save_oldest += sizeof(mp2->h_save[0]) / sizeof(FLOAT);
			mp2->h_save_old    += sizeof(mp2->h_save[0]) / sizeof(FLOAT);
			mp2->h_save_new    += sizeof(mp2->h_save[0]) / sizeof(FLOAT);
			mp2->e_save_oldest += sizeof(mp2->e_save[0]) / sizeof(FLOAT);
			mp2->e_save_old    += sizeof(mp2->e_save[0]) / sizeof(FLOAT);
		}

		mp2->ch ^= 1;
	}
}

--- NEW FILE ---
/*
 *  MPEG Real Time Encoder
 *  MPEG-1 Audio Layer II Tables
 *
 *  Copyright (C) 1999-2000 Michael H. Schimek
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
[...1056 lines suppressed...]
	{ 344, 4.91 },
	{ 352, 5.31 },
	{ 360, 5.73 },
	{ 368, 6.18 },
	{ 376, 6.67 },
	{ 384, 7.19 },
	{ 392, 7.74 },
	{ 400, 8.33 },
	{ 408, 8.96 },
	{ 416, 9.63 },
	{ 424, 10.33 },
	{ 432, 11.08 },
	{ 440, 11.87 },
	{ 448, 12.71 },
	{ 456, 13.59 },
	{ 464, 14.53 },
	{ 472, 15.51 },
	{ 480, 16.54 }
    }
};




More information about the MPlayer-cvslog mailing list