[FFmpeg-devel] [PATCH] mips: Implementation of AC3 fixed point decoder and optimization for MIPS.
Babic, Nedeljko
nbabic at mips.com
Fri Aug 17 14:13:53 CEST 2012
Hello,
Has anyone had a chance to review this patch?
Thanks,
Nedeljko
________________________________________
From: Nedeljko Babic [nbabic at mips.com]
Sent: Thursday, August 09, 2012 15:50
To: ffmpeg-devel at ffmpeg.org
Cc: Lukac, Zeljko; Babic, Nedeljko
Subject: [PATCH] mips: Implementation of AC3 fixed point decoder and optimization for MIPS.
An AC3 fixed-point decoder is implemented in C, and the appropriate functions
are optimized for the MIPS architecture. Some of the DSP, format-conversion
utility and FFT fixed-point functions are optimized.
Signed-off-by: Nedeljko Babic <nbabic at mips.com>
---
doc/mips.txt | 6 +
libavcodec/allcodecs.c | 3 +
libavcodec/dsputil.c | 24 +
libavcodec/dsputil.h | 4 +
libavcodec/fft.c | 1 +
libavcodec/fft.h | 12 +
libavcodec/fmtconvert.c | 79 ++
libavcodec/fmtconvert.h | 57 +-
libavcodec/kbdwin.c | 32 +
libavcodec/kbdwin.h | 6 +-
libavcodec/mips/Makefile | 4 +
libavcodec/mips/ac3dec_fixed.c | 1660 +++++++++++++++++++++++++++++++
libavcodec/mips/ac3dec_fixed.h | 234 +++++
libavcodec/mips/dsputil_mips_fixed.c | 153 +++
libavcodec/mips/fft_mips_fixed.c | 906 +++++++++++++++++
libavcodec/mips/fft_table_fixed.h | 105 ++
libavcodec/mips/fmtconvert_mips_fixed.c | 226 +++++
libavutil/common.h | 12 +
18 files changed, 3520 insertions(+), 4 deletions(-)
create mode 100644 libavcodec/mips/ac3dec_fixed.c
create mode 100644 libavcodec/mips/ac3dec_fixed.h
create mode 100644 libavcodec/mips/dsputil_mips_fixed.c
create mode 100644 libavcodec/mips/fft_mips_fixed.c
create mode 100644 libavcodec/mips/fft_table_fixed.h
create mode 100644 libavcodec/mips/fmtconvert_mips_fixed.c
diff --git a/doc/mips.txt b/doc/mips.txt
index 6fa6fb4..5b2e710 100644
--- a/doc/mips.txt
+++ b/doc/mips.txt
@@ -47,6 +47,8 @@ Files that have MIPS copyright notice in them:
* libavutil/mips/
libm_mips.h
* libavcodec/mips/
+ ac3dec_fixed.c
+ ac3dec_fixed.h
acelp_filters_mips.c
acelp_vectors_mips.c
amrwbdec_mips.c
@@ -57,9 +59,13 @@ Files that have MIPS copyright notice in them:
compute_antialias_float.h
lsp_mips.h
dsputil_mips.c
+ dsputil_mips_fixed.c
fft_mips.c
+ fft_mips_fixed.c
fft_table.h
+ fft_table_fixed.h
fft_init_table.c
fmtconvert_mips.c
+ fmtconvert_mips_fixed.c
mpegaudiodsp_mips_fixed.c
mpegaudiodsp_mips_float.c
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 8305bc2..319286f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -273,6 +273,9 @@ void avcodec_register_all(void)
REGISTER_DECODER (AAC_LATM, aac_latm);
REGISTER_ENCDEC (AC3, ac3);
REGISTER_ENCODER (AC3_FIXED, ac3_fixed);
+#if (ARCH_MIPS)
+ REGISTER_DECODER (AC3_FIXED, ac3_fixed);
+#endif /* ARCH_MIPS */
REGISTER_ENCDEC (ALAC, alac);
REGISTER_DECODER (ALS, als);
REGISTER_DECODER (AMRNB, amrnb);
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index a1d69c4..8f9aa1c 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2515,6 +2515,26 @@ static void vector_fmul_window_c(float *dst, const float *src0,
}
}
+#if (ARCH_MIPS)
+static void vector_fmul_window_fixed_c(int *dst, const int16_t *src0,
+ const int16_t *src1, const int16_t *win, int len)
+{
+ int i,j;
+ dst += len;
+ win += len;
+ src0+= len;
+
+ for (i=-len, j=len-1; i<0; i++, j--) {
+ int s0 = src0[i];
+ int s1 = src1[j];
+ int wi = win[i];
+ int wj = win[j];
+ dst[i] = (s0*wj - s1*wi + 0x4000) >> 15;
+ dst[j] = (s0*wi + s1*wj + 0x4000) >> 15;
+ }
+}
+#endif /* ARCH_MIPS */
+
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
int len)
{
@@ -3042,6 +3062,9 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->vector_fmul_reverse = vector_fmul_reverse_c;
c->vector_fmul_add = vector_fmul_add_c;
c->vector_fmul_window = vector_fmul_window_c;
+#if (ARCH_MIPS)
+ c->vector_fmul_window_fixed = vector_fmul_window_fixed_c;
+#endif
c->vector_clipf = vector_clipf_c;
c->scalarproduct_int16 = scalarproduct_int16_c;
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
@@ -3177,6 +3200,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
if (HAVE_MIPSFPU) ff_dsputil_init_mips (c, avctx);
+ if (HAVE_MIPSDSPR2) ff_dsputil_init_mips_fixed(c);
for (i = 0; i < 4; i++) {
for (j = 0; j < 16; j++) {
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 18dd316..d437844 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -398,6 +398,9 @@ typedef struct DSPContext {
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
+#if (ARCH_MIPS)
+ void (*vector_fmul_window_fixed)(int *dst, const int16_t *src0, const int16_t *src1, const int16_t *win, int len);
+#endif /* ARCH_MIPS */
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
/**
@@ -624,6 +627,7 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_mips(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_mips_fixed(DSPContext* c);
void ff_dsputil_init_dwt(DSPContext *c);
void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/fft.c b/libavcodec/fft.c
index 39c8972..a57b62e 100644
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -167,6 +167,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
#else
if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c;
if (ARCH_ARM) ff_fft_fixed_init_arm(s);
+ if (ARCH_MIPS) ff_fft_fixed_init_mips(s);
#endif
for(j=4; j<=nbits; j++) {
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 15e5a12..deabbab 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -80,6 +80,10 @@ struct FFTContext {
void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+#if (ARCH_MIPS)
+ void (*fft_fixed_calc)(struct FFTContext *s, FFTComplex *z);
+ void (*imdct_fixed_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+#endif /* ARCH_MIPS */
void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
int fft_permutation;
@@ -140,6 +144,9 @@ void ff_fft_init_arm(FFTContext *s);
void ff_fft_init_mips(FFTContext *s);
#else
void ff_fft_fixed_init_arm(FFTContext *s);
+#if (ARCH_MIPS)
+void ff_fft_fixed_init_mips(FFTContext *s);
+#endif
#endif
void ff_fft_end(FFTContext *s);
@@ -147,6 +154,11 @@ void ff_fft_end(FFTContext *s);
#define ff_mdct_init FFT_NAME(ff_mdct_init)
#define ff_mdct_end FFT_NAME(ff_mdct_end)
+#if (ARCH_MIPS)
+int ff_mdct_fixed_init_hardcoded_128(FFTContext *s, int nbits, int inverse, int scale);
+int ff_mdct_fixed_init_hardcoded(FFTContext *s, int nbits, int inverse, int scale);
+int ff_mdct_fixed_init(FFTContext *s, int nbits, int inverse, int scale);
+#endif /* ARCH_MIPS */
int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
void ff_mdct_end(FFTContext *s);
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index e47c205..b7b0345 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -28,7 +28,18 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul,
for(i=0; i<len; i++)
dst[i] = src[i] * mul;
}
+#if (ARCH_MIPS)
+static void int32_to_fixed_fmul_scalar_c(int16_t *dst, const int *src, int mul, int len) {
+ int i;
+ for(i=0; i<len; i++)
+ dst[i] = (src[i] * mul + 0x8000) >> 16;
+}
+static av_always_inline int fixed_to_int16_one(const int *src)
+{
+ return av_clip_int16_c_fixed(*src);
+}
+#endif /* ARCH_MIPS */
static av_always_inline int float_to_int16_one(const float *src){
return av_clip_int16(lrintf(*src));
}
@@ -56,6 +67,37 @@ static void float_to_int16_interleave_c(int16_t *dst, const float **src,
}
}
+#if (ARCH_MIPS)
+static void fixed_to_int16_interleave_c(int16_t *dst, const int **src,
+ long len, int channels)
+{
+ int i,j,c;
+ if(channels==2) {
+ for(i=0; i<len; i++) {
+ dst[2*i] = fixed_to_int16_one(src[0]+i);
+ dst[2*i+1] = fixed_to_int16_one(src[1]+i);
+ }
+ }
+ else {
+ if(channels==6) {
+ for(i=0; i<len; i++) {
+ dst[6*i] = fixed_to_int16_one(src[0]+i);
+ dst[6*i+1] = fixed_to_int16_one(src[1]+i);
+ dst[6*i+2] = fixed_to_int16_one(src[2]+i);
+ dst[6*i+3] = fixed_to_int16_one(src[3]+i);
+ dst[6*i+4] = fixed_to_int16_one(src[4]+i);
+ dst[6*i+5] = fixed_to_int16_one(src[5]+i);
+ }
+ }
+ else {
+ for(c=0; c<channels; c++)
+ for(i=0, j=c; i<len; i++, j+=channels)
+ dst[j] = fixed_to_int16_one(src[c]+i);
+ }
+ }
+}
+#endif /* ARCH_MIPS */
+
void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
int channels)
{
@@ -75,9 +117,45 @@ void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
}
}
+#if (ARCH_MIPS)
+void ff_fixed_interleave_c(int *dst, const int **src, unsigned int len,
+ int channels)
+{
+ int j, c;
+ unsigned int i;
+ if (channels == 6) {
+ for (i = 0; i < len; i++) {
+ dst[6*i] = src[0][i];
+ dst[6*i+1] = src[1][i];
+ dst[6*i+2] = src[2][i];
+ dst[6*i+3] = src[3][i];
+ dst[6*i+4] = src[4][i];
+ dst[6*i+5] = src[5][i];
+ }
+ }
+ else if (channels == 2) {
+ for (i = 0; i < len; i++) {
+ dst[2*i] = src[0][i];
+ dst[2*i+1] = src[1][i];
+ }
+ } else if (channels == 1 && len < INT_MAX / sizeof(int)) {
+ memcpy(dst, src[0], len * sizeof(int));
+ } else {
+ for (c = 0; c < channels; c++)
+ for (i = 0, j = c; i < len; i++, j += channels)
+ dst[j] = src[c][i];
+ }
+}
+#endif /* ARCH_MIPS */
+
av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
{
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+#if (ARCH_MIPS)
+ c->int32_to_fixed_fmul_scalar = int32_to_fixed_fmul_scalar_c;
+ c->fixed_to_int16_interleave = fixed_to_int16_interleave_c;
+ c->fixed_interleave = ff_fixed_interleave_c;
+#endif /* ARCH_MIPS */
c->float_to_int16 = float_to_int16_c;
c->float_to_int16_interleave = float_to_int16_interleave_c;
c->float_interleave = ff_float_interleave_c;
@@ -86,6 +164,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
if (HAVE_MMX) ff_fmt_convert_init_x86(c, avctx);
if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c);
+ if (HAVE_MIPSDSPR1) ff_fmt_convert_init_mips_fixed(c, avctx);
}
/* ffdshow custom code */
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index ab2caa2..49e7992 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -36,7 +36,55 @@ typedef struct FmtConvertContext {
* constraints: multiple of 8
*/
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
-
+#if (ARCH_MIPS)
+ /**
+ * Multiply an array of int32_t by an int32_t value and convert to int16_t.
+ * @param dst destination array of int16_t.
+ * constraints: 16-byte aligned
+ * @param src source array of int32_t.
+ * constraints: 16-byte aligned
+ * @param len number of elements in array.
+ * constraints: multiple of 8
+ */
+ void (*int32_to_fixed_fmul_scalar)(int16_t *dst, const int *src, int mul, int len);
+ /**
+ * Convert an array of int32_t to an array of int16_t.
+ *
+ * @param dst destination array of int16_t.
+ * constraints: 16-byte aligned
+ * @param src source array of int32_t.
+ * constraints: 16-byte aligned
+ * @param len number of elements to convert.
+ * constraints: multiple of 8
+ */
+ void (*fixed_to_int16)(int16_t *dst, const int *src, long len);
+ /**
+ * Convert multiple arrays of int32_t to an interleaved array of int16_t.
+ *
+ * @param dst destination array of interleaved int16_t.
+ * constraints: 16-byte aligned
+ * @param src source array of int32_t arrays, one for each channel.
+ * constraints: 16-byte aligned
+ * @param len number of elements to convert.
+ * constraints: multiple of 8
+ * @param channels number of channels
+ */
+ void (*fixed_to_int16_interleave)(int16_t *dst, const int **src,
+ long len, int channels);
+ /**
+ * Convert multiple arrays of int32_t to an array of interleaved int32_t.
+ *
+ * @param dst destination array of interleaved int32_t.
+ * constraints: 16-byte aligned
+ * @param src source array of int32_t arrays, one for each channel.
+ * constraints: 16-byte aligned
+ * @param len number of elements to convert.
+ * constraints: multiple of 8
+ * @param channels number of channels
+ */
+ void (*fixed_interleave)(int *dst, const int **src, unsigned int len,
+ int channels);
+#endif /* ARCH_MIPS */
/**
* Convert an array of float to an array of int16_t.
*
@@ -86,7 +134,12 @@ typedef struct FmtConvertContext {
void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
int channels);
-
+#if (ARCH_MIPS)
+void ff_fixed_interleave_c(int *dst, const int **src, unsigned int len,
+ int channels);
+void fixed_interleave(int *dst, const int **src, unsigned int len, int channels);
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx);
+#endif /* ARCH_MIPS */
av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);
void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
diff --git a/libavcodec/kbdwin.c b/libavcodec/kbdwin.c
index 2722312..4f76b20 100644
--- a/libavcodec/kbdwin.c
+++ b/libavcodec/kbdwin.c
@@ -46,3 +46,35 @@ av_cold void ff_kbd_window_init(float *window, float alpha, int n)
for (i = 0; i < n; i++)
window[i] = sqrt(local_window[i] / sum);
}
+
+#if (ARCH_MIPS)
+av_cold void ff_kbd_fixed_window_init(int16_t *window, float alpha, int n)
+{
+ int i, j;
+ double sum = 0.0, bessel, tmp;
+ double local_window[FF_KBD_WINDOW_MAX];
+ double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
+
+ assert(n <= FF_KBD_WINDOW_MAX);
+
+ for (i = 0; i < n; i++) {
+ tmp = i * (n - i) * alpha2;
+ bessel = 1.0;
+ for (j = BESSEL_I0_ITER; j > 0; j--)
+ bessel = bessel * tmp / (j * j) + 1;
+ sum += bessel;
+ local_window[i] = sum;
+ }
+
+ sum++;
+ for (i = 0; i < n; i++)
+ {
+ int tmp;
+
+ tmp = (int)(32767*sqrt(local_window[i] / sum) + 0.5);
+ if (tmp > 32767)
+ tmp = 32767;
+ window[i] = (int16_t)tmp;
+ }
+}
+#endif
diff --git a/libavcodec/kbdwin.h b/libavcodec/kbdwin.h
index 4b93975..66621a2 100644
--- a/libavcodec/kbdwin.h
+++ b/libavcodec/kbdwin.h
@@ -18,7 +18,7 @@
#ifndef AVCODEC_KBDWIN_H
#define AVCODEC_KBDWIN_H
-
+#include "config.h"
/**
* Maximum window size for ff_kbd_window_init.
*/
@@ -31,5 +31,7 @@
* @param n size of half window, max FF_KBD_WINDOW_MAX
*/
void ff_kbd_window_init(float *window, float alpha, int n);
-
+#if (ARCH_MIPS)
+void ff_kbd_fixed_window_init(int16_t *window, float alpha, int n);
+#endif
#endif /* AVCODEC_KBDWIN_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index ff46768..4830039 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -17,3 +17,7 @@ OBJS-$(CONFIG_FFT) += mips/fft_init_table.o
MIPSFPU-OBJS-$(CONFIG_FFT) += mips/fft_mips.o
MIPSFPU-OBJS-$(HAVE_INLINE_ASM) += mips/fmtconvert_mips.o
MIPSFPU-OBJS-$(HAVE_INLINE_ASM) += mips/dsputil_mips.o
+MIPSDSPR1-OBJS-$(HAVE_INLINE_ASM) += mips/fmtconvert_mips_fixed.o
+MIPSDSPR2-OBJS-$(HAVE_INLINE_ASM) += mips/dsputil_mips_fixed.o
+OBJS-$(CONFIG_FFT) += mips/fft_mips_fixed.o
+OBJS-$(CONFIG_AC3_FIXED_DECODER) += mips/ac3dec_fixed.o
diff --git a/libavcodec/mips/ac3dec_fixed.c b/libavcodec/mips/ac3dec_fixed.c
new file mode 100644
index 0000000..50a30dd
--- /dev/null
+++ b/libavcodec/mips/ac3dec_fixed.c
@@ -0,0 +1,1660 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * AC3 fixed-point decoder for MIPS platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FFT_FLOAT 0
+
+#include <stdio.h>
+#include <stddef.h>
+#include <math.h>
+#include <string.h>
+
+#include "libavutil/crc.h"
+#include "libavcodec/internal.h"
+#include "libavcodec/aac_ac3_parser.h"
+#include "libavcodec/ac3_parser.h"
+#include "ac3dec_fixed.h"
+#include "libavcodec/ac3dec_data.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/kbdwin.h"
+
+#define INT2FIXED(x) (((x) << 15) * ( x < -32767 ? -1 : 1))
+#define MULT_FINT(x, y) (((long long)(x) * (y) ) >> 16 )
+#define ADD_FINT(x, y) ( (x) + (y) )
+#define SUB_FINT(a, b) ( (a) - (b) )
+#define SUB_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) ( INT2FIXED(x) - (y) )
+#define DIV_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) \
+ ( (((long long)(x) << 30) * ( x < -32767 ? -1 : 1) ) / (y) )
+#define MULT_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) \
+ (((long long)INT2FIXED(x) * (y) ) >> 15 )
+
+/**
+ * table for ungrouping 3 values in 7 bits.
+ * used for exponents and bap=2 mantissas
+ */
+static uint8_t ungroup_3_in_7_bits_tab[128][3];
+
+
+/** tables for ungrouping mantissas */
+static int b1_mantissas[32][3];
+static int b2_mantissas[128][3];
+static int b3_mantissas[8];
+static int b4_mantissas[128][2];
+static int b5_mantissas[16];
+
+/**
+ * Quantization table: levels for symmetric, bits for asymmetric.
+ * reference: Table 7.18 Mapping of bap to Quantizer
+ */
+static const uint8_t quantization_tab[16] = {
+ 0, 3, 5, 7, 11, 15,
+ 5, 6, 7, 8, 9, 10, 11, 12, 14, 16
+};
+
+/** Adjustments in dB gain */
+static const int gain_levels_fixed[9] = {
+ LEVEL_FIXED_PLUS_3DB,
+ LEVEL_FIXED_PLUS_1POINT5DB,
+ LEVEL_FIXED_ONE,
+ LEVEL_FIXED_MINUS_1POINT5DB,
+ LEVEL_FIXED_MINUS_3DB,
+ LEVEL_FIXED_MINUS_4POINT5DB,
+ LEVEL_FIXED_MINUS_6DB,
+ LEVEL_FIXED_ZERO,
+ LEVEL_FIXED_MINUS_9DB
+};
+
+/**
+ * Table for center mix levels
+ * reference: Section 5.4.2.4 cmixlev
+ */
+static const uint8_t center_levels[4] = { 4, 5, 6, 5 };
+
+/**
+ * Table for surround mix levels
+ * reference: Section 5.4.2.5 surmixlev
+ */
+static const uint8_t surround_levels[4] = { 4, 6, 7, 6 };
+
+/**
+ * Table for default stereo downmixing coefficients
+ * reference: Section 7.8.2 Downmixing Into Two Channels
+ */
+static const uint8_t ac3_default_coeffs[8][5][2] = {
+ { { 2, 7 }, { 7, 2 }, },
+ { { 4, 4 }, },
+ { { 2, 7 }, { 7, 2 }, },
+ { { 2, 7 }, { 5, 5 }, { 7, 2 }, },
+ { { 2, 7 }, { 7, 2 }, { 6, 6 }, },
+ { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 8, 8 }, },
+ { { 2, 7 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, },
+ { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, },
+};
+
+/**
+ * Symmetrical Dequantization
+ * reference: Section 7.3.3 Expansion of Mantissas for Symmetrical Quantization
+ * Tables 7.19 to 7.23
+ */
+static inline int
+symmetric_dequant(int code, int levels)
+{
+ return ((code - (levels >> 1)) << 24) / levels;
+}
+
+/**
+ * Initialize tables at runtime.
+ */
+static av_cold void ac3_tables_init(void)
+{
+ int i;
+
+ /* generate table for ungrouping 3 values in 7 bits
+ reference: Section 7.1.3 Exponent Decoding */
+ for(i=0; i<128; i++) {
+ ungroup_3_in_7_bits_tab[i][0] = i / 25;
+ ungroup_3_in_7_bits_tab[i][1] = (i % 25) / 5;
+ ungroup_3_in_7_bits_tab[i][2] = (i % 25) % 5;
+ }
+
+ /* generate grouped mantissa tables
+ reference: Section 7.3.5 Ungrouping of Mantissas */
+ for(i=0; i<32; i++) {
+ /* bap=1 mantissas */
+ b1_mantissas[i][0] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][0], 3);
+ b1_mantissas[i][1] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][1], 3);
+ b1_mantissas[i][2] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][2], 3);
+ }
+ for(i=0; i<128; i++) {
+ /* bap=2 mantissas */
+ b2_mantissas[i][0] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][0], 5);
+ b2_mantissas[i][1] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][1], 5);
+ b2_mantissas[i][2] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][2], 5);
+
+ /* bap=4 mantissas */
+ b4_mantissas[i][0] = symmetric_dequant(i / 11, 11);
+ b4_mantissas[i][1] = symmetric_dequant(i % 11, 11);
+ }
+ /* generate ungrouped mantissa tables
+ reference: Tables 7.21 and 7.23 */
+ for(i=0; i<7; i++) {
+ /* bap=3 mantissas */
+ b3_mantissas[i] = symmetric_dequant(i, 7);
+ }
+ for(i=0; i<15; i++) {
+ /* bap=5 mantissas */
+ b5_mantissas[i] = symmetric_dequant(i, 15);
+ }
+}
+
+/**
+ * AVCodec initialization
+ */
+av_cold int ac3_fixed_decode_init(AVCodecContext *avctx)
+{
+ AC3FixedDecodeContext *s = avctx->priv_data;
+ s->avctx = avctx;
+
+ ff_ac3_common_init();
+ ac3_tables_init();
+
+#if !CONFIG_HARDCODED_TABLES
+ ff_mdct_fixed_init(&s->imdct_256, 8, 1, 1);
+ ff_mdct_fixed_init(&s->imdct_512, 9, 1, 1);
+#else
+ ff_mdct_fixed_init_hardcoded(&s->imdct_256, 8, 1, 1);
+ ff_mdct_fixed_init_hardcoded_128(&s->imdct_512, 9, 1, 1);
+#endif
+
+ ff_kbd_fixed_window_init(s->window, 5.0, 256);
+ ff_dsputil_init(&s->dsp, avctx);
+
+ ff_fmt_convert_init(&s->fmt_conv, avctx);
+ av_lfg_init(&s->dith_state, 0);
+
+
+ if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+ //DONE s->mul_bias = 1.0f;
+ s->mul_bias = 65536;
+ avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+ } else {
+ //DONE s->mul_bias = 32767.0f;
+ s->mul_bias = 2147418112;
+ avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ }
+
+ /* allow downmixing to stereo or mono */
+ if (avctx->channels > 0 && avctx->request_channels > 0 &&
+ avctx->request_channels < avctx->channels &&
+ avctx->request_channels <= 2) {
+ avctx->channels = avctx->request_channels;
+ }
+ s->downmixed = 1;
+
+ avcodec_get_frame_defaults(&s->frame);
+ avctx->coded_frame = &s->frame;
+
+ return 0;
+}
+
+/**
+ * Parse the 'sync info' and 'bit stream info' from the AC-3 bitstream.
+ * GetBitContext within AC3FixedDecodeContext must point to
+ * the start of the synchronized AC-3 bitstream.
+ */
+static int ac3_parse_header_fixed(AC3FixedDecodeContext *s)
+{
+ GetBitContext *gbc = &s->gbc;
+ int i;
+
+ /* read the rest of the bsi. read twice for dual mono mode. */
+ i = !(s->channel_mode);
+ do {
+ skip_bits(gbc, 5); /* skip dialog normalization */
+ if (get_bits1(gbc))
+ skip_bits(gbc, 8); /* skip compression */
+ if (get_bits1(gbc))
+ skip_bits(gbc, 8); /* skip language code */
+ if (get_bits1(gbc))
+ skip_bits(gbc, 7); /* skip audio production information */
+ } while (i--);
+
+ skip_bits(gbc, 2); /* skip copyright bit and original bitstream bit */
+
+ /* skip the timecodes (or extra bitstream information for Alternate Syntax)
+ TODO: read & use the xbsi1 downmix levels */
+ if (get_bits1(gbc))
+ skip_bits(gbc, 14); /* skip timecode1 / xbsi1 */
+ if (get_bits1(gbc))
+ skip_bits(gbc, 14); /* skip timecode2 / xbsi2 */
+
+ /* skip additional bitstream info */
+ if (get_bits1(gbc)) {
+ i = get_bits(gbc, 6);
+ do {
+ skip_bits(gbc, 8);
+ } while(i--);
+ }
+ return 0;
+}
+
+/**
+ * Common function to parse AC-3 or E-AC-3 frame header
+ */
+static int parse_frame_header_fixed(AC3FixedDecodeContext *s)
+{
+ AC3HeaderInfo hdr;
+ int err;
+
+ err = avpriv_ac3_parse_header(&s->gbc, &hdr);
+ if(err)
+ return err;
+
+ /* get decoding parameters from header info */
+ s->bit_alloc_params.sr_code = hdr.sr_code;
+ s->bitstream_mode = hdr.bitstream_mode;
+ s->channel_mode = hdr.channel_mode;
+ s->channel_layout = hdr.channel_layout;
+ s->lfe_on = hdr.lfe_on;
+ s->bit_alloc_params.sr_shift = hdr.sr_shift;
+ s->sample_rate = hdr.sample_rate;
+ s->bit_rate = hdr.bit_rate;
+ s->channels = hdr.channels;
+ s->fbw_channels = s->channels - s->lfe_on;
+ s->lfe_ch = s->fbw_channels + 1;
+ s->frame_size = hdr.frame_size;
+ s->center_mix_level = hdr.center_mix_level;
+ s->surround_mix_level = hdr.surround_mix_level;
+ s->num_blocks = hdr.num_blocks;
+ s->frame_type = hdr.frame_type;
+ s->substreamid = hdr.substreamid;
+
+ if(s->lfe_on) {
+ s->start_freq[s->lfe_ch] = 0;
+ s->end_freq[s->lfe_ch] = 7;
+ s->num_exp_groups[s->lfe_ch] = 2;
+ s->channel_in_cpl[s->lfe_ch] = 0;
+ }
+
+ if (hdr.bitstream_id <= 10) {
+ s->eac3 = 0;
+ s->snr_offset_strategy = 2;
+ s->block_switch_syntax = 1;
+ s->dither_flag_syntax = 1;
+ s->bit_allocation_syntax = 1;
+ s->fast_gain_syntax = 0;
+ s->first_cpl_leak = 0;
+ s->dba_syntax = 1;
+ s->skip_syntax = 1;
+ memset(s->channel_uses_aht, 0, sizeof(s->channel_uses_aht));
+ return ac3_parse_header_fixed(s);
+ }
+ else {
+ av_log(s->avctx, AV_LOG_ERROR, "E-AC-3 support not compiled in\n");
+ return -1;
+ }
+}
+
+/**
+ * Set stereo downmixing coefficients based on frame header info.
+ * reference: Section 7.8.2 Downmixing Into Two Channels
+ */
+static void set_downmix_coeffs_fixed(AC3FixedDecodeContext *s)
+{
+ int i;
+
+ int cmix = gain_levels_fixed[center_levels[s->center_mix_level]];
+ int smix = gain_levels_fixed[surround_levels[s->surround_mix_level]];
+ int norm0, norm1;
+
+ for(i=0; i<s->fbw_channels; i++) {
+ s->downmix_coeffs[i][0] = gain_levels_fixed[ac3_default_coeffs[s->channel_mode][i][0]];
+ s->downmix_coeffs[i][1] = gain_levels_fixed[ac3_default_coeffs[s->channel_mode][i][1]];
+ }
+ if(s->channel_mode > 1 && s->channel_mode & 1) {
+ s->downmix_coeffs[1][0] = s->downmix_coeffs[1][1] = cmix;
+ }
+ if(s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) {
+ int nf = s->channel_mode - 2;
+ s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf][1] = (smix * 23170 + 0x4000) >> 15;
+ }
+ if(s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) {
+ int nf = s->channel_mode - 4;
+ s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf+1][1] = smix;
+ }
+
+ /* renormalize FLOAT2FIXED(0.0f) */
+ norm0 = norm1 = 0;
+ for(i=0; i<s->fbw_channels; i++) {
+ norm0 += s->downmix_coeffs[i][0];
+ norm1 += s->downmix_coeffs[i][1];
+ }
+ for(i=0; i<s->fbw_channels; i++) {
+ s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] << 12) / norm0;
+ s->downmix_coeffs[i][1] = (s->downmix_coeffs[i][1] << 12) / norm1;
+ }
+
+ if(s->output_mode == AC3_CHMODE_MONO) {
+ for(i=0; i<s->fbw_channels; i++)
+ //s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]) * LEVEL_FIXED_MINUS_3DB;
+ s->downmix_coeffs[i][0] = ((s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]) * 23170 + 0x4000) >> 15;
+ }
+}
+
+/**
+ * Decode the grouped exponents according to exponent strategy.
+ * reference: Section 7.1.3 Exponent Decoding
+ */
+static int decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
+ uint8_t absexp, int8_t *dexps)
+{
+ int i, j, grp, group_size;
+ int dexp[256];
+ int expacc, prevexp;
+
+ /* unpack groups */
+ group_size = exp_strategy + (exp_strategy == EXP_D45);
+ for(grp=0,i=0; grp<ngrps; grp++) {
+ expacc = get_bits(gbc, 7);
+ dexp[i++] = ungroup_3_in_7_bits_tab[expacc][0];
+ dexp[i++] = ungroup_3_in_7_bits_tab[expacc][1];
+ dexp[i++] = ungroup_3_in_7_bits_tab[expacc][2];
+ }
+
+ /* convert to absolute exps and expand groups */
+ prevexp = absexp;
+ for(i=0,j=0; i<ngrps*3; i++) {
+ prevexp += dexp[i] - 2;
+ if (prevexp > 24U)
+ return -1;
+ switch (group_size) {
+ case 4: dexps[j++] = prevexp;
+ dexps[j++] = prevexp;
+ case 2: dexps[j++] = prevexp;
+ case 1: dexps[j++] = prevexp;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Generate transform coefficients for each coupled channel in the coupling
+ * range using the coupling coefficients and coupling coordinates.
+ * reference: Section 7.4.3 Coupling Coordinate Format
+ */
+static void calc_transform_coeffs_cpl_fixed(AC3FixedDecodeContext *s)
+{
+ int bin, band, ch;
+
+ bin = s->start_freq[CPL_CH];
+ for (band = 0; band < s->num_cpl_bands; band++) {
+ int band_start = bin;
+ int band_end = bin + s->cpl_band_sizes[band];
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
+ if (s->channel_in_cpl[ch]) {
+ int cpl_coord = s->cpl_coords[ch][band] << 5;
+ for (bin = band_start; bin < band_end; bin++) {
+ s->fixed_coeffs[ch][bin] = MULH(s->fixed_coeffs[CPL_CH][bin] << 4, cpl_coord);
+ }
+ if (ch == 2 && s->phase_flags[band]) {
+ for (bin = band_start; bin < band_end; bin++)
+ s->fixed_coeffs[2][bin] = -s->fixed_coeffs[2][bin];
+ }
+ }
+ }
+ bin = band_end;
+ }
+}
+
+/**
+ * Grouped mantissas for 3-level, 5-level and 11-level quantization
+ */
+typedef struct {
+ int b1_mant[2];
+ int b2_mant[2];
+ int b4_mant;
+ int b1;
+ int b2;
+ int b4;
+} mant_groups;
+
+ /**
+  * Unpack the mantissas of one channel that does not use AHT and store
+  * them, shifted right by their exponents, in s->fixed_coeffs[ch_index].
+  *
+  * @param s        decoder context (bit reader, bap, exponents, dither state)
+  * @param ch_index channel to decode (CPL_CH for the coupling channel)
+  * @param m        grouped-mantissa state carried over between calls
+  */
+ static void ac3_decode_fixed_transform_coeffs_ch(
+     AC3FixedDecodeContext *s,
+     int ch_index,
+     mant_groups *m
+ )
+ {
+     int first_bin = s->start_freq[ch_index];
+     int last_bin = s->end_freq[ch_index];
+     uint8_t *bap_of = s->bap[ch_index];
+     int8_t *exp_of = s->dexps[ch_index];
+     int *out = s->fixed_coeffs[ch_index];
+     /* the coupling channel is always dithered */
+     int use_dither = (ch_index == CPL_CH) || s->dither_flag[ch_index];
+     GetBitContext *gb = &s->gbc;
+     int k;
+
+     for (k = first_bin; k < last_bin; k++) {
+         int bap = bap_of[k];
+         int mant;
+         int code;
+
+         switch (bap) {
+         case 0:
+             /* zero-bit mantissa: either noise or silence */
+             if (!use_dither) {
+                 out[k] = 0;
+                 continue;
+             }
+             mant = (av_lfg_get(&s->dith_state) & 0x7FFFFF) - 0x400000;
+             break;
+         case 1:
+             if (m->b1) {
+                 /* take the next value of the group read earlier */
+                 m->b1--;
+                 mant = m->b1_mant[m->b1];
+             } else {
+                 /* 5 bits carry three mantissas */
+                 code = get_bits(gb, 5);
+                 mant = b1_mantissas[code][0];
+                 m->b1_mant[1] = b1_mantissas[code][1];
+                 m->b1_mant[0] = b1_mantissas[code][2];
+                 m->b1 = 2;
+             }
+             break;
+         case 2:
+             if (m->b2) {
+                 m->b2--;
+                 mant = m->b2_mant[m->b2];
+             } else {
+                 /* 7 bits carry three mantissas */
+                 code = get_bits(gb, 7);
+                 mant = b2_mantissas[code][0];
+                 m->b2_mant[1] = b2_mantissas[code][1];
+                 m->b2_mant[0] = b2_mantissas[code][2];
+                 m->b2 = 2;
+             }
+             break;
+         case 3:
+             mant = b3_mantissas[get_bits(gb, 3)];
+             break;
+         case 4:
+             if (m->b4) {
+                 m->b4 = 0;
+                 mant = m->b4_mant;
+             } else {
+                 /* 7 bits carry two mantissas */
+                 code = get_bits(gb, 7);
+                 mant = b4_mantissas[code][0];
+                 m->b4_mant = b4_mantissas[code][1];
+                 m->b4 = 1;
+             }
+             break;
+         case 5:
+             mant = b5_mantissas[get_bits(gb, 4)];
+             break;
+         default:
+             /* 6 to 15 */
+             mant = get_bits(gb, quantization_tab[bap]);
+             /* Shift mantissa and sign-extend it. */
+             mant = (mant << (32-quantization_tab[bap]))>>8;
+             break;
+         }
+
+         out[k] = mant >> exp_of[k];
+     }
+ }
+
+ /**
+  * Decode the transform coefficients for a particular channel.
+  * Channels without AHT are unpacked from the bitstream; channels with AHT
+  * take the mantissas stored in s->pre_mantissa for this block.
+  * reference: Section 7.3 Quantization and Decoding of Mantissas
+  */
+ static void decode_fixed_transform_coeffs_ch(AC3FixedDecodeContext *s, int blk, int ch,
+                                              mant_groups *m)
+ {
+     int k;
+
+     if (!s->channel_uses_aht[ch]) {
+         ac3_decode_fixed_transform_coeffs_ch(s, ch, m);
+         return;
+     }
+
+     /* if AHT is used, mantissas for all blocks are encoded in the first
+        block of the frame. */
+     for (k = s->start_freq[ch]; k < s->end_freq[ch]; k++)
+         s->fixed_coeffs[ch][k] = s->pre_mantissa[ch][k][blk] >> s->dexps[ch][k];
+ }
+
+/**
+ * Remove random dithering from coupling range coefficients with zero-bit
+ * mantissas for coupled channels which do not use dithering.
+ * reference: Section 7.3.4 Dither for Zero Bit Mantissas (bap=0)
+ */
+static void remove_dithering_fixed(AC3FixedDecodeContext *s)
+{
+ int ch, i;
+ for(ch=1; ch<=s->fbw_channels; ch++) {
+ if(!s->dither_flag[ch] && s->channel_in_cpl[ch]) {
+ for(i = s->start_freq[CPL_CH]; i<s->end_freq[CPL_CH]; i++) {
+ if(!s->bap[CPL_CH][i])
+ s->fixed_coeffs[ch][i] = 0;
+ }
+ }
+ }
+}
+
+ /**
+  * Apply the dynamic range gain to one channel and narrow it to 16 bits.
+  *
+  * Every output is (src[i] * mul + round) >> shift stored as int16_t, with
+  * mul = (dynrng & 0x1f) + 0x20 and shift = 12 minus bits 7..5 of dynrng
+  * read as a signed 3-bit value.
+  *
+  * @param dst    destination coefficients
+  * @param src    source fixed-point coefficients
+  * @param dynrng dynamic range word as read from the bitstream (8 bits)
+  * @param len    number of coefficients; the loop always handles groups of
+  *               8, so a multiple of 8 is expected (the caller in this file
+  *               passes 256)
+  */
+ static void scale_coefs (
+ int16_t *dst,
+ const int *src,
+ int dynrng,
+ int len)
+ {
+ int i, shift, round;
+ int16_t mul;
+ int temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+ /* low five bits of dynrng plus 0x20 form the multiplier */
+ mul = (dynrng & 0x1f) + 0x20;
+ /* move bits 7..5 to the top and shift back down to sign-extend them;
+    relies on arithmetic right shift of a signed int */
+ shift = 12 - ((dynrng << 24) >> 29);
+ /* half of the final divisor, added before the shift for rounding */
+ round = 1 << (shift-1);
+ for (i=0; i<len; i+=8) {
+
+ /* portable path: eight coefficients per iteration with multiply, add and
+    shift interleaved by hand */
+ #if !(HAVE_INLINE_ASM && HAVE_MIPS32R2)
+
+ temp = src[i] * mul;
+ temp1 = src[i+1] * mul;
+ temp = temp + round;
+ temp2 = src[i+2] * mul;
+
+ temp1 = temp1 + round;
+ dst[i] = temp >> shift;
+ temp3 = src[i+3] * mul;
+ temp2 = temp2 + round;
+
+ dst[i+1] = temp1 >> shift;
+ temp4 = src[i + 4] * mul;
+ temp3 = temp3 + round;
+ dst[i+2] = temp2 >> shift;
+
+ temp5 = src[i+5] * mul;
+ temp4 = temp4 + round;
+ dst[i+3] = temp3 >> shift;
+ temp6 = src[i+6] * mul;
+
+ dst[i+4] = temp4 >> shift;
+ temp5 = temp5 + round;
+ temp7 = src[i+7] * mul;
+ temp6 = temp6 + round;
+
+ dst[i+5] = temp5 >> shift;
+ temp7 = temp7 + round;
+ dst[i+6] = temp6 >> shift;
+ dst[i+7] = temp7 >> shift;
+ #else
+
+ /* MIPS32R2 path: same multiply / add round / arithmetic shift sequence on
+    eight words loaded from src + i; the results are stored to dst below */
+ __asm__ volatile (
+ "lw %[temp], 0(%[src_i]) \n\t"
+ "lw %[temp1], 4(%[src_i]) \n\t"
+ "lw %[temp2], 8(%[src_i]) \n\t"
+ "mul %[temp], %[temp], %[mul] \n\t"
+ "lw %[temp3], 12(%[src_i]) \n\t"
+ "mul %[temp1], %[temp1], %[mul] \n\t"
+ "lw %[temp4], 16(%[src_i]) \n\t"
+ "addu %[temp], %[temp], %[round] \n\t"
+ "mul %[temp3], %[temp3], %[mul] \n\t"
+ "addu %[temp1], %[temp1], %[round] \n\t"
+ "srav %[temp], %[temp], %[shift] \n\t"
+ "mul %[temp2], %[temp2], %[mul] \n\t"
+ "srav %[temp1], %[temp1], %[shift] \n\t"
+ "lw %[temp5], 20(%[src_i]) \n\t"
+ "addu %[temp2], %[temp2], %[round] \n\t"
+ "addu %[temp3], %[temp3], %[round] \n\t"
+ "mul %[temp4], %[temp4], %[mul] \n\t"
+ "srav %[temp2], %[temp2], %[shift] \n\t"
+ "srav %[temp3], %[temp3], %[shift] \n\t"
+ "mul %[temp5], %[temp5], %[mul] \n\t"
+ "lw %[temp6], 24(%[src_i]) \n\t"
+ "lw %[temp7], 28(%[src_i]) \n\t"
+ "addu %[temp4], %[temp4], %[round] \n\t"
+ "mul %[temp6], %[temp6], %[mul] \n\t"
+ "mul %[temp7], %[temp7], %[mul] \n\t"
+ "addu %[temp5], %[temp5], %[round] \n\t"
+ "srav %[temp4], %[temp4], %[shift] \n\t"
+ "srav %[temp5], %[temp5], %[shift] \n\t"
+ "addu %[temp6], %[temp6], %[round] \n\t"
+ "addu %[temp7], %[temp7], %[round] \n\t"
+ "srav %[temp6], %[temp6], %[shift] \n\t"
+ "srav %[temp7], %[temp7], %[shift] \n\t"
+
+ : [temp] "=&r" (temp), [temp1] "=&r" (temp1),
+ [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+ [temp6] "=&r" (temp6), [temp7] "=&r" (temp7)
+ : [src_i] "r" (src + i), [mul] "r" (mul),
+ [round] "r" (round), [shift] "r" (shift)
+ );
+
+ dst[i ]=temp;
+ dst[i+1]=temp1;
+ dst[i+2]=temp2;
+ dst[i+3]=temp3;
+
+ dst[i+4]=temp4;
+ dst[i+5]=temp5;
+ dst[i+6]=temp6;
+ dst[i+7]=temp7;
+ #endif
+ }
+ }
+
+/**
+ * Decode the transform coefficients.
+ */
+static void decode_transform_coeffs_fixed(AC3FixedDecodeContext *s, int blk)
+{
+ int ch, end;
+ int got_cplchan = 0;
+ mant_groups m;
+
+ m.b1 = m.b2 = m.b4 = 0;
+
+ for (ch = 1; ch <= s->channels; ch++) {
+ /* transform coefficients for full-bandwidth channel */
+ decode_fixed_transform_coeffs_ch(s, blk, ch, &m);
+ /* tranform coefficients for coupling channel come right after the
+ coefficients for the first coupled channel*/
+ if (s->channel_in_cpl[ch]) {
+ if (!got_cplchan) {
+ decode_fixed_transform_coeffs_ch(s, blk, CPL_CH, &m);
+ calc_transform_coeffs_cpl_fixed(s);
+ got_cplchan = 1;
+ }
+ end = s->end_freq[CPL_CH];
+ } else {
+ end = s->end_freq[ch];
+ }
+ do
+ s->fixed_coeffs[ch][end] = 0;
+ while(++end < 256);
+ }
+
+ /* zero the dithered coefficients for appropriate channels */
+ remove_dithering_fixed(s);
+}
+
+/**
+ * Stereo rematrixing.
+ * reference: Section 7.5.4 Rematrixing : Decoding Technique
+ */
+static void do_rematrixing_fixed(AC3FixedDecodeContext *s)
+{
+ int bnd, i;
+ int end, bndend;
+
+ end = FFMIN(s->end_freq[1], s->end_freq[2]);
+
+ for(bnd=0; bnd<s->num_rematrixing_bands; bnd++) {
+ if(s->rematrixing_flags[bnd]) {
+ bndend = FFMIN(end, ff_ac3_rematrix_band_tab[bnd+1]);
+ for(i=ff_ac3_rematrix_band_tab[bnd]; i<bndend; i++) {
+ int tmp0 = s->fixed_coeffs[1][i];
+ s->fixed_coeffs[1][i] += s->fixed_coeffs[2][i];
+ s->fixed_coeffs[2][i] = tmp0 - s->fixed_coeffs[2][i];
+ }
+ }
+ }
+}
+
+/**
+ * Inverse MDCT Transform.
+ * Convert frequency domain coefficients to time-domain audio samples.
+ * reference: Section 7.9.4 Transformation Equations
+ */
+static inline void do_imdct_fixed(AC3FixedDecodeContext *s, int channels)
+{
+ int ch;
+ for (ch=1; ch<=channels; ch++) {
+ if (s->block_switch[ch]) {
+ int i;
+ FFTSample *x = s->tmp_output+128;
+
+ for(i=0; i<128; i++)
+ x[i] = s->transform_coeffs[ch][2*i];
+ s->imdct_256.imdct_fixed_half(&s->imdct_256, s->tmp_output, x);
+ s->dsp.vector_fmul_window_fixed(s->output[ch-1], s->delay[ch-1],
+ s->tmp_output, s->window, 128);
+
+ for(i=0; i<128; i++)
+ x[i] = s->transform_coeffs[ch][2*i+1];
+ s->imdct_256.imdct_fixed_half(&s->imdct_256, s->delay[ch-1], x);
+
+ } else {
+ s->imdct_512.imdct_fixed_half(&s->imdct_512, s->tmp_output,
+ s->transform_coeffs[ch]);
+ s->dsp.vector_fmul_window_fixed(s->output[ch-1], s->delay[ch-1],
+ s->tmp_output, s->window, 128);
+ memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(int16_t));
+ }
+ }
+}
+
+/**
+ * Downmix the output to mono or stereo.
+ */
+void ff_ac3_downmix_c_fixed(int (*samples)[256], int (*matrix)[2], int out_ch, int in_ch, int len)
+{
+ int i, j;
+ int v0, v1;
+
+ if (out_ch == 2)
+ {
+ for(i=0; i<len; i++)
+ {
+ v0 = v1 = 0;
+ for(j=0; j<in_ch; j++)
+ {
+ v0 += samples[j][i] * matrix[j][0];
+ v1 += samples[j][i] * matrix[j][1];
+ }
+ samples[0][i] = (v0 + 2048) >> 12;
+ samples[1][i] = (v1 + 2048) >> 12;
+ }
+ }
+ else if (out_ch == 1)
+ {
+ for(i=0; i<len; i++)
+ {
+ v0 = 0;
+ for(j=0; j<in_ch; j++)
+ v0 += samples[j][i] * matrix[j][0];
+ samples[0][i] = (v0 + 2048) >> 12;
+ }
+ }
+}
+
+ /**
+  * Compute the bit allocation pointers (bap) for the bins [start, end).
+  *
+  * For every band the masking value is offset by snr_offset and floor,
+  * clipped at zero and quantized (& 0x1FE0); the difference between each
+  * bin's PSD and that value, shifted right by 5 and clipped to 0..63,
+  * selects the entry of bap_tab.
+  *
+  * @param mask       masking curve, one value per band
+  * @param psd        power spectral density, one value per bin
+  * @param start      first bin to process
+  * @param end        one past the last bin to process
+  * @param snr_offset SNR offset; -960 forces all AC3_MAX_COEFS baps to zero
+  * @param floor      masking floor
+  * @param bap_tab    64-entry table mapping the clipped address to a bap
+  * @param bap        output, indexed by bin
+  */
+ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
+                                      int start, int end,
+                                      int snr_offset, int floor,
+                                      const uint8_t *bap_tab, uint8_t *bap)
+ {
+     int bin, band, band_end;
+
+     /* special case, if snr offset is -960, set all bap's to zero */
+     if (snr_offset == -960) {
+         memset(bap, 0, AC3_MAX_COEFS);
+         return;
+     }
+
+     bin = start;
+     band = ff_ac3_bin_to_band_tab[start];
+     do {
+         int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor;
+
+         band_end = FFMIN(ff_ac3_band_start_tab[band+1], end);
+         for (; bin < band_end; bin++) {
+             int address = av_clip((psd[bin] - m) >> 5, 0, 63);
+             bap[bin] = bap_tab[address];
+         }
+         band++;
+         /* Stop as soon as the band just processed reached 'end'. Testing
+            against the start of that band instead would run one more pass
+            that writes nothing but reads mask[] and the band start table
+            one band further, which is past their end for the last band. */
+     } while (end > band_end);
+ }
+
+/**
+ * Decode band structure for coupling, spectral extension, or enhanced coupling.
+ * The band structure defines how many subbands are in each band. For each
+ * subband in the range, 1 means it is combined with the previous band, and 0
+ * means that it starts a new band.
+ *
+ * @param[in] gbc bit reader context
+ * @param[in] blk block number
+ * @param[in] eac3 flag to indicate E-AC-3
+ * @param[in] ecpl flag to indicate enhanced coupling
+ * @param[in] start_subband subband number for start of range
+ * @param[in] end_subband subband number for end of range
+ * @param[in] default_band_struct default band structure table
+ * @param[out] num_bands number of bands (optionally NULL)
+ * @param[out] band_sizes array containing the number of bins in each band (optionally NULL)
+ */
+static void decode_band_structure(GetBitContext *gbc, int blk, int eac3,
+ int ecpl, int start_subband, int end_subband,
+ const uint8_t *default_band_struct,
+ int *num_bands, uint8_t *band_sizes)
+{
+ int subbnd, bnd, n_subbands, n_bands=0;
+ uint8_t bnd_sz[22];
+ uint8_t coded_band_struct[22];
+ const uint8_t *band_struct;
+
+ n_subbands = end_subband - start_subband;
+
+ /* decode band structure from bitstream or use default */
+ if (!eac3 || get_bits1(gbc)) {
+ for (subbnd = 0; subbnd < n_subbands - 1; subbnd++) {
+ coded_band_struct[subbnd] = get_bits1(gbc);
+ }
+ band_struct = coded_band_struct;
+ } else if (!blk) {
+ band_struct = &default_band_struct[start_subband+1];
+ } else {
+ /* no change in band structure */
+ return;
+ }
+
+ /* calculate number of bands and band sizes based on band structure.
+ note that the first 4 subbands in enhanced coupling span only 6 bins
+ instead of 12. */
+ if (num_bands || band_sizes ) {
+ n_bands = n_subbands;
+ bnd_sz[0] = ecpl ? 6 : 12;
+ for (bnd = 0, subbnd = 1; subbnd < n_subbands; subbnd++) {
+ int subbnd_size = (ecpl && subbnd < 4) ? 6 : 12;
+ if (band_struct[subbnd-1]) {
+ n_bands--;
+ bnd_sz[bnd] += subbnd_size;
+ } else {
+ bnd_sz[++bnd] = subbnd_size;
+ }
+ }
+ }
+
+ /* set optional output params */
+ if (num_bands)
+ *num_bands = n_bands;
+ if (band_sizes)
+ memcpy(band_sizes, bnd_sz, n_bands);
+}
+
+ /**
+  * Reciprocals of the eight possible spectral extension end bins, scaled
+  * by 2^32: entry k is round(2^32 / (sb * 12 + 25)) for the end subbands
+  * sb = 5, 6, 7, 9, 11, 13, 15, 17 in that order.
+  * Multiplying by an entry and shifting right by 32 replaces a division
+  * by the end bin.
+  */
+ int end_freq_inv_tab[8] =
+ {
+ 50529027, 44278013, 39403370, 32292987, 27356480, 23729101, 20951060, 18755316
+ };
+
+ /**
+  * Fixed-point square root with 23 fractional bits.
+  *
+  * Builds the result bit by bit from 0x400000 downwards, keeping every bit
+  * whose trial value squared (shifted right by 23) does not exceed x.
+  *
+  * @param x input value
+  * @return largest 23-bit value r with (r * r) >> 23 <= x
+  */
+ static int ac3_fixed_sqrt(int x)
+ {
+     int root = 0;
+     int bit;
+
+     for (bit = 0x400000; bit != 0; bit >>= 1) {
+         int trial = root + bit;
+         /* square in 64 bits so the product cannot overflow */
+         int trial_sq = (int)(((long long)trial * trial) >> 23);
+
+         if (trial_sq <= x)
+             root = trial;
+     }
+     return root;
+ }
+
+/**
+ * Decode a single audio block from the AC-3 bitstream.
+ */
+static int decode_audio_block_fixed(AC3FixedDecodeContext *s, int blk)
+{
+ int fbw_channels = s->fbw_channels;
+ int channel_mode = s->channel_mode;
+ int i, bnd, seg, ch;
+ int different_transforms;
+ int cpl_in_use;
+ GetBitContext *gbc = &s->gbc;
+ uint8_t bit_alloc_stages[AC3_MAX_CHANNELS];
+
+ memset(bit_alloc_stages, 0, AC3_MAX_CHANNELS);
+
+ /* block switch flags */
+ different_transforms = 0;
+ if (s->block_switch_syntax) {
+ for (ch = 1; ch <= fbw_channels; ch++) {
+ s->block_switch[ch] = get_bits1(gbc);
+ if(ch > 1 && s->block_switch[ch] != s->block_switch[1])
+ different_transforms = 1;
+ }
+ }
+
+ /* dithering flags */
+ if (s->dither_flag_syntax) {
+ for (ch = 1; ch <= fbw_channels; ch++) {
+ s->dither_flag[ch] = get_bits1(gbc);
+ }
+ }
+
+ /* dynamic range */
+ i = !(s->channel_mode);
+ do {
+ if(get_bits1(gbc)) {
+ s->dynamic_range[i] = get_bits(gbc, 8);
+ } else if(blk == 0) {
+ s->dynamic_range[i] = 0;
+ }
+ } while(i--);
+
+ /* spectral extension strategy */
+ if (s->eac3 && (!blk || get_bits1(gbc))) {
+ s->spx_in_use = get_bits1(gbc);
+ if (s->spx_in_use) {
+ int dst_start_freq, dst_end_freq, src_start_freq,
+ start_subband, end_subband;
+
+ /* determine which channels use spx */
+ if (s->channel_mode == AC3_CHMODE_MONO) {
+ s->channel_uses_spx[1] = 1;
+ } else {
+ for (ch = 1; ch <= fbw_channels; ch++)
+ s->channel_uses_spx[ch] = get_bits1(gbc);
+ }
+
+ /* get the frequency bins of the spx copy region and the spx start
+ and end subbands */
+ dst_start_freq = get_bits(gbc, 2);
+ start_subband = get_bits(gbc, 3) + 2;
+ if (start_subband > 7)
+ start_subband += start_subband - 7;
+ end_subband = get_bits(gbc, 3) + 5;
+ s->spx_dst_end_freq = end_freq_inv_tab[end_subband];
+ end_subband += 5;
+ if (end_subband > 7)
+ end_subband += end_subband - 7;
+ dst_start_freq = dst_start_freq * 12 + 25;
+ src_start_freq = start_subband * 12 + 25;
+ dst_end_freq = end_subband * 12 + 25;
+
+ /* check validity of spx ranges */
+ if (start_subband >= end_subband) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+ "range (%d >= %d)\n", start_subband, end_subband);
+ return -1;
+ }
+ if (dst_start_freq >= src_start_freq) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+ "copy start bin (%d >= %d)\n", dst_start_freq, src_start_freq);
+ return -1;
+ }
+
+ s->spx_dst_start_freq = dst_start_freq;
+ s->spx_src_start_freq = src_start_freq;
+
+ decode_band_structure(gbc, blk, s->eac3, 0,
+ start_subband, end_subband,
+ ff_eac3_default_spx_band_struct,
+ &s->num_spx_bands,
+ s->spx_band_sizes);
+ } else {
+ for (ch = 1; ch <= fbw_channels; ch++) {
+ s->channel_uses_spx[ch] = 0;
+ s->first_spx_coords[ch] = 1;
+ }
+ }
+ }
+
+ /* spectral extension coordinates */
+ if (s->spx_in_use) {
+ for (ch = 1; ch <= fbw_channels; ch++) {
+ if (s->channel_uses_spx[ch]) {
+ if (s->first_spx_coords[ch] || get_bits1(gbc)) {
+
+ int spx_blend;
+ int bin, master_spx_coord;
+
+ s->first_spx_coords[ch] = 0;
+
+ spx_blend = MULT_INT_WITH_FINT_AND_CONVERT_TO_FINT(get_bits(gbc, 5) , 2048);
+ master_spx_coord = get_bits(gbc, 2) * 3;
+
+ bin = s->spx_src_start_freq;
+ for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+ long long accu;
+ int bandsize;
+ int spx_coord_exp, spx_coord_mant;
+ int nratio, sblend, nblend;
+
+ /* calculate blending factors */
+ bandsize = s->spx_band_sizes[bnd];
+ accu = (long long)((bin << 23) + (bandsize << 22)) * s->spx_dst_end_freq;
+ nratio = (int)(accu >> 32);
+ nratio -= spx_blend << 18;
+
+ if (nratio < 0)
+ {
+ nblend = 0;
+ sblend = 0x800000;
+ }
+ else if (nratio > 0x7fffff)
+ {
+ nblend = 0x800000;
+ sblend = 0;
+ }
+ else
+ {
+ nblend = ac3_fixed_sqrt(nratio);
+ accu = (long long)nblend * 1859775393;
+ nblend = (int)((accu + (1<<29)) >> 30);
+ sblend = ac3_fixed_sqrt(0x800000 - nratio);
+ }
+
+ bin += bandsize;
+
+ /* decode spx coordinates */
+ spx_coord_exp = get_bits(gbc, 4);
+ spx_coord_mant = get_bits(gbc, 2);
+ if (spx_coord_exp == 15) spx_coord_mant <<= 1;
+ else spx_coord_mant += 4;
+ spx_coord_mant <<= (25 - spx_coord_exp - master_spx_coord);
+
+ /* multiply noise and signal blending factors by spx coordinate */
+ accu = (long long)nblend * spx_coord_mant;
+ s->spx_noise_blend[ch][bnd] = (int)((accu + (1<<22)) >> 23);
+ accu = (long long)sblend * spx_coord_mant;
+ s->spx_signal_blend[ch][bnd] = (int)((accu + (1<<22)) >> 23);
+ }
+ }
+ } else {
+ s->first_spx_coords[ch] = 1;
+ }
+ }
+ }
+
+ /* coupling strategy */
+ if (s->eac3 ? s->cpl_strategy_exists[blk] : get_bits1(gbc)) {
+ memset(bit_alloc_stages, 3, AC3_MAX_CHANNELS);
+ if (!s->eac3)
+ s->cpl_in_use[blk] = get_bits1(gbc);
+ if (s->cpl_in_use[blk]) {
+ /* coupling in use */
+ int cpl_start_subband, cpl_end_subband;
+
+ if (channel_mode < AC3_CHMODE_STEREO) {
+ av_log(s->avctx, AV_LOG_ERROR, "coupling not allowed in mono or dual-mono\n");
+ return -1;
+ }
+
+ /* check for enhanced coupling */
+ if (s->eac3 && get_bits1(gbc)) {
+ /* TODO: parse enhanced coupling strategy info */
+ av_log_missing_feature(s->avctx, "Enhanced coupling", 1);
+ return -1;
+ }
+
+ /* determine which channels are coupled */
+ if (s->eac3 && s->channel_mode == AC3_CHMODE_STEREO) {
+ s->channel_in_cpl[1] = 1;
+ s->channel_in_cpl[2] = 1;
+ } else {
+ for (ch = 1; ch <= fbw_channels; ch++)
+ s->channel_in_cpl[ch] = get_bits1(gbc);
+ }
+
+ /* phase flags in use */
+ if (channel_mode == AC3_CHMODE_STEREO)
+ s->phase_flags_in_use = get_bits1(gbc);
+
+ /* coupling frequency range */
+ cpl_start_subband = get_bits(gbc, 4);
+ cpl_end_subband = s->spx_in_use ? (s->spx_src_start_freq - 37) / 12 :
+ get_bits(gbc, 4) + 3;
+
+ if (cpl_start_subband >= cpl_end_subband) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid coupling range (%d >= %d)\n",
+ cpl_start_subband, cpl_end_subband);
+ return -1;
+ }
+
+ s->start_freq[CPL_CH] = cpl_start_subband * 12 + 37;
+ s->end_freq[CPL_CH] = cpl_end_subband * 12 + 37;
+
+ decode_band_structure(gbc, blk, s->eac3, 0, cpl_start_subband,
+ cpl_end_subband,
+ ff_eac3_default_cpl_band_struct,
+ &s->num_cpl_bands, s->cpl_band_sizes);
+ } else {
+ /* coupling not in use */
+ for (ch = 1; ch <= fbw_channels; ch++) {
+ s->channel_in_cpl[ch] = 0;
+ s->first_cpl_coords[ch] = 1;
+ }
+ s->first_cpl_leak = s->eac3;
+ s->phase_flags_in_use = 0;
+ }
+ } else if (!s->eac3) {
+ if(!blk) {
+ av_log(s->avctx, AV_LOG_ERROR, "new coupling strategy must be present in block 0\n");
+ return -1;
+ } else {
+ s->cpl_in_use[blk] = s->cpl_in_use[blk-1];
+ }
+ }
+ cpl_in_use = s->cpl_in_use[blk];
+
+ /* coupling coordinates */
+ if (cpl_in_use) {
+ int cpl_coords_exist = 0;
+
+ for (ch = 1; ch <= fbw_channels; ch++) {
+ if (s->channel_in_cpl[ch]) {
+ if ((s->eac3 && s->first_cpl_coords[ch]) || get_bits1(gbc)) {
+ int master_cpl_coord, cpl_coord_exp, cpl_coord_mant;
+ s->first_cpl_coords[ch] = 0;
+ cpl_coords_exist = 1;
+ master_cpl_coord = 3 * get_bits(gbc, 2);
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+ cpl_coord_exp = get_bits(gbc, 4);
+ cpl_coord_mant = get_bits(gbc, 4);
+ if (cpl_coord_exp == 15)
+ s->cpl_coords[ch][bnd] = cpl_coord_mant << 22;
+ else
+ s->cpl_coords[ch][bnd] = (cpl_coord_mant + 16) << 21;
+ s->cpl_coords[ch][bnd] >>= (cpl_coord_exp + master_cpl_coord);
+ }
+ } else if (!blk) {
+ av_log(s->avctx, AV_LOG_ERROR, "new coupling coordinates must be present in block 0\n");
+ return -1;
+ }
+ } else {
+ /* channel not in coupling */
+ s->first_cpl_coords[ch] = 1;
+ }
+ }
+ /* phase flags */
+ if (channel_mode == AC3_CHMODE_STEREO && cpl_coords_exist) {
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+ s->phase_flags[bnd] = s->phase_flags_in_use? get_bits1(gbc) : 0;
+ }
+ }
+ }
+
+ /* stereo rematrixing strategy and band structure */
+ if (channel_mode == AC3_CHMODE_STEREO) {
+ if ((s->eac3 && !blk) || get_bits1(gbc)) {
+ s->num_rematrixing_bands = 4;
+ if (cpl_in_use && s->start_freq[CPL_CH] <= 61) {
+ s->num_rematrixing_bands -= 1 + (s->start_freq[CPL_CH] == 37);
+ } else if (s->spx_in_use && s->spx_src_start_freq <= 61) {
+ s->num_rematrixing_bands--;
+ }
+ for(bnd=0; bnd<s->num_rematrixing_bands; bnd++)
+ s->rematrixing_flags[bnd] = get_bits1(gbc);
+ } else if (!blk) {
+ av_log(s->avctx, AV_LOG_WARNING, "Warning: new rematrixing strategy not present in block 0\n");
+ s->num_rematrixing_bands = 0;
+ }
+ }
+
+ /* exponent strategies for each channel */
+ for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+ if (!s->eac3)
+ s->exp_strategy[blk][ch] = get_bits(gbc, 2 - (ch == s->lfe_ch));
+ if(s->exp_strategy[blk][ch] != EXP_REUSE)
+ bit_alloc_stages[ch] = 3;
+ }
+
+ /* channel bandwidth */
+ for (ch = 1; ch <= fbw_channels; ch++) {
+ s->start_freq[ch] = 0;
+ if (s->exp_strategy[blk][ch] != EXP_REUSE) {
+ int group_size;
+ int prev = s->end_freq[ch];
+ if (s->channel_in_cpl[ch])
+ s->end_freq[ch] = s->start_freq[CPL_CH];
+ else if (s->channel_uses_spx[ch])
+ s->end_freq[ch] = s->spx_src_start_freq;
+ else {
+ int bandwidth_code = get_bits(gbc, 6);
+ if (bandwidth_code > 60) {
+ av_log(s->avctx, AV_LOG_ERROR, "bandwidth code = %d > 60\n", bandwidth_code);
+ return -1;
+ }
+ s->end_freq[ch] = bandwidth_code * 3 + 73;
+ }
+ group_size = 3 << (s->exp_strategy[blk][ch] - 1);
+ s->num_exp_groups[ch] = (s->end_freq[ch]+group_size-4) / group_size;
+ if(blk > 0 && s->end_freq[ch] != prev)
+ memset(bit_alloc_stages, 3, AC3_MAX_CHANNELS);
+ }
+ }
+ if (cpl_in_use && s->exp_strategy[blk][CPL_CH] != EXP_REUSE) {
+ s->num_exp_groups[CPL_CH] = (s->end_freq[CPL_CH] - s->start_freq[CPL_CH]) /
+ (3 << (s->exp_strategy[blk][CPL_CH] - 1));
+ }
+
+ /* decode exponents for each channel */
+ for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+ if (s->exp_strategy[blk][ch] != EXP_REUSE) {
+ s->dexps[ch][0] = get_bits(gbc, 4) << !ch;
+ if (decode_exponents(gbc, s->exp_strategy[blk][ch],
+ s->num_exp_groups[ch], s->dexps[ch][0],
+ &s->dexps[ch][s->start_freq[ch]+!!ch])) {
+ av_log(s->avctx, AV_LOG_ERROR, "exponent out-of-range\n");
+ return -1;
+ }
+ if(ch != CPL_CH && ch != s->lfe_ch)
+ skip_bits(gbc, 2); /* skip gainrng */
+ }
+ }
+
+ /* bit allocation information */
+ if (s->bit_allocation_syntax) {
+ if (get_bits1(gbc)) {
+ s->bit_alloc_params.slow_decay = ff_ac3_slow_decay_tab[get_bits(gbc, 2)] >> s->bit_alloc_params.sr_shift;
+ s->bit_alloc_params.fast_decay = ff_ac3_fast_decay_tab[get_bits(gbc, 2)] >> s->bit_alloc_params.sr_shift;
+ s->bit_alloc_params.slow_gain = ff_ac3_slow_gain_tab[get_bits(gbc, 2)];
+ s->bit_alloc_params.db_per_bit = ff_ac3_db_per_bit_tab[get_bits(gbc, 2)];
+ s->bit_alloc_params.floor = ff_ac3_floor_tab[get_bits(gbc, 3)];
+ for(ch=!cpl_in_use; ch<=s->channels; ch++)
+ bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+ } else if (!blk) {
+ av_log(s->avctx, AV_LOG_ERROR, "new bit allocation info must be present in block 0\n");
+ return -1;
+ }
+ }
+
+ /* signal-to-noise ratio offsets and fast gains (signal-to-mask ratios) */
+ if(!s->eac3 || !blk){
+ if(s->snr_offset_strategy && get_bits1(gbc)) {
+ int snr = 0;
+ int csnr;
+ csnr = (get_bits(gbc, 6) - 15) << 4;
+ for (i = ch = !cpl_in_use; ch <= s->channels; ch++) {
+ /* snr offset */
+ if (ch == i || s->snr_offset_strategy == 2)
+ snr = (csnr + get_bits(gbc, 4)) << 2;
+ /* run at least last bit allocation stage if snr offset changes */
+ if(blk && s->snr_offset[ch] != snr) {
+ bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 1);
+ }
+ s->snr_offset[ch] = snr;
+
+ /* fast gain (normal AC-3 only) */
+ if (!s->eac3) {
+ int prev = s->fast_gain[ch];
+ s->fast_gain[ch] = ff_ac3_fast_gain_tab[get_bits(gbc, 3)];
+ /* run last 2 bit allocation stages if fast gain changes */
+ if(blk && prev != s->fast_gain[ch])
+ bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+ }
+ }
+ } else if (!s->eac3 && !blk) {
+ av_log(s->avctx, AV_LOG_ERROR, "new snr offsets must be present in block 0\n");
+ return -1;
+ }
+ }
+
+ /* fast gain (E-AC-3 only) */
+ if (s->fast_gain_syntax && get_bits1(gbc)) {
+ for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+ int prev = s->fast_gain[ch];
+ s->fast_gain[ch] = ff_ac3_fast_gain_tab[get_bits(gbc, 3)];
+ /* run last 2 bit allocation stages if fast gain changes */
+ if(blk && prev != s->fast_gain[ch])
+ bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+ }
+ } else if (s->eac3 && !blk) {
+ for (ch = !cpl_in_use; ch <= s->channels; ch++)
+ s->fast_gain[ch] = ff_ac3_fast_gain_tab[4];
+ }
+
+ /* coupling leak information */
+ if (cpl_in_use) {
+ if (s->first_cpl_leak || get_bits1(gbc)) {
+ int fl = get_bits(gbc, 3);
+ int sl = get_bits(gbc, 3);
+ /* run last 2 bit allocation stages for coupling channel if
+ coupling leak changes */
+ if(blk && (fl != s->bit_alloc_params.cpl_fast_leak ||
+ sl != s->bit_alloc_params.cpl_slow_leak)) {
+ bit_alloc_stages[CPL_CH] = FFMAX(bit_alloc_stages[CPL_CH], 2);
+ }
+ s->bit_alloc_params.cpl_fast_leak = fl;
+ s->bit_alloc_params.cpl_slow_leak = sl;
+ } else if (!s->eac3 && !blk) {
+ av_log(s->avctx, AV_LOG_ERROR, "new coupling leak info must be present in block 0\n");
+ return -1;
+ }
+ s->first_cpl_leak = 0;
+ }
+
+ /* delta bit allocation information */
+ if (s->dba_syntax && get_bits1(gbc)) {
+ /* delta bit allocation exists (strategy) */
+ for (ch = !cpl_in_use; ch <= fbw_channels; ch++) {
+ s->dba_mode[ch] = get_bits(gbc, 2);
+ if (s->dba_mode[ch] == DBA_RESERVED) {
+ av_log(s->avctx, AV_LOG_ERROR, "delta bit allocation strategy reserved\n");
+ return -1;
+ }
+ bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+ }
+ /* channel delta offset, len and bit allocation */
+ for (ch = !cpl_in_use; ch <= fbw_channels; ch++) {
+ if (s->dba_mode[ch] == DBA_NEW) {
+ s->dba_nsegs[ch] = get_bits(gbc, 3) + 1;
+ for (seg = 0; seg < s->dba_nsegs[ch]; seg++) {
+ s->dba_offsets[ch][seg] = get_bits(gbc, 5);
+ s->dba_lengths[ch][seg] = get_bits(gbc, 4);
+ s->dba_values[ch][seg] = get_bits(gbc, 3);
+ }
+ /* run last 2 bit allocation stages if new dba values */
+ bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+ }
+ }
+ } else if(blk == 0) {
+ for(ch=0; ch<=s->channels; ch++) {
+ s->dba_mode[ch] = DBA_NONE;
+ }
+ }
+
+ /* Bit allocation */
+ for(ch=!cpl_in_use; ch<=s->channels; ch++) {
+ if(bit_alloc_stages[ch] > 2) {
+ /* Exponent mapping into PSD and PSD integration */
+ ff_ac3_bit_alloc_calc_psd(s->dexps[ch],
+ s->start_freq[ch], s->end_freq[ch],
+ s->psd[ch], s->band_psd[ch]);
+ }
+ if(bit_alloc_stages[ch] > 1) {
+ /* Compute excitation function, Compute masking curve, and
+ Apply delta bit allocation */
+ if (ff_ac3_bit_alloc_calc_mask(&s->bit_alloc_params, s->band_psd[ch],
+ s->start_freq[ch], s->end_freq[ch],
+ s->fast_gain[ch], (ch == s->lfe_ch),
+ s->dba_mode[ch], s->dba_nsegs[ch],
+ s->dba_offsets[ch], s->dba_lengths[ch],
+ s->dba_values[ch], s->mask[ch])) {
+ av_log(s->avctx, AV_LOG_ERROR, "error in bit allocation\n");
+ return -1;
+ }
+ }
+ if(bit_alloc_stages[ch] > 0) {
+ /* Compute bit allocation */
+ const uint8_t *bap_tab = s->channel_uses_aht[ch] ?
+ ff_eac3_hebap_tab : ff_ac3_bap_tab;
+ ac3_bit_alloc_calc_bap_c(s->mask[ch], s->psd[ch],
+ s->start_freq[ch], s->end_freq[ch],
+ s->snr_offset[ch],
+ s->bit_alloc_params.floor,
+ bap_tab, s->bap[ch]);
+ }
+ }
+
+ /* unused dummy data */
+ if (s->skip_syntax && get_bits1(gbc)) {
+ int skipl = get_bits(gbc, 9);
+ while(skipl--)
+ skip_bits(gbc, 8);
+ }
+
+ /* unpack the transform coefficients
+ this also uncouples channels if coupling is in use. */
+ decode_transform_coeffs_fixed(s, blk);
+
+ /* TODO: generate enhanced coupling coordinates and uncouple */
+
+ /* recover coefficients if rematrixing is in use */
+ if(s->channel_mode == AC3_CHMODE_STEREO)
+ do_rematrixing_fixed(s);
+
+ /* apply scaling to coefficients (headroom, dynrng) */
+ for(ch=1; ch<=s->channels; ch++) {
+ int dynrng;
+ if(s->channel_mode == AC3_CHMODE_DUALMONO) {
+ dynrng = s->dynamic_range[2-ch];
+ } else {
+ dynrng = s->dynamic_range[0];
+ }
+ scale_coefs(s->transform_coeffs[ch], s->fixed_coeffs[ch], dynrng, 256);
+ }
+
+ do_imdct_fixed(s, s->channels);
+
+ if (s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
+ s->fbw_channels == s->out_channels))
+ ff_ac3_downmix_c_fixed(s->output, s->downmix_coeffs, s->out_channels, s->fbw_channels, 256);
+
+ return 0;
+}
+
+/**
+ * Decode a single AC-3 fixed frame.
+ */
+int ac3_fixed_decode_frame(AVCodecContext * avctx, void *data,
+ int *got_frame_ptr, AVPacket *avpkt)
+{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
+ AC3FixedDecodeContext *s = avctx->priv_data;
+ int *out_samples_flt;
+ int16_t *out_samples_s16;
+ int blk, ch, err, ret;
+ const uint8_t *channel_map;
+ const int *output[AC3_MAX_CHANNELS];
+
+ /* copy input buffer to decoder context to avoid reading past the end
+ of the buffer, which can be caused by a damaged input stream. */
+ if (buf_size >= 2 && AV_RB16(buf) == 0x770B) {
+ /* seems to be byte-swapped AC-3 */
+ int cnt = FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE) >> 1;
+ s->dsp.bswap16_buf((uint16_t *)s->input_buffer, (const uint16_t *)buf, cnt);
+ } else
+ memcpy(s->input_buffer, buf, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE));
+ buf = s->input_buffer;
+ /* initialize the GetBitContext with the start of valid AC-3 Frame */
+ init_get_bits(&s->gbc, buf, buf_size * 8);
+
+ /* parse the syncinfo */
+ err = parse_frame_header_fixed(s);
+
+ if (err) {
+ switch(err) {
+ case AAC_AC3_PARSE_ERROR_SYNC:
+ av_log(avctx, AV_LOG_ERROR, "frame sync error\n");
+ return -1;
+ case AAC_AC3_PARSE_ERROR_BSID:
+ av_log(avctx, AV_LOG_ERROR, "invalid bitstream id\n");
+ break;
+ case AAC_AC3_PARSE_ERROR_SAMPLE_RATE:
+ av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
+ break;
+ case AAC_AC3_PARSE_ERROR_FRAME_SIZE:
+ av_log(avctx, AV_LOG_ERROR, "invalid frame size\n");
+ break;
+ case AAC_AC3_PARSE_ERROR_FRAME_TYPE:
+ /* skip frame if CRC is ok. otherwise use error concealment. */
+ break;
+ default:
+ av_log(avctx, AV_LOG_ERROR, "invalid header\n");
+ break;
+ }
+ } else {
+ /* check that reported frame size fits in input buffer */
+ if (s->frame_size > buf_size) {
+ av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
+ err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+ }
+ }
+
+ /* if frame is ok, set audio parameters */
+ if (!err) {
+ avctx->sample_rate = s->sample_rate;
+ avctx->bit_rate = s->bit_rate;
+
+ /* channel config */
+ s->out_channels = s->channels;
+ s->output_mode = s->channel_mode;
+ if(s->lfe_on)
+ s->output_mode |= AC3_OUTPUT_LFEON;
+ if (avctx->request_channels > 0 && avctx->request_channels <= 2 &&
+ avctx->request_channels < s->channels) {
+ s->out_channels = avctx->request_channels;
+ s->output_mode = avctx->request_channels == 1 ? AC3_CHMODE_MONO : AC3_CHMODE_STEREO;
+ s->channel_layout = avpriv_ac3_channel_layout_tab[s->output_mode];
+ }
+ avctx->channels = s->out_channels;
+ avctx->channel_layout = s->channel_layout;
+
+ s->loro_center_mix_level = gain_levels_fixed[s-> center_mix_level];
+ s->loro_surround_mix_level = gain_levels_fixed[s->surround_mix_level];
+ s->ltrt_center_mix_level = LEVEL_MINUS_3DB;
+ s->ltrt_surround_mix_level = LEVEL_MINUS_3DB;
+ /* set downmixing coefficients if needed */
+ if(s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
+ s->fbw_channels == s->out_channels)) {
+ set_downmix_coeffs_fixed(s);
+ }
+ } else if (!s->out_channels) {
+ s->out_channels = avctx->channels;
+ if(s->out_channels < s->channels)
+ s->output_mode = s->out_channels == 1 ? AC3_CHMODE_MONO : AC3_CHMODE_STEREO;
+ }
+ /* set audio service type based on bitstream mode for AC-3 */
+ avctx->audio_service_type = s->bitstream_mode;
+ if (s->bitstream_mode == 0x7 && s->channels > 1)
+ avctx->audio_service_type = AV_AUDIO_SERVICE_TYPE_KARAOKE;
+
+ /* get output buffer */
+ s->frame.nb_samples = s->num_blocks * 256;
+ if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
+ av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+ return ret;
+ }
+ out_samples_flt = (int *)s->frame.data[0];
+ out_samples_s16 = (int16_t *)s->frame.data[0];
+
+ /* decode the audio blocks */
+ channel_map = ff_ac3_dec_channel_map[s->output_mode & ~AC3_OUTPUT_LFEON][s->lfe_on];
+ for (ch = 0; ch < s->out_channels; ch++)
+ output[ch] = s->output[channel_map[ch]];
+ for (blk = 0; blk < s->num_blocks; blk++) {
+ if (!err && decode_audio_block_fixed(s, blk)) {
+ av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
+ err = 1;
+ }
+
+ if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+ s->fmt_conv.fixed_interleave(out_samples_flt, output, 256,
+ s->out_channels);
+ out_samples_flt += 256 * s->out_channels;
+ } else {
+ s->fmt_conv.fixed_to_int16_interleave(out_samples_s16, output, 256,
+ s->out_channels);
+ out_samples_s16 += 256 * s->out_channels;
+ }
+ }
+
+ *got_frame_ptr = 1;
+ *(AVFrame *)data = s->frame;
+
+ return FFMIN(buf_size, s->frame_size);
+}
+
+/**
+ * Uninitialize the AC-3 decoder.
+ */
+ int ac3_fixed_decode_end(AVCodecContext *avctx)
+{
+ AC3FixedDecodeContext *s = avctx->priv_data;
+ ff_mdct_end(&s->imdct_512);
+ ff_mdct_end(&s->imdct_256);
+
+ return 0;
+}
+
+ /* byte offset of a field inside the decoder's private context */
+ #define OFFSET(x) offsetof(AC3FixedDecodeContext, x)
+ /* flags marking an option as an audio decoding parameter */
+ #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
+ /*
+  * User-settable options of the fixed-point AC-3 decoder.
+  * NOTE(review): dmix_mode is AV_OPT_TYPE_INT but its default is given
+  * through .dbl - confirm this matches the default_val convention of the
+  * libavutil version this patch targets.
+  * NOTE(review): the mix level options are AV_OPT_TYPE_FLOAT while the
+  * decoder assigns those fields from gain_levels_fixed[] and
+  * LEVEL_MINUS_3DB - confirm the field types.
+  */
+ static const AVOption options[] = {
+ { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {1.0}, 0.0, 1.0, PAR },
+
+{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), AV_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, 0, "dmix_mode"},
+{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+
+ /* terminator */
+ { NULL},
+ };
+
+ /* AVClass of the decoder's private context; it exposes the options table
+    above and is referenced by the codec definition through .priv_class */
+ static const AVClass ac3_decoder_class = {
+ .class_name = "AC3 fixed decoder",
+ .item_name = av_default_item_name,
+ .option = options,
+ .version = LIBAVUTIL_VERSION_INT,
+ };
+
+ /* Codec definition of the fixed-point AC-3 decoder ("ac3_fixed"). */
+ AVCodec ff_ac3_fixed_decoder = {
+ .name = "ac3_fixed",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = CODEC_ID_AC3,
+ .priv_data_size = sizeof (AC3FixedDecodeContext),
+ .init = ac3_fixed_decode_init,
+ .close = ac3_fixed_decode_end,
+ .decode = ac3_fixed_decode_frame,
+ .capabilities = CODEC_CAP_DR1,
+ .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+ /* the decode function tests for AV_SAMPLE_FMT_FLT and treats every other
+    format as 16-bit output */
+ .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT,
+ AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_NONE },
+ .priv_class = &ac3_decoder_class,
+ };
diff --git a/libavcodec/mips/ac3dec_fixed.h b/libavcodec/mips/ac3dec_fixed.h
new file mode 100644
index 0000000..ee05f46
--- /dev/null
+++ b/libavcodec/mips/ac3dec_fixed.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * AC3 fixed-point decoder for MIPS platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_AC3DEC_FIXED_H
+#define AVCODEC_MIPS_AC3DEC_FIXED_H
+
+#include "libavutil/lfg.h"
+#include "libavcodec/ac3.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/fmtconvert.h"
+
+#define AC3_OUTPUT_LFEON 8
+
+#define SPX_MAX_BANDS 17 /* second dimension of the per-band spectral-extension arrays in AC3FixedDecodeContext */
+/* Integer gain levels. The ratios below assume a Q16 scale (65536 == 1.0) - TODO confirm the format against the code that uses them. */
+#define LEVEL_FIXED_PLUS_3DB 92681 /* 92681/65536 = 1.4142, +3.0 dB */
+#define LEVEL_FIXED_PLUS_1POINT5DB 77935 /* 77935/65536 = 1.1892, +1.5 dB */
+#define LEVEL_FIXED_MINUS_1POINT5DB 65536 /* NOTE(review): 65536/65536 = 1.0000, i.e. 0 dB rather than -1.5 dB */
+#define LEVEL_FIXED_MINUS_3DB 55108 /* NOTE(review): 55108/65536 = 0.8409, i.e. -1.5 dB rather than -3 dB */
+#define LEVEL_FIXED_MINUS_4POINT5DB 46340 /* NOTE(review): 46340/65536 = 0.7071, i.e. -3 dB rather than -4.5 dB */
+#define LEVEL_FIXED_MINUS_6DB 38967 /* NOTE(review): 38967/65536 = 0.5946, i.e. -4.5 dB rather than -6 dB */
+#define LEVEL_FIXED_MINUS_9DB 32768 /* NOTE(review): 32768/65536 = 0.5000, i.e. -6 dB rather than -9 dB */
+#define LEVEL_FIXED_ZERO 0
+#define LEVEL_FIXED_ONE 23170 /* NOTE(review): 23170/65536 = 0.3535, i.e. -9 dB rather than 1.0; names and values look shifted - check every use before renaming or renumbering */
+
+/** Large enough for maximum possible frame size when the specification limit is ignored */
+#define AC3_FRAME_BUFFER_SIZE 32768
+
+typedef struct { /* private context of the ac3_fixed decoder */
+ AVClass *class; ///< class for AVOptions
+ AVCodecContext *avctx; ///< parent context
+ AVFrame frame; ///< AVFrame for decoded output
+ GetBitContext gbc; ///< bitstream reader
+
+///@name Bit stream information
+///@{
+ int frame_type; ///< frame type (strmtyp)
+ int substreamid; ///< substream identification
+ int frame_size; ///< current frame size, in bytes
+ int bit_rate; ///< stream bit rate, in bits-per-second
+ int sample_rate; ///< sample frequency, in Hz
+ int num_blocks; ///< number of audio blocks
+ int bitstream_mode; ///< bitstream mode (bsmod)
+ int channel_mode; ///< channel mode (acmod)
+ int channel_layout; ///< channel layout
+ int lfe_on; ///< lfe channel in use
+ int channel_map; ///< custom channel map
+ int center_mix_level; ///< Center mix level index
+ int surround_mix_level; ///< Surround mix level index
+ int eac3; ///< indicates if current frame is E-AC-3
+///@}
+ /* Downmix preferences: the dmix_mode, ltrt_cmixlev, ltrt_surmixlev, loro_cmixlev and loro_surmixlev options write to these fields. */
+ int preferred_stereo_downmix; ///< preferred stereo downmix mode
+ float ltrt_center_mix_level; ///< Lt/Rt center mix level
+ float ltrt_surround_mix_level; ///< Lt/Rt surround mix level
+ float loro_center_mix_level; ///< Lo/Ro center mix level
+ float loro_surround_mix_level; ///< Lo/Ro surround mix level
+///@name Frame syntax parameters
+///@{
+ int snr_offset_strategy; ///< SNR offset strategy (snroffststr)
+ int block_switch_syntax; ///< block switch syntax enabled (blkswe)
+ int dither_flag_syntax; ///< dither flag syntax enabled (dithflage)
+ int bit_allocation_syntax; ///< bit allocation model syntax enabled (bamode)
+ int fast_gain_syntax; ///< fast gain codes enabled (frmfgaincode)
+ int dba_syntax; ///< delta bit allocation syntax enabled (dbaflde)
+ int skip_syntax; ///< skip field syntax enabled (skipflde)
+ ///@}
+///@name Standard coupling
+///@{
+ int cpl_in_use[AC3_MAX_BLOCKS]; ///< coupling in use (cplinu)
+ int cpl_strategy_exists[AC3_MAX_BLOCKS];///< coupling strategy exists (cplstre)
+ int channel_in_cpl[AC3_MAX_CHANNELS]; ///< channel in coupling (chincpl)
+ int phase_flags_in_use; ///< phase flags in use (phsflginu)
+ int phase_flags[AC3_MAX_CPL_BANDS]; ///< phase flags (phsflg)
+ int num_cpl_bands; ///< number of coupling bands (ncplbnd)
+ uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS]; ///< number of coeffs in each coupling band
+ int firstchincpl; ///< first channel in coupling
+ int first_cpl_coords[AC3_MAX_CHANNELS]; ///< first coupling coordinates states (firstcplcos)
+ int cpl_coords[AC3_MAX_CHANNELS][AC3_MAX_CPL_BANDS]; ///< coupling coordinates (cplco)
+///@}
+
+///@name Spectral extension
+///@{
+ int spx_in_use; ///< spectral extension in use (spxinu)
+ uint8_t channel_uses_spx[AC3_MAX_CHANNELS]; ///< channel uses spectral extension (chinspx)
+ int8_t spx_atten_code[AC3_MAX_CHANNELS]; ///< spx attenuation code (spxattencod)
+ int spx_src_start_freq; ///< spx start frequency bin
+ int spx_dst_end_freq; ///< spx end frequency bin
+ int spx_dst_start_freq; ///< spx starting frequency bin for copying (copystartmant)
+ ///< the copy region ends at the start of the spx region.
+ int num_spx_bands; ///< number of spx bands (nspxbnds)
+ uint8_t spx_band_sizes[SPX_MAX_BANDS]; ///< number of bins in each spx band
+ uint8_t first_spx_coords[AC3_MAX_CHANNELS]; ///< first spx coordinates states (firstspxcos)
+ int spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor (nblendfact)
+ int spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact)
+///@}
+///@name Adaptive hybrid transform
+///@{
+ int channel_uses_aht[AC3_MAX_CHANNELS]; ///< channel AHT in use (chahtinu)
+ int pre_mantissa[AC3_MAX_CHANNELS][AC3_MAX_COEFS][AC3_MAX_BLOCKS]; ///< pre-IDCT mantissas
+///@}
+///@name Channel
+///@{
+ int fbw_channels; ///< number of full-bandwidth channels
+ int channels; ///< number of total channels
+ int lfe_ch; ///< index of LFE channel
+ int downmix_coeffs[AC3_MAX_CHANNELS][2]; ///< stereo downmix coefficients
+ int downmixed; ///< indicates if coeffs are currently downmixed
+ int output_mode; ///< output channel configuration
+ int out_channels; ///< number of output channels
+///@}
+///@name Dynamic range
+///@{
+ int dynamic_range[2]; ///< dynamic range
+ int drc_scale; ///< percentage of dynamic range compression to be applied (NOTE(review): int here, but the drc_scale option is AV_OPT_TYPE_FLOAT - confirm)
+///@}
+///@name Bandwidth
+///@{
+ int start_freq[AC3_MAX_CHANNELS]; ///< start frequency bin (strtmant)
+ int end_freq[AC3_MAX_CHANNELS]; ///< end frequency bin (endmant)
+///@}
+///@name Rematrixing
+///@{
+ int num_rematrixing_bands; ///< number of rematrixing bands (nrematbnd)
+ int rematrixing_flags[4]; ///< rematrixing flags (rematflg)
+///@}
+///@name Exponents
+///@{
+ int num_exp_groups[AC3_MAX_CHANNELS]; ///< Number of exponent groups (nexpgrp)
+ int8_t dexps[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< decoded exponents
+ int exp_strategy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS]; ///< exponent strategies (expstr)
+///@}
+///@name Bit allocation
+///@{
+ AC3BitAllocParameters bit_alloc_params; ///< bit allocation parameters
+ int first_cpl_leak; ///< first coupling leak state (firstcplleak)
+ int snr_offset[AC3_MAX_CHANNELS]; ///< signal-to-noise ratio offsets (snroffst)
+ int fast_gain[AC3_MAX_CHANNELS]; ///< fast gain values/SMR's (fgain)
+ uint8_t bap[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< bit allocation pointers
+ int16_t psd[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< scaled exponents
+ int16_t band_psd[AC3_MAX_CHANNELS][AC3_CRITICAL_BANDS]; ///< interpolated exponents
+ int16_t mask[AC3_MAX_CHANNELS][AC3_CRITICAL_BANDS]; ///< masking curve values
+ int dba_mode[AC3_MAX_CHANNELS]; ///< delta bit allocation mode
+ int dba_nsegs[AC3_MAX_CHANNELS]; ///< number of delta segments
+ uint8_t dba_offsets[AC3_MAX_CHANNELS][8]; ///< delta segment offsets
+ uint8_t dba_lengths[AC3_MAX_CHANNELS][8]; ///< delta segment lengths
+ uint8_t dba_values[AC3_MAX_CHANNELS][8]; ///< delta values for each segment
+///@}
+///@name Zero-mantissa dithering
+///@{
+ int dither_flag[AC3_MAX_CHANNELS]; ///< dither flags (dithflg)
+ AVLFG dith_state; ///< for dither generation
+///@}
+///@name IMDCT
+///@{
+ int block_switch[AC3_MAX_CHANNELS]; ///< block switch flags (blksw)
+ FFTContext imdct_512; ///< for 512 sample IMDCT
+ FFTContext imdct_256; ///< for 256 sample IMDCT
+///@}
+///@name Optimization
+///@{
+ DSPContext dsp; ///< for optimization
+ //AC3DSPContext ac3dsp;
+ FmtConvertContext fmt_conv; ///< optimized conversion functions
+ int mul_bias; ///< scaling for fixed_to_int16 conversion
+///@}
+///@name Aligned arrays
+///@{
+ DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< fixed-point transform coefficients
+ DECLARE_ALIGNED(32, FFTSample, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients
+ DECLARE_ALIGNED(32, FFTSample, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block
+ DECLARE_ALIGNED(32, int16_t, window)[AC3_BLOCK_SIZE]; ///< window coefficients
+ DECLARE_ALIGNED(32, FFTSample, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing
+ DECLARE_ALIGNED(32, int, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing
+ DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
+///@}
+} AC3FixedDecodeContext;
+
+int ac3_fixed_decode_init(AVCodecContext *avctx); /* installed as .init of ff_ac3_fixed_decoder */
+int ac3_fixed_decode_end(AVCodecContext *avctx); /* installed as .close of ff_ac3_fixed_decoder; ends both IMDCT contexts and returns 0 */
+int ac3_fixed_decode_frame(AVCodecContext * avctx, void *data, int *data_size, /* installed as .decode of ff_ac3_fixed_decoder */
+ AVPacket *avpkt);
+void ff_ac3_downmix_c_fixed(int (*samples)[256], int (*matrix)[2], int out_ch, int in_ch, int len); /* presumably downmixes in_ch rows of 256 ints to out_ch using a 2-column matrix - TODO confirm against the definition */
+
+#endif /* AVCODEC_MIPS_AC3DEC_FIXED_H */
diff --git a/libavcodec/mips/dsputil_mips_fixed.c b/libavcodec/mips/dsputil_mips_fixed.c
new file mode 100644
index 0000000..e1b8037
--- /dev/null
+++ b/libavcodec/mips/dsputil_mips_fixed.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Dragan Mrdjan (dmrdjan at mips.com)
+ *
+ * DSP utils optimized for MIPS fixed-point platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/dsputil.c
+ */
+
+#include "config.h"
+#include "libavcodec/dsputil.h"
+
+static void vector_fmul_window_mips_fixed(int *dst, const int16_t *src0, const int16_t *src1, const int16_t *win, int len)
+{ /* Windowed combination of src0 and src1 into 2*len ints at dst, using 2*len window values; installed as vector_fmul_window_fixed */
+ int i,j;
+ int *dst_i, *dst_j;
+ const int16_t * src0_i, *src1_j;
+ const int16_t *win_i, *win_j;
+ int16_t s0, s01, s02, s03, s1, s11, s12, s13;
+ int16_t wi, wi1, wi2, wi3, wj, wj1, wj2, wj3;
+ /* Re-base dst, win and src0 by len so index i in [-len, 0) is the first half and j in [0, len) the second; src1 is used as passed. */
+ dst += len;
+ win += len;
+ src0 += len;
+ /* i climbs from -len and j descends from len-1 in steps of 4, so len is assumed to be a multiple of 4 - TODO confirm callers. */
+ for(i=-len, j=len-1; i<0; i+=4, j-=4) {
+ dst_i = dst + i;
+ dst_j = dst + j;
+ src0_i = src0 + i;
+ src1_j = src1 + j;
+ win_i = win + i;
+ win_j = win + j;
+ /* NOTE(review): the per-instruction notes below follow the MIPS DSP ASE manual (append, mulsaq_s.w.ph, dpaqx_s.w.ph, extr_r.w) - verify there. */
+ __asm__ volatile (
+ "lh %[s0], 0(%[src0_i]) \n\t" /* s0 = src0[i] */
+ "lh %[s1], 0(%[src1_j]) \n\t" /* s1 = src1[j] */
+ "lh %[wi], 0(%[win_i]) \n\t" /* wi = win[i] */
+ "lh %[wj], 0(%[win_j]) \n\t" /* wj = win[j] */
+ "append %[s0], %[s1], 16 \n\t" /* pack the pair: high half s0, low half s1 */
+ "append %[wj], %[wi], 16 \n\t" /* pack the pair: high half wj, low half wi */
+ "mult $ac0, $0, $0 \n\t" /* clear accumulator ac0 */
+ "mulsaq_s.w.ph $ac0, %[s0], %[wj] \n\t" /* ac0 = s0*wj - s1*wi (fractional products) */
+ "mult $ac1, $0, $0 \n\t" /* clear accumulator ac1 */
+ "dpaqx_s.w.ph $ac1, %[s0], %[wj] \n\t" /* ac1 = s0*wi + s1*wj (cross products) */
+ "lh %[s01], 2(%[src0_i]) \n\t" /* loads for element i+1 / j-1 are interleaved with the extracts */
+ "lh %[s11], -2(%[src1_j]) \n\t"
+ "extr_r.w %[s1], $ac0, 16 \n\t" /* rounded ac0 >> 16, destined for dst[i] */
+ "lh %[wi1], 2(%[win_i]) \n\t"
+ "lh %[wj1], -2(%[win_j]) \n\t"
+ "extr_r.w %[wj], $ac1, 16 \n\t" /* rounded ac1 >> 16, destined for dst[j] */
+ "append %[s01], %[s11], 16 \n\t"
+ "append %[wj1], %[wi1], 16 \n\t"
+ "mult $ac2, $0, $0 \n\t"
+ "mulsaq_s.w.ph $ac2, %[s01], %[wj1] \n\t"
+ "sw %[s1], 0(%[dst_i]) \n\t" /* dst[i] */
+ "sw %[wj], 0(%[dst_j]) \n\t" /* dst[j] */
+ "mult $ac3, $0, $0 \n\t"
+ "dpaqx_s.w.ph $ac3, %[s01], %[wj1] \n\t"
+ "extr_r.w %[s11], $ac2, 16 \n\t"
+ "extr_r.w %[wj1], $ac3, 16 \n\t"
+ "lh %[s02], 4(%[src0_i]) \n\t" /* element i+2 / j-2 */
+ "lh %[s12], -4(%[src1_j]) \n\t"
+ "lh %[wi2], 4(%[win_i]) \n\t"
+ "lh %[wj2], -4(%[win_j]) \n\t"
+ "append %[s02], %[s12], 16 \n\t"
+ "append %[wj2], %[wi2], 16 \n\t"
+ "mult $ac0, $0, $0 \n\t"
+ "mulsaq_s.w.ph $ac0, %[s02], %[wj2] \n\t"
+ "sw %[s11], 4(%[dst_i]) \n\t" /* dst[i+1] */
+ "sw %[wj1], -4(%[dst_j]) \n\t" /* dst[j-1] */
+ "mult $ac1, $0, $0 \n\t"
+ "dpaqx_s.w.ph $ac1, %[s02], %[wj2] \n\t"
+ "extr_r.w %[s12], $ac0, 16 \n\t"
+ "lh %[s03], 6(%[src0_i]) \n\t" /* element i+3 / j-3 */
+ "lh %[s13], -6(%[src1_j]) \n\t"
+ "lh %[wi3], 6(%[win_i]) \n\t"
+ "lh %[wj3], -6(%[win_j]) \n\t"
+ "append %[s03], %[s13], 16 \n\t"
+ "append %[wj3], %[wi3], 16 \n\t"
+ "mult $ac2, $0, $0 \n\t"
+ "mulsaq_s.w.ph $ac2, %[s03], %[wj3] \n\t"
+ "sw %[s12], 8(%[dst_i]) \n\t" /* dst[i+2] */
+ "extr_r.w %[wj2], $ac1, 16 \n\t"
+ "mult $ac3, $0, $0 \n\t"
+ "dpaqx_s.w.ph $ac3, %[s03], %[wj3] \n\t"
+ "extr_r.w %[s13], $ac2, 16 \n\t"
+ "extr_r.w %[wj3], $ac3, 16 \n\t"
+ "sw %[wj2], -8(%[dst_j]) \n\t" /* dst[j-2] */
+ "sw %[s13], 12(%[dst_i]) \n\t" /* dst[i+3] */
+ "sw %[wj3], -12(%[dst_j]) \n\t" /* dst[j-3] */
+ /* NOTE(review): the scratch operands are int16_t variables although they hold packed 32-bit values inside the asm; they are not read afterwards. */
+ : [s0] "=&r" (s0), [s1] "=&r" (s1), [wi] "=&r" (wi),
+ [wj] "=&r" (wj), [s03] "=&r" (s03), [s01] "=&r" (s01),
+ [s11] "=&r" (s11), [wi1] "=&r" (wi1), [wj1] "=&r" (wj1),
+ [s13] "=&r" (s13), [s02] "=&r" (s02), [s12] "=&r" (s12),
+ [wi2] "=&r" (wi2), [wj2] "=&r" (wj2), [wi3] "=&r" (wi3),
+ [wj3] "=&r" (wj3)
+ : [src0_i] "r" (src0_i), [win_j] "r" (win_j ), [src1_j] "r" (src1_j),
+ [win_i] "r" (win_i), [dst_i] "r" (dst_i), [dst_j] "r" (dst_j)
+ : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", /* "memory" covers the sw stores to dst; the rest lists the accumulator registers */
+ "$ac3hi", "$ac3lo"
+ );
+ }
+}
+
+void ff_dsputil_init_mips_fixed(DSPContext *c) { /* installs the MIPS fixed-point window routine into the DSP context */
+    c->vector_fmul_window_fixed = &vector_fmul_window_mips_fixed;
+}
diff --git a/libavcodec/mips/fft_mips_fixed.c b/libavcodec/mips/fft_mips_fixed.c
new file mode 100644
index 0000000..9fc9287
--- /dev/null
+++ b/libavcodec/mips/fft_mips_fixed.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors: Stanislav Ocovaj (socovaj at mips.com)
+ * Dragan Mrdjan (dmrdjan at mips.com)
+ * Zoran Lukic (zlukic at mips.com)
+ * Bojan Zivkovic (bojan at mips.com)
+ *
+ * Optimization of FFT and MDCT/IMDCT transforms for MIPS fixed-point
+ * architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FFT_FLOAT 0
+#include "libavcodec/fft.h"
+#include "libavcodec/mips/fft_table.h"
+
+#include "fft_table_fixed.h"
+
+av_cold int ff_mdct_fixed_init(FFTContext *s, int nbits, int inverse, int scale)
+{   /* returns 0 on success, -1 on failure (after ff_mdct_end(s)) */
+    int n, n4, i;
+    double alpha, theta;
+    int tstep;
+    /* Initialise *s as a 2^nbits-point fixed-point MDCT: FFT context plus computed twiddles; inverse is forwarded to ff_fft_init(). */
+    memset(s, 0, sizeof(*s));
+    n = 1 << nbits;
+    s->mdct_bits = nbits;
+    s->mdct_size = n;
+    n4 = n >> 2;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+    /* The underlying FFT has n/4 complex points, hence nbits - 2. */
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+    /* n/2 twiddles in total (n/4 cos followed by n/4 sin); size them by the element type rather than by int. */
+    s->tcos = av_malloc((n / 2) * sizeof(*s->tcos));
+
+    if (!s->tcos)
+        goto fail;
+    /* Table layout: sin half placed after the cos half, or interleaved; only FF_MDCT_PERM_NONE is selected above. */
+    switch (s->mdct_permutation) {
+    case FF_MDCT_PERM_NONE:
+        s->tsin = s->tcos + n4;
+        tstep = 1;
+        break;
+    case FF_MDCT_PERM_INTERLEAVE:
+        s->tsin = s->tcos + 1;
+        tstep = 2;
+        break;
+    default:
+        goto fail;
+    }
+    theta = 0.125 + (scale < 0 ? n4 : 0); /* only the sign of scale is used: a negative scale adds n/4 to the phase offset */
+    /* Each entry is -cos / -sin scaled by 65536, then halved with rounding ((x + 1) >> 1) and capped at 32767. */
+    for(i=0;i<n4;i++) {
+        int tmp;
+        /* angle of entry i: 2*pi*(i + theta)/n */
+        alpha = 2 * M_PI * (i + theta) / n;
+        tmp = (int)(-cos(alpha) * 65536);
+        tmp = (tmp + 1) >> 1;
+        if (tmp > 32767)
+            tmp = 32767;
+        s->tcos[i*tstep] = (FFTSample)tmp;
+        tmp = (int)(-sin(alpha) * 65536);
+        tmp = (tmp + 1) >> 1;
+        if (tmp > 32767)
+            tmp = 32767;
+        s->tsin[i*tstep] = tmp;
+    }
+
+    return 0;
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+av_cold int ff_mdct_fixed_init_hardcoded_128(FFTContext *s, int nbits, int inverse, int scale)
+{   /* Like ff_mdct_fixed_init(), but copies twiddles from tcos_fixed_128/tsin_fixed_128; returns 0, or -1 after ff_mdct_end(s). scale is not used. */
+    int n, n4, i;
+    int tstep;
+    /* Start from a zeroed context, then record the transform size (n = 2^nbits). */
+    memset(s, 0, sizeof(*s));
+    n = 1 << nbits;
+    s->mdct_bits = nbits;
+    s->mdct_size = n;
+    n4 = n >> 2;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+    /* n/2 twiddles in total (n/4 cos followed by n/4 sin); size them by the element type rather than by int. */
+    s->tcos = av_malloc((n / 2) * sizeof(*s->tcos));
+
+    if (!s->tcos)
+        goto fail;
+
+    switch (s->mdct_permutation) {
+    case FF_MDCT_PERM_NONE:
+        s->tsin = s->tcos + n4;
+        tstep = 1;
+        break;
+    case FF_MDCT_PERM_INTERLEAVE:
+        s->tsin = s->tcos + 1;
+        tstep = 2;
+        break;
+    default:
+        goto fail;
+    }
+    for(i=0;i<n4;i++) { /* NOTE(review): reads n/4 entries from each table; the table lengths are not visible here - confirm nbits cannot exceed them */
+        s->tcos[i*tstep] = tcos_fixed_128[i];
+        s->tsin[i*tstep] = tsin_fixed_128[i];
+    }
+    return 0;
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+#if HAVE_MIPSDSPR2 && HAVE_INLINE_ASM
+static void ff_imdct_fixed_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{   /* Half-IMDCT with the complex multiplies in MIPS DSP asm: pre-rotation into revtab order, FFT, post-rotation in place. */
+    int k, n8, n4, n2, n, j, j2;
+    int ax0, ax1, ax2, ax3;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)output;
+    /* t0..t3 and t01..t31 receive the rounded values extracted from accumulators ac0..ac3. */
+    FFTSample t0, t1, t2, t3, t01, t11, t21, t31;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;
+    /* in1 walks forward and in2 backward through the input, one sample of each per complex value. */
+    in2 = input + n2 - 1;
+    /* Four values per pass (two per asm block), so n4 is assumed to be a multiple of 4 - TODO confirm. */
+
+    for(k=0; k<n4; k+=4) {
+        int k1 = k * 2;  /* byte offset of entry k in the 16-bit twiddle tables, for lhx */
+        int k2 = k1 + 2; /* byte offset of entry k + 1 */
+        /* NOTE(review): instruction semantics per the MIPS DSP ASE manual: ac0/ac2 get hi*hi - lo*lo, ac1/ac3 get the cross products. */
+        __asm__ volatile (
+            "lh %[ax0], 0(%[in2]) \n\t"
+            "lh %[ax1], 0(%[in1]) \n\t"
+            "lhx %[ax2], %[k1](%[tcos]) \n\t"
+            "lhx %[ax3], %[k1](%[tsin]) \n\t"
+            "multu $ac0, $0, $0 \n\t"
+            "multu $ac1, $0, $0 \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "multu $ac2, $0, $0 \n\t"
+            "mulsaq_s.w.ph $ac0, %[ax0], %[ax2] \n\t"
+            "dpaqx_s.w.ph $ac1, %[ax0], %[ax2] \n\t"
+            "lh %[ax0], -4(%[in2]) \n\t"
+            "lh %[ax1], 4(%[in1]) \n\t"
+            "lhx %[ax2], %[k2](%[tcos]) \n\t"
+            "lhx %[ax3], %[k2](%[tsin]) \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "mulsaq_s.w.ph $ac2, %[ax0], %[ax2] \n\t"
+            "multu $ac3, $0, $0 \n\t"
+            "dpaqx_s.w.ph $ac3, %[ax0], %[ax2] \n\t"
+            "extr_r.w %[t0], $ac0, 16 \n\t"
+            "extr_r.w %[t2], $ac1, 16 \n\t"
+            "extr_r.w %[t1], $ac2, 16 \n\t"
+            "extr_r.w %[t3], $ac3, 16 \n\t"
+
+            : [ax0] "=&r" (ax0), [ax2] "=&r" (ax2),[ax1] "=&r" (ax1), [ax3] "=&r" (ax3),
+              [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
+            : [in1] "r" (in1), [in2] "r" (in2), [tcos] "r" (tcos),
+              [tsin] "r" (tsin), [k1] "r" (k1), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        j = revtab[k];
+        j2 = revtab[k+1];
+
+        z[j].re = t0;
+        z[j].im = t2;
+        z[j2].re = t1;
+        z[j2].im = t3;
+
+        k1 += 4; /* move on to twiddle entries k + 2 and k + 3 */
+        k2 += 4;
+
+        __asm__ volatile (
+            "lh %[ax0], -8(%[in2]) \n\t"
+            "lh %[ax1], 8(%[in1]) \n\t"
+            "lhx %[ax2], %[k1](%[tcos]) \n\t"
+            "lhx %[ax3], %[k1](%[tsin]) \n\t"
+            "multu $ac0, $0, $0 \n\t"
+            "multu $ac1, $0, $0 \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "multu $ac2, $0, $0 \n\t"
+            "mulsaq_s.w.ph $ac0, %[ax0], %[ax2] \n\t"
+            "dpaqx_s.w.ph $ac1, %[ax0], %[ax2] \n\t"
+            "lh %[ax0], -12(%[in2]) \n\t"
+            "lh %[ax1], 12(%[in1]) \n\t"
+            "lhx %[ax2], %[k2](%[tcos]) \n\t"
+            "lhx %[ax3], %[k2](%[tsin]) \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "mulsaq_s.w.ph $ac2, %[ax0], %[ax2] \n\t"
+            "multu $ac3, $0, $0 \n\t"
+            "dpaqx_s.w.ph $ac3, %[ax0], %[ax2] \n\t"
+            "extr_r.w %[t0], $ac0, 16 \n\t"
+            "extr_r.w %[t2], $ac1, 16 \n\t"
+            "extr_r.w %[t1], $ac2, 16 \n\t"
+            "extr_r.w %[t3], $ac3, 16 \n\t"
+
+            : [ax0] "=&r" (ax0), [ax2] "=&r" (ax2), [ax1] "=&r" (ax1), [ax3] "=&r" (ax3),
+              [t0] "=&r" (t0), [t2] "=&r" (t2), [t1] "=r" (t1), [t3] "=r" (t3)
+            : [in1] "r" (in1), [in2] "r" (in2), [tcos] "r" (tcos),
+              [tsin] "r" (tsin),[k1] "r" (k1), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        j = revtab[k+2];
+        j2 = revtab[k+3];
+
+        z[j ].re = t0;
+        z[j ].im = t2;
+        z[j2].re = t1;
+        z[j2].im = t3;
+        in1 += 8;
+        in2 -= 8;
+    }
+
+    s->fft_fixed_calc(s, z);
+
+    /* post rotation + reordering */
+    /* Two mirrored pairs per pass, so n8 is assumed to be even - TODO confirm. */
+    for(k=0; k<n8; k+=2 ) {
+        int k1 = 2 * (n8 - k - 1), k2 = k1 - 2; /* byte offsets of twiddle entries n8-k-1 and n8-k-2 */
+        int k11 = 2 * (n8 + k), k21 = k11 + 2;  /* byte offsets of twiddle entries n8+k and n8+k+1 */
+        in1 = (const FFTSample*)(z + (n8 - k - 1));
+        in2 = (const FFTSample*)(z + (n8 + k));
+        /* Lower side: products for z[n8-k-1] (t0, t2) and z[n8-k-2] (t1, t3). */
+        __asm__ volatile (
+            "lh %[ax0], 2(%[in1]) \n\t"
+            "lh %[ax1], 0(%[in1]) \n\t"
+            "lhx %[ax2], %[k1](%[tsin]) \n\t"
+            "lhx %[ax3], %[k1](%[tcos]) \n\t"
+            "multu $ac0, $0, $0 \n\t"
+            "multu $ac1, $0, $0 \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "mulsaq_s.w.ph $ac0, %[ax0], %[ax2] \n\t"
+            "dpaqx_s.w.ph $ac1, %[ax0], %[ax2] \n\t"
+            "lh %[ax0], -2(%[in1]) \n\t"
+            "lh %[ax1], -4(%[in1]) \n\t"
+            "lhx %[ax2], %[k2](%[tsin]) \n\t"
+            "lhx %[ax3], %[k2](%[tcos]) \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "multu $ac2, $0, $0 \n\t"
+            "mulsaq_s.w.ph $ac2, %[ax0], %[ax2] \n\t"
+            "multu $ac3, $0, $0 \n\t"
+            "dpaqx_s.w.ph $ac3, %[ax0], %[ax2] \n\t"
+            "extr_r.w %[t0], $ac0, 16 \n\t"
+            "extr_r.w %[t2], $ac1, 16 \n\t"
+            "extr_r.w %[t1], $ac2, 16 \n\t"
+            "extr_r.w %[t3], $ac3, 16 \n\t"
+
+            : [ax0] "=&r" (ax0), [ax1] "=&r" (ax1), [ax2] "=&r" (ax2), [ax3] "=&r" (ax3),
+              [t0] "=r" (t0), [t2] "=r" (t2), [t1] "=r" (t1), [t3] "=r" (t3)
+            : [in1] "r" (in1), [k1] "r" (k1), [tsin] "r" (tsin), [tcos] "r" (tcos),
+              [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+        /* Upper side: products for z[n8+k] (t01, t21) and z[n8+k+1] (t11, t31). */
+        __asm__ volatile (
+            "lh %[ax0], 2(%[in2]) \n\t"
+            "lh %[ax1], 0(%[in2]) \n\t"
+            "lhx %[ax2], %[k11](%[tsin]) \n\t"
+            "lhx %[ax3], %[k11](%[tcos]) \n\t"
+            "multu $ac0, $0, $0 \n\t"
+            "multu $ac1, $0, $0 \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "mulsaq_s.w.ph $ac0, %[ax0], %[ax2] \n\t"
+            "dpaqx_s.w.ph $ac1, %[ax0], %[ax2] \n\t"
+            "lh %[ax0], 6(%[in2]) \n\t"
+            "lh %[ax1], 4(%[in2]) \n\t"
+            "lhx %[ax2], %[k21](%[tsin]) \n\t"
+            "lhx %[ax3], %[k21](%[tcos]) \n\t"
+            "append %[ax0], %[ax1], 16 \n\t"
+            "append %[ax2], %[ax3], 16 \n\t"
+            "multu $ac2, $0, $0 \n\t"
+            "mulsaq_s.w.ph $ac2, %[ax0], %[ax2] \n\t"
+            "multu $ac3, $0, $0 \n\t"
+            "dpaqx_s.w.ph $ac3, %[ax0], %[ax2] \n\t"
+            "extr_r.w %[t01], $ac0, 16 \n\t"
+            "extr_r.w %[t21], $ac1, 16 \n\t"
+            "extr_r.w %[t11], $ac2, 16 \n\t"
+            "extr_r.w %[t31], $ac3, 16 \n\t"
+
+            : [ax0] "=&r" (ax0), [ax1] "=&r" (ax1), [ax2] "=&r" (ax2), [ax3] "=&r" (ax3),
+              [t01] "=r" (t01), [t21] "=r" (t21), [t11] "=r" (t11), [t31] "=r" (t31)
+            : [in2] "r" (in2), [k11] "r" (k11), [tsin] "r" (tsin),[tcos] "r" (tcos),
+              [k21] "r" (k21)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+        /* The imaginary parts are swapped between the mirrored entries, as in the CMUL_SR fallback. */
+        z[n8-k-1].re = t0;
+        z[n8+k  ].im = t2;
+        z[n8-k-1].im = t21;
+        z[n8+k  ].re = t01;
+
+        z[n8-k-2].re = t1;
+        z[n8+k+1].im = t3;
+        z[n8-k-2].im = t31;
+        z[n8+k+1].re = t11;
+        /* Both pairs, (n8-k-1, n8+k) and (n8-k-2, n8+k+1), are now complete. */
+    }
+}
+#else
+#define CMUL_SR(dre, dim, are, aim, bre, bim) do { \
+        (dre) = ( ((are) * (bre) - (aim) * (bim) + 0x4000) >> 15 ); \
+        (dim) = ( ((are) * (bim) + (aim) * (bre) + 0x4000) >> 15 ); \
+    } while(0)
+/* CMUL_SR: (dre, dim) = (are, aim) * (bre, bim) as complex numbers; each part gets +0x4000 and is shifted right by 15. */
+static void ff_imdct_fixed_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    /* Plain-C half-IMDCT: pre-rotation into the slots given by s->revtab, FFT, then post-rotation in place. */
+    const int size    = 1 << s->mdct_bits;
+    const int half    = size >> 1;
+    const int quarter = size >> 2;
+    const int eighth  = size >> 3;
+    const uint16_t  *perm    = s->revtab;
+    const FFTSample *cos_tab = s->tcos;
+    const FFTSample *sin_tab = s->tsin;
+    const FFTSample *fwd = input;            /* steps forward by two samples per iteration */
+    const FFTSample *bwd = input + half - 1; /* steps backward by two samples per iteration */
+    FFTComplex *z = (FFTComplex *)output;    /* the output buffer is used as the FFT work array */
+    int k;
+
+    /* pre rotation */
+    for (k = 0; k < quarter; k++, fwd += 2, bwd -= 2) {
+        FFTComplex *slot = &z[perm[k]];
+
+        CMUL_SR(slot->re, slot->im, *bwd, *fwd, cos_tab[k], sin_tab[k]);
+    }
+    s->fft_fixed_calc(s, z);
+
+    /* post rotation + reordering: entries n8-k-1 and n8+k are rotated together and exchange imaginary parts */
+    for (k = 0; k < eighth; k++) {
+        FFTComplex *lo = &z[eighth - k - 1];
+        FFTComplex *hi = &z[eighth + k];
+        FFTSample lo_re, lo_im, hi_re, hi_im;
+
+        CMUL_SR(lo_re, hi_im, lo->im, lo->re, sin_tab[eighth - k - 1], cos_tab[eighth - k - 1]);
+        CMUL_SR(hi_re, lo_im, hi->im, hi->re, sin_tab[eighth + k], cos_tab[eighth + k]);
+        lo->re = lo_re;
+        lo->im = lo_im;
+        hi->re = hi_re;
+        hi->im = hi_im;
+    }
+}
+#endif /* HAVE_MIPSDSPR2 && HAVE_INLINE_ASM */
+
+av_cold int ff_mdct_fixed_init_hardcoded(FFTContext *s, int nbits, int inverse, int scale)
+{   /* Like ff_mdct_fixed_init(), but copies twiddles from tcos_fixed/tsin_fixed; returns 0, or -1 after ff_mdct_end(s). scale is not used. */
+    int n, n4, i;
+    int tstep;
+
+    memset(s, 0, sizeof(*s));
+    n = 1 << nbits;
+    s->mdct_bits = nbits;
+    s->mdct_size = n;
+    n4 = n >> 2;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+    /* n/2 twiddles in total (n/4 cos followed by n/4 sin); size them by the element type rather than by int. */
+    s->tcos = av_malloc((n / 2) * sizeof(*s->tcos));
+
+    if (!s->tcos)
+        goto fail;
+
+    switch (s->mdct_permutation) {
+    case FF_MDCT_PERM_NONE:
+        s->tsin = s->tcos + n4;
+        tstep = 1;
+        break;
+    case FF_MDCT_PERM_INTERLEAVE:
+        s->tsin = s->tcos + 1;
+        tstep = 2;
+        break;
+    default:
+        goto fail;
+    }
+    for(i=0;i<n4;i++) {
+        /* NOTE(review): reads n/4 entries from each table; the table lengths are not visible here - confirm nbits cannot exceed them. */
+        s->tcos[i*tstep] = tcos_fixed[i];
+        s->tsin[i*tstep] = tsin_fixed[i];
+    }
+    return 0;
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+#if HAVE_MIPSDSPR2 && HAVE_INLINE_ASM
+static void ff_fft_fixed_calc_mips(FFTContext *s, FFTComplex *z)
+{
+
+ int nbits, i, n, num_transforms, offset, step;
+ int n4, n2, n34;
+ FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+ int step2;
+ int temp1, temp2, temp3, temp4;
+ int z0, z1, z2, z3;
+ int t12, t34, t56, t78, t0a, t1a, t2a, t3a;
+ int in1, in2, in3, in4;
+ FFTComplex *tmpz, *addr1, *addr2, *addr3;
+ int w_re, w_im;
+ FFTSample *w_re_ptr, *w_im_ptr;
+ int pom;
+ const int fft_size = (1 << s->nbits);
+
+ FFTComplex *tmpz_n2, *tmpz_n34, *tmpz_n4;
+ FFTComplex *tmpz_n2_i, *tmpz_n34_i, *tmpz_n4_i, *tmpz_i;
+
+ int z_re_n2, z_im_n2, z_re_n34, z_im_n34, z_re, z_im, z_re_n4, z_im_n4;
+
+ num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+ for (n=0; n<num_transforms; n++)
+ {
+ offset = fft_offsets_lut[n] << 2;
+ tmpz = z + offset;
+
+ /* fft4 */
+ __asm__ volatile (
+ "lw %[z0], 0(%[tmpz]) \n\t"
+ "lw %[z1], 4(%[tmpz]) \n\t"
+ "lw %[z2], 8(%[tmpz]) \n\t"
+ "lw %[z3], 12(%[tmpz]) \n\t"
+ "addq.ph %[t12], %[z0], %[z1] \n\t"
+ "subq.ph %[t34], %[z0], %[z1] \n\t"
+ "addq.ph %[t56], %[z2], %[z3] \n\t"
+ "subq.ph %[t78], %[z2], %[z3] \n\t"
+ "addq.ph %[t0a], %[t12], %[t56] \n\t"
+ "packrl.ph %[t78], %[t78], %[t78] \n\t"
+ "subq.ph %[t2a], %[t12], %[t56] \n\t"
+ "addq.ph %[t1a], %[t34], %[t78] \n\t"
+ "subq.ph %[t3a], %[t34], %[t78] \n\t"
+ "packrl.ph %[t1a], %[t1a], %[t1a] \n\t"
+ "packrl.ph %[t3a], %[t3a], %[t3a] \n\t"
+ "sw %[t0a], 0(%[tmpz]) \n\t"
+ "packrl.ph %[z1], %[t1a], %[t3a] \n\t"
+ "packrl.ph %[z3], %[t3a], %[t1a] \n\t"
+ "sw %[t2a], 8(%[tmpz]) \n\t"
+ "sw %[z3], 4(%[tmpz]) \n\t"
+ "sw %[z1], 12(%[tmpz]) \n\t"
+
+ : [z0] "=&r" (z0), [z1] "=&r" (z1), [t12] "=&r" (t12),
+ [z2] "=&r" (z2), [z3] "=&r" (z3), [t34] "=&r" (t34),
+ [t56] "=&r" (t56), [t78] "=&r" (t78), [t0a] "=&r" (t0a),
+ [t1a] "=&r" (t1a), [t2a] "=&r" (t2a), [t3a] "=&r" (t3a)
+ : [tmpz] "r" (tmpz)
+ : "memory"
+ );
+ }
+
+ if (fft_size < 8)
+ return;
+
+ pom = 23170;
+
+ num_transforms = (num_transforms >> 1) | 1;
+ for (n=0; n<num_transforms; n++)
+ {
+ offset = fft_offsets_lut[n] << 3;
+ tmpz = z + offset;
+
+ /* fft8 */
+ __asm__ volatile (
+ "lw %[in1], 16(%[tmpz]) \t\n"
+ "lw %[in2], 20(%[tmpz]) \t\n"
+ "lw %[in3], 24(%[tmpz]) \t\n"
+ "lw %[in4], 28(%[tmpz]) \t\n"
+ "addq.ph %[temp1], %[in1], %[in2] \t\n"
+ "subq.ph %[temp3], %[in1], %[in2] \t\n"
+ "seh %[tmp1], %[temp1] \t\n"
+ "sra %[temp1], %[temp1], 16 \t\n"
+ "seh %[tmp2], %[temp1] \t\n"
+ "addq.ph %[temp2], %[in3], %[in4] \t\n"
+ "subq.ph %[temp4], %[in3], %[in4] \t\n"
+ "seh %[tmp3], %[temp2] \t\n"
+ "sra %[temp2], %[temp2], 16 \t\n"
+ "seh %[tmp4], %[temp2] \t\n"
+ "add %[tmp5], %[tmp1], %[tmp3] \t\n"
+ "sub %[tmp7], %[tmp1], %[tmp3] \t\n"
+ "add %[tmp6], %[tmp2], %[tmp4] \t\n"
+ "sub %[tmp8], %[tmp2], %[tmp4] \t\n"
+ "seh %[tmp1], %[temp3] \t\n"
+ "sra %[temp3], %[temp3], 16 \t\n"
+ "seh %[tmp2], %[temp3] \t\n"
+ "seh %[tmp3], %[temp4] \t\n"
+ "sra %[temp4], %[temp4], 16 \t\n"
+ "seh %[tmp4], %[temp4] \t\n"
+ "lw %[in1], 0(%[tmpz]) \t\n"
+ "move %[temp1], %[tmp6] \t\n"
+ "append %[temp1], %[tmp5], 16 \t\n"
+ "subq.ph %[temp3], %[in1], %[temp1] \t\n"
+ "addq.ph %[temp4], %[in1], %[temp1] \t\n"
+ "sw %[temp3], 16(%[tmpz]) \t\n"
+ "sw %[temp4], 0(%[tmpz]) \t\n"
+ "lw %[in2], 8(%[tmpz]) \t\n"
+ "negu %[temp1], %[tmp7] \t\n"
+ "append %[temp1], %[tmp8], 16 \t\n"
+ "subq.ph %[temp2], %[in2], %[temp1] \t\n"
+ "addq.ph %[temp3], %[in2], %[temp1] \t\n"
+ "sw %[temp2], 24(%[tmpz]) \t\n"
+ "sw %[temp3], 8(%[tmpz]) \t\n"
+ "add %[tmp5], %[tmp1], %[tmp2] \t\n"
+ "mul %[tmp5], %[tmp5], %[pom] \t\n"
+ "sub %[tmp6], %[tmp2], %[tmp1] \t\n"
+ "mul %[tmp6], %[tmp6], %[pom] \t\n"
+ "sub %[tmp7], %[tmp3], %[tmp4] \t\n"
+ "mul %[tmp7], %[tmp7], %[pom] \t\n"
+ "add %[tmp8], %[tmp3], %[tmp4] \t\n"
+ "mul %[tmp8], %[tmp8], %[pom] \t\n"
+ "shra_r.w %[tmp5], %[tmp5], 15 \t\n"
+ "lw %[in1], 4(%[tmpz]) \t\n"
+ "shra_r.w %[tmp6], %[tmp6], 15 \t\n"
+ "lw %[in2], 12(%[tmpz]) \t\n"
+ "shra_r.w %[tmp7], %[tmp7], 15 \t\n"
+ "add %[tmp1], %[tmp5], %[tmp7] \t\n"
+ "shra_r.w %[tmp8], %[tmp8], 15 \t\n"
+ "add %[tmp2], %[tmp6], %[tmp8] \t\n"
+ "sub %[tmp3], %[tmp5], %[tmp7] \t\n"
+ "sub %[tmp4], %[tmp6], %[tmp8] \t\n"
+ "move %[temp1], %[tmp2] \t\n"
+ "append %[temp1], %[tmp1], 16 \t\n"
+ "subq.ph %[temp2], %[in1], %[temp1] \t\n"
+ "addq.ph %[temp3], %[in1], %[temp1] \t\n"
+ "sw %[temp2], 20(%[tmpz]) \t\n"
+ "sw %[temp3], 4(%[tmpz]) \t\n"
+ "negu %[temp1], %[tmp3] \t\n"
+ "append %[temp1], %[tmp4], 16 \t\n"
+ "subq.ph %[temp2], %[in2], %[temp1] \t\n"
+ "addq.ph %[temp3], %[in2], %[temp1] \t\n"
+ "sw %[temp2], 28(%[tmpz]) \t\n"
+ "sw %[temp3], 12(%[tmpz]) \t\n"
+
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+ [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [temp1] "=&r" (temp1),
+ [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [temp4] "=&r" (temp4),
+ [in1] "=&r" (in1), [in2] "=&r" (in2), [in3] "=&r" (in3),
+ [in4] "=&r" (in4)
+ : [tmpz] "r" (tmpz), [pom] "r" (pom)
+ : "memory"
+ );
+ }
+
+ step = 1 << (MAX_LOG2_NFFT - 4);
+ n4 = 4;
+
+ for (nbits=4; nbits<=s->nbits; nbits++)
+ {
+ n2 = 2*n4;
+ n34 = 3*n4;
+ num_transforms = (num_transforms >> 1) | 1;
+ for (n=0; n<num_transforms; n++)
+ {
+ offset = fft_offsets_lut[n] << nbits;
+ tmpz = z + offset;
+
+ __asm__ volatile (
+ "sll %[z0], %[n2], 2 \n\t"
+ "sll %[z1], %[n34], 2 \n\t"
+ "sll %[z2], %[n4], 2 \n\t"
+ "addu %[addr1], %[tmpz], %[z0] \n\t"
+ "addu %[addr2], %[tmpz], %[z1] \n\t"
+ "addu %[addr3], %[tmpz], %[z2] \n\t"
+ "lw %[z0], 0(%[addr1]) \n\t"
+ "lw %[z1], 0(%[addr2]) \n\t"
+ "lw %[z2], 0(%[tmpz]) \n\t"
+ "sll %[step2], %[step], 2 \n\t"
+ "lw %[z3], 0(%[addr3]) \n\t"
+ "addq.ph %[t56], %[z0], %[z1] \n\t"
+ "subq.ph %[t12], %[z0], %[z1] \n\t"
+ "addq.ph %[t0a], %[z2], %[t56] \n\t"
+ "packrl.ph %[z3], %[z3], %[z3] \n\t"
+ "subq.ph %[t2a], %[z2], %[t56] \n\t"
+ "addq.ph %[t1a], %[z3], %[t12] \n\t"
+ "subq.ph %[t3a], %[z3], %[t12] \n\t"
+ "sw %[t0a], 0(%[tmpz]) \n\t"
+ "sw %[t2a], 0(%[addr1]) \n\t"
+ "packrl.ph %[z0], %[t1a], %[t3a] \n\t"
+ "packrl.ph %[z1], %[t3a], %[t1a] \n\t"
+ "sw %[z0], 0(%[addr2]) \n\t"
+ "sw %[z1], 0(%[addr3]) \n\t"
+
+ : [z0] "=&r" (z0), [z1] "=&r" (z1), [t12] "=&r" (t12),
+ [z2] "=&r" (z2), [z3] "=&r" (z3), [step2] "=&r" (step2),
+ [t56] "=&r" (t56), [t0a] "=&r" (t0a), [t1a] "=&r" (t1a),
+ [t2a] "=&r" (t2a), [t3a] "=&r" (t3a), [addr1] "=&r" (addr1),
+ [addr2] "=&r" (addr2), [addr3] "=&r" (addr3)
+ : [n2] "r" (n2), [n34] "r" (n34), [n4] "r" (n4), [tmpz] "r" (tmpz),
+ [step] "r" (step)
+ : "memory"
+ );
+
+ w_re_ptr = (FFTSample*)(ff_cos_65536_fixed + step);
+ w_im_ptr = (FFTSample*)(ff_cos_65536_fixed + MAX_FFT_SIZE/4 - step);
+
+ for (i=1; i<n4; i ++ )
+ {
+ w_re = w_re_ptr[0];
+ w_im = w_im_ptr[0];
+
+ tmpz_n2 = tmpz + n2;
+ tmpz_n4 = tmpz + n4;
+ tmpz_n34 = tmpz + n34;
+
+ tmpz_n2_i = tmpz_n2 + i;
+ tmpz_n4_i = tmpz_n4 + i;
+ tmpz_n34_i = tmpz_n34 + i;
+ tmpz_i = tmpz + i;
+
+ __asm__ volatile (
+ "lh %[z_re_n2], 0(%[tmpz_n2_i]) \n\t"
+ "lh %[z_im_n2], 2(%[tmpz_n2_i]) \n\t"
+ "lh %[z_re_n34], 0(%[tmpz_n34_i]) \n\t"
+ "lh %[z_im_n34], 2(%[tmpz_n34_i]) \n\t"
+ "mult $ac0, %[w_re], %[z_re_n2] \n\t"
+ "mult $ac2, %[w_re], %[z_re_n34] \n\t"
+ "mult $ac1, %[w_re], %[z_im_n2] \n\t"
+ "mult $ac3, %[w_re], %[z_im_n34] \n\t"
+ "madd $ac0, %[w_im], %[z_im_n2] \n\t"
+ "msub $ac2, %[w_im], %[z_im_n34] \n\t"
+ "msub $ac1, %[w_im], %[z_re_n2] \n\t"
+ "madd $ac3, %[w_im], %[z_re_n34] \n\t"
+ "lh %[z_re], 0(%[tmpz_i]) \n\t"
+ "extr_r.w %[tmp1], $ac0, 15 \n\t"
+ "extr_r.w %[tmp3], $ac2, 15 \n\t"
+ "extr_r.w %[tmp2], $ac1, 15 \n\t"
+ "extr_r.w %[tmp4], $ac3, 15 \n\t"
+ "lh %[z_im], 2(%[tmpz_i]) \n\t"
+ "lh %[z_re_n4], 0(%[tmpz_n4_i]) \n\t"
+ "lh %[z_im_n4], 2(%[tmpz_n4_i]) \n\t"
+ "add %[tmp5], %[tmp1], %[tmp3] \n\t"
+ "sub %[tmp1], %[tmp1], %[tmp3] \n\t"
+ "add %[tmp6], %[tmp2], %[tmp4] \n\t"
+ "sub %[tmp2], %[tmp2], %[tmp4] \n\t"
+ "subq_s.ph %[z_re_n2], %[z_re], %[tmp5] \n\t"
+ "addq_s.ph %[z_re], %[z_re], %[tmp5] \n\t"
+ "subq_s.ph %[z_im_n2], %[z_im], %[tmp6] \n\t"
+ "addq_s.ph %[z_im], %[z_im], %[tmp6] \n\t"
+ "sh %[z_re_n2], 0(%[tmpz_n2_i]) \n\t"
+ "sh %[z_re], 0(%[tmpz_i]) \n\t"
+ "sh %[z_im_n2], 2(%[tmpz_n2_i]) \n\t"
+ "sh %[z_im], 2(%[tmpz_i]) \n\t"
+ "subq_s.ph %[z_re_n34], %[z_re_n4], %[tmp2] \n\t"
+ "addq_s.ph %[z_re_n4], %[z_re_n4], %[tmp2] \n\t"
+ "addq_s.ph %[z_im_n34], %[z_im_n4], %[tmp1] \n\t"
+ "subq_s.ph %[z_im_n4], %[z_im_n4], %[tmp1] \n\t"
+ "sh %[z_re_n34], 0(%[tmpz_n34_i]) \n\t"
+ "sh %[z_re_n4], 0(%[tmpz_n4_i]) \n\t"
+ "sh %[z_im_n34], 2(%[tmpz_n34_i]) \n\t"
+ "sh %[z_im_n4], 2(%[tmpz_n4_i]) \n\t"
+
+ : [z_re_n2] "=&r" (z_re_n2), [z_re] "=&r" (z_re), [z_im] "=&r" (z_im),
+ [z_im_n2] "=&r" (z_im_n2), [z_re_n34] "=&r" (z_re_n34),
+ [z_im_n4] "=&r" (z_im_n4), [z_re_n4] "=&r" (z_re_n4),
+ [z_im_n34] "=&r" (z_im_n34), [tmp1] "=r" (tmp1),
+ [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+ [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6)
+ : [w_re] "r" (w_re), [w_im] "r" (w_im), [tmpz_n2_i] "r" (tmpz_n2_i),
+ [tmpz_n34_i] "r" (tmpz_n34_i), [tmpz_n4_i] "r" (tmpz_n4_i),
+ [tmpz_i] "r" (tmpz_i)
+ : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+ "$ac3hi", "$ac3lo"
+ );
+ w_re_ptr += step;
+ w_im_ptr -= step;
+ }
+ }
+ step >>= 1;
+ n4 <<= 1;
+ }
+}
+#else
+/**
+ * In-place fixed-point FFT, plain C version.
+ *
+ * This is the #else branch of the HAVE_MIPSDSPR2 && HAVE_INLINE_ASM
+ * conditional, i.e. the variant compiled when that condition is false.
+ * The work is done in three phases: 4-point butterflies, 8-point combines,
+ * and then one combine pass per stage for nbits = 4 .. s->nbits.
+ * In every phase the start of each sub-transform is taken from
+ * fft_offsets_lut[] and scaled by the sub-transform size.
+ *
+ * @param s FFT context; only s->nbits is read here (transform size is
+ *          1 << s->nbits)
+ * @param z complex buffer, transformed in place
+ */
+static void ff_fft_fixed_calc_mips(FFTContext *s, FFTComplex *z) {
+
+ int nbits, i, n, num_transforms, offset, step;
+ int n4, n2, n34;
+ int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+
+ FFTComplex *tmpz;
+
+ int w_re, w_im;
+ FFTSample *w_re_ptr, *w_im_ptr;
+ int pom;
+ const int fft_size = (1 << s->nbits);
+
+ /* Phase 1: 4-point butterflies. num_transforms is the loop count for this
+  * phase; it is reduced with (x >> 1) | 1 before every later phase. */
+ num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+ for (n=0; n<num_transforms; n++)
+ {
+ offset = fft_offsets_lut[n] << 2;
+ tmpz = z + offset;
+
+ /* Sums and differences of the pairs (0,1) and (2,3). */
+ tmp1 = tmpz[0].re + tmpz[1].re;
+ tmp5 = tmpz[2].re + tmpz[3].re;
+ tmp2 = tmpz[0].im + tmpz[1].im;
+ tmp6 = tmpz[2].im + tmpz[3].im;
+ tmp3 = tmpz[0].re - tmpz[1].re;
+ tmp8 = tmpz[2].im - tmpz[3].im;
+ tmp4 = tmpz[0].im - tmpz[1].im;
+ tmp7 = tmpz[2].re - tmpz[3].re;
+
+ /* Results are stored back without av_clip_int16(), unlike phase 3. */
+ tmpz[0].re = tmp1 + tmp5;
+ tmpz[2].re = tmp1 - tmp5;
+ tmpz[0].im = tmp2 + tmp6;
+ tmpz[2].im = tmp2 - tmp6;
+ tmpz[1].re = tmp3 + tmp8;
+ tmpz[3].re = tmp3 - tmp8;
+ tmpz[1].im = tmp4 - tmp7;
+ tmpz[3].im = tmp4 + tmp7;
+
+ }
+ /* Nothing more to do for transforms smaller than 8 points. */
+ if (fft_size < 8)
+ return;
+
+ /* Phase 2: 8-point combines; tmpz[0..3] are combined with sums and
+  * differences formed from tmpz[4..7]. */
+ num_transforms = (num_transforms >> 1) | 1;
+ for (n=0; n<num_transforms; n++)
+ {
+ offset = fft_offsets_lut[n] << 3;
+ tmpz = z + offset;
+
+ tmp1 = tmpz[4].re + tmpz[5].re;
+ tmp3 = tmpz[6].re + tmpz[7].re;
+ tmp2 = tmpz[4].im + tmpz[5].im;
+ tmp4 = tmpz[6].im + tmpz[7].im;
+
+ tmp5 = tmp1 + tmp3;
+ tmp7 = tmp1 - tmp3;
+ tmp6 = tmp2 + tmp4;
+ tmp8 = tmp2 - tmp4;
+
+ tmp1 = tmpz[4].re - tmpz[5].re;
+ tmp2 = tmpz[4].im - tmpz[5].im;
+ tmp3 = tmpz[6].re - tmpz[7].re;
+ tmp4 = tmpz[6].im - tmpz[7].im;
+
+ /* Each pair of stores reads the old tmpz[k] value twice, so the
+  * "minus" store must come before the "plus" store overwrites it. */
+ tmpz[4].re = tmpz[0].re - tmp5;
+ tmpz[0].re = tmpz[0].re + tmp5;
+ tmpz[4].im = tmpz[0].im - tmp6;
+ tmpz[0].im = tmpz[0].im + tmp6;
+ tmpz[6].re = tmpz[2].re - tmp8;
+ tmpz[2].re = tmpz[2].re + tmp8;
+ tmpz[6].im = tmpz[2].im + tmp7;
+ tmpz[2].im = tmpz[2].im - tmp7;
+
+ /* 23170 is approximately 32768 / sqrt(2); the products below are
+  * rounded (+0x4000) and shifted right by 15. */
+ pom = 23170;
+
+ tmp5 = (pom * (tmp1 + tmp2) + 0x4000) >> 15;
+ tmp7 = (pom * (tmp3 - tmp4) + 0x4000) >> 15;
+ tmp6 = (pom * (tmp2 - tmp1) + 0x4000) >> 15;
+ tmp8 = (pom * (tmp3 + tmp4) + 0x4000) >> 15;
+
+ tmp1 = tmp5 + tmp7;
+ tmp3 = tmp5 - tmp7;
+ tmp2 = tmp6 + tmp8;
+ tmp4 = tmp6 - tmp8;
+
+ tmpz[5].re = tmpz[1].re - tmp1;
+ tmpz[1].re = tmpz[1].re + tmp1;
+ tmpz[5].im = tmpz[1].im - tmp2;
+ tmpz[1].im = tmpz[1].im + tmp2;
+ tmpz[7].re = tmpz[3].re - tmp4;
+ tmpz[3].re = tmpz[3].re + tmp4;
+ tmpz[7].im = tmpz[3].im + tmp3;
+ tmpz[3].im = tmpz[3].im - tmp3;
+ }
+
+ /* Phase 3: one pass per stage. n4 is the quarter size of the current
+  * sub-transform (doubled each stage); step is the stride into
+  * ff_cos_65536_fixed (halved each stage). */
+ step = 1 << (MAX_LOG2_NFFT - 4);
+ n4 = 4;
+ for (nbits=4; nbits<=s->nbits; nbits++)
+ {
+ n2 = 2*n4;
+ n34 = 3*n4;
+ num_transforms = (num_transforms >> 1) | 1;
+ for (n=0; n<num_transforms; n++)
+ {
+ offset = fft_offsets_lut[n] << nbits;
+ tmpz = z + offset;
+
+ /* Element 0 of each quarter is combined without any multiplication
+  * and without clipping. */
+ tmp5 = tmpz[ n2].re + tmpz[n34].re;
+ tmp1 = tmpz[ n2].re - tmpz[n34].re;
+ tmp6 = tmpz[ n2].im + tmpz[n34].im;
+ tmp2 = tmpz[ n2].im - tmpz[n34].im;
+
+ tmpz[ n2].re = tmpz[ 0].re - tmp5;
+ tmpz[ 0].re = tmpz[ 0].re + tmp5;
+ tmpz[ n2].im = tmpz[ 0].im - tmp6;
+ tmpz[ 0].im = tmpz[ 0].im + tmp6;
+ tmpz[n34].re = tmpz[n4].re - tmp2;
+ tmpz[ n4].re = tmpz[n4].re + tmp2;
+ tmpz[n34].im = tmpz[n4].im + tmp1;
+ tmpz[ n4].im = tmpz[n4].im - tmp1;
+
+ /* w_re walks forward from index step, w_im walks backward from index
+  * MAX_FFT_SIZE/4 - step, both in the same table. */
+ w_re_ptr = (FFTSample*)(ff_cos_65536_fixed + step);
+ w_im_ptr = (FFTSample*)(ff_cos_65536_fixed + MAX_FFT_SIZE/4 - step);
+
+ for (i=1; i<n4; i++)
+ {
+ w_re = w_re_ptr[0];
+ w_im = w_im_ptr[0];
+
+ /* tmp1 + j*tmp2 = tmpz[n2+i]  * (w_re - j*w_im),
+  * tmp3 + j*tmp4 = tmpz[n34+i] * (w_re + j*w_im),
+  * each rounded (+0x4000) and shifted right by 15. */
+ tmp1 = (w_re * tmpz[ n2+i].re + w_im * tmpz[ n2+i].im + 0x4000) >> 15;
+ tmp2 = (w_re * tmpz[ n2+i].im - w_im * tmpz[ n2+i].re + 0x4000) >> 15;
+ tmp3 = (w_re * tmpz[n34+i].re - w_im * tmpz[n34+i].im + 0x4000) >> 15;
+ tmp4 = (w_re * tmpz[n34+i].im + w_im * tmpz[n34+i].re + 0x4000) >> 15;
+
+ tmp5 = tmp1 + tmp3;
+ tmp1 = tmp1 - tmp3;
+ tmp6 = tmp2 + tmp4;
+ tmp2 = tmp2 - tmp4;
+
+ /* Here every stored value goes through av_clip_int16(). */
+ tmpz[n2+i ].re = av_clip_int16(tmpz[i ].re - tmp5);
+ tmpz[i ].re = av_clip_int16(tmpz[i ].re + tmp5);
+ tmpz[n2+i ].im = av_clip_int16(tmpz[i ].im - tmp6);
+ tmpz[i ].im = av_clip_int16(tmpz[i ].im + tmp6);
+ tmpz[n34+i].re = av_clip_int16(tmpz[n4+i].re - tmp2);
+ tmpz[n4+i ].re = av_clip_int16(tmpz[n4+i].re + tmp2);
+ tmpz[n34+i].im = av_clip_int16(tmpz[n4+i].im + tmp1);
+ tmpz[n4+i ].im = av_clip_int16(tmpz[n4+i].im - tmp1);
+
+ w_re_ptr += step;
+ w_im_ptr -= step;
+ }
+ }
+ /* Next stage: half the table stride, twice the quarter size. */
+ step >>= 1;
+ n4 <<= 1;
+ }
+}
+#endif /* HAVE_MIPSDSPR2 && HAVE_INLINE_ASM */
+
+/**
+ * Hook the MIPS fixed-point FFT routine (and, when CONFIG_MDCT is set, the
+ * half-IMDCT routine) into an FFT context.
+ *
+ * When HAVE_MIPSFPU is 0 this also calls ff_fft_lut_init() on
+ * fft_offsets_lut.
+ * NOTE(review): presumably the table is initialised elsewhere when
+ * HAVE_MIPSFPU is set - confirm.
+ *
+ * @param s FFT context whose function pointers are set
+ */
+void ff_fft_fixed_init_mips(FFTContext *s)
+{
+#if !HAVE_MIPSFPU
+    int lut_pos = 0;
+
+    ff_fft_lut_init(fft_offsets_lut, 0, 1 << 16, &lut_pos);
+#endif
+
+    s->fft_fixed_calc = ff_fft_fixed_calc_mips;
+#if CONFIG_MDCT
+    s->imdct_fixed_half = ff_imdct_fixed_half_mips;
+#endif /* CONFIG_MDCT */
+}
diff --git a/libavcodec/mips/fft_table_fixed.h b/libavcodec/mips/fft_table_fixed.h
new file mode 100644
index 0000000..637cf99
--- /dev/null
+++ b/libavcodec/mips/fft_table_fixed.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors: Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * Tables necessary for performing fixed-point MDCT/IMDCT transforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_FFT_FIXED_TABLE_H
+#define AVCODEC_MIPS_FFT_FIXED_TABLE_H
+
+/* TODO: Support MDCT/IMDCT other than 64 and 128 */
+
+/*
+ * 64-entry table of FFTSample constants.
+ * NOTE(review): the values look like -32768 * sin(2*pi*(i + 1/8) / 256)
+ * truncated to integers - confirm against the code that reads the table.
+ * NOTE(review): this is a non-static, non-const definition inside a header,
+ * so every translation unit that includes the header defines the symbol -
+ * confirm the header is included from exactly one source file.
+ */
+FFTSample tsin_fixed[64] = { -100, -904, -1708, -2510, -3311, -4110, -4907,
+ -5701, -6491, -7277, -8059, -8836, -9608, -10374, -11133, -11886,
+ -12632, -13370, -14100, -14822, -15535, -16238, -16932, -17615, -18288,
+ -18950, -19600, -20239, -20865, -21479, -22080, -22667, -23241, -23801,
+ -24346, -24877, -25393, -25894, -26379, -26848, -27301, -27737, -28157,
+ -28560, -28946, -29314, -29664, -29997, -30312, -30608, -30886, -31145,
+ -31386, -31607, -31810, -31993, -32157, -32302, -32427, -32533, -32619,
+ -32686, -32733, -32760 };
+
+/*
+ * 64-entry table of FFTSample constants.
+ * NOTE(review): the values look like -32768 * cos(2*pi*(i + 1/8) / 256)
+ * truncated to integers, with the first entry limited to -32767 - confirm
+ * against the code that reads the table.
+ */
+FFTSample tcos_fixed[64] = { -32767, -32755, -32723, -32671, -32600, -32509,
+ -32398, -32268, -32118, -31949, -31761, -31554, -31327, -31082, -30818,
+ -30535, -30235, -29915, -29578, -29223, -28851, -28461, -28054, -27630,
+ -27189, -26732, -26259, -25770, -25266, -24746, -24211, -23662, -23099,
+ -22521, -21931, -21326, -20709, -20080, -19439, -18785, -18121, -17445,
+ -16759, -16063, -15357, -14642, -13919, -13186, -12446, -11699, -10944,
+ -10183, -9415, -8642, -7864, -7081, -6294, -5503, -4708, -3911, -3111,
+ -2310, -1507, -703 };
+
+/*
+ * 128-entry table of FFTSample constants.
+ * NOTE(review): the values look like -32768 * sin(2*pi*(i + 1/8) / 512)
+ * truncated to integers - confirm against the code that reads the table.
+ */
+FFTSample tsin_fixed_128[128] = { -50, -452, -854, -1256, -1658, -2059, -2460,
+ -2861, -3261, -3661, -4061, -4459, -4857, -5255, -5651, -6047, -6442,
+ -6835, -7228, -7620, -8010, -8400, -8788, -9174, -9560, -9944, -10326,
+ -10707, -11086, -11464, -11839, -12213, -12586, -12956, -13324, -13691,
+ -14055, -14417, -14777, -15135, -15491, -15844, -16195, -16543, -16889,
+ -17232, -17573, -17911, -18246, -18579, -18909, -19236, -19560, -19881,
+ -20199, -20514, -20826, -21135, -21441, -21743, -22042, -22338, -22631,
+ -22920, -23205, -23488, -23766, -24041, -24313, -24580, -24845, -25105,
+ -25361, -25614, -25863, -26108, -26349, -26586, -26819, -27048, -27273,
+ -27494, -27711, -27923, -28131, -28335, -28535, -28731, -28922, -29109,
+ -29291, -29469, -29643, -29812, -29977, -30137, -30292, -30443, -30590,
+ -30732, -30869, -31001, -31129, -31253, -31371, -31485, -31594, -31698,
+ -31798, -31892, -31982, -32067, -32148, -32223, -32294, -32359, -32420,
+ -32476, -32527, -32573, -32615, -32651, -32682, -32709, -32730, -32747,
+ -32759, -32766 };
+
+/*
+ * 128-entry table of FFTSample constants.
+ * NOTE(review): the values look like -32768 * cos(2*pi*(i + 1/8) / 512)
+ * truncated to integers, with the first entry limited to -32767 - confirm
+ * against the code that reads the table.
+ */
+FFTSample tcos_fixed_128[128] = { -32767, -32764, -32756, -32743, -32726, -32703,
+ -32675, -32642, -32605, -32562, -32515, -32463, -32405, -32343, -32276,
+ -32205, -32128, -32047, -31960, -31869, -31773, -31673, -31567, -31457,
+ -31342, -31222, -31098, -30969, -30835, -30697, -30554, -30406, -30254,
+ -30097, -29936, -29770, -29600, -29425, -29246, -29062, -28875, -28682,
+ -28486, -28285, -28080, -27870, -27657, -27439, -27217, -26991, -26761,
+ -26527, -26289, -26047, -25801, -25551, -25298, -25040, -24779, -24514,
+ -24245, -23973, -23697, -23417, -23134, -22848, -22558, -22265, -21968,
+ -21668, -21365, -21058, -20748, -20436, -20120, -19801, -19479, -19154,
+ -18826, -18496, -18163, -17827, -17488, -17146, -16802, -16456, -16107,
+ -15756, -15402, -15046, -14687, -14327, -13964, -13599, -13232, -12864,
+ -12493, -12120, -11746, -11369, -10991, -10612, -10230, -9848, -9463,
+ -9078, -8691, -8302, -7913, -7522, -7130, -6737, -6343, -5948, -5552,
+ -5155, -4758, -4360, -3961, -3561, -3161, -2761, -2360, -1959, -1557,
+ -1155, -753, -351 };
+
+#endif /* AVCODEC_MIPS_FFT_FIXED_TABLE_H */
diff --git a/libavcodec/mips/fmtconvert_mips_fixed.c b/libavcodec/mips/fmtconvert_mips_fixed.c
new file mode 100644
index 0000000..bc3ada0
--- /dev/null
+++ b/libavcodec/mips/fmtconvert_mips_fixed.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Zoran Lukic (zlukic at mips.com)
+ *
+ * Format Conversion Utils optimized for MIPS fixed-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/fmtconvert.c
+ */
+
+#include "libavcodec/fmtconvert.h"
+
+/**
+ * Multiply 32-bit integers by a scalar and narrow them to 16 bits.
+ *
+ * For every element the assembly computes (src[i] * mul + 0x8000) >> 16 in
+ * 32-bit arithmetic and stores the low halfword of the result in dst[i].
+ *
+ * Eight elements are handled per loop iteration, so a len that is not a
+ * multiple of 8 is effectively rounded up to the next multiple: both buffers
+ * must be accessible up to that rounded length.
+ *
+ * @param dst destination buffer of 16-bit values
+ * @param src source buffer of 32-bit values
+ * @param mul factor applied to every source value before the rounding shift
+ * @param len number of elements to convert
+ */
+static void int32_to_fixed_fmul_scalar_mips(int16_t *dst, const int *src,
+                                            int mul, int len)
+{
+    int i;
+    /* Scratch registers hold full 32-bit loads and products, hence int. */
+    int temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;
+
+    for (i = 0; i < len; i += 8) {
+        /*
+         * NOTE(review): 0x8000 does not fit the signed 16-bit immediate of a
+         * hardware addiu; this presumably relies on the assembler expanding
+         * the instruction - confirm it still assembles with macros disabled.
+         */
+        __asm__ volatile (
+            "lw     %[temp1],   0(%[src_i])             \n\t"
+            "lw     %[temp3],   4(%[src_i])             \n\t"
+            "lw     %[temp5],   8(%[src_i])             \n\t"
+            "lw     %[temp7],   12(%[src_i])            \n\t"
+            "lw     %[temp9],   16(%[src_i])            \n\t"
+            "lw     %[temp11],  20(%[src_i])            \n\t"
+            "lw     %[temp13],  24(%[src_i])            \n\t"
+            "lw     %[temp15],  28(%[src_i])            \n\t"
+            "mul    %[temp1],   %[temp1],   %[mul]      \n\t"
+            "mul    %[temp3],   %[temp3],   %[mul]      \n\t"
+            "mul    %[temp5],   %[temp5],   %[mul]      \n\t"
+            "mul    %[temp7],   %[temp7],   %[mul]      \n\t"
+            "mul    %[temp9],   %[temp9],   %[mul]      \n\t"
+            "mul    %[temp11],  %[temp11],  %[mul]      \n\t"
+            "mul    %[temp13],  %[temp13],  %[mul]      \n\t"
+            "mul    %[temp15],  %[temp15],  %[mul]      \n\t"
+            "addiu  %[temp1],   %[temp1],   0x8000      \n\t"
+            "addiu  %[temp3],   %[temp3],   0x8000      \n\t"
+            "addiu  %[temp5],   %[temp5],   0x8000      \n\t"
+            "addiu  %[temp7],   %[temp7],   0x8000      \n\t"
+            "addiu  %[temp9],   %[temp9],   0x8000      \n\t"
+            "addiu  %[temp11],  %[temp11],  0x8000      \n\t"
+            "addiu  %[temp13],  %[temp13],  0x8000      \n\t"
+            "addiu  %[temp15],  %[temp15],  0x8000      \n\t"
+            "sra    %[temp1],   %[temp1],   0x10        \n\t"
+            "sra    %[temp3],   %[temp3],   0x10        \n\t"
+            "sra    %[temp5],   %[temp5],   0x10        \n\t"
+            "sra    %[temp7],   %[temp7],   0x10        \n\t"
+            "sra    %[temp9],   %[temp9],   0x10        \n\t"
+            "sra    %[temp11],  %[temp11],  0x10        \n\t"
+            "sra    %[temp13],  %[temp13],  0x10        \n\t"
+            "sra    %[temp15],  %[temp15],  0x10        \n\t"
+            "sh     %[temp1],   0(%[dst_i])             \n\t"
+            "sh     %[temp3],   2(%[dst_i])             \n\t"
+            "sh     %[temp5],   4(%[dst_i])             \n\t"
+            "sh     %[temp7],   6(%[dst_i])             \n\t"
+            "sh     %[temp9],   8(%[dst_i])             \n\t"
+            "sh     %[temp11],  10(%[dst_i])            \n\t"
+            "sh     %[temp13],  12(%[dst_i])            \n\t"
+            "sh     %[temp15],  14(%[dst_i])            \n\t"
+
+            /*
+             * Early-clobber ("=&r") is required: every temporary is written
+             * while src_i, mul and dst_i are still needed by later
+             * instructions, so none of them may share a register with an
+             * input operand.
+             */
+            : [temp1] "=&r" (temp1),   [temp11] "=&r" (temp11),
+              [temp13] "=&r" (temp13), [temp15] "=&r" (temp15),
+              [temp3] "=&r" (temp3),   [temp5] "=&r" (temp5),
+              [temp7] "=&r" (temp7),   [temp9] "=&r" (temp9)
+            : [dst_i] "r" (dst+i),     [src_i] "r" (src+i),
+              [mul] "r" (mul)
+            : "memory"
+        );
+    }
+}
+
+/**
+ * Convert one 32-bit sample to a 16-bit value.
+ *
+ * The value at *src is kept when it is below 0xf000 (signed compare) and
+ * replaced by 0xefff otherwise; the low halfword of the result is then
+ * sign-extended with seh and returned.
+ * NOTE(review): 0xefff is larger than INT16_MAX, so inputs in
+ * 0x8000..0xefff come out negative after seh, and there is no lower
+ * bound - confirm that this is the intended clipping.
+ *
+ * @param src pointer to the sample to convert
+ * @return the converted sample
+ */
+static inline int fixed_to_int16_one_mips(const int *src)
+{
+ int16_t ret;
+ int temp1, temp7, temp8;
+ /* The outputs are not early-clobber; src is only used by the first
+  * instruction (the lw), before any other operand is written. */
+ __asm__ volatile (
+ "lw %[temp1], 0(%[src_i1]) \n\t"
+ "li %[temp8], 0xf000 \n\t"
+ "li %[ret1], 0xefff \n\t"
+ "slt %[temp7], %[temp1], %[temp8] \n\t"
+ "movn %[ret1], %[temp1], %[temp7] \n\t"
+ "seh %[ret1], %[ret1] \n\t"
+ : [temp1] "=r" (temp1), [temp7] "=r" (temp7),
+ [temp8] "=r" (temp8), [ret1] "=r" (ret)
+ : [src_i1] "r" (src)
+ : "memory"
+ );
+ return (int16_t) ret;
+}
+
+/**
+ * Convert per-channel 32-bit sample arrays to one interleaved 16-bit array.
+ *
+ * Sample i of channel c is written to dst[channels * i + c]. Every sample
+ * gets the same treatment as in fixed_to_int16_one_mips(): values below
+ * 0xf000 are kept, all others become 0xefff, and the low halfword is
+ * sign-extended. Two and six channels have dedicated assembly loops; any
+ * other channel count goes through fixed_to_int16_one_mips().
+ *
+ * @param dst      interleaved output, channels * len entries are written
+ * @param src      array of per-channel input pointers
+ * @param len      number of samples per channel
+ * @param channels number of channels
+ */
+static void fixed_to_int16_interleave_mips(int16_t *dst, const int **src,
+ long len, int channels)
+{
+ int i,j,c;
+ if(channels==2) {
+ /* Stereo: one sample from each channel per iteration; the asm outputs
+  * are bound directly to dst[2*i] and dst[2*i+1]. */
+ for(i=0; i<len; i++) {
+ int temp, temp1, temp7, temp8;
+ __asm__ volatile (
+ "lw %[temp], 0(%[src_i]) \n\t"
+ "lw %[temp1], 0(%[src_i1]) \n\t"
+ "li %[temp8], 0xf000 \n\t"
+ "li %[ret], 0xefff \n\t"
+ "li %[ret1], 0xefff \n\t"
+ "slt %[temp7], %[temp], %[temp8] \n\t"
+ "movn %[ret], %[temp], %[temp7] \n\t"
+ "slt %[temp7], %[temp1], %[temp8] \n\t"
+ "movn %[ret1], %[temp1], %[temp7] \n\t"
+ "seh %[ret], %[ret] \n\t"
+ "seh %[ret1], %[ret1] \n\t"
+
+ : [temp] "=&r" (temp), [temp1] "=&r" (temp1),
+ [temp7] "=&r" (temp7), [temp8] "=&r" (temp8),
+ [ret] "=&r" (dst[2*i]), [ret1] "=&r" (dst[2*i+1])
+ : [src_i] "r" (src[0]+i), [src_i1] "r" (src[1]+i)
+ : "memory"
+ );
+ }
+ }
+ else {
+ if(channels==6) {
+ /* Six channels: same per-sample sequence as the stereo path, with the
+  * asm outputs bound to dst[6*i] .. dst[6*i+5]. */
+ for(i=0; i<len; i++) {
+ int temp, temp1, temp2, temp3, temp4, temp5, temp7, temp8;
+ __asm__ volatile (
+ "lw %[temp], 0(%[src_i]) \n\t"
+ "lw %[temp1], 0(%[src_i1]) \n\t"
+ "lw %[temp2], 0(%[src_i2]) \n\t"
+ "lw %[temp3], 0(%[src_i3]) \n\t"
+ "lw %[temp4], 0(%[src_i4]) \n\t"
+ "lw %[temp5], 0(%[src_i5]) \n\t"
+ "li %[temp8], 0xf000 \n\t"
+ "li %[ret], 0xefff \n\t"
+ "li %[ret1], 0xefff \n\t"
+ "li %[ret2], 0xefff \n\t"
+ "li %[ret3], 0xefff \n\t"
+ "li %[ret4], 0xefff \n\t"
+ "li %[ret5], 0xefff \n\t"
+ "slt %[temp7], %[temp], %[temp8] \n\t"
+ "movn %[ret], %[temp], %[temp7] \n\t"
+ "slt %[temp7], %[temp1], %[temp8] \n\t"
+ "movn %[ret1], %[temp1], %[temp7] \n\t"
+ "slt %[temp7], %[temp2], %[temp8] \n\t"
+ "movn %[ret2], %[temp2], %[temp7] \n\t"
+ "slt %[temp7], %[temp3], %[temp8] \n\t"
+ "movn %[ret3], %[temp3], %[temp7] \n\t"
+ "slt %[temp7], %[temp4], %[temp8] \n\t"
+ "movn %[ret4], %[temp4], %[temp7] \n\t"
+ "slt %[temp7], %[temp5], %[temp8] \n\t"
+ "movn %[ret5], %[temp5], %[temp7] \n\t"
+ "seh %[ret], %[ret] \n\t"
+ "seh %[ret1], %[ret1] \n\t"
+ "seh %[ret2], %[ret2] \n\t"
+ "seh %[ret5], %[ret5] \n\t"
+ "seh %[ret3], %[ret3] \n\t"
+ "seh %[ret4], %[ret4] \n\t"
+
+ : [temp] "=&r" (temp), [temp1] "=&r" (temp1),
+ [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+ [temp7] "=&r" (temp7), [temp8] "=&r" (temp8),
+ [ret] "=&r" (dst[6*i]), [ret1] "=&r" (dst[6*i+1]),
+ [ret2] "=&r" (dst[6*i+2]), [ret3] "=&r" (dst[6*i+3]),
+ [ret4] "=&r" (dst[6*i+4]), [ret5] "=&r" (dst[6*i+5])
+ : [src_i] "r" (src[0]+i), [src_i1] "r" (src[1]+i),
+ [src_i2] "r" (src[2]+i), [src_i3] "r" (src[3]+i),
+ [src_i4] "r" (src[4]+i), [src_i5] "r" (src[5]+i)
+ : "memory"
+ );
+ }
+ }
+ else {
+ /* Any other channel count: walk each channel in turn, writing every
+  * channels-th slot of dst starting at index c. */
+ for(c=0; c<channels; c++)
+ for(i=0, j=c; i<len; i++, j+=channels)
+ dst[j] = fixed_to_int16_one_mips(src[c]+i);
+ }
+ }
+}
+
+/**
+ * Install the MIPS fixed-point format-conversion routines into a context.
+ *
+ * @param c     format-conversion context whose function pointers are set
+ * @param avctx not used by this function
+ */
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx)
+{
+    c->fixed_to_int16_interleave  = fixed_to_int16_interleave_mips;
+    c->int32_to_fixed_fmul_scalar = int32_to_fixed_fmul_scalar_mips;
+}
diff --git a/libavutil/common.h b/libavutil/common.h
index a11a325..07433d7 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -163,6 +163,18 @@ static av_always_inline av_const int16_t av_clip_int16_c(int a)
else return a;
}
+#if (ARCH_MIPS)
+/**
+ * Limit a signed integer to at most 0xefff and return it as int16_t.
+ * Only the upper bound is checked; no lower bound is applied. The result is
+ * then converted to the int16_t return type.
+ * NOTE(review): 0xefff (61439) is larger than INT16_MAX (32767), so results
+ * above 32767 do not fit the return type - confirm the intended bound.
+ * @param a value to clip
+ * @return a if a <= 0xefff, otherwise 0xefff, converted to int16_t
+ */
+static av_always_inline av_const int16_t av_clip_int16_c_fixed(int a)
+{
+ return (a > 0xefff ? 0xefff : a);
+}
+#endif /* ARCH_MIPS */
+
/**
* Clip a signed 64-bit integer value into the -2147483648,2147483647 range.
* @param a value to clip
--
1.7.3.4
More information about the ffmpeg-devel
mailing list