[FFmpeg-devel] [PATCH] mips: Implementation of AC3 fixed point decoder and optimization for MIPS.

Babic, Nedeljko nbabic at mips.com
Fri Aug 17 14:13:53 CEST 2012


Hello,

Has anyone had a chance to review this patch?

Thanks,
Nedeljko
________________________________________
From: Nedeljko Babic [nbabic at mips.com]
Sent: Thursday, August 09, 2012 15:50
To: ffmpeg-devel at ffmpeg.org
Cc: Lukac, Zeljko; Babic, Nedeljko
Subject: [PATCH] mips: Implementation of AC3 fixed point decoder and optimization for MIPS.

An AC3 fixed-point decoder is implemented in C, and the appropriate
 functions are optimized for the MIPS architecture. Some of the DSP,
 format-conversion utility and FFT fixed-point functions are optimized as well.

Signed-off-by: Nedeljko Babic <nbabic at mips.com>
---
 doc/mips.txt                            |    6 +
 libavcodec/allcodecs.c                  |    3 +
 libavcodec/dsputil.c                    |   24 +
 libavcodec/dsputil.h                    |    4 +
 libavcodec/fft.c                        |    1 +
 libavcodec/fft.h                        |   12 +
 libavcodec/fmtconvert.c                 |   79 ++
 libavcodec/fmtconvert.h                 |   57 +-
 libavcodec/kbdwin.c                     |   32 +
 libavcodec/kbdwin.h                     |    6 +-
 libavcodec/mips/Makefile                |    4 +
 libavcodec/mips/ac3dec_fixed.c          | 1660 +++++++++++++++++++++++++++++++
 libavcodec/mips/ac3dec_fixed.h          |  234 +++++
 libavcodec/mips/dsputil_mips_fixed.c    |  153 +++
 libavcodec/mips/fft_mips_fixed.c        |  906 +++++++++++++++++
 libavcodec/mips/fft_table_fixed.h       |  105 ++
 libavcodec/mips/fmtconvert_mips_fixed.c |  226 +++++
 libavutil/common.h                      |   12 +
 18 files changed, 3520 insertions(+), 4 deletions(-)
 create mode 100644 libavcodec/mips/ac3dec_fixed.c
 create mode 100644 libavcodec/mips/ac3dec_fixed.h
 create mode 100644 libavcodec/mips/dsputil_mips_fixed.c
 create mode 100644 libavcodec/mips/fft_mips_fixed.c
 create mode 100644 libavcodec/mips/fft_table_fixed.h
 create mode 100644 libavcodec/mips/fmtconvert_mips_fixed.c

diff --git a/doc/mips.txt b/doc/mips.txt
index 6fa6fb4..5b2e710 100644
--- a/doc/mips.txt
+++ b/doc/mips.txt
@@ -47,6 +47,8 @@ Files that have MIPS copyright notice in them:
 * libavutil/mips/
       libm_mips.h
 * libavcodec/mips/
+      ac3dec_fixed.c
+      ac3dec_fixed.h
       acelp_filters_mips.c
       acelp_vectors_mips.c
       amrwbdec_mips.c
@@ -57,9 +59,13 @@ Files that have MIPS copyright notice in them:
       compute_antialias_float.h
       lsp_mips.h
       dsputil_mips.c
+      dsputil_mips_fixed.c
       fft_mips.c
+      fft_mips_fixed.c
       fft_table.h
+      fft_table_fixed.h
       fft_init_table.c
       fmtconvert_mips.c
+      fmtconvert_mips_fixed.c
       mpegaudiodsp_mips_fixed.c
       mpegaudiodsp_mips_float.c
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 8305bc2..319286f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -273,6 +273,9 @@ void avcodec_register_all(void)
     REGISTER_DECODER (AAC_LATM, aac_latm);
     REGISTER_ENCDEC  (AC3, ac3);
     REGISTER_ENCODER (AC3_FIXED, ac3_fixed);
+#if (ARCH_MIPS)
+    REGISTER_DECODER (AC3_FIXED, ac3_fixed);
+#endif /* ARCH_MIPS */
     REGISTER_ENCDEC  (ALAC, alac);
     REGISTER_DECODER (ALS, als);
     REGISTER_DECODER (AMRNB, amrnb);
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index a1d69c4..8f9aa1c 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2515,6 +2515,26 @@ static void vector_fmul_window_c(float *dst, const float *src0,
     }
 }

+#if (ARCH_MIPS)
+static void vector_fmul_window_fixed_c(int *dst, const int16_t *src0, /* int16 inputs, int outputs; parameter layout matches the float vector_fmul_window_c */
+                                       const int16_t *src1, const int16_t *win, int len)
+{   /* writes dst[0..2*len-1]; reads src0[0..len-1], src1[0..len-1] and win[0..2*len-1] */
+    int i,j;
+    dst += len;   /* re-base dst, win and src0 at their midpoint so the negative index i addresses the lower half */
+    win += len;
+    src0+= len;
+
+    for (i=-len, j=len-1; i<0; i++, j--) {   /* i walks the lower half upwards while j walks the upper half downwards */
+        int s0 = src0[i];   /* promoted to int so the products below are computed in int */
+        int s1 = src1[j];   /* src1 is not re-based: j indexes it directly, last element first */
+        int wi = win[i];
+        int wj = win[j];
+        dst[i] = (s0*wj - s1*wi + 0x4000) >> 15;   /* 0x4000 is half of 1 << 15, so the shift rounds to nearest */
+        dst[j] = (s0*wi + s1*wj + 0x4000) >> 15;
+    }
+}
+#endif /* ARCH_MIPS */
+
 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                  int len)
 {
@@ -3042,6 +3062,9 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->vector_fmul_reverse = vector_fmul_reverse_c;
     c->vector_fmul_add = vector_fmul_add_c;
     c->vector_fmul_window = vector_fmul_window_c;
+#if (ARCH_MIPS)
+    c->vector_fmul_window_fixed = vector_fmul_window_fixed_c;
+#endif
     c->vector_clipf = vector_clipf_c;
     c->scalarproduct_int16 = scalarproduct_int16_c;
     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
@@ -3177,6 +3200,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     if (ARCH_SH4)        ff_dsputil_init_sh4   (c, avctx);
     if (ARCH_BFIN)       ff_dsputil_init_bfin  (c, avctx);
     if (HAVE_MIPSFPU)    ff_dsputil_init_mips  (c, avctx);
+    if (HAVE_MIPSDSPR2)  ff_dsputil_init_mips_fixed(c);

     for (i = 0; i < 4; i++) {
         for (j = 0; j < 16; j++) {
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 18dd316..d437844 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -398,6 +398,9 @@ typedef struct DSPContext {
     void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
     void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
+#if (ARCH_MIPS)
+    void (*vector_fmul_window_fixed)(int *dst, const int16_t *src0, const int16_t *src1, const int16_t *win, int len);
+#endif /* ARCH_MIPS */
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
     /**
@@ -624,6 +627,7 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
 void ff_dsputil_init_mips(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_mips_fixed(DSPContext* c);

 void ff_dsputil_init_dwt(DSPContext *c);
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/fft.c b/libavcodec/fft.c
index 39c8972..a57b62e 100644
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -167,6 +167,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 #else
     if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
     if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
+    if (ARCH_MIPS)    ff_fft_fixed_init_mips(s);
 #endif

     for(j=4; j<=nbits; j++) {
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 15e5a12..deabbab 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -80,6 +80,10 @@ struct FFTContext {
     void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
     void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
     void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+#if (ARCH_MIPS)
+    void (*fft_fixed_calc)(struct FFTContext *s, FFTComplex *z);
+    void (*imdct_fixed_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+#endif /* ARCH_MIPS */
     void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
     void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
     int fft_permutation;
@@ -140,6 +144,9 @@ void ff_fft_init_arm(FFTContext *s);
 void ff_fft_init_mips(FFTContext *s);
 #else
 void ff_fft_fixed_init_arm(FFTContext *s);
+#if (ARCH_MIPS)
+void ff_fft_fixed_init_mips(FFTContext *s);
+#endif
 #endif

 void ff_fft_end(FFTContext *s);
@@ -147,6 +154,11 @@ void ff_fft_end(FFTContext *s);
 #define ff_mdct_init FFT_NAME(ff_mdct_init)
 #define ff_mdct_end  FFT_NAME(ff_mdct_end)

+#if (ARCH_MIPS)
+int ff_mdct_fixed_init_hardcoded_128(FFTContext *s, int nbits, int inverse, int scale);
+int ff_mdct_fixed_init_hardcoded(FFTContext *s, int nbits, int inverse, int scale);
+int ff_mdct_fixed_init(FFTContext *s, int nbits, int inverse, int scale);
+#endif /* ARCH_MIPS */
 int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
 void ff_mdct_end(FFTContext *s);

diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index e47c205..b7b0345 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -28,7 +28,18 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul,
     for(i=0; i<len; i++)
         dst[i] = src[i] * mul;
 }
+#if (ARCH_MIPS)
+static void int32_to_fixed_fmul_scalar_c(int16_t *dst, const int *src, int mul, int len) {
+    int i; /* dst[i] = (src[i] * mul + 0x8000) >> 16, i.e. the product scaled down by 65536 with rounding, stored as int16_t */
+    for(i=0; i<len; i++) /* 64-bit product: src[i] * mul can exceed the int range, and signed overflow is undefined */
+        dst[i] = (int16_t)(((int64_t)src[i] * mul + 0x8000) >> 16);
+}

+static av_always_inline int fixed_to_int16_one(const int *src)
+{   /* converts the single sample *src by passing it to av_clip_int16_c_fixed(), which is not defined in this hunk */
+    return av_clip_int16_c_fixed(*src);   /* NOTE(review): presumably added by this patch's libavutil/common.h change - confirm */
+}
+#endif /* ARCH_MIPS */
 static av_always_inline int float_to_int16_one(const float *src){
     return av_clip_int16(lrintf(*src));
 }
@@ -56,6 +67,37 @@ static void float_to_int16_interleave_c(int16_t *dst, const float **src,
     }
 }

+#if (ARCH_MIPS)
+static void fixed_to_int16_interleave_c(int16_t *dst, const int **src, /* src[c] is the sample array of channel c */
+                                        long len, int channels)
+{   /* writes len * channels values to dst, sample-major: dst[i*channels + c] = fixed_to_int16_one(&src[c][i]) */
+    int i,j,c;
+    if(channels==2) {   /* unrolled stereo case */
+        for(i=0; i<len; i++) {
+            dst[2*i] = fixed_to_int16_one(src[0]+i);
+            dst[2*i+1] = fixed_to_int16_one(src[1]+i);
+        }
+    }
+    else {
+        if(channels==6) {   /* unrolled six-channel case */
+            for(i=0; i<len; i++) {
+                dst[6*i] = fixed_to_int16_one(src[0]+i);
+                dst[6*i+1] = fixed_to_int16_one(src[1]+i);
+                dst[6*i+2] = fixed_to_int16_one(src[2]+i);
+                dst[6*i+3] = fixed_to_int16_one(src[3]+i);
+                dst[6*i+4] = fixed_to_int16_one(src[4]+i);
+                dst[6*i+5] = fixed_to_int16_one(src[5]+i);
+            }
+        }
+        else {   /* generic case: one pass per channel, writing with a stride of channels */
+            for(c=0; c<channels; c++)
+                for(i=0, j=c; i<len; i++, j+=channels)
+                    dst[j] = fixed_to_int16_one(src[c]+i);
+        }
+    }
+}
+#endif /* ARCH_MIPS */
+
 void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
                            int channels)
 {
@@ -75,9 +117,45 @@ void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
     }
 }

+#if (ARCH_MIPS)
+void ff_fixed_interleave_c(int *dst, const int **src, unsigned int len, /* src[c] is the sample array of channel c */
+                           int channels)
+{   /* copies without conversion: dst[i*channels + c] = src[c][i] for every sample i and channel c */
+    int j, c;
+    unsigned int i;
+    if (channels == 6) {   /* unrolled six-channel case */
+        for (i = 0; i < len; i++) {
+            dst[6*i]   = src[0][i];
+            dst[6*i+1] = src[1][i];
+            dst[6*i+2] = src[2][i];
+            dst[6*i+3] = src[3][i];
+            dst[6*i+4] = src[4][i];
+            dst[6*i+5] = src[5][i];
+        }
+    }
+    else if (channels == 2) {   /* unrolled stereo case */
+        for (i = 0; i < len; i++) {
+            dst[2*i]   = src[0][i];
+            dst[2*i+1] = src[1][i];
+        }
+    } else if (channels == 1 && len < INT_MAX / sizeof(int)) {   /* mono is a plain copy; the bound keeps the byte count below INT_MAX */
+        memcpy(dst, src[0], len * sizeof(int));
+    } else {   /* generic case, also reached for mono when len is too large for the memcpy branch */
+        for (c = 0; c < channels; c++)
+            for (i = 0, j = c; i < len; i++, j += channels)
+                dst[j] = src[c][i];
+    }
+}
+#endif /* ARCH_MIPS */
+
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
 {
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+#if (ARCH_MIPS)
+    c->int32_to_fixed_fmul_scalar = int32_to_fixed_fmul_scalar_c;
+    c->fixed_to_int16_interleave  = fixed_to_int16_interleave_c;
+    c->fixed_interleave           = ff_fixed_interleave_c;
+#endif /* ARCH_MIPS */
     c->float_to_int16             = float_to_int16_c;
     c->float_to_int16_interleave  = float_to_int16_interleave_c;
     c->float_interleave           = ff_float_interleave_c;
@@ -86,6 +164,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
     if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
     if (HAVE_MMX) ff_fmt_convert_init_x86(c, avctx);
     if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c);
+    if (HAVE_MIPSDSPR1) ff_fmt_convert_init_mips_fixed(c, avctx);
 }

 /* ffdshow custom code */
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index ab2caa2..49e7992 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -36,7 +36,55 @@ typedef struct FmtConvertContext {
      *            constraints: multiple of 8
      */
     void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
-
+#if (ARCH_MIPS)
+    /**
+     * Multiply an array of int32_t by an int32_t value and convert to int16_t.
+     * @param dst destination array of int16_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t.
+     *            constraints: 16-byte aligned
+     * @param len number of elements in array.
+     *            constraints: multiple of 8
+     */
+    void (*int32_to_fixed_fmul_scalar)(int16_t *dst, const int *src, int mul, int len);
+    /**
+     * Convert an array of int32_t to an array of int16_t.
+     *
+     * @param dst destination array of int16_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t.
+     *            constraints: 16-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     */
+    void (*fixed_to_int16)(int16_t *dst, const int *src, long len);
+    /**
+     * Convert multiple arrays of int32_t to an interleaved array of int16_t.
+     *
+     * @param dst destination array of interleaved int16_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t arrays, one for each channel.
+     *            constraints: 16-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     * @param channels number of channels
+     */
+    void (*fixed_to_int16_interleave)(int16_t *dst, const int **src,
+                                      long len, int channels);
+    /**
+     * Convert multiple arrays of int32_t to an array of interleaved int32_t.
+     *
+     * @param dst destination array of interleaved int32_t.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t arrays, one for each channel.
+     *            constraints: 16-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     * @param channels number of channels
+     */
+    void (*fixed_interleave)(int *dst, const int **src, unsigned int len,
+                             int channels);
+#endif /* ARCH_MIPS */
     /**
      * Convert an array of float to an array of int16_t.
      *
@@ -86,7 +134,12 @@ typedef struct FmtConvertContext {

 void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
                            int channels);
-
+#if (ARCH_MIPS)
+void ff_fixed_interleave_c(int *dst, const int **src, unsigned int len,
+                           int channels);
+void fixed_interleave(int *dst, const int **src, unsigned int len, int channels);
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx);
+#endif /* ARCH_MIPS */
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);

 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
diff --git a/libavcodec/kbdwin.c b/libavcodec/kbdwin.c
index 2722312..4f76b20 100644
--- a/libavcodec/kbdwin.c
+++ b/libavcodec/kbdwin.c
@@ -46,3 +46,35 @@ av_cold void ff_kbd_window_init(float *window, float alpha, int n)
    for (i = 0; i < n; i++)
        window[i] = sqrt(local_window[i] / sum);
 }
+
+#if (ARCH_MIPS)
+av_cold void ff_kbd_fixed_window_init(int16_t *window, float alpha, int n) /* fills window[0..n-1] with values scaled to at most 32767 */
+{
+    int i, j;
+    double sum = 0.0, bessel, tmp;
+    double local_window[FF_KBD_WINDOW_MAX];   /* running sums, computed in double before quantization */
+    double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
+
+    assert(n <= FF_KBD_WINDOW_MAX);   /* local_window has room for FF_KBD_WINDOW_MAX entries only */
+
+    for (i = 0; i < n; i++) {
+        tmp = i * (n - i) * alpha2;
+        bessel = 1.0;
+        for (j = BESSEL_I0_ITER; j > 0; j--)   /* nested evaluation of the series sum_k tmp^k / (k!)^2, truncated after BESSEL_I0_ITER terms */
+            bessel = bessel * tmp / (j * j) + 1;
+        sum += bessel;
+        local_window[i] = sum;   /* cumulative sum up to and including index i */
+    }
+
+    sum++;   /* the normalizing total is the cumulative sum plus one */
+    for (i = 0; i < n; i++)
+    {
+        int tmp;   /* NOTE(review): shadows the double tmp declared at function scope */
+
+        tmp = (int)(32767*sqrt(local_window[i] / sum) + 0.5);   /* scale to 32767 and round to nearest */
+        if (tmp > 32767)   /* clamp to the largest positive int16_t value */
+            tmp = 32767;
+        window[i] = (int16_t)tmp;
+    }
+}
+#endif
diff --git a/libavcodec/kbdwin.h b/libavcodec/kbdwin.h
index 4b93975..66621a2 100644
--- a/libavcodec/kbdwin.h
+++ b/libavcodec/kbdwin.h
@@ -18,7 +18,7 @@

 #ifndef AVCODEC_KBDWIN_H
 #define AVCODEC_KBDWIN_H
-
+#include "config.h"
 /**
  * Maximum window size for ff_kbd_window_init.
  */
@@ -31,5 +31,7 @@
  * @param   n       size of half window, max FF_KBD_WINDOW_MAX
  */
 void ff_kbd_window_init(float *window, float alpha, int n);
-
+#if (ARCH_MIPS)
+void ff_kbd_fixed_window_init(int16_t *window, float alpha, int n);
+#endif
 #endif /* AVCODEC_KBDWIN_H */
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index ff46768..4830039 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -17,3 +17,7 @@ OBJS-$(CONFIG_FFT)                        += mips/fft_init_table.o
 MIPSFPU-OBJS-$(CONFIG_FFT)                += mips/fft_mips.o
 MIPSFPU-OBJS-$(HAVE_INLINE_ASM)           += mips/fmtconvert_mips.o
 MIPSFPU-OBJS-$(HAVE_INLINE_ASM)           += mips/dsputil_mips.o
+MIPSDSPR1-OBJS-$(HAVE_INLINE_ASM)         += mips/fmtconvert_mips_fixed.o
+MIPSDSPR2-OBJS-$(HAVE_INLINE_ASM)         += mips/dsputil_mips_fixed.o
+OBJS-$(CONFIG_FFT)                        += mips/fft_mips_fixed.o
+OBJS-$(CONFIG_AC3_FIXED_DECODER)          += mips/ac3dec_fixed.o
diff --git a/libavcodec/mips/ac3dec_fixed.c b/libavcodec/mips/ac3dec_fixed.c
new file mode 100644
index 0000000..50a30dd
--- /dev/null
+++ b/libavcodec/mips/ac3dec_fixed.c
@@ -0,0 +1,1660 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * AC3 fixed-point decoder for MIPS platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FFT_FLOAT 0
+
+#include <stdio.h>
+#include <stddef.h>
+#include <math.h>
+#include <string.h>
+
+#include "libavutil/crc.h"
+#include "libavcodec/internal.h"
+#include "libavcodec/aac_ac3_parser.h"
+#include "libavcodec/ac3_parser.h"
+#include "ac3dec_fixed.h"
+#include "libavcodec/ac3dec_data.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/kbdwin.h"
+
+#define INT2FIXED(x) (((x) << 15)  * ( (x) < -32767 ? -1 : 1)) /* x is parenthesized at every use so expression arguments expand as one operand; x is evaluated twice */
+#define MULT_FINT(x, y)  (((long long)(x) * (y) ) >> 16 ) /* 64-bit product scaled down by 2^16 */
+#define ADD_FINT(x, y) ( (x) + (y) )
+#define SUB_FINT(a, b) ( (a) - (b) )
+#define SUB_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) ( INT2FIXED(x) - (y) ) /* INT2FIXED(x) minus y */
+#define DIV_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) /* (x << 30) / y in 64 bits; x is evaluated twice */ \
+                ( (((long long)(x) << 30)  * ( (x) < -32767 ? -1 : 1) ) / (y) )
+#define MULT_INT_WITH_FINT_AND_CONVERT_TO_FINT(x, y) /* INT2FIXED(x) * y in 64 bits, scaled down by 2^15 */ \
+                                    (((long long)INT2FIXED(x) * (y) ) >> 15 )
+
+/**
+ * table for ungrouping 3 values in 7 bits.
+ * used for exponents and bap=2 mantissas
+ */
+static uint8_t ungroup_3_in_7_bits_tab[128][3]; /* filled at run time by ac3_tables_init() */
+
+
+/** tables for ungrouping mantissas */
+static int b1_mantissas[32][3];  /* all of the mantissa tables below are filled by ac3_tables_init() */
+static int b2_mantissas[128][3];
+static int b3_mantissas[8];      /* ac3_tables_init() writes entries 0..6 only */
+static int b4_mantissas[128][2];
+static int b5_mantissas[16];     /* ac3_tables_init() writes entries 0..14 only */
+
+/**
+ * Quantization table: levels for symmetric. bits for asymmetric.
+ * reference: Table 7.18 Mapping of bap to Quantizer
+ */
+static const uint8_t quantization_tab[16] = {
+    0, 3, 5, 7, 11, 15,
+    5, 6, 7, 8, 9, 10, 11, 12, 14, 16
+};
+
+/** Adjustments in dB gain */
+static const int gain_levels_fixed[9] = { /* indexed by the values stored in center_levels, surround_levels and ac3_default_coeffs */
+    LEVEL_FIXED_PLUS_3DB,
+    LEVEL_FIXED_PLUS_1POINT5DB,
+    LEVEL_FIXED_ONE,
+    LEVEL_FIXED_MINUS_1POINT5DB,
+    LEVEL_FIXED_MINUS_3DB,
+    LEVEL_FIXED_MINUS_4POINT5DB,
+    LEVEL_FIXED_MINUS_6DB,
+    LEVEL_FIXED_ZERO,
+    LEVEL_FIXED_MINUS_9DB
+};
+
+/**
+ * Table for center mix levels
+ * reference: Section 5.4.2.4 cmixlev
+ */
+static const uint8_t center_levels[4] = { 4, 5, 6, 5 }; /* entries are indices into gain_levels_fixed */
+
+/**
+ * Table for surround mix levels
+ * reference: Section 5.4.2.5 surmixlev
+ */
+static const uint8_t surround_levels[4] = { 4, 6, 7, 6 }; /* entries are indices into gain_levels_fixed */
+
+/**
+ * Table for default stereo downmixing coefficients
+ * reference: Section 7.8.2 Downmixing Into Two Channels
+ */
+static const uint8_t ac3_default_coeffs[8][5][2] = { /* [channel_mode][channel][output column]; entries index gain_levels_fixed */
+    { { 2, 7 }, { 7, 2 },                               },
+    { { 4, 4 },                                         },
+    { { 2, 7 }, { 7, 2 },                               },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 },                     },
+    { { 2, 7 }, { 7, 2 }, { 6, 6 },                     },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 8, 8 },           },
+    { { 2, 7 }, { 7, 2 }, { 6, 7 }, { 7, 6 },           },
+    { { 2, 7 }, { 5, 5 }, { 7, 2 }, { 6, 7 }, { 7, 6 }, },
+};
+
+/**
+ * Symmetrical Dequantization: maps code in [0, levels) to (code - levels/2) * 2^24 / levels.
+ * reference: Section 7.3.3 Expansion of Mantissas for Symmetrical Quantization
+ *            Tables 7.19 to 7.23
+ */
+static inline int
+symmetric_dequant(int code, int levels)
+{   /* multiply instead of shifting: code - levels/2 is negative for the lower codes, and left-shifting a negative value is undefined */
+    return ((code - (levels >> 1)) * (1 << 24)) / levels;
+}
+
+/**
+ * Initialize tables at runtime: the 7-bit ungrouping table and the b1..b5 mantissa tables.
+ */
+static av_cold void ac3_tables_init(void)
+{
+    int i;
+
+    /* generate table for ungrouping 3 values in 7 bits
+       reference: Section 7.1.3 Exponent Decoding */
+    for(i=0; i<128; i++) {   /* splits i into three base-5 digits; the leading digit is not reduced, so it reaches 5 for i >= 125 */
+        ungroup_3_in_7_bits_tab[i][0] =  i / 25;
+        ungroup_3_in_7_bits_tab[i][1] = (i % 25) / 5;
+        ungroup_3_in_7_bits_tab[i][2] = (i % 25) % 5;
+    }
+
+    /* generate grouped mantissa tables
+       reference: Section 7.3.5 Ungrouping of Mantissas */
+    for(i=0; i<32; i++) {
+        /* bap=1 mantissas */
+        b1_mantissas[i][0] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][0], 3);
+        b1_mantissas[i][1] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][1], 3);
+        b1_mantissas[i][2] = symmetric_dequant(ff_ac3_ungroup_3_in_5_bits_tab[i][2], 3);
+    }
+    for(i=0; i<128; i++) {   /* must run after the first loop: it reads ungroup_3_in_7_bits_tab */
+        /* bap=2 mantissas */
+        b2_mantissas[i][0] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][0], 5);
+        b2_mantissas[i][1] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][1], 5);
+        b2_mantissas[i][2] = symmetric_dequant(ungroup_3_in_7_bits_tab[i][2], 5);
+
+        /* bap=4 mantissas */
+        b4_mantissas[i][0] = symmetric_dequant(i / 11, 11);   /* two base-11 digits packed into the 7-bit index */
+        b4_mantissas[i][1] = symmetric_dequant(i % 11, 11);
+    }
+    /* generate ungrouped mantissa tables
+       reference: Tables 7.21 and 7.23 */
+    for(i=0; i<7; i++) {   /* the array has 8 entries; entry 7 keeps its static zero value */
+        /* bap=3 mantissas */
+        b3_mantissas[i] = symmetric_dequant(i, 7);
+    }
+    for(i=0; i<15; i++) {   /* the array has 16 entries; entry 15 keeps its static zero value */
+        /* bap=5 mantissas */
+        b5_mantissas[i] = symmetric_dequant(i, 15);
+    }
+}
+
+/**
+ * AVCodec initialization: builds the tables, transforms and window, and selects the output sample format.
+ */
+av_cold int ac3_fixed_decode_init(AVCodecContext *avctx) /* always returns 0 */
+{
+    AC3FixedDecodeContext *s = avctx->priv_data;
+    s->avctx = avctx;
+
+    ff_ac3_common_init();
+    ac3_tables_init();
+
+#if !CONFIG_HARDCODED_TABLES
+    ff_mdct_fixed_init(&s->imdct_256, 8, 1, 1);   /* NOTE(review): the int results of these init calls are discarded */
+    ff_mdct_fixed_init(&s->imdct_512, 9, 1, 1);
+#else
+    ff_mdct_fixed_init_hardcoded(&s->imdct_256, 8, 1, 1);
+    ff_mdct_fixed_init_hardcoded_128(&s->imdct_512, 9, 1, 1);
+#endif
+
+    ff_kbd_fixed_window_init(s->window, 5.0, 256);
+    ff_dsputil_init(&s->dsp, avctx);
+
+    ff_fmt_convert_init(&s->fmt_conv, avctx);
+    av_lfg_init(&s->dith_state, 0);
+
+
+    if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {   /* NOTE(review): this branch re-assigns the value it just tested - confirm the intended field */
+        /* 65536 == 1 << 16; int32_to_fixed_fmul_scalar_c() shifts the product right by 16 */
+        s->mul_bias = 65536;
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    } else {
+        /* 2147418112 == 32767 << 16 */
+        s->mul_bias = 2147418112;
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    }
+
+    /* allow downmixing to stereo or mono */
+    if (avctx->channels > 0 && avctx->request_channels > 0 &&
+            avctx->request_channels < avctx->channels &&
+            avctx->request_channels <= 2) {
+        avctx->channels = avctx->request_channels;
+    }
+    s->downmixed = 1;
+
+    avcodec_get_frame_defaults(&s->frame);
+    avctx->coded_frame = &s->frame;
+
+    return 0;
+}
+
+/**
+ * Parse the 'sync info' and 'bit stream info' from the AC-3 bitstream.
+ * GetBitContext within AC3DecodeContext must point to
+ * the start of the synchronized AC-3 bitstream.
+ */
+static int ac3_parse_header_fixed(AC3FixedDecodeContext *s) /* always returns 0; every field read here is skipped, not stored */
+{
+    GetBitContext *gbc = &s->gbc;
+    int i;
+
+    /* read the rest of the bsi. read twice for dual mono mode. */
+    i = !(s->channel_mode);   /* 1 when channel_mode is 0, so the do/while body runs twice; otherwise once */
+    do {
+        skip_bits(gbc, 5); /* skip dialog normalization */
+        if (get_bits1(gbc))
+            skip_bits(gbc, 8); /* skip compression */
+        if (get_bits1(gbc))
+            skip_bits(gbc, 8); /* skip language code */
+        if (get_bits1(gbc))
+            skip_bits(gbc, 7); /* skip audio production information */
+    } while (i--);
+
+    skip_bits(gbc, 2); /* skip copyright bit and original bitstream bit */
+
+    /* skip the timecodes (or extra bitstream information for Alternate Syntax)
+       TODO: read & use the xbsi1 downmix levels */
+    if (get_bits1(gbc))
+        skip_bits(gbc, 14); /* skip timecode1 / xbsi1 */
+    if (get_bits1(gbc))
+        skip_bits(gbc, 14); /* skip timecode2 / xbsi2 */
+
+    /* skip additional bitstream info */
+    if (get_bits1(gbc)) {
+        i = get_bits(gbc, 6);   /* 6-bit count; the do/while below skips i + 1 bytes */
+        do {
+            skip_bits(gbc, 8);
+        } while(i--);
+    }
+    return 0;
+}
+
+/**
+ * Common function to parse AC-3 or E-AC-3 frame header
+ */
+static int parse_frame_header_fixed(AC3FixedDecodeContext *s) /* returns the parser's error code, -1 for bitstream_id > 10, else the result of ac3_parse_header_fixed() */
+{
+    AC3HeaderInfo hdr;
+    int err;
+
+    err = avpriv_ac3_parse_header(&s->gbc, &hdr);
+    if(err)   /* any non-zero result is passed straight back to the caller */
+        return err;
+
+    /* get decoding parameters from header info */
+    s->bit_alloc_params.sr_code     = hdr.sr_code;
+    s->bitstream_mode               = hdr.bitstream_mode;
+    s->channel_mode                 = hdr.channel_mode;
+    s->channel_layout               = hdr.channel_layout;
+    s->lfe_on                       = hdr.lfe_on;
+    s->bit_alloc_params.sr_shift    = hdr.sr_shift;
+    s->sample_rate                  = hdr.sample_rate;
+    s->bit_rate                     = hdr.bit_rate;
+    s->channels                     = hdr.channels;
+    s->fbw_channels                 = s->channels - s->lfe_on;   /* channel count without the LFE channel */
+    s->lfe_ch                       = s->fbw_channels + 1;       /* the LFE channel index follows the full-bandwidth channels */
+    s->frame_size                   = hdr.frame_size;
+    s->center_mix_level             = hdr.center_mix_level;
+    s->surround_mix_level           = hdr.surround_mix_level;
+    s->num_blocks                   = hdr.num_blocks;
+    s->frame_type                   = hdr.frame_type;
+    s->substreamid                  = hdr.substreamid;
+
+    if(s->lfe_on) {   /* fixed per-frame settings for the LFE channel */
+        s->start_freq[s->lfe_ch] = 0;
+        s->end_freq[s->lfe_ch] = 7;
+        s->num_exp_groups[s->lfe_ch] = 2;
+        s->channel_in_cpl[s->lfe_ch] = 0;
+    }
+
+    if (hdr.bitstream_id <= 10) {   /* plain AC-3: set the syntax flags, then parse the rest of the header */
+        s->eac3                  = 0;
+        s->snr_offset_strategy   = 2;
+        s->block_switch_syntax   = 1;
+        s->dither_flag_syntax    = 1;
+        s->bit_allocation_syntax = 1;
+        s->fast_gain_syntax      = 0;
+        s->first_cpl_leak        = 0;
+        s->dba_syntax            = 1;
+        s->skip_syntax           = 1;
+        memset(s->channel_uses_aht, 0, sizeof(s->channel_uses_aht));
+        return ac3_parse_header_fixed(s);
+    }
+    else {   /* any higher bitstream_id is rejected as E-AC-3 */
+        av_log(s->avctx, AV_LOG_ERROR, "E-AC-3 support not compiled in\n");
+        return -1;
+    }
+}
+
+/**
+ * Set stereo downmixing coefficients based on frame header info.
+ * reference: Section 7.8.2 Downmixing Into Two Channels
+ */
+static void set_downmix_coeffs_fixed(AC3FixedDecodeContext *s) /* fills s->downmix_coeffs[0..fbw_channels-1][0..1] */
+{
+    int i;
+
+    int cmix = gain_levels_fixed[center_levels[s->center_mix_level]];
+    int smix = gain_levels_fixed[surround_levels[s->surround_mix_level]];
+    int norm0, norm1;
+
+    for(i=0; i<s->fbw_channels; i++) {   /* start from the default coefficients for this channel mode */
+        s->downmix_coeffs[i][0] = gain_levels_fixed[ac3_default_coeffs[s->channel_mode][i][0]];
+        s->downmix_coeffs[i][1] = gain_levels_fixed[ac3_default_coeffs[s->channel_mode][i][1]];
+    }
+    if(s->channel_mode > 1 && s->channel_mode & 1) {   /* odd channel modes above 1: channel 1 takes the center mix level */
+        s->downmix_coeffs[1][0] = s->downmix_coeffs[1][1] = cmix;
+    }
+    if(s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) {
+        int nf = s->channel_mode - 2;   /* index of the single rear channel */
+        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf][1] = (smix * 23170 + 0x4000) >> 15;   /* smix * 23170 / 32768, rounded */
+    }
+    if(s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) {
+        int nf = s->channel_mode - 4;   /* index of the first of the two rear channels */
+        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf+1][1] = smix;
+    }
+
+    /* renormalize: sum each output column, then rescale every coefficient by (1 << 12) / column sum */
+    norm0 = norm1 = 0;
+    for(i=0; i<s->fbw_channels; i++) {
+        norm0 += s->downmix_coeffs[i][0];
+        norm1 += s->downmix_coeffs[i][1];
+    }
+    for(i=0; i<s->fbw_channels; i++) {   /* NOTE(review): divides by the column sums; assumes neither is zero */
+        s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] << 12) / norm0;
+        s->downmix_coeffs[i][1] = (s->downmix_coeffs[i][1] << 12) / norm1;
+    }
+
+    if(s->output_mode == AC3_CHMODE_MONO) {
+        for(i=0; i<s->fbw_channels; i++)
+            /* mono output: fold both columns into column 0, scaled by 23170 / 32768 (about 0.7071) with rounding */
+            s->downmix_coeffs[i][0] = ((s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]) * 23170 + 0x4000) >> 15;
+    }
+}
+
+/**
+ * Decode the grouped exponents according to exponent strategy.
+ * Every 7-bit code read from the bitstream expands to three differential
+ * exponents. These are accumulated onto absexp, and each resulting absolute
+ * exponent is written to dexps once, twice or four times depending on the
+ * strategy.
+ * reference: Section 7.1.3 Exponent Decoding
+ *
+ * @return 0 on success, -1 when an absolute exponent leaves the range 0..24
+ */
+static int decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
+                            uint8_t absexp, int8_t *dexps)
+{
+    int diffs[256];
+    int n_diffs = 0;
+    int out = 0;
+    int repeat, cur, g, k;
+
+    /* EXP_D45 gets one extra repetition on top of its own value */
+    repeat = exp_strategy;
+    if (exp_strategy == EXP_D45)
+        repeat++;
+
+    /* unpack groups */
+    for (g = 0; g < ngrps; g++) {
+        int code = get_bits(gbc, 7);
+        diffs[n_diffs++] = ungroup_3_in_7_bits_tab[code][0];
+        diffs[n_diffs++] = ungroup_3_in_7_bits_tab[code][1];
+        diffs[n_diffs++] = ungroup_3_in_7_bits_tab[code][2];
+    }
+
+    /* convert to absolute exps and expand groups */
+    cur = absexp;
+    for (k = 0; k < ngrps*3; k++) {
+        cur += diffs[k] - 2;
+        /* the unsigned comparison rejects negative values as well */
+        if (cur > 24U)
+            return -1;
+        switch (repeat) {
+        case 4:
+            dexps[out++] = cur;
+            dexps[out++] = cur;
+            /* fall through */
+        case 2:
+            dexps[out++] = cur;
+            /* fall through */
+        case 1:
+            dexps[out++] = cur;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Generate transform coefficients for each coupled channel in the coupling
+ * range using the coupling coefficients and coupling coordinates.
+ * For channel 2, bands whose phase flag is set are negated afterwards.
+ * reference: Section 7.4.3 Coupling Coordinate Format
+ */
+static void calc_transform_coeffs_cpl_fixed(AC3FixedDecodeContext *s)
+{
+    int first = s->start_freq[CPL_CH];
+    int band;
+
+    for (band = 0; band < s->num_cpl_bands; band++) {
+        const int last = first + s->cpl_band_sizes[band];
+        int ch, k;
+
+        for (ch = 1; ch <= s->fbw_channels; ch++) {
+            int coord;
+
+            if (!s->channel_in_cpl[ch])
+                continue;
+
+            /* scale the coupling channel by this channel's band coordinate */
+            coord = s->cpl_coords[ch][band] << 5;
+            for (k = first; k < last; k++)
+                s->fixed_coeffs[ch][k] = MULH(s->fixed_coeffs[CPL_CH][k] << 4, coord);
+
+            if (ch == 2 && s->phase_flags[band]) {
+                for (k = first; k < last; k++)
+                    s->fixed_coeffs[2][k] = -s->fixed_coeffs[2][k];
+            }
+        }
+        first = last;
+    }
+}
+
+/**
+ * Grouped mantissas for 3-level 5-level and 11-level quantization
+ * One bitstream code carries several mantissas; the ones not used right away
+ * are parked here together with a count of how many are still pending.
+ */
+typedef struct {
+    int b1_mant[2]; /* mantissas left over from the last 5-bit code read for bap 1 */
+    int b2_mant[2]; /* mantissas left over from the last 7-bit code read for bap 2 */
+    int b4_mant;    /* second mantissa of the last 7-bit code read for bap 4 */
+    int b1;         /* number of b1_mant entries not consumed yet (0..2) */
+    int b2;         /* number of b2_mant entries not consumed yet (0..2) */
+    int b4;         /* non-zero while b4_mant is still pending */
+} mant_groups;
+
+/**
+ * Decode the transform coefficients for a particular channel.
+ * For every bin between the channel's start and end frequency, a mantissa is
+ * obtained according to the bin's bit allocation pointer (bap) and then
+ * shifted right by the bin's exponent.
+ * reference: Section 7.3 Quantization and Decoding of Mantissas
+ *
+ * @param s        decoder context; bap, dexps and fixed_coeffs of the channel
+ *                 are read/written and the bit reader is advanced
+ * @param ch_index channel to decode; dithering is always on for CPL_CH
+ * @param m        grouped-mantissa state, carried over between calls
+ */
+static void ac3_decode_fixed_transform_coeffs_ch(
+    AC3FixedDecodeContext *s,
+    int ch_index,
+    mant_groups *m
+)
+{
+    int start_freq = s->start_freq[ch_index];
+    int end_freq = s->end_freq[ch_index];
+    uint8_t *baps = s->bap[ch_index];
+    int8_t *exps = s->dexps[ch_index];
+    int *coeffs = s->fixed_coeffs[ch_index];
+    int dither = (ch_index == CPL_CH) || s->dither_flag[ch_index];
+    GetBitContext *gbc = &s->gbc;
+    int freq;
+
+    for(freq = start_freq; freq < end_freq; freq++) {
+        int bap = baps[freq];
+        int mantissa;
+        int bits;
+
+        switch (bap) {
+        case 0:
+            /* no bits in the stream: pseudo-random dither or plain zero */
+            if (!dither) {
+                coeffs[freq] = 0;
+                continue;
+            }
+            mantissa = (av_lfg_get(&s->dith_state) & 0x7FFFFF) - 0x400000;
+            break;
+        case 1:
+            /* three mantissas per 5-bit code */
+            if (m->b1) {
+                m->b1--;
+                mantissa = m->b1_mant[m->b1];
+            } else {
+                bits = get_bits(gbc, 5);
+                mantissa = b1_mantissas[bits][0];
+                m->b1_mant[1] = b1_mantissas[bits][1];
+                m->b1_mant[0] = b1_mantissas[bits][2];
+                m->b1 = 2;
+            }
+            break;
+        case 2:
+            /* three mantissas per 7-bit code */
+            if (m->b2) {
+                m->b2--;
+                mantissa = m->b2_mant[m->b2];
+            } else {
+                bits = get_bits(gbc, 7);
+                mantissa = b2_mantissas[bits][0];
+                m->b2_mant[1] = b2_mantissas[bits][1];
+                m->b2_mant[0] = b2_mantissas[bits][2];
+                m->b2 = 2;
+            }
+            break;
+        case 3:
+            mantissa = b3_mantissas[get_bits(gbc, 3)];
+            break;
+        case 4:
+            /* two mantissas per 7-bit code */
+            if (m->b4) {
+                m->b4 = 0;
+                mantissa = m->b4_mant;
+            } else {
+                bits = get_bits(gbc, 7);
+                mantissa = b4_mantissas[bits][0];
+                m->b4_mant = b4_mantissas[bits][1];
+                m->b4 = 1;
+            }
+            break;
+        case 5:
+            mantissa = b5_mantissas[get_bits(gbc, 4)];
+            break;
+        default: {
+            /* 6 to 15 */
+            int width = quantization_tab[bap];
+            unsigned int raw = get_bits(gbc, width);
+            /* Move the mantissa to the top of the word in unsigned
+               arithmetic (a signed left shift into the sign bit is
+               undefined), then shift right as signed to sign-extend it. */
+            mantissa = (int)(raw << (32 - width)) >> 8;
+            break;
+        }
+        }
+
+        coeffs[freq] = mantissa >> exps[freq];
+    }
+}
+
+/**
+ * Fill fixed_coeffs for one channel of block blk, either by decoding the
+ * mantissas from the bitstream or, for channels using AHT, from the
+ * pre-decoded mantissas stored in pre_mantissa.
+ */
+static void decode_fixed_transform_coeffs_ch(AC3FixedDecodeContext *s, int blk, int ch,
+                                    mant_groups *m)
+{
+    int k;
+
+    if (!s->channel_uses_aht[ch]) {
+        ac3_decode_fixed_transform_coeffs_ch(s, ch, m);
+        return;
+    }
+
+    /* if AHT is used, mantissas for all blocks are encoded in the first
+       block of the frame. */
+    for (k = s->start_freq[ch]; k < s->end_freq[ch]; k++)
+        s->fixed_coeffs[ch][k] = s->pre_mantissa[ch][k][blk] >> s->dexps[ch][k];
+}
+
+/*
+ * Decoding of the transform coefficients for a particular channel is done by
+ * the per-channel functions above.
+ * reference: Section 7.3 Quantization and Decoding of Mantissas
+ */
+/**
+ * Remove random dithering from coupling range coefficients with zero-bit
+ * mantissas for coupled channels which do not use dithering.
+ * reference: Section 7.3.4 Dither for Zero Bit Mantissas (bap=0)
+ */
+static void remove_dithering_fixed(AC3FixedDecodeContext *s)
+{
+    const int cpl_begin = s->start_freq[CPL_CH];
+    const int cpl_end   = s->end_freq[CPL_CH];
+    int ch;
+
+    for (ch = 1; ch <= s->fbw_channels; ch++) {
+        int k;
+
+        /* only coupled channels with dithering switched off are affected */
+        if (s->dither_flag[ch] || !s->channel_in_cpl[ch])
+            continue;
+
+        for (k = cpl_begin; k < cpl_end; k++) {
+            if (s->bap[CPL_CH][k] == 0)
+                s->fixed_coeffs[ch][k] = 0;
+        }
+    }
+}
+
+/**
+ * Apply a gain to a run of coefficients and narrow the result to 16 bits.
+ *
+ * The gain comes from the low 8 bits of dynrng: bits 0-4 plus 0x20 form the
+ * multiplier, and bits 5-7, read as a signed 3-bit value, are subtracted from
+ * a base right shift of 12. Each output is
+ * (src[k] * mul + round) >> shift, stored as int16_t.
+ *
+ * @param dst    output coefficients
+ * @param src    input coefficients
+ * @param dynrng gain word; only bits 0-7 are used
+ * @param len    number of coefficients; they are processed in groups of 8,
+ *               so both buffers must hold len rounded up to a multiple of 8
+ */
+static void scale_coefs (
+    int16_t *dst,
+    const int *src,
+    int dynrng,
+    int len)
+{
+    int i, shift, round;
+    int16_t mul;
+
+    mul = (dynrng & 0x1f) + 0x20;
+    /* Sign-extend the 3-bit field in bits 5-7. This avoids shifting a set
+       bit 7 left into the sign bit, which is undefined for a signed int. */
+    shift = 12 - ((((dynrng >> 5) & 7) ^ 4) - 4);
+    round = 1 << (shift-1);
+    for (i=0; i<len; i+=8) {
+
+#if !(HAVE_INLINE_ASM && HAVE_MIPS32R2)
+        int k;
+
+        for (k = 0; k < 8; k++)
+            dst[i+k] = (src[i+k] * mul + round) >> shift;
+#else
+        int temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+        /* eight coefficients per pass: load, multiply, round, shift */
+        __asm__ volatile (
+            "lw     %[temp],    0(%[src_i])                 \n\t"
+            "lw     %[temp1],   4(%[src_i])                 \n\t"
+            "lw     %[temp2],   8(%[src_i])                 \n\t"
+            "mul    %[temp],    %[temp],        %[mul]      \n\t"
+            "lw     %[temp3],   12(%[src_i])                \n\t"
+            "mul    %[temp1],   %[temp1],       %[mul]      \n\t"
+            "lw     %[temp4],   16(%[src_i])                \n\t"
+            "addu   %[temp],    %[temp],        %[round]    \n\t"
+            "mul    %[temp3],   %[temp3],       %[mul]      \n\t"
+            "addu   %[temp1],   %[temp1],       %[round]    \n\t"
+            "srav   %[temp],    %[temp],        %[shift]    \n\t"
+            "mul    %[temp2],   %[temp2],       %[mul]      \n\t"
+            "srav   %[temp1],   %[temp1],       %[shift]    \n\t"
+            "lw     %[temp5],   20(%[src_i])                \n\t"
+            "addu   %[temp2],   %[temp2],       %[round]    \n\t"
+            "addu   %[temp3],   %[temp3],       %[round]    \n\t"
+            "mul    %[temp4],   %[temp4],       %[mul]      \n\t"
+            "srav   %[temp2],   %[temp2],       %[shift]    \n\t"
+            "srav   %[temp3],   %[temp3],       %[shift]    \n\t"
+            "mul    %[temp5],   %[temp5],       %[mul]      \n\t"
+            "lw     %[temp6],   24(%[src_i])                \n\t"
+            "lw     %[temp7],   28(%[src_i])                \n\t"
+            "addu   %[temp4],   %[temp4],       %[round]    \n\t"
+            "mul    %[temp6],   %[temp6],       %[mul]      \n\t"
+            "mul    %[temp7],   %[temp7],       %[mul]      \n\t"
+            "addu   %[temp5],   %[temp5],       %[round]    \n\t"
+            "srav   %[temp4],   %[temp4],       %[shift]    \n\t"
+            "srav   %[temp5],   %[temp5],       %[shift]    \n\t"
+            "addu   %[temp6],   %[temp6],       %[round]    \n\t"
+            "addu   %[temp7],   %[temp7],       %[round]    \n\t"
+            "srav   %[temp6],   %[temp6],       %[shift]    \n\t"
+            "srav   %[temp7],   %[temp7],       %[shift]    \n\t"
+
+            : [temp] "=&r" (temp), [temp1] "=&r" (temp1),
+              [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+              [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+              [temp6] "=&r" (temp6), [temp7] "=&r" (temp7)
+            : [src_i] "r" (src + i), [mul] "r" (mul),
+              [round] "r" (round), [shift] "r" (shift)
+        );
+
+        dst[i  ]=temp;
+        dst[i+1]=temp1;
+        dst[i+2]=temp2;
+        dst[i+3]=temp3;
+
+        dst[i+4]=temp4;
+        dst[i+5]=temp5;
+        dst[i+6]=temp6;
+        dst[i+7]=temp7;
+#endif
+    }
+}
+
+/**
+ * Decode the transform coefficients.
+ * Walks all channels of block blk, decodes the coupling channel right after
+ * the first coupled channel, clears the bins above each channel's end
+ * frequency and finally removes unwanted dither.
+ */
+static void decode_transform_coeffs_fixed(AC3FixedDecodeContext *s, int blk)
+{
+    mant_groups groups;
+    int cpl_done = 0;
+    int ch;
+
+    groups.b1 = groups.b2 = groups.b4 = 0;
+
+    for (ch = 1; ch <= s->channels; ch++) {
+        int k;
+
+        /* transform coefficients for full-bandwidth channel */
+        decode_fixed_transform_coeffs_ch(s, blk, ch, &groups);
+
+        if (!s->channel_in_cpl[ch]) {
+            k = s->end_freq[ch];
+        } else {
+            /* transform coefficients for coupling channel come right after
+               the coefficients for the first coupled channel */
+            if (!cpl_done) {
+                decode_fixed_transform_coeffs_ch(s, blk, CPL_CH, &groups);
+                calc_transform_coeffs_cpl_fixed(s);
+                cpl_done = 1;
+            }
+            k = s->end_freq[CPL_CH];
+        }
+
+        /* zero the bins from the end frequency up to 255; the first store
+           happens unconditionally */
+        do {
+            s->fixed_coeffs[ch][k] = 0;
+        } while (++k < 256);
+    }
+
+    /* zero the dithered coefficients for appropriate channels */
+    remove_dithering_fixed(s);
+}
+
+/**
+ * Stereo rematrixing.
+ * In every flagged band, channels 1 and 2 are replaced by their sum and
+ * their difference, up to the lower of the two channels' end frequencies.
+ * reference: Section 7.5.4 Rematrixing : Decoding Technique
+ */
+static void do_rematrixing_fixed(AC3FixedDecodeContext *s)
+{
+    const int limit = FFMIN(s->end_freq[1], s->end_freq[2]);
+    int band;
+
+    for (band = 0; band < s->num_rematrixing_bands; band++) {
+        int k, stop;
+
+        if (!s->rematrixing_flags[band])
+            continue;
+
+        stop = FFMIN(limit, ff_ac3_rematrix_band_tab[band+1]);
+        for (k = ff_ac3_rematrix_band_tab[band]; k < stop; k++) {
+            int first = s->fixed_coeffs[1][k];
+            s->fixed_coeffs[1][k] += s->fixed_coeffs[2][k];
+            s->fixed_coeffs[2][k]  = first - s->fixed_coeffs[2][k];
+        }
+    }
+}
+
+/**
+ * Inverse MDCT Transform.
+ * Convert frequency domain coefficients to time-domain audio samples.
+ * Channels with block switching run the imdct_256 transform twice, once on
+ * the even and once on the odd coefficients; all other channels run the
+ * imdct_512 transform once. The windowed result goes to output[ch-1] and the
+ * second half is kept in delay[ch-1].
+ * reference: Section 7.9.4 Transformation Equations
+ */
+static inline void do_imdct_fixed(AC3FixedDecodeContext *s, int channels)
+{
+    int ch;
+
+    for (ch = 1; ch <= channels; ch++) {
+        const int out = ch - 1;
+
+        if (!s->block_switch[ch]) {
+            s->imdct_512.imdct_fixed_half(&s->imdct_512, s->tmp_output,
+                                          s->transform_coeffs[ch]);
+            s->dsp.vector_fmul_window_fixed(s->output[out], s->delay[out],
+                                            s->tmp_output, s->window, 128);
+            memcpy(s->delay[out], s->tmp_output+128, 128*sizeof(int16_t));
+        } else {
+            FFTSample *half = s->tmp_output+128;
+            int k;
+
+            /* even coefficients: transform, window, write to the output */
+            for (k = 0; k < 128; k++)
+                half[k] = s->transform_coeffs[ch][2*k];
+            s->imdct_256.imdct_fixed_half(&s->imdct_256, s->tmp_output, half);
+            s->dsp.vector_fmul_window_fixed(s->output[out], s->delay[out],
+                                            s->tmp_output, s->window, 128);
+
+            /* odd coefficients: transform straight into the delay buffer */
+            for (k = 0; k < 128; k++)
+                half[k] = s->transform_coeffs[ch][2*k+1];
+            s->imdct_256.imdct_fixed_half(&s->imdct_256, s->delay[out], half);
+        }
+    }
+}
+
+/**
+ * Downmix the output to mono or stereo.
+ * Every output sample is the matrix-weighted sum of the input channels,
+ * rounded and shifted right by 12. The result is written in place to
+ * samples[0] (and samples[1] for stereo). Any out_ch other than 1 or 2
+ * leaves the samples untouched.
+ *
+ * @param samples per-channel sample buffers, 256 entries each
+ * @param matrix  per-input-channel gains; [ch][0] and [ch][1] feed the first
+ *                and second output channel
+ * @param out_ch  number of output channels
+ * @param in_ch   number of input channels
+ * @param len     number of samples to process per channel
+ */
+void ff_ac3_downmix_c_fixed(int (*samples)[256], int (*matrix)[2], int out_ch, int in_ch, int len)
+{
+    int n, ch;
+
+    switch (out_ch) {
+    case 2:
+        for (n = 0; n < len; n++) {
+            int left = 0, right = 0;
+
+            for (ch = 0; ch < in_ch; ch++) {
+                left  += samples[ch][n] * matrix[ch][0];
+                right += samples[ch][n] * matrix[ch][1];
+            }
+            samples[0][n] = (left  + 2048) >> 12;
+            samples[1][n] = (right + 2048) >> 12;
+        }
+        break;
+    case 1:
+        for (n = 0; n < len; n++) {
+            int mono = 0;
+
+            for (ch = 0; ch < in_ch; ch++)
+                mono += samples[ch][n] * matrix[ch][0];
+            samples[0][n] = (mono + 2048) >> 12;
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+/**
+ * Derive the bit allocation pointer of every bin in [start, end).
+ * For each band the mask is offset by snr_offset and floor, and the
+ * difference between each bin's psd and that value, divided by 32 and
+ * clipped to 0..63, indexes bap_tab.
+ * An snr_offset of -960 zeroes the whole bap array instead.
+ */
+static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
+                                     int start, int end,
+                                     int snr_offset, int floor,
+                                     const uint8_t *bap_tab, uint8_t *bap)
+{
+    int bin, band;
+
+    /* special case, if snr offset is -960, set all bap's to zero */
+    if (snr_offset == -960) {
+        memset(bap, 0, AC3_MAX_COEFS);
+        return;
+    }
+
+    bin  = start;
+    band = ff_ac3_bin_to_band_tab[start];
+    for (;;) {
+        int level    = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor;
+        int band_end = FFMIN(ff_ac3_band_start_tab[band+1], end);
+
+        while (bin < band_end) {
+            int address = av_clip((psd[bin] - level) >> 5, 0, 63);
+            bap[bin] = bap_tab[address];
+            bin++;
+        }
+
+        /* stop once end does not lie beyond the start of the band just done */
+        if (!(end > ff_ac3_band_start_tab[band++]))
+            break;
+    }
+}
+
+/**
+ * Decode band structure for coupling, spectral extension, or enhanced coupling.
+ * The band structure defines how many subbands are in each band.  For each
+ * subband in the range, 1 means it is combined with the previous band, and 0
+ * means that it starts a new band.
+ * When the structure is neither coded in the bitstream nor taken from the
+ * default table (E-AC-3, blk != 0), the outputs are left untouched.
+ *
+ * @param[in] gbc bit reader context
+ * @param[in] blk block number
+ * @param[in] eac3 flag to indicate E-AC-3
+ * @param[in] ecpl flag to indicate enhanced coupling
+ * @param[in] start_subband subband number for start of range
+ * @param[in] end_subband subband number for end of range
+ * @param[in] default_band_struct default band structure table
+ * @param[out] num_bands number of bands (optionally NULL)
+ * @param[out] band_sizes array containing the number of bins in each band (optionally NULL)
+ */
+static void decode_band_structure(GetBitContext *gbc, int blk, int eac3,
+                                  int ecpl, int start_subband, int end_subband,
+                                  const uint8_t *default_band_struct,
+                                  int *num_bands, uint8_t *band_sizes)
+{
+    const int n_subbands = end_subband - start_subband;
+    const uint8_t *band_struct;
+    uint8_t coded_band_struct[22];
+    uint8_t sizes[22];
+    int n_bands = 0;
+    int sb;
+
+    /* decode band structure from bitstream or use default */
+    if (!eac3 || get_bits1(gbc)) {
+        for (sb = 0; sb < n_subbands - 1; sb++)
+            coded_band_struct[sb] = get_bits1(gbc);
+        band_struct = coded_band_struct;
+    } else if (blk) {
+        /* no change in band structure */
+        return;
+    } else {
+        band_struct = &default_band_struct[start_subband+1];
+    }
+
+    /* calculate number of bands and band sizes based on band structure.
+       note that the first 4 subbands in enhanced coupling span only 6 bins
+       instead of 12. */
+    if (num_bands || band_sizes) {
+        int cur = 0;
+
+        n_bands  = n_subbands;
+        sizes[0] = ecpl ? 6 : 12;
+        for (sb = 1; sb < n_subbands; sb++) {
+            int sb_size = (ecpl && sb < 4) ? 6 : 12;
+
+            if (band_struct[sb-1]) {
+                /* merged into the current band */
+                n_bands--;
+                sizes[cur] += sb_size;
+            } else {
+                /* starts a new band */
+                sizes[++cur] = sb_size;
+            }
+        }
+    }
+
+    /* set optional output params */
+    if (num_bands)
+        *num_bands = n_bands;
+    if (band_sizes)
+        memcpy(band_sizes, sizes, n_bands);
+}
+
+/*
+ * Multipliers used by decode_audio_block_fixed(): one entry is stored in
+ * s->spx_dst_end_freq and later multiplied with a bin position, keeping the
+ * upper 32 bits of the 64-bit product.
+ * NOTE(review): the values match round(2^32 / f) for f = 85, 97, 109, 133,
+ * 157, 181, 205, 229, so they look like reciprocals of the possible spectral
+ * extension end bins -- confirm against the original derivation.
+ */
+int end_freq_inv_tab[8] =
+{
+  50529027, 44278013, 39403370, 32292987, 27356480, 23729101, 20951060, 18755316
+};
+
+/**
+ * Bit-by-bit square root on values with 23 fractional bits.
+ * Builds the result from bit 22 down to bit 0, keeping a bit whenever the
+ * square of the candidate, shifted right by 23, does not exceed x.
+ *
+ * @param x input value; negative inputs yield 0
+ * @return the root, always below 1 << 23
+ */
+static int ac3_fixed_sqrt(int x)
+{
+    int root = 0;
+    int bit;
+
+    for (bit = 0x400000; bit != 0; bit >>= 1) {
+        const long long trial = root + bit;
+
+        if (x >= (int)((trial * trial) >> 23))
+            root += bit;
+    }
+    return root;
+}
+
+/**
+ * Decode a single audio block from the AC-3 bitstream.
+ */
+static int decode_audio_block_fixed(AC3FixedDecodeContext *s, int blk)
+{
+    int fbw_channels = s->fbw_channels;
+    int channel_mode = s->channel_mode;
+    int i, bnd, seg, ch;
+    int different_transforms;
+    int cpl_in_use;
+    GetBitContext *gbc = &s->gbc;
+    uint8_t bit_alloc_stages[AC3_MAX_CHANNELS];
+
+    memset(bit_alloc_stages, 0, AC3_MAX_CHANNELS);
+
+    /* block switch flags */
+    different_transforms = 0;
+    if (s->block_switch_syntax) {
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            s->block_switch[ch] = get_bits1(gbc);
+            if(ch > 1 && s->block_switch[ch] != s->block_switch[1])
+                different_transforms = 1;
+        }
+    }
+
+    /* dithering flags */
+    if (s->dither_flag_syntax) {
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            s->dither_flag[ch] = get_bits1(gbc);
+        }
+    }
+
+    /* dynamic range */
+    i = !(s->channel_mode);
+    do {
+        if(get_bits1(gbc)) {
+            s->dynamic_range[i] = get_bits(gbc, 8);
+        } else if(blk == 0) {
+            s->dynamic_range[i] = 0;
+        }
+    } while(i--);
+
+    /* spectral extension strategy */
+    if (s->eac3 && (!blk || get_bits1(gbc))) {
+        s->spx_in_use = get_bits1(gbc);
+        if (s->spx_in_use) {
+            int dst_start_freq, dst_end_freq, src_start_freq,
+                start_subband, end_subband;
+
+            /* determine which channels use spx */
+            if (s->channel_mode == AC3_CHMODE_MONO) {
+                s->channel_uses_spx[1] = 1;
+            } else {
+                for (ch = 1; ch <= fbw_channels; ch++)
+                    s->channel_uses_spx[ch] = get_bits1(gbc);
+            }
+
+            /* get the frequency bins of the spx copy region and the spx start
+               and end subbands */
+            dst_start_freq = get_bits(gbc, 2);
+            start_subband  = get_bits(gbc, 3) + 2;
+            if (start_subband > 7)
+                start_subband += start_subband - 7;
+            end_subband    = get_bits(gbc, 3) + 5;
+            s->spx_dst_end_freq = end_freq_inv_tab[end_subband];
+            end_subband += 5;
+            if (end_subband   > 7)
+                end_subband   += end_subband   - 7;
+            dst_start_freq = dst_start_freq * 12 + 25;
+            src_start_freq = start_subband  * 12 + 25;
+            dst_end_freq   = end_subband    * 12 + 25;
+
+            /* check validity of spx ranges */
+            if (start_subband >= end_subband) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "range (%d >= %d)\n", start_subband, end_subband);
+                return -1;
+            }
+            if (dst_start_freq >= src_start_freq) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "copy start bin (%d >= %d)\n", dst_start_freq, src_start_freq);
+                return -1;
+            }
+
+            s->spx_dst_start_freq = dst_start_freq;
+            s->spx_src_start_freq = src_start_freq;
+
+            decode_band_structure(gbc, blk, s->eac3, 0,
+                                  start_subband, end_subband,
+                                  ff_eac3_default_spx_band_struct,
+                                  &s->num_spx_bands,
+                                  s->spx_band_sizes);
+        } else {
+            for (ch = 1; ch <= fbw_channels; ch++) {
+                s->channel_uses_spx[ch] = 0;
+                s->first_spx_coords[ch] = 1;
+            }
+        }
+    }
+
+    /* spectral extension coordinates */
+    if (s->spx_in_use) {
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            if (s->channel_uses_spx[ch]) {
+                if (s->first_spx_coords[ch] || get_bits1(gbc)) {
+
+                    int spx_blend;
+                    int bin, master_spx_coord;
+
+                    s->first_spx_coords[ch] = 0;
+
+                    spx_blend = MULT_INT_WITH_FINT_AND_CONVERT_TO_FINT(get_bits(gbc, 5) , 2048);
+                    master_spx_coord = get_bits(gbc, 2) * 3;
+
+                    bin = s->spx_src_start_freq;
+                    for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+                        long long accu;
+                        int bandsize;
+                        int spx_coord_exp, spx_coord_mant;
+                        int nratio, sblend, nblend;
+
+                        /* calculate blending factors */
+                        bandsize = s->spx_band_sizes[bnd];
+                        accu = (long long)((bin << 23) + (bandsize << 22)) * s->spx_dst_end_freq;
+                        nratio = (int)(accu >> 32);
+                        nratio -= spx_blend << 18;
+
+                        if (nratio < 0)
+                        {
+                            nblend = 0;
+                            sblend = 0x800000;
+                        }
+                        else if (nratio > 0x7fffff)
+                        {
+                            nblend = 0x800000;
+                            sblend = 0;
+                        }
+                        else
+                        {
+                            nblend = ac3_fixed_sqrt(nratio);
+                            accu = (long long)nblend * 1859775393;
+                            nblend = (int)((accu + (1<<29)) >> 30);
+                            sblend = ac3_fixed_sqrt(0x800000 - nratio);
+                        }
+
+                        bin += bandsize;
+
+                        /* decode spx coordinates */
+                        spx_coord_exp  = get_bits(gbc, 4);
+                        spx_coord_mant = get_bits(gbc, 2);
+                        if (spx_coord_exp == 15) spx_coord_mant <<= 1;
+                        else spx_coord_mant += 4;
+                        spx_coord_mant <<= (25 - spx_coord_exp - master_spx_coord);
+
+                        /* multiply noise and signal blending factors by spx coordinate */
+                        accu = (long long)nblend * spx_coord_mant;
+                        s->spx_noise_blend[ch][bnd]  = (int)((accu + (1<<22)) >> 23);
+                        accu = (long long)sblend * spx_coord_mant;
+                        s->spx_signal_blend[ch][bnd] = (int)((accu + (1<<22)) >> 23);
+                    }
+                }
+            } else {
+                s->first_spx_coords[ch] = 1;
+            }
+        }
+    }
+
+    /* coupling strategy */
+    if (s->eac3 ? s->cpl_strategy_exists[blk] : get_bits1(gbc)) {
+        memset(bit_alloc_stages, 3, AC3_MAX_CHANNELS);
+        if (!s->eac3)
+            s->cpl_in_use[blk] = get_bits1(gbc);
+        if (s->cpl_in_use[blk]) {
+            /* coupling in use */
+            int cpl_start_subband, cpl_end_subband;
+
+            if (channel_mode < AC3_CHMODE_STEREO) {
+                av_log(s->avctx, AV_LOG_ERROR, "coupling not allowed in mono or dual-mono\n");
+                return -1;
+            }
+
+            /* check for enhanced coupling */
+            if (s->eac3 && get_bits1(gbc)) {
+                /* TODO: parse enhanced coupling strategy info */
+                av_log_missing_feature(s->avctx, "Enhanced coupling", 1);
+                return -1;
+            }
+
+            /* determine which channels are coupled */
+            if (s->eac3 && s->channel_mode == AC3_CHMODE_STEREO) {
+                s->channel_in_cpl[1] = 1;
+                s->channel_in_cpl[2] = 1;
+            } else {
+                for (ch = 1; ch <= fbw_channels; ch++)
+                    s->channel_in_cpl[ch] = get_bits1(gbc);
+            }
+
+            /* phase flags in use */
+            if (channel_mode == AC3_CHMODE_STEREO)
+                s->phase_flags_in_use = get_bits1(gbc);
+
+            /* coupling frequency range */
+            cpl_start_subband = get_bits(gbc, 4);
+            cpl_end_subband = s->spx_in_use ? (s->spx_src_start_freq - 37) / 12 :
+                                              get_bits(gbc, 4) + 3;
+
+            if (cpl_start_subband >= cpl_end_subband) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid coupling range (%d >= %d)\n",
+                       cpl_start_subband, cpl_end_subband);
+                return -1;
+            }
+
+            s->start_freq[CPL_CH] = cpl_start_subband * 12 + 37;
+            s->end_freq[CPL_CH]   = cpl_end_subband   * 12 + 37;
+
+            decode_band_structure(gbc, blk, s->eac3, 0, cpl_start_subband,
+                                  cpl_end_subband,
+                                  ff_eac3_default_cpl_band_struct,
+                                  &s->num_cpl_bands, s->cpl_band_sizes);
+        } else {
+            /* coupling not in use */
+            for (ch = 1; ch <= fbw_channels; ch++) {
+                s->channel_in_cpl[ch] = 0;
+                s->first_cpl_coords[ch] = 1;
+            }
+            s->first_cpl_leak = s->eac3;
+            s->phase_flags_in_use = 0;
+        }
+    } else if (!s->eac3) {
+        if(!blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new coupling strategy must be present in block 0\n");
+            return -1;
+        } else {
+            s->cpl_in_use[blk] = s->cpl_in_use[blk-1];
+        }
+    }
+    cpl_in_use = s->cpl_in_use[blk];
+
+    /* coupling coordinates */
+    if (cpl_in_use) {
+        int cpl_coords_exist = 0;
+
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            if (s->channel_in_cpl[ch]) {
+                if ((s->eac3 && s->first_cpl_coords[ch]) || get_bits1(gbc)) {
+                    int master_cpl_coord, cpl_coord_exp, cpl_coord_mant;
+                    s->first_cpl_coords[ch] = 0;
+                    cpl_coords_exist = 1;
+                    master_cpl_coord = 3 * get_bits(gbc, 2);
+                    for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                        cpl_coord_exp = get_bits(gbc, 4);
+                        cpl_coord_mant = get_bits(gbc, 4);
+                        if (cpl_coord_exp == 15)
+                            s->cpl_coords[ch][bnd] = cpl_coord_mant << 22;
+                        else
+                            s->cpl_coords[ch][bnd] = (cpl_coord_mant + 16) << 21;
+                        s->cpl_coords[ch][bnd] >>= (cpl_coord_exp + master_cpl_coord);
+                    }
+                } else if (!blk) {
+                    av_log(s->avctx, AV_LOG_ERROR, "new coupling coordinates must be present in block 0\n");
+                    return -1;
+                }
+            } else {
+                /* channel not in coupling */
+                s->first_cpl_coords[ch] = 1;
+            }
+        }
+        /* phase flags */
+        if (channel_mode == AC3_CHMODE_STEREO && cpl_coords_exist) {
+            for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+                s->phase_flags[bnd] = s->phase_flags_in_use? get_bits1(gbc) : 0;
+            }
+        }
+    }
+
+    /* stereo rematrixing strategy and band structure */
+    if (channel_mode == AC3_CHMODE_STEREO) {
+        if ((s->eac3 && !blk) || get_bits1(gbc)) {
+            s->num_rematrixing_bands = 4;
+            if (cpl_in_use && s->start_freq[CPL_CH] <= 61) {
+                s->num_rematrixing_bands -= 1 + (s->start_freq[CPL_CH] == 37);
+            } else if (s->spx_in_use && s->spx_src_start_freq <= 61) {
+                s->num_rematrixing_bands--;
+            }
+            for(bnd=0; bnd<s->num_rematrixing_bands; bnd++)
+                s->rematrixing_flags[bnd] = get_bits1(gbc);
+        } else if (!blk) {
+            av_log(s->avctx, AV_LOG_WARNING, "Warning: new rematrixing strategy not present in block 0\n");
+            s->num_rematrixing_bands = 0;
+        }
+    }
+
+    /* exponent strategies for each channel */
+    for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+        if (!s->eac3)
+            s->exp_strategy[blk][ch] = get_bits(gbc, 2 - (ch == s->lfe_ch));
+        if(s->exp_strategy[blk][ch] != EXP_REUSE)
+            bit_alloc_stages[ch] = 3;
+    }
+
+    /* channel bandwidth */
+    for (ch = 1; ch <= fbw_channels; ch++) {
+        s->start_freq[ch] = 0;
+        if (s->exp_strategy[blk][ch] != EXP_REUSE) {
+            int group_size;
+            int prev = s->end_freq[ch];
+            if (s->channel_in_cpl[ch])
+                s->end_freq[ch] = s->start_freq[CPL_CH];
+            else if (s->channel_uses_spx[ch])
+                s->end_freq[ch] = s->spx_src_start_freq;
+            else {
+                int bandwidth_code = get_bits(gbc, 6);
+                if (bandwidth_code > 60) {
+                    av_log(s->avctx, AV_LOG_ERROR, "bandwidth code = %d > 60\n", bandwidth_code);
+                    return -1;
+                }
+                s->end_freq[ch] = bandwidth_code * 3 + 73;
+            }
+            group_size = 3 << (s->exp_strategy[blk][ch] - 1);
+            s->num_exp_groups[ch] = (s->end_freq[ch]+group_size-4) / group_size;
+            if(blk > 0 && s->end_freq[ch] != prev)
+                memset(bit_alloc_stages, 3, AC3_MAX_CHANNELS);
+        }
+    }
+    if (cpl_in_use && s->exp_strategy[blk][CPL_CH] != EXP_REUSE) {
+        s->num_exp_groups[CPL_CH] = (s->end_freq[CPL_CH] - s->start_freq[CPL_CH]) /
+                                    (3 << (s->exp_strategy[blk][CPL_CH] - 1));
+    }
+
+    /* decode exponents for each channel */
+    for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+        if (s->exp_strategy[blk][ch] != EXP_REUSE) {
+            s->dexps[ch][0] = get_bits(gbc, 4) << !ch;
+            if (decode_exponents(gbc, s->exp_strategy[blk][ch],
+                                 s->num_exp_groups[ch], s->dexps[ch][0],
+                                 &s->dexps[ch][s->start_freq[ch]+!!ch])) {
+                av_log(s->avctx, AV_LOG_ERROR, "exponent out-of-range\n");
+                return -1;
+            }
+            if(ch != CPL_CH && ch != s->lfe_ch)
+                skip_bits(gbc, 2); /* skip gainrng */
+        }
+    }
+
+    /* bit allocation information */
+    if (s->bit_allocation_syntax) {
+        if (get_bits1(gbc)) {
+            s->bit_alloc_params.slow_decay = ff_ac3_slow_decay_tab[get_bits(gbc, 2)] >> s->bit_alloc_params.sr_shift;
+            s->bit_alloc_params.fast_decay = ff_ac3_fast_decay_tab[get_bits(gbc, 2)] >> s->bit_alloc_params.sr_shift;
+            s->bit_alloc_params.slow_gain  = ff_ac3_slow_gain_tab[get_bits(gbc, 2)];
+            s->bit_alloc_params.db_per_bit = ff_ac3_db_per_bit_tab[get_bits(gbc, 2)];
+            s->bit_alloc_params.floor  = ff_ac3_floor_tab[get_bits(gbc, 3)];
+            for(ch=!cpl_in_use; ch<=s->channels; ch++)
+                bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+        } else if (!blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new bit allocation info must be present in block 0\n");
+            return -1;
+        }
+    }
+
+    /* signal-to-noise ratio offsets and fast gains (signal-to-mask ratios) */
+    if(!s->eac3 || !blk){
+        if(s->snr_offset_strategy && get_bits1(gbc)) {
+            int snr = 0;
+            int csnr;
+            csnr = (get_bits(gbc, 6) - 15) << 4;
+            for (i = ch = !cpl_in_use; ch <= s->channels; ch++) {
+                /* snr offset */
+                if (ch == i || s->snr_offset_strategy == 2)
+                    snr = (csnr + get_bits(gbc, 4)) << 2;
+                /* run at least last bit allocation stage if snr offset changes */
+                if(blk && s->snr_offset[ch] != snr) {
+                    bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 1);
+                }
+                s->snr_offset[ch] = snr;
+
+                /* fast gain (normal AC-3 only) */
+                if (!s->eac3) {
+                    int prev = s->fast_gain[ch];
+                    s->fast_gain[ch] = ff_ac3_fast_gain_tab[get_bits(gbc, 3)];
+                    /* run last 2 bit allocation stages if fast gain changes */
+                    if(blk && prev != s->fast_gain[ch])
+                        bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+                }
+            }
+        } else if (!s->eac3 && !blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new snr offsets must be present in block 0\n");
+            return -1;
+        }
+    }
+
+    /* fast gain (E-AC-3 only) */
+    if (s->fast_gain_syntax && get_bits1(gbc)) {
+        for (ch = !cpl_in_use; ch <= s->channels; ch++) {
+            int prev = s->fast_gain[ch];
+            s->fast_gain[ch] = ff_ac3_fast_gain_tab[get_bits(gbc, 3)];
+            /* run last 2 bit allocation stages if fast gain changes */
+            if(blk && prev != s->fast_gain[ch])
+                bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+        }
+    } else if (s->eac3 && !blk) {
+        for (ch = !cpl_in_use; ch <= s->channels; ch++)
+            s->fast_gain[ch] = ff_ac3_fast_gain_tab[4];
+    }
+
+    /* coupling leak information */
+    if (cpl_in_use) {
+        if (s->first_cpl_leak || get_bits1(gbc)) {
+            int fl = get_bits(gbc, 3);
+            int sl = get_bits(gbc, 3);
+            /* run last 2 bit allocation stages for coupling channel if
+               coupling leak changes */
+            if(blk && (fl != s->bit_alloc_params.cpl_fast_leak ||
+                       sl != s->bit_alloc_params.cpl_slow_leak)) {
+                bit_alloc_stages[CPL_CH] = FFMAX(bit_alloc_stages[CPL_CH], 2);
+            }
+            s->bit_alloc_params.cpl_fast_leak = fl;
+            s->bit_alloc_params.cpl_slow_leak = sl;
+        } else if (!s->eac3 && !blk) {
+            av_log(s->avctx, AV_LOG_ERROR, "new coupling leak info must be present in block 0\n");
+            return -1;
+        }
+        s->first_cpl_leak = 0;
+    }
+
+    /* delta bit allocation information */
+    if (s->dba_syntax && get_bits1(gbc)) {
+        /* delta bit allocation exists (strategy) */
+        for (ch = !cpl_in_use; ch <= fbw_channels; ch++) {
+            s->dba_mode[ch] = get_bits(gbc, 2);
+            if (s->dba_mode[ch] == DBA_RESERVED) {
+                av_log(s->avctx, AV_LOG_ERROR, "delta bit allocation strategy reserved\n");
+                return -1;
+            }
+            bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+        }
+        /* channel delta offset, len and bit allocation */
+        for (ch = !cpl_in_use; ch <= fbw_channels; ch++) {
+            if (s->dba_mode[ch] == DBA_NEW) {
+                s->dba_nsegs[ch] = get_bits(gbc, 3) + 1;
+                for (seg = 0; seg < s->dba_nsegs[ch]; seg++) {
+                    s->dba_offsets[ch][seg] = get_bits(gbc, 5);
+                    s->dba_lengths[ch][seg] = get_bits(gbc, 4);
+                    s->dba_values[ch][seg] = get_bits(gbc, 3);
+                }
+                /* run last 2 bit allocation stages if new dba values */
+                bit_alloc_stages[ch] = FFMAX(bit_alloc_stages[ch], 2);
+            }
+        }
+    } else if(blk == 0) {
+        for(ch=0; ch<=s->channels; ch++) {
+            s->dba_mode[ch] = DBA_NONE;
+        }
+    }
+
+    /* Bit allocation */
+    for(ch=!cpl_in_use; ch<=s->channels; ch++) {
+        if(bit_alloc_stages[ch] > 2) {
+            /* Exponent mapping into PSD and PSD integration */
+            ff_ac3_bit_alloc_calc_psd(s->dexps[ch],
+                                      s->start_freq[ch], s->end_freq[ch],
+                                      s->psd[ch], s->band_psd[ch]);
+        }
+        if(bit_alloc_stages[ch] > 1) {
+            /* Compute excitation function, Compute masking curve, and
+               Apply delta bit allocation */
+            if (ff_ac3_bit_alloc_calc_mask(&s->bit_alloc_params, s->band_psd[ch],
+                                           s->start_freq[ch], s->end_freq[ch],
+                                           s->fast_gain[ch], (ch == s->lfe_ch),
+                                           s->dba_mode[ch], s->dba_nsegs[ch],
+                                           s->dba_offsets[ch], s->dba_lengths[ch],
+                                           s->dba_values[ch], s->mask[ch])) {
+                av_log(s->avctx, AV_LOG_ERROR, "error in bit allocation\n");
+                return -1;
+            }
+        }
+        if(bit_alloc_stages[ch] > 0) {
+            /* Compute bit allocation */
+            const uint8_t *bap_tab = s->channel_uses_aht[ch] ?
+                                     ff_eac3_hebap_tab : ff_ac3_bap_tab;
+            ac3_bit_alloc_calc_bap_c(s->mask[ch], s->psd[ch],
+                                      s->start_freq[ch], s->end_freq[ch],
+                                      s->snr_offset[ch],
+                                      s->bit_alloc_params.floor,
+                                      bap_tab, s->bap[ch]);
+        }
+    }
+
+    /* unused dummy data */
+    if (s->skip_syntax && get_bits1(gbc)) {
+        int skipl = get_bits(gbc, 9);
+        while(skipl--)
+            skip_bits(gbc, 8);
+    }
+
+    /* unpack the transform coefficients
+       this also uncouples channels if coupling is in use. */
+    decode_transform_coeffs_fixed(s, blk);
+
+    /* TODO: generate enhanced coupling coordinates and uncouple */
+
+    /* recover coefficients if rematrixing is in use */
+    if(s->channel_mode == AC3_CHMODE_STEREO)
+        do_rematrixing_fixed(s);
+
+    /* apply scaling to coefficients (headroom, dynrng) */
+    for(ch=1; ch<=s->channels; ch++) {
+        int dynrng;
+         if(s->channel_mode == AC3_CHMODE_DUALMONO) {
+            dynrng = s->dynamic_range[2-ch];
+        } else {
+            dynrng = s->dynamic_range[0];
+        }
+        scale_coefs(s->transform_coeffs[ch], s->fixed_coeffs[ch], dynrng, 256);
+    }
+
+        do_imdct_fixed(s, s->channels);
+
+    if (s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
+        s->fbw_channels == s->out_channels))
+            ff_ac3_downmix_c_fixed(s->output, s->downmix_coeffs, s->out_channels, s->fbw_channels, 256);
+
+    return 0;
+}
+
+/**
+ * Decode a single AC-3 fixed frame from avpkt; a copy of s->frame is stored
+ * in data and *got_frame_ptr is set to 1 unless the function bails out early.
+ * @return FFMIN(avpkt->size, s->frame_size); negative on frame sync error
+ *         or get_buffer() failure
+ */
+int ac3_fixed_decode_frame(AVCodecContext * avctx, void *data,
+                            int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    AC3FixedDecodeContext *s = avctx->priv_data;
+    int   *out_samples_flt;
+    int16_t *out_samples_s16;
+    int blk, ch, err, ret;
+    const uint8_t *channel_map;
+    const int *output[AC3_MAX_CHANNELS];
+
+    /* copy input buffer to decoder context to avoid reading past the end
+       of the buffer, which can be caused by a damaged input stream. */
+    if (buf_size >= 2 && AV_RB16(buf) == 0x770B) {
+        /* seems to be byte-swapped AC-3 */
+        int cnt = FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE) >> 1;
+        s->dsp.bswap16_buf((uint16_t *)s->input_buffer, (const uint16_t *)buf, cnt);
+    } else
+        memcpy(s->input_buffer, buf, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE));
+    buf = s->input_buffer;
+    /* at most AC3_FRAME_BUFFER_SIZE bytes were copied, so bound the reader */
+    init_get_bits(&s->gbc, buf, FFMIN(buf_size, AC3_FRAME_BUFFER_SIZE) * 8);
+
+    /* parse the syncinfo */
+    err = parse_frame_header_fixed(s);
+    if (err) {
+        switch(err) {
+            case AAC_AC3_PARSE_ERROR_SYNC:
+                av_log(avctx, AV_LOG_ERROR, "frame sync error\n");
+                return -1;
+            case AAC_AC3_PARSE_ERROR_BSID:
+                av_log(avctx, AV_LOG_ERROR, "invalid bitstream id\n");
+                break;
+            case AAC_AC3_PARSE_ERROR_SAMPLE_RATE:
+                av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
+                break;
+            case AAC_AC3_PARSE_ERROR_FRAME_SIZE:
+                av_log(avctx, AV_LOG_ERROR, "invalid frame size\n");
+                break;
+            case AAC_AC3_PARSE_ERROR_FRAME_TYPE:
+                /* skip frame if CRC is ok. otherwise use error concealment. */
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "invalid header\n");
+                break;
+        }
+    } else {
+        /* check that reported frame size fits in input buffer */
+        if (s->frame_size > buf_size) {
+            av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
+            err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+        }
+    }
+
+    /* if frame is ok, set audio parameters */
+    if (!err) {
+        avctx->sample_rate = s->sample_rate;
+        avctx->bit_rate = s->bit_rate;
+
+        /* channel config */
+        s->out_channels = s->channels;
+        s->output_mode = s->channel_mode;
+        if(s->lfe_on)
+            s->output_mode |= AC3_OUTPUT_LFEON;
+        if (avctx->request_channels > 0 && avctx->request_channels <= 2 &&
+                avctx->request_channels < s->channels) {
+            s->out_channels = avctx->request_channels;
+            s->output_mode  = avctx->request_channels == 1 ? AC3_CHMODE_MONO : AC3_CHMODE_STEREO;
+            s->channel_layout = avpriv_ac3_channel_layout_tab[s->output_mode];
+        }
+        avctx->channels = s->out_channels;
+        avctx->channel_layout = s->channel_layout;
+
+        s->loro_center_mix_level   = gain_levels_fixed[s->  center_mix_level];
+        s->loro_surround_mix_level = gain_levels_fixed[s->surround_mix_level];
+        s->ltrt_center_mix_level   = LEVEL_MINUS_3DB; /* NOTE(review): Lo/Ro levels come from gain_levels_fixed[]; confirm LEVEL_MINUS_3DB uses the same scale */
+        s->ltrt_surround_mix_level = LEVEL_MINUS_3DB;
+        /* set downmixing coefficients if needed */
+        if(s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
+                s->fbw_channels == s->out_channels)) {
+            set_downmix_coeffs_fixed(s);
+        }
+    } else if (!s->out_channels) {
+        s->out_channels = avctx->channels;
+        if(s->out_channels < s->channels)
+            s->output_mode  = s->out_channels == 1 ? AC3_CHMODE_MONO : AC3_CHMODE_STEREO;
+    }
+    /* set audio service type based on bitstream mode for AC-3 */
+    avctx->audio_service_type = s->bitstream_mode;
+    if (s->bitstream_mode == 0x7 && s->channels > 1)
+        avctx->audio_service_type = AV_AUDIO_SERVICE_TYPE_KARAOKE;
+
+    /* get output buffer */
+    s->frame.nb_samples = s->num_blocks * 256;
+    if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return ret;
+    }
+    out_samples_flt = (int   *)s->frame.data[0];
+    out_samples_s16 = (int16_t *)s->frame.data[0];
+
+    /* decode the audio blocks */
+    channel_map = ff_ac3_dec_channel_map[s->output_mode & ~AC3_OUTPUT_LFEON][s->lfe_on];
+    for (ch = 0; ch < s->out_channels; ch++)
+        output[ch] = s->output[channel_map[ch]];
+    for (blk = 0; blk < s->num_blocks; blk++) {
+        if (!err && decode_audio_block_fixed(s, blk)) {
+            av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
+            err = 1; /* remaining blocks are not decoded; s->output is still interleaved below */
+        }
+
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            s->fmt_conv.fixed_interleave(out_samples_flt, output, 256,
+                                         s->out_channels);
+            out_samples_flt += 256 * s->out_channels;
+        } else {
+            s->fmt_conv.fixed_to_int16_interleave(out_samples_s16, output, 256,
+                                                  s->out_channels);
+            out_samples_s16 += 256 * s->out_channels;
+        }
+    }
+    *got_frame_ptr   = 1;
+    *(AVFrame *)data = s->frame;
+    return FFMIN(buf_size, s->frame_size);
+}
+
+/**
+ * Close callback of the fixed-point AC-3 decoder: calls ff_mdct_end() on
+ * both IMDCT contexts of the private context. Always returns 0.
+ */
+int ac3_fixed_decode_end(AVCodecContext *avctx)
+{
+    AC3FixedDecodeContext *ctx = avctx->priv_data;
+    ff_mdct_end(&ctx->imdct_512);
+    ff_mdct_end(&ctx->imdct_256);
+    return 0;
+}
+
+#define OFFSET(x) offsetof(AC3FixedDecodeContext, x)
+#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
+static const AVOption options[] = { /* private options; each entry stores into the named AC3FixedDecodeContext field */
+    { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, 0.0, 1.0, PAR },
+    /* NOTE(review): drc_scale is declared int in AC3FixedDecodeContext but registered here as a float option - confirm */
+    {"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), AV_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, 0, "dmix_mode"},
+    {"ltrt_cmixlev",   "Lt/Rt Center Mix Level",   OFFSET(ltrt_center_mix_level),    AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+    {"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level),  AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+    {"loro_cmixlev",   "Lo/Ro Center Mix Level",   OFFSET(loro_center_mix_level),    AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+    {"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level),  AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+    /* ac3_fixed_decode_frame() assigns all four mix levels on every frame whose header parses without error */
+    { NULL},
+};
+
+static const AVClass ac3_decoder_class = { /* AVClass installed as ff_ac3_fixed_decoder.priv_class */
+    .class_name = "AC3 fixed decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,               /* the options[] table defined earlier in this file */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_ac3_fixed_decoder = { /* codec descriptor wiring the fixed-point AC-3 callbacks together */
+    .name           = "ac3_fixed",
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = CODEC_ID_AC3,
+    .priv_data_size = sizeof (AC3FixedDecodeContext),
+    .init           = ac3_fixed_decode_init,
+    .close          = ac3_fixed_decode_end,
+    .decode         = ac3_fixed_decode_frame,
+    .capabilities   = CODEC_CAP_DR1, /* ac3_fixed_decode_frame() obtains its output through avctx->get_buffer() */
+    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT, /* NOTE(review): the FLT path writes through an int pointer via fixed_interleave() - confirm the samples are really floats */
+                                                      AV_SAMPLE_FMT_S16,
+                                                      AV_SAMPLE_FMT_NONE },
+    .priv_class     = &ac3_decoder_class,
+};
diff --git a/libavcodec/mips/ac3dec_fixed.h b/libavcodec/mips/ac3dec_fixed.h
new file mode 100644
index 0000000..ee05f46
--- /dev/null
+++ b/libavcodec/mips/ac3dec_fixed.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * AC3 fixed-point decoder for MIPS platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_AC3DEC_FIXED_H
+#define AVCODEC_MIPS_AC3DEC_FIXED_H
+
+#include "libavutil/lfg.h"
+#include "libavcodec/ac3.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/fmtconvert.h"
+
+#define AC3_OUTPUT_LFEON  8 ///< flag OR'ed into output_mode when lfe_on is set
+#define SPX_MAX_BANDS    17 ///< dimension of the per-band spectral extension arrays
+
+/* Mix levels in 16.16 fixed point (1.0 == 65536, truncated); each 1.5 dB step is a factor of 2^(1/4). */
+#define LEVEL_FIXED_PLUS_3DB          92681
+#define LEVEL_FIXED_PLUS_1POINT5DB    77935
+#define LEVEL_FIXED_MINUS_1POINT5DB   55108
+#define LEVEL_FIXED_MINUS_3DB         46340
+#define LEVEL_FIXED_MINUS_4POINT5DB   38967
+#define LEVEL_FIXED_MINUS_6DB         32768
+#define LEVEL_FIXED_MINUS_9DB         23170
+#define LEVEL_FIXED_ZERO              0
+#define LEVEL_FIXED_ONE               65536
+
+/** Large enough for maximum possible frame size when the specification limit is ignored */
+#define AC3_FRAME_BUFFER_SIZE 32768
+
+typedef struct { /* private context of the fixed-point AC-3 decoder (used as AVCodecContext.priv_data) */
+    AVClass        *class;                  ///< class for AVOptions
+    AVCodecContext *avctx;                  ///< parent context
+    AVFrame frame;                          ///< AVFrame for decoded output
+    GetBitContext gbc;                      ///< bitstream reader
+
+///@name Bit stream information
+///@{
+    int frame_type;                         ///< frame type                             (strmtyp)
+    int substreamid;                        ///< substream identification
+    int frame_size;                         ///< current frame size, in bytes
+    int bit_rate;                           ///< stream bit rate, in bits-per-second
+    int sample_rate;                        ///< sample frequency, in Hz
+    int num_blocks;                         ///< number of audio blocks
+    int bitstream_mode;                     ///< bitstream mode                         (bsmod)
+    int channel_mode;                       ///< channel mode                           (acmod)
+    int channel_layout;                     ///< channel layout
+    int lfe_on;                             ///< lfe channel in use
+    int channel_map;                        ///< custom channel map
+    int center_mix_level;                   ///< Center mix level index
+    int surround_mix_level;                 ///< Surround mix level index
+    int eac3;                               ///< indicates if current frame is E-AC-3
+///@}
+
+    int preferred_stereo_downmix;           ///< storage for the "dmix_mode" option
+    float ltrt_center_mix_level;            ///< storage for the "ltrt_cmixlev" option; reassigned per frame by ac3_fixed_decode_frame()
+    float ltrt_surround_mix_level;          ///< storage for the "ltrt_surmixlev" option; reassigned per frame by ac3_fixed_decode_frame()
+    float loro_center_mix_level;            ///< storage for the "loro_cmixlev" option; reassigned per frame by ac3_fixed_decode_frame()
+    float loro_surround_mix_level;          ///< storage for the "loro_surmixlev" option; reassigned per frame by ac3_fixed_decode_frame()
+
+///@name Frame syntax parameters
+    int snr_offset_strategy;                ///< SNR offset strategy                    (snroffststr)
+    int block_switch_syntax;                ///< block switch syntax enabled            (blkswe)
+    int dither_flag_syntax;                 ///< dither flag syntax enabled             (dithflage)
+    int bit_allocation_syntax;              ///< bit allocation model syntax enabled    (bamode)
+    int fast_gain_syntax;                   ///< fast gain codes enabled                (frmfgaincode)
+    int dba_syntax;                         ///< delta bit allocation syntax enabled    (dbaflde)
+    int skip_syntax;                        ///< skip field syntax enabled              (skipflde)
+///@}
+
+///@name Standard coupling
+    int cpl_in_use[AC3_MAX_BLOCKS];         ///< coupling in use                        (cplinu)
+    int cpl_strategy_exists[AC3_MAX_BLOCKS];///< coupling strategy exists               (cplstre)
+    int channel_in_cpl[AC3_MAX_CHANNELS];   ///< channel in coupling                    (chincpl)
+    int phase_flags_in_use;                 ///< phase flags in use                     (phsflginu)
+    int phase_flags[AC3_MAX_CPL_BANDS];     ///< phase flags                            (phsflg)
+    int num_cpl_bands;                      ///< number of coupling bands               (ncplbnd)
+    uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS]; ///< number of coeffs in each coupling band
+    int firstchincpl;                       ///< first channel in coupling
+    int first_cpl_coords[AC3_MAX_CHANNELS]; ///< first coupling coordinates states      (firstcplcos)
+    int cpl_coords[AC3_MAX_CHANNELS][AC3_MAX_CPL_BANDS]; ///< coupling coordinates      (cplco)
+///@}
+
+///@name Spectral extension
+///@{
+    int spx_in_use;                             ///< spectral extension in use              (spxinu)
+    uint8_t channel_uses_spx[AC3_MAX_CHANNELS]; ///< channel uses spectral extension        (chinspx)
+    int8_t spx_atten_code[AC3_MAX_CHANNELS];    ///< spx attenuation code                   (spxattencod)
+    int spx_src_start_freq;                     ///< spx start frequency bin
+    int spx_dst_end_freq;                       ///< spx end frequency bin
+    int spx_dst_start_freq;                     ///< spx starting frequency bin for copying (copystartmant)
+                                                ///< the copy region ends at the start of the spx region.
+    int num_spx_bands;                          ///< number of spx bands                    (nspxbnds)
+    uint8_t spx_band_sizes[SPX_MAX_BANDS];      ///< number of bins in each spx band
+    uint8_t first_spx_coords[AC3_MAX_CHANNELS]; ///< first spx coordinates states           (firstspxcos)
+    int spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor  (nblendfact)
+    int spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact)
+///@}
+
+///@name Adaptive hybrid transform
+    int channel_uses_aht[AC3_MAX_CHANNELS];                         ///< channel AHT in use (chahtinu)
+    int pre_mantissa[AC3_MAX_CHANNELS][AC3_MAX_COEFS][AC3_MAX_BLOCKS];  ///< pre-IDCT mantissas
+///@}
+
+///@name Channel
+    int fbw_channels;                           ///< number of full-bandwidth channels
+    int channels;                               ///< number of total channels
+    int lfe_ch;                                 ///< index of LFE channel
+    int downmix_coeffs[AC3_MAX_CHANNELS][2];  ///< stereo downmix coefficients
+    int downmixed;                              ///< indicates if coeffs are currently downmixed
+    int output_mode;                            ///< output channel configuration
+    int out_channels;                           ///< number of output channels
+///@}
+
+///@name Dynamic range
+    int dynamic_range[2];                 ///< dynamic range
+    int drc_scale;                        ///< percentage of dynamic range compression to be applied; NOTE(review): the "drc_scale" AVOption is AV_OPT_TYPE_FLOAT but this field is int - confirm
+///@}
+
+///@name Bandwidth
+    int start_freq[AC3_MAX_CHANNELS];       ///< start frequency bin                    (strtmant)
+    int end_freq[AC3_MAX_CHANNELS];         ///< end frequency bin                      (endmant)
+///@}
+
+///@name Rematrixing
+    int num_rematrixing_bands;              ///< number of rematrixing bands            (nrematbnd)
+    int rematrixing_flags[4];               ///< rematrixing flags                      (rematflg)
+///@}
+
+///@name Exponents
+    int num_exp_groups[AC3_MAX_CHANNELS];           ///< Number of exponent groups      (nexpgrp)
+    int8_t dexps[AC3_MAX_CHANNELS][AC3_MAX_COEFS];  ///< decoded exponents
+    int exp_strategy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS]; ///< exponent strategies        (expstr)
+///@}
+
+///@name Bit allocation
+    AC3BitAllocParameters bit_alloc_params;         ///< bit allocation parameters
+    int first_cpl_leak;                             ///< first coupling leak state      (firstcplleak)
+    int snr_offset[AC3_MAX_CHANNELS];               ///< signal-to-noise ratio offsets  (snroffst)
+    int fast_gain[AC3_MAX_CHANNELS];                ///< fast gain values/SMR's         (fgain)
+    uint8_t bap[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< bit allocation pointers
+    int16_t psd[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< scaled exponents
+    int16_t band_psd[AC3_MAX_CHANNELS][AC3_CRITICAL_BANDS]; ///< interpolated exponents
+    int16_t mask[AC3_MAX_CHANNELS][AC3_CRITICAL_BANDS];     ///< masking curve values
+    int dba_mode[AC3_MAX_CHANNELS];                 ///< delta bit allocation mode
+    int dba_nsegs[AC3_MAX_CHANNELS];                ///< number of delta segments
+    uint8_t dba_offsets[AC3_MAX_CHANNELS][8];       ///< delta segment offsets
+    uint8_t dba_lengths[AC3_MAX_CHANNELS][8];       ///< delta segment lengths
+    uint8_t dba_values[AC3_MAX_CHANNELS][8];        ///< delta values for each segment
+///@}
+
+///@name Zero-mantissa dithering
+    int dither_flag[AC3_MAX_CHANNELS];      ///< dither flags                           (dithflg)
+    AVLFG dith_state;                       ///< for dither generation
+///@}
+
+///@name IMDCT
+    int block_switch[AC3_MAX_CHANNELS];     ///< block switch flags                     (blksw)
+    FFTContext imdct_512;                   ///< for 512 sample IMDCT
+    FFTContext imdct_256;                   ///< for 256 sample IMDCT
+///@}
+
+///@name Optimization
+    DSPContext dsp;                         ///< for optimization
+   //AC3DSPContext ac3dsp;
+    FmtConvertContext fmt_conv;             ///< optimized conversion functions
+    int mul_bias;                         ///< scaling for fixed_to_int16 conversion
+///@}
+
+///@name Aligned arrays
+    DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///< fixed-point transform coefficients
+    DECLARE_ALIGNED(32, FFTSample, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
+    DECLARE_ALIGNED(32, FFTSample, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
+    DECLARE_ALIGNED(32, int16_t, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
+    DECLARE_ALIGNED(32, FFTSample, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
+    DECLARE_ALIGNED(32, int, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
+    DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
+///@}
+} AC3FixedDecodeContext;
+
+int ac3_fixed_decode_init(AVCodecContext *avctx); /* installed as AVCodec.init of ff_ac3_fixed_decoder */
+int ac3_fixed_decode_end(AVCodecContext *avctx);  /* installed as AVCodec.close; always returns 0 */
+int ac3_fixed_decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr, /* AVCodec.decode; *got_frame_ptr is set to 1 when a frame was produced */
+                            AVPacket *avpkt);
+void ff_ac3_downmix_c_fixed(int (*samples)[256], int (*matrix)[2], int out_ch, int in_ch, int len); /* the block decoder calls this with s->output, s->downmix_coeffs and len 256 */
+
+#endif /* AVCODEC_MIPS_AC3DEC_FIXED_H */
diff --git a/libavcodec/mips/dsputil_mips_fixed.c b/libavcodec/mips/dsputil_mips_fixed.c
new file mode 100644
index 0000000..e1b8037
--- /dev/null
+++ b/libavcodec/mips/dsputil_mips_fixed.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Dragan Mrdjan (dmrdjan at mips.com)
+ *
+ * DSP utils optimized for MIPS fixed-point platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/dsputil.c
+ */
+
+#include "config.h"
+#include "libavcodec/dsputil.h"
+
+/**
+ * Windowed overlap of two 16-bit vectors into a 32-bit output, written with
+ * MIPS DSP ASE inline assembly and unrolled four times.
+ *
+ * After the pointer adjustments below, every loop iteration reads
+ * src0[i..i+3], src1[j..j-3], win[i..i+3] and win[j..j-3] and writes
+ * dst[i..i+3] and dst[j..j-3], with i running upwards from -len and j
+ * running downwards from len-1. For each pair the asm forms
+ *     dst[i] ~ src0[i]*win[j] - src1[j]*win[i]
+ *     dst[j] ~ src0[i]*win[i] + src1[j]*win[j]
+ * using the fractional multiply-accumulate instructions and a rounding
+ * extract with a right shift of 16 (see the MIPS DSP ASE manual for the
+ * exact saturation/rounding rules).
+ *
+ * @param dst  output, 2*len ints are written
+ * @param src0 first input, len samples
+ * @param src1 second input, len samples (consumed back to front)
+ * @param win  window, 2*len coefficients
+ * @param len  half length; the loop steps by 4, so this assumes len is a
+ *             positive multiple of 4 -- TODO confirm with callers
+ *
+ * NOTE(review): append/dpaqx_s.w.ph are DSP ASE Rev 2 instructions, yet this
+ * function is not wrapped in "#if HAVE_MIPSDSPR2 && HAVE_INLINE_ASM" the way
+ * the FFT code in this patch is -- confirm the build only compiles this file
+ * for suitable targets.
+ */
+static void vector_fmul_window_mips_fixed(int *dst, const int16_t *src0, const int16_t *src1, const int16_t *win, int len)
+{
+    int i,j;
+    int *dst_i, *dst_j;
+    const int16_t * src0_i, *src1_j;
+    const int16_t *win_i, *win_j;
+    /* Scratch registers for the asm; they temporarily hold packed 32-bit
+     * values even though they are declared int16_t. */
+    int16_t s0, s01, s02, s03, s1, s11, s12, s13;
+    int16_t wi, wi1, wi2, wi3, wj, wj1, wj2, wj3;
+
+    /* Re-base the pointers at the midpoint so that negative i addresses the
+     * first half and non-negative j addresses the second half. */
+    dst += len;
+    win += len;
+    src0 += len;
+
+    for(i=-len, j=len-1; i<0; i+=4, j-=4) {
+        dst_i = dst + i;
+        dst_j = dst + j;
+        src0_i = src0 + i;
+        src1_j = src1 + j;
+        win_i = win + i;
+        win_j = win + j;
+
+        /*
+         * Four (i, j) pairs are processed per asm statement. For each pair:
+         *   - lh loads the two samples and the two window coefficients,
+         *   - append packs them into halfword pairs {s0,s1} and {wj,wi},
+         *   - "mult $acN, $0, $0" clears an accumulator,
+         *   - mulsaq_s.w.ph yields the difference of products (dst[i]),
+         *   - dpaqx_s.w.ph yields the cross sum of products (dst[j]),
+         *   - extr_r.w extracts the rounded result, sw stores it.
+         * Loads for the next pair are interleaved with the extracts/stores
+         * of the previous one, so the instruction order must be kept.
+         */
+        __asm__ volatile (
+            "lh             %[s0],      0(%[src0_i])                \n\t"
+            "lh             %[s1],      0(%[src1_j])                \n\t"
+            "lh             %[wi],      0(%[win_i])                 \n\t"
+            "lh             %[wj],      0(%[win_j])                 \n\t"
+            "append         %[s0],      %[s1],          16          \n\t"
+            "append         %[wj],      %[wi],          16          \n\t"
+            "mult           $ac0,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[s0],          %[wj]       \n\t"
+            "mult           $ac1,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[s0],          %[wj]       \n\t"
+            "lh             %[s01],     2(%[src0_i])                \n\t"
+            "lh             %[s11],     -2(%[src1_j])               \n\t"
+            "extr_r.w       %[s1],      $ac0,           16          \n\t"
+            "lh             %[wi1],     2(%[win_i])                 \n\t"
+            "lh             %[wj1],     -2(%[win_j])                \n\t"
+            "extr_r.w       %[wj],      $ac1,           16          \n\t"
+            "append         %[s01],     %[s11],         16          \n\t"
+            "append         %[wj1],     %[wi1],         16          \n\t"
+            "mult           $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[s01],         %[wj1]      \n\t"
+            "sw             %[s1],      0(%[dst_i])                 \n\t"
+            "sw             %[wj],       0(%[dst_j])                \n\t"
+            "mult           $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[s01],         %[wj1]      \n\t"
+            "extr_r.w       %[s11],     $ac2,           16          \n\t"
+            "extr_r.w       %[wj1],     $ac3,           16          \n\t"
+            "lh             %[s02],     4(%[src0_i])                \n\t"
+            "lh             %[s12],     -4(%[src1_j])               \n\t"
+            "lh             %[wi2],     4(%[win_i])                 \n\t"
+            "lh             %[wj2],     -4(%[win_j])                \n\t"
+            "append         %[s02],     %[s12],         16          \n\t"
+            "append         %[wj2],     %[wi2],         16          \n\t"
+            "mult           $ac0,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[s02],         %[wj2]      \n\t"
+            "sw             %[s11],     4(%[dst_i])                 \n\t"
+            "sw             %[wj1],     -4(%[dst_j])                \n\t"
+            "mult           $ac1,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[s02],         %[wj2]      \n\t"
+            "extr_r.w       %[s12],     $ac0,           16          \n\t"
+            "lh             %[s03],     6(%[src0_i])                \n\t"
+            "lh             %[s13],     -6(%[src1_j])               \n\t"
+            "lh             %[wi3],     6(%[win_i])                 \n\t"
+            "lh             %[wj3],     -6(%[win_j])                \n\t"
+            "append         %[s03],     %[s13],         16          \n\t"
+            "append         %[wj3],     %[wi3],         16          \n\t"
+            "mult           $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[s03],         %[wj3]      \n\t"
+            "sw             %[s12],     8(%[dst_i])                 \n\t"
+            "extr_r.w       %[wj2],     $ac1,           16          \n\t"
+            "mult           $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[s03],         %[wj3]      \n\t"
+            "extr_r.w       %[s13],     $ac2,           16          \n\t"
+            "extr_r.w       %[wj3],     $ac3,           16          \n\t"
+            "sw             %[wj2],     -8(%[dst_j])                \n\t"
+            "sw             %[s13],     12(%[dst_i])                \n\t"
+            "sw             %[wj3],     -12(%[dst_j])               \n\t"
+
+            /* All outputs are early-clobber scratch values; the real results
+             * leave the asm through the sw stores, hence the "memory"
+             * clobber. All four DSP accumulators are listed as clobbered
+             * ("hi"/"lo" name $ac0). */
+            : [s0] "=&r" (s0), [s1] "=&r" (s1), [wi] "=&r" (wi),
+              [wj] "=&r" (wj), [s03] "=&r" (s03), [s01] "=&r" (s01),
+              [s11] "=&r" (s11), [wi1] "=&r" (wi1), [wj1] "=&r" (wj1),
+              [s13] "=&r" (s13), [s02] "=&r" (s02), [s12] "=&r" (s12),
+              [wi2] "=&r" (wi2), [wj2] "=&r" (wj2), [wi3] "=&r" (wi3),
+              [wj3] "=&r" (wj3)
+            : [src0_i] "r" (src0_i), [win_j] "r" (win_j ), [src1_j] "r" (src1_j),
+              [win_i] "r" (win_i), [dst_i] "r" (dst_i), [dst_j] "r" (dst_j)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+    }
+}
+
+/**
+ * Install the MIPS fixed-point DSP routines into a DSPContext.
+ *
+ * @param c context whose vector_fmul_window_fixed pointer is set to
+ *          vector_fmul_window_mips_fixed
+ */
+void ff_dsputil_init_mips_fixed(DSPContext *c)
+{
+    c->vector_fmul_window_fixed = vector_fmul_window_mips_fixed;
+}
diff --git a/libavcodec/mips/fft_mips_fixed.c b/libavcodec/mips/fft_mips_fixed.c
new file mode 100644
index 0000000..9fc9287
--- /dev/null
+++ b/libavcodec/mips/fft_mips_fixed.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj at mips.com)
+ *           Dragan Mrdjan    (dmrdjan at mips.com)
+ *           Zoran Lukic      (zlukic at mips.com)
+ *           Bojan Zivkovic   (bojan at mips.com)
+ *
+ * Optimization of FFT and MDCT/IMDCT transforms for MIPS fixed-point
+ * architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FFT_FLOAT 0
+#include "libavcodec/fft.h"
+#include "libavcodec/mips/fft_table.h"
+
+#include "fft_table_fixed.h"
+
+/**
+ * Set up a fixed-point MDCT context and compute its twiddle tables at run
+ * time.
+ *
+ * The context is zeroed, the underlying FFT of (nbits - 2) bits is
+ * initialised, and one buffer holding both the cosine and sine tables is
+ * allocated. Each table entry is -cos()/-sin() of the twiddle angle scaled
+ * by 65536, then reduced with (x + 1) >> 1 and capped at 32767.
+ *
+ * @param s       context to initialise (overwritten)
+ * @param nbits   log2 of the MDCT size
+ * @param inverse forwarded unchanged to ff_fft_init()
+ * @param scale   only its sign is used: a negative value offsets the
+ *                twiddle phase by a quarter of the transform size
+ * @return 0 on success, -1 on failure (the context is released through
+ *         ff_mdct_end() before returning)
+ */
+av_cold int ff_mdct_fixed_init(FFTContext *s, int nbits, int inverse, int scale)
+{
+    const int size    = 1 << nbits;
+    const int quarter = size >> 2;
+    double phase;
+    int stride, idx;
+
+    memset(s, 0, sizeof(*s));
+    s->mdct_bits        = nbits;
+    s->mdct_size        = size;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    /* One allocation backs both tables; tsin is placed inside it below. */
+    s->tcos = av_malloc((size * sizeof(int)) / 2);
+    if (!s->tcos)
+        goto fail;
+
+    /* The layout is chosen from the permutation stored in the context
+     * after the FFT has been initialised. */
+    if (s->mdct_permutation == FF_MDCT_PERM_NONE) {
+        s->tsin = s->tcos + quarter;
+        stride  = 1;
+    } else if (s->mdct_permutation == FF_MDCT_PERM_INTERLEAVE) {
+        s->tsin = s->tcos + 1;
+        stride  = 2;
+    } else {
+        goto fail;
+    }
+
+    phase = 0.125 + (scale < 0 ? quarter : 0);
+
+    for (idx = 0; idx < quarter; idx++) {
+        const double angle = 2 * M_PI * (idx + phase) / size;
+        int q;
+
+        /* cosine twiddle */
+        q = (int)(-cos(angle) * 65536);
+        q = (q + 1) >> 1;
+        if (q > 32767)
+            q = 32767;
+        s->tcos[idx * stride] = (FFTSample)q;
+
+        /* sine twiddle */
+        q = (int)(-sin(angle) * 65536);
+        q = (q + 1) >> 1;
+        if (q > 32767)
+            q = 32767;
+        s->tsin[idx * stride] = q;
+    }
+
+    return 0;
+
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+/**
+ * Set up a fixed-point MDCT context whose twiddle factors are copied from
+ * the precomputed tcos_fixed_128 / tsin_fixed_128 tables instead of being
+ * computed at run time.
+ *
+ * @param s       context to initialise (overwritten)
+ * @param nbits   log2 of the MDCT size; (1 << nbits) / 4 entries are read
+ *                from each table
+ * @param inverse forwarded unchanged to ff_fft_init()
+ * @param scale   not used by this variant
+ * @return 0 on success, -1 on failure (the context is released through
+ *         ff_mdct_end() before returning)
+ *
+ * NOTE(review): the table lengths are defined outside this function --
+ * confirm that callers only pass an nbits that matches them.
+ */
+av_cold int ff_mdct_fixed_init_hardcoded_128(FFTContext *s, int nbits, int inverse, int scale)
+{
+    const int size    = 1 << nbits;
+    const int quarter = size >> 2;
+    int stride, idx;
+
+    memset(s, 0, sizeof(*s));
+    s->mdct_bits        = nbits;
+    s->mdct_size        = size;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    /* One allocation backs both tables; tsin is placed inside it below. */
+    s->tcos = av_malloc((size * sizeof(int)) / 2);
+    if (!s->tcos)
+        goto fail;
+
+    if (s->mdct_permutation == FF_MDCT_PERM_NONE) {
+        s->tsin = s->tcos + quarter;
+        stride  = 1;
+    } else if (s->mdct_permutation == FF_MDCT_PERM_INTERLEAVE) {
+        s->tsin = s->tcos + 1;
+        stride  = 2;
+    } else {
+        goto fail;
+    }
+
+    for (idx = 0; idx < quarter; idx++) {
+        s->tcos[idx * stride] = tcos_fixed_128[idx];
+        s->tsin[idx * stride] = tsin_fixed_128[idx];
+    }
+
+    return 0;
+
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+#if HAVE_MIPSDSPR2 && HAVE_INLINE_ASM
+/**
+ * Half-length fixed-point IMDCT, MIPS DSP ASE Rev 2 inline-asm variant.
+ *
+ * Three stages:
+ *   1. pre-rotation: input samples are taken pairwise from the front (in1)
+ *      and the back (in2) of the first n/2 inputs, multiplied as complex
+ *      numbers by (tcos[k], tsin[k]) and scattered into z through revtab;
+ *   2. the FFT stored in s->fft_fixed_calc is run in place on z;
+ *   3. post-rotation: the entries mirrored around n/8 are rotated by
+ *      (tsin, tcos) and their real/imaginary parts are swapped across the
+ *      pair, matching the portable implementation in the #else branch.
+ *
+ * The pre-rotation loop handles 4 values of k per iteration and the
+ * post-rotation loop handles 2, so this assumes n/4 is a multiple of 4 and
+ * n/8 is even -- TODO confirm for all transform sizes in use.
+ *
+ * @param s      initialised MDCT context (mdct_bits, revtab, tcos, tsin,
+ *               fft_fixed_calc are read)
+ * @param output destination, reinterpreted as an array of FFTComplex
+ * @param input  source coefficients
+ */
+static void ff_imdct_fixed_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j, j2;
+    int ax0, ax1, ax2, ax3;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2, *in3, *in4;
+    FFTComplex *z = (FFTComplex *)output;
+
+    FFTSample t0, t1, t2, t3, t01, t11, t21, t31;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;
+    /* NOTE(review): in3 and in4 are assigned here but never read again. */
+    in3 = input + 2;
+    in2 = input + n2 - 1;
+    in4 = input + n2 - 3;
+
+    for(k=0; k<n4; k+=4) {
+        /* k1/k2 are byte offsets into the 16-bit twiddle tables for
+         * entries k and k+1 (used by the indexed lhx loads). */
+        int k1 = k * 2;
+        int k2 = k1 + 2;
+
+        /* Rotate the input pairs for k and k+1: each accumulator is cleared
+         * with "multu $acN, $0, $0", the sample pair and twiddle pair are
+         * packed with append, and mulsaq_s/dpaqx_s produce the two parts of
+         * the complex product, extracted with rounding by extr_r.w. */
+        __asm__ volatile (
+            "lh             %[ax0],     0(%[in2])                   \n\t"
+            "lh             %[ax1],     0(%[in1])                   \n\t"
+            "lhx            %[ax2],     %[k1](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k1](%[tsin])              \n\t"
+            "multu          $ac0,       $0,             $0          \n\t"
+            "multu          $ac1,       $0,             $0          \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "multu          $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+            "lh             %[ax0],     -4(%[in2])                  \n\t"
+            "lh             %[ax1],     4(%[in1])                   \n\t"
+            "lhx            %[ax2],     %[k2](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k2](%[tsin])              \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[ax0],         %[ax2]      \n\t"
+            "multu          $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[ax0],         %[ax2]      \n\t"
+            "extr_r.w       %[t0],      $ac0,           16          \n\t"
+            "extr_r.w       %[t2],      $ac1,           16          \n\t"
+            "extr_r.w       %[t1],      $ac2,           16          \n\t"
+            "extr_r.w       %[t3],      $ac3,           16          \n\t"
+
+            : [ax0] "=&r" (ax0), [ax2] "=&r" (ax2),[ax1]  "=&r"  (ax1), [ax3] "=&r" (ax3),
+              [t0] "=&r" (t0),  [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
+            : [in1] "r" (in1), [in2] "r" (in2), [tcos] "r" (tcos),
+              [tsin] "r" (tsin), [k1] "r" (k1), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        /* Scatter the two results to their permuted positions. */
+        j  = revtab[k];
+        j2 = revtab[k+1];
+
+        z[j].re = t0;
+        z[j].im = t2;
+        z[j2].re = t1;
+        z[j2].im = t3;
+
+        /* Advance the twiddle byte offsets to entries k+2 and k+3. */
+        k1 += 4;
+        k2 += 4;
+
+        /* Same rotation for k+2 and k+3; the input pointers are not moved
+         * yet, so larger load offsets (+-8, +-12 bytes) are used. */
+        __asm__ volatile (
+            "lh             %[ax0],     -8(%[in2])                  \n\t"
+            "lh             %[ax1],     8(%[in1])                   \n\t"
+            "lhx            %[ax2],     %[k1](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k1](%[tsin])              \n\t"
+            "multu          $ac0,       $0,             $0          \n\t"
+            "multu          $ac1,       $0,             $0          \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "multu          $ac2,       $0,             $0          \n\t"
+            "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+            "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+            "lh             %[ax0],     -12(%[in2])                 \n\t"
+            "lh             %[ax1],     12(%[in1])                  \n\t"
+            "lhx            %[ax2],     %[k2](%[tcos])              \n\t"
+            "lhx            %[ax3],     %[k2](%[tsin])              \n\t"
+            "append         %[ax0],     %[ax1],         16          \n\t"
+            "append         %[ax2],     %[ax3],         16          \n\t"
+            "mulsaq_s.w.ph  $ac2,       %[ax0],         %[ax2]      \n\t"
+            "multu          $ac3,       $0,             $0          \n\t"
+            "dpaqx_s.w.ph   $ac3,       %[ax0],         %[ax2]      \n\t"
+            "extr_r.w       %[t0],      $ac0,           16          \n\t"
+            "extr_r.w       %[t2],      $ac1,           16          \n\t"
+            "extr_r.w       %[t1],      $ac2,           16          \n\t"
+            "extr_r.w       %[t3],      $ac3,           16          \n\t"
+
+            : [ax0] "=&r" (ax0), [ax2] "=&r" (ax2), [ax1] "=&r" (ax1), [ax3] "=&r" (ax3),
+              [t0] "=&r" (t0), [t2] "=&r" (t2), [t1] "=r" (t1), [t3] "=r" (t3)
+            : [in1] "r" (in1), [in2] "r" (in2), [tcos] "r" (tcos),
+              [tsin] "r"  (tsin),[k1] "r" (k1), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        j  = revtab[k+2];
+        j2 = revtab[k+3];
+
+        z[j ].re = t0;
+        z[j ].im = t2;
+        z[j2].re = t1;
+        z[j2].im = t3;
+        /* Four input pairs (two samples apart each) were consumed. */
+        in1 += 8;
+        in2 -= 8;
+    }
+
+    s->fft_fixed_calc(s, z);
+
+    /* post rotation + reordering */
+
+    for(k=0; k<n8; k+=2 ) {
+        /* Byte offsets into the twiddle tables: k1/k2 walk down from n8-1,
+         * k11/k21 walk up from n8. in1/in2 point at the matching z entries
+         * viewed as 16-bit re/im pairs. */
+        int k1 = 2 * (n8 - k - 1), k2 = k1 - 2;
+        int k11 = 2 * (n8 + k), k21 = k11 + 2;
+        in1 = (const FFTSample*)(z + (n8 - k - 1));
+        in2 = (const FFTSample*)(z + (n8 + k));
+
+         /* Lower half: rotate z[n8-k-1] and z[n8-k-2].
+          * NOTE(review): the [z] input operand is listed but not
+          * referenced by the asm template. */
+         __asm__ volatile (
+             "lh             %[ax0],     2(%[in1])                   \n\t"
+             "lh             %[ax1],     0(%[in1])                   \n\t"
+             "lhx            %[ax2],     %[k1](%[tsin])              \n\t"
+             "lhx            %[ax3],     %[k1](%[tcos])              \n\t"
+             "multu          $ac0,       $0,             $0          \n\t"
+             "multu          $ac1,       $0,             $0          \n\t"
+             "append         %[ax0],     %[ax1],         16          \n\t"
+             "append         %[ax2],     %[ax3],         16          \n\t"
+             "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+             "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+             "lh             %[ax0],     -2(%[in1])                  \n\t"
+             "lh             %[ax1],     -4(%[in1])                  \n\t"
+             "lhx            %[ax2],     %[k2](%[tsin])              \n\t"
+             "lhx            %[ax3],     %[k2](%[tcos])              \n\t"
+             "append         %[ax0],     %[ax1],         16          \n\t"
+             "append         %[ax2],     %[ax3],         16          \n\t"
+             "multu          $ac2,       $0,             $0          \n\t"
+             "mulsaq_s.w.ph  $ac2,       %[ax0],         %[ax2]      \n\t"
+             "multu          $ac3,       $0,             $0          \n\t"
+             "dpaqx_s.w.ph   $ac3,       %[ax0],         %[ax2]      \n\t"
+             "extr_r.w       %[t0],      $ac0,           16          \n\t"
+             "extr_r.w       %[t2],      $ac1,           16          \n\t"
+             "extr_r.w       %[t1],      $ac2,           16          \n\t"
+             "extr_r.w       %[t3],      $ac3,           16          \n\t"
+
+            : [ax0] "=&r" (ax0), [ax1] "=&r" (ax1), [ax2] "=&r" (ax2), [ax3] "=&r" (ax3),
+              [t0] "=r" (t0), [t2] "=r" (t2), [t1] "=r" (t1), [t3] "=r" (t3)
+            : [in1] "r" (in1), [k1] "r" (k1), [tsin] "r" (tsin), [tcos] "r" (tcos),
+              [z] "r" (z), [k2] "r" (k2)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+         );
+
+         /* Upper half: rotate z[n8+k] and z[n8+k+1]. */
+         __asm__ volatile (
+             "lh             %[ax0],     2(%[in2])                   \n\t"
+             "lh             %[ax1],     0(%[in2])                   \n\t"
+             "lhx            %[ax2],     %[k11](%[tsin])             \n\t"
+             "lhx            %[ax3],     %[k11](%[tcos])             \n\t"
+             "multu          $ac0,       $0,             $0          \n\t"
+             "multu          $ac1,       $0,             $0          \n\t"
+             "append         %[ax0],     %[ax1],         16          \n\t"
+             "append         %[ax2],     %[ax3],         16          \n\t"
+             "mulsaq_s.w.ph  $ac0,       %[ax0],         %[ax2]      \n\t"
+             "dpaqx_s.w.ph   $ac1,       %[ax0],         %[ax2]      \n\t"
+             "lh             %[ax0],     6(%[in2])                   \n\t"
+             "lh             %[ax1],     4(%[in2])                   \n\t"
+             "lhx            %[ax2],     %[k21](%[tsin])             \n\t"
+             "lhx            %[ax3],     %[k21](%[tcos])             \n\t"
+             "append         %[ax0],     %[ax1],        16           \n\t"
+             "append         %[ax2],     %[ax3],        16           \n\t"
+             "multu          $ac2,       $0,            $0           \n\t"
+             "mulsaq_s.w.ph  $ac2,       %[ax0],        %[ax2]       \n\t"
+             "multu          $ac3,       $0,            $0           \n\t"
+             "dpaqx_s.w.ph   $ac3,       %[ax0],        %[ax2]       \n\t"
+             "extr_r.w       %[t01],     $ac0,          16           \n\t"
+             "extr_r.w       %[t21],     $ac1,          16           \n\t"
+             "extr_r.w       %[t11],     $ac2,          16           \n\t"
+             "extr_r.w       %[t31],     $ac3,          16           \n\t"
+
+            : [ax0] "=&r" (ax0), [ax1] "=&r" (ax1), [ax2] "=&r" (ax2), [ax3] "=&r" (ax3),
+              [t01] "=r" (t01), [t21] "=r" (t21), [t11] "=r" (t11), [t31] "=r" (t31)
+            : [in2] "r" (in2), [k11] "r" (k11), [tsin] "r" (tsin),[tcos] "r" (tcos),
+              [z] "r" (z), [k21] "r" (k21)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+        );
+
+        /* Cross-store: the second product of each rotation goes to the
+         * imaginary part of the mirrored entry. */
+        z[n8-k-1].re = t0;
+        z[n8+k  ].im = t2;
+        z[n8-k-1].im = t21;
+        z[n8+k  ].re = t01;
+
+        z[n8-k-2].re = t1;
+        z[n8+k+1].im = t3;
+        z[n8-k-2].im = t31;
+        z[n8+k+1].re = t11;
+        /* NOTE(review): repeats the store to z[n8+k+1].im made three lines
+         * above. */
+        z[n8+k+1].im = t3;
+    }
+}
+#else
+/*
+ * Complex multiply (are + i*aim) * (bre + i*bim) with the products rounded
+ * by adding 0x4000 and shifting right by 15. dre is assigned before dim is
+ * evaluated, and the a/b arguments are evaluated more than once.
+ */
+#define CMUL_SR(dre, dim, are, aim, bre, bim) do { \
+        (dre) = ( ((are) * (bre) - (aim) * (bim) + 0x4000) >> 15 );  \
+        (dim) = ( ((are) * (bim) + (aim) * (bre) + 0x4000) >> 15 );  \
+    } while(0)
+
+/**
+ * Half-length fixed-point IMDCT, portable C variant used when the DSP ASE
+ * Rev 2 inline-asm version is not compiled.
+ *
+ * @param s      initialised MDCT context (mdct_bits, revtab, tcos, tsin,
+ *               fft_fixed_calc are read)
+ * @param output destination, reinterpreted as an array of FFTComplex
+ * @param input  source coefficients
+ */
+static void ff_imdct_fixed_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    const int size    = 1 << s->mdct_bits;
+    const int half    = size >> 1;
+    const int quarter = size >> 2;
+    const int eighth  = size >> 3;
+    const uint16_t  *perm    = s->revtab;
+    const FFTSample *cos_tab = s->tcos;
+    const FFTSample *sin_tab = s->tsin;
+    const FFTSample *fwd     = input;            /* walks forwards  */
+    const FFTSample *bwd     = input + half - 1; /* walks backwards */
+    FFTComplex *z = (FFTComplex *)output;
+    int idx;
+
+    /* Pre-rotation: combine one sample from each end of the input and
+     * place the product at its permuted position. */
+    for (idx = 0; idx < quarter; idx++) {
+        FFTComplex *dst = &z[perm[idx]];
+
+        CMUL_SR(dst->re, dst->im, *bwd, *fwd, cos_tab[idx], sin_tab[idx]);
+        fwd += 2;
+        bwd -= 2;
+    }
+
+    s->fft_fixed_calc(s, z);
+
+    /* Post-rotation and reordering: rotate the two entries mirrored around
+     * eighth and exchange their imaginary parts. */
+    for (idx = 0; idx < eighth; idx++) {
+        const int lo_i = eighth - idx - 1;
+        const int hi_i = eighth + idx;
+        FFTComplex *lo = &z[lo_i];
+        FFTComplex *hi = &z[hi_i];
+        FFTSample lo_re, lo_im, hi_re, hi_im;
+
+        CMUL_SR(lo_re, hi_im, lo->im, lo->re, sin_tab[lo_i], cos_tab[lo_i]);
+        CMUL_SR(hi_re, lo_im, hi->im, hi->re, sin_tab[hi_i], cos_tab[hi_i]);
+
+        lo->re = lo_re;
+        lo->im = lo_im;
+        hi->re = hi_re;
+        hi->im = hi_im;
+    }
+}
+#endif /* HAVE_MIPSDSPR2 && HAVE_INLINE_ASM */
+
+/**
+ * Set up a fixed-point MDCT context whose twiddle factors are copied from
+ * the precomputed tcos_fixed / tsin_fixed tables instead of being computed
+ * at run time.
+ *
+ * @param s       context to initialise (overwritten)
+ * @param nbits   log2 of the MDCT size; (1 << nbits) / 4 entries are read
+ *                from each table
+ * @param inverse forwarded unchanged to ff_fft_init()
+ * @param scale   not used by this variant
+ * @return 0 on success, -1 on failure (the context is released through
+ *         ff_mdct_end() before returning)
+ *
+ * NOTE(review): the table lengths are defined outside this function --
+ * confirm that callers only pass an nbits that matches them.
+ */
+av_cold int ff_mdct_fixed_init_hardcoded(FFTContext *s, int nbits, int inverse, int scale)
+{
+    const int size    = 1 << nbits;
+    const int quarter = size >> 2;
+    int stride, idx;
+
+    memset(s, 0, sizeof(*s));
+    s->mdct_bits        = nbits;
+    s->mdct_size        = size;
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    /* One allocation backs both tables; tsin is placed inside it below. */
+    s->tcos = av_malloc((size * sizeof(int)) / 2);
+    if (!s->tcos)
+        goto fail;
+
+    if (s->mdct_permutation == FF_MDCT_PERM_NONE) {
+        s->tsin = s->tcos + quarter;
+        stride  = 1;
+    } else if (s->mdct_permutation == FF_MDCT_PERM_INTERLEAVE) {
+        s->tsin = s->tcos + 1;
+        stride  = 2;
+    } else {
+        goto fail;
+    }
+
+    for (idx = 0; idx < quarter; idx++) {
+        s->tcos[idx * stride] = tcos_fixed[idx];
+        s->tsin[idx * stride] = tsin_fixed[idx];
+    }
+
+    return 0;
+
+fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+#if HAVE_MIPSDSPR2 && HAVE_INLINE_ASM
+static void ff_fft_fixed_calc_mips(FFTContext *s, FFTComplex *z)
+{
+
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    int step2;
+    int temp1, temp2, temp3, temp4;
+    int z0, z1, z2, z3;
+    int t12, t34, t56, t78, t0a, t1a, t2a, t3a;
+    int in1, in2, in3, in4;
+    FFTComplex *tmpz, *addr1, *addr2, *addr3;
+    int w_re, w_im;
+    FFTSample *w_re_ptr, *w_im_ptr;
+    int pom;
+    const int fft_size = (1 << s->nbits);
+
+    FFTComplex *tmpz_n2, *tmpz_n34, *tmpz_n4;
+    FFTComplex *tmpz_n2_i, *tmpz_n34_i, *tmpz_n4_i, *tmpz_i;
+
+    int z_re_n2, z_im_n2, z_re_n34, z_im_n34, z_re, z_im, z_re_n4, z_im_n4;
+
+    num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 2;
+        tmpz = z + offset;
+
+        /* fft4 */
+        __asm__ volatile (
+            "lw         %[z0],      0(%[tmpz])              \n\t"
+            "lw         %[z1],      4(%[tmpz])              \n\t"
+            "lw         %[z2],      8(%[tmpz])              \n\t"
+            "lw         %[z3],      12(%[tmpz])             \n\t"
+            "addq.ph    %[t12],     %[z0],      %[z1]       \n\t"
+            "subq.ph    %[t34],     %[z0],      %[z1]       \n\t"
+            "addq.ph    %[t56],     %[z2],      %[z3]       \n\t"
+            "subq.ph    %[t78],     %[z2],      %[z3]       \n\t"
+            "addq.ph    %[t0a],     %[t12],     %[t56]      \n\t"
+            "packrl.ph  %[t78],     %[t78],     %[t78]      \n\t"
+            "subq.ph    %[t2a],     %[t12],     %[t56]      \n\t"
+            "addq.ph    %[t1a],     %[t34],     %[t78]      \n\t"
+            "subq.ph    %[t3a],     %[t34],     %[t78]      \n\t"
+            "packrl.ph  %[t1a],     %[t1a],     %[t1a]      \n\t"
+            "packrl.ph  %[t3a],     %[t3a],     %[t3a]      \n\t"
+            "sw         %[t0a],     0(%[tmpz])              \n\t"
+            "packrl.ph  %[z1],      %[t1a],     %[t3a]      \n\t"
+            "packrl.ph  %[z3],      %[t3a],     %[t1a]      \n\t"
+            "sw         %[t2a],     8(%[tmpz])              \n\t"
+            "sw         %[z3],      4(%[tmpz])              \n\t"
+            "sw         %[z1],      12(%[tmpz])             \n\t"
+
+            : [z0] "=&r" (z0), [z1] "=&r" (z1), [t12] "=&r" (t12),
+              [z2] "=&r" (z2), [z3] "=&r" (z3), [t34] "=&r" (t34),
+              [t56] "=&r" (t56), [t78] "=&r" (t78), [t0a] "=&r" (t0a),
+              [t1a] "=&r" (t1a), [t2a] "=&r" (t2a), [t3a] "=&r" (t3a)
+            : [tmpz] "r" (tmpz)
+            : "memory"
+        );
+    }
+
+    if (fft_size < 8)
+        return;
+
+    pom = 23170;
+
+    num_transforms = (num_transforms >> 1) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 3;
+        tmpz = z + offset;
+
+        /* fft8 */
+        __asm__ volatile (
+            "lw         %[in1],     16(%[tmpz])             \t\n"
+            "lw         %[in2],     20(%[tmpz])             \t\n"
+            "lw         %[in3],     24(%[tmpz])             \t\n"
+            "lw         %[in4],     28(%[tmpz])             \t\n"
+            "addq.ph    %[temp1],   %[in1],     %[in2]      \t\n"
+            "subq.ph    %[temp3],   %[in1],     %[in2]      \t\n"
+            "seh        %[tmp1],    %[temp1]                \t\n"
+            "sra        %[temp1],   %[temp1],   16          \t\n"
+            "seh        %[tmp2],    %[temp1]                \t\n"
+            "addq.ph    %[temp2],   %[in3],     %[in4]      \t\n"
+            "subq.ph    %[temp4],   %[in3],     %[in4]      \t\n"
+            "seh        %[tmp3],    %[temp2]                \t\n"
+            "sra        %[temp2],   %[temp2],   16          \t\n"
+            "seh        %[tmp4],    %[temp2]                \t\n"
+            "add        %[tmp5],    %[tmp1],    %[tmp3]     \t\n"
+            "sub        %[tmp7],    %[tmp1],    %[tmp3]     \t\n"
+            "add        %[tmp6],    %[tmp2],    %[tmp4]     \t\n"
+            "sub        %[tmp8],    %[tmp2],    %[tmp4]     \t\n"
+            "seh        %[tmp1],    %[temp3]                \t\n"
+            "sra        %[temp3],   %[temp3],   16          \t\n"
+            "seh        %[tmp2],    %[temp3]                \t\n"
+            "seh        %[tmp3],    %[temp4]                \t\n"
+            "sra        %[temp4],   %[temp4],   16          \t\n"
+            "seh        %[tmp4],    %[temp4]                \t\n"
+            "lw         %[in1],     0(%[tmpz])              \t\n"
+            "move       %[temp1],   %[tmp6]                 \t\n"
+            "append     %[temp1],   %[tmp5],    16          \t\n"
+            "subq.ph    %[temp3],   %[in1],     %[temp1]    \t\n"
+            "addq.ph    %[temp4],   %[in1],     %[temp1]    \t\n"
+            "sw         %[temp3],   16(%[tmpz])             \t\n"
+            "sw         %[temp4],   0(%[tmpz])              \t\n"
+            "lw         %[in2],     8(%[tmpz])              \t\n"
+            "negu       %[temp1],   %[tmp7]                 \t\n"
+            "append     %[temp1],   %[tmp8],    16          \t\n"
+            "subq.ph    %[temp2],   %[in2],     %[temp1]    \t\n"
+            "addq.ph    %[temp3],   %[in2],     %[temp1]    \t\n"
+            "sw         %[temp2],   24(%[tmpz])             \t\n"
+            "sw         %[temp3],   8(%[tmpz])              \t\n"
+            "add        %[tmp5],    %[tmp1],    %[tmp2]     \t\n"
+            "mul        %[tmp5],    %[tmp5],    %[pom]      \t\n"
+            "sub        %[tmp6],    %[tmp2],    %[tmp1]     \t\n"
+            "mul        %[tmp6],    %[tmp6],    %[pom]      \t\n"
+            "sub        %[tmp7],    %[tmp3],    %[tmp4]     \t\n"
+            "mul        %[tmp7],    %[tmp7],    %[pom]      \t\n"
+            "add        %[tmp8],    %[tmp3],    %[tmp4]     \t\n"
+            "mul        %[tmp8],    %[tmp8],    %[pom]      \t\n"
+            "shra_r.w   %[tmp5],    %[tmp5],    15          \t\n"
+            "lw         %[in1],     4(%[tmpz])              \t\n"
+            "shra_r.w   %[tmp6],    %[tmp6],    15          \t\n"
+            "lw         %[in2],     12(%[tmpz])             \t\n"
+            "shra_r.w   %[tmp7],    %[tmp7],    15          \t\n"
+            "add        %[tmp1],    %[tmp5],    %[tmp7]     \t\n"
+            "shra_r.w   %[tmp8],    %[tmp8],    15          \t\n"
+            "add        %[tmp2],    %[tmp6],    %[tmp8]     \t\n"
+            "sub        %[tmp3],    %[tmp5],    %[tmp7]     \t\n"
+            "sub        %[tmp4],    %[tmp6],    %[tmp8]     \t\n"
+            "move       %[temp1],   %[tmp2]                 \t\n"
+            "append     %[temp1],   %[tmp1],    16          \t\n"
+            "subq.ph    %[temp2],   %[in1],     %[temp1]    \t\n"
+            "addq.ph    %[temp3],   %[in1],     %[temp1]    \t\n"
+            "sw         %[temp2],   20(%[tmpz])             \t\n"
+            "sw         %[temp3],   4(%[tmpz])              \t\n"
+            "negu       %[temp1],   %[tmp3]                 \t\n"
+            "append     %[temp1],   %[tmp4],    16          \t\n"
+            "subq.ph    %[temp2],   %[in2],     %[temp1]    \t\n"
+            "addq.ph    %[temp3],   %[in2],     %[temp1]    \t\n"
+            "sw         %[temp2],   28(%[tmpz])             \t\n"
+            "sw         %[temp3],   12(%[tmpz])             \t\n"
+
+            : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+              [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+              [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [temp1] "=&r" (temp1),
+              [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [temp4] "=&r" (temp4),
+              [in1] "=&r" (in1), [in2] "=&r" (in2), [in3] "=&r" (in3),
+              [in4] "=&r" (in4)
+            : [tmpz] "r" (tmpz), [pom] "r" (pom)
+            : "memory"
+        );
+    }
+
+    step = 1 << (MAX_LOG2_NFFT - 4);
+    n4 = 4;
+
+    for (nbits=4; nbits<=s->nbits; nbits++)
+    {
+        n2  = 2*n4;
+        n34 = 3*n4;
+        num_transforms = (num_transforms >> 1) | 1;
+        for (n=0; n<num_transforms; n++)
+        {
+            offset = fft_offsets_lut[n] << nbits;
+            tmpz = z + offset;
+
+            __asm__ volatile (
+                "sll        %[z0],      %[n2],          2           \n\t"
+                "sll        %[z1],      %[n34],         2           \n\t"
+                "sll        %[z2],      %[n4],          2           \n\t"
+                "addu       %[addr1],   %[tmpz],        %[z0]       \n\t"
+                "addu       %[addr2],   %[tmpz],        %[z1]       \n\t"
+                "addu       %[addr3],   %[tmpz],        %[z2]       \n\t"
+                "lw         %[z0],      0(%[addr1])                 \n\t"
+                "lw         %[z1],      0(%[addr2])                 \n\t"
+                "lw         %[z2],      0(%[tmpz])                  \n\t"
+                "sll        %[step2],   %[step],        2           \n\t"
+                "lw         %[z3],      0(%[addr3])                 \n\t"
+                "addq.ph    %[t56],     %[z0],          %[z1]       \n\t"
+                "subq.ph    %[t12],     %[z0],          %[z1]       \n\t"
+                "addq.ph    %[t0a],     %[z2],          %[t56]      \n\t"
+                "packrl.ph  %[z3],      %[z3],          %[z3]       \n\t"
+                "subq.ph    %[t2a],     %[z2],          %[t56]      \n\t"
+                "addq.ph    %[t1a],     %[z3],          %[t12]      \n\t"
+                "subq.ph    %[t3a],     %[z3],          %[t12]      \n\t"
+                "sw         %[t0a],     0(%[tmpz])                  \n\t"
+                "sw         %[t2a],     0(%[addr1])                 \n\t"
+                "packrl.ph  %[z0],      %[t1a],         %[t3a]      \n\t"
+                "packrl.ph  %[z1],      %[t3a],         %[t1a]      \n\t"
+                "sw         %[z0],      0(%[addr2])                 \n\t"
+                "sw         %[z1],      0(%[addr3])                 \n\t"
+
+                : [z0] "=&r" (z0), [z1] "=&r" (z1), [t12] "=&r" (t12),
+                  [z2] "=&r" (z2), [z3] "=&r" (z3), [step2] "=&r" (step2),
+                  [t56] "=&r" (t56), [t0a] "=&r" (t0a), [t1a] "=&r" (t1a),
+                  [t2a] "=&r" (t2a), [t3a] "=&r" (t3a), [addr1] "=&r" (addr1),
+                  [addr2] "=&r" (addr2), [addr3] "=&r" (addr3)
+                : [n2] "r" (n2), [n34] "r" (n34), [n4] "r" (n4), [tmpz] "r" (tmpz),
+                  [step] "r" (step)
+                : "memory"
+            );
+
+            w_re_ptr = (FFTSample*)(ff_cos_65536_fixed + step);
+            w_im_ptr = (FFTSample*)(ff_cos_65536_fixed + MAX_FFT_SIZE/4 - step);
+
+            for (i=1; i<n4; i ++ )
+            {
+                w_re = w_re_ptr[0];
+                w_im = w_im_ptr[0];
+
+                tmpz_n2  = tmpz + n2;
+                tmpz_n4  = tmpz + n4;
+                tmpz_n34 = tmpz + n34;
+
+                tmpz_n2_i  = tmpz_n2  + i;
+                tmpz_n4_i  = tmpz_n4  + i;
+                tmpz_n34_i = tmpz_n34 + i;
+                tmpz_i     = tmpz     + i;
+
+                __asm__ volatile (
+                    "lh         %[z_re_n2],     0(%[tmpz_n2_i])                     \n\t"
+                    "lh         %[z_im_n2],     2(%[tmpz_n2_i])                     \n\t"
+                    "lh         %[z_re_n34],    0(%[tmpz_n34_i])                    \n\t"
+                    "lh         %[z_im_n34],    2(%[tmpz_n34_i])                    \n\t"
+                    "mult       $ac0,           %[w_re],            %[z_re_n2]      \n\t"
+                    "mult       $ac2,           %[w_re],            %[z_re_n34]     \n\t"
+                    "mult       $ac1,           %[w_re],            %[z_im_n2]      \n\t"
+                    "mult       $ac3,           %[w_re],            %[z_im_n34]     \n\t"
+                    "madd       $ac0,           %[w_im],            %[z_im_n2]      \n\t"
+                    "msub       $ac2,           %[w_im],            %[z_im_n34]     \n\t"
+                    "msub       $ac1,           %[w_im],            %[z_re_n2]      \n\t"
+                    "madd       $ac3,           %[w_im],            %[z_re_n34]     \n\t"
+                    "lh         %[z_re],        0(%[tmpz_i])                        \n\t"
+                    "extr_r.w   %[tmp1],        $ac0, 15                            \n\t"
+                    "extr_r.w   %[tmp3],        $ac2, 15                            \n\t"
+                    "extr_r.w   %[tmp2],        $ac1, 15                            \n\t"
+                    "extr_r.w   %[tmp4],        $ac3, 15                            \n\t"
+                    "lh         %[z_im],        2(%[tmpz_i])                        \n\t"
+                    "lh         %[z_re_n4],     0(%[tmpz_n4_i])                     \n\t"
+                    "lh         %[z_im_n4],     2(%[tmpz_n4_i])                     \n\t"
+                    "add        %[tmp5],        %[tmp1],            %[tmp3]         \n\t"
+                    "sub        %[tmp1],        %[tmp1],            %[tmp3]         \n\t"
+                    "add        %[tmp6],        %[tmp2],            %[tmp4]         \n\t"
+                    "sub        %[tmp2],        %[tmp2],            %[tmp4]         \n\t"
+                    "subq_s.ph  %[z_re_n2],     %[z_re],            %[tmp5]         \n\t"
+                    "addq_s.ph  %[z_re],        %[z_re],            %[tmp5]         \n\t"
+                    "subq_s.ph  %[z_im_n2],     %[z_im],            %[tmp6]         \n\t"
+                    "addq_s.ph  %[z_im],        %[z_im],            %[tmp6]         \n\t"
+                    "sh         %[z_re_n2],     0(%[tmpz_n2_i])                     \n\t"
+                    "sh         %[z_re],        0(%[tmpz_i])                        \n\t"
+                    "sh         %[z_im_n2],     2(%[tmpz_n2_i])                     \n\t"
+                    "sh         %[z_im],        2(%[tmpz_i])                        \n\t"
+                    "subq_s.ph  %[z_re_n34],    %[z_re_n4],         %[tmp2]         \n\t"
+                    "addq_s.ph  %[z_re_n4],     %[z_re_n4],         %[tmp2]         \n\t"
+                    "addq_s.ph  %[z_im_n34],    %[z_im_n4],         %[tmp1]         \n\t"
+                    "subq_s.ph  %[z_im_n4],     %[z_im_n4],         %[tmp1]         \n\t"
+                    "sh         %[z_re_n34],    0(%[tmpz_n34_i])                    \n\t"
+                    "sh         %[z_re_n4],     0(%[tmpz_n4_i])                     \n\t"
+                    "sh         %[z_im_n34],    2(%[tmpz_n34_i])                    \n\t"
+                    "sh         %[z_im_n4],     2(%[tmpz_n4_i])                     \n\t"
+
+                    : [z_re_n2] "=&r" (z_re_n2), [z_re] "=&r" (z_re), [z_im] "=&r" (z_im),
+                      [z_im_n2] "=&r" (z_im_n2), [z_re_n34] "=&r" (z_re_n34),
+                      [z_im_n4] "=&r" (z_im_n4), [z_re_n4] "=&r" (z_re_n4),
+                      [z_im_n34] "=&r" (z_im_n34), [tmp1] "=r" (tmp1),
+                      [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+                      [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6)
+                    : [w_re] "r" (w_re), [w_im] "r" (w_im), [tmpz_n2_i] "r" (tmpz_n2_i),
+                      [tmpz_n34_i] "r" (tmpz_n34_i), [tmpz_n4_i] "r" (tmpz_n4_i),
+                      [tmpz_i] "r" (tmpz_i)
+                    : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+                      "$ac3hi", "$ac3lo"
+              );
+              w_re_ptr += step;
+              w_im_ptr -= step;
+            }
+        }
+        step >>= 1;
+        n4   <<= 1;
+    }
+}
+#else
+/**
+ * Plain C fallback of the in-place 16-bit fixed-point FFT pass sequence
+ * (used when the DSPR2 inline-assembly variant is not compiled in).
+ *
+ * The transform is done in three stages, all operating directly on z:
+ *   1. 4-point butterflies on every group selected by fft_offsets_lut[n] << 2,
+ *   2. 8-point combination of those groups (skipped when 1 << s->nbits < 8),
+ *   3. one combining pass per nbits = 4 .. s->nbits, using twiddle factors
+ *      read from ff_cos_65536_fixed.
+ *
+ * fft_offsets_lut is only read here; it has to be filled before the call.
+ *
+ * @param s FFT context; only s->nbits is read
+ * @param z array of complex samples, transformed in place
+ */
+static void ff_fft_fixed_calc_mips(FFTContext *s, FFTComplex *z) {
+
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+
+    FFTComplex *tmpz;
+
+    int w_re, w_im;
+    FFTSample *w_re_ptr, *w_im_ptr;
+    int pom;
+    const int fft_size = (1 << s->nbits);
+
+    /* Stage 1: 4-point butterflies. Sums/differences are formed in int
+     * temporaries first, so each group is fully read before it is written. */
+    num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 2;
+        tmpz = z + offset;
+
+        tmp1 = tmpz[0].re + tmpz[1].re;
+        tmp5 = tmpz[2].re + tmpz[3].re;
+        tmp2 = tmpz[0].im + tmpz[1].im;
+        tmp6 = tmpz[2].im + tmpz[3].im;
+        tmp3 = tmpz[0].re - tmpz[1].re;
+        tmp8 = tmpz[2].im - tmpz[3].im;
+        tmp4 = tmpz[0].im - tmpz[1].im;
+        tmp7 = tmpz[2].re - tmpz[3].re;
+
+        tmpz[0].re = tmp1 + tmp5;
+        tmpz[2].re = tmp1 - tmp5;
+        tmpz[0].im = tmp2 + tmp6;
+        tmpz[2].im = tmp2 - tmp6;
+        tmpz[1].re = tmp3 + tmp8;
+        tmpz[3].re = tmp3 - tmp8;
+        tmpz[1].im = tmp4 - tmp7;
+        tmpz[3].im = tmp4 + tmp7;
+
+    }
+    /* A 4-point transform is complete after stage 1. */
+    if (fft_size < 8)
+    return;
+
+    /* Stage 2: 8-point groups. Elements 4..7 are combined among themselves
+     * and then merged with elements 0..3 of the same group. */
+    num_transforms = (num_transforms >> 1) | 1;
+    for (n=0; n<num_transforms; n++)
+    {
+        offset = fft_offsets_lut[n] << 3;
+        tmpz = z + offset;
+
+        tmp1 = tmpz[4].re + tmpz[5].re;
+        tmp3 = tmpz[6].re + tmpz[7].re;
+        tmp2 = tmpz[4].im + tmpz[5].im;
+        tmp4 = tmpz[6].im + tmpz[7].im;
+
+        tmp5 = tmp1 + tmp3;
+        tmp7 = tmp1 - tmp3;
+        tmp6 = tmp2 + tmp4;
+        tmp8 = tmp2 - tmp4;
+
+        /* The differences must be taken before tmpz[4] and tmpz[6] are
+         * overwritten below. */
+        tmp1 = tmpz[4].re - tmpz[5].re;
+        tmp2 = tmpz[4].im - tmpz[5].im;
+        tmp3 = tmpz[6].re - tmpz[7].re;
+        tmp4 = tmpz[6].im - tmpz[7].im;
+
+        /* Each pair writes the "minus" output first so the "plus" output can
+         * still read the unmodified source element. */
+        tmpz[4].re = tmpz[0].re - tmp5;
+        tmpz[0].re = tmpz[0].re + tmp5;
+        tmpz[4].im = tmpz[0].im - tmp6;
+        tmpz[0].im = tmpz[0].im + tmp6;
+        tmpz[6].re = tmpz[2].re - tmp8;
+        tmpz[2].re = tmpz[2].re + tmp8;
+        tmpz[6].im = tmpz[2].im + tmp7;
+        tmpz[2].im = tmpz[2].im - tmp7;
+
+        /* 23170 / 32768 is approximately 0.7071; used as a Q15 multiplier. */
+        pom = 23170;
+
+        /* Q15 multiply with rounding: add 0x4000 (half an LSB), shift by 15. */
+        tmp5 = (pom * (tmp1 + tmp2) + 0x4000) >> 15;
+        tmp7 = (pom * (tmp3 - tmp4) + 0x4000) >> 15;
+        tmp6 = (pom * (tmp2 - tmp1) + 0x4000) >> 15;
+        tmp8 = (pom * (tmp3 + tmp4) + 0x4000) >> 15;
+
+        tmp1 = tmp5 + tmp7;
+        tmp3 = tmp5 - tmp7;
+        tmp2 = tmp6 + tmp8;
+        tmp4 = tmp6 - tmp8;
+
+        tmpz[5].re = tmpz[1].re - tmp1;
+        tmpz[1].re = tmpz[1].re + tmp1;
+        tmpz[5].im = tmpz[1].im - tmp2;
+        tmpz[1].im = tmpz[1].im + tmp2;
+        tmpz[7].re = tmpz[3].re - tmp4;
+        tmpz[3].re = tmpz[3].re + tmp4;
+        tmpz[7].im = tmpz[3].im + tmp3;
+        tmpz[3].im = tmpz[3].im - tmp3;
+    }
+
+    /* Stage 3: one pass per transform size 16, 32, ... up to 1 << s->nbits.
+     * n4 is a quarter of the current size; step is the stride through the
+     * twiddle table and halves every pass. */
+    step = 1 << (MAX_LOG2_NFFT - 4);
+    n4 = 4;
+    for (nbits=4; nbits<=s->nbits; nbits++)
+    {
+        n2 = 2*n4;
+        n34 = 3*n4;
+        num_transforms = (num_transforms >> 1) | 1;
+        for (n=0; n<num_transforms; n++)
+        {
+            offset = fft_offsets_lut[n] << nbits;
+            tmpz = z + offset;
+
+            /* i == 0 butterfly: no twiddle multiplication and, unlike the
+             * i >= 1 case below, no av_clip_int16() on the results. */
+            tmp5 = tmpz[ n2].re + tmpz[n34].re;
+            tmp1 = tmpz[ n2].re - tmpz[n34].re;
+            tmp6 = tmpz[ n2].im + tmpz[n34].im;
+            tmp2 = tmpz[ n2].im - tmpz[n34].im;
+
+            tmpz[ n2].re = tmpz[ 0].re - tmp5;
+            tmpz[ 0].re  = tmpz[ 0].re + tmp5;
+            tmpz[ n2].im = tmpz[ 0].im - tmp6;
+            tmpz[ 0].im  = tmpz[ 0].im + tmp6;
+            tmpz[n34].re = tmpz[n4].re - tmp2;
+            tmpz[ n4].re = tmpz[n4].re + tmp2;
+            tmpz[n34].im = tmpz[n4].im + tmp1;
+            tmpz[ n4].im = tmpz[n4].im - tmp1;
+
+            /* w_re walks the table upwards from index step, w_im walks it
+             * downwards from MAX_FFT_SIZE/4 - step. */
+            w_re_ptr = (FFTSample*)(ff_cos_65536_fixed + step);
+            w_im_ptr = (FFTSample*)(ff_cos_65536_fixed + MAX_FFT_SIZE/4 - step);
+
+            for (i=1; i<n4; i++)
+            {
+                w_re = w_re_ptr[0];
+                w_im = w_im_ptr[0];
+
+                /* Complex multiplications by (w_re, w_im) in Q15 with
+                 * rounding; the n2 and n34 elements use opposite signs for
+                 * the w_im terms. */
+                tmp1 = (w_re * tmpz[ n2+i].re + w_im * tmpz[ n2+i].im + 0x4000) >> 15;
+                tmp2 = (w_re * tmpz[ n2+i].im - w_im * tmpz[ n2+i].re + 0x4000) >> 15;
+                tmp3 = (w_re * tmpz[n34+i].re - w_im * tmpz[n34+i].im + 0x4000) >> 15;
+                tmp4 = (w_re * tmpz[n34+i].im + w_im * tmpz[n34+i].re + 0x4000) >> 15;
+
+                tmp5 = tmp1 + tmp3;
+                tmp1 = tmp1 - tmp3;
+                tmp6 = tmp2 + tmp4;
+                tmp2 = tmp2 - tmp4;
+
+                /* Results are saturated to the int16 range before storing. */
+                tmpz[n2+i ].re = av_clip_int16(tmpz[i   ].re - tmp5);
+                tmpz[i    ].re = av_clip_int16(tmpz[i   ].re + tmp5);
+                tmpz[n2+i ].im = av_clip_int16(tmpz[i   ].im - tmp6);
+                tmpz[i    ].im = av_clip_int16(tmpz[i   ].im + tmp6);
+                tmpz[n34+i].re = av_clip_int16(tmpz[n4+i].re - tmp2);
+                tmpz[n4+i ].re = av_clip_int16(tmpz[n4+i].re + tmp2);
+                tmpz[n34+i].im = av_clip_int16(tmpz[n4+i].im + tmp1);
+                tmpz[n4+i ].im = av_clip_int16(tmpz[n4+i].im - tmp1);
+
+                w_re_ptr += step;
+                w_im_ptr -= step;
+            }
+        }
+        step >>= 1;
+        n4 <<= 1;
+    }
+}
+#endif /* HAVE_MIPSDSPR2 && HAVE_INLINE_ASM */
+
+/**
+ * Install the MIPS fixed-point FFT (and, with CONFIG_MDCT, IMDCT) routines
+ * into an FFT context.
+ *
+ * When HAVE_MIPSFPU is not set, fft_offsets_lut is also filled here via
+ * ff_fft_lut_init().
+ * NOTE(review): with HAVE_MIPSFPU set the table is not initialised in this
+ * function; presumably another init path does it - confirm.
+ *
+ * @param s FFT context whose function pointers are set
+ */
+void ff_fft_fixed_init_mips(FFTContext *s)
+{
+#if !HAVE_MIPSFPU
+    int lut_pos = 0;
+
+    ff_fft_lut_init(fft_offsets_lut, 0, 1 << 16, &lut_pos);
+#endif /* !HAVE_MIPSFPU */
+
+    s->fft_fixed_calc   = ff_fft_fixed_calc_mips;
+#if CONFIG_MDCT
+    s->imdct_fixed_half = ff_imdct_fixed_half_mips;
+#endif /* CONFIG_MDCT */
+}
diff --git a/libavcodec/mips/fft_table_fixed.h b/libavcodec/mips/fft_table_fixed.h
new file mode 100644
index 0000000..637cf99
--- /dev/null
+++ b/libavcodec/mips/fft_table_fixed.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj at mips.com)
+ *
+ * Tables necessary for performing fixed-point MDCT/IMDCT transforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_FFT_FIXED_TABLE_H
+#define AVCODEC_MIPS_FFT_FIXED_TABLE_H
+
+/* TODO: Support MDCT/IMDCT other than 64 and 128 */
+
+/*
+ * 64-entry table of negative sine values scaled by 2^15.
+ * NOTE(review): the entries match trunc(-32768 * sin(2*pi*(i + 1/8) / 256));
+ * confirm against the generator that produced them.
+ * NOTE(review): this array is defined (not just declared) in a header with
+ * external linkage, so including the header from more than one translation
+ * unit would produce duplicate symbols - confirm it is included only once.
+ */
+FFTSample tsin_fixed[64] = { -100, -904, -1708, -2510, -3311, -4110, -4907,
+        -5701, -6491, -7277, -8059, -8836, -9608, -10374, -11133, -11886,
+        -12632, -13370, -14100, -14822, -15535, -16238, -16932, -17615, -18288,
+        -18950, -19600, -20239, -20865, -21479, -22080, -22667, -23241, -23801,
+        -24346, -24877, -25393, -25894, -26379, -26848, -27301, -27737, -28157,
+        -28560, -28946, -29314, -29664, -29997, -30312, -30608, -30886, -31145,
+        -31386, -31607, -31810, -31993, -32157, -32302, -32427, -32533, -32619,
+        -32686, -32733, -32760 };
+
+/*
+ * 64-entry table of negative cosine values scaled by 2^15.
+ * NOTE(review): the entries match trunc(-32768 * cos(2*pi*(i + 1/8) / 256));
+ * confirm against the generator that produced them.
+ */
+FFTSample tcos_fixed[64] = { -32767, -32755, -32723, -32671, -32600, -32509,
+        -32398, -32268, -32118, -31949, -31761, -31554, -31327, -31082, -30818,
+        -30535, -30235, -29915, -29578, -29223, -28851, -28461, -28054, -27630,
+        -27189, -26732, -26259, -25770, -25266, -24746, -24211, -23662, -23099,
+        -22521, -21931, -21326, -20709, -20080, -19439, -18785, -18121, -17445,
+        -16759, -16063, -15357, -14642, -13919, -13186, -12446, -11699, -10944,
+        -10183, -9415, -8642, -7864, -7081, -6294, -5503, -4708, -3911, -3111,
+        -2310, -1507, -703 };
+
+/*
+ * 128-entry table of negative sine values scaled by 2^15.
+ * NOTE(review): the entries match trunc(-32768 * sin(2*pi*(i + 1/8) / 512));
+ * confirm against the generator that produced them.
+ */
+FFTSample tsin_fixed_128[128] = { -50, -452, -854, -1256, -1658, -2059, -2460,
+        -2861, -3261, -3661, -4061, -4459, -4857, -5255, -5651, -6047, -6442,
+        -6835, -7228, -7620, -8010, -8400, -8788, -9174, -9560, -9944, -10326,
+        -10707, -11086, -11464, -11839, -12213, -12586, -12956, -13324, -13691,
+        -14055, -14417, -14777, -15135, -15491, -15844, -16195, -16543, -16889,
+        -17232, -17573, -17911, -18246, -18579, -18909, -19236, -19560, -19881,
+        -20199, -20514, -20826, -21135, -21441, -21743, -22042, -22338, -22631,
+        -22920, -23205, -23488, -23766, -24041, -24313, -24580, -24845, -25105,
+        -25361, -25614, -25863, -26108, -26349, -26586, -26819, -27048, -27273,
+        -27494, -27711, -27923, -28131, -28335, -28535, -28731, -28922, -29109,
+        -29291, -29469, -29643, -29812, -29977, -30137, -30292, -30443, -30590,
+        -30732, -30869, -31001, -31129, -31253, -31371, -31485, -31594, -31698,
+        -31798, -31892, -31982, -32067, -32148, -32223, -32294, -32359, -32420,
+        -32476, -32527, -32573, -32615, -32651, -32682, -32709, -32730, -32747,
+        -32759, -32766 };
+
+/*
+ * 128-entry table of negative cosine values scaled by 2^15.
+ * NOTE(review): the entries match trunc(-32768 * cos(2*pi*(i + 1/8) / 512));
+ * confirm against the generator that produced them.
+ */
+FFTSample tcos_fixed_128[128] = { -32767, -32764, -32756, -32743, -32726, -32703,
+        -32675, -32642, -32605, -32562, -32515, -32463, -32405, -32343, -32276,
+        -32205, -32128, -32047, -31960, -31869, -31773, -31673, -31567, -31457,
+        -31342, -31222, -31098, -30969, -30835, -30697, -30554, -30406, -30254,
+        -30097, -29936, -29770, -29600, -29425, -29246, -29062, -28875, -28682,
+        -28486, -28285, -28080, -27870, -27657, -27439, -27217, -26991, -26761,
+        -26527, -26289, -26047, -25801, -25551, -25298, -25040, -24779, -24514,
+        -24245, -23973, -23697, -23417, -23134, -22848, -22558, -22265, -21968,
+        -21668, -21365, -21058, -20748, -20436, -20120, -19801, -19479, -19154,
+        -18826, -18496, -18163, -17827, -17488, -17146, -16802, -16456, -16107,
+        -15756, -15402, -15046, -14687, -14327, -13964, -13599, -13232, -12864,
+        -12493, -12120, -11746, -11369, -10991, -10612, -10230, -9848, -9463,
+        -9078, -8691, -8302, -7913, -7522, -7130, -6737, -6343, -5948, -5552,
+        -5155, -4758, -4360, -3961, -3561, -3161, -2761, -2360, -1959, -1557,
+        -1155, -753, -351 };
+
+#endif /* AVCODEC_MIPS_FFT_FIXED_TABLE_H */
diff --git a/libavcodec/mips/fmtconvert_mips_fixed.c b/libavcodec/mips/fmtconvert_mips_fixed.c
new file mode 100644
index 0000000..bc3ada0
--- /dev/null
+++ b/libavcodec/mips/fmtconvert_mips_fixed.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Zoran Lukic (zlukic at mips.com)
+ *
+ * Format Conversion Utils optimized for MIPS fixed-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/fmtconvert.c
+ */
+
+#include "libavcodec/fmtconvert.h"
+
+/**
+ * Multiply 32-bit integers by a scalar and store them as 16-bit values.
+ *
+ * For every element: dst[i] = (src[i] * mul + 0x8000) >> 16, where the
+ * product is the low 32 bits of the multiplication and only the low
+ * halfword of the shifted result is stored.
+ *
+ * Eight elements are handled per loop iteration and there is no tail
+ * handling, so a len that is not a multiple of 8 reads and writes past len
+ * elements. NOTE(review): callers presumably always pass a multiple of 8 -
+ * confirm.
+ *
+ * @param dst destination array of 16-bit values
+ * @param src source array of 32-bit integers
+ * @param mul scalar multiplier
+ * @param len number of elements to convert
+ */
+static void int32_to_fixed_fmul_scalar_mips(int16_t *dst, const int *src,
+                                            int mul, int len)
+{
+    int i;
+    /* Scratch registers hold full 32-bit loads and products, so they are
+     * declared int rather than int16_t. */
+    int temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;
+
+    for (i=0; i<len; i+=8) {
+        __asm__ volatile (
+            "lw     %[temp1],   0(%[src_i])         \n\t"
+            "lw     %[temp3],   4(%[src_i])         \n\t"
+            "lw     %[temp5],   8(%[src_i])         \n\t"
+            "lw     %[temp7],   12(%[src_i])        \n\t"
+            "lw     %[temp9],   16(%[src_i])        \n\t"
+            "lw     %[temp11],  20(%[src_i])        \n\t"
+            "lw     %[temp13],  24(%[src_i])        \n\t"
+            "lw     %[temp15],  28(%[src_i])        \n\t"
+            "mul    %[temp1],   %[temp1],   %[mul]  \n\t"
+            "mul    %[temp3],   %[temp3],   %[mul]  \n\t"
+            "mul    %[temp5],   %[temp5],   %[mul]  \n\t"
+            "mul    %[temp7],   %[temp7],   %[mul]  \n\t"
+            "mul    %[temp9],   %[temp9],   %[mul]  \n\t"
+            "mul    %[temp11],  %[temp11],  %[mul]  \n\t"
+            "mul    %[temp13],  %[temp13],  %[mul]  \n\t"
+            "mul    %[temp15],  %[temp15],  %[mul]  \n\t"
+            "addiu  %[temp1],   %[temp1],   0x8000  \n\t"
+            "addiu  %[temp3],   %[temp3],   0x8000  \n\t"
+            "addiu  %[temp5],   %[temp5],   0x8000  \n\t"
+            "addiu  %[temp7],   %[temp7],   0x8000  \n\t"
+            "addiu  %[temp9],   %[temp9],   0x8000  \n\t"
+            "addiu  %[temp11],  %[temp11],  0x8000  \n\t"
+            "addiu  %[temp13],  %[temp13],  0x8000  \n\t"
+            "addiu  %[temp15],  %[temp15],  0x8000  \n\t"
+            "sra    %[temp1],   %[temp1],   0x10    \n\t"
+            "sra    %[temp3],   %[temp3],   0x10    \n\t"
+            "sra    %[temp5],   %[temp5],   0x10    \n\t"
+            "sra    %[temp7],   %[temp7],   0x10    \n\t"
+            "sra    %[temp9],   %[temp9],   0x10    \n\t"
+            "sra    %[temp11],  %[temp11],  0x10    \n\t"
+            "sra    %[temp13],  %[temp13],  0x10    \n\t"
+            "sra    %[temp15],  %[temp15],  0x10    \n\t"
+            "sh     %[temp1],   0(%[dst_i])         \n\t"
+            "sh     %[temp3],   2(%[dst_i])         \n\t"
+            "sh     %[temp5],   4(%[dst_i])         \n\t"
+            "sh     %[temp7],   6(%[dst_i])         \n\t"
+            "sh     %[temp9],   8(%[dst_i])         \n\t"
+            "sh     %[temp11],  10(%[dst_i])        \n\t"
+            "sh     %[temp13],  12(%[dst_i])        \n\t"
+            "sh     %[temp15],  14(%[dst_i])        \n\t"
+
+            /* Early-clobber (&) is required: src_i, dst_i and mul are still
+             * read after the first output has been written, so no output
+             * may share a register with an input. */
+            : [temp1] "=&r" (temp1),   [temp11] "=&r" (temp11),
+              [temp13] "=&r" (temp13), [temp15] "=&r" (temp15),
+              [temp3] "=&r" (temp3),   [temp5] "=&r" (temp5),
+              [temp7] "=&r" (temp7),   [temp9] "=&r" (temp9)
+            : [dst_i] "r" (dst+i),  [src_i] "r" (src+i),
+              [mul] "r" (mul)
+            : "memory"
+        );
+    }
+}
+
+/**
+ * Convert one 32-bit fixed-point sample to a 16-bit value.
+ *
+ * The word at src is loaded; if it is below 0xf000 (signed comparison) it is
+ * kept, otherwise it is replaced by 0xefff. The low 16 bits of that value
+ * are then sign-extended (seh) and returned.
+ *
+ * NOTE(review): no lower bound is applied, and because 0xefff has bit 15
+ * set, both the limit itself and inputs in 0x8000..0xefff come out negative
+ * after the sign extension - confirm this is the intended clipping.
+ *
+ * @param src pointer to the sample to convert
+ * @return the converted sample
+ */
+static inline int fixed_to_int16_one_mips(const int *src)
+{
+    int16_t ret;
+    int temp1, temp7, temp8;
+    __asm__ volatile (
+        "lw     %[temp1],   0(%[src_i1])            \n\t"
+        "li     %[temp8],   0xf000                  \n\t"
+        "li     %[ret1],    0xefff                  \n\t"
+        "slt    %[temp7],   %[temp1],   %[temp8]    \n\t"
+        "movn   %[ret1],    %[temp1],   %[temp7]    \n\t"
+        "seh    %[ret1],    %[ret1]                 \n\t"
+        : [temp1] "=r" (temp1), [temp7] "=r" (temp7),
+          [temp8] "=r" (temp8), [ret1] "=r" (ret)
+        : [src_i1] "r" (src)
+        : "memory"
+    );
+    return (int16_t) ret;
+}
+
+/**
+ * Convert planar 32-bit fixed-point channels to interleaved 16-bit samples.
+ *
+ * Sample i of channel c is written to dst[channels * i + c]. Each sample is
+ * converted the same way in all three paths: values below 0xf000 (signed
+ * comparison) are kept, everything else becomes 0xefff, and the low 16 bits
+ * are then sign-extended (seh).
+ *
+ * Two and six channels have dedicated inline-assembly loops; any other
+ * channel count goes through fixed_to_int16_one_mips() sample by sample.
+ *
+ * @param dst      interleaved output buffer
+ * @param src      array of per-channel input buffers
+ * @param len      number of samples per channel
+ * @param channels number of channels
+ */
+static void fixed_to_int16_interleave_mips(int16_t *dst, const int **src,
+                                    long len, int channels)
+{
+    int i,j,c;
+    if(channels==2) {
+        /* Stereo: one sample of each channel per iteration; the asm outputs
+         * are bound directly to the two destination elements. */
+        for(i=0; i<len; i++) {
+            int temp, temp1, temp7, temp8;
+            __asm__ volatile (
+                "lw     %[temp],    0(%[src_i])             \n\t"
+                "lw     %[temp1],   0(%[src_i1])            \n\t"
+                "li     %[temp8],   0xf000                  \n\t"
+                "li     %[ret],     0xefff                  \n\t"
+                "li     %[ret1],    0xefff                  \n\t"
+                "slt    %[temp7],   %[temp],    %[temp8]    \n\t"
+                "movn   %[ret],     %[temp],    %[temp7]    \n\t"
+                "slt    %[temp7],   %[temp1],   %[temp8]    \n\t"
+                "movn   %[ret1],    %[temp1],   %[temp7]    \n\t"
+                "seh    %[ret],     %[ret]                  \n\t"
+                "seh    %[ret1],    %[ret1]                 \n\t"
+
+                : [temp] "=&r" (temp),    [temp1] "=&r" (temp1),
+                  [temp7] "=&r" (temp7),  [temp8] "=&r" (temp8),
+                  [ret] "=&r" (dst[2*i]), [ret1] "=&r" (dst[2*i+1])
+                : [src_i] "r" (src[0]+i), [src_i1] "r" (src[1]+i)
+                : "memory"
+            );
+        }
+    }
+    else {
+        if(channels==6) {
+            /* Six channels: same per-sample sequence as the stereo path,
+             * applied to one sample of each of the six channels. */
+            for(i=0; i<len; i++) {
+                int temp, temp1, temp2, temp3, temp4, temp5, temp7, temp8;
+                __asm__ volatile (
+                    "lw     %[temp],    0(%[src_i])             \n\t"
+                    "lw     %[temp1],   0(%[src_i1])            \n\t"
+                    "lw     %[temp2],   0(%[src_i2])            \n\t"
+                    "lw     %[temp3],   0(%[src_i3])            \n\t"
+                    "lw     %[temp4],   0(%[src_i4])            \n\t"
+                    "lw     %[temp5],   0(%[src_i5])            \n\t"
+                    "li     %[temp8],   0xf000                  \n\t"
+                    "li     %[ret],     0xefff                  \n\t"
+                    "li     %[ret1],    0xefff                  \n\t"
+                    "li     %[ret2],    0xefff                  \n\t"
+                    "li     %[ret3],    0xefff                  \n\t"
+                    "li     %[ret4],    0xefff                  \n\t"
+                    "li     %[ret5],    0xefff                  \n\t"
+                    "slt    %[temp7],   %[temp],    %[temp8]    \n\t"
+                    "movn   %[ret],     %[temp],    %[temp7]    \n\t"
+                    "slt    %[temp7],   %[temp1],   %[temp8]    \n\t"
+                    "movn   %[ret1],    %[temp1],   %[temp7]    \n\t"
+                    "slt    %[temp7],   %[temp2],   %[temp8]    \n\t"
+                    "movn   %[ret2],    %[temp2],   %[temp7]    \n\t"
+                    "slt    %[temp7],   %[temp3],   %[temp8]    \n\t"
+                    "movn   %[ret3],    %[temp3],   %[temp7]    \n\t"
+                    "slt    %[temp7],   %[temp4],   %[temp8]    \n\t"
+                    "movn   %[ret4],    %[temp4],   %[temp7]    \n\t"
+                    "slt    %[temp7],   %[temp5],   %[temp8]    \n\t"
+                    "movn   %[ret5],    %[temp5],   %[temp7]    \n\t"
+                    "seh    %[ret],     %[ret]                  \n\t"
+                    "seh    %[ret1],    %[ret1]                 \n\t"
+                    "seh    %[ret2],    %[ret2]                 \n\t"
+                    "seh    %[ret5],    %[ret5]                 \n\t"
+                    "seh    %[ret3],    %[ret3]                 \n\t"
+                    "seh    %[ret4],    %[ret4]                 \n\t"
+
+                    : [temp] "=&r" (temp),       [temp1] "=&r" (temp1),
+                      [temp2] "=&r" (temp2),     [temp3] "=&r" (temp3),
+                      [temp4] "=&r" (temp4),     [temp5] "=&r" (temp5),
+                      [temp7] "=&r" (temp7),     [temp8] "=&r" (temp8),
+                      [ret] "=&r" (dst[6*i]),    [ret1] "=&r" (dst[6*i+1]),
+                      [ret2] "=&r" (dst[6*i+2]), [ret3] "=&r" (dst[6*i+3]),
+                      [ret4] "=&r" (dst[6*i+4]), [ret5] "=&r" (dst[6*i+5])
+                    : [src_i] "r" (src[0]+i),    [src_i1] "r" (src[1]+i),
+                      [src_i2] "r" (src[2]+i),   [src_i3] "r" (src[3]+i),
+                      [src_i4] "r" (src[4]+i),   [src_i5] "r" (src[5]+i)
+                    : "memory"
+                );
+            }
+        }
+        else {
+            /* Generic channel count: walk one channel at a time, stepping
+             * the destination index j by the channel count. */
+            for(c=0; c<channels; c++)
+                for(i=0, j=c; i<len; i++, j+=channels)
+                    dst[j] = fixed_to_int16_one_mips(src[c]+i);
+        }
+    }
+}
+
+/**
+ * Install the MIPS fixed-point format-conversion routines into a context.
+ *
+ * @param c     format-conversion context whose function pointers are set
+ * @param avctx codec context; not used by this function
+ */
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx)
+{
+    c->fixed_to_int16_interleave  = fixed_to_int16_interleave_mips;
+    c->int32_to_fixed_fmul_scalar = int32_to_fixed_fmul_scalar_mips;
+}
diff --git a/libavutil/common.h b/libavutil/common.h
index a11a325..07433d7 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -163,6 +163,18 @@ static av_always_inline av_const int16_t av_clip_int16_c(int a)
     else                      return a;
 }

+#if (ARCH_MIPS)
+/**
+ * Limit a signed integer from above at 0xefff and return it as int16_t.
+ *
+ * Only the upper bound is enforced; values at or below 0xefff are passed
+ * through unchanged before the conversion to the int16_t return type.
+ * NOTE(review): 0xefff does not fit in int16_t, so the limit (and any input
+ * outside the int16_t range) is altered by the return conversion - confirm
+ * the intended bound.
+ *
+ * @param a value to limit
+ * @return 0xefff if a is greater than 0xefff, otherwise a, converted to
+ *         int16_t
+ */
+static av_always_inline av_const int16_t av_clip_int16_c_fixed(int a)
+{
+    if (a > 0xefff)
+        return 0xefff;
+    return a;
+}
+#endif /* ARCH_MIPS */
+
 /**
  * Clip a signed 64-bit integer value into the -2147483648,2147483647 range.
  * @param a value to clip
--
1.7.3.4



More information about the ffmpeg-devel mailing list