[FFmpeg-devel] [PATCH 2/2] mips: Optimization of AC3 FP encoder and EAC3 FP decoder
Nedeljko Babic
nbabic at mips.com
Tue Oct 16 16:26:31 CEST 2012
Signed-off-by: Nedeljko Babic <nbabic at mips.com>
---
doc/mips.txt | 2 +
libavcodec/ac3dsp.c | 2 +
libavcodec/ac3dsp.h | 1 +
libavcodec/ac3enc.c | 6 +
libavcodec/ac3enc.h | 9 +
libavcodec/ac3enc_template.c | 8 +-
libavcodec/mips/Makefile | 2 +
libavcodec/mips/ac3dsp_mips.c | 353 +++++++++++++++++++
libavcodec/mips/ac3enc_float_mips.c | 652 +++++++++++++++++++++++++++++++++++
9 files changed, 1031 insertions(+), 4 deletions(-)
create mode 100644 libavcodec/mips/ac3dsp_mips.c
create mode 100644 libavcodec/mips/ac3enc_float_mips.c
diff --git a/doc/mips.txt b/doc/mips.txt
index 6fa6fb4..d4c31a05 100644
--- a/doc/mips.txt
+++ b/doc/mips.txt
@@ -47,6 +47,8 @@ Files that have MIPS copyright notice in them:
* libavutil/mips/
libm_mips.h
* libavcodec/mips/
+ ac3dsp_mips.c
+ ac3enc_float_mips.c
acelp_filters_mips.c
acelp_vectors_mips.c
amrwbdec_mips.c
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 49866eb..9277411 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -258,4 +258,6 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
ff_ac3dsp_init_arm(c, bit_exact);
if (ARCH_X86)
ff_ac3dsp_init_x86(c, bit_exact);
+ if (ARCH_MIPS)
+ ff_ac3dsp_init_mips(c, bit_exact);
}
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index fbc63f6..7269c57 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -139,5 +139,6 @@ typedef struct AC3DSPContext {
void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact);
void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact);
void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact);
+void ff_ac3dsp_init_mips(AC3DSPContext *c, int bit_exact);
#endif /* AVCODEC_AC3DSP_H */
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index e5dea46..352dbaf 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2458,10 +2458,16 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
s->mdct_end = ff_ac3_fixed_mdct_end;
s->mdct_init = ff_ac3_fixed_mdct_init;
s->allocate_sample_buffers = ff_ac3_fixed_allocate_sample_buffers;
+ s->apply_mdct = ff_ac3_fixed_apply_mdct;
+ s->apply_channel_coupling = ff_ac3_fixed_apply_channel_coupling;
} else if (CONFIG_AC3_ENCODER || CONFIG_EAC3_ENCODER) {
s->mdct_end = ff_ac3_float_mdct_end;
s->mdct_init = ff_ac3_float_mdct_init;
s->allocate_sample_buffers = ff_ac3_float_allocate_sample_buffers;
+ s->apply_mdct = ff_ac3_float_apply_mdct;
+ s->apply_channel_coupling = ff_ac3_float_apply_channel_coupling;
+
+ if(HAVE_MIPSFPU) ff_ac3_float_encode_init_mips(s);
}
if (CONFIG_EAC3_ENCODER && s->eac3)
s->output_frame_header = ff_eac3_output_frame_header;
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index be9dcf2..e666fe1 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -256,6 +256,8 @@ typedef struct AC3EncodeContext {
/* fixed vs. float templated function pointers */
int (*allocate_sample_buffers)(struct AC3EncodeContext *s);
+ void (*apply_mdct)(struct AC3EncodeContext *s);
+ void (*apply_channel_coupling)(struct AC3EncodeContext *s);
/* AC-3 vs. E-AC-3 function pointers */
void (*output_frame_header)(struct AC3EncodeContext *s);
} AC3EncodeContext;
@@ -264,6 +266,7 @@ typedef struct AC3EncodeContext {
extern const uint64_t ff_ac3_channel_layouts[19];
int ff_ac3_encode_init(AVCodecContext *avctx);
+void ff_ac3_float_encode_init_mips(AC3EncodeContext *s); /* called from ff_ac3_encode_init() on the float path when HAVE_MIPSFPU is set */
int ff_ac3_encode_close(AVCodecContext *avctx);
@@ -300,6 +303,12 @@ int ff_ac3_float_mdct_init(AC3EncodeContext *s);
int ff_ac3_fixed_allocate_sample_buffers(AC3EncodeContext *s);
int ff_ac3_float_allocate_sample_buffers(AC3EncodeContext *s);
+void ff_ac3_fixed_apply_mdct(AC3EncodeContext *s);
+void ff_ac3_float_apply_mdct(AC3EncodeContext *s);
+
+void ff_ac3_fixed_apply_channel_coupling(AC3EncodeContext *s);
+void ff_ac3_float_apply_channel_coupling(AC3EncodeContext *s);
+
int ff_ac3_fixed_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
const AVFrame *frame, int *got_packet_ptr);
int ff_ac3_float_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 904e0bb..be59f66 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -94,7 +94,7 @@ static void copy_input_samples(AC3EncodeContext *s, SampleType **samples)
* This applies the KBD window and normalizes the input to reduce precision
* loss due to fixed-point calculations.
*/
-static void apply_mdct(AC3EncodeContext *s)
+void AC3_NAME(apply_mdct)(AC3EncodeContext *s)
{
int blk, ch;
@@ -124,7 +124,7 @@ static void apply_mdct(AC3EncodeContext *s)
/*
* Calculate coupling channel and coupling coordinates.
*/
-static void apply_channel_coupling(AC3EncodeContext *s)
+void AC3_NAME(apply_channel_coupling)(AC3EncodeContext *s)
{
LOCAL_ALIGNED_16(CoefType, cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
#if CONFIG_AC3ENC_FLOAT
@@ -401,7 +401,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
copy_input_samples(s, (SampleType **)frame->extended_data);
- apply_mdct(s);
+ s->apply_mdct(s);
if (s->fixed_point)
scale_coefficients(s);
@@ -413,7 +413,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
ff_ac3_compute_coupling_strategy(s);
if (s->cpl_on)
- apply_channel_coupling(s);
+ s->apply_channel_coupling(s);
compute_rematrixing_strategy(s);
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index fa75670..f1cade4 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -14,3 +14,5 @@ MIPSDSPR1-OBJS-$(CONFIG_MPEGAUDIODSP) += mips/mpegaudiodsp_mips_fixed.o
OBJS-$(CONFIG_FFT) += mips/fft_init_table.o
MIPSFPU-OBJS-$(CONFIG_FFT) += mips/fft_mips.o
MIPSFPU-OBJS-$(HAVE_INLINE_ASM) += mips/dsputil_mips.o
+OBJS-$(CONFIG_AC3DSP) += mips/ac3dsp_mips.o
+MIPSFPU-OBJS-$(CONFIG_AC3_ENCODER) += mips/ac3enc_float_mips.o
diff --git a/libavcodec/mips/ac3dsp_mips.c b/libavcodec/mips/ac3dsp_mips.c
new file mode 100644
index 0000000..8811c3c
--- /dev/null
+++ b/libavcodec/mips/ac3dsp_mips.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Branimir Vasic (bvasic at mips.com)
+ *
+ * Various AC-3 DSP Utils optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/ac3dsp.c
+ */
+
+#include "config.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/ac3.h"
+
+static void ac3_extract_exponents_mips(uint8_t *exp, int32_t *coef, int nb_coefs)
+{ /* Writes one exponent per coefficient into exp[0..nb_coefs); coef[] may be clipped in place. */
+ int i;
+
+ for (i = 0; i < nb_coefs; i++) {
+ int e;
+ int v = abs(coef[i]);
+ if (v == 0)
+ e = 24; /* a zero coefficient is assigned exponent 24 */
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) { /* av_log2(v) exceeded 23: force exponent 0 and limit the coefficient */
+ e = 0;
+ coef[i] = av_clip(coef[i], -16777215, 16777215); /* bounds are +/-(2^24 - 1) */
+ }
+ }
+ exp[i] = e;
+ }
+}
+
+#if HAVE_INLINE_ASM
+#if HAVE_MIPSDSPR1
+static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
+ int start, int end,
+ int snr_offset, int floor,
+ const uint8_t *bap_tab, uint8_t *bap)
+{ /* Fills bap[] for bins start..end-1 by indexing bap_tab with a clamped (psd - threshold) value. */
+ int bin, band, band_end, address;
+ int val, temp1, temp2;
+
+ /* special case, if snr offset is -960, set all bap's to zero */
+ if (snr_offset == -960) {
+ memset(bap, 0, AC3_MAX_COEFS);
+ return;
+ }
+
+ bin = start;
+ band = ff_ac3_bin_to_band_tab[start]; /* band index looked up for the first bin */
+ do {
+ int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor; /* threshold shared by all bins of this band */
+ band_end = ff_ac3_band_start_tab[++band];
+ band_end = FFMIN(band_end, end); /* do not process bins at or beyond end */
+
+ for (; bin < band_end; bin++) {
+ val = (psd[bin] - m) >> 5;
+ __asm__ volatile ( /* branch-free clamp: address = val limited to the range 0..63 */
+ "sra %[temp1], %[val], 31 \n\t" /* temp1 = sign mask of val */
+ "xor %[address], %[temp1], %[val] \n\t" /* val if val >= 0, else -val - 1 */
+ "addiu %[temp1], %[val], -63 \n\t"
+ "sra %[temp2], %[temp1], 31 \n\t" /* temp2 = sign mask of (val - 63) */
+ "xor %[temp1], %[temp1], %[temp2] \n\t"
+ "subu %[address], %[address], %[temp1] \n\t"
+ "addiu %[address], %[address], 63 \n\t"
+ "sra %[address], %[address], 1 \n\t" /* result: 0 for val < 0, val for 0..62, 63 otherwise */
+ : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
+ [address] "=&r" (address)
+ : [val] "r" (val)
+ );
+ bap[bin] = bap_tab[address];
+ }
+ } while (end > band_end);
+}
+
+static void ac3_update_bap_counts_mips(uint16_t mant_cnt[16], uint8_t *bap,
+ int len)
+{ /* For each of the len bytes in bap[], increments the 16-bit counter mant_cnt[bap[k]]; walks bap[] from its end backwards. */
+ int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+ __asm__ volatile(
+ "andi %[temp3], %[len], 3 \n\t" /* temp3 = len & 3: bytes left for the tail loop */
+ "addu %[temp2], %[bap], %[len] \n\t" /* temp2 = one past the last byte */
+ "addu %[temp4], %[bap], %[temp3] \n\t" /* temp4 = address where the 4-byte loop stops */
+ "beq %[temp2], %[temp4], 4f \n\t" /* fewer than 4 bytes: skip the unrolled loop */
+ "1: \n\t" /* unrolled loop: four bytes per iteration */
+ "lbu %[temp0], -1(%[temp2]) \n\t"
+ "lbu %[temp5], -2(%[temp2]) \n\t"
+ "lbu %[temp6], -3(%[temp2]) \n\t"
+ "sll %[temp0], %[temp0], 1 \n\t" /* index * 2: mant_cnt entries are 16 bits wide */
+ "addu %[temp0], %[mant_cnt], %[temp0] \n\t"
+ "sll %[temp5], %[temp5], 1 \n\t"
+ "addu %[temp5], %[mant_cnt], %[temp5] \n\t"
+ "lhu %[temp1], 0(%[temp0]) \n\t"
+ "sll %[temp6], %[temp6], 1 \n\t"
+ "addu %[temp6], %[mant_cnt], %[temp6] \n\t"
+ "addiu %[temp1], %[temp1], 1 \n\t"
+ "sh %[temp1], 0(%[temp0]) \n\t" /* each counter is stored before the next one is loaded */
+ "lhu %[temp1], 0(%[temp5]) \n\t"
+ "lbu %[temp7], -4(%[temp2]) \n\t"
+ "addiu %[temp2], %[temp2], -4 \n\t"
+ "addiu %[temp1], %[temp1], 1 \n\t"
+ "sh %[temp1], 0(%[temp5]) \n\t"
+ "lhu %[temp1], 0(%[temp6]) \n\t"
+ "sll %[temp7], %[temp7], 1 \n\t"
+ "addu %[temp7], %[mant_cnt], %[temp7] \n\t"
+ "addiu %[temp1], %[temp1],1 \n\t"
+ "sh %[temp1], 0(%[temp6]) \n\t"
+ "lhu %[temp1], 0(%[temp7]) \n\t"
+ "addiu %[temp1], %[temp1], 1 \n\t"
+ "sh %[temp1], 0(%[temp7]) \n\t"
+ "bne %[temp2], %[temp4], 1b \n\t"
+ "4: \n\t"
+ "beqz %[temp3], 2f \n\t" /* no remainder: done */
+ "3: \n\t" /* tail loop: one byte per iteration, temp3 times */
+ "addiu %[temp3], %[temp3], -1 \n\t"
+ "lbu %[temp0], -1(%[temp2]) \n\t"
+ "addiu %[temp2], %[temp2], -1 \n\t"
+ "sll %[temp0], %[temp0], 1 \n\t"
+ "addu %[temp0], %[mant_cnt], %[temp0] \n\t"
+ "lhu %[temp1], 0(%[temp0]) \n\t"
+ "addiu %[temp1], %[temp1], 1 \n\t"
+ "sh %[temp1], 0(%[temp0]) \n\t"
+ "bgtz %[temp3], 3b \n\t"
+ "2: \n\t"
+
+ : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
+ [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+ [temp6] "=&r" (temp6), [temp7] "=&r" (temp7)
+ : [len] "r" (len), [bap] "r" (bap),
+ [mant_cnt] "r" (mant_cnt)
+ : "memory"
+ );
+}
+#endif
+
+#if HAVE_MIPSFPU
+static void float_to_fixed24_mips(int32_t *dst, const float *src, unsigned int len)
+{ /* Scales each src float by 2^24 and stores the cvt.w.s result in dst, eight values per loop iteration. */
+ const float scale = 1 << 24;
+ float src0, src1, src2, src3, src4, src5, src6, src7;
+ int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+
+ do { /* body runs before len is tested, so at least eight values are always processed */
+ __asm__ volatile (
+ "lwc1 %[src0], 0(%[src]) \n\t" /* load eight consecutive floats */
+ "lwc1 %[src1], 4(%[src]) \n\t"
+ "lwc1 %[src2], 8(%[src]) \n\t"
+ "lwc1 %[src3], 12(%[src]) \n\t"
+ "lwc1 %[src4], 16(%[src]) \n\t"
+ "lwc1 %[src5], 20(%[src]) \n\t"
+ "lwc1 %[src6], 24(%[src]) \n\t"
+ "lwc1 %[src7], 28(%[src]) \n\t"
+ "mul.s %[src0], %[src0], %[scale] \n\t" /* multiply by scale (1 << 24) */
+ "mul.s %[src1], %[src1], %[scale] \n\t"
+ "mul.s %[src2], %[src2], %[scale] \n\t"
+ "mul.s %[src3], %[src3], %[scale] \n\t"
+ "mul.s %[src4], %[src4], %[scale] \n\t"
+ "mul.s %[src5], %[src5], %[scale] \n\t"
+ "mul.s %[src6], %[src6], %[scale] \n\t"
+ "mul.s %[src7], %[src7], %[scale] \n\t"
+ "cvt.w.s %[src0], %[src0] \n\t" /* convert to 32-bit integer inside the FP register */
+ "cvt.w.s %[src1], %[src1] \n\t"
+ "cvt.w.s %[src2], %[src2] \n\t"
+ "cvt.w.s %[src3], %[src3] \n\t"
+ "cvt.w.s %[src4], %[src4] \n\t"
+ "cvt.w.s %[src5], %[src5] \n\t"
+ "cvt.w.s %[src6], %[src6] \n\t"
+ "cvt.w.s %[src7], %[src7] \n\t"
+ "mfc1 %[temp0], %[src0] \n\t" /* move the integer bits to general-purpose registers */
+ "mfc1 %[temp1], %[src1] \n\t"
+ "mfc1 %[temp2], %[src2] \n\t"
+ "mfc1 %[temp3], %[src3] \n\t"
+ "mfc1 %[temp4], %[src4] \n\t"
+ "mfc1 %[temp5], %[src5] \n\t"
+ "mfc1 %[temp6], %[src6] \n\t"
+ "mfc1 %[temp7], %[src7] \n\t"
+ "sw %[temp0], 0(%[dst]) \n\t" /* store eight results */
+ "sw %[temp1], 4(%[dst]) \n\t"
+ "sw %[temp2], 8(%[dst]) \n\t"
+ "sw %[temp3], 12(%[dst]) \n\t"
+ "sw %[temp4], 16(%[dst]) \n\t"
+ "sw %[temp5], 20(%[dst]) \n\t"
+ "sw %[temp6], 24(%[dst]) \n\t"
+ "sw %[temp7], 28(%[dst]) \n\t"
+
+ : [dst] "+r" (dst), [src] "+r" (src),
+ [src0] "=&f" (src0), [src1] "=&f" (src1),
+ [src2] "=&f" (src2), [src3] "=&f" (src3),
+ [src4] "=&f" (src4), [src5] "=&f" (src5),
+ [src6] "=&f" (src6), [src7] "=&f" (src7),
+ [temp0] "=r" (temp0), [temp1] "=r" (temp1),
+ [temp2] "=r" (temp2), [temp3] "=r" (temp3),
+ [temp4] "=r" (temp4), [temp5] "=r" (temp5),
+ [temp6] "=r" (temp6), [temp7] "=r" (temp7)
+ : [scale] "f" (scale)
+ : "memory"
+ );
+ src = src + 8; /* pointers are advanced in C; the asm only uses fixed offsets */
+ dst = dst + 8;
+ len -= 8; /* NOTE(review): len is unsigned, so a len that is not a positive multiple of 8 wraps here instead of ending the loop */
+ } while (len > 0);
+}
+
+static void ac3_downmix_mips(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len)
+{ /* In-place downmix: out[o][i] = sum over j < in_ch of samples[j][i] * matrix[j][o], for out_ch 1 or 2; other out_ch values do nothing. */
+    int i, j;
+    float v0, v1, v2, v3; /* accumulators for output channel 0, samples i..i+3 */
+    float v4, v5, v6, v7; /* accumulators for output channel 1, samples i..i+3 */
+    float samples0, samples1, samples2, samples3, matrix_j, matrix_j2;
+    float *samples_p,*matrix_p;
+    if (out_ch == 2) {
+        for (i = 0; i < len; i+=4) { /* four samples per pass; results are written for i..i+3 even if len is not a multiple of 4 */
+            v0 = v1 = v2 = v3 = 0.0f;
+            v4 = v5 = v6 = v7 = 0.0f;
+            samples_p = &samples[0][i];
+            matrix_p  = &matrix[0][0];
+            __asm__ volatile ( /* loop body runs before the counter test, so in_ch must be >= 1 */
+                "move   %[j],         $zero                              \n\t"
+                "1:                                                      \n\t"
+                "lwc1   %[matrix_j],  0(%[matrix_p])                     \n\t" /* matrix[j][0] */
+                "lwc1   %[matrix_j2], 4(%[matrix_p])                     \n\t" /* matrix[j][1] */
+                "lwc1   %[samples0],  0(%[samples_p])                    \n\t"
+                "lwc1   %[samples1],  4(%[samples_p])                    \n\t"
+                "lwc1   %[samples2],  8(%[samples_p])                    \n\t"
+                "lwc1   %[samples3],  12(%[samples_p])                   \n\t"
+                "addiu  %[matrix_p],  %[matrix_p], 8                     \n\t" /* next matrix row: 2 floats */
+                "madd.s %[v0],        %[v0], %[samples0], %[matrix_j]    \n\t"
+                "madd.s %[v1],        %[v1], %[samples1], %[matrix_j]    \n\t"
+                "madd.s %[v2],        %[v2], %[samples2], %[matrix_j]    \n\t"
+                "madd.s %[v3],        %[v3], %[samples3], %[matrix_j]    \n\t"
+                "madd.s %[v4],        %[v4], %[samples0], %[matrix_j2]   \n\t"
+                "madd.s %[v5],        %[v5], %[samples1], %[matrix_j2]   \n\t"
+                "madd.s %[v6],        %[v6], %[samples2], %[matrix_j2]   \n\t"
+                "madd.s %[v7],        %[v7], %[samples3], %[matrix_j2]   \n\t"
+                "addiu  %[j],         %[j], 1                            \n\t"
+                "addiu  %[samples_p], %[samples_p], 1024                 \n\t" /* next input channel: 256 floats */
+                "bne    %[j],         %[in_ch], 1b                       \n\t" /* two-register compare needs bne, not bnez */
+                :[samples0]"=&f"(samples0), [samples1]"=&f"(samples1), [samples2]"=&f"(samples2),
+                 [samples3]"=&f"(samples3), [samples_p]"+r"(samples_p), [matrix_j]"=&f"(matrix_j),
+                 [matrix_j2]"=&f"(matrix_j2), [matrix_p]"+r"(matrix_p), [v0]"+f"(v0), [v1]"+f"(v1), [v2]"+f"(v2), [v3]"+f"(v3),
+                 [v4]"+f"(v4), [v5]"+f"(v5), [v6]"+f"(v6), [v7]"+f"(v7),[j]"=&r"(j)
+                :[in_ch]"r"(in_ch)
+                :"memory"
+            );
+            samples[0][i  ] = v0; /* outputs are stored only after every input channel has been read */
+            samples[0][i+1] = v1;
+            samples[0][i+2] = v2;
+            samples[0][i+3] = v3;
+            samples[1][i  ] = v4;
+            samples[1][i+1] = v5;
+            samples[1][i+2] = v6;
+            samples[1][i+3] = v7;
+        }
+    } else if (out_ch == 1) {
+        for (i = 0; i < len; i+=4) { /* same scheme as above, using only matrix[j][0] */
+            v0 = v1 = v2 = v3 = 0.0f;
+            samples_p = &samples[0][i];
+            matrix_p  = &matrix[0][0];
+            __asm__ volatile (
+                "move   %[j],         $zero                              \n\t"
+                "1:                                                      \n\t"
+                "lwc1   %[matrix_j],  0(%[matrix_p])                     \n\t" /* matrix[j][0] */
+                "lwc1   %[samples0],  0(%[samples_p])                    \n\t"
+                "lwc1   %[samples1],  4(%[samples_p])                    \n\t"
+                "lwc1   %[samples2],  8(%[samples_p])                    \n\t"
+                "lwc1   %[samples3],  12(%[samples_p])                   \n\t"
+                "addiu  %[matrix_p],  %[matrix_p], 8                     \n\t" /* row stride is still 2 floats */
+                "madd.s %[v0],        %[v0], %[samples0], %[matrix_j]    \n\t"
+                "madd.s %[v1],        %[v1], %[samples1], %[matrix_j]    \n\t"
+                "madd.s %[v2],        %[v2], %[samples2], %[matrix_j]    \n\t"
+                "madd.s %[v3],        %[v3], %[samples3], %[matrix_j]    \n\t"
+                "addiu  %[j],         %[j], 1                            \n\t"
+                "addiu  %[samples_p], %[samples_p], 1024                 \n\t"
+                "bne    %[j],         %[in_ch], 1b                       \n\t"
+                :[samples0]"=&f"(samples0), [samples1]"=&f"(samples1), [samples2]"=&f"(samples2),
+                 [samples3]"=&f"(samples3), [samples_p]"+r"(samples_p), [matrix_j]"=&f"(matrix_j),
+                 [matrix_p]"+r"(matrix_p), [v0]"+f"(v0), [v1]"+f"(v1), [v2]"+f"(v2), [v3]"+f"(v3),
+                 [j]"=&r"(j)
+                :[in_ch]"r"(in_ch)
+                :"memory"
+            );
+            samples[0][i  ] = v0;
+            samples[0][i+1] = v1;
+            samples[0][i+2] = v2;
+            samples[0][i+3] = v3;
+        }
+    }
+}
+#endif
+#endif /* HAVE_INLINE_ASM */
+
+void ff_ac3dsp_init_mips(AC3DSPContext *c, int bit_exact) { /* Installs the MIPS routines of this file into c; bit_exact is not consulted here. */
+ c->extract_exponents = ac3_extract_exponents_mips; /* plain C routine, installed unconditionally */
+#if HAVE_INLINE_ASM
+#if HAVE_MIPSDSPR1
+ c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_mips;
+ c->update_bap_counts = ac3_update_bap_counts_mips;
+#endif /* HAVE_MIPSDSPR1 */
+#if HAVE_MIPSFPU
+ c->float_to_fixed24 = float_to_fixed24_mips;
+ c->downmix = ac3_downmix_mips;
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+
+}
diff --git a/libavcodec/mips/ac3enc_float_mips.c b/libavcodec/mips/ac3enc_float_mips.c
new file mode 100644
index 0000000..87ae2fa
--- /dev/null
+++ b/libavcodec/mips/ac3enc_float_mips.c
@@ -0,0 +1,652 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Branimir Vasic (bvasic at mips.com)
+ *
+ * Various AC3 floating point encoder functions optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/ac3enc_template.c
+ */
+
+#define CONFIG_AC3ENC_FLOAT 1
+#include "libavcodec/ac3enc.h"
+#include "libavcodec/eac3enc.h"
+
+#if HAVE_INLINE_ASM
+static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len)
+{ /* Clips len values of coef in place (source and destination are the same buffer) to COEF_MIN..COEF_MAX. */
+ dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
+}
+
+static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl)
+{ /* Returns 0.125 * sqrt(energy_ch / energy_cpl), capped at COEF_MAX. */
+ float coord = 0.125;
+ if (energy_cpl > 0) /* avoids dividing by zero; coord stays 0.125 when energy_cpl is not positive */
+ coord *= sqrtf(energy_ch / energy_cpl);
+ return FFMIN(coord, COEF_MAX);
+}
+
+static void ff_ac3_float_deinterleave_input_samples_mips(AC3EncodeContext *s,
+ const SampleType *samples)
+{ /* Splits interleaved input into s->planar_samples[ch], with unrolled asm paths for 6 and 2 channels and a C loop otherwise. */
+ int ch, i;
+ int *sptrs[6];
+ int *sptrs_2[2];
+ int *sptr1,*sptr2,*sptr3,*sptr4,*sptr5,*sptr6;
+
+ int temp1, temp2, temp3, temp4, temp5, temp6;
+ const int *sptr = (const int*) samples; /* samples are moved as 32-bit words; assumes SampleType is 32 bits wide - TODO confirm */
+
+ for (ch = 0; ch < s->channels; ch++) { /* move AC3_BLOCK_SIZE samples from offset AC3_FRAME_SIZE to the start of each channel buffer */
+ memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE],
+ AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0]));
+ }
+
+ if (s->channels == 6) {
+ sptrs[s->channel_map[0]] = (int*)(&s->planar_samples[0][0]); /* sptrs[k] = planar buffer that receives interleaved slot k */
+ sptrs[s->channel_map[1]] = (int*)(&s->planar_samples[1][0]);
+ sptrs[s->channel_map[2]] = (int*)(&s->planar_samples[2][0]);
+ sptrs[s->channel_map[3]] = (int*)(&s->planar_samples[3][0]);
+ sptrs[s->channel_map[4]] = (int*)(&s->planar_samples[4][0]);
+ sptrs[s->channel_map[5]] = (int*)(&s->planar_samples[5][0]);
+ sptr1 = sptrs[0] + AC3_BLOCK_SIZE; /* new samples are written after the first AC3_BLOCK_SIZE entries */
+ sptr2 = sptrs[1] + AC3_BLOCK_SIZE;
+ sptr3 = sptrs[2] + AC3_BLOCK_SIZE;
+ sptr4 = sptrs[3] + AC3_BLOCK_SIZE;
+ sptr5 = sptrs[4] + AC3_BLOCK_SIZE;
+ sptr6 = sptrs[5] + AC3_BLOCK_SIZE;
+
+ /* deinterleave and remap input samples */
+ for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i+=4) { /* four 6-sample groups (96 input bytes) per iteration */
+ __asm__ volatile (
+ "lw %[temp1], 0(%[sptr]) \n\t" /* group 0: load six interleaved samples */
+ "lw %[temp2], 4(%[sptr]) \n\t"
+ "lw %[temp3], 8(%[sptr]) \n\t"
+ "lw %[temp4], 12(%[sptr]) \n\t"
+ "lw %[temp5], 16(%[sptr]) \n\t"
+ "lw %[temp6], 20(%[sptr]) \n\t"
+ "sw %[temp1], 0(%[sptr1]) \n\t" /* group 0: one store per destination buffer */
+ "sw %[temp2], 0(%[sptr2]) \n\t"
+ "sw %[temp3], 0(%[sptr3]) \n\t"
+ "sw %[temp4], 0(%[sptr4]) \n\t"
+ "sw %[temp5], 0(%[sptr5]) \n\t"
+ "sw %[temp6], 0(%[sptr6]) \n\t"
+ "lw %[temp1], 24(%[sptr]) \n\t" /* group 1 */
+ "lw %[temp2], 28(%[sptr]) \n\t"
+ "lw %[temp3], 32(%[sptr]) \n\t"
+ "lw %[temp4], 36(%[sptr]) \n\t"
+ "lw %[temp5], 40(%[sptr]) \n\t"
+ "lw %[temp6], 44(%[sptr]) \n\t"
+ "sw %[temp1], 4(%[sptr1]) \n\t"
+ "sw %[temp2], 4(%[sptr2]) \n\t"
+ "sw %[temp3], 4(%[sptr3]) \n\t"
+ "sw %[temp4], 4(%[sptr4]) \n\t"
+ "sw %[temp5], 4(%[sptr5]) \n\t"
+ "sw %[temp6], 4(%[sptr6]) \n\t"
+ "lw %[temp1], 48(%[sptr]) \n\t" /* group 2 */
+ "lw %[temp2], 52(%[sptr]) \n\t"
+ "lw %[temp3], 56(%[sptr]) \n\t"
+ "lw %[temp4], 60(%[sptr]) \n\t"
+ "lw %[temp5], 64(%[sptr]) \n\t"
+ "lw %[temp6], 68(%[sptr]) \n\t"
+ "sw %[temp1], 8(%[sptr1]) \n\t"
+ "sw %[temp2], 8(%[sptr2]) \n\t"
+ "sw %[temp3], 8(%[sptr3]) \n\t"
+ "sw %[temp4], 8(%[sptr4]) \n\t"
+ "sw %[temp5], 8(%[sptr5]) \n\t"
+ "sw %[temp6], 8(%[sptr6]) \n\t"
+ "lw %[temp1], 72(%[sptr]) \n\t" /* group 3 */
+ "lw %[temp2], 76(%[sptr]) \n\t"
+ "lw %[temp3], 80(%[sptr]) \n\t"
+ "lw %[temp4], 84(%[sptr]) \n\t"
+ "lw %[temp5], 88(%[sptr]) \n\t"
+ "lw %[temp6], 92(%[sptr]) \n\t"
+ "sw %[temp1], 12(%[sptr1]) \n\t"
+ "sw %[temp2], 12(%[sptr2]) \n\t"
+ "sw %[temp3], 12(%[sptr3]) \n\t"
+ "sw %[temp4], 12(%[sptr4]) \n\t"
+ "sw %[temp5], 12(%[sptr5]) \n\t"
+ "sw %[temp6], 12(%[sptr6]) \n\t"
+ "addiu %[sptr], %[sptr], 96 \n\t" /* advance input by 24 words */
+ "addiu %[sptr1], %[sptr1], 16 \n\t" /* advance each destination by 4 words */
+ "addiu %[sptr2], %[sptr2], 16 \n\t"
+ "addiu %[sptr3], %[sptr3], 16 \n\t"
+ "addiu %[sptr4], %[sptr4], 16 \n\t"
+ "addiu %[sptr5], %[sptr5], 16 \n\t"
+ "addiu %[sptr6], %[sptr6], 16 \n\t"
+
+ :[temp1] "=&r" (temp1),[temp2] "=&r" (temp2),[temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4),[temp5] "=&r" (temp5),[temp6] "=&r" (temp6),
+ [sptr] "+r" (sptr),[sptr1] "+r" (sptr1), [sptr2] "+r" (sptr2),
+ [sptr3] "+r" (sptr3), [sptr4] "+r" (sptr4), [sptr5] "+r" (sptr5),
+ [sptr6] "+r" (sptr6)
+ :
+ : "memory"
+ );
+ }
+ }
+ else if (s->channels == 2){
+ sptrs_2[s->channel_map[0]] = (int*)(&s->planar_samples[0][0]); /* same remapping scheme as the 6-channel path */
+ sptrs_2[s->channel_map[1]] = (int*)(&s->planar_samples[1][0]);
+ sptr1 = sptrs_2[0] + AC3_BLOCK_SIZE;
+ sptr2 = sptrs_2[1] + AC3_BLOCK_SIZE;
+
+ for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i+=4) { /* four sample pairs (32 input bytes) per iteration */
+ __asm__ volatile (
+ "lw %[temp1], 0(%[sptr]) \n\t" /* pairs 0 and 1 */
+ "lw %[temp2], 4(%[sptr]) \n\t"
+ "lw %[temp3], 8(%[sptr]) \n\t"
+ "lw %[temp4], 12(%[sptr]) \n\t"
+ "sw %[temp1], 0(%[sptr1]) \n\t"
+ "sw %[temp2], 0(%[sptr2]) \n\t"
+ "sw %[temp3], 4(%[sptr1]) \n\t"
+ "sw %[temp4], 4(%[sptr2]) \n\t"
+ "lw %[temp1], 16(%[sptr]) \n\t" /* pairs 2 and 3 */
+ "lw %[temp2], 20(%[sptr]) \n\t"
+ "lw %[temp3], 24(%[sptr]) \n\t"
+ "lw %[temp4], 28(%[sptr]) \n\t"
+ "sw %[temp1], 8(%[sptr1]) \n\t"
+ "sw %[temp2], 8(%[sptr2]) \n\t"
+ "sw %[temp3], 12(%[sptr1]) \n\t"
+ "sw %[temp4], 12(%[sptr2]) \n\t"
+ "addiu %[sptr], %[sptr], 32 \n\t"
+ "addiu %[sptr1], %[sptr1], 16 \n\t"
+ "addiu %[sptr2], %[sptr2], 16 \n\t"
+
+ :[temp1] "=&r" (temp1),[temp2] "=&r" (temp2),[temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4),[sptr] "+r" (sptr),[sptr1] "+r" (sptr1),
+ [sptr2] "+r" (sptr2)
+ :
+ : "memory"
+ );
+ }
+ }
+ else
+ { /* any other channel count: plain C, one sample at a time */
+ for (ch = 0; ch < s->channels; ch++)
+ {
+ const SampleType *sptr; /* shadows the outer int pointer of the same name */
+ int sinc;
+ sinc = s->channels; /* stride between consecutive samples of one channel */
+ sptr = samples + s->channel_map[ch];
+ for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) {
+ s->planar_samples[ch][i] = *sptr;
+ sptr += sinc;
+ }
+ }
+ }
+}
+
+static void ff_ac3_float_apply_mdct_mips(AC3EncodeContext *s)
+{ /* For every channel and block: multiply AC3_WINDOW_SIZE planar samples by s->mdct_window, then run s->mdct.mdct_calcw on the result. */
+ int blk, ch, i;
+ float scr0_1, scr0_2, scr0_3, scr0_4, scr1_1, scr1_2, scr1_3, scr1_4;
+ float *win_smpl, *pl_smpl;
+ const float *mdct_win;
+
+ for (ch = 0; ch < s->channels; ch++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
+ AC3Block *block = &s->blocks[blk];
+ win_smpl = &s->windowed_samples[0]; /* the same scratch buffer is reused for every channel and block */
+ pl_smpl = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
+ mdct_win = (const float*) (&s->mdct_window[0]);
+
+ for(i=0; i<AC3_WINDOW_SIZE; i+=4){ /* four multiplies per iteration */
+ __asm__ volatile (
+ "lwc1 %[scr0_1], 0(%[pl_smpl]) \n\t" /* loads alternate between sample and window coefficient */
+ "lwc1 %[scr1_1], 0(%[mdct_win]) \n\t"
+ "lwc1 %[scr0_2], 4(%[pl_smpl]) \n\t"
+ "lwc1 %[scr1_2], 4(%[mdct_win]) \n\t"
+ "lwc1 %[scr0_3], 8(%[pl_smpl]) \n\t"
+ "lwc1 %[scr1_3], 8(%[mdct_win]) \n\t"
+ "lwc1 %[scr0_4], 12(%[pl_smpl]) \n\t"
+ "lwc1 %[scr1_4], 12(%[mdct_win]) \n\t"
+ "mul.s %[scr0_1], %[scr0_1], %[scr1_1] \n\t" /* sample * window coefficient */
+ "mul.s %[scr0_2], %[scr0_2], %[scr1_2] \n\t"
+ "mul.s %[scr0_3], %[scr0_3], %[scr1_3] \n\t"
+ "mul.s %[scr0_4], %[scr0_4], %[scr1_4] \n\t"
+ "swc1 %[scr0_1], 0(%[win_smpl]) \n\t" /* store products to the windowed buffer */
+ "swc1 %[scr0_2], 4(%[win_smpl]) \n\t"
+ "swc1 %[scr0_3], 8(%[win_smpl]) \n\t"
+ "swc1 %[scr0_4], 12(%[win_smpl]) \n\t"
+ "addiu %[pl_smpl], 16 \n\t" /* advance all three pointers by four floats */
+ "addiu %[mdct_win], 16 \n\t"
+ "addiu %[win_smpl], 16 \n\t"
+
+ : [pl_smpl] "+r" (pl_smpl), [mdct_win] "+r" (mdct_win),
+ [win_smpl] "+r" (win_smpl), [scr0_1] "=&f" (scr0_1),
+ [scr0_2] "=&f" (scr0_2), [scr0_3] "=&f" (scr0_3),
+ [scr0_4] "=&f" (scr0_4), [scr1_1] "=&f" (scr1_1),
+ [scr1_2] "=&f" (scr1_2), [scr1_3] "=&f" (scr1_3),
+ [scr1_4] "=&f" (scr1_4)
+ :
+ : "memory"
+ );
+ }
+ s->mdct.mdct_calcw(&s->mdct, block->mdct_coef[ch+1], /* output goes to coefficient slot ch+1, not ch */
+ s->windowed_samples);
+ }
+ }
+}
+
+static void ff_ac3_float_apply_channel_coupling_mips(AC3EncodeContext *s)
+{
+ LOCAL_ALIGNED_16(CoefType, cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+ LOCAL_ALIGNED_16(int32_t, fixed_cpl_coords, [AC3_MAX_BLOCKS], [AC3_MAX_CHANNELS][16]);
+ int blk, ch, bnd, i, j;
+ CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
+ int cpl_start, num_cpl_coefs;
+ int32_t *dst;
+ const float *src;
+ unsigned int len;
+ uint8_t *exp;
+ float scale = 1 << 24;
+ float src0, src1, src2, src3, src4, src5, src6, src7;
+ int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+ int e,v;
+
+ memset(cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
+ memset(fixed_cpl_coords, 0, AC3_MAX_BLOCKS * sizeof(*cpl_coords));
+
+ /* align start to 16-byte boundary. align length to multiple of 32.
+ note: coupling start bin % 4 will always be 1 */
+ cpl_start = s->start_freq[CPL_CH] - 1;
+ num_cpl_coefs = FFALIGN(s->num_cpl_subbands * 12 + 1, 32);
+ cpl_start = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs;
+
+ /* calculate coupling channel from fbw channels */
+ for (blk = 0; blk < s->num_blocks; blk++) {
+ AC3Block *block = &s->blocks[blk];
+ CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start];
+ if (!block->cpl_in_use)
+ continue;
+ memset(cpl_coef, 0, num_cpl_coefs * sizeof(*cpl_coef));
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
+ CoefType *ch_coef = &block->mdct_coef[ch][cpl_start];
+ if (!block->channel_in_cpl[ch])
+ continue;
+ for (i = 0; i < num_cpl_coefs; i++)
+ cpl_coef[i] += ch_coef[i];
+ }
+
+ /* coefficients must be clipped in order to be encoded */
+ clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs);
+ }
+
+ /* calculate energy in each band in coupling channel and each fbw channel */
+ /* TODO: possibly use SIMD to speed up energy calculation */
+ bnd = 0;
+ i = s->start_freq[CPL_CH];
+ while (i < s->cpl_end_freq) {
+ int band_size = s->cpl_band_sizes[bnd];
+ for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
+ AC3Block *block = &s->blocks[blk];
+ if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
+ continue;
+ for (j = 0; j < band_size; j++) {
+ CoefType v = block->mdct_coef[ch][i+j];
+ MAC_COEF(energy[blk][ch][bnd], v, v);
+ }
+ }
+ }
+ i += band_size;
+ bnd++;
+ }
+
+ /* calculate coupling coordinates for all blocks for all channels */
+ for (blk = 0; blk < s->num_blocks; blk++) {
+ AC3Block *block = &s->blocks[blk];
+ if (!block->cpl_in_use)
+ continue;
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
+ if (!block->channel_in_cpl[ch])
+ continue;
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+ cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy[blk][ch][bnd],
+ energy[blk][CPL_CH][bnd]);
+ }
+ }
+ }
+
+ /* determine which blocks to send new coupling coordinates for */
+ for (blk = 0; blk < s->num_blocks; blk++) {
+ AC3Block *block = &s->blocks[blk];
+ AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
+
+ memset(block->new_cpl_coords, 0, sizeof(block->new_cpl_coords));
+
+ if (block->cpl_in_use) {
+ /* send new coordinates if this is the first block, if previous
+ * block did not use coupling but this block does, the channels
+ * using coupling has changed from the previous block, or the
+ * coordinate difference from the last block for any channel is
+ * greater than a threshold value. */
+ if (blk == 0 || !block0->cpl_in_use) {
+ for (ch = 1; ch <= s->fbw_channels; ch++)
+ block->new_cpl_coords[ch] = 1;
+ } else {
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
+ if (!block->channel_in_cpl[ch])
+ continue;
+ if (!block0->channel_in_cpl[ch]) {
+ block->new_cpl_coords[ch] = 1;
+ } else {
+ CoefSumType coord_diff = 0;
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+ coord_diff += FFABS(cpl_coords[blk-1][ch][bnd] -
+ cpl_coords[blk ][ch][bnd]);
+ }
+ coord_diff /= s->num_cpl_bands;
+ if (coord_diff > NEW_CPL_COORD_THRESHOLD)
+ block->new_cpl_coords[ch] = 1;
+ }
+ }
+ }
+ }
+ }
+
+ /* calculate final coupling coordinates, taking into account reusing of
+ coordinates in successive blocks */
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+ blk = 0;
+ while (blk < s->num_blocks) {
+ int av_uninit(blk1);
+ AC3Block *block = &s->blocks[blk];
+
+ if (!block->cpl_in_use) {
+ blk++;
+ continue;
+ }
+
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
+ CoefSumType energy_ch, energy_cpl;
+ if (!block->channel_in_cpl[ch])
+ continue;
+ energy_cpl = energy[blk][CPL_CH][bnd];
+ energy_ch = energy[blk][ch][bnd];
+ blk1 = blk+1;
+ while (!s->blocks[blk1].new_cpl_coords[ch] && blk1 < s->num_blocks) {
+ if (s->blocks[blk1].cpl_in_use) {
+ energy_cpl += energy[blk1][CPL_CH][bnd];
+ energy_ch += energy[blk1][ch][bnd];
+ }
+ blk1++;
+ }
+ cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy_ch, energy_cpl);
+ }
+ blk = blk1;
+ }
+ }
+
+ /* calculate exponents/mantissas for coupling coordinates */
+ for (blk = 0; blk < s->num_blocks; blk++) {
+ AC3Block *block = &s->blocks[blk];
+ if (!block->cpl_in_use || !block->new_cpl_coords)
+ continue;
+ dst = (int32_t*)fixed_cpl_coords[blk][1];
+ src = cpl_coords[blk][1];
+ len = s->fbw_channels * 16;
+ exp = block->cpl_coord_exp[1];
+
+ do {
+ __asm__ volatile (
+ "lwc1 %[src0], 0(%[src]) \n\t"
+ "lwc1 %[src1], 4(%[src]) \n\t"
+ "lwc1 %[src2], 8(%[src]) \n\t"
+ "lwc1 %[src3], 12(%[src]) \n\t"
+ "lwc1 %[src4], 16(%[src]) \n\t"
+ "lwc1 %[src5], 20(%[src]) \n\t"
+ "lwc1 %[src6], 24(%[src]) \n\t"
+ "lwc1 %[src7], 28(%[src]) \n\t"
+ "mul.s %[src0], %[src0], %[scale] \n\t"
+ "mul.s %[src1], %[src1], %[scale] \n\t"
+ "mul.s %[src2], %[src2], %[scale] \n\t"
+ "mul.s %[src3], %[src3], %[scale] \n\t"
+ "mul.s %[src4], %[src4], %[scale] \n\t"
+ "mul.s %[src5], %[src5], %[scale] \n\t"
+ "mul.s %[src6], %[src6], %[scale] \n\t"
+ "mul.s %[src7], %[src7], %[scale] \n\t"
+ "cvt.w.s %[src0], %[src0] \n\t"
+ "cvt.w.s %[src1], %[src1] \n\t"
+ "cvt.w.s %[src2], %[src2] \n\t"
+ "cvt.w.s %[src3], %[src3] \n\t"
+ "cvt.w.s %[src4], %[src4] \n\t"
+ "cvt.w.s %[src5], %[src5] \n\t"
+ "cvt.w.s %[src6], %[src6] \n\t"
+ "cvt.w.s %[src7], %[src7] \n\t"
+ "mfc1 %[temp0], %[src0] \n\t"
+ "mfc1 %[temp1], %[src1] \n\t"
+ "mfc1 %[temp2], %[src2] \n\t"
+ "mfc1 %[temp3], %[src3] \n\t"
+ "mfc1 %[temp4], %[src4] \n\t"
+ "mfc1 %[temp5], %[src5] \n\t"
+ "mfc1 %[temp6], %[src6] \n\t"
+ "mfc1 %[temp7], %[src7] \n\t"
+
+ : [src] "+r" (src),
+ [src0] "=&f" (src0), [src1] "=&f" (src1),
+ [src2] "=&f" (src2), [src3] "=&f" (src3),
+ [src4] "=&f" (src4), [src5] "=&f" (src5),
+ [src6] "=&f" (src6), [src7] "=&f" (src7),
+ [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
+ [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+ [temp6] "=&r" (temp6), [temp7] "=&r" (temp7)
+ : [scale] "f" (scale)
+ : "memory"
+ );
+
+ v = abs(temp0);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp0 = av_clip(temp0, -16777215, 16777215);
+ }
+ }
+ exp[0] = e;
+
+ v = abs(temp1);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp1 = av_clip(temp1, -16777215, 16777215);
+ }
+ }
+ exp[1] = e;
+
+ v = abs(temp2);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp2 = av_clip(temp2, -16777215, 16777215);
+ }
+ }
+ exp[2] = e;
+
+ v = abs(temp3);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp3 = av_clip(temp3, -16777215, 16777215);
+ }
+ }
+ exp[3] = e;
+
+ v = abs(temp4);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp4 = av_clip(temp4, -16777215, 16777215);
+ }
+ }
+ exp[4] = e;
+
+ v = abs(temp5);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp5 = av_clip(temp5, -16777215, 16777215);
+ }
+ }
+ exp[5] = e;
+
+ v = abs(temp6);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp6 = av_clip(temp6, -16777215, 16777215);
+ }
+ }
+ exp[6] = e;
+
+ v = abs(temp7);
+ if (v == 0)
+ e = 24;
+ else {
+ e = 23 - av_log2(v);
+ if (e < 0) {
+ e = 0;
+ temp7 = av_clip(temp7, -16777215, 16777215);
+ }
+ }
+ exp[7] = e;
+
+ __asm__ volatile (
+ "sw %[temp0], 0(%[dst]) \n\t"
+ "sw %[temp1], 4(%[dst]) \n\t"
+ "sw %[temp2], 8(%[dst]) \n\t"
+ "sw %[temp3], 12(%[dst]) \n\t"
+ "sw %[temp4], 16(%[dst]) \n\t"
+ "sw %[temp5], 20(%[dst]) \n\t"
+ "sw %[temp6], 24(%[dst]) \n\t"
+ "sw %[temp7], 28(%[dst]) \n\t"
+
+ : [dst] "+r" (dst)
+ : [temp0] "r" (temp0), [temp1] "r" (temp1),
+ [temp2] "r" (temp2), [temp3] "r" (temp3),
+ [temp4] "r" (temp4), [temp5] "r" (temp5),
+ [temp6] "r" (temp6), [temp7] "r" (temp7)
+ : "memory"
+ );
+
+ src = src + 8;
+ dst = dst + 8;
+ exp = exp + 8;
+ len -= 8;
+ } while (len > 0);
+
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
+ int bnd, min_exp, max_exp, master_exp;
+
+ if (!block->new_cpl_coords[ch])
+ continue;
+
+ /* determine master exponent */
+ min_exp = max_exp = block->cpl_coord_exp[ch][0];
+ for (bnd = 1; bnd < s->num_cpl_bands; bnd++) {
+ int exp = block->cpl_coord_exp[ch][bnd];
+ min_exp = FFMIN(exp, min_exp);
+ max_exp = FFMAX(exp, max_exp);
+ }
+ master_exp = ((max_exp - 15) + 2) / 3;
+ master_exp = FFMAX(master_exp, 0);
+ while (min_exp < master_exp * 3)
+ master_exp--;
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+ block->cpl_coord_exp[ch][bnd] = av_clip(block->cpl_coord_exp[ch][bnd] -
+ master_exp * 3, 0, 15);
+ }
+ block->cpl_master_exp[ch] = master_exp;
+
+ /* quantize mantissas */
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
+ int cpl_exp = block->cpl_coord_exp[ch][bnd];
+ int cpl_mant = (fixed_cpl_coords[blk][ch][bnd] << (5 + cpl_exp + master_exp * 3)) >> 24;
+ if (cpl_exp == 15)
+ cpl_mant >>= 1;
+ else
+ cpl_mant -= 16;
+
+ block->cpl_coord_mant[ch][bnd] = cpl_mant;
+ }
+ }
+ }
+
+ if (CONFIG_EAC3_ENCODER && s->eac3)
+ ff_eac3_set_cpl_states(s);
+}
+#endif
+
+void ff_ac3_float_encode_init_mips(AC3EncodeContext *s) { /* store the ff_ac3_float_*_mips routines in the matching fields of s */
+#if HAVE_INLINE_ASM /* when this is 0 the body is empty and s is left unchanged */
+ s->deinterleave_input_samples = ff_ac3_float_deinterleave_input_samples_mips; /* presumably a function-pointer field -- confirm in ac3enc.h */
+ s->apply_mdct = ff_ac3_float_apply_mdct_mips;
+ s->apply_channel_coupling = ff_ac3_float_apply_channel_coupling_mips;
+#endif /* HAVE_INLINE_ASM */
+}
--
1.7.3.4
More information about the ffmpeg-devel
mailing list