[FFmpeg-devel] [PATCH 2/4] mips: Optimization of AC3 fixed point decoder
Nedeljko Babic
nbabic at mips.com
Tue Sep 25 16:10:55 CEST 2012
Signed-off-by: Nedeljko Babic <nbabic at mips.com>
---
libavcodec/dsputil.c | 2 +-
libavcodec/fmtconvert.c | 1 +
libavcodec/fmtconvert.h | 1 +
libavcodec/mips/Makefile | 3 +-
libavcodec/mips/dsputil_mips.c | 102 ++++++++++++++
libavcodec/mips/fmtconvert_mips_fixed.c | 226 +++++++++++++++++++++++++++++++
6 files changed, 333 insertions(+), 2 deletions(-)
create mode 100644 libavcodec/mips/fmtconvert_mips_fixed.c
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index f813bb8..9e06050 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3188,7 +3188,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx);
if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
- if (HAVE_MIPSFPU) ff_dsputil_init_mips (c, avctx);
+ if (ARCH_MIPS) ff_dsputil_init_mips (c, avctx);
for (i = 0; i < 4; i++) {
for (j = 0; j < 16; j++) {
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index 951a2e5..c3b4544 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -159,6 +159,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
if (HAVE_MMX) ff_fmt_convert_init_x86(c, avctx);
if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c);
+ if (HAVE_MIPSDSPR1) ff_fmt_convert_init_mips_fixed(c, avctx);
}
/* ffdshow custom code */
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index 8bda1e7..cc088b8 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -145,6 +145,7 @@ void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
void ff_fmt_convert_init_altivec(FmtConvertContext *c, AVCodecContext *avctx);
void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx);
void ff_fmt_convert_init_mips(FmtConvertContext *c);
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx);
/* ffdshow custom code */
void float_interleave(float *dst, const float **src, long len, int channels);
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index ff46768..17f6b13 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -16,4 +16,5 @@ MIPSDSPR1-OBJS-$(CONFIG_MPEGAUDIODSP) += mips/mpegaudiodsp_mips_fixed.o
OBJS-$(CONFIG_FFT) += mips/fft_init_table.o
MIPSFPU-OBJS-$(CONFIG_FFT) += mips/fft_mips.o
MIPSFPU-OBJS-$(HAVE_INLINE_ASM) += mips/fmtconvert_mips.o
-MIPSFPU-OBJS-$(HAVE_INLINE_ASM) += mips/dsputil_mips.o
+OBJS-$(HAVE_INLINE_ASM) += mips/dsputil_mips.o
+MIPSDSPR1-OBJS-$(HAVE_INLINE_ASM) += mips/fmtconvert_mips_fixed.o
diff --git a/libavcodec/mips/dsputil_mips.c b/libavcodec/mips/dsputil_mips.c
index e46a0a9..4e4ee61 100644
--- a/libavcodec/mips/dsputil_mips.c
+++ b/libavcodec/mips/dsputil_mips.c
@@ -47,6 +47,7 @@
#include "config.h"
#include "libavcodec/dsputil.h"
+#if HAVE_MIPSFPU
static void vector_fmul_window_mips(float *dst, const float *src0,
const float *src1, const float *win, int len)
{
@@ -157,8 +158,109 @@ static void vector_fmul_window_mips(float *dst, const float *src0,
);
}
}
+#endif
+
+#if HAVE_MIPSDSPR2
+static void vector_fmul_window_mips_fixed(int *dst, const int16_t *src0, const int16_t *src1, const int16_t *win, int len) /* Fixed-point windowing of src0/src1 by win into dst using MIPS DSP inline asm; writes 2*len outputs, four from each end of dst per loop pass. */
+{
+ int i,j; /* i runs upward from -len, j runs downward from len-1 */
+ int *dst_i, *dst_j; /* store cursors: dst+i walks forward, dst+j walks backward */
+ const int16_t * src0_i, *src1_j; /* load cursors into src0 (forward) and src1 (backward) */
+ const int16_t *win_i, *win_j; /* load cursors into win, forward and backward */
+ int16_t s0, s01, s02, s03, s1, s11, s12, s13; /* asm scratch only, never read back in C. NOTE(review): typed int16_t although the asm keeps packed halfword pairs and 32-bit extr results in them - consider int */
+ int16_t wi, wi1, wi2, wi3, wj, wj1, wj2, wj3; /* asm scratch for window taps, same caveat as above */
+
+ dst += len; /* rebase: negative i now addresses the first len outputs, j the last len */
+ win += len; /* same rebasing for the window, which is read over 2*len entries */
+ src0 += len; /* src0 is read at src0[i], i.e. its first len entries; src1 is not rebased and is read at src1[j] */
+
+ for(i=-len, j=len-1; i<0; i+=4, j-=4) { /* unrolled by 4; a trailing group is still processed in full, so len is presumably a multiple of 4 - TODO confirm with callers */
+ dst_i = dst + i;
+ dst_j = dst + j;
+ src0_i = src0 + i;
+ src1_j = src1 + j;
+ win_i = win + i;
+ win_j = win + j;
+
+ __asm__ volatile (
+ "lh %[s0], 0(%[src0_i]) \n\t" /* sample 0: s0 = src0[i] */
+ "lh %[s1], 0(%[src1_j]) \n\t" /* s1 = src1[j] */
+ "lh %[wi], 0(%[win_i]) \n\t" /* wi = win[i] */
+ "lh %[wj], 0(%[win_j]) \n\t" /* wj = win[j] */
+ "append %[s0], %[s1], 16 \n\t" /* pack: s0 = (s0 << 16) | low16(s1) -> {src0[i], src1[j]} */
+ "append %[wj], %[wi], 16 \n\t" /* pack: wj = {win[j], win[i]} */
+ "mult $ac0, $0, $0 \n\t" /* 0*0: clear accumulator ac0 */
+ "mulsaq_s.w.ph $ac0, %[s0], %[wj] \n\t" /* ac0 = src0[i]*win[j] - src1[j]*win[i] (fractional halfword products) */
+ "mult $ac1, $0, $0 \n\t" /* clear ac1 */
+ "dpaqx_s.w.ph $ac1, %[s0], %[wj] \n\t" /* crossed halves: ac1 = src0[i]*win[i] + src1[j]*win[j] */
+ "lh %[s01], 2(%[src0_i]) \n\t" /* sample 1 loads are interleaved with sample 0 extraction */
+ "lh %[s11], -2(%[src1_j]) \n\t"
+ "extr_r.w %[s1], $ac0, 16 \n\t" /* s1 = ac0 >> 16 with rounding: value for dst[i] */
+ "lh %[wi1], 2(%[win_i]) \n\t"
+ "lh %[wj1], -2(%[win_j]) \n\t"
+ "extr_r.w %[wj], $ac1, 16 \n\t" /* wj = ac1 >> 16 with rounding: value for dst[j] */
+ "append %[s01], %[s11], 16 \n\t" /* {src0[i+1], src1[j-1]} */
+ "append %[wj1], %[wi1], 16 \n\t" /* {win[j-1], win[i+1]} */
+ "mult $ac2, $0, $0 \n\t"
+ "mulsaq_s.w.ph $ac2, %[s01], %[wj1] \n\t" /* difference term for dst[i+1] */
+ "sw %[s1], 0(%[dst_i]) \n\t" /* dst[i] */
+ "sw %[wj], 0(%[dst_j]) \n\t" /* dst[j] */
+ "mult $ac3, $0, $0 \n\t"
+ "dpaqx_s.w.ph $ac3, %[s01], %[wj1] \n\t" /* sum term for dst[j-1] */
+ "extr_r.w %[s11], $ac2, 16 \n\t"
+ "extr_r.w %[wj1], $ac3, 16 \n\t"
+ "lh %[s02], 4(%[src0_i]) \n\t" /* sample 2: src0[i+2], src1[j-2], win[i+2], win[j-2] */
+ "lh %[s12], -4(%[src1_j]) \n\t"
+ "lh %[wi2], 4(%[win_i]) \n\t"
+ "lh %[wj2], -4(%[win_j]) \n\t"
+ "append %[s02], %[s12], 16 \n\t"
+ "append %[wj2], %[wi2], 16 \n\t"
+ "mult $ac0, $0, $0 \n\t" /* ac0 is reused now that sample 0 has been extracted */
+ "mulsaq_s.w.ph $ac0, %[s02], %[wj2] \n\t"
+ "sw %[s11], 4(%[dst_i]) \n\t" /* dst[i+1] */
+ "sw %[wj1], -4(%[dst_j]) \n\t" /* dst[j-1] */
+ "mult $ac1, $0, $0 \n\t" /* ac1 reused likewise */
+ "dpaqx_s.w.ph $ac1, %[s02], %[wj2] \n\t"
+ "extr_r.w %[s12], $ac0, 16 \n\t"
+ "lh %[s03], 6(%[src0_i]) \n\t" /* sample 3: src0[i+3], src1[j-3], win[i+3], win[j-3] */
+ "lh %[s13], -6(%[src1_j]) \n\t"
+ "lh %[wi3], 6(%[win_i]) \n\t"
+ "lh %[wj3], -6(%[win_j]) \n\t"
+ "append %[s03], %[s13], 16 \n\t"
+ "append %[wj3], %[wi3], 16 \n\t"
+ "mult $ac2, $0, $0 \n\t"
+ "mulsaq_s.w.ph $ac2, %[s03], %[wj3] \n\t"
+ "sw %[s12], 8(%[dst_i]) \n\t" /* dst[i+2] */
+ "extr_r.w %[wj2], $ac1, 16 \n\t"
+ "mult $ac3, $0, $0 \n\t"
+ "dpaqx_s.w.ph $ac3, %[s03], %[wj3] \n\t"
+ "extr_r.w %[s13], $ac2, 16 \n\t"
+ "extr_r.w %[wj3], $ac3, 16 \n\t"
+ "sw %[wj2], -8(%[dst_j]) \n\t" /* dst[j-2] */
+ "sw %[s13], 12(%[dst_i]) \n\t" /* dst[i+3] */
+ "sw %[wj3], -12(%[dst_j]) \n\t" /* dst[j-3] */
+
+ : [s0] "=&r" (s0), [s1] "=&r" (s1), [wi] "=&r" (wi), /* early-clobber outputs: the pointer inputs stay live until the final stores */
+ [wj] "=&r" (wj), [s03] "=&r" (s03), [s01] "=&r" (s01),
+ [s11] "=&r" (s11), [wi1] "=&r" (wi1), [wj1] "=&r" (wj1),
+ [s13] "=&r" (s13), [s02] "=&r" (s02), [s12] "=&r" (s12),
+ [wi2] "=&r" (wi2), [wj2] "=&r" (wj2), [wi3] "=&r" (wi3),
+ [wj3] "=&r" (wj3)
+ : [src0_i] "r" (src0_i), [win_j] "r" (win_j ), [src1_j] "r" (src1_j),
+ [win_i] "r" (win_i), [dst_i] "r" (dst_i), [dst_j] "r" (dst_j)
+ : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", /* "memory" covers the sw stores to dst; hi/lo and $ac1..$ac3 are the accumulators written above */
+ "$ac3hi", "$ac3lo"
+ );
+ }
+}
+#endif
av_cold void ff_dsputil_init_mips( DSPContext* c, AVCodecContext *avctx )
{
+#if HAVE_MIPSFPU /* installs the MIPS DSPContext overrides; the float routine only exists when HAVE_MIPSFPU is set */
c->vector_fmul_window = vector_fmul_window_mips;
+#endif
+#if HAVE_MIPSDSPR2 /* the fixed-point routine is compiled under the same HAVE_MIPSDSPR2 guard */
+ c->vector_fmul_window_fixed = vector_fmul_window_mips_fixed; /* hook up the DSP-ASE fixed-point window function; avctx is not consulted in the lines shown */
+#endif
}
diff --git a/libavcodec/mips/fmtconvert_mips_fixed.c b/libavcodec/mips/fmtconvert_mips_fixed.c
new file mode 100644
index 0000000..bc3ada0
--- /dev/null
+++ b/libavcodec/mips/fmtconvert_mips_fixed.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Zoran Lukic (zlukic at mips.com)
+ *
+ * Format Conversion Utils optimized for MIPS fixed-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/fmtconvert.c
+ */
+
+#include "libavcodec/fmtconvert.h"
+
+static void int32_to_fixed_fmul_scalar_mips(int16_t *dst, const int *src, /* dst[k] = low 16 bits of ((src[k] * mul + 0x8000) >> 16), for k in [0, len) */
+ int mul, int len) /* handles 8 samples per pass; a trailing group is processed in full, so len must be a multiple of 8 or the buffers padded */
+{
+ int i;
+ int temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15; /* full-width scratch: these hold 32-bit lw/mul results, not halfwords */
+
+ for (i=0; i<len; i+=8) {
+ __asm__ volatile (
+ "lw %[temp1], 0(%[src_i]) \n\t" /* load eight consecutive 32-bit inputs */
+ "lw %[temp3], 4(%[src_i]) \n\t"
+ "lw %[temp5], 8(%[src_i]) \n\t"
+ "lw %[temp7], 12(%[src_i]) \n\t"
+ "lw %[temp9], 16(%[src_i]) \n\t"
+ "lw %[temp11], 20(%[src_i]) \n\t"
+ "lw %[temp13], 24(%[src_i]) \n\t"
+ "lw %[temp15], 28(%[src_i]) \n\t"
+ "mul %[temp1], %[temp1], %[mul] \n\t" /* scale; only the low 32 bits of each product are kept */
+ "mul %[temp3], %[temp3], %[mul] \n\t"
+ "mul %[temp5], %[temp5], %[mul] \n\t"
+ "mul %[temp7], %[temp7], %[mul] \n\t"
+ "mul %[temp9], %[temp9], %[mul] \n\t"
+ "mul %[temp11], %[temp11], %[mul] \n\t"
+ "mul %[temp13], %[temp13], %[mul] \n\t"
+ "mul %[temp15], %[temp15], %[mul] \n\t"
+ "addiu %[temp1], %[temp1], 0x8000 \n\t" /* add half an LSB of the result so the shift below rounds */
+ "addiu %[temp3], %[temp3], 0x8000 \n\t"
+ "addiu %[temp5], %[temp5], 0x8000 \n\t"
+ "addiu %[temp7], %[temp7], 0x8000 \n\t"
+ "addiu %[temp9], %[temp9], 0x8000 \n\t"
+ "addiu %[temp11], %[temp11], 0x8000 \n\t"
+ "addiu %[temp13], %[temp13], 0x8000 \n\t"
+ "addiu %[temp15], %[temp15], 0x8000 \n\t"
+ "sra %[temp1], %[temp1], 0x10 \n\t" /* arithmetic shift right by 16 */
+ "sra %[temp3], %[temp3], 0x10 \n\t"
+ "sra %[temp5], %[temp5], 0x10 \n\t"
+ "sra %[temp7], %[temp7], 0x10 \n\t"
+ "sra %[temp9], %[temp9], 0x10 \n\t"
+ "sra %[temp11], %[temp11], 0x10 \n\t"
+ "sra %[temp13], %[temp13], 0x10 \n\t"
+ "sra %[temp15], %[temp15], 0x10 \n\t"
+ "sh %[temp1], 0(%[dst_i]) \n\t" /* store the low halfword of each result */
+ "sh %[temp3], 2(%[dst_i]) \n\t"
+ "sh %[temp5], 4(%[dst_i]) \n\t"
+ "sh %[temp7], 6(%[dst_i]) \n\t"
+ "sh %[temp9], 8(%[dst_i]) \n\t"
+ "sh %[temp11], 10(%[dst_i]) \n\t"
+ "sh %[temp13], 12(%[dst_i]) \n\t"
+ "sh %[temp15], 14(%[dst_i]) \n\t"
+
+ : [temp1] "=&r" (temp1), [temp11] "=&r" (temp11), /* early-clobber is required: src_i, mul and dst_i are still read after the first output is written, so no output may share a register with them */
+ [temp13] "=&r" (temp13), [temp15] "=&r" (temp15),
+ [temp3] "=&r" (temp3), [temp5] "=&r" (temp5),
+ [temp7] "=&r" (temp7), [temp9] "=&r" (temp9)
+ : [dst_i] "r" (dst+i), [src_i] "r" (src+i),
+ [mul] "r" (mul)
+ : "memory" /* the asm reads src and writes dst behind the compiler's back */
+ );
+ }
+}
+
+static inline int fixed_to_int16_one_mips(const int *src) /* Converts one fixed-point sample at *src: upper-limits it, sign-extends the low halfword, and returns the result. */
+{
+ int16_t ret; /* bound to the [ret1] asm output */
+ int temp1, temp7, temp8; /* temp1: loaded sample, temp7: comparison flag, temp8: limit constant */
+ __asm__ volatile (
+ "lw %[temp1], 0(%[src_i1]) \n\t" /* temp1 = *src */
+ "li %[temp8], 0xf000 \n\t" /* upper limit used by the compare below */
+ "li %[ret1], 0xefff \n\t" /* default result when the sample is not below the limit. NOTE(review): 0xf000/0xefff are unusual for a 16-bit clip (0x7fff is the int16 maximum) and no lower bound is applied - confirm against the C reference in libavcodec/fmtconvert.c */
+ "slt %[temp7], %[temp1], %[temp8] \n\t" /* temp7 = (temp1 < 0xf000), signed compare */
+ "movn %[ret1], %[temp1], %[temp7] \n\t" /* keep the sample itself when it is below the limit */
+ "seh %[ret1], %[ret1] \n\t" /* sign-extend the low 16 bits */
+ : [temp1] "=r" (temp1), [temp7] "=r" (temp7),
+ [temp8] "=r" (temp8), [ret1] "=r" (ret)
+ : [src_i1] "r" (src) /* only read by the first instruction, before any other output is written */
+ : "memory"
+ );
+ return (int16_t) ret; /* widened back to int by the return type */
+}
+
+static void fixed_to_int16_interleave_mips(int16_t *dst, const int **src, /* Interleaves `channels` planar int buffers src[c][0..len-1] into dst as int16 frames: dst[i*channels + c]. */
+ long len, int channels) /* dedicated asm paths for 2 and 6 channels, per-sample helper otherwise */
+{
+ int i,j,c;
+ if(channels==2) { /* stereo: convert one sample of each channel per asm block */
+ for(i=0; i<len; i++) {
+ int temp, temp1, temp7, temp8; /* temp/temp1: loaded samples, temp7: compare flag, temp8: limit */
+ __asm__ volatile (
+ "lw %[temp], 0(%[src_i]) \n\t" /* src[0][i] */
+ "lw %[temp1], 0(%[src_i1]) \n\t" /* src[1][i] */
+ "li %[temp8], 0xf000 \n\t" /* upper limit. NOTE(review): same unusual 0xf000/0xefff constants as fixed_to_int16_one_mips - confirm against the C reference */
+ "li %[ret], 0xefff \n\t" /* default results when a sample is not below the limit */
+ "li %[ret1], 0xefff \n\t"
+ "slt %[temp7], %[temp], %[temp8] \n\t" /* flag = sample < limit */
+ "movn %[ret], %[temp], %[temp7] \n\t" /* take the sample itself when the flag is set */
+ "slt %[temp7], %[temp1], %[temp8] \n\t"
+ "movn %[ret1], %[temp1], %[temp7] \n\t"
+ "seh %[ret], %[ret] \n\t" /* sign-extend low halfword */
+ "seh %[ret1], %[ret1] \n\t"
+
+ : [temp] "=&r" (temp), [temp1] "=&r" (temp1),
+ [temp7] "=&r" (temp7), [temp8] "=&r" (temp8),
+ [ret] "=&r" (dst[2*i]), [ret1] "=&r" (dst[2*i+1]) /* outputs are bound to the destination elements, so the compiler performs the stores after the asm */
+ : [src_i] "r" (src[0]+i), [src_i1] "r" (src[1]+i)
+ : "memory"
+ );
+ }
+ }
+ else {
+ if(channels==6) { /* six channels: same sequence as the stereo path, widened to six samples */
+ for(i=0; i<len; i++) {
+ int temp, temp1, temp2, temp3, temp4, temp5, temp7, temp8; /* temp..temp5: loaded samples, temp7: compare flag, temp8: limit */
+ __asm__ volatile (
+ "lw %[temp], 0(%[src_i]) \n\t" /* load src[0][i] .. src[5][i] */
+ "lw %[temp1], 0(%[src_i1]) \n\t"
+ "lw %[temp2], 0(%[src_i2]) \n\t"
+ "lw %[temp3], 0(%[src_i3]) \n\t"
+ "lw %[temp4], 0(%[src_i4]) \n\t"
+ "lw %[temp5], 0(%[src_i5]) \n\t"
+ "li %[temp8], 0xf000 \n\t" /* upper limit shared by all six compares */
+ "li %[ret], 0xefff \n\t" /* default results, replaced below when the sample is under the limit */
+ "li %[ret1], 0xefff \n\t"
+ "li %[ret2], 0xefff \n\t"
+ "li %[ret3], 0xefff \n\t"
+ "li %[ret4], 0xefff \n\t"
+ "li %[ret5], 0xefff \n\t"
+ "slt %[temp7], %[temp], %[temp8] \n\t" /* per channel: flag = sample < limit, then conditional move */
+ "movn %[ret], %[temp], %[temp7] \n\t"
+ "slt %[temp7], %[temp1], %[temp8] \n\t"
+ "movn %[ret1], %[temp1], %[temp7] \n\t"
+ "slt %[temp7], %[temp2], %[temp8] \n\t"
+ "movn %[ret2], %[temp2], %[temp7] \n\t"
+ "slt %[temp7], %[temp3], %[temp8] \n\t"
+ "movn %[ret3], %[temp3], %[temp7] \n\t"
+ "slt %[temp7], %[temp4], %[temp8] \n\t"
+ "movn %[ret4], %[temp4], %[temp7] \n\t"
+ "slt %[temp7], %[temp5], %[temp8] \n\t"
+ "movn %[ret5], %[temp5], %[temp7] \n\t"
+ "seh %[ret], %[ret] \n\t" /* sign-extend each low halfword */
+ "seh %[ret1], %[ret1] \n\t"
+ "seh %[ret2], %[ret2] \n\t"
+ "seh %[ret5], %[ret5] \n\t"
+ "seh %[ret3], %[ret3] \n\t"
+ "seh %[ret4], %[ret4] \n\t"
+
+ : [temp] "=&r" (temp), [temp1] "=&r" (temp1),
+ [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+ [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+ [temp7] "=&r" (temp7), [temp8] "=&r" (temp8),
+ [ret] "=&r" (dst[6*i]), [ret1] "=&r" (dst[6*i+1]), /* one interleaved frame of six int16 outputs */
+ [ret2] "=&r" (dst[6*i+2]), [ret3] "=&r" (dst[6*i+3]),
+ [ret4] "=&r" (dst[6*i+4]), [ret5] "=&r" (dst[6*i+5])
+ : [src_i] "r" (src[0]+i), [src_i1] "r" (src[1]+i),
+ [src_i2] "r" (src[2]+i), [src_i3] "r" (src[3]+i),
+ [src_i4] "r" (src[4]+i), [src_i5] "r" (src[5]+i)
+ : "memory"
+ );
+ }
+ }
+ else { /* any other channel count: walk channel by channel */
+ for(c=0; c<channels; c++)
+ for(i=0, j=c; i<len; i++, j+=channels) /* j = c + i*channels is the interleaved slot of sample i */
+ dst[j] = fixed_to_int16_one_mips(src[c]+i); /* the int result is narrowed to int16_t on assignment */
+ }
+ }
+}
+
+void ff_fmt_convert_init_mips_fixed(FmtConvertContext *c, AVCodecContext *avctx) { /* Installs the MIPS fixed-point routines into the format-conversion context; avctx is accepted but not used here. */
+ c->int32_to_fixed_fmul_scalar = int32_to_fixed_fmul_scalar_mips; /* scaled int32 -> int16 conversion, 8 samples per pass */
+ c->fixed_to_int16_interleave = fixed_to_int16_interleave_mips; /* planar int -> interleaved int16 */
+}
--
1.7.3.4
More information about the ffmpeg-devel
mailing list