[FFmpeg-cvslog] Add x86-optimized versions of exponent_min().
Justin Ruggles
git
Fri Feb 11 03:52:13 CET 2011
ffmpeg | branch: master | Justin Ruggles <justin.ruggles at gmail.com> | Thu Feb 10 12:20:36 2011 -0500| [a30ac54a19d27920ec262ebb104e1f48092a3715] | committer: Michael Niedermayer
Add x86-optimized versions of exponent_min().
Signed-off-by: Ronald S. Bultje <rsbultje at gmail.com>
(cherry picked from commit dda3f0ef48aa5c3b03566b60b6bf63211e1fe579)
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a30ac54a19d27920ec262ebb104e1f48092a3715
---
libavcodec/Makefile | 6 ++-
libavcodec/ac3dsp.c | 51 ++++++++++++++++++++++++++++++++
libavcodec/ac3dsp.h | 43 +++++++++++++++++++++++++++
libavcodec/ac3enc.c | 33 ++++-----------------
libavcodec/x86/Makefile | 4 ++
libavcodec/x86/ac3dsp.asm | 67 +++++++++++++++++++++++++++++++++++++++++++
libavcodec/x86/ac3dsp_mmx.c | 45 +++++++++++++++++++++++++++++
libavcodec/x86/x86util.asm | 10 ++++++
8 files changed, 230 insertions(+), 29 deletions(-)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa6c0bb..682b626 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -55,8 +55,10 @@ OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o \
mpeg4audio.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o
OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3dec_data.o ac3.o
-OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_float.o ac3tab.o ac3.o
-OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3.o
+OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_float.o ac3tab.o ac3.o \
+ ac3dsp.o
+OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3.o \
+ ac3dsp.o
OBJS-$(CONFIG_ALAC_DECODER) += alac.o
OBJS-$(CONFIG_ALAC_ENCODER) += alacenc.o
OBJS-$(CONFIG_ALS_DECODER) += alsdec.o bgmc.o mpeg4audio.o
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
new file mode 100644
index 0000000..f688e6a
--- /dev/null
+++ b/libavcodec/ac3dsp.c
@@ -0,0 +1,51 @@
+/*
+ * AC-3 DSP utils
+ * Copyright (c) 2011 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "ac3dsp.h"
+
+static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
+{
+ int blk, i;
+
+ if (!num_reuse_blocks)
+ return;
+
+ for (i = 0; i < nb_coefs; i++) {
+ uint8_t min_exp = *exp;
+ uint8_t *exp1 = exp + 256;
+ for (blk = 0; blk < num_reuse_blocks; blk++) {
+ uint8_t next_exp = *exp1;
+ if (next_exp < min_exp)
+ min_exp = next_exp;
+ exp1 += 256;
+ }
+ *exp++ = min_exp;
+ }
+}
+
+av_cold void ff_ac3dsp_init(AC3DSPContext *c)
+{
+ c->ac3_exponent_min = ac3_exponent_min_c;
+
+ if (HAVE_MMX)
+ ff_ac3dsp_init_x86(c);
+}
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
new file mode 100644
index 0000000..7f13b11
--- /dev/null
+++ b/libavcodec/ac3dsp.h
@@ -0,0 +1,43 @@
+/*
+ * AC-3 DSP utils
+ * Copyright (c) 2011 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AC3DSP_H
+#define AVCODEC_AC3DSP_H
+
+#include <stdint.h>
+
+typedef struct AC3DSPContext {
+ /**
+ * Set each encoded exponent in a block to the minimum of itself and the
+ * exponents in the same frequency bin of up to 5 following blocks.
+ * @param exp pointer to the start of the current block of exponents.
+ * constraints: align 16
+ * @param num_reuse_blocks number of blocks that will reuse exponents from the current block.
+ * constraints: range 0 to 5
+ * @param nb_coefs number of frequency coefficients.
+ */
+ void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+} AC3DSPContext;
+
+void ff_ac3dsp_init (AC3DSPContext *c);
+void ff_ac3dsp_init_x86(AC3DSPContext *c);
+
+#endif /* AVCODEC_AC3DSP_H */
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index e41a0ae..851fdc9 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -33,6 +33,7 @@
#include "avcodec.h"
#include "put_bits.h"
#include "dsputil.h"
+#include "ac3dsp.h"
#include "ac3.h"
#include "audioconvert.h"
@@ -86,6 +87,7 @@ typedef struct AC3Block {
typedef struct AC3EncodeContext {
PutBitContext pb; ///< bitstream writer context
DSPContext dsp;
+ AC3DSPContext ac3dsp; ///< AC-3 optimized functions
AC3MDCTContext mdct; ///< MDCT context
AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info
@@ -458,7 +460,6 @@ static void compute_exp_strategy_ch(AC3EncodeContext *s, uint8_t *exp_strategy,
exp_strategy[blk] = EXP_REUSE;
exp += AC3_MAX_COEFS;
}
- emms_c();
/* now select the encoding strategy type : if exponents are often
recoded, we use a coarse encoding */
@@ -499,31 +500,6 @@ static void compute_exp_strategy(AC3EncodeContext *s)
/**
- * Set each encoded exponent in a block to the minimum of itself and the
- * exponents in the same frequency bin of up to 5 following blocks.
- */
-static void exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
-{
- int blk, i;
-
- if (!num_reuse_blocks)
- return;
-
- for (i = 0; i < nb_coefs; i++) {
- uint8_t min_exp = *exp;
- uint8_t *exp1 = exp + AC3_MAX_COEFS;
- for (blk = 0; blk < num_reuse_blocks; blk++) {
- uint8_t next_exp = *exp1;
- if (next_exp < min_exp)
- min_exp = next_exp;
- exp1 += AC3_MAX_COEFS;
- }
- *exp++ = min_exp;
- }
-}
-
-
-/**
* Update the exponents so that they are the ones the decoder will decode.
*/
static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
@@ -616,7 +592,7 @@ static void encode_exponents(AC3EncodeContext *s)
num_reuse_blocks = blk1 - blk - 1;
/* for the EXP_REUSE case we select the min of the exponents */
- exponent_min(exp, num_reuse_blocks, nb_coefs);
+ s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs);
encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]);
@@ -704,6 +680,8 @@ static void process_exponents(AC3EncodeContext *s)
encode_exponents(s);
group_exponents(s);
+
+ emms_c();
}
@@ -1856,6 +1834,7 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx)
avctx->coded_frame= avcodec_alloc_frame();
dsputil_init(&s->dsp, avctx);
+ ff_ac3dsp_init(&s->ac3dsp);
return 0;
init_fail:
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 83cec00..1b58fa1 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -17,6 +17,10 @@ MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o
+MMX-OBJS-$(CONFIG_AC3_ENCODER) += x86/ac3dsp_mmx.o
+MMX-OBJS-$(CONFIG_AC3_FIXED_ENCODER) += x86/ac3dsp_mmx.o
+YASM-OBJS-$(CONFIG_AC3_ENCODER) += x86/ac3dsp.o
+YASM-OBJS-$(CONFIG_AC3_FIXED_ENCODER) += x86/ac3dsp.o
MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o
MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER) += x86/mpegaudiodec_mmx.o
MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER) += x86/mpegaudiodec_mmx.o
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
new file mode 100644
index 0000000..e71c51c
--- /dev/null
+++ b/libavcodec/x86/ac3dsp.asm
@@ -0,0 +1,67 @@
+;*****************************************************************************
+;* x86-optimized AC-3 DSP utils
+;* Copyright (c) 2011 Justin Ruggles
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
+;-----------------------------------------------------------------------------
+
+%macro AC3_EXPONENT_MIN 1
+cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
+ shl reuse_blksq, 8
+ jz .end
+ LOOP_ALIGN
+.nextexp:
+ mov offsetq, reuse_blksq
+ mova m0, [expq+offsetq]
+ sub offsetq, 256
+ LOOP_ALIGN
+.nextblk:
+ PMINUB m0, [expq+offsetq], m1
+ sub offsetq, 256
+ jae .nextblk
+ mova [expq], m0
+ add expq, mmsize
+ sub expnq, mmsize
+ jg .nextexp
+.end:
+ REP_RET
+%endmacro
+
+%define PMINUB PMINUB_MMX
+%define LOOP_ALIGN
+INIT_MMX
+AC3_EXPONENT_MIN mmx
+%ifdef HAVE_MMX2
+%define PMINUB PMINUB_MMXEXT
+%define LOOP_ALIGN ALIGN 16
+AC3_EXPONENT_MIN mmxext
+%endif
+%ifdef HAVE_SSE
+INIT_XMM
+AC3_EXPONENT_MIN sse2
+%endif
+%undef PMINUB
+%undef LOOP_ALIGN
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
new file mode 100644
index 0000000..7ce3aa3
--- /dev/null
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -0,0 +1,45 @@
+/*
+ * x86-optimized AC-3 DSP utils
+ * Copyright (c) 2011 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86_cpu.h"
+#include "dsputil_mmx.h"
+#include "libavcodec/ac3dsp.h"
+
+extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+
+av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
+{
+ int mm_flags = av_get_cpu_flags();
+
+#if HAVE_YASM
+ if (mm_flags & AV_CPU_FLAG_MMX) {
+ c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
+ }
+ if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
+ c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
+ }
+ if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
+ c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
+ }
+#endif
+}
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
index 7cabc70..b28a619 100644
--- a/libavcodec/x86/x86util.asm
+++ b/libavcodec/x86/x86util.asm
@@ -434,3 +434,13 @@
movh [%7], %3
movh [%7+%8], %4
%endmacro
+
+%macro PMINUB_MMX 3 ; dst, src, tmp
+ mova %3, %1
+ psubusb %3, %2
+ psubb %1, %3
+%endmacro
+
+%macro PMINUB_MMXEXT 3 ; dst, src, ignored
+ pminub %1, %2
+%endmacro
More information about the ffmpeg-cvslog
mailing list