[FFmpeg-devel] [PATCH 1/4] lavc/flacenc: add sse4 version of the 16-bit lpc encoder
Michael Niedermayer
michaelni at gmx.at
Fri Feb 21 17:04:08 CET 2014
On Thu, Feb 20, 2014 at 07:48:57PM +0100, James Darnley wrote:
> From 1.8 to 2.4 times faster. Runtime is reduced by 2 to 39%. The
> speed-up generally increases with compression_level.
>
> This lpc encoder is not used with levels < 3 so it provides no speed-up
> in these cases.
> ---
> LICENSE | 1 +
> libavcodec/flacenc.c | 2 +-
> libavcodec/x86/Makefile | 3 +
> libavcodec/x86/flac_dsp_gpl.asm | 83 +++++++++++++++++++++++++++++++++++++++
> libavcodec/x86/flacdsp_init.c | 4 ++
> 5 files changed, 92 insertions(+), 1 deletions(-)
> create mode 100644 libavcodec/x86/flac_dsp_gpl.asm
>
> diff --git a/LICENSE b/LICENSE
> index 1f757aa..c194087 100644
> --- a/LICENSE
> +++ b/LICENSE
> @@ -15,6 +15,7 @@ Specifically, the GPL parts of FFmpeg are
> - libpostproc
> - libmpcodecs
> - optional x86 optimizations in the files
> + libavcodec/x86/flac_dsp_gpl.asm
> libavcodec/x86/idct_mmx.c
> - libutvideo encoding/decoding wrappers in
> libavcodec/libutvideo*.cpp
> diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
> index 1fc8c4c..e958cd8 100644
> --- a/libavcodec/flacenc.c
> +++ b/libavcodec/flacenc.c
> @@ -80,7 +80,7 @@ typedef struct FlacSubframe {
> int shift;
> RiceContext rc;
> int32_t samples[FLAC_MAX_BLOCKSIZE];
> - int32_t residual[FLAC_MAX_BLOCKSIZE+1];
> + int32_t residual[FLAC_MAX_BLOCKSIZE+3];
> } FlacSubframe;
>
> typedef struct FlacFrame {
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index 0d3594f..374b1d2 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -76,6 +76,9 @@ YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \
> YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o
> YASM-OBJS-$(CONFIG_FFT) += x86/fft.o
> YASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
> +ifdef CONFIG_GPL
> +YASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
> +endif
> YASM-OBJS-$(CONFIG_H263DSP) += x86/h263_loopfilter.o
> YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \
> x86/h264_chromamc_10bit.o
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
> new file mode 100644
> index 0000000..2221af8
> --- /dev/null
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -0,0 +1,83 @@
> +;*****************************************************************************
> +;* FLAC DSP functions
> +;*
> +;* Copyright (c) 2014 James Darnley <james.darnley at gmail.com>
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or modify
> +;* it under the terms of the GNU General Public License as published by
> +;* the Free Software Foundation; either version 2 of the License, or
> +;* (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +;* GNU General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU General Public License along
> +;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> +;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> +;******************************************************************************
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_TEXT
> +
> +INIT_XMM sse4
> +%if ARCH_X86_64
> + cglobal flac_enc_lpc_16, 5, 7, 4, 0, res, smp, len, order, coefs, shift
> + %define posj r5
> + %define negj r6
> + %define length r2d
> +
> + movsxd orderq, orderd
> +%else
> + cglobal flac_enc_lpc_16, 5, 6, 4, 0, res, smp, len, order, coefs, shift
> + %define posj r2
> + %define negj r5
> + %define length r2mp
> +%endif
Why don't you list these "local registers" in cglobal?
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Dictatorship naturally arises out of democracy, and the most aggravated
form of tyranny and slavery out of the most extreme liberty. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140221/72fd4dfb/attachment.asc>
More information about the ffmpeg-devel mailing list