[FFmpeg-devel] [PATCH 2/2] ac3enc: add SIMD-optimized shifting functions for use with the fixed-point AC3 encoder.
Måns Rullgård
mans at mansr.com
Fri Mar 11 23:44:52 CET 2011
Justin Ruggles <justin.ruggles at gmail.com> writes:
> ---
> libavcodec/ac3dsp.c | 24 +++++++++++++++++++++
> libavcodec/ac3dsp.h | 22 +++++++++++++++++++
> libavcodec/ac3enc_fixed.c | 40 ++---------------------------------
> libavcodec/x86/ac3dsp.asm | 48 +++++++++++++++++++++++++++++++++++++++++++
> libavcodec/x86/ac3dsp_mmx.c | 12 ++++++++++
> 5 files changed, 109 insertions(+), 37 deletions(-)
>
>
> diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
> index da3a123..83f48a6 100644
> --- a/libavcodec/ac3dsp.c
> +++ b/libavcodec/ac3dsp.c
> @@ -50,10 +50,34 @@ static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
> return v;
> }
>
> +static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
> + unsigned int shift)
> +{
> + int i;
> +
> + if (shift > 0) {
> + for (i = 0; i < len; i++)
> + src[i] <<= shift;
> + }
> +}
> +
> +static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
> + unsigned int shift)
> +{
> + int i;
> +
> + if (shift > 0) {
> + for (i = 0; i < len; i++)
> + src[i] >>= shift;
> + }
> +}
> +
> av_cold void ff_ac3dsp_init(AC3DSPContext *c)
> {
> c->ac3_exponent_min = ac3_exponent_min_c;
> c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
> + c->ac3_lshift_int16 = ac3_lshift_int16_c;
> + c->ac3_rshift_int32 = ac3_rshift_int32_c;
>
> if (HAVE_MMX)
> ff_ac3dsp_init_x86(c);
> diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
> index a4f141f..31a0af3 100644
> --- a/libavcodec/ac3dsp.h
> +++ b/libavcodec/ac3dsp.h
> @@ -46,6 +46,28 @@ typedef struct AC3DSPContext {
> * @return a value with the same MSB as max(abs(src[]))
> */
> int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);
> +
> + /**
> + * Left-shift each value in an array of int16_t by a specified amount.
> + * @param src input array
> + * constraints: align 16
> + * @param len number of values in the array
> + * constraints: multiple of 32 greater than 0
> + * @param shift left shift amount
> + * constraints: range [0,15]
> + */
> + void (*ac3_lshift_int16)(int16_t *src, unsigned int len, unsigned int shift);
> +
> + /**
> + * Right-shift each value in an array of int32_t by a specified amount.
> + * @param src input array
> + * constraints: align 16
> + * @param len number of values in the array
> + * constraints: multiple of 16 greater than 0
> + * @param shift right shift amount
> + * constraints: range [0,31]
> + */
> + void (*ac3_rshift_int32)(int32_t *src, unsigned int len, unsigned int shift);
> } AC3DSPContext;
>
> void ff_ac3dsp_init (AC3DSPContext *c);
> diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
> index e750a39..baa1363 100644
> --- a/libavcodec/ac3enc_fixed.c
> +++ b/libavcodec/ac3enc_fixed.c
> @@ -278,40 +278,6 @@ static int log2_tab(AC3EncodeContext *s, int16_t *src, int len)
>
> /**
> - * Left-shift each value in an array by a specified amount.
> - * @param tab input array
> - * @param n number of values in the array
> - * @param lshift left shift amount
> - */
> -static void lshift_tab(int16_t *tab, int n, unsigned int lshift)
> -{
> - int i;
> -
> - if (lshift > 0) {
> - for (i = 0; i < n; i++)
> - tab[i] <<= lshift;
> - }
> -}
> -
> -
> -/**
> - * Right-shift each value in an array of int32_t by a specified amount.
> - * @param src input array
> - * @param len number of values in the array
> - * @param shift right shift amount
> - */
> -static void ac3_rshift_int32_c(int32_t *src, unsigned int len, unsigned int shift)
> -{
> - int i;
> -
> - if (shift > 0) {
> - for (i = 0; i < len; i++)
> - src[i] >>= shift;
> - }
> -}
> -
> -
> -/**
> * Normalize the input samples to use the maximum available precision.
> * This assumes signed 16-bit input samples.
> *
> @@ -320,7 +286,7 @@ static void ac3_rshift_int32_c(int32_t *src, unsigned int len, unsigned int shif
> static int normalize_samples(AC3EncodeContext *s)
> {
> int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE);
> - lshift_tab(s->windowed_samples, AC3_WINDOW_SIZE, v);
> + s->ac3dsp.ac3_lshift_int16(s->windowed_samples, AC3_WINDOW_SIZE, v);
> /* +6 to right-shift from 31-bit to 25-bit */
> return v + 6;
> }
> @@ -336,8 +302,8 @@ static void scale_coefficients(AC3EncodeContext *s)
> for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
> AC3Block *block = &s->blocks[blk];
> for (ch = 0; ch < s->channels; ch++) {
> - ac3_rshift_int32_c(block->mdct_coef[ch], AC3_MAX_COEFS,
> - block->coeff_shift[ch]);
> + s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
> + block->coeff_shift[ch]);
> }
> }
> }
Looks good so far.
> diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
> index b1eeca9..320180c 100644
> --- a/libavcodec/x86/ac3dsp.asm
> +++ b/libavcodec/x86/ac3dsp.asm
> @@ -133,3 +133,51 @@ INIT_XMM
> AC3_MAX_MSB_ABS_INT16 sse2, min_max
> %define ABS2 ABS2_SSSE3
> AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
> +
> +;-----------------------------------------------------------------------------
> +; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
> +;-----------------------------------------------------------------------------
> +
> +%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set
> +cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
> + test shiftd, shiftd
> + jz .end
> + movd m0, shiftd
> + ALIGN 8
> +.loop:
> + mova m1, [srcq ]
> + mova m2, [srcq+mmsize ]
> + mova m3, [srcq+mmsize*2]
> + mova m4, [srcq+mmsize*3]
> + %3 m1, m0
> + %3 m2, m0
> + %3 m3, m0
> + %3 m4, m0
> + mova [srcq ], m1
> + mova [srcq+mmsize ], m2
> + mova [srcq+mmsize*2], m3
> + mova [srcq+mmsize*3], m4
> + add srcq, mmsize*4
> + sub lend, mmsize*32/%2
> + ja .loop
> +.end:
> + REP_RET
> +%endmacro
> +
> +;-----------------------------------------------------------------------------
> +; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
> +;-----------------------------------------------------------------------------
> +
> +INIT_MMX
> +AC3_SHIFT l, 16, psllw, mmx
> +INIT_XMM
> +AC3_SHIFT l, 16, psllw, sse2
> +
> +;-----------------------------------------------------------------------------
> +; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
> +;-----------------------------------------------------------------------------
> +
> +INIT_MMX
> +AC3_SHIFT r, 32, psrad, mmx
> +INIT_XMM
> +AC3_SHIFT r, 32, psrad, sse2
Within the limits of my x86 knowledge, this seems OK.
> diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
> index d8af59c..835b106 100644
> --- a/libavcodec/x86/ac3dsp_mmx.c
> +++ b/libavcodec/x86/ac3dsp_mmx.c
> @@ -32,6 +32,12 @@ extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
> extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
> extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
>
> +extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
> +extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
> +
> +extern void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
> +extern void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
> +
> av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
> {
> int mm_flags = av_get_cpu_flags();
> @@ -40,6 +46,8 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
> if (mm_flags & AV_CPU_FLAG_MMX) {
> c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
> c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
> + c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
> + c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
> }
> if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
> c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
> @@ -48,6 +56,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
> if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
> c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
> c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
> + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
> + c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
> + c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
> + }
> }
> if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
> c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
OK
--
Måns Rullgård
mans at mansr.com
More information about the ffmpeg-devel mailing list