[FFmpeg-devel] [PATCH]HE-AACv1 try 3 (all missing functionality added)

Sat Feb 13 17:31:59 CET 2010

Alex Converse wrote:
> Notes:
> *A filterbank that supports float_to_int16_c as been added
> *All the computation time is spent in ff_sbr_apply() and it's
> children. If it isn't called from ff_sbr_apply() making it 100% faster
> isn't going to buy us anything.
> *No calls to lrintf depend on rounding behavior at (2*n+1)*0.5
> *Some sample SIMD placeholders are attached as a second patch.
> *Right now the synthesis filterbank is written on top on an MDCT. With
> appropriate SIMD functions it may make sense to move it to an FFT.
> Right now the MDCT version is much faster.
> *The analysis filterbank has been switched from an FFT to an RDFT.

> +    for (n = 0; n < 64; n++) {
> +        float pre = M_PI * n / 64;
> +        analysis_cos_pre[n] = cos(pre);
> +        analysis_sin_pre[n] = sin(pre);

Please do not reinvent ff_cos_tabs[]. There is no point polluting the 
cache with several different copies of the same tables.

> +    for (k = 0; k < 32; k++) {
> +        float post = M_PI * (k + 0.5) / 128;
> +        analysis_cossin_post[k][0] =  2.0 * cos(post);
> +        analysis_cossin_post[k][1] = -2.0 * sin(post);
> +    }

Also here if possible

> +static void sbr_extension(SpectralBandReplication *sbr, GetBitContext *gb,
> +                          int bs_extension_id, int *num_bits_left)
> +{
> +/* TODO - implement ps_data for parametric stereo parsing
> +    switch (bs_extension_id) {
> +    case EXTENSION_ID_PS:
> +        num_bits_left -= ps_data(sbr, gb);
> +        break;
> +    default:
> +*/
> +        skip_bits(gb, *num_bits_left); // bs_fill_bits
> +        *num_bits_left = 0;
> +/*
> +        break;
> +    }
> +*/
> +}

Isn't it missing a call to av_log_missing_feature()?

> +        if (div) {
> +            memset(X[0][l]+32, 0, 32*sizeof(float));
> +            memset(X[1][l]+32, 0, 32*sizeof(float));
> +        }
> +        ff_imdct_half(mdct, mdct_buf[0], X[0][l]);
> +        ff_imdct_half(mdct, mdct_buf[1], X[1][l]);
> +        for (n = 0; n < 64 >> div; n++) {
> +            v[               n] = -mdct_buf[0][64-1-(n<<div)    ] + mdct_buf[1][ n<<div     ];
> +            v[(128>>div)-1 - n] =  mdct_buf[0][64-1-(n<<div)-div] + mdct_buf[1][(n<<div)+div];
> +        }

I still think you can avoid the temporary scratch buffer...

> +        for (n = 0; n < 64 >> div; n++)
> +            out[n] = out[n] * scale + bias;

I suppose scale == 1 and bias == 0 is a common case, no? If yes, I'd put 
it inside an if(scale != 1 || bias != 0)

> --- /dev/null
> +++ b/libavcodec/aacsbr.h
> @@ -0,0 +1,186 @@
> +/*
> + * AAC Spectral Band Replication definitions and structures
> + * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
> + * Copyright (c) 2010      Alex Converse <alex.converse at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file libavcodec/aacsbr.h
> + * AAC Spectral Band Replication definitions and structures
> + * @author Robert Swain ( rob opendot cl )
> + */
> +
> +#ifndef AVCODEC_AACSBR_H
> +#define AVCODEC_AACSBR_H
> +
> +#include <stdint.h>
> +
> +#define SBR_INIT_VLC_STATIC(num, size) \
> +    INIT_VLC_STATIC(&vlc_sbr[num], 9, sbr_tmp[num].table_size / sbr_tmp[num].elem_size,     \
> +                    sbr_tmp[num].sbr_bits ,                      1,                      1, \
> +                    sbr_tmp[num].sbr_codes, sbr_tmp[num].elem_size, sbr_tmp[num].elem_size, \
> +                    size);
> +
> +#define ENVELOPE_ADJUSTMENT_OFFSET 2
> +
> +/**
> + * SBR VLC tables
> + */
> +enum {
> +    T_HUFFMAN_ENV_1_5DB,
> +    F_HUFFMAN_ENV_1_5DB,
> +    T_HUFFMAN_ENV_BAL_1_5DB,
> +    F_HUFFMAN_ENV_BAL_1_5DB,
> +    T_HUFFMAN_ENV_3_0DB,
> +    F_HUFFMAN_ENV_3_0DB,
> +    T_HUFFMAN_ENV_BAL_3_0DB,
> +    F_HUFFMAN_ENV_BAL_3_0DB,
> +    T_HUFFMAN_NOISE_3_0DB,
> +    T_HUFFMAN_NOISE_BAL_3_0DB,
> +};
> +
> +/**
> + * bs_frame_class - frame class of current SBR frame (14496-3 sp04 p98)
> + */
> +enum {
> +    FIXFIX,
> +    FIXVAR,
> +    VARFIX,
> +    VARVAR,
> +};
> +

I think those belong in the file where they are actually used.

> +++ b/libavcodec/aacsbr_internal.h
> @@ -0,0 +1,51 @@
> +/*
> + * AAC Spectral Band Replication function declarations
> + * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
> + * Copyright (c) 2010      Alex Converse <alex.converse at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file libavcodec/aacsbr_internal.h
> + * AAC Spectral Band Replication function declarations
> + * @author Robert Swain ( rob opendot cl )
> + */
> +
> +#ifndef AVCODEC_AACSBR_INTERNAL_H
> +#define AVCODEC_AACSBR_INTERNAL_H
> +
> +#include "get_bits.h"
> +#include "aac.h"
> +#include "aacsbr.h"
> +
> +/** Initialize SBR. */
> +av_cold void ff_aac_sbr_init(void);
> +/** Initialize one SBR context. */
> +av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr);
> +/** Close one SBR context. */
> +av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr);
> +/** Decode one SBR element. */
> +int ff_decode_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
> +                            GetBitContext *gb, int crc, int cnt, int id_aac);
> +/** Dequantized all channels in one SBR element. */
> +void ff_sbr_dequant(AACContext *ac, SpectralBandReplication *sbr, int id_aac);
> +/** Apply dequantized SBR to a single AAC channel. */
> +void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int ch,
> +                  const float* in, float* out);
> +
> +#endif /* AVCODEC_AACSBR_INTERNAL_H */

I don't see how this header is more internal than aacsbr.h...

>  
> +static void vector_fadd_c(float *dst, const float *src, int len)
> +{
> +    int i;
> +    for (i = 0; i < len; i++)
> +        dst[i] += src[i];
> +}
> +
> +static void vector_cmul_c(float (*dst)[2], const float (*src0)[2], const float (*src1)[2], int len)
> +{
> +    int i;
> +    for (i = 0; i < len; i++) {
> +        dst[i][0] = src0[i][0] * src1[i][0] - src0[i][1] * src1[i][1];
> +        dst[i][1] = src0[i][1] * src1[i][0] + src0[i][0] * src1[i][1];
> +    }
> +}
> +
>  static void vector_fmul_c(float *dst, const float *src, int len){
>      int i;
>      for(i=0; i<len; i++)
> @@ -4160,6 +4176,22 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
>          dst[i] = src[i] * mul;
>  }
>  
> +static void vector_fadd_scalar_fmul_scalar_c(float *dst, const float *src,
> +                                             float add, float mul, int len)
> +{
> +    int i;
> +    for (i = 0; i < len; i++)
> +        dst[i] = (src[i] + add) * mul;
> +}
> +
> +static void vector_fmul_scalar_fadd_scalar_c(float *dst, const float *src,
> +                                             float mul, float add, int len)
> +{
> +    int i;
> +    for (i = 0; i < len; i++)
> +        dst[i] = src[i] * mul + add;
> +}
> +

Did you benchmark if hand-unrolling these loops give any benefit? You 
know that len is a multiple of four, but the compiler don't.

-Vitor