[FFmpeg-devel] [RFC] AAC Encoder

Wed Aug 13 16:44:18 CEST 2008

On Wed, Aug 13, 2008 at 04:42:56PM +0300, Kostya wrote:
> On Wed, Aug 13, 2008 at 02:57:50PM +0200, Michael Niedermayer wrote:
[...]
> 
> > > 3. based on psy model suggestions, encoder performs windowing and MDCT
> > 
> > ok
> > 
> > 
> > > 4. encoder feeds coefficients to psy model
> > > 5. psy model by some magic determines scalefactors and use them to convert
> > > coefficients into integer form
> > > 6. encoder encodes obtained scalefactors and integer coefficients
> > > 
> > > There are 11 codebooks for AAC, each designed to code either pairs or quads
> > > of values with sign coded separately or incorporated into value,
> > > each has a maximum value limit.
> > > While it's feasible to find the best encoding (like take raw coeff, quantize
> > > it and round up or down, then see which vector takes less bits), I feel
> > > it would be too slow.
> > 
> > That's fine; you already have the fast variant implemented and I do not suggest
> > that it be removed. What we need is a high-quality variant. The encoder should
> > be better than other encoders ...
> > Also, the max value you mentioned is another example of where your code
> > fails fatally: a single +3 that would sound nearly as good when encoded as +2
> > could force a less efficient codebook to be chosen. Also, the +3 could be
> > encoded as a pulse; I don't remember whether your code optimally chooses between
> > pulse and normal codebook encodings?
> 
> not optimally, unfortunately, but it can search for pulses and encode them
> 
> in any case, here's a new encoder version

Please commit the parts I've OK-ed and/or send a patch without them.

I started to review it again, but then realized that we can save a lot of
time if I do not have to review the already approved parts each time.

[...]

> /*
>  * AAC encoder
>  * Copyright (C) 2008 Konstantin Shishkov
>  *
>  * This file is part of FFmpeg.
>  *
>  * FFmpeg is free software; you can redistribute it and/or
>  * modify it under the terms of the GNU Lesser General Public
>  * License as published by the Free Software Foundation; either
>  * version 2.1 of the License, or (at your option) any later version.
>  *
>  * FFmpeg is distributed in the hope that it will be useful,
>  * but WITHOUT ANY WARRANTY; without even the implied warranty of
>  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>  * Lesser General Public License for more details.
>  *
>  * You should have received a copy of the GNU Lesser General Public
>  * License along with FFmpeg; if not, write to the Free Software
>  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>  */
> 
> /**
>  * @file aacenc.c
>  * AAC encoder
>  */
> 

ok
(and please commit the OK-ed parts so the patch becomes smaller!
 less to review -> faster and better reviews)

> /***********************************
>  *              TODOs:
>  * psy model selection with some option
>  * change greedy codebook search into something more optimal, like Viterbi algorithm
>  * determine run lengths along with codebook
>  ***********************************/

> 
> #include "avcodec.h"
> #include "bitstream.h"
> #include "dsputil.h"
> #include "mpeg4audio.h"
> 
> #include "aacpsy.h"
> #include "aac.h"
> #include "aactab.h"
> 
> static const uint8_t swb_size_1024_96[] = {
>     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
>     12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
>     64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
> };
> 
> static const uint8_t swb_size_1024_64[] = {
>     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
>     12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
>     40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
> };
> 
> static const uint8_t swb_size_1024_48[] = {
>     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
>     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
>     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
>     96
> };
> 
> static const uint8_t swb_size_1024_32[] = {
>     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
>     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
>     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
> };
> 
> static const uint8_t swb_size_1024_24[] = {
>     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
>     12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
>     32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
> };
> 
> static const uint8_t swb_size_1024_16[] = {
>     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
>     12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
>     32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
> };
> 
> static const uint8_t swb_size_1024_8[] = {
>     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
>     16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
>     32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
> };
> 
> static const uint8_t *swb_size_1024[] = {
>     swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
>     swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
>     swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
>     swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
> };
> 
> static const uint8_t swb_size_128_96[] = {
>     4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
> };
> 
> static const uint8_t swb_size_128_48[] = {
>     4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
> };
> 
> static const uint8_t swb_size_128_24[] = {
>     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
> };
> 
> static const uint8_t swb_size_128_16[] = {
>     4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
> };
> 
> static const uint8_t swb_size_128_8[] = {
>     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
> };
> 
> static const uint8_t *swb_size_128[] = {
>     /* the last entry on the following row is swb_size_128_64 but is a
>        duplicate of swb_size_128_96 */
>     swb_size_128_96, swb_size_128_96, swb_size_128_96,
>     swb_size_128_48, swb_size_128_48, swb_size_128_48,
>     swb_size_128_24, swb_size_128_24, swb_size_128_16,
>     swb_size_128_16, swb_size_128_16, swb_size_128_8
> };

ok

> 
> #define CB_UNSIGNED 0x01    ///< coefficients are coded as absolute values
> #define CB_PAIRS    0x02    ///< coefficients are grouped into pairs before coding (quads by default)
> #define CB_ESCAPE   0x04    ///< codebook allows escapes
> 
> /** spectral coefficients codebook information */
> static const struct {
>     int16_t maxval;         ///< maximum possible value
>      int8_t cb_num;         ///< codebook number
>     uint8_t flags;          ///< codebook features
> } aac_cb_info[] = {
>     {    0, -1, CB_UNSIGNED }, // zero codebook
>     {    1,  0, 0 },
>     {    1,  1, 0 },
>     {    2,  2, CB_UNSIGNED },
>     {    2,  3, CB_UNSIGNED },
>     {    4,  4, CB_PAIRS },
>     {    4,  5, CB_PAIRS },
>     {    7,  6, CB_PAIRS | CB_UNSIGNED },
>     {    7,  7, CB_PAIRS | CB_UNSIGNED },
>     {   12,  8, CB_PAIRS | CB_UNSIGNED },
>     {   12,  9, CB_PAIRS | CB_UNSIGNED },
>     { 8191, 10, CB_PAIRS | CB_UNSIGNED | CB_ESCAPE },
>     {   -1, -1, 0 }, // reserved
>     {   -1, -1, 0 }, // perceptual noise substitution
>     {   -1, -1, 0 }, // intensity out-of-phase
>     {   -1, -1, 0 }, // intensity in-phase
> };
> 

> /** default channel configurations */
> static const uint8_t aac_chan_configs[6][5] = {
>  {1, ID_SCE},                         // 1 channel  - single channel element
>  {1, ID_CPE},                         // 2 channels - channel pair
>  {2, ID_SCE, ID_CPE},                 // 3 channels - center + stereo
>  {3, ID_SCE, ID_CPE, ID_SCE},         // 4 channels - front center + stereo + back center
>  {3, ID_SCE, ID_CPE, ID_CPE},         // 5 channels - front center + stereo + back stereo
>  {4, ID_SCE, ID_CPE, ID_CPE, ID_LFE}, // 6 channels - front center + stereo + back stereo + LFE
> };
> 
> /**
>  * AAC encoder context
>  */
> typedef struct {
>     PutBitContext pb;
>     MDCTContext mdct1024;                        ///< long (1024 samples) frame transform context
>     MDCTContext mdct128;                         ///< short (128 samples) frame transform context
>     DSPContext  dsp;

ok

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Observe your enemies, for they first find out your faults. -- Antisthenes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080813/6001c239/attachment.pgp>