[FFmpeg-soc] [soc]: r3007 - in aacenc: aac.h aac_enc.patch aacenc.c aacpsy.c checkout.sh
kostya
subversion at mplayerhq.hu
Mon Aug 4 13:05:48 CEST 2008
Author: kostya
Date: Mon Aug 4 13:05:48 2008
New Revision: 3007
Log:
Synchronize encoder with aac.h and aactab.h from decoder
Modified:
aacenc/aac.h
aacenc/aac_enc.patch
aacenc/aacenc.c
aacenc/aacpsy.c
aacenc/checkout.sh
Modified: aacenc/aac.h
==============================================================================
--- aacenc/aac.h (original)
+++ aacenc/aac.h Mon Aug 4 13:05:48 2008
@@ -1,80 +1,187 @@
-/* this file is a mere collection of things that were borrowed from
- * GSoC AAC decoder and should be put into common aac.h
- * while the merge is not done, declarations should reside here
+/*
+ * AAC definitions and structures
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef ERSATZ_AAC_H
-#define ERSATZ_AAC_H
-#define MAX_SWB_SIZE 51
+/**
+ * @file aac.h
+ * AAC definitions and structures
+ * @author Oded Shimon ( ods15 ods15 dyndns org )
+ * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
+ */
-DECLARE_ALIGNED_16(static float, kbd_long_1024[1024]);
-DECLARE_ALIGNED_16(static float, kbd_short_128[128]);
-DECLARE_ALIGNED_16(static float, sine_long_1024[1024]);
-DECLARE_ALIGNED_16(static float, sine_short_128[128]);
+#ifndef FFMPEG_AAC_H
+#define FFMPEG_AAC_H
/**
- * window sequences
+ * AAC SSR (Scalable Sample Rate) is currently not working, and therefore
+ * not compiled in. SSR files play without crashing but produce audible
+ * artifacts that seem to be related to EIGHT_SHORT_SEQUENCE windows.
*/
-enum WindowSequence {
- ONLY_LONG_SEQUENCE,
- LONG_START_SEQUENCE,
- EIGHT_SHORT_SEQUENCE,
- LONG_STOP_SEQUENCE,
-};
+//#define AAC_SSR
/**
- * IDs for raw_data_block
+ * AAC LTP (Long Term Prediction) is currently not working, and therefore
+ * not compiled in. Playing LTP files with LTP support compiled in results
+ * in crashes due to SSE alignment issues. Also, there are major audible
+ * artifacts.
*/
-enum {
- ID_SCE = 0x0,
+//#define AAC_LTP
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "libavutil/random.h"
+
+#include "mpeg4audio.h"
+
+#include <stdint.h>
+
+#define AAC_INIT_VLC_STATIC(num, size) \
+ INIT_VLC_STATIC(&vlc_spectral[num], 6, ff_aac_spectral_sizes[num], \
+ ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
+ ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
+ size);
+
+#define MAX_CHANNELS 64
+#define MAX_TAGID 16
+
+#define TNS_MAX_ORDER 20
+#define PNS_MEAN_ENERGY 3719550720.0f // sqrt(3.0) * 1<<31
+#define IVQUANT_SIZE 1024
+
+enum AudioObjectType {
+ AOT_NULL,
+ // Support? Name
+ AOT_AAC_MAIN, ///< Y Main
+ AOT_AAC_LC, ///< Y Low Complexity
+ AOT_AAC_SSR, ///< N (code in SoC repo) Scalable Sample Rate
+ AOT_AAC_LTP, ///< N (code in SoC repo) Long Term Prediction
+ AOT_SBR, ///< N (in progress) Spectral Band Replication
+ AOT_AAC_SCALABLE, ///< N Scalable
+ AOT_TWINVQ, ///< N Twin Vector Quantizer
+ AOT_CELP, ///< N Code Excited Linear Prediction
+ AOT_HVXC, ///< N Harmonic Vector eXcitation Coding
+ AOT_TTSI = 12, ///< N Text-To-Speech Interface
+ AOT_MAINSYNTH, ///< N Main Synthesis
+ AOT_WAVESYNTH, ///< N Wavetable Synthesis
+ AOT_MIDI, ///< N General MIDI
+ AOT_SAFX, ///< N Algorithmic Synthesis and Audio Effects
+ AOT_ER_AAC_LC, ///< N Error Resilient Low Complexity
+ AOT_ER_AAC_LTP = 19, ///< N Error Resilient Long Term Prediction
+ AOT_ER_AAC_SCALABLE, ///< N Error Resilient Scalable
+ AOT_ER_TWINVQ, ///< N Error Resilient Twin Vector Quantizer
+ AOT_ER_BSAC, ///< N Error Resilient Bit-Sliced Arithmetic Coding
+ AOT_ER_AAC_LD, ///< N Error Resilient Low Delay
+ AOT_ER_CELP, ///< N Error Resilient Code Excited Linear Prediction
+ AOT_ER_HVXC, ///< N Error Resilient Harmonic Vector eXcitation Coding
+ AOT_ER_HILN, ///< N Error Resilient Harmonic and Individual Lines plus Noise
+ AOT_ER_PARAM, ///< N Error Resilient Parametric
+ AOT_SSC, ///< N SinuSoidal Coding
+};
+
+enum RawDataBlockID {
+ ID_SCE,
ID_CPE,
ID_CCE,
ID_LFE,
ID_DSE,
ID_PCE,
ID_FIL,
- ID_END
+ ID_END,
};
-/**
- * special codebooks
- */
-enum Codebook {
- ZERO_HCB = 0,
- FIRST_PAIR_HCB = 5,
- ESC_HCB = 11,
- NOISE_HCB = 13,
- INTENSITY_HCB2 = 14,
- INTENSITY_HCB = 15,
- ESC_FLAG = 16,
+enum ExtensionPayloadID {
+ EXT_FILL,
+ EXT_FILL_DATA,
+ EXT_DATA_ELEMENT,
+ EXT_DYNAMIC_RANGE = 0xb,
+ EXT_SBR_DATA = 0xd,
+ EXT_SBR_DATA_CRC = 0xe,
+};
+
+enum WindowSequence {
+ ONLY_LONG_SEQUENCE,
+ LONG_START_SEQUENCE,
+ EIGHT_SHORT_SEQUENCE,
+ LONG_STOP_SEQUENCE,
+};
+
+enum BandType {
+ ZERO_BT = 0, ///< Scalefactors and spectral data are all zero.
+ FIRST_PAIR_BT = 5, ///< This and later band types encode two values (rather than four) with one code word.
+ ESC_BT = 11, ///< Spectral data are coded with an escape sequence.
+ NOISE_BT = 13, ///< Spectral data are scaled white noise not coded in the bitstream.
+ INTENSITY_BT2 = 14, ///< Scalefactor data are intensity stereo positions.
+ INTENSITY_BT = 15, ///< Scalefactor data are intensity stereo positions.
+};
+
+#define IS_CODEBOOK_UNSIGNED(x) ((x - 1) & 10)
+
+enum ChannelType {
+ AAC_CHANNEL_FRONT = 1,
+ AAC_CHANNEL_SIDE = 2,
+ AAC_CHANNEL_BACK = 3,
+ AAC_CHANNEL_LFE = 4,
+ AAC_CHANNEL_CC = 5,
};
/**
- * pulse tool
+ * mix-down channel types
+ * MIXDOWN_CENTER is the index into the mix-down arrays for a Single Channel Element with AAC_CHANNEL_FRONT.
+ * MIXDOWN_(BACK|FRONT) are the indices for Channel Pair Elements with AAC_CHANNEL_(BACK|FRONT).
*/
-typedef struct {
- int present;
- int num_pulse;
- int start;
- int offset[4];
- int amp[4];
-} Pulse;
+enum {
+ MIXDOWN_CENTER,
+ MIXDOWN_FRONT,
+ MIXDOWN_BACK,
+};
-#define MAX_TAGID 16
+#define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0
+#define SCALE_MAX_POS 255 ///< scalefactor index maximum value
+#define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard
+#define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference
/**
* Program configuration - describes how channels are arranged. Either read from
* stream (ID_PCE) or created based on a default fixed channel arrangement.
*/
typedef struct {
- int che_type[4][MAX_TAGID]; ///< channel element type with the first index as the first 4 raw_data_block IDs
- int mono_mixdown; ///< The SCE tag to use if user requests mono output, -1 if not available.
- int stereo_mixdown; ///< The CPE tag to use if user requests stereo output, -1 if not available.
- int matrix_mixdown; ///< The CPE tag to use if user requests matrixed stereo output, -1 if not available.
- int mixdown_coeff_index; ///< 0-3
- int pseudo_surround; ///< Mix surround channels out of phase.
+ enum ChannelType che_type[4][MAX_TAGID]; ///< channel element type with the first index as the first 4 raw_data_block IDs
+ int mono_mixdown_tag; ///< The SCE tag to use if user requests mono output, -1 if not available.
+ int stereo_mixdown_tag; ///< The CPE tag to use if user requests stereo output, -1 if not available.
+ int mixdown_coeff_index; ///< 0-3
+ int pseudo_surround; ///< Mix surround channels out of phase.
} ProgramConfig;
+#ifdef AAC_LTP
+/**
+ * Long Term Prediction
+ */
+#define MAX_LTP_LONG_SFB 40
+typedef struct {
+ int present;
+ int lag;
+ float coef;
+ int used[MAX_LTP_LONG_SFB];
+} LongTermPrediction;
+#endif /* AAC_LTP */
/**
* Individual Channel Stream
@@ -82,19 +189,21 @@ typedef struct {
typedef struct {
int intensity_present;
uint8_t max_sfb; ///< number of scalefactor bands per group
- enum WindowSequence window_sequence;
- enum WindowSequence window_sequence_prev;
+ enum WindowSequence window_sequence[2];
uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window.
int num_window_groups;
- uint8_t grouping;
uint8_t group_len[8];
- const uint8_t *swb_sizes;
- int num_swb;
+#ifdef AAC_LTP
+ LongTermPrediction ltp;
+ LongTermPrediction ltp2;
+#endif /* AAC_LTP */
+ const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
+ const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window
+ int num_swb; ///< number of scalefactor window bands
int num_windows;
int tns_max_bands;
} IndividualChannelStream;
-#define TNS_MAX_ORDER 20
/**
* Temporal Noise Shaping
*/
@@ -120,26 +229,97 @@ typedef struct {
} MidSideStereo;
/**
- * Single Channel Element
- * Used for both SCE and LFE elements
+ * Dynamic Range Control - decoded from the bitstream but not processed further.
*/
typedef struct {
- int gain; /**< Channel gain (not used by AAC bitstream).
+ int pce_instance_tag; ///< Indicates with which program the DRC info is associated.
+ int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative
+ int dyn_rng_ctl[17]; ///< DRC magnitude information
+ int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing.
+ int additional_excluded_chns[MAX_CHANNELS / 7]; /**< The exclude_mask bits are
+ coded in groups of 7 with 1 bit preceding each group (except the first)
+ indicating that 7 more mask bits are coded. */
+ int band_incr; ///< Number of DRC bands greater than 1 having DRC info.
+ int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain.
+ int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
+ int prog_ref_level; /**< A reference level for the long-term program audio level for all
+ channels combined. */
+} DynamicRangeControl;
+
+/**
+ * pulse tool
+ */
+typedef struct {
+ int present;
+ int num_pulse;
+ int start;
+ int offset[4];
+ int amp[4];
+} Pulse;
+
+#ifdef AAC_SSR
+/**
+ * parameters for the SSR Inverse Polyphase Quadrature Filter
+ */
+typedef struct {
+ float q[4][4];
+ float t0[4][12];
+ float t1[4][12];
+} ssr_context;
+
+/**
+ * per-element gain control for SSR
+ */
+typedef struct {
+ int max_band;
+ int adjust_num[4][8];
+ int alev[4][8][8];
+ int aloc[4][8][8];
+ float buf[4][24];
+} ScalableSamplingRate;
+#endif /* AAC_SSR */
+
+/**
+ * coupling parameters
+ */
+typedef struct {
+ int is_indep_coup; ///< Set if independent coupling (i.e. after IMDCT).
+ int domain; ///< Controls if coupling is performed before (0) or after (1) the TNS decoding of the target channels.
+ int num_coupled; ///< number of target elements
+ int is_cpe[9]; ///< Set if target is a CPE (otherwise it's an SCE).
+ int tag_select[9]; ///< element tag index
+ int l[9]; ///< Apply gain to left channel of a CPE.
+ int r[9]; ///< Apply gain to right channel of a CPE.
+ float gain[18][8][64];
+} ChannelCoupling;
+
+
+/**
+ * Single Channel Element - used for both SCE and LFE elements.
+ */
+typedef struct {
+ float mixing_gain; /**< Channel gain (not used by AAC bitstream).
* Note that this is applied before joint stereo decoding.
* Thus, when used inside CPE elements, both channels must have equal gain.
*/
IndividualChannelStream ics;
TemporalNoiseShaping tns;
Pulse pulse;
- int zeroes[8][64];
- int sf_idx[8][64];
- enum Codebook cb[8][64]; ///< codebooks
- int cb_run_end[8][64]; ///< codebook run end points
+ enum BandType band_type[8][64]; ///< band types
+ int band_type_run_end[8][64]; ///< band type run end points
float sf[8][64]; ///< scalefactors
+ int sf_idx[8][64]; ///< scalefactor indices (used by encoder)
+ int zeroes[8][64]; ///< band is not coded (used by encoder)
DECLARE_ALIGNED_16(float, coeffs[1024]); ///< coefficients for IMDCT
DECLARE_ALIGNED_16(float, saved[1024]); ///< overlap
DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output
- DECLARE_ALIGNED_16(int, icoefs[1024]); ///< integer coefficients for coding
+ DECLARE_ALIGNED_16(int, icoefs[1024]); ///< integer coefficients for encoding
+#ifdef AAC_LTP
+ int16_t *ltp_state;
+#endif /* AAC_LTP */
+#ifdef AAC_SSR
+ ScalableSamplingRate *ssr;
+#endif /* AAC_SSR */
} SingleChannelElement;
/**
@@ -152,16 +332,64 @@ typedef struct {
// shared
SingleChannelElement ch[2];
// CCE specific
-// ChannelCoupling coup;
+ ChannelCoupling coup;
} ChannelElement;
-//my stuff
+/**
+ * main AAC context
+ */
+typedef struct {
+ AVCodecContext * avccontext;
-#define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0
-#define SCALE_MAX_POS 255 ///< scalefactor index maximum value
-#define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard
-#define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference
+ MPEG4AudioConfig m4ac;
+ int is_saved; ///< Set if elements have stored overlap from previous frame.
+ DynamicRangeControl che_drc;
-#endif
+ /**
+ * @defgroup elements
+ * @{
+ */
+ ProgramConfig pcs;
+ ChannelElement * che[4][MAX_TAGID];
+ /** @} */
+ /**
+ * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
+ * @{
+ */
+ DECLARE_ALIGNED_16(float, buf_mdct[2048]);
+ DECLARE_ALIGNED_16(float, revers[1024]);
+ /** @} */
+
+ /**
+ * @defgroup tables Computed / set up during initialization.
+ * @{
+ */
+ MDCTContext mdct;
+ MDCTContext mdct_small;
+#ifdef AAC_LTP
+ MDCTContext mdct_ltp;
+#endif /* AAC_LTP */
+ DSPContext dsp;
+#ifdef AAC_SSR
+ ssr_context ssrctx;
+#endif /* AAC_SSR */
+ AVRandomState random_state;
+ /** @} */
+
+ /**
+ * @defgroup output Members used for output interleaving and down-mixing.
+ * @{
+ */
+ float *interleaved_output; ///< Interim buffer for interleaving PCM samples.
+ float *output_data[MAX_CHANNELS]; ///< Points to each element's 'ret' buffer (PCM output).
+ ChannelElement *mm[3]; ///< Center/Front/Back channel elements to use for matrix mix-down.
+ float add_bias; ///< offset for dsp.float_to_int16
+ float sf_scale; ///< Pre-scale for correct IMDCT and dsp.float_to_int16.
+ int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16
+ /** @} */
+
+} AACContext;
+
+#endif /* FFMPEG_AAC_H */
Modified: aacenc/aac_enc.patch
==============================================================================
--- aacenc/aac_enc.patch (original)
+++ aacenc/aac_enc.patch Mon Aug 4 13:05:48 2008
@@ -6,7 +6,7 @@ index d4f6d1c..0ed9057 100644
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
-+OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aacpsy.o mdct.o fft.o mpeg4audio.o
++OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aacpsy.o aactab.o mdct.o fft.o mpeg4audio.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o
OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3tab.o ac3.o mdct.o fft.o
OBJS-$(CONFIG_AC3_ENCODER) += ac3enc.o ac3tab.o ac3.o
Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c (original)
+++ aacenc/aacenc.c Mon Aug 4 13:05:48 2008
@@ -119,28 +119,25 @@ static const uint8_t *swb_size_128[] = {
/** spectral coefficients codebook information */
static const struct {
int16_t maxval; ///< maximum possible value
-
- const uint8_t *bits; ///< codeword lengths
- const uint16_t *codes; ///< codewords
-
+ int8_t cb_num; ///< codebook number
uint8_t flags; ///< codebook features
} aac_cb_info[] = {
- { 0, NULL , NULL , CB_UNSIGNED }, // zero codebook
- { 1, bits1 , code1 , 0 },
- { 1, bits2 , code2 , 0 },
- { 2, bits3 , code3 , CB_UNSIGNED },
- { 2, bits4 , code4 , CB_UNSIGNED },
- { 4, bits5 , code5 , CB_PAIRS },
- { 4, bits6 , code6 , CB_PAIRS },
- { 7, bits7 , code7 , CB_PAIRS | CB_UNSIGNED },
- { 7, bits8 , code8 , CB_PAIRS | CB_UNSIGNED },
- { 12, bits9 , code9 , CB_PAIRS | CB_UNSIGNED },
- { 12, bits10, code10, CB_PAIRS | CB_UNSIGNED },
- { 8191, bits11, code11, CB_PAIRS | CB_UNSIGNED | CB_ESCAPE },
- { -1, NULL , NULL , 0 }, // reserved
- { -1, NULL , NULL , 0 }, // perceptual noise substitution
- { -1, NULL , NULL , 0 }, // intensity out-of-phase
- { -1, NULL , NULL , 0 }, // intensity in-phase
+ { 0, -1, CB_UNSIGNED }, // zero codebook
+ { 1, 0, 0 },
+ { 1, 1, 0 },
+ { 2, 2, CB_UNSIGNED },
+ { 2, 3, CB_UNSIGNED },
+ { 4, 4, CB_PAIRS },
+ { 4, 5, CB_PAIRS },
+ { 7, 6, CB_PAIRS | CB_UNSIGNED },
+ { 7, 7, CB_PAIRS | CB_UNSIGNED },
+ { 12, 8, CB_PAIRS | CB_UNSIGNED },
+ { 12, 9, CB_PAIRS | CB_UNSIGNED },
+ { 8191, 10, CB_PAIRS | CB_UNSIGNED | CB_ESCAPE },
+ { -1, -1, 0 }, // reserved
+ { -1, -1, 0 }, // perceptual noise substitution
+ { -1, -1, 0 }, // intensity out-of-phase
+ { -1, -1, 0 }, // intensity in-phase
};
/** default channel configurations */
@@ -213,18 +210,18 @@ static av_cold int aac_encode_init(AVCod
}
s->samplerate_index = i;
s->swb_sizes1024 = swb_size_1024[i];
- s->swb_num1024 = num_swb_1024[i];
+ s->swb_num1024 = ff_aac_num_swb_1024[i];
s->swb_sizes128 = swb_size_128[i];
- s->swb_num128 = num_swb_128[i];
+ s->swb_num128 = ff_aac_num_swb_128[i];
dsputil_init(&s->dsp, avctx);
ff_mdct_init(&s->mdct1024, 11, 0);
ff_mdct_init(&s->mdct128, 8, 0);
// window init
- ff_kbd_window_init(kbd_long_1024, 4.0, 1024);
- ff_kbd_window_init(kbd_short_128, 6.0, 128);
- ff_sine_window_init(sine_long_1024, 1024);
- ff_sine_window_init(sine_short_128, 128);
+ ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
+ ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
+ ff_sine_window_init(ff_aac_sine_long_1024, 1024);
+ ff_sine_window_init(ff_aac_sine_short_128, 128);
s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
//TODO: psy model selection with some option
@@ -241,20 +238,20 @@ static av_cold int aac_encode_init(AVCod
static void analyze(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, short *audio, int channel)
{
int i, j, k;
- const float * lwindow = cpe->ch[channel].ics.use_kb_window[0] ? kbd_long_1024 : sine_long_1024;
- const float * swindow = cpe->ch[channel].ics.use_kb_window[0] ? kbd_short_128 : sine_short_128;
- const float * pwindow = cpe->ch[channel].ics.use_kb_window[1] ? kbd_short_128 : sine_short_128;
+ const float * lwindow = cpe->ch[channel].ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_aac_sine_long_1024;
+ const float * swindow = cpe->ch[channel].ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_aac_sine_short_128;
+ const float * pwindow = cpe->ch[channel].ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_aac_sine_short_128;
- if (cpe->ch[channel].ics.window_sequence != EIGHT_SHORT_SEQUENCE) {
+ if (cpe->ch[channel].ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
memcpy(s->output, cpe->ch[channel].saved, sizeof(float)*1024);
- if(cpe->ch[channel].ics.window_sequence == LONG_STOP_SEQUENCE){
+ if(cpe->ch[channel].ics.window_sequence[0] == LONG_STOP_SEQUENCE){
memset(s->output, 0, sizeof(s->output[0]) * 448);
for(i = 448; i < 576; i++)
s->output[i] = cpe->ch[channel].saved[i] * pwindow[i - 448];
for(i = 576; i < 704; i++)
s->output[i] = cpe->ch[channel].saved[i];
}
- if(cpe->ch[channel].ics.window_sequence != LONG_START_SEQUENCE){
+ if(cpe->ch[channel].ics.window_sequence[0] != LONG_START_SEQUENCE){
j = channel;
for (i = 0; i < 1024; i++, j += avctx->channels){
s->output[i+1024] = audio[j] / 512.0 * lwindow[1024 - i - 1];
@@ -297,9 +294,9 @@ static void put_ics_info(AVCodecContext
int i;
put_bits(&s->pb, 1, 0); // ics_reserved bit
- put_bits(&s->pb, 2, info->window_sequence);
+ put_bits(&s->pb, 2, info->window_sequence[0]);
put_bits(&s->pb, 1, info->use_kb_window[0]);
- if(info->window_sequence != EIGHT_SHORT_SEQUENCE){
+ if(info->window_sequence[0] != EIGHT_SHORT_SEQUENCE){
put_bits(&s->pb, 6, info->max_sfb);
put_bits(&s->pb, 1, 0); // no prediction
}else{
@@ -357,7 +354,7 @@ static int determine_section_info(AACEnc
for(; cb < 12; cb++){
score = 0;
dim = (aac_cb_info[cb].flags & CB_PAIRS) ? 2 : 4;
- if(!band || cpe->ch[channel].cb[win][band - 1] != cb)
+ if(!band || cpe->ch[channel].band_type[win][band - 1] != cb)
score += 9; //that's for new codebook entry
w = win;
if(aac_cb_info[cb].flags & CB_UNSIGNED){
@@ -366,7 +363,7 @@ static int determine_section_info(AACEnc
idx = 0;
for(j = 0; j < dim; j++)
idx = idx * aac_cb_info[cb].maxval + FFABS(cpe->ch[channel].icoefs[i+j]);
- score += bits[idx];
+ score += ff_aac_spectral_bits[aac_cb_info[cb].cb_num][idx];
for(j = 0; j < dim; j++)
if(cpe->ch[channel].icoefs[i+j])
score++;
@@ -379,7 +376,7 @@ static int determine_section_info(AACEnc
idx = 0;
for(j = 0; j < dim; j++)
idx = idx * (aac_cb_info[cb].maxval*2 + 1) + cpe->ch[channel].icoefs[i+j] + aac_cb_info[cb].maxval;
- score += bits[idx];
+ score += ff_aac_spectral_bits[aac_cb_info[cb].cb_num][idx];
}
w++;
}while(w < cpe->ch[channel].ics.num_windows && cpe->ch[channel].ics.group_len[w]);
@@ -397,8 +394,8 @@ static int determine_section_info(AACEnc
*/
static void encode_codebook(AACEncContext *s, ChannelElement *cpe, int channel, int start, int size, int cb)
{
- const uint8_t *bits = aac_cb_info[cb].bits;
- const uint16_t *codes = aac_cb_info[cb].codes;
+ const uint8_t *bits = ff_aac_spectral_bits[aac_cb_info[cb].cb_num];
+ const uint16_t *codes = ff_aac_spectral_codes[aac_cb_info[cb].cb_num];
const int dim = (aac_cb_info[cb].flags & CB_PAIRS) ? 2 : 4;
int i, j, idx;
@@ -459,7 +456,7 @@ static void encode_section_data(AVCodecC
if(cpe->ch[channel].ics.group_len[w]) continue;
count = 0;
for(i = 0; i < cpe->ch[channel].ics.max_sfb; i++){
- if(!i || cpe->ch[channel].cb[w][i] != cpe->ch[channel].cb[w][i-1]){
+ if(!i || cpe->ch[channel].band_type[w][i] != cpe->ch[channel].band_type[w][i-1]){
if(count){
while(count >= esc){
put_bits(&s->pb, bits, esc);
@@ -467,7 +464,7 @@ static void encode_section_data(AVCodecC
}
put_bits(&s->pb, bits, count);
}
- put_bits(&s->pb, 4, cpe->ch[channel].cb[w][i]);
+ put_bits(&s->pb, 4, cpe->ch[channel].band_type[w][i]);
count = 1;
}else
count++;
@@ -487,7 +484,7 @@ static void encode_section_data(AVCodecC
*/
static void encode_scale_factor_data(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, int channel)
{
- int off = cpe->ch[channel].gain, diff;
+ int off = cpe->ch[channel].mixing_gain, diff;
int i, w;
for(w = 0; w < cpe->ch[channel].ics.num_windows; w++){
@@ -497,7 +494,7 @@ static void encode_scale_factor_data(AVC
diff = cpe->ch[channel].sf_idx[w][i] - off + SCALE_DIFF_ZERO;
if(diff < 0 || diff > 120) av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
off = cpe->ch[channel].sf_idx[w][i];
- put_bits(&s->pb, bits[diff], code[diff]);
+ put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
}
}
}
@@ -530,7 +527,7 @@ static void encode_tns_data(AVCodecConte
put_bits(&s->pb, 1, cpe->ch[channel].tns.present);
if(!cpe->ch[channel].tns.present) return;
- if(cpe->ch[channel].ics.window_sequence == EIGHT_SHORT_SEQUENCE){
+ if(cpe->ch[channel].ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE){
for(w = 0; w < cpe->ch[channel].ics.num_windows; w++){
put_bits(&s->pb, 1, cpe->ch[channel].tns.n_filt[w]);
if(!cpe->ch[channel].tns.n_filt[w]) continue;
@@ -578,7 +575,7 @@ static void encode_spectral_data(AVCodec
}
w2 = w;
do{
- encode_codebook(s, cpe, channel, start + w2*128, cpe->ch[channel].ics.swb_sizes[i], cpe->ch[channel].cb[w][i]);
+ encode_codebook(s, cpe, channel, start + w2*128, cpe->ch[channel].ics.swb_sizes[i], cpe->ch[channel].band_type[w][i]);
w2++;
}while(w2 < cpe->ch[channel].ics.num_windows && cpe->ch[channel].ics.group_len[w2]);
start += cpe->ch[channel].ics.swb_sizes[i];
@@ -599,15 +596,15 @@ static int encode_individual_channel(AVC
if(cpe->ch[channel].ics.group_len[w]) continue;
for(g = 0; g < cpe->ch[channel].ics.max_sfb; g++){
if(!cpe->ch[channel].zeroes[w][g]){
- cpe->ch[channel].cb[w][g] = determine_section_info(s, cpe, channel, w, g, i, cpe->ch[channel].ics.swb_sizes[g]);
- cpe->ch[channel].zeroes[w][g] = !cpe->ch[channel].cb[w][g];
+ cpe->ch[channel].band_type[w][g] = determine_section_info(s, cpe, channel, w, g, i, cpe->ch[channel].ics.swb_sizes[g]);
+ cpe->ch[channel].zeroes[w][g] = !cpe->ch[channel].band_type[w][g];
}else
- cpe->ch[channel].cb[w][g] = 0;
+ cpe->ch[channel].band_type[w][g] = 0;
i += cpe->ch[channel].ics.swb_sizes[g];
}
}
- put_bits(&s->pb, 8, cpe->ch[channel].gain); //global gain
+ put_bits(&s->pb, 8, cpe->ch[channel].mixing_gain); //global gain
if(!cpe->common_window) put_ics_info(avctx, &cpe->ch[channel].ics);
encode_section_data(avctx, s, cpe, channel);
encode_scale_factor_data(avctx, s, cpe, channel);
Modified: aacenc/aacpsy.c
==============================================================================
--- aacenc/aacpsy.c (original)
+++ aacenc/aacpsy.c Mon Aug 4 13:05:48 2008
@@ -167,7 +167,7 @@ static void psy_null_window(AACPsyContex
int chans = type == ID_CPE ? 2 : 1;
for(ch = 0; ch < chans; ch++){
- cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE;
+ cpe->ch[ch].ics.window_sequence[0] = ONLY_LONG_SEQUENCE;
cpe->ch[ch].ics.use_kb_window[0] = 1;
cpe->ch[ch].ics.num_windows = 1;
cpe->ch[ch].ics.swb_sizes = apc->bands1024;
@@ -211,7 +211,7 @@ static void psy_null_process(AACPsyConte
for(g = 0; g < apc->num_bands1024; g++)
if(!cpe->ch[ch].zeroes[0][g])
minscale = FFMIN(minscale, cpe->ch[ch].sf_idx[0][g]);
- cpe->ch[ch].gain = minscale;
+ cpe->ch[ch].mixing_gain = minscale;
for(g = 0; g < apc->num_bands1024; g++)
if(!cpe->ch[ch].zeroes[0][g])
cpe->ch[ch].sf_idx[0][g] = FFMIN(minscale + SCALE_MAX_DIFF, cpe->ch[ch].sf_idx[0][g]);
@@ -225,17 +225,17 @@ static void psy_null8_window(AACPsyConte
int chans = type == ID_CPE ? 2 : 1;
for(ch = 0; ch < chans; ch++){
- int prev_seq = cpe->ch[ch].ics.window_sequence_prev;
+ int prev_seq = cpe->ch[ch].ics.window_sequence[1];
cpe->ch[ch].ics.use_kb_window[1] = cpe->ch[ch].ics.use_kb_window[0];
- cpe->ch[ch].ics.window_sequence_prev = cpe->ch[ch].ics.window_sequence;
- switch(cpe->ch[ch].ics.window_sequence){
- case ONLY_LONG_SEQUENCE: if(prev_seq == ONLY_LONG_SEQUENCE)cpe->ch[ch].ics.window_sequence = LONG_START_SEQUENCE; break;
- case LONG_START_SEQUENCE: cpe->ch[ch].ics.window_sequence = EIGHT_SHORT_SEQUENCE; break;
- case EIGHT_SHORT_SEQUENCE: if(prev_seq == EIGHT_SHORT_SEQUENCE)cpe->ch[ch].ics.window_sequence = LONG_STOP_SEQUENCE; break;
- case LONG_STOP_SEQUENCE: cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE; break;
+ cpe->ch[ch].ics.window_sequence[1] = cpe->ch[ch].ics.window_sequence[0];
+ switch(cpe->ch[ch].ics.window_sequence[0]){
+ case ONLY_LONG_SEQUENCE: if(prev_seq == ONLY_LONG_SEQUENCE)cpe->ch[ch].ics.window_sequence[0] = LONG_START_SEQUENCE; break;
+ case LONG_START_SEQUENCE: cpe->ch[ch].ics.window_sequence[0] = EIGHT_SHORT_SEQUENCE; break;
+ case EIGHT_SHORT_SEQUENCE: if(prev_seq == EIGHT_SHORT_SEQUENCE)cpe->ch[ch].ics.window_sequence[0] = LONG_STOP_SEQUENCE; break;
+ case LONG_STOP_SEQUENCE: cpe->ch[ch].ics.window_sequence[0] = ONLY_LONG_SEQUENCE; break;
}
- if(cpe->ch[ch].ics.window_sequence != EIGHT_SHORT_SEQUENCE){
+ if(cpe->ch[ch].ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE){
cpe->ch[ch].ics.use_kb_window[0] = 1;
cpe->ch[ch].ics.num_windows = 1;
cpe->ch[ch].ics.swb_sizes = apc->bands1024;
@@ -273,7 +273,7 @@ static void psy_null8_process(AACPsyCont
}
}
for(ch = 0; ch < chans; ch++){
- cpe->ch[ch].gain = SCALE_ONE_POS;
+ cpe->ch[ch].mixing_gain = SCALE_ONE_POS;
for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
cpe->ch[ch].sf_idx[w][g] = SCALE_ONE_POS;
@@ -468,7 +468,7 @@ static void psy_3gpp_window(AACPsyContex
if(la && !(apc->flags & PSY_MODEL_NO_SWITCH)){
float s[8], v;
for(ch = 0; ch < chans; ch++){
- enum WindowSequence last_window_sequence = cpe->ch[ch].ics.window_sequence;
+ enum WindowSequence last_window_sequence = cpe->ch[ch].ics.window_sequence[0];
int switch_to_eight = 0;
float sum = 0.0, sum2 = 0.0;
int attack_n = 0;
@@ -511,13 +511,13 @@ static void psy_3gpp_window(AACPsyContex
}
}else{
for(ch = 0; ch < chans; ch++){
- win[ch] = (cpe->ch[ch].ics.window_sequence == EIGHT_SHORT_SEQUENCE) ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
- grouping[ch] = (cpe->ch[ch].ics.window_sequence == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
+ win[ch] = (cpe->ch[ch].ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
+ grouping[ch] = (cpe->ch[ch].ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
}
}
for(ch = 0; ch < chans; ch++){
- cpe->ch[ch].ics.window_sequence = win[ch];
+ cpe->ch[ch].ics.window_sequence[0] = win[ch];
cpe->ch[ch].ics.use_kb_window[0] = 1;
if(win[ch] != EIGHT_SHORT_SEQUENCE){
cpe->ch[ch].ics.num_windows = 1;
@@ -531,8 +531,8 @@ static void psy_3gpp_window(AACPsyContex
for(i = 0; i < 8; i++)
cpe->ch[ch].ics.group_len[i] = (grouping[ch] >> i) & 1;
}
- cpe->common_window = chans > 1 && cpe->ch[0].ics.window_sequence == cpe->ch[1].ics.window_sequence && cpe->ch[0].ics.use_kb_window[0] == cpe->ch[1].ics.use_kb_window[0];
- if(cpe->common_window && cpe->ch[0].ics.window_sequence == EIGHT_SHORT_SEQUENCE && grouping[0] != grouping[1])
+ cpe->common_window = chans > 1 && cpe->ch[0].ics.window_sequence[0] == cpe->ch[1].ics.window_sequence[0] && cpe->ch[0].ics.use_kb_window[0] == cpe->ch[1].ics.use_kb_window[0];
+ if(cpe->common_window && cpe->ch[0].ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE && grouping[0] != grouping[1])
cpe->common_window = 0;
if(PSY_MODEL_MODE(apc->flags) > PSY_MODE_QUALITY){
av_log(apc->avctx, AV_LOG_ERROR, "Unknown mode %d, defaulting to CBR\n", PSY_MODEL_MODE(apc->flags));
@@ -602,7 +602,7 @@ static void psy_3gpp_process(AACPsyConte
memset(pch->band, 0, sizeof(pch->band));
for(ch = 0; ch < chans; ch++){
start = 0;
- cpe->ch[ch].gain = 0;
+ cpe->ch[ch].mixing_gain = 0;
for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
g2 = w*16 + g;
@@ -741,7 +741,7 @@ static void psy_3gpp_process(AACPsyConte
//determine scalefactors - 5.6.2
for(ch = 0; ch < chans; ch++){
prev_scale = -1;
- cpe->ch[ch].gain = 0;
+ cpe->ch[ch].mixing_gain = 0;
for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
g2 = w*16 + g;
@@ -799,7 +799,7 @@ static void psy_3gpp_process(AACPsyConte
for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
if(cpe->ch[ch].zeroes[w][g]) continue;
cpe->ch[ch].sf_idx[w][g] = av_clip(SCALE_ONE_POS + cpe->ch[ch].sf_idx[w][g], 0, SCALE_MAX_POS);
- if(!cpe->ch[ch].gain) cpe->ch[ch].gain = cpe->ch[ch].sf_idx[w][g];
+ if(!cpe->ch[ch].mixing_gain) cpe->ch[ch].mixing_gain = cpe->ch[ch].sf_idx[w][g];
}
//adjust scalefactors for window groups
Modified: aacenc/checkout.sh
==============================================================================
--- aacenc/checkout.sh (original)
+++ aacenc/checkout.sh Mon Aug 4 13:05:48 2008
@@ -16,5 +16,6 @@ cp ../../aacpsy.h .
cd ../..
svn co svn://svn.mplayerhq.hu/soc/aac
cp aac/aactab.h ffmpeg/libavcodec/.
+cp aac/aactab.c ffmpeg/libavcodec/
echo "Done! Now enter the ffmpeg dir, configure and make FFmpeg and enjoy the AAC encoder! :)"
More information about the FFmpeg-soc
mailing list