[FFmpeg-soc] [soc]: r2426 - aacenc/aacenc.c
kostya
subversion at mplayerhq.hu
Thu Jun 12 18:54:57 CEST 2008
Author: kostya
Date: Thu Jun 12 18:54:57 2008
New Revision: 2426
Log:
Reuse channel (pair) structures from AAC decoder.
This will also allow multichannel encoding in the distant future.
Modified:
aacenc/aacenc.c
Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c (original)
+++ aacenc/aacenc.c Thu Jun 12 18:54:57 2008
@@ -131,26 +131,76 @@ static const struct {
{ -1, NULL , NULL , 0 }, // intensity in-phase
};
+// data structures borrowed from aac.c with some minor modifications
+
+/**
+ * Individual Channel Stream
+ */
+typedef struct {
+ int intensity_present;
+ int max_sfb;
+ int window_sequence;
+ int window_shape; ///< If set, use a Kaiser-Bessel derived window, otherwise use a sine window
+ int window_shape_prev;
+ int num_window_groups;
+ uint8_t grouping;
+ uint8_t group_len[8];
+ const uint8_t *swb_sizes;
+ int num_swb;
+ int num_windows;
+ int tns_max_bands;
+} ics_struct;
+
+/**
+ * M/S joint channel coding
+ */
+typedef struct {
+ int present;
+ uint8_t mask[8][64];
+} ms_struct;
+
+/**
+ * Single Channel Element
+ * Used for both SCE and LFE elements
+ */
+typedef struct {
+ int gain; /**< Channel gain (not used by AAC bitstream).
+ * Note that this is applied before joint stereo decoding.
+ * Thus, when used inside CPE elements, both channels must have equal gain.
+ */
+ ics_struct ics;
+ int zeroes[64];
+ int sf_idx[64];
+ int cb[8][64]; ///< Codebooks
+ float sf[8][64]; ///< Scalefactors
+ DECLARE_ALIGNED_16(float, coeffs[1024]); ///< Coefficients for IMDCT
+ DECLARE_ALIGNED_16(float, saved[1024]); ///< Overlap
+ DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output
+ DECLARE_ALIGNED_16(int, icoefs[1024]); ///< integer coefficients for coding
+} sce_struct;
+
+/**
+ * Channel Pair Element
+ */
+typedef struct {
+ int common_window; ///< Set if channels share a common 'ics_struct' in bitstream
+ ms_struct ms;
+ sce_struct ch[2];
+} cpe_struct;
+
+// borrowing temporarily ends here
+
typedef struct {
PutBitContext pb;
MDCTContext mdct;
DECLARE_ALIGNED_16(float, kbd_long_1024[1024]);
DECLARE_ALIGNED_16(FFTSample, output[2048]);
- DECLARE_ALIGNED_16(FFTSample, frame_out[2][2048]);
- DECLARE_ALIGNED_16(FFTSample, coefs[2][1024]);
DECLARE_ALIGNED_16(FFTSample, tmp[1024]);
- DECLARE_ALIGNED_16(int, icoefs[2][1024]);
int samplerate_index;
- int common_window;
uint8_t *swb_sizes;
int swb_num;
- int coded_swb_num;
- int codebooks[MAX_SWB_SIZE];
- int scales[2][MAX_SWB_SIZE];
- uint8_t zeroes[2][MAX_SWB_SIZE];
-
- int global_gain;
+ cpe_struct cpe;
} AACEncContext;
#define SCALE_ONE_POS 140
@@ -212,93 +262,96 @@ static int aac_encode_init(AVCodecContex
return 0;
}
-static void determine_scales(AVCodecContext *avctx, int channel)
+static void determine_scales(AVCodecContext *avctx, cpe_struct *cpe, int channel)
{
AACEncContext *s = avctx->priv_data;
int i = 0, j, g, count = 0, maxswb;
double me, d;
+ cpe->ch[channel].ics.swb_sizes = s->swb_sizes;
+ cpe->ch[channel].ics.num_swb = s->swb_num;
for(g = 0; g < s->swb_num; g++){
me = 0.0;
d = 0.0;
for(j = 0; j < s->swb_sizes[g]; j++)
- if(s->coefs[channel][i + j] != 0.0){
- me += fabs(s->coefs[channel][i + j]);
+ if(cpe->ch[channel].coeffs[i + j] != 0.0){
+ me += fabs(cpe->ch[channel].coeffs[i + j]);
count++;
}
if(count)
me /= count;
- for(j = 0; j < s->swb_sizes[g]; j++)
- if(s->coefs[channel][i + j] != 0.0)
- d += (s->coefs[channel][i + j] - me) * (s->coefs[channel][i + j] - me);
+ for(j = 0; j < cpe->ch[channel].ics.swb_sizes[g]; j++)
+ if(cpe->ch[channel].coeffs[i + j] != 0.0)
+ d += (cpe->ch[channel].coeffs[i + j] - me) * (cpe->ch[channel].coeffs[i + j] - me);
if(count)
d /= count;
- s->zeroes[channel][g] = (me < 0.1 && d < 0.1);
- s->scales[channel][g] = SCALE_ONE_POS + g;
- i += s->swb_sizes[g];
+ cpe->ch[channel].zeroes[g] = (me < 0.1 && d < 0.1);
+ cpe->ch[channel].sf_idx[g] = SCALE_ONE_POS + g;
+ i += cpe->ch[channel].ics.swb_sizes[g];
}
- // if tail is zero, do not code it
- for(maxswb = s->swb_num; maxswb > 0 && s->zeroes[channel][maxswb - 1]; maxswb--);
- s->coded_swb_num = channel ? FFMAX(s->coded_swb_num, maxswb) : maxswb;
- s->global_gain = SCALE_ONE_POS;
+ cpe->ch[channel].gain = SCALE_ONE_POS;
+ for(maxswb = s->swb_num; maxswb > 0 && cpe->ch[channel].zeroes[maxswb-1]; maxswb--);
+ cpe->ch[channel].ics.max_sfb = maxswb;
+ cpe->ch[channel].ics.window_sequence = 0;
+ cpe->ch[channel].ics.window_shape = 1;
}
/* BIG FAT TODO! */
/* for now it just converts spectra to integer form */
-static void apply_psychoacoustics(AVCodecContext *avctx, int channel)
+static void apply_psychoacoustics(AVCodecContext *avctx, cpe_struct *cpe, int channel)
{
AACEncContext *s = avctx->priv_data;
int i = 0, j, g;
- for(g = 0; g < s->coded_swb_num; g++)
- if(s->zeroes[channel][g]){
- memset(s->icoefs[channel] + i, 0, s->swb_sizes[g] * sizeof(s->icoefs[0][0]));
- i += s->swb_sizes[g];
+ for(g = 0; g < cpe->ch[channel].ics.max_sfb; g++)
+ if(cpe->ch[channel].zeroes[g]){
+ memset(cpe->ch[channel].icoefs + i, 0, cpe->ch[channel].ics.swb_sizes[g] * sizeof(cpe->ch[0].icoefs[0]));
+ i += cpe->ch[channel].ics.swb_sizes[g];
}else
- for(j = 0; j < s->swb_sizes[g]; j++, i++)
- s->icoefs[channel][i] = (int)(roundf(s->coefs[channel][i] / pow2sf_tab[s->scales[channel][g]+60]));
- memset(s->icoefs[channel] + i, 0, (1024 - i) * sizeof(s->icoefs[0][0]));
+ for(j = 0; j < cpe->ch[channel].ics.swb_sizes[g]; j++, i++)
+ cpe->ch[channel].icoefs[i] = (int)(roundf(cpe->ch[channel].coeffs[i] / pow2sf_tab[cpe->ch[channel].sf_idx[g]+60]));
+ memset(cpe->ch[channel].icoefs + i, 0, (1024 - i) * sizeof(cpe->ch[channel].icoefs[0]));
}
-static void analyze(AVCodecContext *avctx, AACEncContext *s, short *audio, int channel)
+static void analyze(AVCodecContext *avctx, AACEncContext *s, cpe_struct *cpe, short *audio, int channel)
{
int i, j;
// perform MDCT
- memcpy(s->output, s->frame_out[channel], sizeof(float)*1024);
+ memcpy(s->output, cpe->ch[channel].saved, sizeof(float)*1024);
j = channel;
for (i = 0; i < 1024; i++, j += avctx->channels){
- s->output[i+1024] = audio[j] / 512 * s->kbd_long_1024[1024 - i - 1];
- s->frame_out[channel][i] = audio[j] / 512 * s->kbd_long_1024[i];
+ s->output[i+1024] = audio[j] / 512 * s->kbd_long_1024[1024 - i - 1];
+ cpe->ch[channel].saved[i] = audio[j] / 512 * s->kbd_long_1024[i];
}
- ff_mdct_calc(&s->mdct, s->coefs[channel], s->output, s->tmp);
+ ff_mdct_calc(&s->mdct, cpe->ch[channel].coeffs, s->output, s->tmp);
//convert coefficients into form used by AAC
for(i = 0; i < 1024; i++)
- s->coefs[channel][i] = -copysignf(pow(fabsf(s->coefs[channel][i]), 0.75f), s->coefs[channel][i]);
+ cpe->ch[channel].coeffs[i] = -copysignf(pow(fabsf(cpe->ch[channel].coeffs[i]), 0.75f), cpe->ch[channel].coeffs[i]);
- determine_scales(avctx, channel);
- apply_psychoacoustics(avctx, channel);
+ determine_scales(avctx, cpe, channel);
+ apply_psychoacoustics(avctx, cpe, channel);
}
/**
* Encode ics_info element.
* @see Table 4.6
*/
-static void put_ics_info(AVCodecContext *avctx)
+static void put_ics_info(AVCodecContext *avctx, ics_struct *info)
{
AACEncContext *s = avctx->priv_data;
put_bits(&s->pb, 1, 0); // ics_reserved bit
- put_bits(&s->pb, 2, 0); // only_long_window_sequence
- put_bits(&s->pb, 1, 1); // window shape - Kaiser-Bessel derived
- put_bits(&s->pb, 6, s->coded_swb_num); // max scalefactor bands
+ put_bits(&s->pb, 2, info->window_sequence);
+ put_bits(&s->pb, 1, info->window_shape);
+ put_bits(&s->pb, 6, info->max_sfb);
put_bits(&s->pb, 1, 0); // no prediction
}
/**
* Scan spectral band and determine optimal codebook for it.
*/
-static int determine_section_info(AACEncContext *s, int channel, int start, int size)
+static int determine_section_info(AACEncContext *s, cpe_struct *cpe, int channel, int start, int size)
{
int i;
int maxval, sign;
@@ -306,8 +359,8 @@ static int determine_section_info(AACEnc
maxval = 0;
sign = 0;
for(i = start; i < start + size; i++){
- maxval = FFMAX(maxval, FFABS(s->icoefs[channel][i]));
- if(s->icoefs[channel][i] < 0) sign = 1;
+ maxval = FFMAX(maxval, FFABS(cpe->ch[channel].icoefs[i]));
+ if(cpe->ch[channel].icoefs[i] < 0) sign = 1;
}
///TODO: better decision
@@ -316,7 +369,7 @@ static int determine_section_info(AACEnc
return 11; //escape codebook
}
-static void encode_codebook(AACEncContext *s, int channel, int start, int size, int cb)
+static void encode_codebook(AACEncContext *s, cpe_struct *cpe, int channel, int start, int size, int cb)
{
const uint8_t *bits = aac_cb_info[cb].bits;
const uint16_t *codes = aac_cb_info[cb].codes;
@@ -330,50 +383,50 @@ static void encode_codebook(AACEncContex
for(i = start; i < start + size; i += dim){
idx = 0;
for(j = 0; j < dim; j++)
- idx = idx*17 + FFMIN(FFABS(s->icoefs[channel][i+j]), 16);
+ idx = idx*17 + FFMIN(FFABS(cpe->ch[channel].icoefs[i+j]), 16);
put_bits(&s->pb, bits[idx], codes[idx]);
//output signs
for(j = 0; j < dim; j++)
- if(s->icoefs[channel][i+j])
- put_bits(&s->pb, 1, s->icoefs[channel][i+j] < 0);
+ if(cpe->ch[channel].icoefs[i+j])
+ put_bits(&s->pb, 1, cpe->ch[channel].icoefs[i+j] < 0);
//output escape values
for(j = 0; j < dim; j++)
- if(FFABS(s->icoefs[channel][i+j]) > 15){
- int l = av_log2(FFABS(s->icoefs[channel][i+j]));
+ if(FFABS(cpe->ch[channel].icoefs[i+j]) > 15){
+ int l = av_log2(FFABS(cpe->ch[channel].icoefs[i+j]));
put_bits(&s->pb, l - 4 + 1, (1 << (l - 4 + 1)) - 2);
- put_bits(&s->pb, l, FFABS(s->icoefs[channel][i+j]) & ((1 << l) - 1));
+ put_bits(&s->pb, l, FFABS(cpe->ch[channel].icoefs[i+j]) & ((1 << l) - 1));
}
}
}else if(aac_cb_info[cb].flags & CB_UNSIGNED){
for(i = start; i < start + size; i += dim){
idx = 0;
for(j = 0; j < dim; j++)
- idx = idx * aac_cb_info[cb].maxval + FFABS(s->icoefs[channel][i+j]);
+ idx = idx * aac_cb_info[cb].maxval + FFABS(cpe->ch[channel].icoefs[i+j]);
put_bits(&s->pb, bits[idx], codes[idx]);
//output signs
for(j = 0; j < dim; j++)
- if(s->icoefs[channel][i+j])
- put_bits(&s->pb, 1, s->icoefs[channel][i+j] < 0);
+ if(cpe->ch[channel].icoefs[i+j])
+ put_bits(&s->pb, 1, cpe->ch[channel].icoefs[i+j] < 0);
}
}else{
for(i = start; i < start + size; i += dim){
idx = 0;
for(j = 0; j < dim; j++)
- idx = idx * (aac_cb_info[cb].maxval*2 + 1) + s->icoefs[channel][i+j] + aac_cb_info[cb].maxval;
+ idx = idx * (aac_cb_info[cb].maxval*2 + 1) + cpe->ch[channel].icoefs[i+j] + aac_cb_info[cb].maxval;
put_bits(&s->pb, bits[idx], codes[idx]);
}
}
}
-static void encode_section_data(AVCodecContext *avctx, AACEncContext *s, int channel)
+static void encode_section_data(AVCodecContext *avctx, AACEncContext *s, cpe_struct *cpe, int channel)
{
int i;
int bits = 5; //for long window
int count = 0;
- for(i = 0; i < s->coded_swb_num; i++){
- if(!i || s->codebooks[i] != s->codebooks[i-1]){
+ for(i = 0; i < cpe->ch[channel].ics.max_sfb; i++){
+ if(!i || cpe->ch[channel].cb[0][i] != cpe->ch[channel].cb[0][i-1]){
if(count){
while(count >= (1 << bits) - 1){
put_bits(&s->pb, bits, (1 << bits) - 1);
@@ -381,7 +434,7 @@ static void encode_section_data(AVCodecC
}
put_bits(&s->pb, bits, count);
}
- put_bits(&s->pb, 4, s->codebooks[i]);
+ put_bits(&s->pb, 4, cpe->ch[channel].cb[0][i]);
count = 1;
}else
count++;
@@ -395,27 +448,27 @@ static void encode_section_data(AVCodecC
}
}
-static void encode_scale_factor_data(AVCodecContext *avctx, AACEncContext *s, int channel)
+static void encode_scale_factor_data(AVCodecContext *avctx, AACEncContext *s, cpe_struct *cpe, int channel)
{
- int off = s->global_gain, diff;
+ int off = cpe->ch[channel].gain, diff;
int i;
- for(i = 0; i < s->coded_swb_num; i++){
- if(!s->zeroes[channel][i]){
- diff = s->scales[channel][i] - off + SCALE_DIFF_ZERO;
- off = s->scales[channel][i];
+ for(i = 0; i < cpe->ch[channel].ics.max_sfb; i++){
+ if(!cpe->ch[channel].zeroes[i]){
+ diff = cpe->ch[channel].sf_idx[i] - off + SCALE_DIFF_ZERO;
+ off = cpe->ch[channel].sf_idx[i];
put_bits(&s->pb, bits[diff], code[diff]);
}
}
}
-static void encode_spectral_data(AVCodecContext *avctx, AACEncContext *s, int channel)
+static void encode_spectral_data(AVCodecContext *avctx, AACEncContext *s, cpe_struct *cpe, int channel)
{
int start = 0, i;
- for(i = 0; i < s->coded_swb_num; i++){
- if(!s->zeroes[channel][i])
- encode_codebook(s, channel, start, s->swb_sizes[i], s->codebooks[i]);
+ for(i = 0; i < cpe->ch[channel].ics.max_sfb; i++){
+ if(!cpe->ch[channel].zeroes[i])
+ encode_codebook(s, cpe, channel, start, s->swb_sizes[i], cpe->ch[channel].cb[0][i]);
start += s->swb_sizes[i];
}
}
@@ -423,30 +476,30 @@ static void encode_spectral_data(AVCodec
/**
* Encode one channel of audio data.
*/
-static int encode_individual_channel(AVCodecContext *avctx, int channel, int common_window)
+static int encode_individual_channel(AVCodecContext *avctx, cpe_struct *cpe, int channel, int common_window)
{
AACEncContext *s = avctx->priv_data;
int i, j, g = 0;
i = 0;
while(i < 1024){
- if(!s->zeroes[channel][g]){
- s->codebooks[g] = determine_section_info(s, channel, i, s->swb_sizes[g]);
- s->zeroes[channel][g] = !s->codebooks[g];
+ if(!cpe->ch[channel].zeroes[g]){
+ cpe->ch[channel].cb[0][g] = determine_section_info(s, cpe, channel, i, s->swb_sizes[g]);
+ cpe->ch[channel].zeroes[g] = !cpe->ch[channel].cb[0][g];
}else
- s->codebooks[g] = 0;
+ cpe->ch[channel].cb[0][g] = 0;
i += s->swb_sizes[g];
g++;
}
- put_bits(&s->pb, 8, s->global_gain); //global gain
- if(!common_window) put_ics_info(avctx);
- encode_section_data(avctx, s, channel);
- encode_scale_factor_data(avctx, s, channel);
+ put_bits(&s->pb, 8, cpe->ch[channel].gain); //global gain
+ if(!common_window) put_ics_info(avctx, &cpe->ch[channel].ics);
+ encode_section_data(avctx, s, cpe, channel);
+ encode_scale_factor_data(avctx, s, cpe,channel);
put_bits(&s->pb, 1, 0); //pulse
put_bits(&s->pb, 1, 0); //tns
put_bits(&s->pb, 1, 0); //ssr
- encode_spectral_data(avctx, s, channel);
+ encode_spectral_data(avctx, s, cpe, channel);
return 0;
}
@@ -456,9 +509,9 @@ static int aac_encode_frame(AVCodecConte
AACEncContext *s = avctx->priv_data;
int16_t *samples = data;
- analyze(avctx, s, samples, 0);
+ analyze(avctx, s, &s->cpe, samples, 0);
if(avctx->channels > 1)
- analyze(avctx, s, samples, 1);
+ analyze(avctx, s, &s->cpe, samples, 1);
init_put_bits(&s->pb, frame, buf_size*8);
//output encoded
@@ -466,19 +519,19 @@ static int aac_encode_frame(AVCodecConte
case 1:
put_bits(&s->pb, 3, ID_SCE);
put_bits(&s->pb, 4, 0); //tag
- encode_individual_channel(avctx, 0, 0);
+ encode_individual_channel(avctx, &s->cpe, 0, 0);
break;
case 2:
put_bits(&s->pb, 3, ID_CPE);
put_bits(&s->pb, 4, 0); //tag
- s->common_window = 1;
- put_bits(&s->pb, 1, s->common_window);
- if(s->common_window){
- put_ics_info(avctx);
+ s->cpe.common_window = 1;
+ put_bits(&s->pb, 1, s->cpe.common_window);
+ if(s->cpe.common_window){
+ put_ics_info(avctx, &s->cpe.ch[0].ics);
put_bits(&s->pb, 2, 0); //no MS mode for now
}
- encode_individual_channel(avctx, 0, s->common_window);
- encode_individual_channel(avctx, 1, s->common_window);
+ encode_individual_channel(avctx, &s->cpe, 0, s->cpe.common_window);
+ encode_individual_channel(avctx, &s->cpe, 1, s->cpe.common_window);
break;
default:
av_log(NULL,0,"?");
More information about the FFmpeg-soc
mailing list