[FFmpeg-soc] [soc]: r2887 - in aacenc: aacenc.c aacpsy.c aacpsy.h
kostya
subversion at mplayerhq.hu
Mon Jul 28 15:22:53 CEST 2008
Author: kostya
Date: Mon Jul 28 15:22:53 2008
New Revision: 2887
Log:
Rudimentary multichannel support
Modified:
aacenc/aacenc.c
aacenc/aacpsy.c
aacenc/aacpsy.h
Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c (original)
+++ aacenc/aacenc.c Mon Jul 28 15:22:53 2008
@@ -181,7 +181,9 @@ typedef struct {
int swb_num1024;
const uint8_t *swb_sizes128;
int swb_num128;
- ChannelElement cpe;
+
+ ProgramConfig pc;
+ ChannelElement *cpe;
AACPsyContext psy;
} AACEncContext;
@@ -239,6 +241,7 @@ static av_cold int aac_encode_init(AVCod
ff_sine_window_init(sine_long_1024, 1024);
ff_sine_window_init(sine_short_128, 128);
+ s->cpe = av_mallocz(sizeof(ChannelElement) * ((avctx->channels + 1) >> 1));
//TODO: psy model selection with some option
ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, 0, s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128);
avctx->extradata = av_malloc(2);
@@ -300,6 +303,45 @@ static void analyze(AVCodecContext *avct
}
/**
+ * Encode channel layout (aka program config element); every channel element is listed as a front element.
+ * @param avctx supplies the channel count, @param s supplies the bit writer, sample rate index and ProgramConfig; @see table 4.2
+ */
+static void put_program_config_element(AVCodecContext *avctx, AACEncContext *s)
+{
+ int i;
+ ProgramConfig *pc = &s->pc;
+
+ put_bits(&s->pb, 2, 0); //object type - ?
+ put_bits(&s->pb, 4, s->samplerate_index); //sample rate index
+
+ put_bits(&s->pb, 4, (avctx->channels + 1) >> 1); // all elements are front; rounded up so a trailing single channel is counted and the count matches the list below
+ put_bits(&s->pb, 4, 0); // no side channels
+ put_bits(&s->pb, 4, 0); // no back channels
+ put_bits(&s->pb, 2, 0); // no LFE
+ put_bits(&s->pb, 3, 0); // no associated data
+ put_bits(&s->pb, 4, 0); // no valid channel couplings
+
+ put_bits(&s->pb, 1, pc->mono_mixdown);
+ if(pc->mono_mixdown)
+ put_bits(&s->pb, 4, pc->mixdown_coeff_index); // NOTE(review): 4-bit field fed from mixdown_coeff_index - confirm this is the intended value
+ put_bits(&s->pb, 1, pc->stereo_mixdown);
+ if(pc->stereo_mixdown)
+ put_bits(&s->pb, 4, pc->mixdown_coeff_index); // NOTE(review): same value reused for the stereo case - confirm
+ put_bits(&s->pb, 1, pc->matrix_mixdown);
+ if(pc->matrix_mixdown){
+ put_bits(&s->pb, 2, pc->mixdown_coeff_index);
+ put_bits(&s->pb, 1, pc->pseudo_surround);
+ }
+ //TODO: proper channel map output
+ for(i = 0; i < avctx->channels; i += 2){
+ put_bits(&s->pb, 1, avctx->channels - i > 1); // CPE only when a full pair remains; a trailing single channel is an SCE, as in aac_encode_frame()
+ put_bits(&s->pb, 4, i/2); // element tag, same numbering aac_encode_frame() writes (i >> 1)
+ }
+ align_put_bits(&s->pb);
+ put_bits(&s->pb, 8, 0); // no commentary bytes
+}
+
+/**
* Encode ics_info element.
* @see Table 4.6
*/
@@ -654,45 +696,47 @@ static int aac_encode_frame(AVCodecConte
uint8_t *frame, int buf_size, void *data)
{
AACEncContext *s = avctx->priv_data;
- int16_t *samples = s->samples;
+ int16_t *samples = s->samples, *samples2;
+ ChannelElement *cpe;
+ int i, j, chans;
if(!samples){
s->samples = av_malloc(1024 * avctx->channels * sizeof(s->samples[0]));
memcpy(s->samples, data, 1024 * avctx->channels * sizeof(s->samples[0]));
return 0;
}
- ff_aac_psy_suggest_window(&s->psy, samples, data, 0, &s->cpe);
-
- analyze(avctx, s, &s->cpe, samples, 0);
- if(avctx->channels > 1)
- analyze(avctx, s, &s->cpe, samples, 1);
-
- ff_aac_psy_analyze(&s->psy, 0, &s->cpe);
init_put_bits(&s->pb, frame, buf_size*8);
if(avctx->frame_number==1 && !(avctx->flags & CODEC_FLAG_BITEXACT)){
put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
}
- switch(avctx->channels){
- case 1:
- put_bits(&s->pb, 3, ID_SCE);
- put_bits(&s->pb, 4, 0); //tag
- encode_individual_channel(avctx, &s->cpe, 0);
- break;
- case 2:
- put_bits(&s->pb, 3, ID_CPE);
- put_bits(&s->pb, 4, 0); //tag
- put_bits(&s->pb, 1, s->cpe.common_window);
- if(s->cpe.common_window){
- put_ics_info(avctx, &s->cpe.ch[0].ics);
- encode_ms_info(&s->pb, &s->cpe);
+ //encode channels as channel pairs and one optional single channel element
+ /*if(avctx->channels > 2){
+ put_bits(&s->pb, 3, ID_PCE);
+ put_bits(&s->pb, 4, 0);
+ put_program_config_element(avctx, s);
+ }*/
+ for(i = 0; i < avctx->channels; i += 2){
+ chans = FFMIN(avctx->channels - i, 2);
+ cpe = &s->cpe[i/2];
+ samples2 = samples + i;
+ ff_aac_psy_suggest_window(&s->psy, samples2, data, i, cpe);
+ for(j = 0; j < chans; j++){
+ analyze(avctx, s, cpe, samples2, j);
+ }
+ ff_aac_psy_analyze(&s->psy, i, cpe);
+ put_bits(&s->pb, 3, chans > 1 ? ID_CPE : ID_SCE);
+ put_bits(&s->pb, 4, i >> 1);
+ if(chans == 2){
+ put_bits(&s->pb, 1, cpe->common_window);
+ if(cpe->common_window){
+ put_ics_info(avctx, &cpe->ch[0].ics);
+ encode_ms_info(&s->pb, cpe);
+ }
+ }
+ for(j = 0; j < chans; j++){
+ encode_individual_channel(avctx, cpe, j);
}
- encode_individual_channel(avctx, &s->cpe, 0);
- encode_individual_channel(avctx, &s->cpe, 1);
- break;
- default:
- av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
- return -1;
}
put_bits(&s->pb, 3, ID_END);
@@ -713,6 +757,7 @@ static av_cold int aac_encode_end(AVCode
ff_mdct_end(&s->mdct128);
ff_aac_psy_end(&s->psy);
av_freep(&s->samples);
+ av_freep(&s->cpe);
return 0;
}
Modified: aacenc/aacpsy.c
==============================================================================
--- aacenc/aacpsy.c (original)
+++ aacenc/aacpsy.c Mon Jul 28 15:22:53 2008
@@ -76,13 +76,14 @@ static inline float calc_distortion(floa
/**
* Produce integer coefficients from scalefactors provided by model.
*/
-static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int search_pulses)
+static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int channel, int search_pulses)
{
int i, w, w2, g, ch;
int start, sum, maxsfb, cmaxsfb;
int pulses, poff[4], pamp[4];
+ int chans = FFMIN(apc->avctx->channels - channel, 2);
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
start = 0;
maxsfb = 0;
cpe->ch[ch].pulse.present = 0;
@@ -152,7 +153,7 @@ static void psy_create_output(AACPsyCont
}
}
- if(apc->avctx->channels > 1 && cpe->common_window){
+ if(apc->avctx->channels - channel > 1 && cpe->common_window){
int msc = 0;
cpe->ch[0].ics.max_sfb = FFMAX(cpe->ch[0].ics.max_sfb, cpe->ch[1].ics.max_sfb);
cpe->ch[1].ics.max_sfb = cpe->ch[0].ics.max_sfb;
@@ -167,8 +168,9 @@ static void psy_create_output(AACPsyCont
static void psy_null_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
{
int ch;
+ int chans = FFMIN(apc->avctx->channels - channel, 2);
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE;
cpe->ch[ch].ics.use_kb_window[0] = 1;
cpe->ch[ch].ics.num_windows = 1;
@@ -184,8 +186,9 @@ static void psy_null_process(AACPsyConte
int start;
int ch, g, i;
int minscale;
+ int chans = FFMIN(apc->avctx->channels - channel, 2);
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
start = 0;
for(g = 0; g < apc->num_bands1024; g++){
float energy = 0.0f, ffac = 0.0f, thr, dist;
@@ -207,7 +210,7 @@ static void psy_null_process(AACPsyConte
}
}
}
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
minscale = 255;
for(g = 0; g < apc->num_bands1024; g++)
if(!cpe->ch[ch].zeroes[0][g])
@@ -217,14 +220,15 @@ static void psy_null_process(AACPsyConte
if(!cpe->ch[ch].zeroes[0][g])
cpe->ch[ch].sf_idx[0][g] = FFMIN(minscale + SCALE_MAX_DIFF, cpe->ch[ch].sf_idx[0][g]);
}
- psy_create_output(apc, cpe, 1);
+ psy_create_output(apc, cpe, channel, 1);
}
static void psy_null8_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
{
int ch, i;
+ int chans = FFMIN(apc->avctx->channels - channel, 2);
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
int prev_seq = cpe->ch[ch].ics.window_sequence_prev;
cpe->ch[ch].ics.use_kb_window[1] = cpe->ch[ch].ics.use_kb_window[0];
cpe->ch[ch].ics.window_sequence_prev = cpe->ch[ch].ics.window_sequence;
@@ -257,9 +261,10 @@ static void psy_null8_process(AACPsyCont
{
int start;
int w, ch, g, i;
+ int chans = FFMIN(apc->avctx->channels - channel, 2);
//detect M/S
- if(apc->avctx->channels > 1 && cpe->common_window){
+ if(chans > 1 && cpe->common_window){
start = 0;
for(w = 0; w < cpe->ch[0].ics.num_windows; w++){
for(g = 0; g < cpe->ch[0].ics.num_swb; g++){
@@ -271,7 +276,7 @@ static void psy_null8_process(AACPsyCont
}
}
}
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
cpe->ch[ch].gain = SCALE_ONE_POS;
for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
@@ -280,7 +285,7 @@ static void psy_null8_process(AACPsyCont
}
}
}
- psy_create_output(apc, cpe, 0);
+ psy_create_output(apc, cpe, channel, 0);
}
/**
@@ -422,9 +427,10 @@ static av_cold int psy_3gpp_init(AACPsyC
static void psy_3gpp_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
{
int ch;
+ int chans = FFMIN(apc->avctx->channels - channel, 2);
//XXX: stub, because encoder does not support long to short window transition yet :(
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE;
cpe->ch[ch].ics.use_kb_window[0] = 1;
cpe->ch[ch].ics.num_windows = 1;
@@ -479,9 +485,10 @@ static void psy_3gpp_process(AACPsyConte
Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
float stereo_att, pe_target;
int bits_avail;
+ const int chans = FFMIN(apc->avctx->channels - channel, 2);
//calculate and apply stereo attenuation factor - 5.2
- if(apc->avctx->channels > 1){
+ if(apc->avctx->channels - channel > 1){
float l, r;
stereo_att = 1.0 / 2.0; //XXX: find some way to determine it
for(i = 0; i < 1024; i++){
@@ -494,7 +501,7 @@ static void psy_3gpp_process(AACPsyConte
//calculate energies, initial thresholds and related values - 5.4.2
memset(pctx->band, 0, sizeof(pctx->band));
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
start = 0;
cpe->ch[ch].gain = 0;
for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
@@ -517,7 +524,7 @@ static void psy_3gpp_process(AACPsyConte
}
//modify thresholds - spread, threshold in quiet - 5.4.3
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
for(g = 1; g < cpe->ch[ch].ics.num_swb; g++){
g2 = w*16 + g;
@@ -540,7 +547,7 @@ static void psy_3gpp_process(AACPsyConte
}
// M/S detection - 5.5.2
- if(apc->avctx->channels > 1 && cpe->common_window){
+ if(chans > 1 && cpe->common_window){
start = 0;
for(w = 0; w < cpe->ch[0].ics.num_windows; w++){
for(g = 0; g < cpe->ch[0].ics.num_swb; g++){
@@ -571,7 +578,7 @@ static void psy_3gpp_process(AACPsyConte
}
}
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
pctx->a[ch] = pctx->b[ch] = pctx->pe[ch] = pctx->thr[ch] = 0.0f;
for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
@@ -594,7 +601,7 @@ static void psy_3gpp_process(AACPsyConte
bits_avail = pctx->avg_bits + pctx->reservoir;
bits_avail = FFMIN(bits_avail, pctx->avg_bits * 1.5);
pe_target = 1.18f * bits_avail / apc->avctx->channels;
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
float t0, pe, r;
if(pctx->b[ch] == 0.0f) continue;
for(i = 0; i < 2; i++){
@@ -622,7 +629,7 @@ static void psy_3gpp_process(AACPsyConte
}
//determine scalefactors - 5.6.2
- for(ch = 0; ch < apc->avctx->channels; ch++){
+ for(ch = 0; ch < chans; ch++){
int min_scale = 256;
prev_scale = -1;
cpe->ch[ch].gain = 0;
@@ -655,7 +662,7 @@ static void psy_3gpp_process(AACPsyConte
}
memcpy(pctx->prev_band, pctx->band, sizeof(pctx->band));
- psy_create_output(apc, cpe, 0);
+ psy_create_output(apc, cpe, channel, 0);
}
static av_cold void psy_3gpp_end(AACPsyContext *apc)
Modified: aacenc/aacpsy.h
==============================================================================
--- aacenc/aacpsy.h (original)
+++ aacenc/aacpsy.h Mon Jul 28 15:22:53 2008
@@ -68,6 +68,21 @@ typedef struct {
int amp[4];
} Pulse;
+#define MAX_TAGID 16
+
+/**
+ * Program configuration - describes how channels are arranged. Either read from
+ * stream (ID_PCE) or created based on a default fixed channel arrangement. NOTE(review): put_program_config_element() writes the *_mixdown fields as 1-bit flags and tests them for truth, so -1 would count as "present" - confirm the intended encoding.
+ */
+typedef struct {
+ int che_type[4][MAX_TAGID]; ///< channel element type; first index is one of the first 4 raw_data_block IDs, second index spans MAX_TAGID entries
+ int mono_mixdown; ///< The SCE tag to use if user requests mono output, -1 if not available.
+ int stereo_mixdown; ///< The CPE tag to use if user requests stereo output, -1 if not available.
+ int matrix_mixdown; ///< The CPE tag to use if user requests matrixed stereo output, -1 if not available.
+ int mixdown_coeff_index; ///< mixdown coefficient index, 0-3
+ int pseudo_surround; ///< Mix surround channels out of phase.
+} ProgramConfig;
+
/**
* Individual Channel Stream
*/
More information about the FFmpeg-soc
mailing list