[FFmpeg-devel] [RFC] AAC Encoder
Kostya
kostya.shishkov
Mon Aug 11 11:18:03 CEST 2008
Here's my implementation of an AAC encoder.
Since the AAC decoder is not fully in SVN yet, I'm omitting the data structures
and declarations used from it; I'll sort that out after the decoder is committed.
The main difference is that the psy model fills the data structures and the encoder
writes the bitstream from them, so several structures have additional members.
I've also stripped out the model based on 3GPP TS 26.403 to make review easier.
-------------- next part --------------
Index: libavcodec/Makefile
===================================================================
--- libavcodec/Makefile (revision 14690)
+++ libavcodec/Makefile (working copy)
@@ -25,6 +25,7 @@
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
+OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aacpsy.o aactab.o mpeg4audio.o mdct.o fft.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o
OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3tab.o ac3dec_data.o ac3.o mdct.o fft.o
OBJS-$(CONFIG_AC3_ENCODER) += ac3enc.o ac3tab.o ac3.o
Index: libavcodec/aacenc.c
===================================================================
--- libavcodec/aacenc.c (revision 0)
+++ libavcodec/aacenc.c (revision 0)
@@ -0,0 +1,729 @@
+/*
+ * AAC encoder
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file aacenc.c
+ * AAC encoder
+ */
+
+#include "avcodec.h"
+#include "bitstream.h"
+#include "dsputil.h"
+#include "mpeg4audio.h"
+
+#include "aacpsy.h"
+#include "aac.h"
+#include "aactab.h"
+
+/*
+ * Band width tables for the 1024-coefficient (long window) case.
+ * Each entry is the number of coefficients in one band. aac_encode_init()
+ * picks one table through swb_size_1024[] and hands it to the psy model.
+ * NOTE(review): the _96/_64/... suffixes presumably name the sample rate in
+ * kHz and every table is expected to sum to 1024 - confirm against the spec.
+ */
+static const uint8_t swb_size_1024_96[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
+ 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_64[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
+ 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
+};
+
+static const uint8_t swb_size_1024_48[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
+ 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 96
+};
+
+static const uint8_t swb_size_1024_32[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
+ 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+};
+
+static const uint8_t swb_size_1024_24[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
+ 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_16[] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
+ 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_8[] = {
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
+ 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
+};
+
+/*
+ * Selector for the long-window tables above. aac_encode_init() indexes it
+ * with the position of the sample rate in ff_mpeg4audio_sample_rates[].
+ * Only 12 entries exist, so larger indices have no table.
+ */
+static const uint8_t *swb_size_1024[] = {
+ swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
+ swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
+ swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
+ swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
+};
+
+/*
+ * Band width tables for the 128-coefficient (short window) case.
+ * Each entry is the number of coefficients in one band. aac_encode_init()
+ * picks one table through swb_size_128[] and hands it to the psy model.
+ * NOTE(review): the suffixes presumably name the sample rate in kHz and every
+ * table is expected to sum to 128 - confirm against the spec.
+ */
+static const uint8_t swb_size_128_96[] = {
+ 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
+};
+
+static const uint8_t swb_size_128_48[] = {
+ 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
+};
+
+static const uint8_t swb_size_128_24[] = {
+ 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
+};
+
+static const uint8_t swb_size_128_16[] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
+};
+
+static const uint8_t swb_size_128_8[] = {
+ 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
+};
+
+/*
+ * Selector for the short-window tables above, indexed in aac_encode_init()
+ * with the same sample rate index as swb_size_1024[]. Only 12 entries exist.
+ */
+static const uint8_t *swb_size_128[] = {
+ /* the last entry on the following row is swb_size_128_64 but is a
+ duplicate of swb_size_128_96 */
+ swb_size_128_96, swb_size_128_96, swb_size_128_96,
+ swb_size_128_48, swb_size_128_48, swb_size_128_48,
+ swb_size_128_24, swb_size_128_24, swb_size_128_16,
+ swb_size_128_16, swb_size_128_16, swb_size_128_8
+};
+
+/* Feature bits stored in aac_cb_info[].flags */
+#define CB_UNSIGNED 0x01 ///< coefficients are coded as absolute values
+#define CB_PAIRS 0x02 ///< coefficients are grouped into pairs before coding (quads by default)
+#define CB_ESCAPE 0x04 ///< codebook allows escapes
+
+/**
+ * spectral coefficients codebook information
+ *
+ * The table is indexed by the band type that is written to the bitstream
+ * (0..15). cb_num is the row used to index ff_aac_spectral_bits[] and
+ * ff_aac_spectral_codes[]; it is -1 for band types that carry no spectral
+ * codewords, so it must not be used as an index for those entries.
+ */
+static const struct {
+ int16_t maxval; ///< maximum possible value
+ int8_t cb_num; ///< codebook number
+ uint8_t flags; ///< codebook features
+} aac_cb_info[] = {
+ { 0, -1, CB_UNSIGNED }, // zero codebook
+ { 1, 0, 0 },
+ { 1, 1, 0 },
+ { 2, 2, CB_UNSIGNED },
+ { 2, 3, CB_UNSIGNED },
+ { 4, 4, CB_PAIRS },
+ { 4, 5, CB_PAIRS },
+ { 7, 6, CB_PAIRS | CB_UNSIGNED },
+ { 7, 7, CB_PAIRS | CB_UNSIGNED },
+ { 12, 8, CB_PAIRS | CB_UNSIGNED },
+ { 12, 9, CB_PAIRS | CB_UNSIGNED },
+ { 8191, 10, CB_PAIRS | CB_UNSIGNED | CB_ESCAPE },
+ { -1, -1, 0 }, // reserved
+ { -1, -1, 0 }, // perceptual noise substitution
+ { -1, -1, 0 }, // intensity out-of-phase
+ { -1, -1, 0 }, // intensity in-phase
+};
+
+/**
+ * default channel configurations
+ *
+ * Row n-1 describes the layout used for n channels: the first byte is the
+ * number of syntactic elements, the following bytes are the element types
+ * in the order they are written. Unused trailing bytes are zero.
+ */
+static const uint8_t aac_chan_configs[6][5] = {
+ {1, ID_SCE}, // 1 channel - single channel element
+ {1, ID_CPE}, // 2 channels - channel pair
+ {2, ID_SCE, ID_CPE}, // 3 channels - center + stereo
+ {3, ID_SCE, ID_CPE, ID_SCE}, // 4 channels - front center + stereo + back center
+ {3, ID_SCE, ID_CPE, ID_CPE}, // 5 channels - front center + stereo + back stereo
+ {4, ID_SCE, ID_CPE, ID_CPE, ID_LFE}, // 6 channels - front center + stereo + back stereo + LFE
+};
+
+/**
+ * AAC encoder private context.
+ */
+typedef struct {
+ PutBitContext pb; ///< bit writer for the frame being encoded
+ MDCTContext mdct1024; ///< MDCT used for long windows
+ MDCTContext mdct128; ///< MDCT used for short windows
+ DSPContext dsp; ///< vector multiply helpers used for short-window windowing
+ DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< windowed time-domain input of the MDCT
+ DECLARE_ALIGNED_16(FFTSample, tmp[1024]); ///< scratch buffer passed to ff_mdct_calc()
+ int16_t* samples; ///< two frames of interleaved input: the one being coded and the next one
+
+ int samplerate_index; ///< index of the sample rate in ff_mpeg4audio_sample_rates
+ const uint8_t *swb_sizes1024; ///< band widths for long windows
+ int swb_num1024; ///< number of bands for long windows
+ const uint8_t *swb_sizes128; ///< band widths for short windows
+ int swb_num128; ///< number of bands for short windows
+
+ ProgramConfig pc; ///< NOTE(review): not referenced by the code in this file
+ ChannelElement *cpe; ///< one entry per syntactic element of the channel layout
+ AACPsyContext psy; ///< psychoacoustic model state
+} AACEncContext;
+
+/**
+ * Fill avctx->extradata with the AAC audio config object.
+ *
+ * Sixteen bits are written: object type, sample rate index and channel
+ * count, followed by the three GASpecificConfig flags (all zero).
+ * The caller must have allocated extradata and set extradata_size.
+ * @see 1.6.2.1
+ */
+static void put_audio_specific_config(AVCodecContext *avctx)
+{
+    AACEncContext *enc = avctx->priv_data;
+    PutBitContext  cfg;
+
+    init_put_bits(&cfg, avctx->extradata, avctx->extradata_size*8);
+
+    /* AudioSpecificConfig */
+    put_bits(&cfg, 5, 2);                     // object type - AAC-LC
+    put_bits(&cfg, 4, enc->samplerate_index); // sample rate index
+    put_bits(&cfg, 4, avctx->channels);       // channel configuration
+
+    /* GASpecificConfig: frame length flag (1024 samples), no dependency on
+     * a core coder, no extension - three zero bits in a row */
+    put_bits(&cfg, 3, 0);
+
+    flush_put_bits(&cfg);
+}
+
+/**
+ * Initialize the encoder.
+ *
+ * Validates sample rate and channel count, selects the band tables, sets up
+ * the transforms and windows, allocates the sample/element buffers and the
+ * psy model, and writes the 2-byte extradata.
+ *
+ * @return 0 on success, -1 on unsupported parameters or allocation failure
+ */
+static av_cold int aac_encode_init(AVCodecContext *avctx)
+{
+    AACEncContext *s = avctx->priv_data;
+    const int num_tables1024 = sizeof(swb_size_1024) / sizeof(swb_size_1024[0]);
+    const int num_tables128  = sizeof(swb_size_128)  / sizeof(swb_size_128[0]);
+    int i;
+
+    avctx->frame_size = 1024;
+
+    for(i = 0; i < 16; i++)
+        if(avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
+            break;
+    /* a rate may be present in the MPEG-4 rate table and still have no band
+     * table here, so the index must also be checked against the table sizes */
+    if(i == 16 || i >= num_tables1024 || i >= num_tables128){
+        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
+        return -1;
+    }
+    /* aac_chan_configs[] is indexed with channels-1 */
+    if(avctx->channels < 1 || avctx->channels > 6){
+        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
+        return -1;
+    }
+    s->samplerate_index = i;
+    s->swb_sizes1024 = swb_size_1024[i];
+    s->swb_num1024 = ff_aac_num_swb_1024[i];
+    s->swb_sizes128 = swb_size_128[i];
+    s->swb_num128 = ff_aac_num_swb_128[i];
+
+    dsputil_init(&s->dsp, avctx);
+    ff_mdct_init(&s->mdct1024, 11, 0);
+    ff_mdct_init(&s->mdct128, 8, 0);
+    // window init
+    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
+    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
+    ff_sine_window_init(ff_aac_sine_long_1024, 1024);
+    ff_sine_window_init(ff_aac_sine_short_128, 128);
+
+    /* room for the frame being coded plus one frame of look-ahead */
+    s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
+    s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
+    avctx->extradata = av_malloc(2);
+    if(!s->samples || !s->cpe || !avctx->extradata){
+        av_log(avctx, AV_LOG_ERROR, "Cannot allocate encoder buffers.\n");
+        goto fail;
+    }
+    avctx->extradata_size = 2;
+
+    //TODO: psy model selection with some option
+    if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, aac_chan_configs[avctx->channels-1][0], 0, s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128) < 0){
+        av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n");
+        goto fail;
+    }
+    put_audio_specific_config(avctx);
+    return 0;
+
+fail:
+    /* release everything acquired above so a failed init does not leak */
+    ff_mdct_end(&s->mdct1024);
+    ff_mdct_end(&s->mdct128);
+    av_freep(&s->samples);
+    av_freep(&s->cpe);
+    av_freep(&avctx->extradata);
+    avctx->extradata_size = 0;
+    return -1;
+}
+
+/**
+ * Perform windowing and MDCT.
+ *
+ * Builds the windowed time-domain block for one channel in s->output from
+ * the previous frame (cpe->ch[channel].saved) and the current frame (audio),
+ * transforms it into cpe->ch[channel].coeffs and refreshes saved[] for the
+ * next call.
+ *
+ * @param avctx   codec context, supplies the interleaving stride (channels)
+ * @param s       encoder context (scratch buffers, MDCT and DSP contexts)
+ * @param cpe     element whose channel is processed
+ * @param audio   interleaved 16-bit input, already offset to the element
+ * @param channel channel index inside the element
+ */
+static void analyze(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, short *audio, int channel)
+{
+ int i, j, k;
+ // window shapes: index 0 of use_kb_window selects the shape for this frame,
+ // index 1 selects the short window applied to the previous frame's overlap
+ const float * lwindow = cpe->ch[channel].ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_aac_sine_long_1024;
+ const float * swindow = cpe->ch[channel].ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_aac_sine_short_128;
+ const float * pwindow = cpe->ch[channel].ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_aac_sine_short_128;
+
+ if (cpe->ch[channel].ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+ // first half of the block comes from the previous call
+ memcpy(s->output, cpe->ch[channel].saved, sizeof(float)*1024);
+ if(cpe->ch[channel].ics.window_sequence[0] == LONG_STOP_SEQUENCE){
+ // stop window: zero the first 448 samples and apply the short window slope to 448..575
+ memset(s->output, 0, sizeof(s->output[0]) * 448);
+ for(i = 448; i < 576; i++)
+ s->output[i] = cpe->ch[channel].saved[i] * pwindow[i - 448];
+ // NOTE(review): these values were already copied by the memcpy above
+ for(i = 576; i < 704; i++)
+ s->output[i] = cpe->ch[channel].saved[i];
+ }
+ if(cpe->ch[channel].ics.window_sequence[0] != LONG_START_SEQUENCE){
+ // second half: current frame scaled by 1/512 and multiplied by the reversed long window;
+ // saved[] receives the same samples multiplied by the rising long window
+ j = channel;
+ for (i = 0; i < 1024; i++, j += avctx->channels){
+ s->output[i+1024] = audio[j] / 512.0 * lwindow[1024 - i - 1];
+ cpe->ch[channel].saved[i] = audio[j] / 512.0 * lwindow[i];
+ }
+ }else{
+ // start window: 448 unwindowed samples, 128 samples with the reversed short window, then zeros
+ j = channel;
+ for(i = 0; i < 448; i++, j += avctx->channels)
+ s->output[i+1024] = audio[j] / 512.0;
+ for(i = 448; i < 576; i++, j += avctx->channels)
+ s->output[i+1024] = audio[j] / 512.0 * swindow[576 - i - 1];
+ memset(s->output+1024+576, 0, sizeof(s->output[0]) * 448);
+ // saved[] keeps the scaled but unwindowed samples
+ j = channel;
+ for(i = 0; i < 1024; i++, j += avctx->channels)
+ cpe->ch[channel].saved[i] = audio[j] / 512.0;
+ }
+ ff_mdct_calc(&s->mdct1024, cpe->ch[channel].coeffs, s->output, s->tmp);
+ }else{
+ // NOTE(review): this value of j is never read before it is reassigned below
+ j = channel;
+ // eight blocks of 256 samples, hopping by 128, taken from [saved | audio] starting at sample 448
+ for (k = 0; k < 1024; k += 128) {
+ for(i = 448 + k; i < 448 + k + 256; i++)
+ s->output[i - 448 - k] = (i < 1024) ? cpe->ch[channel].saved[i] : audio[channel + (i-1024)*avctx->channels] / 512.0;
+ // first block uses the previous frame's window shape for its first half
+ s->dsp.vector_fmul (s->output, k ? swindow : pwindow, 128);
+ s->dsp.vector_fmul_reverse(s->output+128, s->output+128, swindow, 128);
+ ff_mdct_calc(&s->mdct128, cpe->ch[channel].coeffs + k, s->output, s->tmp);
+ }
+ // saved[] keeps the scaled but unwindowed samples
+ j = channel;
+ for(i = 0; i < 1024; i++, j += avctx->channels)
+ cpe->ch[channel].saved[i] = audio[j] / 512.0;
+ }
+}
+
+/**
+ * Write the ics_info element for one channel stream.
+ *
+ * Long-type windows get a 6-bit max_sfb and a cleared prediction flag;
+ * eight-short windows get a 4-bit max_sfb followed by one grouping bit for
+ * each window after the first.
+ * @see Table 4.6
+ */
+static void put_ics_info(AVCodecContext *avctx, IndividualChannelStream *info)
+{
+    AACEncContext *s = avctx->priv_data;
+    PutBitContext *pb = &s->pb;
+    int win;
+
+    put_bits(pb, 1, 0);                        // ics_reserved bit
+    put_bits(pb, 2, info->window_sequence[0]);
+    put_bits(pb, 1, info->use_kb_window[0]);
+
+    if(info->window_sequence[0] == EIGHT_SHORT_SEQUENCE){
+        put_bits(pb, 4, info->max_sfb);
+        for(win = 1; win < info->num_windows; win++)
+            put_bits(pb, 1, info->group_len[win]);
+        return;
+    }
+
+    put_bits(pb, 6, info->max_sfb);
+    put_bits(pb, 1, 0);                        // no prediction
+}
+
+/**
+ * Write the mid/side information of a channel pair.
+ *
+ * The 2-bit ms.present value is always written. Only when it equals 1 is
+ * an explicit mask emitted: one bit per band below max_sfb for every window
+ * whose group_len entry is zero.
+ * @see 4.6.8.1
+ */
+static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
+{
+    int win, sfb;
+
+    put_bits(pb, 2, cpe->ms.present);
+    if(cpe->ms.present != 1)
+        return;
+
+    for(win = 0; win < cpe->ch[0].ics.num_windows; win++){
+        if(!cpe->ch[0].ics.group_len[win]){
+            for(sfb = 0; sfb < cpe->ch[0].ics.max_sfb; sfb++)
+                put_bits(pb, 1, cpe->ms.mask[win][sfb]);
+        }
+    }
+}
+
+/**
+ * Scan spectral band and determine optimal codebook for it.
+ *
+ * The band is examined in window win and in all following windows that
+ * belong to the same group. Every codebook able to represent the largest
+ * magnitude is costed in bits and the cheapest one is returned.
+ *
+ * @param s       encoder context (unused here)
+ * @param cpe     element holding the quantized coefficients
+ * @param channel channel index inside the element
+ * @param win     first window of the group
+ * @param band    band index, used to charge the cost of switching codebooks
+ * @param start   offset of the band's first coefficient in window win
+ * @param size    number of coefficients in the band
+ * @return band type: 0 if all coefficients are zero, 11 if any magnitude
+ *         exceeds 12, otherwise the cheapest codebook
+ */
+static int determine_section_info(AACEncContext *s, ChannelElement *cpe, int channel, int win, int band, int start, int size)
+{
+    int i, j, w;
+    int maxval;
+    int score, best, cb, bestcb, dim, idx;
+
+    maxval = 0;
+    w = win;
+    do{
+        for(i = start + (w-win)*128; i < start + (w-win)*128 + size; i++)
+            maxval = FFMAX(maxval, FFABS(cpe->ch[channel].icoefs[i]));
+        w++;
+    }while(w < cpe->ch[channel].ics.num_windows && cpe->ch[channel].ics.group_len[w]);
+
+    if(maxval > 12) return 11;
+    if(!maxval) return 0;
+
+    /* skip codebooks that cannot represent the largest magnitude */
+    for(cb = 0; cb < 12; cb++)
+        if(aac_cb_info[cb].maxval >= maxval)
+            break;
+    best = 9999;
+    bestcb = 11;
+    for(; cb < 12; cb++){
+        score = 0;
+        dim = (aac_cb_info[cb].flags & CB_PAIRS) ? 2 : 4;
+        if(!band || cpe->ch[channel].band_type[win][band - 1] != cb)
+            score += 9; //that's for new codebook entry
+        w = win;
+        if(aac_cb_info[cb].flags & CB_UNSIGNED){
+            /* the index radix must match the one used when the codeword is
+             * written: 17 for the escape codebook (magnitudes clipped to 16),
+             * maxval + 1 for the other unsigned codebooks */
+            const int range = (aac_cb_info[cb].flags & CB_ESCAPE) ? 17 : aac_cb_info[cb].maxval + 1;
+            do{
+                for(i = start + (w-win)*128; i < start + (w-win)*128 + size; i += dim){
+                    idx = 0;
+                    for(j = 0; j < dim; j++)
+                        idx = idx * range + FFMIN(FFABS(cpe->ch[channel].icoefs[i+j]), range - 1);
+                    score += ff_aac_spectral_bits[aac_cb_info[cb].cb_num][idx];
+                    /* one sign bit for every nonzero coefficient */
+                    for(j = 0; j < dim; j++)
+                        if(cpe->ch[channel].icoefs[i+j])
+                            score++;
+                }
+                w++;
+            }while(w < cpe->ch[channel].ics.num_windows && cpe->ch[channel].ics.group_len[w]);
+        }else{
+            do{
+                for(i = start + (w-win)*128; i < start + (w-win)*128 + size; i += dim){
+                    idx = 0;
+                    for(j = 0; j < dim; j++)
+                        idx = idx * (aac_cb_info[cb].maxval*2 + 1) + cpe->ch[channel].icoefs[i+j] + aac_cb_info[cb].maxval;
+                    score += ff_aac_spectral_bits[aac_cb_info[cb].cb_num][idx];
+                }
+                w++;
+            }while(w < cpe->ch[channel].ics.num_windows && cpe->ch[channel].ics.group_len[w]);
+        }
+        if(score < best){
+            best = score;
+            bestcb = cb;
+        }
+    }
+    return bestcb;
+}
+
+/**
+ * Encode one scalefactor band with selected codebook.
+ *
+ * Coefficients are taken from cpe->ch[channel].icoefs in tuples of two or
+ * four, depending on the codebook. Unsigned codebooks are followed by one
+ * sign bit per nonzero coefficient; the escape codebook additionally writes
+ * an escape sequence for every magnitude above 15.
+ *
+ * @param s       encoder context providing the bit writer
+ * @param cpe     element holding the quantized coefficients
+ * @param channel channel index inside the element
+ * @param start   offset of the first coefficient to write
+ * @param size    number of coefficients to write
+ * @param cb      band type; nothing is written for types without a codebook
+ */
+static void encode_codebook(AACEncContext *s, ChannelElement *cpe, int channel, int start, int size, int cb)
+{
+    const int cb_num = aac_cb_info[cb].cb_num;
+    const int dim = (aac_cb_info[cb].flags & CB_PAIRS) ? 2 : 4;
+    const uint8_t *bits;
+    const uint16_t *codes;
+    int i, j, idx;
+
+    /* band types without spectral data have cb_num == -1, which must not be
+     * used to index the codebook tables */
+    if(cb_num < 0) return;
+    bits  = ff_aac_spectral_bits[cb_num];
+    codes = ff_aac_spectral_codes[cb_num];
+    if(!bits || !codes) return;
+
+    //TODO: factorize?
+    if(aac_cb_info[cb].flags & CB_ESCAPE){
+        for(i = start; i < start + size; i += dim){
+            idx = 0;
+            /* magnitudes are clipped to 16, which marks an escaped value */
+            for(j = 0; j < dim; j++)
+                idx = idx*17 + FFMIN(FFABS(cpe->ch[channel].icoefs[i+j]), 16);
+            put_bits(&s->pb, bits[idx], codes[idx]);
+            //output signs
+            for(j = 0; j < dim; j++)
+                if(cpe->ch[channel].icoefs[i+j])
+                    put_bits(&s->pb, 1, cpe->ch[channel].icoefs[i+j] < 0);
+            //output escape values
+            for(j = 0; j < dim; j++)
+                if(FFABS(cpe->ch[channel].icoefs[i+j]) > 15){
+                    int l = av_log2(FFABS(cpe->ch[channel].icoefs[i+j]));
+
+                    /* l-4 one bits and a terminating zero, then the l low bits of the magnitude */
+                    put_bits(&s->pb, l - 4 + 1, (1 << (l - 4 + 1)) - 2);
+                    put_bits(&s->pb, l, FFABS(cpe->ch[channel].icoefs[i+j]) & ((1 << l) - 1));
+                }
+        }
+    }else if(aac_cb_info[cb].flags & CB_UNSIGNED){
+        for(i = start; i < start + size; i += dim){
+            idx = 0;
+            for(j = 0; j < dim; j++)
+                idx = idx * (aac_cb_info[cb].maxval + 1) + FFABS(cpe->ch[channel].icoefs[i+j]);
+            put_bits(&s->pb, bits[idx], codes[idx]);
+            //output signs
+            for(j = 0; j < dim; j++)
+                if(cpe->ch[channel].icoefs[i+j])
+                    put_bits(&s->pb, 1, cpe->ch[channel].icoefs[i+j] < 0);
+        }
+    }else{
+        for(i = start; i < start + size; i += dim){
+            idx = 0;
+            for(j = 0; j < dim; j++)
+                idx = idx * (aac_cb_info[cb].maxval*2 + 1) + cpe->ch[channel].icoefs[i+j] + aac_cb_info[cb].maxval;
+            put_bits(&s->pb, bits[idx], codes[idx]);
+        }
+    }
+}
+
+/**
+ * Encode information about codebooks used for scalefactor bands coding.
+ *
+ * For every window whose group_len entry is zero, the bands below max_sfb
+ * are split into runs sharing one band type. Each run is written as a
+ * 4-bit type followed by its length in 5-bit (single window) or 3-bit
+ * (several windows) increments; an all-ones increment means that another
+ * increment follows.
+ */
+static void encode_section_data(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, int channel)
+{
+    const int len_bits = cpe->ch[channel].ics.num_windows == 1 ? 5 : 3;
+    const int len_esc  = (1 << len_bits) - 1;
+    int win, sfb, run, rest;
+
+    for(win = 0; win < cpe->ch[channel].ics.num_windows; win++){
+        if(cpe->ch[channel].ics.group_len[win])
+            continue;
+
+        sfb = 0;
+        while(sfb < cpe->ch[channel].ics.max_sfb){
+            /* measure the run of bands that share the type of band sfb */
+            run = 1;
+            while(sfb + run < cpe->ch[channel].ics.max_sfb &&
+                  cpe->ch[channel].band_type[win][sfb + run] == cpe->ch[channel].band_type[win][sfb])
+                run++;
+
+            put_bits(&s->pb, 4, cpe->ch[channel].band_type[win][sfb]);
+            for(rest = run; rest >= len_esc; rest -= len_esc)
+                put_bits(&s->pb, len_bits, len_esc);
+            put_bits(&s->pb, len_bits, rest);
+
+            sfb += run;
+        }
+    }
+}
+
+/**
+ * Encode scalefactors.
+ *
+ * Scalefactors of non-zero bands are written as differences to the
+ * previously written one, starting from the channel's global gain
+ * (mixing_gain). A difference that does not fit the code table is reported
+ * and clipped to the nearest codable value, so the table is never indexed
+ * out of range.
+ */
+static void encode_scale_factor_data(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, int channel)
+{
+    int off = cpe->ch[channel].mixing_gain, diff;
+    int i, w;
+
+    for(w = 0; w < cpe->ch[channel].ics.num_windows; w++){
+        if(cpe->ch[channel].ics.group_len[w]) continue;
+        for(i = 0; i < cpe->ch[channel].ics.max_sfb; i++){
+            if(!cpe->ch[channel].zeroes[w][i]){
+                diff = cpe->ch[channel].sf_idx[w][i] - off + SCALE_DIFF_ZERO;
+                if(diff < 0 || diff > 120){
+                    av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
+                    diff = av_clip(diff, 0, 120);
+                }
+                /* follow the value a decoder reconstructs; identical to
+                 * sf_idx[w][i] whenever no clipping took place */
+                off += diff - SCALE_DIFF_ZERO;
+                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
+            }
+        }
+    }
+}
+
+/**
+ * Encode pulse data.
+ *
+ * Writes the presence flag and, if pulses are present, their count minus
+ * one (2 bits), the starting band (6 bits) and a 5-bit offset plus 4-bit
+ * amplitude for each pulse.
+ */
+static void encode_pulse_data(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, int channel)
+{
+    PutBitContext *pb = &s->pb;
+    int n;
+
+    put_bits(pb, 1, cpe->ch[channel].pulse.present);
+    if(cpe->ch[channel].pulse.present){
+        put_bits(pb, 2, cpe->ch[channel].pulse.num_pulse - 1);
+        put_bits(pb, 6, cpe->ch[channel].pulse.start);
+        for(n = 0; n < cpe->ch[channel].pulse.num_pulse; n++){
+            put_bits(pb, 5, cpe->ch[channel].pulse.offset[n]);
+            put_bits(pb, 4, cpe->ch[channel].pulse.amp[n]);
+        }
+    }
+}
+
+/**
+ * Encode temporal noise shaping data.
+ *
+ * Field widths depend on the window type: eight-short windows use 1 bit
+ * for the filter count, 4 for the length and 3 for the order; all other
+ * windows use 2, 6 and 5 bits respectively. The coefficient resolution
+ * flag is always one bit.
+ */
+static void encode_tns_data(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, int channel)
+{
+    int i, w;
+
+    put_bits(&s->pb, 1, cpe->ch[channel].tns.present);
+    if(!cpe->ch[channel].tns.present) return;
+    if(cpe->ch[channel].ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE){
+        for(w = 0; w < cpe->ch[channel].ics.num_windows; w++){
+            put_bits(&s->pb, 1, cpe->ch[channel].tns.n_filt[w]);
+            if(!cpe->ch[channel].tns.n_filt[w]) continue;
+            put_bits(&s->pb, 1, cpe->ch[channel].tns.coef_res[w] - 3);
+            put_bits(&s->pb, 4, cpe->ch[channel].tns.length[w][0]);
+            put_bits(&s->pb, 3, cpe->ch[channel].tns.order[w][0]);
+            if(cpe->ch[channel].tns.order[w][0]){
+                put_bits(&s->pb, 1, cpe->ch[channel].tns.direction[w][0]);
+                put_bits(&s->pb, 1, cpe->ch[channel].tns.coef_compress[w][0]);
+                for(i = 0; i < cpe->ch[channel].tns.order[w][0]; i++)
+                     put_bits(&s->pb, cpe->ch[channel].tns.coef_len[w][0], cpe->ch[channel].tns.coef[w][0][i]);
+            }
+        }
+    }else{
+        /* the filter count is a 2-bit field for long windows */
+        put_bits(&s->pb, 2, cpe->ch[channel].tns.n_filt[0]);
+        if(!cpe->ch[channel].tns.n_filt[0]) return;
+        put_bits(&s->pb, 1, cpe->ch[channel].tns.coef_res[0] - 3);
+        for(w = 0; w < cpe->ch[channel].tns.n_filt[0]; w++){
+            put_bits(&s->pb, 6, cpe->ch[channel].tns.length[0][w]);
+            put_bits(&s->pb, 5, cpe->ch[channel].tns.order[0][w]);
+            if(cpe->ch[channel].tns.order[0][w]){
+                put_bits(&s->pb, 1, cpe->ch[channel].tns.direction[0][w]);
+                put_bits(&s->pb, 1, cpe->ch[channel].tns.coef_compress[0][w]);
+                for(i = 0; i < cpe->ch[channel].tns.order[0][w]; i++)
+                     put_bits(&s->pb, cpe->ch[channel].tns.coef_len[0][w], cpe->ch[channel].tns.coef[0][w][i]);
+            }
+        }
+    }
+}
+
+/**
+ * Encode spectral coefficients processed by psychoacoustic model.
+ *
+ * Walks the bands of every window group. A band flagged as zero is skipped;
+ * any other band is written for the group's first window and for each
+ * following window of the same group, always with the band type stored for
+ * the first window.
+ */
+static void encode_spectral_data(AVCodecContext *avctx, AACEncContext *s, ChannelElement *cpe, int channel)
+{
+    IndividualChannelStream *ics = &cpe->ch[channel].ics;
+    int win, member, sfb, offset;
+
+    for(win = 0; win < ics->num_windows; win++){
+        if(ics->group_len[win])
+            continue;
+
+        offset = 0;
+        for(sfb = 0; sfb < ics->max_sfb; sfb++){
+            const int width = ics->swb_sizes[sfb];
+
+            if(!cpe->ch[channel].zeroes[win][sfb]){
+                member = win;
+                do{
+                    encode_codebook(s, cpe, channel, offset + member*128, width, cpe->ch[channel].band_type[win][sfb]);
+                    member++;
+                }while(member < ics->num_windows && ics->group_len[member]);
+            }
+            offset += width;
+        }
+    }
+}
+
+/**
+ * Encode one channel of audio data.
+ *
+ * First chooses a codebook for every non-zero band of each window group
+ * (a band whose chosen codebook is 0 becomes a zero band), then writes the
+ * global gain, the ics_info unless the pair shares a common window, and the
+ * section, scalefactor, pulse, TNS and spectral data.
+ *
+ * @return always 0
+ */
+static int encode_individual_channel(AVCodecContext *avctx, ChannelElement *cpe, int channel)
+{
+    AACEncContext *s = avctx->priv_data;
+    IndividualChannelStream *ics = &cpe->ch[channel].ics;
+    int win, sfb, pos;
+
+    for(win = 0; win < ics->num_windows; win++){
+        if(ics->group_len[win])
+            continue;
+
+        pos = win << 7; // each window owns 128 coefficients
+        for(sfb = 0; sfb < ics->max_sfb; sfb++){
+            const int width = ics->swb_sizes[sfb];
+
+            if(cpe->ch[channel].zeroes[win][sfb]){
+                cpe->ch[channel].band_type[win][sfb] = 0;
+            }else{
+                cpe->ch[channel].band_type[win][sfb] = determine_section_info(s, cpe, channel, win, sfb, pos, width);
+                cpe->ch[channel].zeroes[win][sfb]    = !cpe->ch[channel].band_type[win][sfb];
+            }
+            pos += width;
+        }
+    }
+
+    put_bits(&s->pb, 8, cpe->ch[channel].mixing_gain); //global gain
+    if(!cpe->common_window)
+        put_ics_info(avctx, ics);
+    encode_section_data(avctx, s, cpe, channel);
+    encode_scale_factor_data(avctx, s, cpe, channel);
+    encode_pulse_data(avctx, s, cpe, channel);
+    encode_tns_data(avctx, s, cpe, channel);
+    put_bits(&s->pb, 1, 0); //ssr
+    encode_spectral_data(avctx, s, cpe, channel);
+    return 0;
+}
+
+/**
+ * Write some auxiliary information about created AAC file.
+ *
+ * Emits a fill element whose payload is namelen = strlen(name) + 2 bytes:
+ * the 4-bit extension type, padding up to the next byte boundary, the name
+ * bytes, and trailing zero bits that complete the payload.
+ *
+ * The element size is coded as a 4-bit count; when that count is 15 an
+ * 8-bit escape follows and the size is 15 + escape - 1, hence the escape
+ * value namelen - 14. Names too long for this coding are truncated.
+ */
+static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, const char *name)
+{
+    int i, namelen, padbits;
+
+    /* largest codable payload is 15 + 255 - 1 bytes, two of which are overhead */
+    namelen = FFMIN((int)strlen(name), 15 + 255 - 1 - 2) + 2;
+    put_bits(&s->pb, 3, ID_FIL);
+    put_bits(&s->pb, 4, FFMIN(namelen, 15));
+    if(namelen >= 15)
+        put_bits(&s->pb, 8, namelen - 14);
+    put_bits(&s->pb, 4, 0); //extension type - filler
+    /* bits needed to reach the next byte boundary; 0 when already aligned */
+    padbits = -put_bits_count(&s->pb) & 7;
+    align_put_bits(&s->pb);
+    for(i = 0; i < namelen - 2; i++)
+        put_bits(&s->pb, 8, name[i]);
+    /* extension type (4) + padbits + name + these bits == 8 * namelen */
+    put_bits(&s->pb, 12 - padbits, 0);
+}
+
+/**
+ * Encode one frame.
+ *
+ * The encoder works with one frame of delay: incoming samples are
+ * preprocessed into the second half of s->samples and serve as look-ahead,
+ * while the first half (the previous input) is what gets encoded. The very
+ * first call therefore only buffers its input and returns 0.
+ *
+ * @param avctx    codec context
+ * @param frame    output buffer
+ * @param buf_size size of the output buffer in bytes
+ * @param data     interleaved 16-bit input samples, or NULL when flushing
+ * @return number of bytes written to frame
+ */
+static int aac_encode_frame(AVCodecContext *avctx,
+                            uint8_t *frame, int buf_size, void *data)
+{
+    AACEncContext *s = avctx->priv_data;
+    int16_t *samples = s->samples, *samples2, *la;
+    ChannelElement *cpe;
+    int i, j, chans, tag, start_ch;
+    const uint8_t *chan_map = aac_chan_configs[avctx->channels-1];
+    int chan_el_counter[4];
+
+    if(data){
+        start_ch = 0;
+        samples2 = s->samples + 1024 * avctx->channels;
+        for(i = 0; i < chan_map[0]; i++){
+            tag = chan_map[i+1];
+            chans = tag == ID_CPE ? 2 : 1;
+            ff_aac_psy_preprocess(&s->psy, (uint16_t*)data + start_ch, samples2 + start_ch, i, tag);
+            start_ch += chans;
+        }
+    }
+    if(!avctx->frame_number){
+        /* nothing to encode yet: move the new samples into the "current" half */
+        memmove(s->samples, s->samples + 1024 * avctx->channels, 1024 * avctx->channels * sizeof(s->samples[0]));
+        return 0;
+    }
+
+    init_put_bits(&s->pb, frame, buf_size*8);
+    if(avctx->frame_number==1 && !(avctx->flags & CODEC_FLAG_BITEXACT)){
+        put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
+    }
+    start_ch = 0;
+    memset(chan_el_counter, 0, sizeof(chan_el_counter));
+    for(i = 0; i < chan_map[0]; i++){
+        tag = chan_map[i+1];
+        chans = tag == ID_CPE ? 2 : 1;
+        cpe = &s->cpe[i];
+        samples2 = samples + start_ch;
+        /* samples2 already points at this element's first channel, so the
+         * look-ahead frame is exactly one interleaved frame further on */
+        la = data ? samples2 + 1024 * avctx->channels : NULL;
+        ff_aac_psy_suggest_window(&s->psy, samples2, la, i, tag, cpe);
+        for(j = 0; j < chans; j++){
+            analyze(avctx, s, cpe, samples2, j);
+        }
+        ff_aac_psy_analyze(&s->psy, i, tag, cpe);
+        put_bits(&s->pb, 3, tag);
+        /* instance tag: running count of elements of this type */
+        put_bits(&s->pb, 4, chan_el_counter[tag]++);
+        if(chans == 2){
+            put_bits(&s->pb, 1, cpe->common_window);
+            if(cpe->common_window){
+                put_ics_info(avctx, &cpe->ch[0].ics);
+                encode_ms_info(&s->pb, cpe);
+            }
+        }
+        for(j = 0; j < chans; j++){
+            encode_individual_channel(avctx, cpe, j);
+        }
+        start_ch += chans;
+    }
+
+    put_bits(&s->pb, 3, ID_END);
+    flush_put_bits(&s->pb);
+    avctx->frame_bits = put_bits_count(&s->pb);
+
+    /* the look-ahead frame becomes the frame encoded by the next call */
+    memmove(s->samples, s->samples + 1024 * avctx->channels, 1024 * avctx->channels * sizeof(s->samples[0]));
+    return put_bits_count(&s->pb)>>3;
+}
+
+/**
+ * Release the resources held by the encoder context: both MDCT contexts,
+ * the psy model, the sample buffer and the channel element array.
+ *
+ * @return always 0
+ */
+static av_cold int aac_encode_end(AVCodecContext *avctx)
+{
+    AACEncContext *enc = avctx->priv_data;
+
+    /* transforms */
+    ff_mdct_end(&enc->mdct1024);
+    ff_mdct_end(&enc->mdct128);
+
+    /* psychoacoustic model */
+    ff_aac_psy_end(&enc->psy);
+
+    /* buffers owned by the context */
+    av_freep(&enc->samples);
+    av_freep(&enc->cpe);
+
+    return 0;
+}
+
+/** Codec descriptor that exposes the AAC encoder to libavcodec. */
+AVCodec aac_encoder = {
+ "aac", // codec name
+ CODEC_TYPE_AUDIO,
+ CODEC_ID_AAC,
+ sizeof(AACEncContext), // size of the private context
+ aac_encode_init,
+ aac_encode_frame,
+ aac_encode_end,
+ .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
+};
Index: libavcodec/allcodecs.c
===================================================================
--- libavcodec/allcodecs.c (revision 14690)
+++ libavcodec/allcodecs.c (working copy)
@@ -60,6 +60,7 @@
initialized = 1;
/* video codecs */
+ REGISTER_ENCODER (AAC, aac);
REGISTER_DECODER (AASC, aasc);
REGISTER_DECODER (AMV, amv);
REGISTER_ENCDEC (ASV1, asv1);
Index: libavcodec/aacpsy.c
===================================================================
--- libavcodec/aacpsy.c (revision 0)
+++ libavcodec/aacpsy.c (revision 0)
@@ -0,0 +1,455 @@
+/*
+ * AAC encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file aacpsy.c
+ * AAC encoder psychoacoustic model
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "aacpsy.h"
+
+//borrowed from aac.c
+// Scale lookup table used by the quantization helpers below, which index it
+// around position 200 with the scalefactor offset.
+// NOTE(review): the code that fills this table is not visible in this part
+// of the patch - confirm that it is initialized before first use.
+static float pow2sf_tab[340];
+
+
+/**
+ * Quantize a run of coefficients to integers.
+ *
+ * Each magnitude is scaled by the table entry selected through scale_idx,
+ * raised to the power 0.75, offset by 0.4054 and truncated, then limited
+ * to 8191.
+ * Note that the stored value is negated for strictly positive inputs and
+ * left non-negative otherwise.
+ *
+ * @param in        input coefficients
+ * @param out       quantized output, same length as in
+ * @param size      number of coefficients
+ * @param scale_idx scalefactor index of the band
+ * @return sum of the quantized magnitudes
+ * @see 3GPP TS26.403 5.6.2
+ */
+static inline int convert_coeffs(float *in, int *out, int size, int scale_idx)
+{
+    const float gain = pow2sf_tab[200 - scale_idx + SCALE_ONE_POS];
+    int total = 0;
+    int k;
+
+    for(k = 0; k < size; k++){
+        int q = (int)(pow(FFABS(in[k]) * gain, 0.75) + 0.4054);
+
+        if(q > 8191)
+            q = 8191;
+        total += q;
+        out[k] = in[k] > 0.0 ? -q : q;
+    }
+    return total;
+}
+
+/**
+ * Reconstruct a coefficient from its quantized value: |q| times the cube
+ * root of q, multiplied by the table entry selected through scale_idx.
+ * The sign of q is carried through the cube root.
+ */
+static inline float unquant(int q, int scale_idx){
+    const float gain = pow2sf_tab[200 + scale_idx - SCALE_ONE_POS];
+
+    return (FFABS(q) * cbrt(q*1.0)) * gain;
+}
+/**
+ * Measure the squared error caused by quantizing a band with a given
+ * scalefactor.
+ *
+ * Every magnitude is quantized (clipped to 0..8191), reconstructed, and the
+ * squared difference to the original magnitude is accumulated.
+ *
+ * @param c         band coefficients
+ * @param size      number of coefficients
+ * @param scale_idx scalefactor index to evaluate
+ * @return sum of squared reconstruction errors
+ */
+static inline float calc_distortion(float *c, int size, int scale_idx)
+{
+    float err = 0.0f;
+    int k;
+
+    for(k = 0; k < size; k++){
+        float mag = FFABS(c[k]);
+        float recon;
+        int level = (int)(pow(FFABS(mag) * pow2sf_tab[200 - scale_idx + SCALE_ONE_POS], 0.75) + 0.4054);
+
+        level = av_clip(level, 0, 8191);
+        recon = (level * cbrt(level)) * pow2sf_tab[200 + scale_idx - SCALE_ONE_POS];
+        err  += (mag - recon) * (mag - recon);
+    }
+    return err;
+}
+
+/**
+ * Produce integer coefficients from scalefactors provided by model.
+ *
+ * For every channel of the element this applies mid/side conversion where
+ * the mask asks for it, quantizes all bands, optionally extracts pulses
+ * (single-window frames only, first band containing a candidate), derives
+ * max_sfb and merges the zero flags of grouped windows. For a channel pair
+ * with a common window it finally equalizes max_sfb and sets ms.present.
+ *
+ * @param apc           psy model context (unused here)
+ * @param cpe           element to process; coeffs, sf_idx and zeroes are inputs
+ * @param chans         number of channels in the element
+ * @param search_pulses nonzero to enable the pulse search
+ */
+static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int chans, int search_pulses)
+{
+    int i, w, w2, g, ch;
+    int start, sum, maxsfb, cmaxsfb;
+    int pulses, poff[4], pamp[4];
+
+    for(ch = 0; ch < chans; ch++){
+        start = 0;
+        maxsfb = 0;
+        cpe->ch[ch].pulse.present = 0;
+        for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
+            for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
+                sum = 0;
+                //apply M/S
+                if(!ch && cpe->ms.mask[w][g]){
+                    for(i = 0; i < cpe->ch[ch].ics.swb_sizes[g]; i++){
+                        /* mid first; side is then mid - right == (left - right) / 2 */
+                        cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
+                        cpe->ch[1].coeffs[start+i] =  cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
+                    }
+                }
+                if(!cpe->ch[ch].zeroes[w][g])
+                    sum = convert_coeffs(cpe->ch[ch].coeffs + start, cpe->ch[ch].icoefs + start, cpe->ch[ch].ics.swb_sizes[g], cpe->ch[ch].sf_idx[w][g]);
+                else
+                    memset(cpe->ch[ch].icoefs + start, 0, cpe->ch[ch].ics.swb_sizes[g] * sizeof(cpe->ch[0].icoefs[0]));
+                cpe->ch[ch].zeroes[w][g] = !sum;
+                //try finding pulses
+                if(search_pulses && cpe->ch[ch].ics.num_windows == 1 && !cpe->ch[ch].pulse.present){
+                    pulses = 0;
+                    memset(poff,0,sizeof(poff));
+                    memset(pamp,0,sizeof(pamp));
+                    for(i = 0; i < cpe->ch[ch].ics.swb_sizes[g] && pulses < 4; i++){
+                        /* offsets are written as 5-bit distances from the previous
+                         * pulse (or from the band start), so stop searching once
+                         * that distance can no longer be represented */
+                        if(i - (pulses ? poff[pulses-1] : 0) > 31) break;
+                        if(FFABS(cpe->ch[ch].icoefs[start+i]) > 4){
+                            poff[pulses] = i;
+                            pamp[pulses] = FFMIN(FFABS(cpe->ch[ch].icoefs[start+i]) - 1, 15);
+                            pulses++;
+                        }
+                    }
+                    if(pulses){
+                        cpe->ch[ch].pulse.present = 1;
+                        cpe->ch[ch].pulse.start = g;
+                        cpe->ch[ch].pulse.num_pulse = pulses;
+                        for(i = 0; i < pulses; i++){
+                            cpe->ch[ch].pulse.amp[i] = pamp[i];
+                            cpe->ch[ch].pulse.offset[i] = i ? poff[i] - poff[i-1] : poff[0];
+
+                            /* remove the pulse amplitude from the coded coefficient */
+                            if(cpe->ch[ch].icoefs[start+poff[i]] > 0)
+                                cpe->ch[ch].icoefs[start+poff[i]] -= pamp[i];
+                            else
+                                cpe->ch[ch].icoefs[start+poff[i]] += pamp[i];
+                        }
+                    }
+                }
+                start += cpe->ch[ch].ics.swb_sizes[g];
+            }
+            /* highest band of this window that is not zero */
+            for(cmaxsfb = cpe->ch[ch].ics.num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w][cmaxsfb-1]; cmaxsfb--);
+            maxsfb = FFMAX(maxsfb, cmaxsfb);
+        }
+        cpe->ch[ch].ics.max_sfb = maxsfb;
+
+        //adjust zero bands for window groups
+        for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
+            if(cpe->ch[ch].ics.group_len[w]) continue;
+            for(g = 0; g < cpe->ch[ch].ics.max_sfb; g++){
+                /* a band is zero for the group only if it is zero in every member window */
+                i = 1;
+                w2 = w;
+                do{
+                    if(!cpe->ch[ch].zeroes[w2][g]){
+                        i = 0;
+                        break;
+                    }
+                    w2++;
+                }while(w2 < cpe->ch[ch].ics.num_windows && cpe->ch[ch].ics.group_len[w2]);
+                cpe->ch[ch].zeroes[w][g] = i;
+            }
+        }
+    }
+
+    if(chans > 1 && cpe->common_window){
+        int msc = 0;
+        int total;
+        cpe->ch[0].ics.max_sfb = FFMAX(cpe->ch[0].ics.max_sfb, cpe->ch[1].ics.max_sfb);
+        cpe->ch[1].ics.max_sfb = cpe->ch[0].ics.max_sfb;
+        for(w = 0; w < cpe->ch[0].ics.num_windows; w++)
+            for(i = 0; i < cpe->ch[0].ics.max_sfb; i++)
+                if(cpe->ms.mask[w][i]) msc++;
+        /* "all bands" may only be signalled when every counted band is set,
+         * and the count above runs over all windows */
+        total = cpe->ch[0].ics.num_windows * cpe->ch[0].ics.max_sfb;
+        if(msc == 0 || cpe->ch[0].ics.max_sfb == 0) cpe->ms.present = 0;
+        else cpe->ms.present = msc < total ? 1 : 2;
+    }
+}
+
+/**
+ * Window decision of the null model: every channel of the element gets a
+ * single long window with the KBD shape and the long-window band layout.
+ * common_window is set when both channel slots use the same window shape.
+ */
+static void psy_null_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe)
+{
+    const int nb_channels = type == ID_CPE ? 2 : 1;
+    int c;
+
+    for(c = 0; c < nb_channels; c++){
+        IndividualChannelStream *ics = &cpe->ch[c].ics;
+
+        ics->window_sequence[0] = ONLY_LONG_SEQUENCE;
+        ics->use_kb_window[0]   = 1;
+        ics->num_windows        = 1;
+        ics->group_len[0]       = 0;
+        ics->swb_sizes          = apc->bands1024;
+        ics->num_swb            = apc->num_bands1024;
+    }
+    cpe->common_window = cpe->ch[0].ics.use_kb_window[0] == cpe->ch[1].ics.use_kb_window[0];
+}
+
+/**
+ * Scalefactor selection of the null model (long windows only).
+ *
+ * For every band the allowed distortion is a fixed fraction of the band
+ * energy (0.001258925, i.e. about 29 dB below it). The scalefactor starts
+ * from an estimate based on that threshold and is lowered in steps of 3
+ * until the measured distortion drops below the threshold. Afterwards the
+ * smallest scalefactor of each channel becomes its global gain and the
+ * others are limited to SCALE_MAX_DIFF above it.
+ *
+ * All results are stored for window 0, the only window this model uses.
+ */
+static void psy_null_process(AACPsyContext *apc, int tag, int type, ChannelElement *cpe)
+{
+    int start;
+    int ch, g, i;
+    int minscale;
+    int chans = type == ID_CPE ? 2 : 1;
+
+    for(ch = 0; ch < chans; ch++){
+        start = 0;
+        /* start must follow the band even when the band is skipped */
+        for(g = 0; g < apc->num_bands1024; start += apc->bands1024[g], g++){
+            float energy = 0.0f, ffac = 0.0f, thr, dist;
+
+            for(i = 0; i < apc->bands1024[g]; i++){
+                energy += cpe->ch[ch].coeffs[start+i]*cpe->ch[ch].coeffs[start+i];
+                ffac += sqrt(FFABS(cpe->ch[ch].coeffs[start+i]));
+            }
+            thr = energy * 0.001258925f;
+            cpe->ch[ch].sf_idx[0][g] = 136;
+            cpe->ch[ch].zeroes[0][g] = (energy == 0.0);
+            if(cpe->ch[ch].zeroes[0][g]) continue;
+            minscale = (int)(2.66667 * (log2(6.75*thr) - log2(ffac)));
+            cpe->ch[ch].sf_idx[0][g] = SCALE_ONE_POS - minscale;
+            while(cpe->ch[ch].sf_idx[0][g] > 3){
+                dist = calc_distortion(cpe->ch[ch].coeffs + start, apc->bands1024[g], cpe->ch[ch].sf_idx[0][g]);
+                if(dist < thr) break;
+                cpe->ch[ch].sf_idx[0][g] -= 3;
+            }
+        }
+    }
+    for(ch = 0; ch < chans; ch++){
+        minscale = 255;
+        for(g = 0; g < apc->num_bands1024; g++)
+            if(!cpe->ch[ch].zeroes[0][g])
+                minscale = FFMIN(minscale, cpe->ch[ch].sf_idx[0][g]);
+        cpe->ch[ch].mixing_gain = minscale;
+        for(g = 0; g < apc->num_bands1024; g++)
+            if(!cpe->ch[ch].zeroes[0][g])
+                cpe->ch[ch].sf_idx[0][g] = FFMIN(minscale + SCALE_MAX_DIFF, cpe->ch[ch].sf_idx[0][g]);
+    }
+    psy_create_output(apc, cpe, chans, 1);
+}
+
+ /**
+  * Window decision of the "null8" model: cycle through the window sequences.
+  *
+  * No signal analysis is done. The sequence chosen last time is kept in
+  * window_sequence[1] and the next one is derived from the last two, which in
+  * steady state gives
+  * ONLY_LONG, ONLY_LONG, LONG_START, EIGHT_SHORT, EIGHT_SHORT, LONG_STOP
+  * and so on. Band layout, number of windows and grouping are set to match
+  * the chosen sequence.
+  *
+  * @param apc   model context, supplies long and short band tables
+  * @param audio not used by this model
+  * @param la    not used by this model
+  * @param tag   not used by this model
+  * @param type  channel element type; ID_CPE selects two channels, anything else one
+  * @param cpe   channel element whose window information is updated
+  */
+ static void psy_null8_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe)
+ {
+     int ch, i;
+     int chans = type == ID_CPE ? 2 : 1;
+
+     for(ch = 0; ch < chans; ch++){
+         // must be read before slot 1 is overwritten with the last sequence
+         int prev_seq = cpe->ch[ch].ics.window_sequence[1];
+         cpe->ch[ch].ics.use_kb_window[1]   = cpe->ch[ch].ics.use_kb_window[0];
+         cpe->ch[ch].ics.window_sequence[1] = cpe->ch[ch].ics.window_sequence[0];
+         switch(cpe->ch[ch].ics.window_sequence[0]){
+         case ONLY_LONG_SEQUENCE:
+             if(prev_seq == ONLY_LONG_SEQUENCE)
+                 cpe->ch[ch].ics.window_sequence[0] = LONG_START_SEQUENCE;
+             break;
+         case LONG_START_SEQUENCE:
+             cpe->ch[ch].ics.window_sequence[0] = EIGHT_SHORT_SEQUENCE;
+             break;
+         case EIGHT_SHORT_SEQUENCE:
+             if(prev_seq == EIGHT_SHORT_SEQUENCE)
+                 cpe->ch[ch].ics.window_sequence[0] = LONG_STOP_SEQUENCE;
+             break;
+         case LONG_STOP_SEQUENCE:
+             cpe->ch[ch].ics.window_sequence[0] = ONLY_LONG_SEQUENCE;
+             break;
+         }
+
+         cpe->ch[ch].ics.use_kb_window[0] = 1;
+         if(cpe->ch[ch].ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE){
+             cpe->ch[ch].ics.num_windows  = 1;
+             cpe->ch[ch].ics.swb_sizes    = apc->bands1024;
+             cpe->ch[ch].ics.num_swb      = apc->num_bands1024;
+             cpe->ch[ch].ics.group_len[0] = 0;
+         }else{
+             cpe->ch[ch].ics.num_windows  = 8;
+             cpe->ch[ch].ics.swb_sizes    = apc->bands128;
+             cpe->ch[ch].ics.num_swb      = apc->num_bands128;
+             // alternating 0,1,0,1,... over the eight windows
+             for(i = 0; i < cpe->ch[ch].ics.num_windows; i++)
+                 cpe->ch[ch].ics.group_len[i] = i & 1;
+         }
+     }
+     // A shared window only exists for a channel pair. For a single channel
+     // ch[1] was not updated above, so the flag must not be derived from it.
+     cpe->common_window = chans > 1 &&
+                          cpe->ch[0].ics.use_kb_window[0] == cpe->ch[1].ics.use_kb_window[0];
+ }
+
+ /**
+  * Analysis step of the "null8" model.
+  *
+  * For a channel pair with a common window, a band is flagged in ms.mask when
+  * the summed absolute difference between both channels is exactly zero.
+  * After that every band of every window gets scalefactor index
+  * SCALE_ONE_POS and a cleared zero flag, mixing_gain is set to SCALE_ONE_POS
+  * and psy_create_output() is invoked on the element.
+  *
+  * @param apc  model context
+  * @param tag  not used by this model
+  * @param type channel element type; ID_CPE selects two channels, anything else one
+  * @param cpe  channel element with transform coefficients in ch[].coeffs
+  */
+ static void psy_null8_process(AACPsyContext *apc, int tag, int type, ChannelElement *cpe)
+ {
+     int start;
+     int w, ch, g, i;
+     int chans = type == ID_CPE ? 2 : 1;
+
+     //detect M/S
+     if(chans > 1 && cpe->common_window){
+         start = 0;
+         for(w = 0; w < cpe->ch[0].ics.num_windows; w++){
+             for(g = 0; g < cpe->ch[0].ics.num_swb; g++){
+                 float diff = 0.0f;
+
+                 for(i = 0; i < cpe->ch[0].ics.swb_sizes[g]; i++)
+                     diff += fabs(cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i]);
+                 cpe->ms.mask[w][g] = diff == 0.0;
+                 // move on to the coefficients of the next band
+                 // NOTE(review): assumes the bands of one window cover it
+                 // completely, so the next window follows directly - confirm
+                 // against the coefficient layout
+                 start += cpe->ch[0].ics.swb_sizes[g];
+             }
+         }
+     }
+     for(ch = 0; ch < chans; ch++){
+         cpe->ch[ch].mixing_gain = SCALE_ONE_POS;
+         for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){
+             for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){
+                 cpe->ch[ch].sf_idx[w][g] = SCALE_ONE_POS;
+                 cpe->ch[ch].zeroes[w][g] = 0;
+             }
+         }
+     }
+     psy_create_output(apc, cpe, chans, 0);
+ }
+
+ /**
+  * Available psychoacoustic models, indexed by enum AACPsyModelType.
+  * Slots without an initializer stay zeroed; ff_aac_psy_init() rejects them
+  * because their window/process callbacks are NULL.
+  */
+ static const AACPsyModel psy_models[AAC_NB_PSY_MODELS] =
+ {
+     [AAC_PSY_NULL] = {
+         .name    = "Null model",
+         .init    = NULL,
+         .window  = psy_null_window,
+         .process = psy_null_process,
+         .end     = NULL,
+     },
+     [AAC_PSY_NULL8] = {
+         .name    = "Null model - short windows",
+         .init    = NULL,
+         .window  = psy_null8_window,
+         .process = psy_null8_process,
+         .end     = NULL,
+     },
+ };
+
+// low-pass filter declarations and code
+ // order of the low-pass filter; also fixes the history length in LPFilterState
+ #define IIR_ORDER 4
+
+ /**
+ * filter data for 4th order IIR lowpass Butterworth filter
+ *
+ * data format (one row per supported cutoff):
+ * [0] normalized cutoff frequency, compared against the bitrate-derived
+ * cutoff ratio when a row is selected in ff_aac_psy_init()
+ * [1] inverse filter gain, multiplied with every input sample
+ * [2]..[5] feedback coefficients, applied to the four previous outputs
+ * (oldest first)
+ *
+ * Rows are ordered from highest to lowest cutoff; the selection loop takes the
+ * first row whose cutoff does not exceed the wanted ratio and relies on this order.
+ */
+ static const float lp_filter_data[][IIR_ORDER+2] = {
+ { 0.4535147392, 6.816645e-01, -0.4646665999, -2.2127207402, -3.9912017501, -3.2380429984 },
+ { 0.4166666667, 4.998150e-01, -0.2498216698, -1.3392807613, -2.7693097862, -2.6386277439 },
+ { 0.3628117914, 3.103469e-01, -0.0965076902, -0.5977763360, -1.4972580903, -1.7740085241 },
+ { 0.3333333333, 2.346995e-01, -0.0557639007, -0.3623690447, -1.0304538354, -1.3066051440 },
+ { 0.2916666667, 1.528432e-01, -0.0261686639, -0.1473794606, -0.6204721225, -0.6514716536 },
+ { 0.2267573696, 6.917529e-02, -0.0202414073, 0.0780167640, -0.5277442247, 0.3631641670 },
+ { 0.2187500000, 6.178391e-02, -0.0223681543, 0.1069446609, -0.5615167033, 0.4883976841 },
+ { 0.2083333333, 5.298685e-02, -0.0261686639, 0.1473794606, -0.6204721225, 0.6514716536 },
+ { 0.1587301587, 2.229030e-02, -0.0647354087, 0.4172275190, -1.1412129810, 1.4320761385 },
+ { 0.1458333333, 1.693903e-02, -0.0823177861, 0.5192354923, -1.3444768251, 1.6365345642 },
+ { 0.1133786848, 7.374053e-03, -0.1481421788, 0.8650973862, -1.9894244796, 2.1544844308 },
+ { 0.1041666667, 5.541768e-03, -0.1742301048, 0.9921936565, -2.2090801108, 2.3024482658 },
+ };
+
+/**
+ * IIR filter state
+ */
+typedef struct LPFilterState{
+ float x[IIR_ORDER + 1];
+ float y[IIR_ORDER + 1];
+}LPFilterState;
+
+ /**
+  * Feed one sample through the low-pass filter and return the filtered sample.
+  *
+  * @param s      filter history, updated in place
+  * @param coeffs one row of lp_filter_data: [1] is the input gain,
+  *               [2]..[5] weight the four previous outputs
+  * @param in     input sample
+  * @return newest output sample
+  */
+ static av_always_inline float lowpass_iir_filter(LPFilterState *s, const float *coeffs, float in)
+ {
+     int k;
+
+     // age both histories by one step; the oldest entry (index 0) drops out
+     for(k = 0; k < IIR_ORDER; k++){
+         s->x[k] = s->x[k + 1];
+         s->y[k] = s->y[k + 1];
+     }
+     s->x[IIR_ORDER] = in * coeffs[1];
+     //FIXME: made only for 4th order filter
+     // input taps use the fixed weights 1 4 6 4 1, output taps the table row
+     s->y[IIR_ORDER] = (s->x[0] + s->x[4])*1
+                     + (s->x[1] + s->x[3])*4
+                     +  s->x[2]*6
+                     + coeffs[2]*s->y[0]
+                     + coeffs[3]*s->y[1]
+                     + coeffs[4]*s->y[2]
+                     + coeffs[5]*s->y[3];
+     return s->y[IIR_ORDER];
+ }
+
+// low-pass filter code ends here
+
+ /**
+  * Set up the psychoacoustic model context.
+  *
+  * Fills pow2sf_tab, stores the band tables and flags, selects the model and
+  * derives the preprocessing parameters:
+  *  - stereo attenuation is disabled (factor 0.5) when PSY_MODEL_NO_ST_ATT is
+  *    set or the mode is PSY_MODE_QUALITY, otherwise it is
+  *    bit_rate / elements / 192000 clipped to [0, 0.5];
+  *  - low-pass filtering is disabled under the same kind of condition for
+  *    PSY_MODEL_NO_LOWPASS, or when bit_rate / elements / 8 / sample_rate
+  *    reaches 0.5; otherwise a row of lp_filter_data is selected and two filter
+  *    states per element are allocated.
+  *
+  * @return 0 on success, -1 if the model is unusable or the filter state
+  *         cannot be allocated, otherwise the result of the model's own init
+  */
+ int av_cold ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx,
+                             enum AACPsyModelType model, int elements, int flags,
+                             const uint8_t *bands1024, int num_bands1024,
+                             const uint8_t *bands128, int num_bands128)
+ {
+     int i;
+
+     // the unsigned comparison also rejects negative values
+     if((unsigned)model >= AAC_NB_PSY_MODELS || !psy_models[model].window || !psy_models[model].process){
+         av_log(avctx, AV_LOG_ERROR, "Invalid psy model\n");
+         return -1;
+     }
+
+     for (i = 0; i < 340; i++)
+         pow2sf_tab[i] = pow(2, (i - 200)/4.);
+
+     ctx->avctx         = avctx;
+     ctx->flags         = flags;
+     ctx->bands1024     = bands1024;
+     ctx->num_bands1024 = num_bands1024;
+     ctx->bands128      = bands128;
+     ctx->num_bands128  = num_bands128;
+     dsputil_init(&ctx->dsp, avctx);
+     ctx->model         = &psy_models[model];
+     // ff_aac_psy_end() frees this unconditionally, so it must never be garbage
+     ctx->lp_state      = NULL;
+
+     if(ctx->flags & PSY_MODEL_NO_ST_ATT || PSY_MODEL_MODE(ctx->flags) == PSY_MODE_QUALITY){
+         ctx->flags |= PSY_MODEL_NO_ST_ATT;
+         ctx->stereo_att = 0.5f;
+     }else{
+         ctx->stereo_att = av_clipf(avctx->bit_rate / elements / 192000.0, 0.0f, 0.5f);
+     }
+     if(ctx->flags & PSY_MODEL_NO_LOWPASS || PSY_MODEL_MODE(ctx->flags) == PSY_MODE_QUALITY){
+         ctx->flags |= PSY_MODEL_NO_LOWPASS;
+         ctx->cutoff = 0;
+     }else{
+         float cutoff_ratio;
+         cutoff_ratio = avctx->bit_rate / elements / 8.0 / avctx->sample_rate;
+         ctx->cutoff = -1;
+         if(cutoff_ratio >= 0.5f){
+             ctx->flags |= PSY_MODEL_NO_LOWPASS;
+         }else{
+             ctx->lp_state = av_mallocz(sizeof(LPFilterState) * elements * 2);
+             if(!ctx->lp_state){
+                 av_log(avctx, AV_LOG_ERROR, "Cannot allocate low-pass filter state\n");
+                 return -1;
+             }
+             // rows are sorted by descending cutoff: take the first one that fits
+             for(i = 0; i < sizeof(lp_filter_data)/sizeof(lp_filter_data[0]); i++){
+                 if(lp_filter_data[i][0] <= cutoff_ratio){
+                     ctx->cutoff = i;
+                     break;
+                 }
+             }
+             // nothing fits: fall back to the last row (lowest cutoff)
+             if(ctx->cutoff == -1)
+                 ctx->cutoff = i-1;
+         }
+     }
+     if(ctx->model->init)
+         return ctx->model->init(ctx, elements);
+     return 0;
+ }
+
+ /** Forward the window decision to the selected model's window callback. */
+ void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe)
+ {
+     const AACPsyModel *active = ctx->model;
+
+     active->window(ctx, audio, la, tag, type, cpe);
+ }
+
+ /** Forward the analysis of one channel element to the selected model's process callback. */
+ void ff_aac_psy_analyze(AACPsyContext *ctx, int tag, int type, ChannelElement *cpe)
+ {
+     const AACPsyModel *active = ctx->model;
+
+     active->process(ctx, tag, type, cpe);
+ }
+
+ /**
+  * Release what the generic code allocated and let the model clean up.
+  *
+  * Frees the low-pass filter state, then calls the model's end callback when
+  * a model is selected and provides one.
+  */
+ void av_cold ff_aac_psy_end(AACPsyContext *ctx)
+ {
+     av_freep(&ctx->lp_state);
+     // plain call: a void function must not "return" an expression in C
+     if(ctx->model && ctx->model->end)
+         ctx->model->end(ctx);
+ }
+
+ /**
+  * Preprocess 1024 samples per channel of one channel element.
+  *
+  * Samples are addressed with the codec context's channel count as stride.
+  * Single-channel elements, and pairs with both stereo attenuation and
+  * low-pass disabled, are copied unchanged. Otherwise each sample pair is
+  * optionally cross-mixed using stereo_att, optionally low-pass filtered with
+  * the filter states belonging to element number tag, and clipped to 16 bits.
+  */
+ void ff_aac_psy_preprocess(AACPsyContext *ctx, int16_t *audio, int16_t *dest, int tag, int type)
+ {
+     const int stride = ctx->avctx->channels;
+     const int nb_ch  = type == ID_CPE ? 2 : 1;
+     int n, c;
+
+     if(nb_ch == 1 || (ctx->flags & PSY_MODEL_NO_PREPROC) == PSY_MODEL_NO_PREPROC){
+         // nothing to do for this element: straight copy
+         for(c = 0; c < nb_ch; c++)
+             for(n = 0; n < 1024; n++)
+                 dest[n * stride + c] = audio[n * stride + c];
+         return;
+     }
+
+     for(n = 0; n < 1024; n++){
+         const int16_t *in  = audio + n * stride;
+         int16_t       *out = dest  + n * stride;
+         float t[2];
+
+         if(ctx->flags & PSY_MODEL_NO_ST_ATT){
+             t[0] = in[0];
+             t[1] = in[1];
+         }else{
+             // each output keeps (0.5 + att) of its own channel and takes
+             // (0.5 - att) of the other one
+             t[0] = in[0] * (0.5 + ctx->stereo_att) + in[1] * (0.5 - ctx->stereo_att);
+             t[1] = in[0] * (0.5 - ctx->stereo_att) + in[1] * (0.5 + ctx->stereo_att);
+         }
+         if(!(ctx->flags & PSY_MODEL_NO_LOWPASS)){
+             // two consecutive filter states per element, one per channel
+             LPFilterState *is = (LPFilterState*)ctx->lp_state + tag*2;
+             t[0] = lowpass_iir_filter(is + 0, lp_filter_data[ctx->cutoff], t[0]);
+             t[1] = lowpass_iir_filter(is + 1, lp_filter_data[ctx->cutoff], t[1]);
+         }
+         out[0] = av_clip_int16(t[0]);
+         out[1] = av_clip_int16(t[1]);
+     }
+ }
+
Index: libavcodec/aacpsy.h
===================================================================
--- libavcodec/aacpsy.h (revision 0)
+++ libavcodec/aacpsy.h (revision 0)
@@ -0,0 +1,145 @@
+/*
+ * AAC encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_AACPSY_H
+#define FFMPEG_AACPSY_H
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "aac.h"
+
+ /**
+ * Selectable psychoacoustic models.
+ * The value is passed to ff_aac_psy_init(), which rejects values that are
+ * out of range or have no usable implementation.
+ */
+ enum AACPsyModelType{
+ AAC_PSY_NULL, ///< do nothing on frequencies
+ AAC_PSY_NULL8, ///< do nothing on frequencies but work with short windows
+ AAC_PSY_3GPP, ///< model following recommendations from 3GPP TS 26.403
+
+ AAC_NB_PSY_MODELS ///< total number of psychoacoustic models
+ };
+
+ /**
+ * Rate control mode of the model.
+ * Stored in the bits of the flags word covered by PSY_MODEL_MODE_MASK and
+ * read back with PSY_MODEL_MODE().
+ */
+ enum AACPsyModelMode{
+ PSY_MODE_CBR, ///< follow bitrate as closely as possible
+ PSY_MODE_ABR, ///< try to achieve bitrate but actual bitrate may differ significantly
+ PSY_MODE_QUALITY, ///< try to achieve set quality instead of bitrate
+ };
+
+ #define PSY_MODEL_MODE_MASK 0x0000000F ///< bit field for storing the mode (one of enum AACPsyModelMode)
+ #define PSY_MODEL_NO_PULSE 0x00000010 ///< disable pulse searching
+ #define PSY_MODEL_NO_SWITCH 0x00000020 ///< disable window switching
+ #define PSY_MODEL_NO_ST_ATT 0x00000040 ///< disable stereo attenuation
+ #define PSY_MODEL_NO_LOWPASS 0x00000080 ///< disable low-pass filtering
+
+ /// both preprocessing steps disabled; test with (flags & mask) == mask
+ #define PSY_MODEL_NO_PREPROC (PSY_MODEL_NO_ST_ATT | PSY_MODEL_NO_LOWPASS)
+
+ /// extract the mode (enum AACPsyModelMode) from a flags word
+ #define PSY_MODEL_MODE(a) ((a) & PSY_MODEL_MODE_MASK)
+
+/**
+ * context used by psychoacoustic model
+ */
+typedef struct AACPsyContext {
+ AVCodecContext *avctx;
+ DSPContext dsp;
+
+ int flags;
+ int window_type[2];
+ int window_shape[2];
+ const uint8_t *bands1024;
+ int num_bands1024;
+ const uint8_t *bands128;
+ int num_bands128;
+
+ const struct AACPsyModel *model;
+ void* model_priv_data;
+
+ float stereo_att;
+ int cutoff;
+ void* lp_state;
+}AACPsyContext;
+
+ /**
+ * Description of one psychoacoustic model implementation.
+ * window and process are mandatory (ff_aac_psy_init() rejects a model
+ * without them); init and end may be NULL.
+ */
+ typedef struct AACPsyModel {
+ const char *name; ///< human-readable model name
+ int (*init) (AACPsyContext *apc, int elements); ///< optional; its return value becomes the result of ff_aac_psy_init()
+ void (*window) (AACPsyContext *apc, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe); ///< called by ff_aac_psy_suggest_window()
+ void (*process)(AACPsyContext *apc, int tag, int type, ChannelElement *cpe); ///< called by ff_aac_psy_analyze()
+ void (*end) (AACPsyContext *apc); ///< optional; called by ff_aac_psy_end()
+ }AACPsyModel;
+
+/**
+ * Initialize psychoacoustic model.
+ *
+ * @param ctx model context
+ * @param avctx codec context
+ * @param model model implementation that will be used
+ * @param elements number of channel elements (single channel or channel pair) to handle by model
+ * @param flags model flags, may be ignored by model if unsupported
+ * @param bands1024 scalefactor band lengths for long (1024 samples) frame
+ * @param num_bands1024 number of scalefactor bands for long frame
+ * @param bands128 scalefactor band lengths for short (128 samples) frame
+ * @param num_bands128 number of scalefactor bands for short frame
+ *
+ * @return zero if successful, a negative value if not
+ */
+int ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx,
+ enum AACPsyModelType model, int elements, int flags,
+ const uint8_t *bands1024, int num_bands1024,
+ const uint8_t *bands128, int num_bands128);
+
+/**
+ * Preprocess audio frame in order to compress it better.
+ *
+ * @param ctx model context
+ * @param audio samples to preprocess
+ * @param dest place to put filtered samples
+ * @param tag number of channel element to analyze
+ * @param type channel element type (e.g. ID_SCE or ID_CPE)
+ */
+void ff_aac_psy_preprocess(AACPsyContext *ctx, int16_t *audio, int16_t *dest, int tag, int type);
+
+/**
+ * Set window sequence and related parameters for channel element.
+ *
+ * @param ctx model context
+ * @param audio samples for the current frame
+ * @param la lookahead samples (NULL when unavailable)
+ * @param tag number of channel element to analyze
+ * @param type channel element type (e.g. ID_SCE or ID_CPE)
+ * @param cpe pointer to the current channel element
+ */
+void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe);
+
+/**
+ * Perform psychoacoustic analysis and output coefficients in integer form
+ * along with scalefactors, M/S flags, etc.
+ *
+ * @param ctx model context
+ * @param tag number of channel element to analyze
+ * @param type channel element type (e.g. ID_SCE or ID_CPE)
+ * @param cpe pointer to the current channel element
+ */
+void ff_aac_psy_analyze(AACPsyContext *ctx, int tag, int type, ChannelElement *cpe);
+
+/**
+ * Cleanup model context at the end.
+ *
+ * @param ctx model context
+ */
+void ff_aac_psy_end(AACPsyContext *ctx);
+#endif /* FFMPEG_AACPSY_H */
+
More information about the ffmpeg-devel
mailing list