[FFmpeg-soc] [soc]: r3010 - in aacenc: aacenc.c aacpsy.c aacpsy.h
kostya
subversion at mplayerhq.hu
Mon Aug 4 18:43:23 CEST 2008
Author: kostya
Date: Mon Aug 4 18:43:22 2008
New Revision: 3010
Log:
Audio preprocessing for better compression
Modified:
aacenc/aacenc.c
aacenc/aacpsy.c
aacenc/aacpsy.h
Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c (original)
+++ aacenc/aacenc.c Mon Aug 4 18:43:22 2008
@@ -223,6 +223,7 @@ static av_cold int aac_encode_init(AVCod
ff_sine_window_init(ff_aac_sine_long_1024, 1024);
ff_sine_window_init(ff_aac_sine_short_128, 128);
+ s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
//TODO: psy model selection with some option
ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, aac_chan_configs[avctx->channels-1][0], 0, s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128);
@@ -645,9 +646,18 @@ static int aac_encode_frame(AVCodecConte
const uint8_t *chan_map = aac_chan_configs[avctx->channels-1];
int chan_el_counter[4];
- if(!samples){
- s->samples = av_malloc(1024 * avctx->channels * sizeof(s->samples[0]));
- memcpy(s->samples, data, 1024 * avctx->channels * sizeof(s->samples[0]));
+ if(data){
+ start_ch = 0;
+ samples2 = s->samples + 1024 * avctx->channels;
+ for(i = 0; i < chan_map[0]; i++){
+ tag = chan_map[i+1];
+ chans = tag == ID_CPE ? 2 : 1;
+ ff_aac_psy_preprocess(&s->psy, (uint16_t*)data + start_ch, samples2 + start_ch, i, tag);
+ start_ch += chans;
+ }
+ }
+ if(!avctx->frame_number){
+ memmove(s->samples, s->samples + 1024 * avctx->channels, 1024 * avctx->channels * sizeof(s->samples[0]));
return 0;
}
@@ -662,7 +672,8 @@ static int aac_encode_frame(AVCodecConte
chans = tag == ID_CPE ? 2 : 1;
cpe = &s->cpe[i];
samples2 = samples + start_ch;
- la = (uint16_t*)data + start_ch;
+ la = samples2 + 1024 * avctx->channels + start_ch;
+ if(!data) la = NULL;
ff_aac_psy_suggest_window(&s->psy, samples2, la, i, tag, cpe);
for(j = 0; j < chans; j++){
analyze(avctx, s, cpe, samples2, j);
@@ -687,9 +698,7 @@ static int aac_encode_frame(AVCodecConte
flush_put_bits(&s->pb);
avctx->frame_bits = put_bits_count(&s->pb);
- if(data){
- memcpy(s->samples, data, 1024 * avctx->channels * sizeof(s->samples[0]));
- }
+ memmove(s->samples, s->samples + 1024 * avctx->channels, 1024 * avctx->channels * sizeof(s->samples[0]));
return put_bits_count(&s->pb)>>3;
}
Modified: aacenc/aacpsy.c
==============================================================================
--- aacenc/aacpsy.c (original)
+++ aacenc/aacpsy.c Mon Aug 4 18:43:22 2008
@@ -581,23 +581,11 @@ static void psy_3gpp_process(AACPsyConte
int ch, w, w2, g, g2, i;
int prev_scale;
Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
- float stereo_att, pe_target;
+ float pe_target;
int bits_avail;
int chans = type == ID_CPE ? 2 : 1;
Psy3gppChannel *pch = &pctx->ch[tag];
- //calculate and apply stereo attenuation factor - 5.2
- if(chans > 1 && cpe->common_window){
- float l, r;
- stereo_att = 1.0 / 2.0; //XXX: find some way to determine it
- for(i = 0; i < 1024; i++){
- l = cpe->ch[0].coeffs[i];
- r = cpe->ch[1].coeffs[i];
- cpe->ch[0].coeffs[i] = (0.5 + stereo_att) * l + (0.5 - stereo_att) * r;
- cpe->ch[1].coeffs[i] = (0.5 - stereo_att) * l + (0.5 + stereo_att) * r;
- }
- }
-
//calculate energies, initial thresholds and related values - 5.4.2
memset(pch->band, 0, sizeof(pch->band));
for(ch = 0; ch < chans; ch++){
@@ -858,6 +846,51 @@ static const AACPsyModel psy_models[AAC_
},
};
+// low-pass filter declarations and code
+#define IIR_ORDER 4
+
+/**
+ * Filter data for the 4th order IIR lowpass Butterworth filter.
+ * One row per supported cutoff; rows are sorted by descending cutoff frequency.
+ * Row layout (IIR_ORDER+2 floats):
+ * normalized cutoff frequency | inverse filter gain | IIR_ORDER coefficients applied to past outputs
+ */
+static const float lp_filter_data[][IIR_ORDER+2] = {
+ { 0.4535147392, 6.816645e-01, -0.4646665999, -2.2127207402, -3.9912017501, -3.2380429984 },
+ { 0.4166666667, 4.998150e-01, -0.2498216698, -1.3392807613, -2.7693097862, -2.6386277439 },
+ { 0.3628117914, 3.103469e-01, -0.0965076902, -0.5977763360, -1.4972580903, -1.7740085241 },
+ { 0.3333333333, 2.346995e-01, -0.0557639007, -0.3623690447, -1.0304538354, -1.3066051440 },
+ { 0.2916666667, 1.528432e-01, -0.0261686639, -0.1473794606, -0.6204721225, -0.6514716536 },
+ { 0.2267573696, 6.917529e-02, -0.0202414073, 0.0780167640, -0.5277442247, 0.3631641670 },
+ { 0.2187500000, 6.178391e-02, -0.0223681543, 0.1069446609, -0.5615167033, 0.4883976841 },
+ { 0.2083333333, 5.298685e-02, -0.0261686639, 0.1473794606, -0.6204721225, 0.6514716536 },
+ { 0.1587301587, 2.229030e-02, -0.0647354087, 0.4172275190, -1.1412129810, 1.4320761385 },
+ { 0.1458333333, 1.693903e-02, -0.0823177861, 0.5192354923, -1.3444768251, 1.6365345642 },
+ { 0.1133786848, 7.374053e-03, -0.1481421788, 0.8650973862, -1.9894244796, 2.1544844308 },
+ { 0.1041666667, 5.541768e-03, -0.1742301048, 0.9921936565, -2.2090801108, 2.3024482658 },
+};
+
+/**
+ * IIR filter state: sample history of one channel, newest entry at index IIR_ORDER
+ */
+typedef struct LPFilterState{
+ float x[IIR_ORDER + 1]; ///< gain-scaled input samples, oldest first
+ float y[IIR_ORDER + 1]; ///< filter output samples, oldest first
+}LPFilterState;
+
+static av_always_inline float lowpass_iir_filter(LPFilterState *s, const float *coeffs, float in)
+{ // push one input sample through the filter, update the state in s and return the new output sample
+ memmove(s->x, s->x + 1, sizeof(s->x) - sizeof(s->x[0])); // drop the oldest input, freeing slot IIR_ORDER
+ memmove(s->y, s->y + 1, sizeof(s->y) - sizeof(s->y[0])); // same one-slot shift for the output history
+ s->x[IIR_ORDER] = in * coeffs[1]; // coeffs[1] scales the input; coeffs[0] is not read here
+ //FIXME: hard-coded for IIR_ORDER == 4: fixed 1-4-6-4-1 weights on x[], coeffs[2..5] on y[0..3]
+ s->y[IIR_ORDER] = (s->x[0] + s->x[4])*1 + (s->x[1] + s->x[3])*4 + s->x[2]*6
+ + coeffs[2]*s->y[0] + coeffs[3]*s->y[1] + coeffs[4]*s->y[2] + coeffs[5]*s->y[3];
+ return s->y[IIR_ORDER];
+}
+
+// low-pass filter code ends here
+
int av_cold ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx,
enum AACPsyModelType model, int elements, int flags,
const uint8_t *bands1024, int num_bands1024,
@@ -882,6 +915,32 @@ int av_cold ff_aac_psy_init(AACPsyContex
dsputil_init(&ctx->dsp, avctx);
ctx->model = &psy_models[model];
+ if(ctx->flags & PSY_MODEL_NO_ST_ATT)
+ ctx->stereo_att = 0.5f;
+ else{
+ ctx->stereo_att = 0.5f;//todo: adaptive
+ }
+ if(ctx->flags & PSY_MODEL_NO_LOWPASS || PSY_MODEL_MODE(ctx->flags) == PSY_MODE_QUALITY){
+ ctx->flags |= PSY_MODEL_NO_LOWPASS;
+ ctx->cutoff = 0;
+ }else{
+ float cutoff_ratio;
+ cutoff_ratio = avctx->bit_rate / elements / 8.0 / avctx->sample_rate;
+ ctx->cutoff = -1;
+ if(cutoff_ratio >= 0.5f){
+ ctx->flags |= PSY_MODEL_NO_LOWPASS;
+ }else{
+ ctx->lp_state = av_mallocz(sizeof(LPFilterState) * elements * 2);
+ for(i = 0; i < sizeof(lp_filter_data)/sizeof(lp_filter_data[0]); i++){
+ if(lp_filter_data[i][0] <= cutoff_ratio){
+ ctx->cutoff = i;
+ break;
+ }
+ }
+ if(ctx->cutoff == -1)
+ ctx->cutoff = i-1;
+ }
+ }
if(ctx->model->init)
return ctx->model->init(ctx, elements);
return 0;
@@ -899,6 +958,41 @@ void ff_aac_psy_analyze(AACPsyContext *c
void av_cold ff_aac_psy_end(AACPsyContext *ctx)
{
+ av_freep(&ctx->lp_state); // release the low-pass filter states that ff_aac_psy_init may have allocated
if(ctx->model->end)
return ctx->model->end(ctx);
}
+
+void ff_aac_psy_preprocess(AACPsyContext *ctx, int16_t *audio, int16_t *dest, int tag, int type)
+{ // copy or filter 1024 interleaved samples of one channel element from audio into dest
+ int chans = type == ID_CPE ? 2 : 1; // ID_CPE elements carry two channels, every other type one
+ const int chstride = ctx->avctx->channels; // distance between successive samples of the same channel
+ int i, ch;
+ float t[2]; // working sample pair for the two channels of a pair element
+
+ if(chans == 1 || (ctx->flags & PSY_MODEL_NO_PREPROC) == PSY_MODEL_NO_PREPROC){ // single channel, or attenuation and low-pass both disabled
+ for(ch = 0; ch < chans; ch++){
+ for(i = 0; i < 1024; i++){
+ dest[i * chstride + ch] = audio[i * chstride + ch]; // plain copy; note single-channel elements are never low-passed here
+ }
+ }
+ }else{
+ for(i = 0; i < 1024; i++){
+ if(ctx->flags & PSY_MODEL_NO_ST_ATT){
+ for(ch = 0; ch < 2; ch++)
+ t[ch] = audio[i * chstride + ch]; // attenuation disabled: take both samples unchanged
+ }else{
+ t[0] = audio[i * chstride + 0] * (0.5 + ctx->stereo_att) + audio[i * chstride + 1] * (0.5 - ctx->stereo_att); // keeps (0.5 + stereo_att) of channel 0, mixes in (0.5 - stereo_att) of channel 1
+ t[1] = audio[i * chstride + 0] * (0.5 - ctx->stereo_att) + audio[i * chstride + 1] * (0.5 + ctx->stereo_att); // mirrored weights for channel 1
+ }
+ if(!(ctx->flags & PSY_MODEL_NO_LOWPASS)){
+ LPFilterState *is = (LPFilterState*)ctx->lp_state + tag*2; // two filter states per element, selected by tag
+ for(ch = 0; ch < 2; ch++)
+ t[ch] = lowpass_iir_filter(is + ch, lp_filter_data[ctx->cutoff], t[ch]); // ctx->cutoff indexes the row of lp_filter_data to use
+ }
+ for(ch = 0; ch < 2; ch++)
+ dest[i * chstride + ch] = av_clip_int16(t[ch]); // av_clip_int16 presumably saturates to the int16_t range - confirm against libavutil
+ }
+ }
+}
+
Modified: aacenc/aacpsy.h
==============================================================================
--- aacenc/aacpsy.h (original)
+++ aacenc/aacpsy.h Mon Aug 4 18:43:22 2008
@@ -43,6 +43,10 @@ enum AACPsyModelMode{
#define PSY_MODEL_MODE_MASK 0x0000000F ///< bit fields for storing mode (CBR, ABR, VBR)
#define PSY_MODEL_NO_PULSE 0x00000010 ///< disable pulse searching
#define PSY_MODEL_NO_SWITCH 0x00000020 ///< disable window switching
+#define PSY_MODEL_NO_ST_ATT 0x00000040 ///< disable stereo attenuation
+#define PSY_MODEL_NO_LOWPASS 0x00000080 ///< disable low-pass filtering
+
+#define PSY_MODEL_NO_PREPROC (PSY_MODEL_NO_ST_ATT | PSY_MODEL_NO_LOWPASS)
#define PSY_MODEL_MODE(a) ((a) & PSY_MODEL_MODE_MASK)
@@ -63,6 +67,10 @@ typedef struct AACPsyContext {
const struct AACPsyModel *model;
void* model_priv_data;
+
+ float stereo_att;
+ int cutoff;
+ void* lp_state;
}AACPsyContext;
typedef struct AACPsyModel {
@@ -94,6 +102,17 @@ int ff_aac_psy_init(AACPsyContext *ctx,
const uint8_t *bands128, int num_bands128);
/**
+ * Preprocess audio frame in order to compress it better.
+ *
+ * @param ctx model context
+ * @param audio samples to preprocess
+ * @param dest place to put filtered samples
+ * @param tag number of channel element to analyze
+ * @param type channel element type (e.g. ID_SCE or ID_CPE)
+ */
+void ff_aac_psy_preprocess(AACPsyContext *ctx, int16_t *audio, int16_t *dest, int tag, int type);
+
+/**
* Set window sequence and related parameters for channel element.
*
* @param ctx model context
More information about the FFmpeg-soc
mailing list