[FFmpeg-soc] [soc]: r2430 - in aacenc: aac_enc.patch aacenc.c aacpsy.c aacpsy.h
kostya
subversion at mplayerhq.hu
Sat Jun 14 06:57:57 CEST 2008
Author: kostya
Date: Sat Jun 14 06:57:56 2008
New Revision: 2430
Log:
Make the psychoacoustic model less dependent on the encoder and selectable.
For now, the simple model used by the encoder was copied into the null psy model.
Added:
aacenc/aacpsy.c
aacenc/aacpsy.h
Modified:
aacenc/aac_enc.patch
aacenc/aacenc.c
Modified: aacenc/aac_enc.patch
==============================================================================
--- aacenc/aac_enc.patch (original)
+++ aacenc/aac_enc.patch Sat Jun 14 06:57:56 2008
@@ -6,7 +6,7 @@ index d4f6d1c..0ed9057 100644
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
-+OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o mdct.o fft.o mpeg4audio.o
++OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aacpsy.o mdct.o fft.o mpeg4audio.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o
OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3tab.o ac3.o mdct.o fft.o
OBJS-$(CONFIG_AC3_ENCODER) += ac3enc.o ac3tab.o ac3.o
Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c (original)
+++ aacenc/aacenc.c Sat Jun 14 06:57:56 2008
@@ -29,6 +29,8 @@
#include "dsputil.h"
#include "mpeg4audio.h"
+#include "aacpsy.h"
+
// XXX: borrowed from aac.c, move to some header eventually
#include "aactab.h"
@@ -131,65 +133,6 @@ static const struct {
{ -1, NULL , NULL , 0 }, // intensity in-phase
};
-// data structures borrowed from aac.c with some minor modifications
-
-/**
- * Individual Channel Stream
- */
-typedef struct {
- int intensity_present;
- int max_sfb;
- int window_sequence;
- int window_shape; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window
- int window_shape_prev;
- int num_window_groups;
- uint8_t grouping;
- uint8_t group_len[8];
- const uint8_t *swb_sizes;
- int num_swb;
- int num_windows;
- int tns_max_bands;
-} ics_struct;
-
-/**
- * M/S joint channel coding
- */
-typedef struct {
- int present;
- uint8_t mask[8][64];
-} ms_struct;
-
-/**
- * Single Channel Element
- * Used for both SCE and LFE elements
- */
-typedef struct {
- int gain; /**< Channel gain (not used by AAC bitstream).
- * Note that this is applied before joint stereo decoding.
- * Thus, when used inside CPE elements, both channels must have equal gain.
- */
- ics_struct ics;
- int zeroes[64];
- int sf_idx[64];
- int cb[8][64]; ///< Codebooks
- float sf[8][64]; ///< Scalefactors
- DECLARE_ALIGNED_16(float, coeffs[1024]); ///< Coefficients for IMDCT
- DECLARE_ALIGNED_16(float, saved[1024]); ///< Overlap
- DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output
- DECLARE_ALIGNED_16(int, icoefs[1024]); ///< integer coefficients for coding
-} sce_struct;
-
-/**
- * Channel Pair Element
- */
-typedef struct {
- int common_window; ///< Set if channels share a common 'ics_struct' in bitstream
- ms_struct ms;
- sce_struct ch[2];
-} cpe_struct;
-
-// borrowing temporarily ends here
-
typedef struct {
PutBitContext pb;
MDCTContext mdct;
@@ -201,6 +144,7 @@ typedef struct {
uint8_t *swb_sizes;
int swb_num;
cpe_struct cpe;
+ AACPsyContext psy;
} AACEncContext;
#define SCALE_ONE_POS 140
@@ -208,9 +152,6 @@ typedef struct {
#define SCALE_MAX_DIFF 60
#define SCALE_DIFF_ZERO 60
-//borrowed from aac.c
-static float pow2sf_tab[316];
-
/**
* Make AAC audio config object.
* @see 1.6.2.1
@@ -253,66 +194,14 @@ static int aac_encode_init(AVCodecContex
// window init
ff_kbd_window_init(s->kbd_long_1024, 4.0, 1024);
+ ff_aac_psy_init(&s->psy, avctx, AAC_PSY_NULL, 0, s->swb_sizes, s->swb_num);
avctx->extradata = av_malloc(2);
avctx->extradata_size = 2;
put_audio_specific_config(avctx);
- for (i = 0; i < 316; i++)
- pow2sf_tab[i] = pow(2, (i - 200)/4.);
return 0;
}
-static void determine_scales(AVCodecContext *avctx, cpe_struct *cpe, int channel)
-{
- AACEncContext *s = avctx->priv_data;
- int i = 0, j, g, count = 0, maxswb;
- double me, d;
-
- cpe->ch[channel].ics.swb_sizes = s->swb_sizes;
- cpe->ch[channel].ics.num_swb = s->swb_num;
- for(g = 0; g < s->swb_num; g++){
- me = 0.0;
- d = 0.0;
- for(j = 0; j < s->swb_sizes[g]; j++)
- if(cpe->ch[channel].coeffs[i + j] != 0.0){
- me += fabs(cpe->ch[channel].coeffs[i + j]);
- count++;
- }
- if(count)
- me /= count;
- for(j = 0; j < cpe->ch[channel].ics.swb_sizes[g]; j++)
- if(cpe->ch[channel].coeffs[i + j] != 0.0)
- d += (cpe->ch[channel].coeffs[i + j] - me) * (cpe->ch[channel].coeffs[i + j] - me);
- if(count)
- d /= count;
- cpe->ch[channel].zeroes[g] = (me < 0.1 && d < 0.1);
- cpe->ch[channel].sf_idx[g] = SCALE_ONE_POS + g;
- i += cpe->ch[channel].ics.swb_sizes[g];
- }
- cpe->ch[channel].gain = SCALE_ONE_POS;
- for(maxswb = s->swb_num; maxswb > 0 && cpe->ch[channel].zeroes[maxswb-1]; maxswb--);
- cpe->ch[channel].ics.max_sfb = maxswb;
- cpe->ch[channel].ics.window_sequence = 0;
- cpe->ch[channel].ics.window_shape = 1;
-}
-
-/* BIG FAT TODO! */
-/* for now it just converts spectra to integer form */
-static void apply_psychoacoustics(AVCodecContext *avctx, cpe_struct *cpe, int channel)
-{
- AACEncContext *s = avctx->priv_data;
- int i = 0, j, g;
-
- for(g = 0; g < cpe->ch[channel].ics.max_sfb; g++)
- if(cpe->ch[channel].zeroes[g]){
- memset(cpe->ch[channel].icoefs + i, 0, cpe->ch[channel].ics.swb_sizes[g] * sizeof(cpe->ch[0].icoefs[0]));
- i += cpe->ch[channel].ics.swb_sizes[g];
- }else
- for(j = 0; j < cpe->ch[channel].ics.swb_sizes[g]; j++, i++)
- cpe->ch[channel].icoefs[i] = (int)(roundf(cpe->ch[channel].coeffs[i] / pow2sf_tab[cpe->ch[channel].sf_idx[g]+60]));
- memset(cpe->ch[channel].icoefs + i, 0, (1024 - i) * sizeof(cpe->ch[channel].icoefs[0]));
-}
-
static void analyze(AVCodecContext *avctx, AACEncContext *s, cpe_struct *cpe, short *audio, int channel)
{
int i, j;
@@ -328,13 +217,6 @@ static void analyze(AVCodecContext *avct
//convert coefficients into form used by AAC
for(i = 0; i < 1024; i++)
cpe->ch[channel].coeffs[i] = -copysignf(pow(fabsf(cpe->ch[channel].coeffs[i]), 0.75f), cpe->ch[channel].coeffs[i]);
-
- determine_scales(avctx, cpe, channel);
- if(channel == 1){
- cpe->ch[0].ics.max_sfb = FFMAX(cpe->ch[0].ics.max_sfb, cpe->ch[1].ics.max_sfb);
- cpe->common_window = 1;
- }
- apply_psychoacoustics(avctx, cpe, channel);
}
/**
@@ -513,10 +395,21 @@ static int aac_encode_frame(AVCodecConte
AACEncContext *s = avctx->priv_data;
int16_t *samples = data;
+ ff_aac_psy_suggest_window(&s->psy, samples, 0, &s->cpe);
+
analyze(avctx, s, &s->cpe, samples, 0);
if(avctx->channels > 1)
analyze(avctx, s, &s->cpe, samples, 1);
+ ff_aac_psy_analyze(&s->psy, samples, 0, &s->cpe);
+ if(avctx->channels > 1){
+ s->cpe.common_window = s->cpe.ch[0].ics.window_shape == s->cpe.ch[1].ics.window_shape;
+ if(s->cpe.common_window){
+ s->cpe.ch[0].ics.max_sfb = FFMAX(s->cpe.ch[0].ics.max_sfb, s->cpe.ch[1].ics.max_sfb);
+ s->cpe.ch[1].ics.max_sfb = s->cpe.ch[0].ics.max_sfb;
+ }
+ }
+
init_put_bits(&s->pb, frame, buf_size*8);
//output encoded
switch(avctx->channels){
@@ -550,6 +443,7 @@ static int aac_encode_end(AVCodecContext
AACEncContext *s = avctx->priv_data;
ff_mdct_end(&s->mdct);
+ ff_aac_psy_end(&s->psy);
return 0;
}
Added: aacenc/aacpsy.c
==============================================================================
--- (empty file)
+++ aacenc/aacpsy.c Sat Jun 14 06:57:56 2008
@@ -0,0 +1,121 @@
+/*
+ * AAC encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file aacpsy.c
+ * AAC encoder psychoacoustic model
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "aacpsy.h"
+
+//borrowed from aac.c
+// Scalefactor lookup table: ff_aac_psy_init() fills entry i with 2^((i - 200) / 4),
+// so entry 200 is 1.0. The table is shared by all contexts in this file.
+static float pow2sf_tab[316];
+
+
+// Scalefactor index constants; only SCALE_ONE_POS is used by the null model below.
+#define SCALE_ONE_POS 140
+#define SCALE_MAX_POS 255
+#define SCALE_MAX_DIFF 60
+
+/**
+ * Window selection of the null model.
+ *
+ * Every channel reported by the codec context gets the same fixed settings:
+ * window_sequence 0 and window_shape 1 (Kaiser-Bessel according to the
+ * ics_struct documentation). The audio and channel arguments are not examined.
+ */
+static void psy_null_window(AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    const int nb_channels = apc->avctx->channels;
+    int c;
+
+    for(c = 0; c < nb_channels; c++){
+        ics_struct *ics = &cpe->ch[c].ics;
+
+        ics->window_shape    = 1;
+        ics->window_sequence = 0;
+    }
+}
+
+/**
+ * Analysis step of the null model: plain quantization without any masking.
+ *
+ * For each channel, every band gets scalefactor index SCALE_ONE_POS, the
+ * coefficients are divided by the matching pow2sf_tab entry, rounded and
+ * clipped to [-8191, 8191] and stored in icoefs. A band is flagged in zeroes[]
+ * when all of its quantized values are zero, and max_sfb is set so that
+ * trailing all-zero bands are dropped. The audio and channel arguments are
+ * not examined.
+ */
+static void psy_null_process(AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    int ch;
+
+    for(ch = 0; ch < apc->avctx->channels; ch++){
+        sce_struct *sce = &cpe->ch[ch];
+        int offset = 0, band, last;
+
+        sce->gain = SCALE_ONE_POS;
+        for(band = 0; band < apc->num_bands; band++){
+            const int width = apc->bands[band];
+            int nonzero = 0, k;
+            float scale;
+
+            sce->sf_idx[band] = SCALE_ONE_POS;
+            scale = pow2sf_tab[sce->sf_idx[band] + 60];
+            for(k = 0; k < width; k++){
+                const int q = av_clip((int)(roundf(sce->coeffs[offset + k] / scale)), -8191, 8191);
+
+                sce->icoefs[offset + k] = q;
+                if(q)
+                    nonzero++;
+            }
+            sce->zeroes[band] = !nonzero;
+            offset += width;
+        }
+
+        // walk back over the trailing bands that quantized to nothing
+        last = apc->num_bands;
+        while(last > 0 && sce->zeroes[last - 1])
+            last--;
+        sce->ics.max_sfb = last;
+    }
+}
+
+/**
+ * Table of available models, indexed by the model number passed to
+ * ff_aac_psy_init(). Initializers are positional and follow the member
+ * order of AACPsyModel.
+ */
+static const AACPsyModel psy_models[AAC_NB_PSY_MODELS] =
+{
+ {
+ "Null model", // name
+ NULL, // init: none needed
+ psy_null_window, // window
+ psy_null_process, // process
+ NULL, // end: none needed
+ },
+};
+
+/**
+ * Set up a psychoacoustic model context.
+ *
+ * Fills the shared scalefactor lookup table, stores the codec context and the
+ * band layout in ctx, initializes ctx->dsp and binds the requested model.
+ *
+ * @param ctx       context to initialize
+ * @param avctx     codec context; stored in ctx and used for logging
+ * @param model     index into psy_models[]
+ * @param flags     currently unused
+ * @param bands     band widths; the pointer is stored, the data is not copied
+ * @param num_bands number of entries in bands
+ * @return -1 if the model index is out of range or the model lacks a window or
+ *         process callback; otherwise the result of the model's init callback,
+ *         or 0 when the model has none
+ */
+int ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx, int model, int flags,
+                    const uint8_t *bands, int num_bands)
+{
+    int i;
+
+    // model is a signed int: a negative value must be rejected too, otherwise
+    // it would index before the start of psy_models[]
+    if(model < 0 || model >= AAC_NB_PSY_MODELS || !psy_models[model].window || !psy_models[model].process){
+        av_log(avctx, AV_LOG_ERROR, "Invalid psy model\n");
+        return -1;
+    }
+
+    // entry i holds 2^((i - 200) / 4)
+    for (i = 0; i < 316; i++)
+        pow2sf_tab[i] = pow(2, (i - 200)/4.);
+
+    ctx->avctx = avctx;
+    ctx->bands = bands;
+    ctx->num_bands = num_bands;
+    dsputil_init(&ctx->dsp, avctx);
+    ctx->model = &psy_models[model];
+
+    if(ctx->model->init)
+        return ctx->model->init(ctx);
+    return 0;
+}
+
+/**
+ * Let the bound model pick window parameters; all arguments are forwarded
+ * unchanged to the model's window callback.
+ */
+void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    const AACPsyModel *psy = ctx->model;
+
+    psy->window(ctx, audio, channel, cpe);
+}
+
+/**
+ * Run the bound model's analysis step; all arguments are forwarded
+ * unchanged to the model's process callback.
+ */
+void ff_aac_psy_analyze(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe)
+{
+    const AACPsyModel *psy = ctx->model;
+
+    psy->process(ctx, audio, channel, cpe);
+}
+
+/**
+ * Release model resources by invoking the bound model's end callback, if it
+ * has one. Does nothing when no model is bound.
+ */
+void ff_aac_psy_end(AACPsyContext *ctx)
+{
+    // ff_aac_psy_init() returns on error before assigning ctx->model, so the
+    // pointer may still be NULL here if ctx started out zeroed
+    if(!ctx->model)
+        return;
+
+    // end() returns void: call it as a statement, a void function must not
+    // return an expression
+    if(ctx->model->end)
+        ctx->model->end(ctx);
+}
Added: aacenc/aacpsy.h
==============================================================================
--- (empty file)
+++ aacenc/aacpsy.h Sat Jun 14 06:57:56 2008
@@ -0,0 +1,123 @@
+/*
+ * AAC encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_AACPSY_H
+#define FFMPEG_AACPSY_H
+
+#include "avcodec.h"
+#include "dsputil.h"
+
+/**
+ * Identifiers of the available psychoacoustic models.
+ */
+enum AACPsyModelType{
+ AAC_PSY_NULL, ///< null model: no psychoacoustic processing of the spectrum
+
+ AAC_NB_PSY_MODELS ///< number of models; used as a size and bound, not a model itself
+};
+
+// data structures borrowed from aac.c with some minor modifications
+
+/**
+ * Individual Channel Stream
+ *
+ * Per-channel window and band information. The null psy model writes only
+ * max_sfb, window_sequence and window_shape.
+ */
+typedef struct {
+ int intensity_present;
+ int max_sfb; ///< number of bands up to and including the last one with non-zero quantized data (as set by the null model)
+ int window_sequence; ///< the null model always sets 0
+ int window_shape; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window
+ int window_shape_prev; // NOTE(review): presumably window_shape of the previous frame - confirm
+ int num_window_groups;
+ uint8_t grouping;
+ uint8_t group_len[8];
+ const uint8_t *swb_sizes; // NOTE(review): presumably the width of each scalefactor band - confirm
+ int num_swb; // NOTE(review): presumably the number of entries in swb_sizes - confirm
+ int num_windows;
+ int tns_max_bands;
+} ics_struct;
+
+/**
+ * M/S joint channel coding
+ */
+typedef struct {
+ int present;
+ uint8_t mask[8][64]; // NOTE(review): presumably indexed [window group][band] - confirm
+} ms_struct;
+
+/**
+ * Single Channel Element
+ * Used for both SCE and LFE elements
+ *
+ * The null psy model reads coeffs and writes gain, sf_idx, zeroes, icoefs
+ * and ics.max_sfb.
+ */
+typedef struct {
+ int gain; /**< Channel gain (not used by AAC bitstream).
+ * Note that this is applied before joint stereo decoding.
+ * Thus, when used inside CPE elements, both channels must have equal gain.
+ */
+ ics_struct ics;
+ int zeroes[64]; ///< per band: non-zero when every quantized coefficient of the band is zero
+ int sf_idx[64]; ///< per band: scalefactor index used for quantization
+ int cb[8][64]; ///< Codebooks
+ float sf[8][64]; ///< Scalefactors
+ DECLARE_ALIGNED_16(float, coeffs[1024]); ///< Coefficients for IMDCT
+ DECLARE_ALIGNED_16(float, saved[1024]); ///< Overlap
+ DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output
+ DECLARE_ALIGNED_16(int, icoefs[1024]); ///< integer coefficients for coding
+} sce_struct;
+
+/**
+ * Channel Pair Element
+ *
+ * Holds the data of up to two channels; the psy model callbacks receive a
+ * pointer to this structure and index ch[] by channel number.
+ */
+typedef struct {
+ int common_window; ///< Set if channels share a common 'ics_struct' in bitstream
+ ms_struct ms;
+ sce_struct ch[2];
+} cpe_struct;
+
+// borrowing temporarily ends here
+
+/**
+ * context used by psychoacoustic model
+ *
+ * Filled in by ff_aac_psy_init().
+ */
+typedef struct AACPsyContext {
+ AVCodecContext *avctx; ///< codec context passed to ff_aac_psy_init()
+ DSPContext dsp; ///< initialized with dsputil_init() in ff_aac_psy_init()
+
+ int window_type[2];
+ int window_shape[2];
+ const uint8_t *bands; ///< band widths, pointer supplied by the caller of ff_aac_psy_init()
+ int num_bands; ///< number of entries in bands
+
+ const struct AACPsyModel *model; ///< model selected in ff_aac_psy_init()
+ void* model_priv_data; // NOTE(review): presumably private storage owned by the model - confirm
+}AACPsyContext;
+
+/**
+ * Description of one psychoacoustic model: a name plus its callbacks.
+ * ff_aac_psy_init() rejects models without window or process callbacks;
+ * init and end may be NULL.
+ */
+typedef struct AACPsyModel {
+ const char *name;
+ int (*init) (AACPsyContext *apc); ///< optional; its return value is returned by ff_aac_psy_init()
+ void (*window) (AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe); ///< required; called by ff_aac_psy_suggest_window()
+ void (*process)(AACPsyContext *apc, int16_t *audio, int channel, cpe_struct *cpe); ///< required; called by ff_aac_psy_analyze()
+ void (*end) (AACPsyContext *apc); ///< optional; called by ff_aac_psy_end()
+}AACPsyModel;
+
+/**
+ * Initialize ctx for the given model number and band layout.
+ * @return -1 for an invalid model, otherwise the model's init result or 0
+ */
+int ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx, int model, int flags,
+ const uint8_t *bands, int num_bands);
+/** Forward to the selected model's window callback. */
+void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe);
+/** Forward to the selected model's process callback. */
+void ff_aac_psy_analyze(AACPsyContext *ctx, int16_t *audio, int channel, cpe_struct *cpe);
+/** Invoke the selected model's end callback, if it has one. */
+void ff_aac_psy_end(AACPsyContext *ctx);
+#endif /* FFMPEG_AACPSY_H */
+
More information about the FFmpeg-soc
mailing list