[FFmpeg-soc] [soc]: r2947 - in aacenc: aacenc.c aacpsy.c aacpsy.h

Thu Jul 31 07:07:28 CEST 2008

Author: kostya
Date: Thu Jul 31 07:07:28 2008
New Revision: 2947

Log:
Final (or at least release-candidate) API for the psychoacoustic model

Modified:
   aacenc/aacenc.c
   aacenc/aacpsy.c
   aacenc/aacpsy.h

Modified: aacenc/aacenc.c
==============================================================================

--- aacenc/aacenc.c	(original)
+++ aacenc/aacenc.c	Thu Jul 31 07:07:28 2008
@@ -38,19 +38,6 @@ DECLARE_ALIGNED_16(static float, sine_lo
 DECLARE_ALIGNED_16(static float, sine_short_128[128]);
 
 #include "aactab.h"
-/**
- * IDs for raw_data_block
- */
-enum {
-    ID_SCE = 0x0,
-    ID_CPE,
-    ID_CCE,
-    ID_LFE,
-    ID_DSE,
-    ID_PCE,
-    ID_FIL,
-    ID_END
-};
 
 static const uint8_t swb_size_1024_96[] = {
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
@@ -243,7 +230,7 @@ static av_cold int aac_encode_init(AVCod
 
     s->cpe = av_mallocz(sizeof(ChannelElement) * ((avctx->channels + 1) >> 1));
     //TODO: psy model selection with some option
-    ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, 0, s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128);
+    ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, (avctx->channels + 1) >> 1, 0, s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128);
     avctx->extradata = av_malloc(2);
     avctx->extradata_size = 2;
     put_audio_specific_config(avctx);
@@ -698,7 +685,7 @@ static int aac_encode_frame(AVCodecConte
     AACEncContext *s = avctx->priv_data;
     int16_t *samples = s->samples, *samples2;
     ChannelElement *cpe;
-    int i, j, chans;
+    int i, j, chans, tag;
 
     if(!samples){
         s->samples = av_malloc(1024 * avctx->channels * sizeof(s->samples[0]));
@@ -718,14 +705,15 @@ static int aac_encode_frame(AVCodecConte
     }*/
     for(i = 0; i < avctx->channels; i += 2){
         chans = FFMIN(avctx->channels - i, 2);
-        cpe = &s->cpe[i/2];
+        tag = chans > 1 ? ID_CPE : ID_SCE;
+        cpe = &s->cpe[(i + 1)/2];
         samples2 = samples + i;
-        ff_aac_psy_suggest_window(&s->psy, samples2, data, i, cpe);
+        ff_aac_psy_suggest_window(&s->psy, samples2, data, (i + 1) >> 1, tag, cpe);
         for(j = 0; j < chans; j++){
             analyze(avctx, s, cpe, samples2, j);
         }
-        ff_aac_psy_analyze(&s->psy, i, cpe);
-        put_bits(&s->pb, 3, chans > 1 ? ID_CPE : ID_SCE);
+        ff_aac_psy_analyze(&s->psy, (i + 1) >> 1, tag, cpe);
+        put_bits(&s->pb, 3, tag);
         put_bits(&s->pb, 4, i >> 1);
         if(chans == 2){
             put_bits(&s->pb, 1, cpe->common_window);

Modified: aacenc/aacpsy.c
==============================================================================
--- aacenc/aacpsy.c	(original)
+++ aacenc/aacpsy.c	Thu Jul 31 07:07:28 2008
@@ -76,12 +76,11 @@ static inline float calc_distortion(floa
 /**
  * Produce integer coefficients from scalefactors provided by model.
  */
-static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int channel, int search_pulses)
+static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int chans, int search_pulses)
 {
     int i, w, w2, g, ch;
     int start, sum, maxsfb, cmaxsfb;
     int pulses, poff[4], pamp[4];
-    int chans = FFMIN(apc->avctx->channels - channel, 2);
 
     for(ch = 0; ch < chans; ch++){
         start = 0;
@@ -155,7 +154,7 @@ static void psy_create_output(AACPsyCont
         }
     }
 
-    if(apc->avctx->channels - channel > 1 && cpe->common_window){
+    if(chans > 1 && cpe->common_window){
         int msc = 0;
         cpe->ch[0].ics.max_sfb = FFMAX(cpe->ch[0].ics.max_sfb, cpe->ch[1].ics.max_sfb);
         cpe->ch[1].ics.max_sfb = cpe->ch[0].ics.max_sfb;
@@ -167,10 +166,10 @@ static void psy_create_output(AACPsyCont
     }
 }
 
-static void psy_null_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
+static void psy_null_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe)
 {
     int ch;
-    int chans = FFMIN(apc->avctx->channels - channel, 2);
+    int chans = type == ID_CPE ? 2 : 1;
 
     for(ch = 0; ch < chans; ch++){
         cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE;
@@ -183,12 +182,12 @@ static void psy_null_window(AACPsyContex
     cpe->common_window = cpe->ch[0].ics.use_kb_window[0] == cpe->ch[1].ics.use_kb_window[0];
 }
 
-static void psy_null_process(AACPsyContext *apc, int channel, ChannelElement *cpe)
+static void psy_null_process(AACPsyContext *apc, int tag, int type, ChannelElement *cpe)
 {
     int start;
     int ch, g, i;
     int minscale;
-    int chans = FFMIN(apc->avctx->channels - channel, 2);
+    int chans = type == ID_CPE ? 2 : 1;
 
     for(ch = 0; ch < chans; ch++){
         start = 0;
@@ -222,13 +221,13 @@ static void psy_null_process(AACPsyConte
             if(!cpe->ch[ch].zeroes[0][g])
                 cpe->ch[ch].sf_idx[0][g] = FFMIN(minscale + SCALE_MAX_DIFF, cpe->ch[ch].sf_idx[0][g]);
     }
-    psy_create_output(apc, cpe, channel, 1);
+    psy_create_output(apc, cpe, chans, 1);
 }
 
-static void psy_null8_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
+static void psy_null8_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe)
 {
     int ch, i;
-    int chans = FFMIN(apc->avctx->channels - channel, 2);
+    int chans = type == ID_CPE ? 2 : 1;
 
     for(ch = 0; ch < chans; ch++){
         int prev_seq = cpe->ch[ch].ics.window_sequence_prev;
@@ -259,11 +258,11 @@ static void psy_null8_window(AACPsyConte
     cpe->common_window = cpe->ch[0].ics.use_kb_window[0] == cpe->ch[1].ics.use_kb_window[0];
 }
 
-static void psy_null8_process(AACPsyContext *apc, int channel, ChannelElement *cpe)
+static void psy_null8_process(AACPsyContext *apc, int tag, int type, ChannelElement *cpe)
 {
     int start;
     int w, ch, g, i;
-    int chans = FFMIN(apc->avctx->channels - channel, 2);
+    int chans = type == ID_CPE ? 2 : 1;
 
     //detect M/S
     if(chans > 1 && cpe->common_window){
@@ -287,7 +286,7 @@ static void psy_null8_process(AACPsyCont
             }
         }
     }
-    psy_create_output(apc, cpe, channel, 0);
+    psy_create_output(apc, cpe, chans, 0);
 }
 
 /**
@@ -379,7 +378,7 @@ static inline float ath(float f, float a
             + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
 }
 
-static av_cold int psy_3gpp_init(AACPsyContext *apc)
+static av_cold int psy_3gpp_init(AACPsyContext *apc, int elements)
 {
     Psy3gppContext *pctx;
     int i, g, start;
@@ -458,10 +457,10 @@ static const uint8_t window_grouping[9] 
  * Tell encoder which window types to use.
  * @see 3GPP TS26.403 5.4.1
  */
-static void psy_3gpp_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
+static void psy_3gpp_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe)
 {
     int ch;
-    int chans = FFMIN(apc->avctx->channels - channel, 2);
+    int chans = type == ID_CPE ? 2 : 1;
     int i, j;
     int br = apc->avctx->bit_rate / apc->avctx->channels;
     int attack_ratio = (br <= 16000 + 8000*chans) ? 18 : 10;
@@ -577,7 +576,7 @@ static void calc_pe(Psy3gppBand *band, i
  * Determine scalefactors and prepare coefficients for encoding.
  * @see 3GPP TS26.403 5.4
  */
-static void psy_3gpp_process(AACPsyContext *apc, int channel, ChannelElement *cpe)
+static void psy_3gpp_process(AACPsyContext *apc, int tag, int type, ChannelElement *cpe)
 {
     int start;
     int ch, w, w2, g, g2, i;
@@ -585,7 +584,7 @@ static void psy_3gpp_process(AACPsyConte
     Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
     float stereo_att, pe_target;
     int bits_avail;
-    const int chans = FFMIN(apc->avctx->channels - channel, 2);
+    int chans = type == ID_CPE ? 2 : 1;
     Psy3gppChannel *pch = &pctx->ch;
 
     //calculate and apply stereo attenuation factor - 5.2
@@ -786,7 +785,7 @@ static void psy_3gpp_process(AACPsyConte
     }
 
     memcpy(pch->prev_band, pch->band, sizeof(pch->band));
-    psy_create_output(apc, cpe, channel, 0);
+    psy_create_output(apc, cpe, chans, 0);
 }
 
 static av_cold void psy_3gpp_end(AACPsyContext *apc)
@@ -819,7 +818,8 @@ static const AACPsyModel psy_models[AAC_
     },
 };
 
-int av_cold ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx, int model, int flags,
+int av_cold ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx,
+                            int model, int elements, int flags,
                             const uint8_t *bands1024, int num_bands1024,
                             const uint8_t *bands128,  int num_bands128)
 {
@@ -842,18 +842,18 @@ int av_cold ff_aac_psy_init(AACPsyContex
     ctx->model = &psy_models[model];
 
     if(ctx->model->init)
-        return ctx->model->init(ctx);
+        return ctx->model->init(ctx, elements);
     return 0;
 }
 
-void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe)
+void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe)
 {
-    ctx->model->window(ctx, audio, la, channel, cpe);
+    ctx->model->window(ctx, audio, la, tag, type, cpe);
 }
 
-void ff_aac_psy_analyze(AACPsyContext *ctx, int channel, ChannelElement *cpe)
+void ff_aac_psy_analyze(AACPsyContext *ctx, int tag, int type, ChannelElement *cpe)
 {
-    ctx->model->process(ctx, channel, cpe);
+    ctx->model->process(ctx, tag, type, cpe);
 }
 
 void av_cold ff_aac_psy_end(AACPsyContext *ctx)

Modified: aacenc/aacpsy.h
==============================================================================
--- aacenc/aacpsy.h	(original)
+++ aacenc/aacpsy.h	Thu Jul 31 07:07:28 2008
@@ -45,6 +45,20 @@ enum WindowSequence {
 };
 
 /**
+ * IDs for raw_data_block
+ */
+enum {
+    ID_SCE = 0x0,
+    ID_CPE,
+    ID_CCE,
+    ID_LFE,
+    ID_DSE,
+    ID_PCE,
+    ID_FIL,
+    ID_END
+};
+
+/**
  * special codebooks
  */
 enum Codebook {
@@ -184,17 +198,18 @@ typedef struct AACPsyContext {
 
 typedef struct AACPsyModel {
     const char *name;
-    int   (*init)   (AACPsyContext *apc);
-    void  (*window) (AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe);
-    void  (*process)(AACPsyContext *apc,int channel, ChannelElement *cpe);
+    int   (*init)   (AACPsyContext *apc, int elements);
+    void  (*window) (AACPsyContext *apc, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe);
+    void  (*process)(AACPsyContext *apc, int tag, int type, ChannelElement *cpe);
     void  (*end)    (AACPsyContext *apc);
 }AACPsyModel;
 
-int ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx, int model, int flags,
+int ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx,
+                    int model, int elements, int flags,
                     const uint8_t *bands1024, int num_bands1024,
                     const uint8_t *bands128,  int num_bands128);
-void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe);
-void ff_aac_psy_analyze(AACPsyContext *ctx, int channel, ChannelElement *cpe);
+void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la, int tag, int type, ChannelElement *cpe);
+void ff_aac_psy_analyze(AACPsyContext *ctx, int tag, int type, ChannelElement *cpe);
 void ff_aac_psy_end(AACPsyContext *ctx);
 #endif /* FFMPEG_AACPSY_H */