[FFmpeg-soc] [soc]: r3702 - in aacenc: aac_enc.patch aacenc.c aacpsy.c aacpsy.h lowpass.c lowpass.h psymodel.c psymodel.h
kostya
subversion at mplayerhq.hu
Tue Sep 2 08:14:15 CEST 2008
Author: kostya
Date: Tue Sep 2 08:14:14 2008
New Revision: 3702
Log:
Make encoder use generic psychoacoustic model interface and optimal quantizer search
Added:
aacenc/psymodel.c
aacenc/psymodel.h
Removed:
aacenc/aacpsy.h
aacenc/lowpass.c
aacenc/lowpass.h
Modified:
aacenc/aac_enc.patch
aacenc/aacenc.c
aacenc/aacpsy.c
Modified: aacenc/aac_enc.patch
==============================================================================
--- aacenc/aac_enc.patch (original)
+++ aacenc/aac_enc.patch Tue Sep 2 08:14:14 2008
@@ -6,7 +6,7 @@ index d4f6d1c..0ed9057 100644
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
-+OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aacpsy.o aactab.o lowpass.o mdct.o fft.o mpeg4audio.o
++OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aacpsy.o aactab.o psymodel.o iirfilter.o mdct.o fft.o mpeg4audio.o
OBJS-$(CONFIG_AAC_DECODER) += aac.o aactab.o mdct.o fft.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o
OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3tab.o ac3dec_data.o ac3.o mdct.o fft.o
Modified: aacenc/aacenc.c
==============================================================================
--- aacenc/aacenc.c (original)
+++ aacenc/aacenc.c Tue Sep 2 08:14:14 2008
@@ -26,7 +26,7 @@
/***********************************
* TODOs:
- * psy model selection with some option
+ * speedup quantizer selection
* add sane pulse detection
* add temporal noise shaping
***********************************/
@@ -36,10 +36,11 @@
#include "dsputil.h"
#include "mpeg4audio.h"
-#include "aacpsy.h"
#include "aac.h"
#include "aactab.h"
+#include "psymodel.h"
+
static const uint8_t swb_size_1024_96[] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
@@ -192,7 +193,9 @@ typedef struct {
int samplerate_index; ///< MPEG-4 samplerate index
ChannelElement *cpe; ///< channel elements
- AACPsyContext psy; ///< psychoacoustic model context
+ FFPsyContext psy;
+ struct FFPsyPreprocessContext* psypp;
+ int cur_channel;
int last_frame;
} AACEncContext;
@@ -220,6 +223,8 @@ static av_cold int aac_encode_init(AVCod
{
AACEncContext *s = avctx->priv_data;
int i;
+ const uint8_t *sizes[2];
+ int lengths[2];
avctx->frame_size = 1024;
@@ -247,15 +252,22 @@ static av_cold int aac_encode_init(AVCod
s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
- if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP,
- aac_chan_configs[avctx->channels-1][0], 0,
- swb_size_1024[i], ff_aac_num_swb_1024[i], swb_size_128[i], ff_aac_num_swb_128[i]) < 0){
- av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n");
- return -1;
- }
avctx->extradata = av_malloc(2);
avctx->extradata_size = 2;
put_audio_specific_config(avctx);
+
+ sizes[0] = swb_size_1024[i];
+ sizes[1] = swb_size_128[i];
+ lengths[0] = ff_aac_num_swb_1024[i];
+ lengths[1] = ff_aac_num_swb_128[i];
+ ff_psy_init(&s->psy, avctx, 2, sizes, lengths);
+ s->psypp = ff_psy_preprocess_init(avctx);
+
+#ifndef CONFIG_HARDCODED_TABLES
+ for (i = 0; i < 316; i++)
+ ff_aac_pow2sf_tab[i] = pow(2, (i - 200)/4.);
+#endif /* CONFIG_HARDCODED_TABLES */
+
return 0;
}
@@ -351,6 +363,65 @@ static void encode_ms_info(PutBitContext
}
/**
+ * Quantize one coefficient.
+ * @return absolute value of the quantized coefficient
+ * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
+ */
+static av_always_inline int quant(float coef, const float Q)
+{
+ return av_clip((int)(pow(fabsf(coef) * Q, 0.75) + 0.4054), 0, 8191);
+}
+
+static inline float get_approximate_quant_error(const float *q, const int *c, int size, int scale_idx)
+{
+ int i;
+ float coef, unquant, sum = 0.0f;
+ const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
+ for(i = 0; i < size; i++){
+ coef = fabsf(q[i]);
+ unquant = (c[i] * cbrt(c[i])) * IQ;
+ sum += (coef - unquant) * (coef - unquant);
+ }
+ return sum * 1.0;
+}
+
+/**
+ * Convert coefficients to integers.
+ * @fixme make it RD-optimal
+ * @return sum of coefficient absolute values
+ */
+static inline int quantize_band(const float *in, int *out, int size, int scale_idx)
+{
+ int i, sign, sum = 0;
+ const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+ for(i = 0; i < size; i++){
+ sign = in[i] > 0.0;
+ out[i] = quant(in[i], Q);
+ sum += out[i];
+ if(sign) out[i] = -out[i];
+ }
+ return sum;
+}
+
+static inline int get_approximate_bits(const int *in, int size)
+{
+ int i, bits = 0;
+ for(i = 0; i < size; i += 2){
+ int j, idx = 0;
+ for(j = 0; j < 2; j++){
+ int t = FFABS(in[i+j]);
+ if(t)
+ bits++;
+ if(t > 16)
+ bits += av_log2(t)*2 + 4 - 1;
+ idx = idx*17 + FFMIN(t, 16);
+ }
+ bits += ff_aac_spectral_bits[ESC_BT-1][idx];
+ }
+ return bits;
+}
+
+/**
* Calculate the number of bits needed to code all coefficient signs in current band.
*/
static int calculate_band_sign_bits(AACEncContext *s, SingleChannelElement *sce,
@@ -525,6 +596,178 @@ static void encode_window_bands_info(AAC
}
/**
+ * Produce integer coefficients from scalefactors provided by the model.
+ */
+static void quantize_coeffs(AACEncContext *apc, ChannelElement *cpe, int chans)
+{
+ int i, w, w2, g, ch;
+ int start, sum, maxsfb, cmaxsfb;
+
+ for(ch = 0; ch < chans; ch++){
+ IndividualChannelStream *ics = &cpe->ch[ch].ics;
+ start = 0;
+ maxsfb = 0;
+ cpe->ch[ch].pulse.num_pulse = 0;
+ for(w = 0; w < ics->num_windows*16; w += 16){
+ for(g = 0; g < ics->num_swb; g++){
+ sum = 0;
+ //apply M/S
+ if(!ch && cpe->ms_mask[w + g]){
+ for(i = 0; i < ics->swb_sizes[g]; i++){
+ cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
+ cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
+ }
+ }
+ if(!cpe->ch[ch].zeroes[w + g])
+ sum = quantize_band(cpe->ch[ch].coeffs + start,
+ cpe->ch[ch].icoefs + start,
+ ics->swb_sizes[g],
+ cpe->ch[ch].sf_idx[w + g]);
+ else
+ memset(cpe->ch[ch].icoefs + start, 0, ics->swb_sizes[g] * sizeof(cpe->ch[0].icoefs[0]));
+ cpe->ch[ch].zeroes[w + g] = !sum;
+ start += ics->swb_sizes[g];
+ }
+ for(cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--);
+ maxsfb = FFMAX(maxsfb, cmaxsfb);
+ }
+ ics->max_sfb = maxsfb;
+
+ //adjust zero bands for window groups
+ for(w = 0; w < ics->num_windows; w += ics->group_len[w]){
+ for(g = 0; g < ics->max_sfb; g++){
+ i = 1;
+ for(w2 = w; w2 < w + ics->group_len[w]; w2++){
+ if(!cpe->ch[ch].zeroes[w2*16 + g]){
+ i = 0;
+ break;
+ }
+ }
+ cpe->ch[ch].zeroes[w*16 + g] = i;
+ }
+ }
+ }
+
+ if(chans > 1 && cpe->common_window){
+ IndividualChannelStream *ics0 = &cpe->ch[0].ics;
+ IndividualChannelStream *ics1 = &cpe->ch[1].ics;
+ int msc = 0;
+ ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
+ ics1->max_sfb = ics0->max_sfb;
+ for(w = 0; w < ics0->num_windows*16; w += 16)
+ for(i = 0; i < ics0->max_sfb; i++)
+ if(cpe->ms_mask[w+i]) msc++;
+ if(msc == 0 || ics0->max_sfb == 0) cpe->ms_mode = 0;
+ else cpe->ms_mode = msc < ics0->max_sfb ? 1 : 2;
+ }
+}
+
+typedef struct TrellisPath {
+ float cost;
+ int prev;
+} TrellisPath;
+
+static void search_for_quantizers(AACEncContext *s, SingleChannelElement *sce)
+{
+ int q, w, g, start = 0;
+ int i;
+ int qcoeffs[128];
+ int idx;
+ TrellisPath paths[256*128];
+ int bandaddr[128];
+ const float lambda = 5e-7f;
+ int minq = 0;
+ float mincost;
+ int stack[128], sptr = 0;
+
+ for(i = 0; i < 256; i++){
+ paths[i].cost = 0.0f;
+ paths[i].prev = -1;
+ }
+ for(i = 256; i < 256*128; i++){
+ paths[i].cost = INFINITY;
+ paths[i].prev = -2;
+ }
+ idx = 256;
+ for(w = 0; w < sce->ics.num_windows*16; w += 16){
+ for(g = 0; g < sce->ics.num_swb; g++){
+ const float *coefs = sce->coeffs + start;
+ float qmin, qmax, invthr;
+ int minscale, maxscale;
+ FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+w+g];
+
+ bandaddr[idx >> 8] = w+g;
+ if(band->energy <= band->threshold){
+ sce->zeroes[w+g] = 1;
+ for(q = 0; q < 256; q++){
+ for(i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, 256); i++){
+ float cost;
+ if(isinf(paths[idx - 256 + i].cost))
+ continue;
+ cost = paths[idx - 256 + i].cost + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
+ if(cost < paths[idx + q].cost){
+ paths[idx + q].cost = cost;
+ paths[idx + q].prev = idx - 256 + i;
+ }
+ }
+ }
+ start += sce->ics.swb_sizes[g];
+ idx += 256;
+ continue;
+ }
+ sce->zeroes[w+g] = 0;
+ qmin = qmax = fabsf(coefs[0]);
+ if(qmin == 0.0f) qmin = INT_MAX;
+ for(i = 1; i < sce->ics.swb_sizes[g]; i++){
+ float t = fabsf(coefs[i]);
+ if(t > 0.0f) qmin = fminf(qmin, t);
+ qmax = fmaxf(qmax, t);
+ }
+            //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
+ minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
+ //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
+ maxscale = av_clip_uint8(log2(qmax)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512);
+ invthr = (band->threshold == 0.0f) ? INFINITY : 1.0 / band->threshold;
+ for(q = minscale; q < maxscale; q++){
+ float dist;
+ int bits, sum;
+ sum = quantize_band(coefs, qcoeffs, sce->ics.swb_sizes[g], q);
+ dist = get_approximate_quant_error(coefs, qcoeffs, sce->ics.swb_sizes[g], q);
+ bits = get_approximate_bits(qcoeffs, sce->ics.swb_sizes[g]);
+ for(i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, 256); i++){
+ float cost;
+ if(isinf(paths[idx - 256 + i].cost))
+ continue;
+ cost = paths[idx - 256 + i].cost + dist * invthr * lambda + bits
+ + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
+ if(cost < paths[idx + q].cost){
+ paths[idx + q].cost = cost;
+ paths[idx + q].prev = idx - 256 + i;
+ }
+ }
+ }
+ start += sce->ics.swb_sizes[g];
+ idx += 256;
+ }
+ }
+ idx -= 256;
+ mincost = paths[idx].cost;
+ for(i = 1; i < 256; i++){
+ if(paths[idx + i].cost < mincost){
+ mincost = paths[idx + i].cost;
+ minq = idx + i;
+ }
+ }
+ while(minq >= 0){
+ stack[sptr++] = minq;
+ minq = paths[minq].prev;
+ }
+ for(i = sptr - 2; i >= 0; i--){
+ sce->sf_idx[bandaddr[stack[i]>>8]] = stack[i]&0xFF;
+ }
+}
+
+/**
* Encode the coefficients of one scalefactor band with selected codebook.
*/
static void encode_band_coeffs(AACEncContext *s, SingleChannelElement *sce,
@@ -600,9 +843,9 @@ static void encode_band_info(AACEncConte
/**
* Encode scalefactors.
*/
-static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int global_gain)
+static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce)
{
- int off = global_gain, diff;
+ int off = sce->sf_idx[0], diff;
int i, w;
for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
@@ -664,36 +907,10 @@ static void encode_spectral_coeffs(AACEn
*/
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int common_window)
{
- int g, w;
- int global_gain, last = 256;
-
- //determine global gain as standard recommends - the first scalefactor value
- //and assign an appropriate scalefactor index to empty bands
- for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
- for(g = sce->ics.max_sfb - 1; g >= 0; g--){
- if(sce->sf_idx[w*16 + g] == 256)
- sce->sf_idx[w*16 + g] = last;
- else
- last = sce->sf_idx[w*16 + g];
- }
- }
- //make sure global gain won't be 256
- last &= 0xFF;
- global_gain = last;
- //assign scalefactor index to tail bands in case encoder decides to code them
- for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
- for(g = 0; g < sce->ics.max_sfb; g++){
- if(sce->sf_idx[w*16 + g] == 256)
- sce->sf_idx[w*16 + g] = last;
- else
- last = sce->sf_idx[w*16 + g];
- }
- }
-
- put_bits(&s->pb, 8, global_gain);
+ put_bits(&s->pb, 8, sce->sf_idx[0]);
if(!common_window) put_ics_info(s, &sce->ics);
encode_band_info(s, sce);
- encode_scale_factors(avctx, s, sce, global_gain);
+ encode_scale_factors(avctx, s, sce);
encode_pulses(s, &sce->pulse);
put_bits(&s->pb, 1, 0); //tns
put_bits(&s->pb, 1, 0); //ssr
@@ -734,7 +951,7 @@ static int aac_encode_frame(AVCodecConte
if(s->last_frame)
return 0;
if(data){
- if((s->psy.flags & PSY_MODEL_NO_PREPROC) == PSY_MODEL_NO_PREPROC){
+ if(!s->psypp){
memcpy(s->samples + 1024 * avctx->channels, data, 1024 * avctx->channels * sizeof(s->samples[0]));
}else{
start_ch = 0;
@@ -742,7 +959,7 @@ static int aac_encode_frame(AVCodecConte
for(i = 0; i < chan_map[0]; i++){
tag = chan_map[i+1];
chans = tag == TYPE_CPE ? 2 : 1;
- ff_aac_psy_preprocess(&s->psy, (uint16_t*)data + start_ch, samples2 + start_ch, i, tag);
+ ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch, samples2 + start_ch, start_ch + i, chans);
start_ch += chans;
}
}
@@ -759,17 +976,44 @@ static int aac_encode_frame(AVCodecConte
start_ch = 0;
memset(chan_el_counter, 0, sizeof(chan_el_counter));
for(i = 0; i < chan_map[0]; i++){
+ FFPsyWindowInfo wi[2];
tag = chan_map[i+1];
chans = tag == TYPE_CPE ? 2 : 1;
cpe = &s->cpe[i];
samples2 = samples + start_ch;
la = samples2 + 1024 * avctx->channels + start_ch;
if(!data) la = NULL;
- ff_aac_psy_suggest_window(&s->psy, samples2, la, i, tag, cpe);
for(j = 0; j < chans; j++){
+ IndividualChannelStream *ics = &cpe->ch[j].ics;
+ int k;
+ wi[j] = ff_psy_suggest_window(&s->psy, samples2, la, start_ch + j, ics->window_sequence[0]);
+ ics->window_sequence[1] = ics->window_sequence[0];
+ ics->window_sequence[0] = wi[j].window_type[0];
+ ics->use_kb_window[1] = ics->use_kb_window[0];
+ ics->use_kb_window[0] = wi[j].window_shape;
+ ics->num_windows = wi[j].num_windows;
+ ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
+ ics->num_swb = s->psy.num_bands[ics->num_windows == 8];
+ for(k = 0; k < ics->num_windows; k++)
+ ics->group_len[k] = wi[j].grouping[k];
+
apply_window_and_mdct(avctx, s, &cpe->ch[j], samples2, j);
+ search_for_quantizers(s, &cpe->ch[j]);
}
- ff_aac_psy_analyze(&s->psy, i, tag, cpe);
+ cpe->common_window = 0;
+ if(chans > 1
+ && wi[0].window_type[0] == wi[1].window_type[0]
+ && wi[0].window_shape == wi[1].window_shape){
+
+ cpe->common_window = 1;
+ for(j = 0; j < wi[0].num_windows; j++){
+ if(wi[0].grouping[j] != wi[1].grouping[j]){
+ cpe->common_window = 0;
+ break;
+ }
+ }
+ }
+ quantize_coeffs(s, cpe, chans);
put_bits(&s->pb, 3, tag);
put_bits(&s->pb, 4, chan_el_counter[tag]++);
if(chans == 2){
@@ -780,6 +1024,8 @@ static int aac_encode_frame(AVCodecConte
}
}
for(j = 0; j < chans; j++){
+ s->cur_channel = start_ch + j;
+ ff_psy_set_band_info(&s->psy, s->cur_channel, cpe->ch[j].coeffs, &wi[j]);
encode_individual_channel(avctx, s, &cpe->ch[j], cpe->common_window);
}
start_ch += chans;
@@ -801,7 +1047,8 @@ static av_cold int aac_encode_end(AVCode
ff_mdct_end(&s->mdct1024);
ff_mdct_end(&s->mdct128);
- ff_aac_psy_end(&s->psy);
+ ff_psy_end(&s->psy);
+ ff_psy_preprocess_end(s->psypp);
av_freep(&s->samples);
av_freep(&s->cpe);
return 0;
Modified: aacenc/aacpsy.c
==============================================================================
--- aacenc/aacpsy.c (original)
+++ aacenc/aacpsy.c Tue Sep 2 08:14:14 2008
@@ -25,140 +25,20 @@
*/
#include "avcodec.h"
-#include "aacpsy.h"
#include "aactab.h"
+#include "psymodel.h"
/***********************************
* TODOs:
- * General:
- * better audio preprocessing (add DC highpass filter?)
- * more psy models
- * maybe improve coefficient quantization function in some way
- *
- * 3GPP-based psy model:
* thresholds linearization after their modifications for attaining given bitrate
* try other bitrate controlling mechanism (maybe use ratecontrol.c?)
* control quality for quality-based output
**********************************/
/**
- * Quantize one coefficient.
- * @return absolute value of the quantized coefficient
- * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
- */
-static av_always_inline int quant(float coef, const float Q)
-{
- return av_clip((int)(pow(fabsf(coef) * Q, 0.75) + 0.4054), 0, 8191);
-}
-
-/**
- * Convert coefficients to integers.
- * @return sum of coefficient absolute values
- */
-static inline int quantize_coeffs(float *in, int *out, int size, int scale_idx)
-{
- int i, sign, sum = 0;
- const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- for(i = 0; i < size; i++){
- sign = in[i] > 0.0;
- out[i] = quant(in[i], Q);
- sum += out[i];
- if(sign) out[i] = -out[i];
- }
- return sum;
-}
-
-static inline float get_approximate_quant_error(float *c, int size, int scale_idx)
-{
- int i;
- int q;
- float coef, unquant, sum = 0.0f;
- const float Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- for(i = 0; i < size; i++){
- coef = fabs(c[i]);
- q = quant(c[i], Q);
- unquant = (q * cbrt(q)) * IQ;
- sum += (coef - unquant) * (coef - unquant);
- }
- return sum;
-}
-
-/**
- * Produce integer coefficients from scalefactors provided by the model.
- */
-static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int chans)
-{
- int i, w, w2, g, ch;
- int start, sum, maxsfb, cmaxsfb;
-
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- start = 0;
- maxsfb = 0;
- cpe->ch[ch].pulse.num_pulse = 0;
- for(w = 0; w < ics->num_windows*16; w += 16){
- for(g = 0; g < ics->num_swb; g++){
- sum = 0;
- //apply M/S
- if(!ch && cpe->ms_mask[w + g]){
- for(i = 0; i < ics->swb_sizes[g]; i++){
- cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
- cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
- }
- }
- if(!cpe->ch[ch].zeroes[w + g])
- sum = quantize_coeffs(cpe->ch[ch].coeffs + start,
- cpe->ch[ch].icoefs + start,
- ics->swb_sizes[g],
- cpe->ch[ch].sf_idx[w + g]);
- else
- memset(cpe->ch[ch].icoefs + start, 0, ics->swb_sizes[g] * sizeof(cpe->ch[0].icoefs[0]));
- cpe->ch[ch].zeroes[w + g] = !sum;
- start += ics->swb_sizes[g];
- }
- for(cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--);
- maxsfb = FFMAX(maxsfb, cmaxsfb);
- }
- ics->max_sfb = maxsfb;
-
- //adjust zero bands for window groups
- for(w = 0; w < ics->num_windows; w += ics->group_len[w]){
- for(g = 0; g < ics->max_sfb; g++){
- i = 1;
- for(w2 = w; w2 < w + ics->group_len[w]; w2++){
- if(!cpe->ch[ch].zeroes[w2*16 + g]){
- i = 0;
- break;
- }
- }
- cpe->ch[ch].zeroes[w*16 + g] = i;
- }
- }
- }
-
- if(chans > 1 && cpe->common_window){
- IndividualChannelStream *ics0 = &cpe->ch[0].ics;
- IndividualChannelStream *ics1 = &cpe->ch[1].ics;
- int msc = 0;
- ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
- ics1->max_sfb = ics0->max_sfb;
- for(w = 0; w < ics0->num_windows*16; w += 16)
- for(i = 0; i < ics0->max_sfb; i++)
- if(cpe->ms_mask[w+i]) msc++;
- if(msc == 0 || ics0->max_sfb == 0) cpe->ms_mode = 0;
- else cpe->ms_mode = msc < ics0->max_sfb ? 1 : 2;
- }
-}
-
-/**
* constants for 3GPP AAC psychoacoustic model
* @{
*/
-#define PSY_3GPP_C1 3.0f // log2(8.0)
-#define PSY_3GPP_C2 1.32192809488736234787f // log2(2.5)
-#define PSY_3GPP_C3 0.55935730170421255071f // 1 - C2/C1
-
#define PSY_3GPP_SPREAD_LOW 1.5f // spreading factor for ascending threshold spreading (15 dB/Bark)
#define PSY_3GPP_SPREAD_HI 3.0f // spreading factor for descending threshold spreading (30 dB/Bark)
@@ -175,10 +55,6 @@ typedef struct Psy3gppBand{
float energy; ///< band energy
float ffac; ///< form factor
float thr; ///< energy threshold
- float pe; ///< perceptual entropy
- float a; ///< constant part in perceptual entropy
- float b; ///< variable part in perceptual entropy
- float nl; ///< predicted number of lines left after quantization
float min_snr; ///< minimal SNR
float thr_quiet; ///< threshold in quiet
}Psy3gppBand;
@@ -187,17 +63,13 @@ typedef struct Psy3gppBand{
* single/pair channel context for psychoacoustic model
*/
typedef struct Psy3gppChannel{
- float a[2]; ///< parameter used for perceptual entropy - constant part
- float b[2]; ///< parameter used for perceptual entropy - variable part
- float pe[2]; ///< channel perceptual entropy
- float thr[2]; ///< channel thresholds sum
- Psy3gppBand band[2][128]; ///< bands information
- Psy3gppBand prev_band[2][128]; ///< bands information from the previous frame
+ Psy3gppBand band[128]; ///< bands information
+ Psy3gppBand prev_band[128]; ///< bands information from the previous frame
- float win_energy[2]; ///< sliding average of channel energy
- float iir_state[2][2]; ///< hi-pass IIR filter state
- uint8_t next_grouping[2]; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
- enum WindowSequence next_window_seq[2]; ///< window sequence to be used in the next frame
+ float win_energy; ///< sliding average of channel energy
+ float iir_state[2]; ///< hi-pass IIR filter state
+ uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
+ enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
}Psy3gppChannel;
/**
@@ -215,15 +87,13 @@ typedef struct Psy3gppCoeffs{
*/
typedef struct Psy3gppContext{
Psy3gppCoeffs psy_coef[2];
- int reservoir; ///< bit reservoir fullness
- int avg_bits; ///< average frame size of bits for CBR
Psy3gppChannel *ch;
}Psy3gppContext;
/**
* Calculate Bark value for given line.
*/
-static inline float calc_bark(float f)
+static av_cold float calc_bark(float f)
{
return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
}
@@ -233,7 +103,7 @@ static inline float calc_bark(float f)
* Calculate ATH value for given frequency.
* Borrowed from Lame.
*/
-static inline float ath(float f, float add)
+static av_cold float ath(float f, float add)
{
f /= 1000.0f;
return 3.64 * pow(f, -0.8)
@@ -242,46 +112,43 @@ static inline float ath(float f, float a
+ (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
}
-static av_cold int psy_3gpp_init(AACPsyContext *apc, int elements)
-{
+static av_cold int psy_3gpp_init(FFPsyContext *ctx){
Psy3gppContext *pctx;
float barks[1024];
int i, j, g, start;
float prev, minscale, minath;
- apc->model_priv_data = av_mallocz(sizeof(Psy3gppContext));
- pctx = (Psy3gppContext*) apc->model_priv_data;
+
+ ctx->model_priv_data = av_mallocz(sizeof(Psy3gppContext));
+ pctx = (Psy3gppContext*) ctx->model_priv_data;
for(i = 0; i < 1024; i++)
- barks[i] = calc_bark(i * apc->avctx->sample_rate / 2048.0);
+ barks[i] = calc_bark(i * ctx->avctx->sample_rate / 2048.0);
minath = ath(3410, ATH_ADD);
for(j = 0; j < 2; j++){
Psy3gppCoeffs *coeffs = &pctx->psy_coef[j];
- int bands = j ? apc->num_bands128 : apc->num_bands1024;
i = 0;
prev = 0.0;
- for(g = 0; g < bands; g++){
- i += j ? apc->bands128[g] : apc->bands1024[g];
+ for(g = 0; g < ctx->num_bands[j]; g++){
+ i += ctx->bands[j][g];
coeffs->barks[g] = (barks[i - 1] + prev) / 2.0;
prev = barks[i - 1];
}
- for(g = 0; g < bands - 1; g++){
+ for(g = 0; g < ctx->num_bands[j] - 1; g++){
coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->barks[g]) * PSY_3GPP_SPREAD_LOW);
coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->barks[g]) * PSY_3GPP_SPREAD_HI);
}
start = 0;
- for(g = 0; g < bands; g++){
- int size = j ? apc->bands128[g] : apc->bands1024[g];
- minscale = ath(apc->avctx->sample_rate * start / 1024.0, ATH_ADD);
- for(i = 1; i < size; i++){
- minscale = fminf(minscale, ath(apc->avctx->sample_rate * (start + i) / 1024.0 / 2.0, ATH_ADD));
+ for(g = 0; g < ctx->num_bands[j]; g++){
+ minscale = ath(ctx->avctx->sample_rate * start / 1024.0, ATH_ADD);
+ for(i = 1; i < ctx->bands[j][g]; i++){
+ minscale = fminf(minscale, ath(ctx->avctx->sample_rate * (start + i) / 1024.0 / 2.0, ATH_ADD));
}
coeffs->ath[g] = minscale - minath;
- start += size;
+ start += ctx->bands[j][g];
}
}
- pctx->avg_bits = apc->avctx->bit_rate * 1024 / apc->avctx->sample_rate;
- pctx->ch = av_mallocz(sizeof(Psy3gppChannel) * elements);
+ pctx->ch = av_mallocz(sizeof(Psy3gppChannel) * ctx->avctx->channels);
return 0;
}
@@ -309,519 +176,146 @@ static const uint8_t window_grouping[9]
* Tell encoder which window types to use.
* @see 3GPP TS26.403 5.4.1 "Blockswitching"
*/
-static void psy_3gpp_window(AACPsyContext *apc, int16_t *audio, int16_t *la,
- int tag, int type, ChannelElement *cpe)
+static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,
+ const int16_t *audio, const int16_t *la,
+ int channel, int prev_type)
{
- int ch;
- int chans = type == TYPE_CPE ? 2 : 1;
int i, j;
- int br = apc->avctx->bit_rate / apc->avctx->channels;
- int attack_ratio = (br <= 16000 + 8000*chans) ? 18 : 10;
- Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
- Psy3gppChannel *pch = &pctx->ch[tag];
- uint8_t grouping[2];
- enum WindowSequence win[2];
- IndividualChannelStream *ics0 = &cpe->ch[0].ics, *ics1 = &cpe->ch[1].ics;
+ int br = ctx->avctx->bit_rate / ctx->avctx->channels;
+ int attack_ratio = br <= 16000 ? 18 : 10;
+ Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data;
+ Psy3gppChannel *pch = &pctx->ch[channel];
+ uint8_t grouping = 0;
+ FFPsyWindowInfo wi;
- if(la && !(apc->flags & PSY_MODEL_NO_SWITCH)){
+ memset(&wi, 0, sizeof(wi));
+ if(la){
float s[8], v;
- for(ch = 0; ch < chans; ch++){
- enum WindowSequence last_window_sequence = cpe->ch[ch].ics.window_sequence[0];
- int switch_to_eight = 0;
- float sum = 0.0, sum2 = 0.0;
- int attack_n = 0;
- for(i = 0; i < 8; i++){
- for(j = 0; j < 128; j++){
- v = iir_filter(audio[(i*128+j)*apc->avctx->channels+ch], pch->iir_state[ch]);
- sum += v*v;
- }
- s[i] = sum;
- sum2 += sum;
- }
- for(i = 0; i < 8; i++){
- if(s[i] > pch->win_energy[ch] * attack_ratio){
- attack_n = i + 1;
- switch_to_eight = 1;
- break;
- }
+ int switch_to_eight = 0;
+ float sum = 0.0, sum2 = 0.0;
+ int attack_n = 0;
+ for(i = 0; i < 8; i++){
+ for(j = 0; j < 128; j++){
+ v = iir_filter(audio[(i*128+j)*ctx->avctx->channels], pch->iir_state);
+ sum += v*v;
}
- pch->win_energy[ch] = pch->win_energy[ch]*7/8 + sum2/64;
-
- switch(last_window_sequence){
- case ONLY_LONG_SEQUENCE:
- win[ch] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
- grouping[ch] = 0;
- break;
- case LONG_START_SEQUENCE:
- win[ch] = EIGHT_SHORT_SEQUENCE;
- grouping[ch] = pch->next_grouping[ch];
- break;
- case LONG_STOP_SEQUENCE:
- win[ch] = ONLY_LONG_SEQUENCE;
- grouping[ch] = 0;
- break;
- case EIGHT_SHORT_SEQUENCE:
- win[ch] = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
- grouping[ch] = switch_to_eight ? pch->next_grouping[ch] : 0;
+ s[i] = sum;
+ sum2 += sum;
+ }
+ for(i = 0; i < 8; i++){
+ if(s[i] > pch->win_energy * attack_ratio){
+ attack_n = i + 1;
+ switch_to_eight = 1;
break;
}
- pch->next_grouping[ch] = window_grouping[attack_n];
}
- }else{
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- win[ch] = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE)
- ? EIGHT_SHORT_SEQUENCE
- : ONLY_LONG_SEQUENCE;
- grouping[ch] = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
- }
- }
+ pch->win_energy = pch->win_energy*7/8 + sum2/64;
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- ics->window_sequence[0] = win[ch];
- ics->use_kb_window[0] = 1;
- if(win[ch] != EIGHT_SHORT_SEQUENCE){
- ics->num_windows = 1;
- ics->swb_sizes = apc->bands1024;
- ics->num_swb = apc->num_bands1024;
- ics->num_window_groups = 1;
- ics->group_len[0] = 1;
- }else{
- int lastgrp = 0;
- ics->num_windows = 8;
- ics->swb_sizes = apc->bands128;
- ics->num_swb = apc->num_bands128;
- ics->num_window_groups = 0;
- memset(ics->group_len, 0, sizeof(ics->group_len));
- for(i = 0; i < 8; i++){
- if(!((grouping[ch] >> i) & 1))
- lastgrp = i;
- ics->group_len[lastgrp]++;
- }
+ wi.window_type[1] = prev_type;
+ switch(prev_type){
+ case ONLY_LONG_SEQUENCE:
+ wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
+ break;
+ case LONG_START_SEQUENCE:
+ wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
+ grouping = pch->next_grouping;
+ break;
+ case LONG_STOP_SEQUENCE:
+ wi.window_type[0] = ONLY_LONG_SEQUENCE;
+ break;
+ case EIGHT_SHORT_SEQUENCE:
+ wi.window_type[0] = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
+ grouping = switch_to_eight ? pch->next_grouping : 0;
+ break;
}
+ pch->next_grouping = window_grouping[attack_n];
+ }else{
+ for(i = 0; i < 3; i++)
+ wi.window_type[i] = prev_type;
+ grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
}
- cpe->common_window = 0;
- if(chans > 1
- && ics0->window_sequence[0] == ics1->window_sequence[0]
- && ics0->use_kb_window[0] == ics1->use_kb_window[0]
- && !(ics0->window_sequence[0] == EIGHT_SHORT_SEQUENCE && grouping[0] != grouping[1]))
- cpe->common_window = 1;
- if(PSY_MODEL_MODE(apc->flags) > PSY_MODE_QUALITY){
- av_log(apc->avctx, AV_LOG_ERROR, "Unknown mode %d, defaulting to CBR\n", PSY_MODEL_MODE(apc->flags));
- }
-}
-/**
- * Modify threshold by adding some value in loudness domain.
- * @see 3GPP TS26.403 5.6.1.1.1 "Addition of noise with equal loudness"
- */
-static inline float modify_thr(float thr, float r){
- float t;
- t = pow(thr, 0.25) + r;
- return (t*t)*(t*t);
-}
-
-/**
- * Calculate perceptual entropy and its corresponding values for one band.
- * @see 3GPP TS26.403 5.6.1.3 "Calculation of the reduction value"
- */
-static void calc_pe(Psy3gppBand *band, int band_width)
-{
- if(band->energy <= band->thr){
- band->a = 0.0f;
- band->b = 0.0f;
- band->nl = 0.0f;
- return;
- }
- band->nl = band->ffac / pow(band->energy/band_width, 0.25);
- if(band->energy >= band->thr * 8.0){
- band->a = band->nl * log2(band->energy);
- band->b = band->nl;
+ wi.window_shape = 1;
+ if(wi.window_type[0] != EIGHT_SHORT_SEQUENCE){
+ wi.num_windows = 1;
+ wi.grouping[0] = 1;
}else{
- band->a = band->nl * (PSY_3GPP_C2 + PSY_3GPP_C3 * log2(band->energy));
- band->b = band->nl * PSY_3GPP_C3;
+ int lastgrp = 0;
+ wi.num_windows = 8;
+ for(i = 0; i < 8; i++){
+ if(!((grouping >> i) & 1))
+ lastgrp = i;
+ wi.grouping[lastgrp]++;
+ }
}
- band->pe = band->a - band->b * log2(band->thr);
- band->min_snr = 1.0 / (pow(2.0, band->pe / band_width) - 1.5);
- band->min_snr = av_clipf(band->min_snr, 1.26f, 316.2277f);
-}
-/**
- * Determine scalefactor from band threshold and form factor.
- * @see 3GPP TS26.403 5.4 5.6.2 "Scalefactor determination"
- */
-static inline int determine_scalefactor(Psy3gppBand *band)
-{
- //spec gives constant for lg() but we scaled it for log2()
- return (int)(2.66667 * log2(6.75*band->thr/band->ffac));
+ return wi;
}
/**
- * Determine scalefactors and prepare coefficients for encoding.
- * @see 3GPP TS26.403 5.4 "Psychoacoustic model"
+ * Calculate band thresholds as suggested in 3GPP TS26.403
*/
-static void psy_3gpp_process(AACPsyContext *apc, int tag, int type, ChannelElement *cpe)
+static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float *coefs,
+ FFPsyWindowInfo *wi)
{
- int start;
- int ch, w, g, i;
- Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
- float pe_target;
- int bits_avail;
- int chans = type == TYPE_CPE ? 2 : 1;
- Psy3gppChannel *pch = &pctx->ch[tag];
+ Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data;
+ Psy3gppChannel *pch = &pctx->ch[channel];
+ int start = 0;
+ int i, w, g;
+ const int num_bands = ctx->num_bands[wi->num_windows == 8];
+ const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8];
+ Psy3gppCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8];
//calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
- memset(pch->band, 0, sizeof(pch->band));
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- start = 0;
- for(w = 0; w < ics->num_windows*16; w += 16){
- for(g = 0; g < ics->num_swb; g++){
- Psy3gppBand *band = &pch->band[ch][w+g];
- for(i = 0; i < ics->swb_sizes[g]; i++)
- band->energy += cpe->ch[ch].coeffs[start+i] * cpe->ch[ch].coeffs[start+i];
- band->energy *= 1.0f / (512*512);
- band->thr = band->energy * 0.001258925f;
- start += ics->swb_sizes[g];
- if(band->energy != 0.0){
- float ffac = 0.0;
+ for(w = 0; w < wi->num_windows*16; w += 16){
+ for(g = 0; g < num_bands; g++){
+ Psy3gppBand *band = &pch->band[w+g];
+ for(i = 0; i < band_sizes[g]; i++)
+ band->energy += coefs[start+i] * coefs[start+i];
+ band->energy *= 1.0f / (512*512);
+ band->thr = band->energy * 0.001258925f;
+ start += band_sizes[g];
- for(i = 0; i < ics->swb_sizes[g]; i++)
- ffac += sqrt(FFABS(cpe->ch[ch].coeffs[start+i]));
- band->ffac = ffac / sqrt(512.0);
- }
- }
+ ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy;
}
}
-
//modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Calculation"
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- Psy3gppCoeffs *coeffs = &pctx->psy_coef[ics->num_windows == 8];
- for(w = 0; w < ics->num_windows*16; w += 16){
- Psy3gppBand *band = &pch->band[ch][w];
- for(g = 1; g < ics->num_swb; g++){
- band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[g-1]);
- }
- for(g = ics->num_swb - 2; g >= 0; g--){
- band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [g+1]);
- }
- for(g = 0; g < ics->num_swb; g++){
- band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]);
- band[g].thr_quiet = fmaxf(PSY_3GPP_RPEMIN*band[g].thr_quiet,
- fminf(band[g].thr_quiet,
- PSY_3GPP_RPELEV*pch->prev_band[ch][w+g].thr_quiet));
- band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25);
- }
- }
- }
-
- // M/S detection - 5.5.2 "Mid/Side Stereo"
- if(chans > 1 && cpe->common_window){
- start = 0;
- for(w = 0; w < cpe->ch[0].ics.num_windows*16; w += 16){
- for(g = 0; g < cpe->ch[0].ics.num_swb; g++){
- Psy3gppBand *band0 = &pch->band[0][w+g];
- Psy3gppBand *band1 = &pch->band[1][w+g];
- double en_m = 0.0, en_s = 0.0, ff_m = 0.0, ff_s = 0.0, minthr;
- float m, s;
-
- cpe->ms_mask[w+g] = 0;
- if(band0->energy == 0.0 || band1->energy == 0.0)
- continue;
- for(i = 0; i < cpe->ch[0].ics.swb_sizes[g]; i++){
- m = cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i];
- s = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
- en_m += m*m;
- en_s += s*s;
- }
- en_m *= 1.0f / (512*512*4);
- en_s *= 1.0f / (512*512*4);
- minthr = FFMIN(band0->thr, band1->thr);
- if(minthr * minthr * band0->energy * band1->energy >= band0->thr * band1->thr * en_m * en_s){
- cpe->ms_mask[w+g] = 1;
- band0->energy = en_m;
- band1->energy = en_s;
- band0->thr = en_m * 0.001258925f;
- band1->thr = en_s * 0.001258925f;
- for(i = 0; i < cpe->ch[0].ics.swb_sizes[g]; i++){
- m = cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i];
- s = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
- ff_m += sqrt(fabs(m));
- ff_s += sqrt(fabs(s));
- }
- band0->ffac = ff_m * (1.0f / 32.0f); // sqrt(512)*sqrt(2)
- band1->ffac = ff_s * (1.0f / 32.0f);
- }
- }
- }
- }
-
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- pch->a[ch] = pch->b[ch] = pch->pe[ch] = pch->thr[ch] = 0.0f;
- for(w = 0; w < ics->num_windows*16; w += 16){
- for(g = 0; g < ics->num_swb; g++){
- Psy3gppBand *band = &pch->band[ch][w+g];
- if(band->energy != 0.0)
- calc_pe(band, ics->swb_sizes[g]);
- if(band->thr < band->energy){
- pch->a[ch] += band->a;
- pch->b[ch] += band->b;
- pch->pe[ch] += band->pe;
- pch->thr[ch] += band->thr;
- }
- }
- }
- }
-
- switch(PSY_MODEL_MODE(apc->flags)){
- case PSY_MODE_CBR:
- case PSY_MODE_ABR:
- //bitrate reduction - 5.6.1 "Reduction of psychoacoustic requirements"
- if(PSY_MODEL_MODE(apc->flags) != PSY_MODE_ABR){
- pctx->reservoir += pctx->avg_bits - apc->avctx->frame_bits;
- bits_avail = pctx->avg_bits + pctx->reservoir;
- bits_avail = FFMIN(bits_avail, pctx->avg_bits * 1.5);
- pe_target = 1.18f * bits_avail / apc->avctx->channels * chans;
- }else{
- pe_target = pctx->avg_bits / apc->avctx->channels * chans;
- }
- for(i = 0; i < 2; i++){
- float t0, pe, r, a0 = 0.0f, pe0 = 0.0f, b0 = 0.0f;
- for(ch = 0; ch < chans; ch++){
- a0 += pch->a[ch];
- b0 += pch->b[ch];
- pe0 += pch->pe[ch];
- }
- if(pe0 == 0.0f) break;
- t0 = pow(2.0, (a0 - pe0) / (4.0 * b0));
- r = pow(2.0, (a0 - pe_target) / (4.0 * b0)) - t0;
-
- //add correction factor to thresholds and recalculate perceptual entropy
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- pch->a[ch] = pch->b[ch] = pch->pe[ch] = pch->thr[ch] = 0.0;
- pe = 0.0f;
- for(w = 0; w < ics->num_windows*16; w += 16){
- for(g = 0; g < ics->num_swb; g++){
- Psy3gppBand *band = &pch->band[ch][w+g];
- band->thr = modify_thr(band->thr, r);
- calc_pe(band, ics->swb_sizes[g]);
- if(band->thr < band->energy){
- pch->a[ch] += band->a;
- pch->b[ch] += band->b;
- pch->pe[ch] += band->pe;
- pch->thr[ch] += band->thr;
- }
- }
- }
- }
- }
-
- //determine scalefactors - 5.6.2 "Scalefactor determination"
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- for(w = 0; w < ics->num_windows*16; w += 16){
- for(g = 0; g < ics->num_swb; g++){
- Psy3gppBand *band = &pch->band[ch][w+g];
- cpe->ch[ch].zeroes[w+g] = band->thr >= band->energy;
- if(cpe->ch[ch].zeroes[w+g]) continue;
- cpe->ch[ch].sf_idx[w+g] = determine_scalefactor(band);
- }
- }
+ for(w = 0; w < wi->num_windows*16; w += 16){
+ Psy3gppBand *band = &pch->band[w];
+ for(g = 1; g < num_bands; g++){
+ band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[g-1]);
}
- break;
- case PSY_MODE_QUALITY:
- for(ch = 0; ch < chans; ch++){
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- start = 0;
- for(w = 0; w < ics->num_windows*16; w += 16){
- for(g = 0; g < ics->num_swb; g++){
- Psy3gppBand *band = &pch->band[ch][w+g];
- if(band->thr >= band->energy){
- cpe->ch[ch].sf_idx[w+g] = 0;
- cpe->ch[ch].zeroes[w+g] = 1;
- }else{
- cpe->ch[ch].zeroes[w+g] = 0;
- cpe->ch[ch].sf_idx[w+g] = determine_scalefactor(band);
- while(cpe->ch[ch].sf_idx[w+g] > 3){
- float dist = get_approximate_quant_error(cpe->ch[ch].coeffs + start,
- ics->swb_sizes[g],
- SCALE_ONE_POS + cpe->ch[ch].sf_idx[w+g]);
- if(dist < band->thr) break;
- cpe->ch[ch].sf_idx[w+g] -= 3;
- }
- }
- start += ics->swb_sizes[g];
- }
- }
+ for(g = num_bands - 2; g >= 0; g--){
+ band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [g+1]);
}
- break;
- }
-
- //limit scalefactors
- for(ch = 0; ch < chans; ch++){
- int min_scale = 256;
- IndividualChannelStream *ics = &cpe->ch[ch].ics;
- for(w = 0; w < ics->num_windows*16; w += 16)
- for(g = 0; g < ics->num_swb; g++){
- if(cpe->ch[ch].zeroes[w + g]) continue;
- min_scale = FFMIN(min_scale, cpe->ch[ch].sf_idx[w + g]);
- }
- for(w = 0; w < ics->num_windows*16; w += 16)
- for(g = 0; g < ics->num_swb; g++){
- if(cpe->ch[ch].zeroes[w + g]) continue;
- cpe->ch[ch].sf_idx[w + g] = FFMIN(cpe->ch[ch].sf_idx[w + g], min_scale + SCALE_MAX_DIFF);
- }
- for(w = 0; w < ics->num_windows*16; w += 16)
- for(g = 0; g < ics->num_swb; g++){
- if(cpe->ch[ch].zeroes[w + g])
- cpe->ch[ch].sf_idx[w + g] = 256;
- else
- cpe->ch[ch].sf_idx[w + g] = av_clip(SCALE_ONE_POS + cpe->ch[ch].sf_idx[w + g],
- 0,
- SCALE_MAX_POS);
+ for(g = 0; g < num_bands; g++){
+ band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]);
+ if(wi->num_windows != 8 && wi->window_type[1] != EIGHT_SHORT_SEQUENCE){
+ band[g].thr_quiet = fmaxf(PSY_3GPP_RPEMIN*band[g].thr_quiet,
+ fminf(band[g].thr_quiet,
+ PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
}
+ band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25);
- //adjust scalefactors for window groups
- for(w = 0; w < ics->num_windows; w += ics->group_len[w]){
- int min_scale = 256;
-
- for(g = 0; g < ics->num_swb; g++){
- for(i = w; i < w + ics->group_len[w]; i++){
- if(cpe->ch[ch].zeroes[i*16 + g]) continue;
- min_scale = FFMIN(min_scale, cpe->ch[ch].sf_idx[i*16 + g]);
- }
- for(i = w; i < w + ics->group_len[w]; i++)
- cpe->ch[ch].sf_idx[i*16 + g] = min_scale;
- }
+ ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr;
}
}
-
memcpy(pch->prev_band, pch->band, sizeof(pch->band));
- psy_create_output(apc, cpe, chans);
}
-static av_cold void psy_3gpp_end(AACPsyContext *apc)
+static av_cold void psy_3gpp_end(FFPsyContext *apc)
{
Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
av_freep(&pctx->ch);
av_freep(&apc->model_priv_data);
}
-static const AACPsyModel psy_models[AAC_NB_PSY_MODELS] =
-{
- {
- "3GPP TS 26.403-inspired model",
- psy_3gpp_init,
- psy_3gpp_window,
- psy_3gpp_process,
- psy_3gpp_end,
- },
-};
-
-int av_cold ff_aac_psy_init(AACPsyContext *ctx, AVCodecContext *avctx,
- enum AACPsyModelType model, int elements, int flags,
- const uint8_t *bands1024, int num_bands1024,
- const uint8_t *bands128, int num_bands128)
-{
- int i;
-
- if(model < 0 || model >= AAC_NB_PSY_MODELS){
- av_log(avctx, AV_LOG_ERROR, "Invalid psy model\n");
- return -1;
- }
-
-#ifndef CONFIG_HARDCODED_TABLES
- for (i = 0; i < 316; i++)
- ff_aac_pow2sf_tab[i] = pow(2, (i - 200)/4.);
-#endif /* CONFIG_HARDCODED_TABLES */
-
- ctx->avctx = avctx;
- ctx->flags = flags;
- ctx->bands1024 = bands1024;
- ctx->num_bands1024 = num_bands1024;
- ctx->bands128 = bands128;
- ctx->num_bands128 = num_bands128;
- ctx->model = &psy_models[model];
-
- if(ctx->flags & PSY_MODEL_NO_ST_ATT || PSY_MODEL_MODE(ctx->flags) == PSY_MODE_QUALITY){
- ctx->flags |= PSY_MODEL_NO_ST_ATT;
- ctx->stereo_att = 0.5f;
- }else{
- ctx->stereo_att = av_clipf(avctx->bit_rate / elements / 192000.0, 0.0f, 0.5f);
- }
- if(ctx->flags & PSY_MODEL_NO_LOWPASS || PSY_MODEL_MODE(ctx->flags) == PSY_MODE_QUALITY){
- ctx->flags |= PSY_MODEL_NO_LOWPASS;
- }else{
- float cutoff = (float)avctx->bit_rate / elements / 8 / avctx->sample_rate;
- ctx->lp_coeffs = ff_lowpass_filter_init_coeffs(4, cutoff);
- if(!ctx->lp_coeffs){
- ctx->flags |= PSY_MODEL_NO_LOWPASS;
- }else{
- ctx->lp_state = av_malloc(sizeof(struct FFLPFilterState*) * elements * 2);
- for(i = 0; i < elements*2; i++)
- ctx->lp_state[i] = ff_lowpass_filter_init_state(4);
- }
- }
- ctx->elements = elements;
- if(ctx->model->init)
- return ctx->model->init(ctx, elements);
- return 0;
-}
-
-void ff_aac_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la,
- int tag, int type, ChannelElement *cpe)
-{
- ctx->model->window(ctx, audio, la, tag, type, cpe);
-}
-
-void ff_aac_psy_analyze(AACPsyContext *ctx, int tag, int type, ChannelElement *cpe)
-{
- ctx->model->process(ctx, tag, type, cpe);
-}
-
-void av_cold ff_aac_psy_end(AACPsyContext *ctx)
-{
- if(!(ctx->flags & PSY_MODEL_NO_LOWPASS)){
- int i;
- ff_lowpass_filter_free_coeffs(ctx->lp_coeffs);
- for(i = 0; i < ctx->elements; i++)
- ff_lowpass_filter_free_state(ctx->lp_state[i]);
- av_freep(&ctx->lp_state);
- }
- if(ctx->model->end)
- return ctx->model->end(ctx);
-}
-void ff_aac_psy_preprocess(AACPsyContext *ctx, int16_t *audio, int16_t *dest, int tag, int type)
+const FFPsyModel ff_aac_psy_model =
{
- int chans = type == TYPE_CPE ? 2 : 1;
- const int chstride = ctx->avctx->channels;
- int i, ch;
- float t[2];
-
- for(ch = 0; ch < chans; ch++){
- if(!(ctx->flags & PSY_MODEL_NO_LOWPASS)){
- ff_lowpass_filter(ctx->lp_coeffs, ctx->lp_state[tag*2 + ch], 1024,
- audio + ch, chstride,
- dest + ch, chstride);
- }else{
- for(i = 0; i < 1024; i++){
- dest[i * chstride + ch] = audio[i * chstride + ch];
- }
- }
- }
- if(chans == 2 && !(ctx->flags & PSY_MODEL_NO_ST_ATT)){
- for(i = 0; i < 1024; i++){
- t[0] = dest[0] * (0.5 + ctx->stereo_att) + dest[1] * (0.5 - ctx->stereo_att);
- t[1] = dest[0] * (0.5 - ctx->stereo_att) + dest[1] * (0.5 + ctx->stereo_att);
- dest[0] = t[0];
- dest[1] = t[1];
- dest += chstride;
- }
- }
-}
-
+ .name = "3GPP TS 26.403-inspired model",
+ .init = psy_3gpp_init,
+ .window = psy_3gpp_window,
+ .analyze = psy_3gpp_analyze,
+ .end = psy_3gpp_end,
+};
Added: aacenc/psymodel.c
==============================================================================
--- (empty file)
+++ aacenc/psymodel.c Tue Sep 2 08:14:14 2008
@@ -0,0 +1,123 @@
+/*
+ * audio encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "psymodel.h"
+#include "iirfilter.h"
+
+extern const FFPsyModel ff_aac_psy_model;
+
+av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
+ int num_lens,
+ uint8_t **bands, int* num_bands)
+{
+ ctx->avctx = avctx;
+ ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels);
+ ctx->bands = av_malloc (sizeof(ctx->bands[0]) * num_lens);
+ ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
+ memcpy(ctx->bands, bands, sizeof(ctx->bands[0]) * num_lens);
+ memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens);
+ switch(ctx->avctx->codec_id){
+ case CODEC_ID_AAC:
+ ctx->model = &ff_aac_psy_model;
+ break;
+ }
+ if(ctx->model->init)
+ return ctx->model->init(ctx);
+ return 0;
+}
+
+FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx,
+ const int16_t *audio, const int16_t *la,
+ int channel, int prev_type)
+{
+ return ctx->model->window(ctx, audio, la, channel, prev_type);
+}
+
+void ff_psy_set_band_info(FFPsyContext *ctx, int channel,
+ const float *coeffs, FFPsyWindowInfo *wi)
+{
+ ctx->model->analyze(ctx, channel, coeffs, wi);
+}
+
+av_cold void ff_psy_end(FFPsyContext *ctx)
+{
+ if(ctx->model->end)
+ ctx->model->end(ctx);
+ av_freep(&ctx->bands);
+ av_freep(&ctx->num_bands);
+ av_freep(&ctx->psy_bands);
+}
+
+typedef struct FFPsyPreprocessContext{
+ AVCodecContext *avctx;
+ float stereo_att;
+ struct FFIIRFilterCoeffs *fcoeffs;
+ struct FFIIRFilterState **fstate;
+}FFPsyPreprocessContext;
+
+#define FILT_ORDER 4
+
+av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx)
+{
+ FFPsyPreprocessContext *ctx;
+ int i;
+ ctx = av_mallocz(sizeof(FFPsyPreprocessContext));
+ ctx->avctx = avctx;
+ ctx->fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH, FF_FILTER_MODE_LOWPASS,
+ FILT_ORDER, 0.25, 0.0, 0.0);
+ if(ctx->fcoeffs){
+ ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);
+ for(i = 0; i < avctx->channels; i++)
+ ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
+ }
+ return ctx;
+}
+
+void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
+ const int16_t *audio, int16_t *dest,
+ int tag, int channels)
+{
+ int ch, i;
+ if(ctx->fstate){
+ for(ch = 0; ch < channels; ch++){
+ ff_iir_filter(ctx->fcoeffs, ctx->fstate[tag+ch], ctx->avctx->frame_size,
+ audio + ch, ctx->avctx->channels,
+ dest + ch, ctx->avctx->channels);
+ }
+ }else{
+ for(ch = 0; ch < channels; ch++){
+ for(i = 0; i < ctx->avctx->frame_size; i++)
+ dest[i*ctx->avctx->channels + ch] = audio[i*ctx->avctx->channels + ch];
+ }
+ }
+}
+
+av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
+{
+ int i;
+ ff_iir_filter_free_coeffs(ctx->fcoeffs);
+ for(i = 0; i < ctx->avctx->channels; i++){
+ ff_iir_filter_free_state(ctx->fstate[i]);
+ }
+ av_freep(&ctx->fstate);
+}
+
Added: aacenc/psymodel.h
==============================================================================
--- (empty file)
+++ aacenc/psymodel.h Tue Sep 2 08:14:14 2008
@@ -0,0 +1,158 @@
+/*
+ * audio encoder psychoacoustic model
+ * Copyright (C) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef FFMPEG_PSYMODEL_H
+#define FFMPEG_PSYMODEL_H
+
+#include "avcodec.h"
+
+/** maximum possible number of bands */
+#define PSY_MAX_BANDS 128
+
+/**
+ * single band psychoacoustic information
+ */
+typedef struct FFPsyBand{
+ int bits;
+ float energy;
+ float threshold;
+ float distortion;
+ float perceptual_weight;
+}FFPsyBand;
+
+/**
+ * windowing related information
+ */
+typedef struct FFPsyWindowInfo{
+ int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next
+ int window_shape; ///< window shape (sine/KBD/whatever)
+ int num_windows; ///< number of windows in a frame
+ int grouping[8]; ///< window grouping (e.g. AAC)
+ int *window_sizes; ///< sequence of window sizes inside one frame (e.g. WMA)
+}FFPsyWindowInfo;
+
+/**
+ * context used by psychoacoustic model
+ */
+typedef struct FFPsyContext{
+ AVCodecContext *avctx; ///< encoder context
+ const struct FFPsyModel *model; ///< encoder-specific model functions
+
+ FFPsyBand *psy_bands; ///< frame bands information
+
+ uint8_t **bands; ///< scalefactor band sizes for possible frame sizes
+ int *num_bands; ///< number of scalefactor bands for possible frame sizes
+ int num_lens; ///< number of scalefactor band sets
+
+ void* model_priv_data; ///< psychoacoustic model implementation private data
+}FFPsyContext;
+
+/**
+ * codec-specific psychoacoustic model implementation
+ */
+typedef struct FFPsyModel {
+ const char *name;
+ int (*init) (FFPsyContext *apc);
+ FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type);
+ void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, FFPsyWindowInfo *wi);
+ void (*end) (FFPsyContext *apc);
+}FFPsyModel;
+
+/**
+ * Initialize psychoacoustic model.
+ *
+ * @param ctx model context
+ * @param avctx codec context
+ * @param num_lens number of possible frame lengths
+ * @param bands scalefactor band lengths for all frame lengths
+ * @param num_bands number of scalefactor bands for all frame lengths
+ *
+ * @return zero if successful, a negative value if not
+ */
+av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
+ int num_lens,
+ uint8_t **bands, int* num_bands);
+
+/**
+ * Suggest window sequence for channel.
+ *
+ * @param ctx model context
+ * @param audio samples for the current frame
+ * @param la lookahead samples (NULL when unavailable)
+ * @param channel number of channel element to analyze
+ * @param prev_type previous window type
+ *
+ * @return suggested window information in a structure
+ */
+FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx,
+ const int16_t *audio, const int16_t *la,
+ int channel, int prev_type);
+
+
+/**
+ * Perform psychoacoustic analysis and set band info (threshold, energy).
+ *
+ * @param ctx model context
+ * @param channel audio channel number
+ * @param coeffs pointer to the transformed coefficients
+ * @param wi window information
+ */
+void ff_psy_set_band_info(FFPsyContext *ctx, int channel, const float *coeffs,
+ FFPsyWindowInfo *wi);
+
+/**
+ * Cleanup model context at the end.
+ *
+ * @param ctx model context
+ */
+av_cold void ff_psy_end(FFPsyContext *ctx);
+
+
+/**************************************************************************
+ * Audio preprocessing stuff. *
+ * This should be moved into some audio filter eventually. *
+ **************************************************************************/
+struct FFPsyPreprocessContext;
+
+/**
+ * psychoacoustic model audio preprocessing initialization
+ */
+av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx);
+
+/**
+ * Preprocess several channels in an audio frame in order to compress it better.
+ *
+ * @param ctx preprocessing context
+ * @param audio samples to preprocess
+ * @param dest place to put filtered samples
+ * @param tag channel number
+ * @param channels number of channels to preprocess (some additional work may be done on a stereo pair)
+ */
+void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
+ const int16_t *audio, int16_t *dest,
+ int tag, int channels);
+
+/**
+ * Cleanup audio preprocessing module.
+ */
+av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
+
+#endif /* FFMPEG_PSYMODEL_H */
More information about the FFmpeg-soc
mailing list