[FFmpeg-devel] [PATCH 2/6] aacenc: Improve Intensity Stereo phase detection

Wed Jul 29 06:44:25 CEST 2015

This commit improves the intensity stereo phase detection by considering
the distortion measurement of both phases and picking the phase with the
lowest distortion. This way the phase is always chosen explicitly, so
that it produces the least amount of distortion in both channels.
This commit also enables the use of M/S encoding in parallel with IS.
The specifications say that this is allowed as long as the phase of
any bands marked as M/S is reversed.
Previously this was disabled due to the very primitive nature of the
old phase detection algorithm, which naively took a simple majority
vote over all spectral coefficients of both channels. This resulted
in distortion caused by incorrect phase settings, which is now fixed.
---
 libavcodec/aaccoder.c | 159 ++++++++++++++++++++++++++++----------------------
 1 file changed, 89 insertions(+), 70 deletions(-)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 0441971..7872a9d 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -1214,102 +1214,121 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
     }
 }
 
-static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
+struct is_error {
+    int pass;
+    int phase;
+    float error;
+};
+
+static struct is_error calc_encoding_err_is(AACEncContext *s, ChannelElement *cpe,
+                                            float *L34, float *R34, float ener0,
+                                            float ener1, float ener01,
+                                            int start, int g, int w, int phase)
 {
+    int i, w2;
     float IS[128];
-    float *L34  = s->scoefs + 128*0, *R34  = s->scoefs + 128*1;
     float *I34  = s->scoefs + 128*2;
+    float dist1 = 0.0f, dist2 = 0.0f;
+    struct is_error is_error = {0};
+    SingleChannelElement *sce0 = &cpe->ch[0];
+    SingleChannelElement *sce1 = &cpe->ch[1];
+
+    if (cpe->ms_mode)
+        phase = 1 - 2 * cpe->ms_mask[w*16+g];
+
+    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+        int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[(w+w2)*16+g]-4);
+        float e01_34 = phase*pow(sqrt(ener1/ener0), 3.0/4.0);
+        float maxval, dist_spec_err = 0.0f;
+        float minthr = FFMIN(band0->threshold, band1->threshold);
+        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+            IS[i] = (sce0->pcoeffs[start+(w+w2)*128+i]+
+            phase*sce1->pcoeffs[start+(w+w2)*128+i])*
+            sqrt(ener0/ener01);
+        }
+        abs_pow34_v(I34, IS,                            sce0->ics.swb_sizes[g]);
+        maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34);
+        is_band_type = find_min_book(maxval, is_sf_idx);
+        dist1 += quantize_band_cost(s, sce0->coeffs + start + (w+w2)*128, L34,
+                                    sce0->ics.swb_sizes[g],
+                                    sce0->sf_idx[(w+w2)*16+g],
+                                    sce0->band_type[(w+w2)*16+g],
+                                    s->lambda / band0->threshold, INFINITY, NULL, 0);
+        dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128, R34,
+                                    sce1->ics.swb_sizes[g],
+                                    sce1->sf_idx[(w+w2)*16+g],
+                                    sce1->band_type[(w+w2)*16+g],
+                                    s->lambda / band1->threshold, INFINITY, NULL, 0);
+        dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g],
+                                    is_sf_idx, is_band_type,
+                                    s->lambda / minthr, INFINITY, NULL, 0);
+        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+            dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
+            dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
+        }
+        dist_spec_err *= s->lambda / minthr;
+        dist2 += dist_spec_err;
+    }
+
+    is_error.phase = phase;
+    is_error.pass  = dist2 <= dist1;
+    is_error.error = (dist1 - dist2)*(dist1 - dist2);
+
+    return is_error;
+}
+
+static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
+{
     SingleChannelElement *sce0 = &cpe->ch[0];
     SingleChannelElement *sce1 = &cpe->ch[1];
-    int start = 0, count = 0, i, w, w2, g;
+    int start = 0, count = 0, w, g, w2, i;
     const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f;
-    const float lambda = s->lambda;
+    float *L34  = s->scoefs + 128*0, *R34  = s->scoefs + 128*1;
 
     for (w = 0; w < 128; w++)
         if (sce1->band_type[w] >= INTENSITY_BT2)
             sce1->band_type[w] = 0;
 
-    if (!cpe->common_window)
-        return;
-    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
-        start = 0;
-        for (g = 0;  g < sce0->ics.num_swb; g++) {
-            if (start*freq_mult > INT_STEREO_LOW_LIMIT*(lambda/170.0f) &&
-                cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
-                cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) {
-                int phase = 0;
-                float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
-                float dist1 = 0.0f, dist2 = 0.0f;
+        if (!cpe->common_window)
+            return;
+        for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+            start = 0;
+            for (g = 0;  g < sce0->ics.num_swb; g++) {
+                if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) &&
+                    cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
+                    cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) {
+                    float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
+                struct is_error ph_err1, ph_err2, *erf;
                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                    abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                         float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
                         float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
-                        phase += coef0*coef1 >= 0.0f ? 1 : -1;
                         ener0 += coef0*coef0;
                         ener1 += coef1*coef1;
                         ener01 += (coef0 + coef1)*(coef0 + coef1);
                     }
                 }
-                if (!phase) { /* Too much phase difference between channels */
-                    start += sce0->ics.swb_sizes[g];
-                    continue;
-                }
-                phase = av_clip(phase, -1, 1);
-                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
-                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
-                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
-                    int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[(w+w2)*16+g]-4);
-                    float e01_34 = phase*pow(sqrt(ener1/ener0), 3.0/4.0);
-                    float maxval, dist_spec_err = 0.0f;
-                    float minthr = FFMIN(band0->threshold, band1->threshold);
-                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++)
-                        IS[i] = (sce0->pcoeffs[start+(w+w2)*128+i] + phase*sce1->pcoeffs[start+(w+w2)*128+i]) * sqrt(ener0/ener01);
-                    abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(I34, IS,                            sce0->ics.swb_sizes[g]);
-                    maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34);
-                    is_band_type = find_min_book(maxval, is_sf_idx);
-                    dist1 += quantize_band_cost(s, sce0->coeffs + start + (w+w2)*128,
-                                                L34,
-                                                sce0->ics.swb_sizes[g],
-                                                sce0->sf_idx[(w+w2)*16+g],
-                                                sce0->band_type[(w+w2)*16+g],
-                                                lambda / band0->threshold, INFINITY, NULL, 0);
-                    dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128,
-                                                R34,
-                                                sce1->ics.swb_sizes[g],
-                                                sce1->sf_idx[(w+w2)*16+g],
-                                                sce1->band_type[(w+w2)*16+g],
-                                                lambda / band1->threshold, INFINITY, NULL, 0);
-                    dist2 += quantize_band_cost(s, IS,
-                                                I34,
-                                                sce0->ics.swb_sizes[g],
-                                                is_sf_idx,
-                                                is_band_type,
-                                                lambda / minthr, INFINITY, NULL, 0);
-                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
-                        dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
-                        dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
-                    }
-                    dist_spec_err *= lambda / minthr;
-                    dist2 += dist_spec_err;
-                }
-                if (dist2 <= dist1) {
+                ph_err1 = calc_encoding_err_is(s, cpe, L34, R34, ener0, ener1,
+                                               ener01, start, g, w, -1);
+                ph_err2 = calc_encoding_err_is(s, cpe, L34, R34, ener0, ener1,
+                                               ener01, start, g, w, +1);
+                erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
+                if (erf->pass) {
                     cpe->is_mask[w*16+g] = 1;
-                    cpe->ms_mask[w*16+g] = 0;
                     cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01);
                     cpe->ch[1].is_ener[w*16+g] = ener0/ener1;
-                    if (phase)
-                        cpe->ch[1].band_type[w*16+g] = INTENSITY_BT;
-                    else
-                        cpe->ch[1].band_type[w*16+g] = INTENSITY_BT2;
+                    cpe->ch[1].band_type[w*16+g] = erf->phase ? INTENSITY_BT : INTENSITY_BT2;
                     count++;
                 }
+                    }
+                    start += sce0->ics.swb_sizes[g];
             }
-            start += sce0->ics.swb_sizes[g];
         }
-    }
-    cpe->is_mode = !!count;
+        cpe->is_mode = !!count;
 }
 
 static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
@@ -1325,7 +1344,7 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
         start = 0;
         for (g = 0;  g < sce0->ics.num_swb; g++) {
-            if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
+            if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
                 float dist1 = 0.0f, dist2 = 0.0f;
                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                     FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
-- 
2.5.0.rc2.392.g76e840b