[FFmpeg-cvslog] Add floating-point sample format support to the ac3, eac3, dca, aac, and vorbis decoders

Thu May 19 06:02:21 CEST 2011

ffmpeg | branch: master | Justin Ruggles <justin.ruggles at gmail.com> | Fri Apr 22 21:30:19 2011 -0400| [9aa8193a234ccb6a79cba5cc550531f62ffb0a17] | committer: Justin Ruggles

Add floating-point sample format support to the ac3, eac3, dca, aac, and vorbis
decoders.

Based on patches by clsid2 in ffdshow-tryout.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9aa8193a234ccb6a79cba5cc550531f62ffb0a17
---

 libavcodec/aacdec.c    |   34 ++++++++++++++++++++++++----------
 libavcodec/aacsbr.c    |   11 ++++++++---
 libavcodec/aacsbr.h    |    2 +-
 libavcodec/ac3dec.c    |   32 ++++++++++++++++++++++++++------
 libavcodec/dca.c       |   34 +++++++++++++++++++++++++++-------
 libavcodec/vorbisdec.c |   23 ++++++++++++++++++-----
 6 files changed, 104 insertions(+), 32 deletions(-)

diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 5f9dd83..f2d50f4 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -186,7 +186,7 @@ static av_cold int che_configure(AACContext *ac,
     if (che_pos[type][id]) {
         if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
             return AVERROR(ENOMEM);
-        ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
+        ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
         if (type != TYPE_CCE) {
             ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
             if (type == TYPE_CPE ||
@@ -546,6 +546,7 @@ static void reset_predictor_group(PredictorState *ps, int group_num)
 static av_cold int aac_decode_init(AVCodecContext *avctx)
 {
     AACContext *ac = avctx->priv_data;
+    float output_scale_factor;
 
     ac->avctx = avctx;
     ac->m4ac.sample_rate = avctx->sample_rate;
@@ -557,7 +558,13 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
             return -1;
     }
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+        output_scale_factor = 1.0 / 32768.0;
+    } else {
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+        output_scale_factor = 1.0;
+    }
 
     AAC_INIT_VLC_STATIC( 0, 304);
     AAC_INIT_VLC_STATIC( 1, 270);
@@ -585,9 +592,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
                     ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                     352);
 
-    ff_mdct_init(&ac->mdct,       11, 1, 1.0/1024.0);
-    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0/128.0);
-    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0);
+    ff_mdct_init(&ac->mdct,       11, 1, output_scale_factor/1024.0);
+    ff_mdct_init(&ac->mdct_small,  8, 1, output_scale_factor/128.0);
+    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0/output_scale_factor);
     // window initialization
     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
@@ -2169,7 +2176,8 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
         avctx->frame_size = samples;
     }
 
-    data_size_tmp = samples * avctx->channels * sizeof(int16_t);
+    data_size_tmp = samples * avctx->channels *
+                    (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
     if (*data_size < data_size_tmp) {
         av_log(avctx, AV_LOG_ERROR,
                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
@@ -2178,8 +2186,14 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
     }
     *data_size = data_size_tmp;
 
-    if (samples)
-        ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
+    if (samples) {
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
+            ac->fmt_conv.float_interleave(data, (const float **)ac->output_data,
+                                          samples, avctx->channels);
+        else
+            ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data,
+                                                   samples, avctx->channels);
+    }
 
     if (ac->output_configured)
         ac->output_configured = OC_LOCKED;
@@ -2497,7 +2511,7 @@ AVCodec ff_aac_decoder = {
     aac_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
     .sample_fmts = (const enum AVSampleFormat[]) {
-        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
     },
     .channel_layouts = aac_channel_layout,
 };
@@ -2517,7 +2531,7 @@ AVCodec ff_aac_latm_decoder = {
     .decode = latm_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
     .sample_fmts = (const enum AVSampleFormat[]) {
-        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
     },
     .channel_layouts = aac_channel_layout,
 };
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index 7a217ab..81b0b4c 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -126,14 +126,19 @@ av_cold void ff_aac_sbr_init(void)
     ff_ps_init();
 }
 
-av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr)
+av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr)
 {
+    float mdct_scale;
     sbr->kx[0] = sbr->kx[1] = 32; //Typo in spec, kx' inits to 32
     sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1;
     sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
     sbr->data[1].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
-    ff_mdct_init(&sbr->mdct, 7, 1, 1.0/64);
-    ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0);
+    /* SBR requires samples to be scaled to +/-32768.0 to work correctly.
+     * mdct scale factors are adjusted to scale up from +/-1.0 at analysis
+     * and scale back down at synthesis. */
+    mdct_scale = ac->avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 32768.0f : 1.0f;
+    ff_mdct_init(&sbr->mdct,     7, 1, 1.0 / (64 * mdct_scale));
+    ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * mdct_scale);
     ff_ps_ctx_init(&sbr->ps);
 }
 
diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h
index dca8330..153070d 100644
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
@@ -36,7 +36,7 @@
 /** Initialize SBR. */
 av_cold void ff_aac_sbr_init(void);
 /** Initialize one SBR context. */
-av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr);
+av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr);
 /** Close one SBR context. */
 av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr);
 /** Decode one SBR element. */
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 015ebae..2966c33 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -189,7 +189,13 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     av_lfg_init(&s->dith_state, 0);
 
     /* set scale value for float to int16 conversion */
-    s->mul_bias = 32767.0f;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        s->mul_bias = 1.0f;
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    } else {
+        s->mul_bias = 32767.0f;
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    }
 
     /* allow downmixing to stereo or mono */
     if (avctx->channels > 0 && avctx->request_channels > 0 &&
@@ -204,7 +210,6 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
         if (!s->input_buffer)
             return AVERROR(ENOMEM);
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
     return 0;
 }
 
@@ -1299,7 +1304,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     AC3DecodeContext *s = avctx->priv_data;
-    int16_t *out_samples = (int16_t *)data;
+    float   *out_samples_flt = data;
+    int16_t *out_samples_s16 = data;
     int blk, ch, err;
     const uint8_t *channel_map;
     const float *output[AC3_MAX_CHANNELS];
@@ -1405,10 +1411,18 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
             av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
             err = 1;
         }
-        s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
-        out_samples += 256 * s->out_channels;
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            s->fmt_conv.float_interleave(out_samples_flt, output, 256,
+                                         s->out_channels);
+            out_samples_flt += 256 * s->out_channels;
+        } else {
+            s->fmt_conv.float_to_int16_interleave(out_samples_s16, output, 256,
+                                                  s->out_channels);
+            out_samples_s16 += 256 * s->out_channels;
+        }
     }
-    *data_size = s->num_blocks * 256 * avctx->channels * sizeof (int16_t);
+    *data_size = s->num_blocks * 256 * avctx->channels *
+                 (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
     return FFMIN(buf_size, s->frame_size);
 }
 
@@ -1435,6 +1449,9 @@ AVCodec ff_ac3_decoder = {
     .close = ac3_decode_end,
     .decode = ac3_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
 };
 
 #if CONFIG_EAC3_DECODER
@@ -1447,5 +1464,8 @@ AVCodec ff_eac3_decoder = {
     .close = ac3_decode_end,
     .decode = ac3_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
 };
 #endif
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index f1cd64e..dbadeba 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1626,7 +1626,9 @@ static int dca_decode_frame(AVCodecContext * avctx,
     int lfe_samples;
     int num_core_channels = 0;
     int i;
-    int16_t *samples = data;
+    float   *samples_flt = data;
+    int16_t *samples_s16 = data;
+    int out_size;
     DCAContext *s = avctx->priv_data;
     int channels;
     int core_ss_end;
@@ -1812,9 +1814,11 @@ static int dca_decode_frame(AVCodecContext * avctx,
         return -1;
     }
 
-    if (*data_size < (s->sample_blocks / 8) * 256 * sizeof(int16_t) * channels)
+    out_size = 256 / 8 * s->sample_blocks * channels *
+               (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
+    if (*data_size < out_size)
         return -1;
-    *data_size = 256 / 8 * s->sample_blocks * sizeof(int16_t) * channels;
+    *data_size = out_size;
 
     /* filter to get final output */
     for (i = 0; i < (s->sample_blocks / 8); i++) {
@@ -1833,8 +1837,16 @@ static int dca_decode_frame(AVCodecContext * avctx,
             }
         }
 
-        s->fmt_conv.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
-        samples += 256 * channels;
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            s->fmt_conv.float_interleave(samples_flt, s->samples_chanptr, 256,
+                                         channels);
+            samples_flt += 256 * channels;
+        } else {
+            s->fmt_conv.float_to_int16_interleave(samples_s16,
+                                                  s->samples_chanptr, 256,
+                                                  channels);
+            samples_s16 += 256 * channels;
+        }
     }
 
     /* update lfe history */
@@ -1870,9 +1882,14 @@ static av_cold int dca_decode_init(AVCodecContext * avctx)
 
     for (i = 0; i < DCA_PRIM_CHANNELS_MAX+1; i++)
         s->samples_chanptr[i] = s->samples + i * 256;
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
-    s->scale_bias = 1.0;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+        s->scale_bias = 1.0 / 32768.0;
+    } else {
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+        s->scale_bias = 1.0;
+    }
 
     /* allow downmixing to stereo */
     if (avctx->channels > 0 && avctx->request_channels < avctx->channels &&
@@ -1909,5 +1926,8 @@ AVCodec ff_dca_decoder = {
     .close = dca_decode_end,
     .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
     .capabilities = CODEC_CAP_CHANNEL_CONF,
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
     .profiles = NULL_IF_CONFIG_SMALL(profiles),
 };
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 7443e98..f6ec74f 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -979,7 +979,13 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
     dsputil_init(&vc->dsp, avccontext);
     ff_fmt_convert_init(&vc->fmt_conv, avccontext);
 
-    vc->scale_bias = 32768.0f;
+    if (avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avccontext->sample_fmt = AV_SAMPLE_FMT_FLT;
+        vc->scale_bias = 1.0f;
+    } else {
+        avccontext->sample_fmt = AV_SAMPLE_FMT_S16;
+        vc->scale_bias = 32768.0f;
+    }
 
     if (!headers_len) {
         av_log(avccontext, AV_LOG_ERROR, "Extradata missing.\n");
@@ -1024,7 +1030,6 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
     avccontext->channels    = vc->audio_channels;
     avccontext->sample_rate = vc->audio_samplerate;
     avccontext->frame_size  = FFMIN(vc->blocksize[0], vc->blocksize[1]) >> 2;
-    avccontext->sample_fmt  = AV_SAMPLE_FMT_S16;
 
     return 0 ;
 }
@@ -1634,9 +1639,14 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
                               len * ff_vorbis_channel_layout_offsets[vc->audio_channels - 1][i];
     }
 
-    vc->fmt_conv.float_to_int16_interleave(data, channel_ptrs, len,
-                                           vc->audio_channels);
-    *data_size = len * 2 * vc->audio_channels;
+    if (avccontext->sample_fmt == AV_SAMPLE_FMT_FLT)
+        vc->fmt_conv.float_interleave(data, channel_ptrs, len, vc->audio_channels);
+    else
+        vc->fmt_conv.float_to_int16_interleave(data, channel_ptrs, len,
+                                               vc->audio_channels);
+
+    *data_size = len * vc->audio_channels *
+                 (av_get_bits_per_sample_fmt(avccontext->sample_fmt) / 8);
 
     return buf_size ;
 }
@@ -1663,5 +1673,8 @@ AVCodec ff_vorbis_decoder = {
     vorbis_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("Vorbis"),
     .channel_layouts = ff_vorbis_channel_layouts,
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
 };