[FFmpeg-cvslog] avformat/mp3dec: fix gapless audio support

Sat Sep 20 20:19:43 CEST 2014

ffmpeg | branch: master | wm4 <nfxjfg at googlemail.com> | Sat Sep 20 13:48:05 2014 +0200| [d87fe2687fdc5b1cb9aaec957afadb56d207618f] | committer: Michael Niedermayer

avformat/mp3dec: fix gapless audio support

The code already had skipping of initial padding, but discarding
trailing frame padding was missing.

This is somewhat questionable, because it will make the decoder discard
any data after the declared file size in the LAME header. But note that
skipping full frames at the end of the stream is required. Encoders
actually create such files.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d87fe2687fdc5b1cb9aaec957afadb56d207618f
---

 libavformat/avformat.h |    8 ++++++++
 libavformat/mp3dec.c   |    2 ++
 libavformat/utils.c    |   17 ++++++++++++++++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index b915148..2370cb0 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1031,6 +1031,14 @@ typedef struct AVStream {
     int skip_samples;
 
     /**
+     * If not 0, the first audio sample that should be discarded from the stream.
+     * This is broken by design (needs global sample count), but can't be
+     * avoided for broken by design formats such as mp3 with ad-hoc gapless
+     * audio support.
+     */
+    int64_t end_discard_sample;
+
+    /**
      * Number of internally decoded frames, used internally in libavformat, do not access
      * its lifetime differs from info which is why it is not in that structure.
      */
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index 4872afc..639c78d 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -219,6 +219,8 @@ static void mp3_parse_info_tag(AVFormatContext *s, AVStream *st,
         mp3->start_pad = v>>12;
         mp3->  end_pad = v&4095;
         st->skip_samples = mp3->start_pad + 528 + 1;
+        if (mp3->frames)
+            st->end_discard_sample = -mp3->end_pad + 528 + 1 + mp3->frames * (int64_t)spf;
         if (!st->start_time)
             st->start_time = av_rescale_q(st->skip_samples,
                                             (AVRational){1, c->sample_rate},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index e899e4d..58533f8 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1255,6 +1255,11 @@ static int read_from_packet_buffer(AVPacketList **pkt_buffer,
     return 0;
 }
 
+static int64_t ts_to_samples(AVStream *st, int64_t ts)
+{
+    return av_rescale(ts, st->time_base.num * st->codec->sample_rate, st->time_base.den);
+}
+
 static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 {
     int ret = 0, i, got_packet = 0;
@@ -1352,10 +1357,20 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 
     if (ret >= 0) {
         AVStream *st = s->streams[pkt->stream_index];
-        if (st->skip_samples) {
+        int discard_padding = 0;
+        if (st->end_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
+            int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE : 0);
+            int64_t sample = ts_to_samples(st, pts);
+            int duration = ts_to_samples(st, pkt->duration);
+            int64_t end_sample = sample + duration;
+            if (duration > 0 && end_sample >= st->end_discard_sample)
+                discard_padding = FFMIN(end_sample - st->end_discard_sample, duration);
+        }
+        if (st->skip_samples || discard_padding) {
             uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
             if (p) {
                 AV_WL32(p, st->skip_samples);
+                AV_WL32(p + 4, discard_padding);
                 av_log(s, AV_LOG_DEBUG, "demuxer injecting skip %d\n", st->skip_samples);
             }
             st->skip_samples = 0;