[FFmpeg-devel] [PATCH 1/1] avcodec/libopusdec: Enable FEC/PLC

Philip-Dylan Gleonec philip-dylan.gleonec at savoirfairelinux.com
Tue Feb 16 16:00:50 EET 2021


Adds FEC/PLC support to libopus. The lost packets are detected as a
discontinuity in the audio stream. When a discontinuity is detected, this
patch tries to decode the FEC data. If FEC data is present in the
packet, it is decoded, otherwise audio is re-created through PLC.

This patch is based on Steinar H. Gunderson's contribution, and corrects
the pts computation: all pts are expressed in samples instead of time.
This patch also adds an option "decode_fec" which enables or disables
FEC decoding. This option is disabled by default to keep consistent
behaviour with former versions.

A number of checks are made to ensure compatibility with different
containers. Indeed, video containers seem to have a pts expressed in ms
while it is expressed in samples for audio containers. It also manages
the case where pkt->duration is 0, as in some RTP streams. This patch
ignores data it cannot reconstruct, i.e. packets received twice and
packets with a length that is not a multiple of 2.5ms.

Signed-off-by: Philip-Dylan Gleonec <philip-dylan.gleonec at savoirfairelinux.com>
Co-developed-by: Steinar H. Gunderson <steinar+ffmpeg at gunderson.no>
---
 libavcodec/libopusdec.c | 105 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 94 insertions(+), 11 deletions(-)

diff --git a/libavcodec/libopusdec.c b/libavcodec/libopusdec.c
index 082a431c6c..504043353f 100644
--- a/libavcodec/libopusdec.c
+++ b/libavcodec/libopusdec.c
@@ -43,10 +43,15 @@ struct libopus_context {
 #ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
     int apply_phase_inv;
 #endif
+    int decode_fec;
+    int64_t expected_next_pts;
 };
 
 #define OPUS_HEAD_SIZE 19
 
+// Sample rate is constant, as libopus always outputs at 48 kHz
+#define OPUS_SAMPLERATE 48000
+
 static av_cold int libopus_decode_init(AVCodecContext *avc)
 {
     struct libopus_context *opus = avc->priv_data;
@@ -134,6 +139,8 @@ static av_cold int libopus_decode_init(AVCodecContext *avc)
     /* Decoder delay (in samples) at 48kHz */
     avc->delay = avc->internal->skip_samples = opus->pre_skip;
 
+    opus->expected_next_pts = AV_NOPTS_VALUE;
+
     return 0;
 }
 
@@ -155,25 +162,100 @@ static int libopus_decode(AVCodecContext *avc, void *data,
 {
     struct libopus_context *opus = avc->priv_data;
     AVFrame *frame               = data;
-    int ret, nb_samples;
+    uint8_t *outptr;
+    int ret, nb_samples = 0, nb_lost_samples = 0, nb_samples_left;
+
+    // If FEC is enabled, calculate number of lost samples
+    if (opus->decode_fec &&
+        opus->expected_next_pts != AV_NOPTS_VALUE &&
+        pkt->pts != AV_NOPTS_VALUE &&
+        pkt->pts != opus->expected_next_pts) {
+        // Cap at recovering 120 ms of lost audio.
+        nb_lost_samples = pkt->pts - opus->expected_next_pts;
+        nb_lost_samples = FFMIN(nb_lost_samples, MAX_FRAME_SIZE);
+        // pts is expressed in ms for some containers (e.g. mkv)
+        // FEC only works for SILK frames (> 10ms)
+        // Detect if nb_lost_samples is in ms, and convert in samples if it is
+        if (nb_lost_samples > 0) {
+            if (pkt->duration > 0 && pkt->duration < OPUS_SAMPLERATE * 10 / 1000) {
+                nb_lost_samples = nb_lost_samples * OPUS_SAMPLERATE / 1000;
+            }
+            // For FEC/PLC, frame_size has to be a multiple of 2.5 ms
+            if (nb_lost_samples % (int)(2.5 / 1000 * OPUS_SAMPLERATE)) {
+                nb_lost_samples -= nb_lost_samples % (int)(2.5 / 1000 * OPUS_SAMPLERATE);
+            }
+        }
+    }
 
-    frame->nb_samples = MAX_FRAME_SIZE;
+    frame->nb_samples = MAX_FRAME_SIZE + nb_lost_samples;
     if ((ret = ff_get_buffer(avc, frame, 0)) < 0)
         return ret;
 
+    outptr = frame->data[0];
+    nb_samples_left = frame->nb_samples;
+
+    if (opus->decode_fec && nb_lost_samples > 0) {
+        // Try to recover the lost samples with FEC data from this one.
+        // If there's no FEC data, the decoder will do loss concealment instead.
+        if (avc->sample_fmt == AV_SAMPLE_FMT_S16)
+            ret = opus_multistream_decode(opus->dec, pkt->data, pkt->size,
+                                                  (opus_int16 *)outptr,
+                                                  nb_lost_samples, 1);
+        else
+            ret = opus_multistream_decode_float(opus->dec, pkt->data, pkt->size,
+                                                       (float *)outptr,
+                                                       nb_lost_samples, 1);
+
+        if (ret < 0) {
+            if (opus->decode_fec) opus->expected_next_pts = pkt->pts + pkt->duration;
+            av_log(avc, AV_LOG_ERROR, "Decoding error: %s\n",
+                   opus_strerror(ret));
+            return ff_opus_error_to_averror(ret);
+        }
+
+        av_log(avc, AV_LOG_WARNING, "Recovered %d samples with FEC/PLC\n",
+                   ret);
+
+        outptr += ret * avc->channels * av_get_bytes_per_sample(avc->sample_fmt);
+        nb_samples_left -= ret;
+        nb_samples += ret;
+        if (pkt->pts != AV_NOPTS_VALUE) {
+            frame->pts = pkt->pts - ret;
+        }
+    }
+
+    // Decode the actual, non-lost data.
     if (avc->sample_fmt == AV_SAMPLE_FMT_S16)
-        nb_samples = opus_multistream_decode(opus->dec, pkt->data, pkt->size,
-                                             (opus_int16 *)frame->data[0],
-                                             frame->nb_samples, 0);
+        ret = opus_multistream_decode(opus->dec, pkt->data, pkt->size,
+                                      (opus_int16 *)outptr,
+                                      nb_samples_left, 0);
     else
-        nb_samples = opus_multistream_decode_float(opus->dec, pkt->data, pkt->size,
-                                                   (float *)frame->data[0],
-                                                   frame->nb_samples, 0);
+        ret = opus_multistream_decode_float(opus->dec, pkt->data, pkt->size,
+                                            (float *)outptr,
+                                            nb_samples_left, 0);
 
-    if (nb_samples < 0) {
+    if (ret < 0) {
+        if (opus->decode_fec) opus->expected_next_pts = pkt->pts + pkt->duration;
         av_log(avc, AV_LOG_ERROR, "Decoding error: %s\n",
-               opus_strerror(nb_samples));
-        return ff_opus_error_to_averror(nb_samples);
+               opus_strerror(ret));
+        return ff_opus_error_to_averror(ret);
+    }
+    nb_samples += ret;
+
+    av_log(avc, AV_LOG_WARNING, "Decoded %d samples normally\n", ret);
+
+    if (opus->decode_fec)
+    {
+        // Calculate the next expected pts
+        if (pkt->pts == AV_NOPTS_VALUE) {
+            opus->expected_next_pts = AV_NOPTS_VALUE;
+        } else {
+            if (pkt->duration) {
+                opus->expected_next_pts = pkt->pts + pkt->duration;
+            } else {
+                opus->expected_next_pts = pkt->pts + ret;
+            }
+        }
     }
 
 #ifndef OPUS_SET_GAIN
@@ -214,6 +296,7 @@ static const AVOption libopusdec_options[] = {
 #ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
     { "apply_phase_inv", "Apply intensity stereo phase inversion", OFFSET(apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
 #endif
+    { "decode_fec", "Decode FEC data or use PLC", OFFSET(decode_fec), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
     { NULL },
 };
 
-- 
2.25.1



More information about the ffmpeg-devel mailing list