[FFmpeg-devel] [PATCH 2/4] avcodec/encode: restructure the core encoding code
James Almer
jamrial at gmail.com
Thu Feb 27 20:02:00 EET 2020
This commit follows the same logic as 061a0c14bb, but for the encode API: The
new public encoding API will no longer be a wrapper around the old deprecated
one, and the internal API used by the encoders now consists of a single
receive_packet() callback that pulls frames as required.
Signed-off-by: James Almer <jamrial at gmail.com>
---
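[Reviewer note, not part of the patch] From the caller's side nothing changes; the send/receive dataflow is simply no longer emulated on top of the deprecated avcodec_encode_video2()/avcodec_encode_audio2(). For reference, this is the caller-side loop the restructured code now backs natively (the encode_one() and sink() names are made up for this sketch):

#include "libavcodec/avcodec.h"

/* Push one frame (or NULL to start draining) and pull every packet the
 * encoder has ready; error paths trimmed for brevity. */
static int encode_one(AVCodecContext *enc, const AVFrame *frame, AVPacket *pkt,
                      void (*sink)(const AVPacket *))
{
    int ret = avcodec_send_frame(enc, frame);
    if (ret < 0)
        return ret;

    while ((ret = avcodec_receive_packet(enc, pkt)) >= 0) {
        sink(pkt);              /* hand the packet to the muxer/consumer */
        av_packet_unref(pkt);
    }

    /* EAGAIN: feed more input; EOF: fully drained; anything else is an error. */
    return (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) ? 0 : ret;
}
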
libavcodec/avcodec.h | 12 +-
libavcodec/encode.c | 268 ++++++++++++++++++++++++++++++++----------
libavcodec/encode.h | 39 ++++++
libavcodec/internal.h | 7 +-
libavcodec/utils.c | 12 +-
5 files changed, 268 insertions(+), 70 deletions(-)
create mode 100644 libavcodec/encode.h
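[Reviewer note, not part of the patch] To make the intent of the new internal API below easier to review, here is a sketch of a minimal encoder written directly against the receive_packet()/ff_encode_get_frame() pair. Every "hyp"-prefixed name and the fixed packet size are invented; a real encoder would size and fill the packet from the frame it pulled:

#include "avcodec.h"
#include "encode.h"

#define HYP_MAX_PKT_SIZE 4096   /* arbitrary bound, for illustration only */

static int hyp_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    AVFrame *frame = av_frame_alloc();
    int ret;

    if (!frame)
        return AVERROR(ENOMEM);

    /* Pull the next buffered input frame. AVERROR(EAGAIN) means the user has
     * not sent one yet, AVERROR_EOF means we are draining with nothing left;
     * both are passed straight back to the generic layer. */
    ret = ff_encode_get_frame(avctx, frame);
    if (ret < 0)
        goto end;

    /* Output packets must be ref-counted under the new API. */
    ret = av_new_packet(avpkt, HYP_MAX_PKT_SIZE);
    if (ret < 0)
        goto end;

    /* ... compress frame into avpkt->data here ... */
    avpkt->pts   = avpkt->dts = frame->pts;
    avpkt->flags |= AV_PKT_FLAG_KEY;

end:
    av_frame_free(&frame);
    return ret;
}

AVCodec ff_hyp_encoder = {
    .name           = "hyp",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_NONE,  /* placeholder */
    .receive_packet = hyp_receive_packet,
};

An encoder with delay would instead keep calling ff_encode_get_frame() and buffer frames in its private context until it has a packet to emit. Note that a .receive_packet callback alone is now enough for av_codec_is_encoder() to treat the AVCodec as an encoder (see the utils.c hunk).
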
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 894a9e5565..9d22390dd3 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3641,14 +3641,10 @@ typedef struct AVCodec {
int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt);
int (*close)(AVCodecContext *);
/**
- * Encode API with decoupled packet/frame dataflow. The API is the
- * same as the avcodec_ prefixed APIs (avcodec_send_frame() etc.), except
- * that:
- * - never called if the codec is closed or the wrong type,
- * - if AV_CODEC_CAP_DELAY is not set, drain frames are never sent,
- * - only one drain frame is ever passed down,
- */
- int (*send_frame)(AVCodecContext *avctx, const AVFrame *frame);
+ * Encode API with decoupled frame/packet dataflow. This function is called
+ * to get one output packet. It should call ff_encode_get_packet() to obtain
+ * input data.
+ */
int (*receive_packet)(AVCodecContext *avctx, AVPacket *avpkt);
/**
diff --git a/libavcodec/encode.c b/libavcodec/encode.c
index 9ed2cf0f59..a887a0ab55 100644
--- a/libavcodec/encode.c
+++ b/libavcodec/encode.c
@@ -26,6 +26,7 @@
#include "libavutil/samplefmt.h"
#include "avcodec.h"
+#include "encode.h"
#include "frame_thread_encoder.h"
#include "internal.h"
@@ -359,101 +360,250 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
return ret;
}
-static int do_encode(AVCodecContext *avctx, const AVFrame *frame, int *got_packet)
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame)
{
+ AVCodecInternal *avci = avctx->internal;
+
+ if (avci->draining)
+ return AVERROR_EOF;
+
+ if (!avci->buffer_frame->buf[0])
+ return AVERROR(EAGAIN);
+
+ av_frame_move_ref(frame, avci->buffer_frame);
+
+ return 0;
+}
+
+static int encode_simple_internal(AVCodecContext *avctx, AVPacket *avpkt)
+{
+ AVCodecInternal *avci = avctx->internal;
+ EncodeSimpleContext *es = &avci->es;
+ AVFrame *frame = es->in_frame;
+ AVFrame *padded_frame = NULL;
+ int got_packet;
int ret;
- *got_packet = 0;
- av_packet_unref(avctx->internal->buffer_pkt);
- avctx->internal->buffer_pkt_valid = 0;
+ if (!frame->buf[0] && !avci->draining) {
+ av_frame_unref(frame);
+ ret = ff_encode_get_frame(avctx, frame);
+ if (ret < 0 && ret != AVERROR_EOF)
+ return ret;
+ }
- if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
- ret = avcodec_encode_video2(avctx, avctx->internal->buffer_pkt,
- frame, got_packet);
- } else if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
- ret = avcodec_encode_audio2(avctx, avctx->internal->buffer_pkt,
- frame, got_packet);
- } else {
- ret = AVERROR(EINVAL);
+ if (avci->draining_done)
+ return AVERROR_EOF;
+
+ if (!frame->buf[0]) {
+ if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+ avctx->active_thread_type & FF_THREAD_FRAME))
+ return AVERROR_EOF;
+
+ // Flushing is signaled with a NULL frame
+ frame = NULL;
+ }
+
+ got_packet = 0;
+
+ if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+ if ((avctx->flags & AV_CODEC_FLAG_PASS1) && avctx->stats_out)
+ avctx->stats_out[0] = '\0';
+
+ if (av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) {
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ if (frame) {
+ if (frame->format == AV_PIX_FMT_NONE)
+ av_log(avctx, AV_LOG_WARNING, "AVFrame.format is not set\n");
+ if (frame->width == 0 || frame->height == 0)
+ av_log(avctx, AV_LOG_WARNING, "AVFrame.width or height is not set\n");
+ }
+ } else if (frame && avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+ /* extract audio service type metadata */
+ AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_AUDIO_SERVICE_TYPE);
+ if (sd && sd->size >= sizeof(enum AVAudioServiceType))
+ avctx->audio_service_type = *(enum AVAudioServiceType*)sd->data;
+
+ /* check for valid frame size */
+ if (avctx->codec->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME) {
+ if (frame->nb_samples > avctx->frame_size) {
+ av_log(avctx, AV_LOG_ERROR, "more samples than frame size (avcodec_encode_audio2)\n");
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)) {
+ /* if we already got an undersized frame, that must have been the last */
+ if (avctx->internal->last_audio_frame) {
+ av_log(avctx, AV_LOG_ERROR, "frame_size (%d) was not respected for a non-last frame (avcodec_encode_audio2)\n", avctx->frame_size);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+
+ if (frame->nb_samples < avctx->frame_size) {
+ ret = pad_last_frame(avctx, &padded_frame, frame);
+ if (ret < 0)
+ goto end;
+
+ av_frame_unref(frame);
+ frame = padded_frame;
+ avctx->internal->last_audio_frame = 1;
+ }
+
+ if (frame->nb_samples != avctx->frame_size) {
+ av_log(avctx, AV_LOG_ERROR, "nb_samples (%d) != frame_size (%d) (avcodec_encode_audio2)\n", frame->nb_samples, avctx->frame_size);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ }
+ }
+
+ av_assert0(avctx->codec->encode2);
+
+ if (CONFIG_FRAME_THREAD_ENCODER &&
+ avci->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))
+ ret = ff_thread_video_encode_frame(avctx, avpkt, frame, &got_packet);
+ else
+ ret = avctx->codec->encode2(avctx, avpkt, frame, &got_packet);
+
+ av_assert0(ret <= 0);
+
+ emms_c();
+
+ if (!ret && got_packet) {
+ if (avpkt->data) {
+ ret = av_packet_make_refcounted(avpkt);
+ if (ret < 0)
+ goto end;
+ }
+
+ if (frame && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+ avctx->active_thread_type & FF_THREAD_FRAME)) {
+ if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+ avpkt->pts = avpkt->dts = frame->pts;
+ } else if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+ if (avpkt->pts == AV_NOPTS_VALUE)
+ avpkt->pts = frame->pts;
+ if (!avpkt->duration)
+ avpkt->duration = ff_samples_to_time_base(avctx,
+ frame->nb_samples);
+ }
+ }
+ if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
+ /* NOTE: if we add any audio encoders which output non-keyframe packets,
+ * this needs to be moved to the encoders, but for now we can do it
+ * here to simplify things */
+ avpkt->flags |= AV_PKT_FLAG_KEY;
+ avpkt->dts = avpkt->pts;
+ }
+ }
+
+ if (avci->draining && !got_packet)
+ avci->draining_done = 1;
+
+end:
+ if (ret < 0 || !got_packet)
+ av_packet_unref(avpkt);
+
+ if (frame) {
+ if (!ret)
+ avctx->frame_number++;
+ av_frame_unref(frame);
}
- if (ret >= 0 && *got_packet) {
+ av_frame_free(&padded_frame);
+
+ if (got_packet)
// Encoders must always return ref-counted buffers.
// Side-data only packets have no data and can be not ref-counted.
- av_assert0(!avctx->internal->buffer_pkt->data || avctx->internal->buffer_pkt->buf);
- avctx->internal->buffer_pkt_valid = 1;
- ret = 0;
- } else {
- av_packet_unref(avctx->internal->buffer_pkt);
+ av_assert0(!avpkt->data || avpkt->buf);
+
+ return ret;
+}
+
+static int encode_simple_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+{
+ int ret;
+
+ while (!avpkt->data && !avpkt->side_data) {
+ ret = encode_simple_internal(avctx, avpkt);
+ if (ret < 0)
+ return ret;
}
+ return 0;
+}
+
+static int encode_receive_packet_internal(AVCodecContext *avctx, AVPacket *avpkt)
+{
+ AVCodecInternal *avci = avctx->internal;
+ int ret;
+
+ av_assert0(!avpkt->data && !avpkt->side_data);
+
+ if (avctx->codec->receive_packet) {
+ ret = avctx->codec->receive_packet(avctx, avpkt);
+ if (!ret)
+ // Encoders must always return ref-counted buffers.
+ // Side-data only packets have no data and can be not ref-counted.
+ av_assert0(!avpkt->data || avpkt->buf);
+ } else
+ ret = encode_simple_receive_packet(avctx, avpkt);
+
+ if (ret == AVERROR_EOF)
+ avci->draining_done = 1;
+
return ret;
}
int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
+ AVCodecInternal *avci = avctx->internal;
+ int ret;
+
if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
return AVERROR(EINVAL);
- if (avctx->internal->draining)
+ if (avci->draining)
return AVERROR_EOF;
- if (!frame) {
- avctx->internal->draining = 1;
+ if (avci->buffer_frame->data[0])
+ return AVERROR(EAGAIN);
- if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
- return 0;
+ if (!frame) {
+ avci->draining = 1;
+ } else {
+ ret = av_frame_ref(avci->buffer_frame, frame);
+ if (ret < 0)
+ return ret;
}
- if (avctx->codec->send_frame)
- return avctx->codec->send_frame(avctx, frame);
-
- // Emulation via old API. Do it here instead of avcodec_receive_packet, because:
- // 1. if the AVFrame is not refcounted, the copying will be much more
- // expensive than copying the packet data
- // 2. assume few users use non-refcounted AVPackets, so usually no copy is
- // needed
-
- if (avctx->internal->buffer_pkt_valid)
- return AVERROR(EAGAIN);
+ if (!avci->buffer_pkt->data && !avci->buffer_pkt->side_data) {
+ ret = encode_receive_packet_internal(avctx, avci->buffer_pkt);
+ if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
+ return ret;
+ }
- return do_encode(avctx, frame, &(int){0});
+ return 0;
}
int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
+ AVCodecInternal *avci = avctx->internal;
+ int ret;
+
av_packet_unref(avpkt);
if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
return AVERROR(EINVAL);
- if (avctx->codec->receive_packet) {
- int ret;
- if (avctx->internal->draining && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
- return AVERROR_EOF;
- ret = avctx->codec->receive_packet(avctx, avpkt);
- if (!ret)
- // Encoders must always return ref-counted buffers.
- // Side-data only packets have no data and can be not ref-counted.
- av_assert0(!avpkt->data || avpkt->buf);
- return ret;
- }
-
- // Emulation via old API.
-
- if (!avctx->internal->buffer_pkt_valid) {
- int got_packet;
- int ret;
- if (!avctx->internal->draining)
- return AVERROR(EAGAIN);
- ret = do_encode(avctx, NULL, &got_packet);
+ if (avci->buffer_pkt->data || avci->buffer_pkt->side_data) {
+ av_packet_move_ref(avpkt, avci->buffer_pkt);
+ } else {
+ ret = encode_receive_packet_internal(avctx, avpkt);
if (ret < 0)
return ret;
- if (ret >= 0 && !got_packet)
- return AVERROR_EOF;
}
- av_packet_move_ref(avpkt, avctx->internal->buffer_pkt);
- avctx->internal->buffer_pkt_valid = 0;
return 0;
}
diff --git a/libavcodec/encode.h b/libavcodec/encode.h
new file mode 100644
index 0000000000..2eef31251a
--- /dev/null
+++ b/libavcodec/encode.h
@@ -0,0 +1,39 @@
+/*
+ * generic encoding-related code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ENCODE_H
+#define AVCODEC_ENCODE_H
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+
+/**
+ * Called by encoders to get the next frame for encoding.
+ *
+ * @param frame An empty frame to be filled with data.
+ * @return 0 if a new reference has been successfully written to frame
+ * AVERROR(EAGAIN) if no data is currently available
+ * AVERROR_EOF if the end of stream has been reached, so no more data
+ * will be available
+ */
+int ff_encode_get_frame(AVCodecContext *avctx, AVFrame *frame);
+
+#endif /* AVCODEC_ENCODE_H */
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index bccd9222d4..8b97e08e9f 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -132,6 +132,10 @@ typedef struct DecodeFilterContext {
int nb_bsfs;
} DecodeFilterContext;
+typedef struct EncodeSimpleContext {
+ AVFrame *in_frame;
+} EncodeSimpleContext;
+
typedef struct AVCodecInternal {
/**
* Whether the parent AVCodecContext is a copy of the context which had
@@ -171,6 +175,8 @@ typedef struct AVCodecInternal {
DecodeSimpleContext ds;
DecodeFilterContext filter;
+ EncodeSimpleContext es;
+
/**
* Properties (timestamps+side data) extracted from the last packet passed
* for decoding.
@@ -204,7 +210,6 @@ typedef struct AVCodecInternal {
* buffers for using new encode/decode API through legacy API
*/
AVPacket *buffer_pkt;
- int buffer_pkt_valid; // encoding: packet without data can be valid
AVFrame *buffer_frame;
int draining_done;
/* set to 1 when the caller is using the old decoding API */
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 5257a1c627..ccc08c2dfb 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -93,7 +93,7 @@ void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size)
int av_codec_is_encoder(const AVCodec *codec)
{
- return codec && (codec->encode_sub || codec->encode2 ||codec->send_frame);
+ return codec && (codec->encode_sub || codec->encode2 || codec->receive_packet);
}
int av_codec_is_decoder(const AVCodec *codec)
@@ -607,6 +607,12 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
goto free_and_end;
}
+ avci->es.in_frame = av_frame_alloc();
+ if (!avctx->internal->es.in_frame) {
+ ret = AVERROR(ENOMEM);
+ goto free_and_end;
+ }
+
avci->buffer_pkt = av_packet_alloc();
if (!avci->buffer_pkt) {
ret = AVERROR(ENOMEM);
@@ -1074,6 +1080,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
av_packet_free(&avci->last_pkt_props);
av_packet_free(&avci->ds.in_pkt);
+ av_frame_free(&avci->es.in_frame);
ff_decode_bsfs_uninit(avctx);
av_freep(&avci->pool);
@@ -1094,9 +1101,9 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
av_frame_unref(avci->buffer_frame);
av_frame_unref(avci->compat_decode_frame);
av_packet_unref(avci->buffer_pkt);
- avci->buffer_pkt_valid = 0;
av_packet_unref(avci->ds.in_pkt);
+ av_frame_unref(avci->es.in_frame);
if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
ff_thread_flush(avctx);
@@ -1162,6 +1169,7 @@ av_cold int avcodec_close(AVCodecContext *avctx)
av_packet_free(&avctx->internal->last_pkt_props);
av_packet_free(&avctx->internal->ds.in_pkt);
+ av_frame_free(&avctx->internal->es.in_frame);
for (i = 0; i < FF_ARRAY_ELEMS(pool->pools); i++)
av_buffer_pool_uninit(&pool->pools[i]);
--
2.25.1