[FFmpeg-devel] [PATCH 19/23] fftools/ffmpeg: add support for multiview video

Anton Khirnov anton at khirnov.net
Sat Sep 14 13:45:44 EEST 2024


This extends the syntax for specifying input streams in -map and complex
filtergraph labels, to allow selecting a view by view ID, index, or
position. The corresponding decoder is then set up to decode the
appropriate view and send frames for that view to the correct
filtergraph input(s).
---
 doc/ffmpeg.texi           |  30 ++-
 fftools/cmdutils.c        |   2 +-
 fftools/cmdutils.h        |   2 +
 fftools/ffmpeg.h          |  45 ++++-
 fftools/ffmpeg_dec.c      | 387 +++++++++++++++++++++++++++++++++++++-
 fftools/ffmpeg_demux.c    |  24 ++-
 fftools/ffmpeg_filter.c   |  71 ++++---
 fftools/ffmpeg_mux_init.c |  34 ++--
 fftools/ffmpeg_opt.c      |  70 ++++++-
 9 files changed, 610 insertions(+), 55 deletions(-)

diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 842e92ad1a..34007f7ea2 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -1799,7 +1799,7 @@ Set the size of the canvas used to render subtitles.
 @section Advanced options
 
 @table @option
-@item -map [-]@var{input_file_id}[:@var{stream_specifier}][?] | @var{[linklabel]} (@emph{output})
+@item -map [-]@var{input_file_id}[:@var{stream_specifier}][:@var{view_specifier}][?] | @var{[linklabel]} (@emph{output})
 
 Create one or more streams in the output file. This option has two forms for
 specifying the data source(s): the first selects one or more streams from some
@@ -1814,6 +1814,26 @@ only those streams that match the specifier are used (see the
 A @code{-} character before the stream identifier creates a "negative" mapping.
 It disables matching streams from already created mappings.
 
+An optional @var{view_specifier} may be given after the stream specifier, which
+for multiview video specifies the view to be used. The view specifier may have
+one of the following formats:
+@table @option
+@item view:@var{view_id}
+select a view by its ID; @var{view_id} may be set to 'all' to use all the views
+interleaved into one stream;
+
+@item vidx:@var{view_idx}
+select a view by its index; i.e. 0 is the base view, 1 is the first non-base
+view, etc.
+
+@item vpos:@var{position}
+select a view by its display position; @var{position} may be @code{left} or
+@code{right}
+@end table
+The default for transcoding is to only use the base view, i.e. the equivalent of
+@code{vidx:0}. For streamcopy, view specifiers are not supported and all views
+are always copied.
+
 A trailing @code{?} after the stream index will allow the map to be
 optional: if the map matches no streams the map will be ignored instead
 of failing. Note the map will still fail if an invalid input file index
@@ -2206,11 +2226,15 @@ distinguished by the format of the corresponding link label:
 @item
 To connect an input stream, use @code{[file_index:stream_specifier]} (i.e. the
 same syntax as @option{-map}). If @var{stream_specifier} matches multiple
-streams, the first one will be used.
+streams, the first one will be used. For multiview video, the stream specifier
+may be followed by the view specifier, see documentation for the @option{-map}
+option for its syntax.
 
 @item
 To connect a loopback decoder use [dec:@var{dec_idx}], where @var{dec_idx} is
-the index of the loopback decoder to be connected to given input.
+the index of the loopback decoder to be connected to given input. For multiview
+video, the decoder index may be followed by the view specifier, see
+documentation for the @option{-map} option for its syntax.
 
 @item
 To connect an output from another complex filtergraph, use its link label. E.g
diff --git a/fftools/cmdutils.c b/fftools/cmdutils.c
index 1573106d8b..76e5721cb1 100644
--- a/fftools/cmdutils.c
+++ b/fftools/cmdutils.c
@@ -988,7 +988,7 @@ FILE *get_preset_file(char *filename, size_t filename_size,
     return f;
 }
 
-static int cmdutils_isalnum(char c)
+int cmdutils_isalnum(char c)
 {
     return (c >= '0' && c <= '9') ||
            (c >= 'A' && c <= 'Z') ||
diff --git a/fftools/cmdutils.h b/fftools/cmdutils.h
index 9441d5726b..e7c8c9f86b 100644
--- a/fftools/cmdutils.h
+++ b/fftools/cmdutils.h
@@ -541,4 +541,6 @@ void remove_avoptions(AVDictionary **a, AVDictionary *b);
 /* Check if any keys exist in dictionary m */
 int check_avoptions(AVDictionary *m);
 
+int cmdutils_isalnum(char c);
+
 #endif /* FFTOOLS_CMDUTILS_H */
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index f4a10b2a66..733d551fa4 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -112,12 +112,32 @@ typedef struct HWDevice {
     AVBufferRef *device_ref;
 } HWDevice;
 
+enum ViewSpecifierType {
+    // no specifier given
+    VIEW_SPECIFIER_TYPE_NONE = 0,
+    // val is view index
+    VIEW_SPECIFIER_TYPE_IDX,
+    // val is view ID
+    VIEW_SPECIFIER_TYPE_ID,
+    // specify view by its position, val is AV_STEREO3D_VIEW_LEFT/RIGHT
+    VIEW_SPECIFIER_TYPE_POS,
+    // use all views, val is ignored
+    VIEW_SPECIFIER_TYPE_ALL,
+};
+
+typedef struct ViewSpecifier {
+    enum ViewSpecifierType type;
+    unsigned               val;
+} ViewSpecifier;
+
 /* select an input stream for an output stream */
 typedef struct StreamMap {
     int disabled;           /* 1 is this mapping is disabled by a negative map */
     int file_index;
     int stream_index;
     char *linklabel;       /* name of an output link, for mapping lavfi outputs */
+
+    ViewSpecifier vs;
 } StreamMap;
 
 typedef struct OptionsContext {
@@ -318,6 +338,10 @@ typedef struct OutputFilterOptions {
     const AVRational         *frame_rates;
     const enum AVColorSpace  *color_spaces;
     const enum AVColorRange  *color_ranges;
+
+    // for simple filtergraphs only, view specifier passed
+    // along to the decoder
+    const ViewSpecifier *vs;
 } OutputFilterOptions;
 
 typedef struct InputFilter {
@@ -817,7 +841,21 @@ void dec_free(Decoder **pdec);
  *
  * @param opts filtergraph input options, to be filled by this function
  */
-int dec_filter_add(Decoder *dec, InputFilter *ifilter, InputFilterOptions *opts);
+int dec_filter_add(Decoder *dec, InputFilter *ifilter, InputFilterOptions *opts,
+                   const ViewSpecifier *vs, SchedulerNode *src);
+
+/*
+ * For multiview video, request output of the view(s) determined by vs.
+ * May be called multiple times.
+ *
+ * If this function is never called, only the base view is output. If it is
+ * called at least once, only the views requested are output.
+ *
+ * @param src scheduler node from which the frames corresponding to vs
+ *            will originate
+ */
+int dec_request_view(Decoder *dec, const ViewSpecifier *vs,
+                     SchedulerNode *src);
 
 int enc_alloc(Encoder **penc, const AVCodec *codec,
               Scheduler *sch, unsigned sch_idx);
@@ -847,7 +885,8 @@ void ifile_close(InputFile **f);
 
 int ist_output_add(InputStream *ist, OutputStream *ost);
 int ist_filter_add(InputStream *ist, InputFilter *ifilter, int is_simple,
-                   InputFilterOptions *opts);
+                   const ViewSpecifier *vs, InputFilterOptions *opts,
+                   SchedulerNode *src);
 
 /**
  * Find an unused input stream of given type.
@@ -875,6 +914,8 @@ void opt_match_per_stream_int64(void *logctx, const SpecifierOptList *sol,
 void opt_match_per_stream_dbl(void *logctx, const SpecifierOptList *sol,
                               AVFormatContext *fc, AVStream *st, double *out);
 
+int view_specifier_parse(const char **pspec, ViewSpecifier *vs);
+
 int muxer_thread(void *arg);
 int encoder_thread(void *arg);
 
diff --git a/fftools/ffmpeg_dec.c b/fftools/ffmpeg_dec.c
index 54f7223f0f..2723a0312e 100644
--- a/fftools/ffmpeg_dec.c
+++ b/fftools/ffmpeg_dec.c
@@ -16,6 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stdbit.h>
+
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/dict.h"
@@ -25,6 +27,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/pixfmt.h"
+#include "libavutil/stereo3d.h"
 #include "libavutil/time.h"
 #include "libavutil/timestamp.h"
 
@@ -39,6 +42,7 @@ typedef struct DecoderPriv {
     AVCodecContext     *dec_ctx;
 
     AVFrame            *frame;
+    AVFrame            *frame_tmp_ref;
     AVPacket           *pkt;
 
     // override output video sample aspect ratio with this value
@@ -77,6 +81,23 @@ typedef struct DecoderPriv {
     char                log_name[32];
     char               *parent_name;
 
+    // user specified decoder multiview options manually
+    int                 multiview_user_config;
+
+    struct {
+        ViewSpecifier   vs;
+        unsigned        out_idx;
+    }                  *views_requested;
+    int              nb_views_requested;
+
+    /* A map of view ID to decoder outputs.
+     * MUST NOT be accessed outside of get_format()/get_buffer() */
+    struct {
+        unsigned        id;
+        uintptr_t       out_mask;
+    }                  *view_map;
+    int              nb_view_map;
+
     struct {
         AVDictionary       *opts;
         const AVCodec      *codec;
@@ -106,6 +127,7 @@ void dec_free(Decoder **pdec)
     avcodec_free_context(&dp->dec_ctx);
 
     av_frame_free(&dp->frame);
+    av_frame_free(&dp->frame_tmp_ref);
     av_packet_free(&dp->pkt);
 
     av_dict_free(&dp->standalone_init.opts);
@@ -116,6 +138,9 @@ void dec_free(Decoder **pdec)
 
     av_freep(&dp->parent_name);
 
+    av_freep(&dp->views_requested);
+    av_freep(&dp->view_map);
+
     av_freep(pdec);
 }
 
@@ -357,7 +382,8 @@ fail:
     return err;
 }
 
-static int video_frame_process(DecoderPriv *dp, AVFrame *frame)
+static int video_frame_process(DecoderPriv *dp, AVFrame *frame,
+                               unsigned *outputs_mask)
 {
 #if FFMPEG_OPT_TOP
     if (dp->flags & DECODER_FLAG_TOP_FIELD_FIRST) {
@@ -419,6 +445,9 @@ static int video_frame_process(DecoderPriv *dp, AVFrame *frame)
         }
     }
 
+    if (frame->opaque)
+        *outputs_mask = (uintptr_t)frame->opaque;
+
     return 0;
 }
 
@@ -715,6 +744,7 @@ static int packet_decode(DecoderPriv *dp, AVPacket *pkt, AVFrame *frame)
 
     while (1) {
         FrameData *fd;
+        unsigned outputs_mask = 1;
 
         av_frame_unref(frame);
 
@@ -763,7 +793,7 @@ static int packet_decode(DecoderPriv *dp, AVPacket *pkt, AVFrame *frame)
 
             audio_ts_process(dp, frame);
         } else {
-            ret = video_frame_process(dp, frame);
+            ret = video_frame_process(dp, frame, &outputs_mask);
             if (ret < 0) {
                 av_log(dp, AV_LOG_FATAL,
                        "Error while processing the decoded data\n");
@@ -773,10 +803,28 @@ static int packet_decode(DecoderPriv *dp, AVPacket *pkt, AVFrame *frame)
 
         dp->dec.frames_decoded++;
 
-        ret = sch_dec_send(dp->sch, dp->sch_idx, 0, frame);
-        if (ret < 0) {
-            av_frame_unref(frame);
-            return ret == AVERROR_EOF ? AVERROR_EXIT : ret;
+        // Send the frame to every output whose bit is set; loop on the mask
+        // itself, since its bits are cleared one by one as they are handled.
+        while (outputs_mask) {
+            AVFrame *to_send = frame;
+            int pos = stdc_trailing_zeros(outputs_mask);
+
+            outputs_mask &= ~(1U << pos);
+
+            // this is not the last output and sch_dec_send() consumes the frame
+            // given to it, so make a temporary reference
+            if (outputs_mask) {
+                to_send = dp->frame_tmp_ref;
+                ret = av_frame_ref(to_send, frame);
+                if (ret < 0)
+                    return ret;
+            }
+
+            ret = sch_dec_send(dp->sch, dp->sch_idx, pos, to_send);
+            if (ret < 0) {
+                av_frame_unref(to_send);
+                return ret == AVERROR_EOF ? AVERROR_EXIT : ret;
+            }
+        }
     }
 }
@@ -975,10 +1023,307 @@ finish:
     return ret;
 }
 
+int dec_request_view(Decoder *d, const ViewSpecifier *vs,
+                     SchedulerNode *src)
+{
+    DecoderPriv *dp = dp_from_dec(d);
+    unsigned out_idx = 0;
+    int ret;
+
+    if (dp->multiview_user_config) {
+        if (!vs || vs->type == VIEW_SPECIFIER_TYPE_NONE) {
+            *src = SCH_DEC_OUT(dp->sch_idx, 0);
+            return 0;
+        }
+
+        av_log(dp, AV_LOG_ERROR,
+               "Manually selecting views with -view_ids cannot be combined "
+               "with view selection via stream specifiers. It is strongly "
+               "recommended you always use stream specifiers only.\n");
+        return AVERROR(EINVAL);
+    }
+
+    // when multiview_user_config is not set, NONE specifier is treated
+    // as requesting the base view
+    vs = (vs && vs->type != VIEW_SPECIFIER_TYPE_NONE) ? vs :
+         &(ViewSpecifier){ .type = VIEW_SPECIFIER_TYPE_IDX, .val = 0 };
+
+    // check if the specifier matches an already-existing one
+    for (int i = 0; i < dp->nb_views_requested; i++) {
+        const ViewSpecifier *vs1 = &dp->views_requested[i].vs;
+
+        if (vs->type == vs1->type &&
+            (vs->type == VIEW_SPECIFIER_TYPE_ALL || vs->val == vs1->val)) {
+            *src = SCH_DEC_OUT(dp->sch_idx, dp->views_requested[i].out_idx);
+            return 0;
+        }
+    }
+
+    // the per-frame output bitmask is carried in an unsigned by
+    // packet_decode(), which limits the number of outputs allowed
+    if (dp->nb_views_requested >= sizeof(unsigned) * 8) {
+        av_log(dp, AV_LOG_ERROR, "Too many view specifiers\n");
+        return AVERROR(ENOSYS);
+    }
+
+    ret = GROW_ARRAY(dp->views_requested, dp->nb_views_requested);
+    if (ret < 0)
+        return ret;
+
+    if (dp->nb_views_requested > 1) {
+        ret = sch_add_dec_output(dp->sch, dp->sch_idx);
+        if (ret < 0)
+            return ret;
+        out_idx = ret;
+    }
+
+    dp->views_requested[dp->nb_views_requested - 1].out_idx = out_idx;
+    dp->views_requested[dp->nb_views_requested - 1].vs      = *vs;
+
+    *src = SCH_DEC_OUT(dp->sch_idx,
+                       dp->views_requested[dp->nb_views_requested - 1].out_idx);
+
+    return 0;
+}
+
+static int multiview_setup(DecoderPriv *dp, AVCodecContext *dec_ctx)
+{
+    unsigned views_wanted = 0;
+
+    unsigned nb_view_ids_av, nb_view_ids;
+    unsigned *view_ids_av = NULL, *view_pos_av = NULL;
+    int      *view_ids    = NULL;
+    int ret;
+
+    // no views/only base view were requested - do nothing
+    if (!dp->nb_views_requested ||
+        (dp->nb_views_requested == 1                               &&
+         dp->views_requested[0].vs.type == VIEW_SPECIFIER_TYPE_IDX &&
+         dp->views_requested[0].vs.val  == 0))
+        return 0;
+
+    av_freep(&dp->view_map);
+    dp->nb_view_map = 0;
+
+    // retrieve views available in current CVS
+    ret = av_opt_get_array_size(dec_ctx, "view_ids_available",
+                                AV_OPT_SEARCH_CHILDREN, &nb_view_ids_av);
+    if (ret < 0) {
+        av_log(dp, AV_LOG_ERROR,
+               "Multiview decoding requested, but decoder '%s' does not "
+               "support it\n", dec_ctx->codec->name);
+        return AVERROR(ENOSYS);
+    }
+
+    if (nb_view_ids_av) {
+        unsigned nb_view_pos_av;
+
+        if (nb_view_ids_av >= sizeof(views_wanted) * 8) {
+            av_log(dp, AV_LOG_ERROR, "Too many views in video: %u\n", nb_view_ids_av);
+            ret = AVERROR(ENOSYS);
+            goto fail;
+        }
+
+        view_ids_av = av_calloc(nb_view_ids_av, sizeof(*view_ids_av));
+        if (!view_ids_av) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        ret = av_opt_get_array(dec_ctx, "view_ids_available",
+                               AV_OPT_SEARCH_CHILDREN, 0, nb_view_ids_av,
+                               AV_OPT_TYPE_UINT, view_ids_av);
+        if (ret < 0)
+            goto fail;
+
+        ret = av_opt_get_array_size(dec_ctx, "view_pos_available",
+                                    AV_OPT_SEARCH_CHILDREN, &nb_view_pos_av);
+        if (ret >= 0 && nb_view_pos_av == nb_view_ids_av) {
+            view_pos_av = av_calloc(nb_view_ids_av, sizeof(*view_pos_av));
+            if (!view_pos_av) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            ret = av_opt_get_array(dec_ctx, "view_pos_available",
+                                   AV_OPT_SEARCH_CHILDREN, 0, nb_view_ids_av,
+                                   AV_OPT_TYPE_UINT, view_pos_av);
+            if (ret < 0)
+                goto fail;
+        }
+    } else {
+        // assume there is a single view with ID=0
+        nb_view_ids_av = 1;
+        view_ids_av = av_calloc(nb_view_ids_av, sizeof(*view_ids_av));
+        view_pos_av = av_calloc(nb_view_ids_av, sizeof(*view_pos_av));
+        if (!view_ids_av || !view_pos_av) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        view_pos_av[0] = AV_STEREO3D_VIEW_UNSPEC;
+    }
+
+    dp->view_map = av_calloc(nb_view_ids_av, sizeof(*dp->view_map));
+    if (!dp->view_map) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    dp->nb_view_map = nb_view_ids_av;
+
+    for (int i = 0; i < dp->nb_view_map; i++)
+        dp->view_map[i].id = view_ids_av[i];
+
+    // figure out which views should go to which output
+    for (int i = 0; i < dp->nb_views_requested; i++) {
+        const ViewSpecifier *vs = &dp->views_requested[i].vs;
+
+        switch (vs->type) {
+        case VIEW_SPECIFIER_TYPE_IDX:
+            if (vs->val >= nb_view_ids_av) {
+                av_log(dp, exit_on_error ? AV_LOG_ERROR : AV_LOG_WARNING,
+                       "View with index %u requested, but only %u views available "
+                       "in current video sequence (more views may or may not be "
+                       "available in later sequences).\n",
+                       vs->val, nb_view_ids_av);
+                if (exit_on_error) {
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+
+                continue;
+            }
+            views_wanted                   |= 1U   << vs->val;
+            dp->view_map[vs->val].out_mask |= 1ULL << i;
+
+            break;
+        case VIEW_SPECIFIER_TYPE_ID: {
+            int view_idx = -1;
+
+            for (unsigned j = 0; j < nb_view_ids_av; j++) {
+                if (view_ids_av[j] == vs->val) {
+                    view_idx = j;
+                    break;
+                }
+            }
+            if (view_idx < 0) {
+                av_log(dp, exit_on_error ? AV_LOG_ERROR : AV_LOG_WARNING,
+                       "View with ID %u requested, but is not available "
+                       "in the video sequence\n", vs->val);
+                if (exit_on_error) {
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+
+                continue;
+            }
+            views_wanted                    |= 1U   << view_idx;
+            dp->view_map[view_idx].out_mask |= 1ULL << i;
+
+            break;
+            }
+        case VIEW_SPECIFIER_TYPE_POS: {
+            int view_idx = -1;
+
+            for (unsigned j = 0; view_pos_av && j < nb_view_ids_av; j++) {
+                if (view_pos_av[j] == vs->val) {
+                    view_idx = j;
+                    break;
+                }
+            }
+            if (view_idx < 0) {
+                av_log(dp, exit_on_error ? AV_LOG_ERROR : AV_LOG_WARNING,
+                       "View position '%s' requested, but is not available "
+                       "in the video sequence\n", av_stereo3d_view_name(vs->val));
+                if (exit_on_error) {
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+
+                continue;
+            }
+            views_wanted                    |= 1U   << view_idx;
+            dp->view_map[view_idx].out_mask |= 1ULL << i;
+
+            break;
+            }
+        case VIEW_SPECIFIER_TYPE_ALL:
+            views_wanted |= (1U << nb_view_ids_av) - 1;
+
+            for (int j = 0; j < dp->nb_view_map; j++)
+                dp->view_map[j].out_mask |= 1ULL << i;
+
+            break;
+        }
+    }
+    if (!views_wanted) {
+        av_log(dp, AV_LOG_ERROR, "No views were selected for decoding\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    // signal to decoder which views we want
+    nb_view_ids = stdc_count_ones(views_wanted);
+    view_ids = av_malloc_array(nb_view_ids, sizeof(*view_ids));
+    if (!view_ids) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (unsigned i = 0; i < nb_view_ids; i++) {
+        int pos;
+
+        av_assert0(views_wanted);
+        pos = stdc_trailing_zeros(views_wanted);
+        views_wanted &= ~(1U << pos);
+
+        view_ids[i] = view_ids_av[pos];
+    }
+
+    // unset view_ids in case we set it earlier
+    av_opt_set(dec_ctx, "view_ids", NULL, AV_OPT_SEARCH_CHILDREN);
+
+    ret = av_opt_set_array(dec_ctx, "view_ids", AV_OPT_SEARCH_CHILDREN,
+                           0, nb_view_ids, AV_OPT_TYPE_INT, view_ids);
+    if (ret < 0)
+        goto fail;
+
+    if (!dp->frame_tmp_ref) {
+        dp->frame_tmp_ref = av_frame_alloc();
+        if (!dp->frame_tmp_ref) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+
+fail:
+    av_freep(&view_ids_av);
+    av_freep(&view_pos_av);
+    av_freep(&view_ids);
+
+    return ret;
+}
+
+static void multiview_check_manual(DecoderPriv *dp, const AVDictionary *dec_opts)
+{
+    if (av_dict_get(dec_opts, "view_ids", NULL, 0)) {
+        av_log(dp, AV_LOG_WARNING, "Manually selecting views with -view_ids "
+               "is not recommended, use view specifiers instead\n");
+        dp->multiview_user_config = 1;
+    }
+}
+
 static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat *pix_fmts)
 {
     DecoderPriv  *dp = s->opaque;
     const enum AVPixelFormat *p;
+    int ret;
+
+    ret = multiview_setup(dp, s);
+    if (ret < 0) {
+        av_log(dp, AV_LOG_ERROR, "Error setting up multiview decoding: %s\n",
+               av_err2str(ret));
+        return AV_PIX_FMT_NONE;
+    }
 
     for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(*p);
@@ -1009,6 +1354,26 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat
     return *p;
 }
 
+static int get_buffer(AVCodecContext *dec_ctx, AVFrame *frame, int flags)
+{
+    DecoderPriv *dp = dec_ctx->opaque;
+
+    // for multiview video, store the output mask in frame opaque
+    if (dp->nb_view_map) {
+        const AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_VIEW_ID);
+        int view_id = sd ? *(int*)sd->data : 0;
+
+        for (int i = 0; i < dp->nb_view_map; i++) {
+            if (dp->view_map[i].id == view_id) {
+                frame->opaque = (void*)dp->view_map[i].out_mask;
+                break;
+            }
+        }
+    }
+
+    return avcodec_default_get_buffer2(dec_ctx, frame, flags);
+}
+
 static HWDevice *hw_device_match_by_codec(const AVCodec *codec)
 {
     const AVCodecHWConfig *config;
@@ -1202,6 +1567,7 @@ static int dec_open(DecoderPriv *dp, AVDictionary **dec_opts,
 
     dp->dec_ctx->opaque                = dp;
     dp->dec_ctx->get_format            = get_format;
+    dp->dec_ctx->get_buffer2           = get_buffer;
     dp->dec_ctx->pkt_timebase          = o->time_base;
 
     if (!av_dict_get(*dec_opts, "threads", NULL, 0))
@@ -1291,6 +1657,8 @@ int dec_init(Decoder **pdec, Scheduler *sch,
     if (ret < 0)
         return ret;
 
+    multiview_check_manual(dp, *dec_opts);
+
     ret = dec_open(dp, dec_opts, o, param_out);
     if (ret < 0)
         goto fail;
@@ -1363,6 +1731,8 @@ int dec_create(const OptionsContext *o, const char *arg, Scheduler *sch)
     if (ret < 0)
         return ret;
 
+    multiview_check_manual(dp, dp->standalone_init.opts);
+
     if (o->codec_names.nb_opt) {
         const char *name = o->codec_names.opt[o->codec_names.nb_opt - 1].u.str;
         dp->standalone_init.codec = avcodec_find_decoder_by_name(name);
@@ -1375,7 +1745,8 @@ int dec_create(const OptionsContext *o, const char *arg, Scheduler *sch)
     return 0;
 }
 
-int dec_filter_add(Decoder *d, InputFilter *ifilter, InputFilterOptions *opts)
+int dec_filter_add(Decoder *d, InputFilter *ifilter, InputFilterOptions *opts,
+                   const ViewSpecifier *vs, SchedulerNode *src)
 {
     DecoderPriv *dp = dp_from_dec(d);
     char name[16];
@@ -1385,5 +1756,5 @@ int dec_filter_add(Decoder *d, InputFilter *ifilter, InputFilterOptions *opts)
     if (!opts->name)
         return AVERROR(ENOMEM);
 
-    return dp->sch_idx;
+    return dec_request_view(d, vs, src);
 }
diff --git a/fftools/ffmpeg_demux.c b/fftools/ffmpeg_demux.c
index 476efff127..7eb8ecdd89 100644
--- a/fftools/ffmpeg_demux.c
+++ b/fftools/ffmpeg_demux.c
@@ -874,7 +874,8 @@ void ifile_close(InputFile **pf)
     av_freep(pf);
 }
 
-static int ist_use(InputStream *ist, int decoding_needed)
+static int ist_use(InputStream *ist, int decoding_needed,
+                   const ViewSpecifier *vs, SchedulerNode *src)
 {
     Demuxer      *d = demuxer_from_ifile(ist->file);
     DemuxStream *ds = ds_from_ist(ist);
@@ -961,15 +962,26 @@ static int ist_use(InputStream *ist, int decoding_needed)
         d->have_audio_dec |= is_audio;
     }
 
+    if (decoding_needed && ist->par->codec_type == AVMEDIA_TYPE_VIDEO) {
+        ret = dec_request_view(ist->decoder, vs, src);
+        if (ret < 0)
+            return ret;
+    } else {
+        *src = decoding_needed                             ?
+               SCH_DEC_OUT(ds->sch_idx_dec, 0)             :
+               SCH_DSTREAM(d->f.index, ds->sch_idx_stream);
+    }
+
     return 0;
 }
 
 int ist_output_add(InputStream *ist, OutputStream *ost)
 {
     DemuxStream *ds = ds_from_ist(ist);
+    SchedulerNode src;
     int ret;
 
-    ret = ist_use(ist, ost->enc ? DECODING_FOR_OST : 0);
+    ret = ist_use(ist, ost->enc ? DECODING_FOR_OST : 0, NULL, &src);
     if (ret < 0)
         return ret;
 
@@ -983,14 +995,16 @@ int ist_output_add(InputStream *ist, OutputStream *ost)
 }
 
 int ist_filter_add(InputStream *ist, InputFilter *ifilter, int is_simple,
-                   InputFilterOptions *opts)
+                   const ViewSpecifier *vs, InputFilterOptions *opts,
+                   SchedulerNode *src)
 {
     Demuxer      *d = demuxer_from_ifile(ist->file);
     DemuxStream *ds = ds_from_ist(ist);
     int64_t tsoffset = 0;
     int ret;
 
-    ret = ist_use(ist, is_simple ? DECODING_FOR_OST : DECODING_FOR_FILTER);
+    ret = ist_use(ist, is_simple ? DECODING_FOR_OST : DECODING_FOR_FILTER,
+                  vs, src);
     if (ret < 0)
         return ret;
 
@@ -1074,7 +1088,7 @@ int ist_filter_add(InputStream *ist, InputFilter *ifilter, int is_simple,
     opts->flags |= IFILTER_FLAG_AUTOROTATE * !!(ds->autorotate) |
                    IFILTER_FLAG_REINIT     * !!(ds->reinit_filters);
 
-    return ds->sch_idx_dec;
+    return 0;
 }
 
 static int choose_decoder(const OptionsContext *o, void *logctx,
diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index 05f9b287fb..329f972780 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c
@@ -659,11 +659,13 @@ static OutputFilter *ofilter_alloc(FilterGraph *fg, enum AVMediaType type)
     return ofilter;
 }
 
-static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist)
+static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist,
+                            const ViewSpecifier *vs)
 {
     InputFilterPriv *ifp = ifp_from_ifilter(ifilter);
     FilterGraphPriv *fgp = fgp_from_fg(ifilter->graph);
-    int ret, dec_idx;
+    SchedulerNode src;
+    int ret;
 
     av_assert0(!ifp->bound);
     ifp->bound = 1;
@@ -681,13 +683,13 @@ static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist)
     if (!ifp->opts.fallback)
         return AVERROR(ENOMEM);
 
-    dec_idx = ist_filter_add(ist, ifilter, filtergraph_is_simple(ifilter->graph),
-                             &ifp->opts);
-    if (dec_idx < 0)
-        return dec_idx;
+    ret = ist_filter_add(ist, ifilter, filtergraph_is_simple(ifilter->graph),
+                         vs, &ifp->opts, &src);
+    if (ret < 0)
+        return ret;
 
-    ret = sch_connect(fgp->sch, SCH_DEC_OUT(dec_idx, 0),
-                                SCH_FILTER_IN(fgp->sch_idx, ifp->index));
+    ret = sch_connect(fgp->sch,
+                      src, SCH_FILTER_IN(fgp->sch_idx, ifp->index));
     if (ret < 0)
         return ret;
 
@@ -712,10 +714,12 @@ static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist)
     return 0;
 }
 
-static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec)
+static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec,
+                            const ViewSpecifier *vs)
 {
     FilterGraphPriv *fgp = fgp_from_fg(ifp->ifilter.graph);
-    int ret, dec_idx;
+    SchedulerNode src;
+    int ret;
 
     av_assert0(!ifp->bound);
     ifp->bound = 1;
@@ -728,12 +732,11 @@ static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec)
 
     ifp->type_src = ifp->type;
 
-    dec_idx = dec_filter_add(dec, &ifp->ifilter, &ifp->opts);
-    if (dec_idx < 0)
-        return dec_idx;
+    ret = dec_filter_add(dec, &ifp->ifilter, &ifp->opts, vs, &src);
+    if (ret < 0)
+        return ret;
 
-    ret = sch_connect(fgp->sch, SCH_DEC_OUT(dec_idx, 0),
-                                SCH_FILTER_IN(fgp->sch_idx, ifp->index));
+    ret = sch_connect(fgp->sch, src, SCH_FILTER_IN(fgp->sch_idx, ifp->index));
     if (ret < 0)
         return ret;
 
@@ -1216,7 +1219,7 @@ int init_simple_filtergraph(InputStream *ist, OutputStream *ost,
 
     ost->filter = fg->outputs[0];
 
-    ret = ifilter_bind_ist(fg->inputs[0], ist);
+    ret = ifilter_bind_ist(fg->inputs[0], ist, opts->vs);
     if (ret < 0)
         return ret;
 
@@ -1240,28 +1243,38 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
     InputFilterPriv *ifp = ifp_from_ifilter(ifilter);
     InputStream *ist = NULL;
     enum AVMediaType type = ifp->type;
+    ViewSpecifier vs = { .type = VIEW_SPECIFIER_TYPE_NONE };
+    const char *spec;
+    char *p;
     int i, ret;
 
     if (ifp->linklabel && !strncmp(ifp->linklabel, "dec:", 4)) {
         // bind to a standalone decoder
         int dec_idx;
 
-        dec_idx = strtol(ifp->linklabel + 4, NULL, 0);
+        dec_idx = strtol(ifp->linklabel + 4, &p, 0);
         if (dec_idx < 0 || dec_idx >= nb_decoders) {
             av_log(fg, AV_LOG_ERROR, "Invalid decoder index %d in filtergraph description %s\n",
                    dec_idx, fgp->graph_desc);
             return AVERROR(EINVAL);
         }
 
-        ret = ifilter_bind_dec(ifp, decoders[dec_idx]);
+        if (type == AVMEDIA_TYPE_VIDEO) {
+            spec = *p == ':' ? p + 1 : p;
+            ret = view_specifier_parse(&spec, &vs);
+            if (ret < 0)
+                return ret;
+        }
+
+        ret = ifilter_bind_dec(ifp, decoders[dec_idx], &vs);
         if (ret < 0)
             av_log(fg, AV_LOG_ERROR, "Error binding a decoder to filtergraph input %s\n",
                    ifilter->name);
         return ret;
     } else if (ifp->linklabel) {
+        StreamSpecifier ss;
         AVFormatContext *s;
         AVStream       *st = NULL;
-        char *p;
         int file_idx;
 
         // try finding an unbound filtergraph output with this label
@@ -1298,17 +1311,33 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
         }
         s = input_files[file_idx]->ctx;
 
+        ret = stream_specifier_parse(&ss, *p == ':' ? p + 1 : p, 1, fg);
+        if (ret < 0) {
+            av_log(fg, AV_LOG_ERROR, "Invalid stream specifier: %s\n", p);
+            return ret;
+        }
+
+        if (type == AVMEDIA_TYPE_VIDEO) {
+            spec = ss.remainder ? ss.remainder : "";
+            ret = view_specifier_parse(&spec, &vs);
+            if (ret < 0) {
+                stream_specifier_uninit(&ss);
+                return ret;
+            }
+        }
+
         for (i = 0; i < s->nb_streams; i++) {
             enum AVMediaType stream_type = s->streams[i]->codecpar->codec_type;
             if (stream_type != type &&
                 !(stream_type == AVMEDIA_TYPE_SUBTITLE &&
                   type == AVMEDIA_TYPE_VIDEO /* sub2video hack */))
                 continue;
-            if (check_stream_specifier(s, s->streams[i], *p == ':' ? p + 1 : p) == 1) {
+            if (stream_specifier_match(&ss, s, s->streams[i], fg)) {
                 st = s->streams[i];
                 break;
             }
         }
+        stream_specifier_uninit(&ss);
         if (!st) {
             av_log(fg, AV_LOG_FATAL, "Stream specifier '%s' in filtergraph description %s "
                    "matches no streams.\n", p, fgp->graph_desc);
@@ -1333,7 +1362,7 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
     }
     av_assert0(ist);
 
-    ret = ifilter_bind_ist(ifilter, ist);
+    ret = ifilter_bind_ist(ifilter, ist, &vs);
     if (ret < 0) {
         av_log(fg, AV_LOG_ERROR,
                "Error binding an input stream to complex filtergraph input %s.\n",
diff --git a/fftools/ffmpeg_mux_init.c b/fftools/ffmpeg_mux_init.c
index 771fb2ef7b..aa1fcb6a42 100644
--- a/fftools/ffmpeg_mux_init.c
+++ b/fftools/ffmpeg_mux_init.c
@@ -920,7 +920,8 @@ static int
 ost_bind_filter(const Muxer *mux, MuxStream *ms, OutputFilter *ofilter,
                 const OptionsContext *o, char *filters,
                 AVRational enc_tb, enum VideoSyncMethod vsync_method,
-                int keep_pix_fmt, int autoscale, int threads_manual)
+                int keep_pix_fmt, int autoscale, int threads_manual,
+                const ViewSpecifier *vs)
 {
     OutputStream       *ost = &ms->ost;
     AVCodecContext *enc_ctx = ost->enc_ctx;
@@ -946,6 +947,7 @@ ost_bind_filter(const Muxer *mux, MuxStream *ms, OutputFilter *ofilter,
         .trim_duration_us = mux->of.recording_time,
         .ts_offset        = mux->of.start_time == AV_NOPTS_VALUE ?
                             0 : mux->of.start_time,
+        .vs               = vs,
 
         .flags = OFILTER_FLAG_DISABLE_CONVERT * !!keep_pix_fmt |
                  OFILTER_FLAG_AUTOSCALE       * !!autoscale    |
@@ -1140,7 +1142,7 @@ fail:
 }
 
 static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
-                   InputStream *ist, OutputFilter *ofilter,
+                   InputStream *ist, OutputFilter *ofilter, const ViewSpecifier *vs,
                    OutputStream **post)
 {
     AVFormatContext *oc = mux->fc;
@@ -1500,7 +1502,7 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
     if (ost->enc &&
         (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO)) {
         ret = ost_bind_filter(mux, ms, ofilter, o, filters, enc_tb, vsync_method,
-                              keep_pix_fmt, autoscale, threads_manual);
+                              keep_pix_fmt, autoscale, threads_manual, vs);
         if (ret < 0)
             goto fail;
     } else if (ost->ist) {
@@ -1602,7 +1604,7 @@ static int map_auto_video(Muxer *mux, const OptionsContext *o)
        }
     }
     if (best_ist)
-        return ost_add(mux, o, AVMEDIA_TYPE_VIDEO, best_ist, NULL, NULL);
+        return ost_add(mux, o, AVMEDIA_TYPE_VIDEO, best_ist, NULL, NULL, NULL);
 
     return 0;
 }
@@ -1646,7 +1648,7 @@ static int map_auto_audio(Muxer *mux, const OptionsContext *o)
        }
     }
     if (best_ist)
-        return ost_add(mux, o, AVMEDIA_TYPE_AUDIO, best_ist, NULL, NULL);
+        return ost_add(mux, o, AVMEDIA_TYPE_AUDIO, best_ist, NULL, NULL, NULL);
 
     return 0;
 }
@@ -1683,7 +1685,7 @@ static int map_auto_subtitle(Muxer *mux, const OptionsContext *o)
                 input_descriptor && output_descriptor &&
                 (!input_descriptor->props ||
                  !output_descriptor->props)) {
-                return ost_add(mux, o, AVMEDIA_TYPE_SUBTITLE, ist, NULL, NULL);
+                return ost_add(mux, o, AVMEDIA_TYPE_SUBTITLE, ist, NULL, NULL, NULL);
             }
         }
 
@@ -1704,7 +1706,7 @@ static int map_auto_data(Muxer *mux, const OptionsContext *o)
             continue;
         if (ist->st->codecpar->codec_type == AVMEDIA_TYPE_DATA &&
             ist->st->codecpar->codec_id == codec_id) {
-            int ret = ost_add(mux, o, AVMEDIA_TYPE_DATA, ist, NULL, NULL);
+            int ret = ost_add(mux, o, AVMEDIA_TYPE_DATA, ist, NULL, NULL, NULL);
             if (ret < 0)
                 return ret;
         }
@@ -1746,10 +1748,13 @@ loop_end:
         av_log(mux, AV_LOG_VERBOSE, "Creating output stream from an explicitly "
                "mapped complex filtergraph %d, output [%s]\n", fg->index, map->linklabel);
 
-        ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL);
+        ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL, NULL);
         if (ret < 0)
             return ret;
     } else {
+        const ViewSpecifier *vs = map->vs.type == VIEW_SPECIFIER_TYPE_NONE ?
+                                  NULL : &map->vs;
+
         ist = input_files[map->file_index]->streams[map->stream_index];
         if (ist->user_set_discard == AVDISCARD_ALL) {
             av_log(mux, AV_LOG_FATAL, "Stream #%d:%d is disabled and cannot be mapped.\n",
@@ -1780,7 +1785,14 @@ loop_end:
             return 0;
         }
 
-        ret = ost_add(mux, o, ist->st->codecpar->codec_type, ist, NULL, NULL);
+        if (vs && ist->st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
+            av_log(mux, AV_LOG_ERROR,
+                   "View specifier given for mapping a %s input stream\n",
+                   av_get_media_type_string(ist->st->codecpar->codec_type));
+            return AVERROR(EINVAL);
+        }
+
+        ret = ost_add(mux, o, ist->st->codecpar->codec_type, ist, NULL, vs, NULL);
         if (ret < 0)
             return ret;
     }
@@ -1850,7 +1862,7 @@ read_fail:
             return AVERROR(ENOMEM);
         }
 
-        err = ost_add(mux, o, AVMEDIA_TYPE_ATTACHMENT, NULL, NULL, &ost);
+        err = ost_add(mux, o, AVMEDIA_TYPE_ATTACHMENT, NULL, NULL, NULL, &ost);
         if (err < 0) {
             av_free(attachment_filename);
             av_freep(&attachment);
@@ -1905,7 +1917,7 @@ static int create_streams(Muxer *mux, const OptionsContext *o)
                        av_get_media_type_string(ofilter->type));
             av_log(mux, AV_LOG_VERBOSE, "\n");
 
-            ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL);
+            ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL, NULL);
             if (ret < 0)
                 return ret;
         }
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 1aa187f706..f639a1cf0a 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -46,6 +46,7 @@
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/stereo3d.h"
 
 HWDevice *filter_hw_device;
 
@@ -228,6 +229,59 @@ OPT_MATCH_PER_STREAM(int,   int,          OPT_TYPE_INT,    i);
 OPT_MATCH_PER_STREAM(int64, int64_t,      OPT_TYPE_INT64,  i64);
 OPT_MATCH_PER_STREAM(dbl,   double,       OPT_TYPE_DOUBLE, dbl);
 
+int view_specifier_parse(const char **pspec, ViewSpecifier *vs)
+{
+    const char *spec = *pspec;
+    char *endptr;
+    /* Parse an optional view:/vidx:/vpos: specifier at the start of *pspec. */
+    vs->type = VIEW_SPECIFIER_TYPE_NONE;
+
+    if (!strncmp(spec, "view:", 5)) {
+        spec += 5;
+        /* like left/right below, "all" must not run into further alnum chars */
+        if (!strncmp(spec, "all", 3) && !cmdutils_isalnum(spec[3])) {
+            spec += 3;
+            vs->type = VIEW_SPECIFIER_TYPE_ALL;
+        } else {
+            vs->type = VIEW_SPECIFIER_TYPE_ID;
+            vs->val  = strtoul(spec, &endptr, 0);
+            if (endptr == spec) {
+                av_log(NULL, AV_LOG_ERROR, "Invalid view ID: %s\n", spec);
+                return AVERROR(EINVAL);
+            }
+            spec = endptr;
+        }
+    } else if (!strncmp(spec, "vidx:", 5)) {
+        spec += 5;
+        vs->type = VIEW_SPECIFIER_TYPE_IDX;
+        vs->val  = strtoul(spec, &endptr, 0);
+        if (endptr == spec) {
+            av_log(NULL, AV_LOG_ERROR, "Invalid view index: %s\n", spec);
+            return AVERROR(EINVAL);
+        }
+        spec = endptr;
+    } else if (!strncmp(spec, "vpos:", 5)) {
+        spec += 5;
+        vs->type = VIEW_SPECIFIER_TYPE_POS;
+
+        if (!strncmp(spec, "left", 4) && !cmdutils_isalnum(spec[4])) {
+            spec += 4;
+            vs->val = AV_STEREO3D_VIEW_LEFT;
+        } else if (!strncmp(spec, "right", 5) && !cmdutils_isalnum(spec[5])) {
+            spec += 5;
+            vs->val = AV_STEREO3D_VIEW_RIGHT;
+        } else {
+            av_log(NULL, AV_LOG_ERROR, "Invalid view position: %s\n", spec);
+            return AVERROR(EINVAL);
+        }
+    } else
+        return 0; // no view specifier present: *pspec is left untouched
+
+    *pspec = spec; // advance the caller past the consumed specifier
+
+    return 0;
+}
+
 int parse_and_set_vsync(const char *arg, int *vsync_var, int file_idx, int st_idx, int is_global)
 {
     if      (!av_strcasecmp(arg, "cfr"))         *vsync_var = VSYNC_CFR;
@@ -452,6 +506,7 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
             goto fail;
         }
     } else {
+        ViewSpecifier vs;
         char *endptr;
 
         file_idx = strtol(arg, &endptr, 0);
@@ -468,12 +523,18 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
             goto fail;
         }
 
-        if (ss.remainder) {
-            if (!strcmp(ss.remainder, "?"))
+        arg = ss.remainder ? ss.remainder : "";
+
+        ret = view_specifier_parse(&arg, &vs);
+        if (ret < 0)
+            goto fail;
+
+        if (*arg) {
+            if (!strcmp(arg, "?"))
                 allow_unused = 1;
             else {
-                av_log(NULL, AV_LOG_ERROR, "Trailing garbage after stream specifier: %s\n",
-                       ss.remainder);
+                av_log(NULL, AV_LOG_ERROR,
+                       "Trailing garbage after stream specifier: %s\n", arg);
                 ret = AVERROR(EINVAL);
                 goto fail;
             }
@@ -509,6 +570,7 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
 
                 m->file_index   = file_idx;
                 m->stream_index = i;
+                m->vs           = vs;
             }
     }
 
-- 
2.43.0



More information about the ffmpeg-devel mailing list