[FFmpeg-devel] [PATCH] doc/examples/muxing: code rewrite with improved readability and fixed issues

Leo Izen leo.izen at gmail.com
Sat Jun 18 18:17:57 EEST 2022


On 6/18/22 08:06, Paolo Prete wrote:
> Please review this. It's a code rewrite of doc/examples/muxing.c which improves readability and fixes issues.
> From 8a4e942a001ae49dc052899f331ed43abf954dda Mon Sep 17 00:00:00 2001
> From: paolo <paolopr976 at gmail.com>
> Date: Sat, 18 Jun 2022 13:53:55 +0200
> Subject: [PATCH] doc/examples/muxing: code rewrite with improved readability
>  and fixed issues
This commit message is too long; you can just truncate "and fixed
issues" since it doesn't say much.

> 
>                Improved readability with functions that have clearer prototypes and that don't mix logically unrelated blocks of code
> 
>                Fixed issues in case of unsupported extensions
> 
>                Fixed memory leaks on errors, which are now properly propagated to the main() function
> 
>                Fixed issue on raw images output
> 
>                fprintf() replaced with av_log()
> 
>                Input A/V parameters exposed in the main() function and easier to customize
> ---
>  doc/examples/muxing.c | 905 +++++++++++++++++++-----------------------
>  1 file changed, 406 insertions(+), 499 deletions(-)
> 
> diff --git a/doc/examples/muxing.c b/doc/examples/muxing.c
> index 3acb778322..04739995d8 100644
> --- a/doc/examples/muxing.c
> +++ b/doc/examples/muxing.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2003 Fabrice Bellard
> + * Copyright (c) 2022 Paolo Prete (paolopr976 at gmail.com) after Fabrice Bellard
Don't remove the original copyright, just add yourself below it. You 
also don't need to put your email address in the copyright line.
>   *
>   * Permission is hereby granted, free of charge, to any person obtaining a copy
>   * of this software and associated documentation files (the "Software"), to deal
> @@ -24,625 +24,532 @@
>   * @file
>   * libavformat API example.
>   *
> - * Output a media file in any supported libavformat format. The default
> + * Output a media file in a set of supported libavformat formats. The default
>   * codecs are used.
>   * @example muxing.c
>   */
>  
> -#include <stdlib.h>
> -#include <stdio.h>
> -#include <string.h>
> -#include <math.h>
> -
> -#include <libavutil/avassert.h>
> -#include <libavutil/channel_layout.h>
> -#include <libavutil/opt.h>
> -#include <libavutil/mathematics.h>
> -#include <libavutil/timestamp.h>
>  #include <libavcodec/avcodec.h>
>  #include <libavformat/avformat.h>
> -#include <libswscale/swscale.h>
> +#include <libavutil/timestamp.h>
Make sure these stay sorted.
>  #include <libswresample/swresample.h>
> +#include <libswscale/swscale.h>
>  
> -#define STREAM_DURATION   10.0
> -#define STREAM_FRAME_RATE 25 /* 25 images/s */
> -#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */
> -
> -#define SCALE_FLAGS SWS_BICUBIC
> +#define VIDEO_FRAME_RATE 25 /* 25 images/s */
> +#define VIDEO_SCALE_FLAGS SWS_BICUBIC
> +#define STREAM_DURATION 10.0 /* 10 seconds */
>  
> -// a wrapper around a single output AVStream
> -typedef struct OutputStream {
> -    AVStream *st;
> -    AVCodecContext *enc;
> +static void log_error(const char *s, int *num)
> +{
> +    if (num)
> +        av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(*num));
> +    else
> +        av_log(NULL, AV_LOG_ERROR, "%s\n", s);
> +}
This does not need to be a pointer. Convention is that negative values 
are errors and nonnegative values are not. So you could always use 
something like: if (num < 0).
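An untested sketch of that:

    static void log_error(const char *s, int err)
    {
        if (err < 0)
            av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(err));
        else
            av_log(NULL, AV_LOG_ERROR, "%s\n", s);
    }

and callers just pass the error code (or 0 when there is none) instead
of its address.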

> -    /* pts of the next frame that will be generated */
> -    int64_t next_pts;
> -    int samples_count;
> +static int mux_encoded_pkt(AVPacket *out_pkt, AVFormatContext *out_fmt_ctx,
> +                              enum AVMediaType type)
> +{
> +    int ret;
> +    AVRational enc_time_base, str_time_base;
>  
> -    AVFrame *frame;
> -    AVFrame *tmp_frame;
> +    if (out_fmt_ctx->streams[0]->codecpar->codec_type == type)
> +        out_pkt->stream_index = 0;
> +    else if ((out_fmt_ctx->nb_streams > 1) && (type == AVMEDIA_TYPE_VIDEO))
> +        out_pkt->stream_index = 1;
> +    str_time_base = out_fmt_ctx->streams[out_pkt->stream_index]->time_base;
>  
> -    AVPacket *tmp_pkt;
> +    if (type == AVMEDIA_TYPE_AUDIO)
> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[0];
> +    else
> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[1];
> 
Why are you referencing the opaque elements of out_fmt_ctx?
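The opaque field is the user's private data, but stashing an array of
time bases there and indexing it by media type is fragile. A cleaner
sketch (untested) would pass the encoder time base in explicitly:

    static int mux_encoded_pkt(AVPacket *out_pkt, AVFormatContext *out_fmt_ctx,
                               AVRational enc_time_base, enum AVMediaType type);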

> -    float t, tincr, tincr2;
> +    av_packet_rescale_ts(out_pkt, enc_time_base, str_time_base);
>  
> -    struct SwsContext *sws_ctx;
> -    struct SwrContext *swr_ctx;
> -} OutputStream;
> +    av_log(NULL, AV_LOG_INFO, "stream_index=%d, size=%d, pts_time=%s\n",
> +           out_pkt->stream_index,
> +           out_pkt->size, av_ts2timestr(out_pkt->pts, &str_time_base));
>  
> -static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
> -{
> -    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
> +    if ((ret = av_interleaved_write_frame(out_fmt_ctx, out_pkt)) < 0)
> +        log_error("Error calling av_interleaved_write_frame()", &ret);
>  
> -    printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
> -           av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
> -           av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
> -           av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
> -           pkt->stream_index);
> +    return ret;
>  }
>  
> -static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
> -                       AVStream *st, AVFrame *frame, AVPacket *pkt)
> +static int is_extension_supported(const char *filename)
Why are you artificially limiting what is permitted?
>  {
> -    int ret;
> +    const char *extensions[] = {".aac", ".avi", ".bmp", ".jpeg", ".mka",
> +                                ".mkv", ".mov", ".mp4", ".flv",  ".ts"};
> +    int i, size = sizeof(extensions) / sizeof(extensions[0]);
> +    char *dot = strrchr(filename, '.');
>  
> -    // send the frame to the encoder
> -    ret = avcodec_send_frame(c, frame);
> -    if (ret < 0) {
> -        fprintf(stderr, "Error sending a frame to the encoder: %s\n",
> -                av_err2str(ret));
> -        exit(1);
> +    for (i = 0; i < size; i++) {
> +        if (dot && !strcmp(dot, extensions[i]))
> +            return 1;
>      }
>  
> -    while (ret >= 0) {
> -        ret = avcodec_receive_packet(c, pkt);
> -        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
> -            break;
> -        else if (ret < 0) {
> -            fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
> -            exit(1);
> -        }
> +    log_error("File extension not supported", NULL);
> +    av_log(NULL, AV_LOG_WARNING, "Please choose one of the following extensions: ");
> +    for (i = 0; i < size - 1; i++)
> +        av_log(NULL, AV_LOG_WARNING, "%s, ", extensions[i]);
> +    av_log(NULL, AV_LOG_WARNING, "%s\n", extensions[size-1]);
>  
> -        /* rescale output packet timestamp values from codec to stream timebase */
> -        av_packet_rescale_ts(pkt, c->time_base, st->time_base);
> -        pkt->stream_index = st->index;
> -
> -        /* Write the compressed frame to the media file. */
> -        log_packet(fmt_ctx, pkt);
> -        ret = av_interleaved_write_frame(fmt_ctx, pkt);
> -        /* pkt is now blank (av_interleaved_write_frame() takes ownership of
> -         * its contents and resets pkt), so that no unreferencing is necessary.
> -         * This would be different if one used av_write_frame(). */
> -        if (ret < 0) {
> -            fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
> -            exit(1);
> -        }
> -    }
> -
> -    return ret == AVERROR_EOF ? 1 : 0;
> +    return 0;
>  }
>  
> -/* Add an output stream. */
> -static void add_stream(OutputStream *ost, AVFormatContext *oc,
> -                       const AVCodec **codec,
> -                       enum AVCodecID codec_id)
> +static int get_default_enc_params(AVCodecParameters *params,
> +                                  const char *fname, enum AVMediaType type)
>  {
> -    AVCodecContext *c;
> -    int i;
> -
> -    /* find the encoder */
> -    *codec = avcodec_find_encoder(codec_id);
> -    if (!(*codec)) {
> -        fprintf(stderr, "Could not find encoder for '%s'\n",
> -                avcodec_get_name(codec_id));
> -        exit(1);
> +    AVFormatContext *tmp_fctx;
> +    enum AVCodecID id;
> +    const AVCodec *c;
> +    int ret = 0;
> +
> +    if ((ret = avformat_alloc_output_context2(&tmp_fctx, NULL, NULL, fname)) < 0) {
> +        log_error("Could not get default encoder", &ret);
> +        return AVERROR_EXIT;
>      }
>  
> -    ost->tmp_pkt = av_packet_alloc();
> -    if (!ost->tmp_pkt) {
> -        fprintf(stderr, "Could not allocate AVPacket\n");
> -        exit(1);
> -    }
> +    id = (type == AVMEDIA_TYPE_AUDIO) ? tmp_fctx->oformat->audio_codec :
> +                                        tmp_fctx->oformat->video_codec;
>  
> -    ost->st = avformat_new_stream(oc, NULL);
> -    if (!ost->st) {
> -        fprintf(stderr, "Could not allocate stream\n");
> -        exit(1);
> -    }
> -    ost->st->id = oc->nb_streams-1;
> -    c = avcodec_alloc_context3(*codec);
> -    if (!c) {
> -        fprintf(stderr, "Could not alloc an encoding context\n");
> -        exit(1);
> +    if (!(c = avcodec_find_encoder(id))) {
> +        avformat_free_context(tmp_fctx);
> +        return ret;
You probably don't want to return "ret" here as you don't assign it.
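E.g. this would be clearer:

    return AVERROR_ENCODER_NOT_FOUND;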

>      }
> -    ost->enc = c;
> -
> -    switch ((*codec)->type) {
> -    case AVMEDIA_TYPE_AUDIO:
> -        c->sample_fmt  = (*codec)->sample_fmts ?
> -            (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
> -        c->bit_rate    = 64000;
> -        c->sample_rate = 44100;
> -        if ((*codec)->supported_samplerates) {
> -            c->sample_rate = (*codec)->supported_samplerates[0];
> -            for (i = 0; (*codec)->supported_samplerates[i]; i++) {
> -                if ((*codec)->supported_samplerates[i] == 44100)
> -                    c->sample_rate = 44100;
> -            }
> -        }
> -        av_channel_layout_copy(&c->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
> -        ost->st->time_base = (AVRational){ 1, c->sample_rate };
> -        break;
> -
> -    case AVMEDIA_TYPE_VIDEO:
> -        c->codec_id = codec_id;
> -
> -        c->bit_rate = 400000;
> -        /* Resolution must be a multiple of two. */
> -        c->width    = 352;
> -        c->height   = 288;
> -        /* timebase: This is the fundamental unit of time (in seconds) in terms
> -         * of which frame timestamps are represented. For fixed-fps content,
> -         * timebase should be 1/framerate and timestamp increments should be
> -         * identical to 1. */
> -        ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
> -        c->time_base       = ost->st->time_base;
> -
> -        c->gop_size      = 12; /* emit one intra frame every twelve frames at most */
> -        c->pix_fmt       = STREAM_PIX_FMT;
> -        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
> -            /* just for testing, we also add B-frames */
> -            c->max_b_frames = 2;
> -        }
> -        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
> -            /* Needed to avoid using macroblocks in which some coeffs overflow.
> -             * This does not happen with normal video, it just happens here as
> -             * the motion of the chroma plane does not match the luma plane. */
> -            c->mb_decision = 2;
> -        }
> -        break;
>  
> -    default:
> -        break;
> +    params->codec_type = c->type;
> +    params->codec_id   = c-> id;
> +    if (c->type == AVMEDIA_TYPE_AUDIO) {
> +        params->format      = c->sample_fmts ?
> +                              c->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
> +        params->ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
> +        params->sample_rate = c->supported_samplerates ?
> +                              c->supported_samplerates[0] : 44100;
> +    } else if (c->type == AVMEDIA_TYPE_VIDEO) {
> +        params->format = c->pix_fmts ? c->pix_fmts[0] : AV_PIX_FMT_YUV420P;
>      }
> +    avformat_free_context(tmp_fctx);
>  
> -    /* Some formats want stream headers to be separate. */
> -    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
> -        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
> +    return ret;
Again, why are you returning ret if you are not assigning to it?

>  }
>  
> -/**************************************************************/
> -/* audio output */
> -
> -static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
> -                                  const AVChannelLayout *channel_layout,
> -                                  int sample_rate, int nb_samples)
> +static int init_encoder(AVCodecContext **enc_ctx, AVCodecParameters *params)
>  {
> -    AVFrame *frame = av_frame_alloc();
> +    const AVCodec *codec = NULL;
>      int ret;
>  
> -    if (!frame) {
> -        fprintf(stderr, "Error allocating an audio frame\n");
> -        exit(1);
> +    codec = avcodec_find_encoder(params->codec_id);
> +    *enc_ctx = avcodec_alloc_context3(codec);
Don't attempt to allocate anything until after you check if the codec is 
found.
> +    if (!codec) {
> +        log_error("Could not allocate the encoding context", NULL);
This error message does not match the check, which is if the codec is found.
> +        return AVERROR_EXIT;
return AVERROR_ENCODER_NOT_FOUND;
>      }
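Putting both points together, roughly (untested):

    codec = avcodec_find_encoder(params->codec_id);
    if (!codec) {
        log_error("Could not find encoder", NULL);
        return AVERROR_ENCODER_NOT_FOUND;
    }
    *enc_ctx = avcodec_alloc_context3(codec);
    if (!*enc_ctx) {
        log_error("Could not allocate the encoding context", NULL);
        return AVERROR(ENOMEM);
    }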
>  
> -    frame->format = sample_fmt;
> -    av_channel_layout_copy(&frame->ch_layout, channel_layout);
> -    frame->sample_rate = sample_rate;
> -    frame->nb_samples = nb_samples;
> -
> -    if (nb_samples) {
> -        ret = av_frame_get_buffer(frame, 0);
> -        if (ret < 0) {
> -            fprintf(stderr, "Error allocating an audio buffer\n");
> -            exit(1);
> -        }
> +    (*enc_ctx)->codec_id   = params->codec_id;
> +    (*enc_ctx)->codec_type = params->codec_type;
> +    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
> +        (*enc_ctx)->sample_fmt  = params->format;
> +        (*enc_ctx)->sample_rate = params->sample_rate;
> +        (*enc_ctx)->time_base   = (AVRational){1, params->sample_rate};
Use av_make_q to avoid casting.
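I.e.:

    (*enc_ctx)->time_base = av_make_q(1, params->sample_rate);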
> +        (*enc_ctx)->ch_layout   = params->ch_layout;
> +    } else if (params->codec_type == AVMEDIA_TYPE_VIDEO) {
> +        (*enc_ctx)->width      = params->width;
> +        (*enc_ctx)->height     = params->height;
> +        (*enc_ctx)->time_base  = (AVRational){ 1, VIDEO_FRAME_RATE };
Better off just defining VIDEO_FRAME_RATE_Q to be an AVRational with
value {1, 25}.
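For example:

    #define VIDEO_FRAME_RATE_Q (AVRational){ 1, VIDEO_FRAME_RATE }

and then:

    (*enc_ctx)->time_base = VIDEO_FRAME_RATE_Q;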
> +        (*enc_ctx)->gop_size   = 12;
> +        (*enc_ctx)->pix_fmt    = params->format;
>      }
>  
> -    return frame;
> +    if ((ret = avcodec_open2(*enc_ctx, codec, NULL)) < 0) {
> +        log_error("Could not open input codec", &ret);
> +        return ret;
> +    } else
> +        return 0;
This violates the coding style; you need to use braces {} for the else
block if you also use it for the if block.
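Dropping the else entirely also reads better; a rough sketch:

    if ((ret = avcodec_open2(*enc_ctx, codec, NULL)) < 0) {
        log_error("Could not open encoder", &ret);
        return ret;
    }
    return 0;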
>  }
>  
> -static void open_audio(AVFormatContext *oc, const AVCodec *codec,
> -                       OutputStream *ost, AVDictionary *opt_arg)
> +static int init_avframe(AVFrame **frame, AVCodecParameters *params)
>  {
> -    AVCodecContext *c;
> -    int nb_samples;
>      int ret;
> -    AVDictionary *opt = NULL;
>  
> -    c = ost->enc;
> +    if (!(*frame = av_frame_alloc())) {
> +        log_error("Could not allocate AVFrame", NULL);
> +        return AVERROR(ENOMEM);
> +    }
>  
> -    /* open it */
> -    av_dict_copy(&opt, opt_arg, 0);
> -    ret = avcodec_open2(c, codec, &opt);
> -    av_dict_free(&opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
> -        exit(1);
> +    (*frame)->opaque = &params->codec_type;
> +    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
> +        (*frame)->nb_samples  = params->frame_size;
> +        (*frame)->sample_rate = params->sample_rate;
> +        (*frame)->format      = params->format;
> +        (*frame)->ch_layout   = params->ch_layout;
> +    } else {
> +        (*frame)->width  = params->width;
> +        (*frame)->height = params->height;
> +        (*frame)->format = params->format;
>      }
>  
> -    /* init signal generator */
> -    ost->t     = 0;
> -    ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
> -    /* increment frequency by 110 Hz per second */
> -    ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
> +    /* Allocate the frame's data buffer */
> +    if ((ret = av_frame_get_buffer(*frame, 0)) < 0) {
> +        log_error("Could not allocate buffer for AVFrame", &ret);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
You don't need the else block here at all.
> +}
>  
> -    if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
> -        nb_samples = 10000;
> -    else
> -        nb_samples = c->frame_size;
> +static int init_audio_convert(struct SwrContext **ctx, AVCodecParameters *in_params,
> +                              AVCodecParameters *out_params)
> +{
> +    swr_alloc_set_opts2(ctx,
> +                        &(out_params->ch_layout),
> +                        out_params->format, out_params->sample_rate,
> +                        &(in_params->ch_layout),
> +                        in_params->format, in_params->sample_rate,
> +                        0, NULL);
> +    if (!*ctx) {
> +        log_error("Could not allocate resample context", NULL);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
> +}
You don't need the else block here at all.
>  
> -    ost->frame     = alloc_audio_frame(c->sample_fmt, &c->ch_layout,
> -                                       c->sample_rate, nb_samples);
> -    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, &c->ch_layout,
> -                                       c->sample_rate, nb_samples);
> +static int init_video_convert(struct SwsContext **ctx, AVCodecParameters *in_params,
> +                              AVCodecParameters *out_params)
> +{
This paper-thin wrapper function is unnecessary, just inline it.
> +    *ctx = sws_getContext(in_params->width, in_params->height,
> +                          in_params->format,
> +                          out_params->width, out_params->height,
> +                          out_params->codec_id == out_params->format,
> +                          VIDEO_SCALE_FLAGS, NULL, NULL, NULL);
> +    if (!*ctx) {
> +        log_error("Could not allocate scale context", NULL);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
> +}
>  
> -    /* copy the stream parameters to the muxer */
> -    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not copy the stream parameters\n");
> -        exit(1);
> -    }
> +static int init_muxer(AVFormatContext **out_fmt_ctx, AVCodecContext *audio_enc_ctx,
> +                      AVCodecContext *video_enc_ctx, const char *filename)
> +{
> +    int ret;
> +    AVStream *out_audio_str, *out_video_str;
>  
> -    /* create resampler context */
> -    ost->swr_ctx = swr_alloc();
> -    if (!ost->swr_ctx) {
> -        fprintf(stderr, "Could not allocate resampler context\n");
> -        exit(1);
> +    if ((ret = avformat_alloc_output_context2(out_fmt_ctx, NULL, NULL, filename)) < 0) {
> +        log_error("Could not create output context", &ret);
> +        return ret;
>      }
>  
> -    /* set options */
> -    av_opt_set_chlayout  (ost->swr_ctx, "in_chlayout",       &c->ch_layout,      0);
> -    av_opt_set_int       (ost->swr_ctx, "in_sample_rate",     c->sample_rate,    0);
> -    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0);
> -    av_opt_set_chlayout  (ost->swr_ctx, "out_chlayout",      &c->ch_layout,      0);
> -    av_opt_set_int       (ost->swr_ctx, "out_sample_rate",    c->sample_rate,    0);
> -    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",     c->sample_fmt,     0);
> -
> -    /* initialize the resampling context */
> -    if ((ret = swr_init(ost->swr_ctx)) < 0) {
> -        fprintf(stderr, "Failed to initialize the resampling context\n");
> -        exit(1);
> +    /* open the output file, if needed */
> +    if (!((*out_fmt_ctx)->oformat->flags & AVFMT_NOFILE)) {
> +        if ((ret = avio_open(&(*out_fmt_ctx)->pb, filename, AVIO_FLAG_WRITE)) < 0) {
> +            log_error("Could not open output file", &ret);
> +            return ret;
> +        }
>      }
> -}
>  
> -/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
> - * 'nb_channels' channels. */
> -static AVFrame *get_audio_frame(OutputStream *ost)
> -{
> -    AVFrame *frame = ost->tmp_frame;
> -    int j, i, v;
> -    int16_t *q = (int16_t*)frame->data[0];
> -
> -    /* check if we want to generate more frames */
> -    if (av_compare_ts(ost->next_pts, ost->enc->time_base,
> -                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
> -        return NULL;
> +    if (audio_enc_ctx) {
> +        if (!(out_audio_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
> +            log_error("Could not create new stream", NULL);
> +            return AVERROR(ENOMEM);
> +        }
> +        out_audio_str->id = (*out_fmt_ctx)->nb_streams - 1;
> +        avcodec_parameters_from_context(out_audio_str->codecpar, audio_enc_ctx);
> +    }
>  
> -    for (j = 0; j <frame->nb_samples; j++) {
> -        v = (int)(sin(ost->t) * 10000);
> -        for (i = 0; i < ost->enc->ch_layout.nb_channels; i++)
> -            *q++ = v;
> -        ost->t     += ost->tincr;
> -        ost->tincr += ost->tincr2;
> +    if (video_enc_ctx) {
> +        if (!(out_video_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
> +            log_error("Could not create new stream", NULL);
> +            return AVERROR(ENOMEM);
> +        }
> +        out_video_str->id = (*out_fmt_ctx)->nb_streams - 1;
> +        avcodec_parameters_from_context(out_video_str->codecpar, video_enc_ctx);
>      }
>  
> -    frame->pts = ost->next_pts;
> -    ost->next_pts  += frame->nb_samples;
> +    av_dump_format(*out_fmt_ctx, 0, filename, 1);
>  
> -    return frame;
> +    /* Write the stream header, if any. */
> +    if (avformat_write_header(*out_fmt_ctx, NULL) < 0) {
> +        log_error("avformat_write_header() error", NULL);
> +        return AVERROR_EXIT;
> +    } else
> +        return 0;
This else block is unnecessary.
>  }
>  
> -/*
> - * encode one audio frame and send it to the muxer
> - * return 1 when encoding is finished, 0 otherwise
> - */
> -static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
> +static void fill_dummy_s16_frame(AVFrame *frame)
>  {
> -    AVCodecContext *c;
> -    AVFrame *frame;
> -    int ret;
> -    int dst_nb_samples;
> -
> -    c = ost->enc;
> -
> -    frame = get_audio_frame(ost);
> -
> -    if (frame) {
> -        /* convert samples from native format to destination codec format, using the resampler */
> -        /* compute destination number of samples */
> -        dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
> -                                        c->sample_rate, c->sample_rate, AV_ROUND_UP);
> -        av_assert0(dst_nb_samples == frame->nb_samples);
> -
> -        /* when we pass a frame to the encoder, it may keep a reference to it
> -         * internally;
> -         * make sure we do not overwrite it here
> -         */
> -        ret = av_frame_make_writable(ost->frame);
> -        if (ret < 0)
> -            exit(1);
> -
> -        /* convert to destination format */
> -        ret = swr_convert(ost->swr_ctx,
> -                          ost->frame->data, dst_nb_samples,
> -                          (const uint8_t **)frame->data, frame->nb_samples);
> -        if (ret < 0) {
> -            fprintf(stderr, "Error while converting\n");
> -            exit(1);
> -        }
> -        frame = ost->frame;
> -
> -        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
> -        ost->samples_count += dst_nb_samples;
> +    int j, i, v;
> +    static float t, tincr, tincr2;
> +    int16_t *data = (int16_t*)frame->data[0];
> +    static int frame_ctr;
> +
> +    if (!tincr) {
> +        t       = 0;
> +        tincr   = 2 * M_PI * 110.0 / frame->sample_rate;
> +        /* increment frequency by 110 Hz per second */
> +        tincr2  = tincr / frame->sample_rate;
What are you doing here? Why are you doing it?

>      }
> -
> -    return write_frame(oc, c, ost->st, frame, ost->tmp_pkt);
> +    for (j = 0; j <frame->nb_samples; j++) {
> +        v = (int)(sin(t) * 10000);
> +        for (i = 0; i < frame->ch_layout.nb_channels; i++)
> +            *data++ = v;
> +        t     += tincr;
> +        tincr += tincr2;
> +    }
> +    frame->pts = frame->nb_samples*(++frame_ctr);
If you're trying to populate a stream, you should be using the aevalsrc 
filter, which exists for exactly this purpose. Otherwise just populate 
it with zeroes (silence).
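An untested sketch with the lavfi API (error checking omitted):

    #include <libavfilter/avfilter.h>
    #include <libavfilter/buffersink.h>

    AVFilterGraph *graph = avfilter_graph_alloc();
    AVFilterContext *src, *sink;
    avfilter_graph_create_filter(&src, avfilter_get_by_name("aevalsrc"),
                                 "in", "sin(440*2*PI*t):s=44100:d=10",
                                 NULL, graph);
    avfilter_graph_create_filter(&sink, avfilter_get_by_name("abuffersink"),
                                 "out", NULL, NULL, graph);
    avfilter_link(src, 0, sink, 0);
    avfilter_graph_config(graph, NULL);
    /* then pull frames with av_buffersink_get_frame(sink, frame) */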
>  }
>  
> -/**************************************************************/
> -/* video output */
> -
> -static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
> +static void fill_dummy_yuv420p_frame(AVFrame *frame)
>  {
There's a testsrc filter, or just fill a frame with zeroes (black). 
Don't reinvent the wheel in an example, that discourages people from 
using features that exist.
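Same pattern as the aevalsrc sketch above, just with a video source and
a "buffersink" instead of "abuffersink", e.g.:

    avfilter_graph_create_filter(&src, avfilter_get_by_name("testsrc"),
                                 "in", "size=352x288:rate=25:duration=10",
                                 NULL, graph);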

> -    AVFrame *picture;
> -    int ret;
> -
> -    picture = av_frame_alloc();
> -    if (!picture)
> -        return NULL;
> +    int x, y;
> +    static int idx;
>  
> -    picture->format = pix_fmt;
> -    picture->width  = width;
> -    picture->height = height;
> +    /* Y */
> +    for (y = 0; y < frame->width; y++)
> +        for (x = 0; x < frame->width; x++)
> +            frame->data[0][y * frame->linesize[0] + x] = x + y + idx * 3;
>  
> -    /* allocate the buffers for the frame data */
> -    ret = av_frame_get_buffer(picture, 0);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not allocate frame data.\n");
> -        exit(1);
> +    /* Cb and Cr */
> +    for (y = 0; y < frame->height / 2; y++) {
> +        for (x = 0; x < frame->width / 2; x++) {
> +            frame->data[1][y * frame->linesize[1] + x] = 128 + y + idx * 2;
> +            frame->data[2][y * frame->linesize[2] + x] = 64 + x + idx * 5;
> +        }
>      }
>
> -    return picture;
> +    frame->pts = idx++;
>  }
>  
> -static void open_video(AVFormatContext *oc, const AVCodec *codec,
> -                       OutputStream *ost, AVDictionary *opt_arg)
> +static int convert_frame(void *convert_ctx, AVFrame *in_frame, AVFrame *out_frame)
>  {
>      int ret;
> -    AVCodecContext *c = ost->enc;
> -    AVDictionary *opt = NULL;
> -
> -    av_dict_copy(&opt, opt_arg, 0);
> +    enum AVMediaType *type = (enum AVMediaType *)(in_frame->opaque);
>  
> -    /* open the codec */
> -    ret = avcodec_open2(c, codec, &opt);
> -    av_dict_free(&opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
> -        exit(1);
> -    }
> -
> -    /* allocate and init a re-usable frame */
> -    ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
> -    if (!ost->frame) {
> -        fprintf(stderr, "Could not allocate video frame\n");
> -        exit(1);
> +    if (av_frame_make_writable(out_frame) < 0) {
  if ((ret = av_frame_make_writable(out_frame)) < 0) {
> +        log_error("av_frame_make_writable() error", NULL);
> +        return AVERROR_EXIT;
return ret;
>      }
>  
> -    /* If the output format is not YUV420P, then a temporary YUV420P
> -     * picture is needed too. It is then converted to the required
> -     * output format. */
> -    ost->tmp_frame = NULL;
> -    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
> -        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
> -        if (!ost->tmp_frame) {
> -            fprintf(stderr, "Could not allocate temporary picture\n");
> -            exit(1);
> +    if (*type == AVMEDIA_TYPE_AUDIO) {
> +        if ((ret = swr_convert_frame((struct SwrContext *)convert_ctx, out_frame,
> +                                     (const AVFrame *)in_frame)) != 0) {
> +            log_error("Error converting AVFrame", &ret);
> +            return ret;
>          }
> +    } else {
> +        sws_scale((struct SwsContext *)convert_ctx, (const uint8_t * const *)in_frame->data,
> +                  in_frame->linesize, 0, in_frame->height, out_frame->data,
> +                  out_frame->linesize);
>      }
>  
> -    /* copy the stream parameters to the muxer */
> -    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not copy the stream parameters\n");
> -        exit(1);
> -    }
> +    out_frame->pts = in_frame->pts;
> +    return 0;
>  }
>  
> -/* Prepare a dummy image. */
> -static void fill_yuv_image(AVFrame *pict, int frame_index,
> -                           int width, int height)
> +static int encode_frame(AVCodecContext *ctx, AVFrame *in_frame, AVPacket *out_pkt)
>  {
> -    int x, y, i;
> +    static int is_flushing_audio = 0, is_flushing_video = 0;
> +    int ret = 0;
> +    int is_audio = ctx->codec->type == AVMEDIA_TYPE_AUDIO;
>  
> -    i = frame_index;
> -
> -    /* Y */
> -    for (y = 0; y < height; y++)
> -        for (x = 0; x < width; x++)
> -            pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
> -
> -    /* Cb and Cr */
> -    for (y = 0; y < height / 2; y++) {
> -        for (x = 0; x < width / 2; x++) {
> -            pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
> -            pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
> -        }
> +    if ((is_audio && !is_flushing_audio) || (!is_audio && !is_flushing_video)) {
> +        ret = avcodec_send_frame(ctx, in_frame);
>      }
> -}
> -
> -static AVFrame *get_video_frame(OutputStream *ost)
> -{
> -    AVCodecContext *c = ost->enc;
> -
> -    /* check if we want to generate more frames */
> -    if (av_compare_ts(ost->next_pts, c->time_base,
> -                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
> -        return NULL;
> -
> -    /* when we pass a frame to the encoder, it may keep a reference to it
> -     * internally; make sure we do not overwrite it here */
> -    if (av_frame_make_writable(ost->frame) < 0)
> -        exit(1);
> -
> -    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
> -        /* as we only generate a YUV420P picture, we must convert it
> -         * to the codec pixel format if needed */
> -        if (!ost->sws_ctx) {
> -            ost->sws_ctx = sws_getContext(c->width, c->height,
> -                                          AV_PIX_FMT_YUV420P,
> -                                          c->width, c->height,
> -                                          c->pix_fmt,
> -                                          SCALE_FLAGS, NULL, NULL, NULL);
> -            if (!ost->sws_ctx) {
> -                fprintf(stderr,
> -                        "Could not initialize the conversion context\n");
> -                exit(1);
> -            }
> +    if (ret < 0) {
You need to check for AVERROR(EAGAIN).
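The standard send/receive loop looks roughly like:

    ret = avcodec_send_frame(ctx, in_frame);
    if (ret < 0 && ret != AVERROR(EAGAIN))
        return ret;
    while ((ret = avcodec_receive_packet(ctx, out_pkt)) >= 0) {
        /* mux out_pkt here */
    }
    if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
        return ret;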
> +        av_log(NULL, AV_LOG_ERROR,
> +               "Error sending frame to the encoder (error '%s')\n", av_err2str(ret));
> +        return ret;
> +    } else if (ret == 0) {
> +        ret = avcodec_receive_packet(ctx, out_pkt);
> +        if ((ret < 0) && (ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF)) {
> +            av_log(NULL, AV_LOG_ERROR,
> +                   "Error receiving encoded packet (error '%s')\n", av_err2str(ret));
> +            return ret;
>          }
> -        fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
> -        sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
> -                  ost->tmp_frame->linesize, 0, c->height, ost->frame->data,
> -                  ost->frame->linesize);
> -    } else {
> -        fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
>      }
>  
> -    ost->frame->pts = ost->next_pts++;
> +    if (is_audio)
> +        is_flushing_audio = (in_frame == NULL);
> +    else
> +        is_flushing_video = (in_frame == NULL);
>  
> -    return ost->frame;
> +    return ret;
>  }
>  
> -/*
> - * encode one video frame and send it to the muxer
> - * return 1 when encoding is finished, 0 otherwise
> - */
> -static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
> +static int frame_exceeds_stream_duration(AVFrame *fr)
>  {
> -    return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);
> +    enum AVMediaType *type = (enum AVMediaType *)(fr->opaque);
Why are you reading from the opaque structure of the frame? Are you sure
this is what you wanted to do?
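If the duration check needs the media type, pass it as a parameter
instead, e.g. (untested):

    static int frame_exceeds_stream_duration(const AVFrame *fr,
                                             enum AVMediaType type);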
> +    AVRational tb = (*type == AVMEDIA_TYPE_AUDIO) ? (AVRational){ 1, fr->sample_rate} :
> +                                                    (AVRational){ 1, VIDEO_FRAME_RATE};
> +
> +    return av_compare_ts(fr->pts, tb ,STREAM_DURATION, (AVRational){ 1, 1 }) > 0;
>  }
>  
> -static void close_stream(AVFormatContext *oc, OutputStream *ost)
> +static enum AVMediaType media_type_of_earlier_frame(AVFrame *audio_fr,
> +                                                    AVFrame *video_fr)
>  {
> -    avcodec_free_context(&ost->enc);
> -    av_frame_free(&ost->frame);
> -    av_frame_free(&ost->tmp_frame);
> -    av_packet_free(&ost->tmp_pkt);
> -    sws_freeContext(ost->sws_ctx);
> -    swr_free(&ost->swr_ctx);
> +    if (!audio_fr)
> +        return AVMEDIA_TYPE_VIDEO;
> +    if (!video_fr)
> +        return AVMEDIA_TYPE_AUDIO;
> +
> +    if (av_compare_ts(audio_fr->pts, (AVRational){ 1, audio_fr->sample_rate},
> +                      video_fr->pts, (AVRational){ 1, VIDEO_FRAME_RATE}) < 0)
> +        return AVMEDIA_TYPE_AUDIO;
> +    else
> +        return AVMEDIA_TYPE_VIDEO;
>  }
>  
> -/**************************************************************/
> -/* media file output */
> -
>  int main(int argc, char **argv)
>  {
> -    OutputStream video_st = { 0 }, audio_st = { 0 };
> -    const AVOutputFormat *fmt;
> -    const char *filename;
> -    AVFormatContext *oc;
> -    const AVCodec *audio_codec, *video_codec;
> -    int ret;
> -    int have_video = 0, have_audio = 0;
> -    int encode_video = 0, encode_audio = 0;
> -    AVDictionary *opt = NULL;
> -    int i;
> -
> -    if (argc < 2) {
> +    const char *fname;
> +    AVCodecContext *audio_enc_ctx = NULL, *video_enc_ctx = NULL, *enc_ctx = NULL;
> +
> +    /* NOTE: if you want to modify the audio/video input ".format" parameter,
> +     * you need to modify the corresponding fill_dummy_XXX_frame() function(s) too */
> +    AVCodecParameters audio_in_params = {
> +        .codec_type  = AVMEDIA_TYPE_AUDIO,
> +        .format      = AV_SAMPLE_FMT_S16,
> +        .sample_rate = 44100,
> +        .ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO
> +    },
> +    video_in_params = {
> +        .codec_type = AVMEDIA_TYPE_VIDEO,
> +        .width      = 352,
> +        .height     = 288,
> +        .format     = AV_PIX_FMT_YUV420P
> +    },
> +    video_enc_params  = { 0 }, audio_enc_params = { 0 };
> +    struct AVRational enc_timebases[2];
> +    AVFrame *in_audio_frame = NULL, *converted_audio_frame = NULL,
> +            *in_video_frame = NULL, *converted_video_frame = NULL,
> +            *frame_to_encode = NULL;
> +    struct SwrContext *audio_convert_ctx = NULL;
> +    struct SwsContext *video_convert_ctx = NULL;
> +    enum AVMediaType media_type;
> +    AVFormatContext *out_fmt_ctx = NULL;
> +    AVPacket *out_pkt = av_packet_alloc();
> +    int ret = 0, process_audio = 0, process_video = 0;
> +
> +    if (argc != 2) {
>          printf("usage: %s output_file\n"
>                 "API example program to output a media file with libavformat.\n"
> -               "This program generates a synthetic audio and video stream, encodes and\n"
> +               "This program generates a synthetic audio and/or video stream, encodes and\n"
>                 "muxes them into a file named output_file.\n"
>                 "The output format is automatically guessed according to the file extension.\n"
> -               "Raw images can also be output by using '%%d' in the filename.\n"
> +               "BMP or JPEG images can also be output by using '%%d' in the filename.\n"
>                 "\n", argv[0]);
> -        return 1;
> +        return AVERROR_EXIT;
This return value is sent to the operating system with the exit() system
call, so you don't actually want to return an AVERROR value here.
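E.g. just keep the old behavior:

    return 1;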

>      }
>  
> -    filename = argv[1];
> -    for (i = 2; i+1 < argc; i+=2) {
> -        if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
> -            av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
> +    fname = argv[1];
> +    if (!is_extension_supported(fname)) {
> +        ret = AVERROR_EXIT;
> +        goto end;
>      }
>  
> -    /* allocate the output media context */
> -    avformat_alloc_output_context2(&oc, NULL, NULL, filename);
> -    if (!oc) {
> -        printf("Could not deduce output format from file extension: using MPEG.\n"); > -        avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
> -    }
> -    if (!oc)
> -        return 1;
> -
> -    fmt = oc->oformat;
> -
> -    /* Add the audio and video streams using the default format codecs
> -     * and initialize the codecs. */
> -    if (fmt->video_codec != AV_CODEC_ID_NONE) {
> -        add_stream(&video_st, oc, &video_codec, fmt->video_codec);
> -        have_video = 1;
> -        encode_video = 1;
> +    /* Desume the default codecs and their default parameters from the filename */
> +    if ((ret = get_default_enc_params(&audio_enc_params, fname, AVMEDIA_TYPE_AUDIO)) < 0)
> +        goto end;
> +    if ((ret = get_default_enc_params(&video_enc_params, fname, AVMEDIA_TYPE_VIDEO)) < 0)
> +        goto end;
> +    process_audio = audio_enc_params.codec_id != AV_CODEC_ID_NONE;
> +    process_video = video_enc_params.codec_id != AV_CODEC_ID_NONE;
> +    if (!process_audio && !process_video) {
> +        log_error("Could not get default encoder(s)", NULL);
> +        ret = AVERROR_EXIT;
> +        goto end;
>      }
> -    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
> -        add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
> -        have_audio = 1;
> -        encode_audio = 1;
> -    }
> -
> -    /* Now that all the parameters are set, we can open the audio and
> -     * video codecs and allocate the necessary encode buffers. */
> -    if (have_video)
> -        open_video(oc, video_codec, &video_st, opt);
> -
> -    if (have_audio)
> -        open_audio(oc, audio_codec, &audio_st, opt);
> -
> -    av_dump_format(oc, 0, filename, 1);
>  
> -    /* open the output file, if needed */
> -    if (!(fmt->flags & AVFMT_NOFILE)) {
> -        ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
> -        if (ret < 0) {
> -            fprintf(stderr, "Could not open '%s': %s\n", filename,
> -                    av_err2str(ret));
> -            return 1;
> -        }
> +    if (process_audio) {
> +        /* Prepare the audio encoder*/
> +        if ((ret = init_encoder(&audio_enc_ctx, &audio_enc_params)) < 0)
> +            goto end;
> +        enc_timebases[0] = audio_enc_ctx->time_base;
> +        audio_in_params.frame_size  = audio_enc_params.frame_size  = audio_enc_ctx->frame_size;
> +
> +        /* Allocate an audio resampler and its input and output AVFrames */
> +        if ((ret = init_audio_convert(&audio_convert_ctx, &audio_in_params,
> +                                      &audio_enc_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&in_audio_frame, &audio_in_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&converted_audio_frame, &audio_enc_params)) < 0)
> +            goto end;
>      }
>  
> -    /* Write the stream header, if any. */
> -    ret = avformat_write_header(oc, &opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Error occurred when opening output file: %s\n",
> -                av_err2str(ret));
> -        return 1;
> +    if (process_video) {
> +        video_enc_params.width  = video_in_params.width;
> +        video_enc_params.height = video_in_params.height;
> +        if ((ret = init_encoder(&video_enc_ctx, &video_enc_params)) < 0)
> +            goto end;
> +        enc_timebases[1] = video_enc_ctx->time_base;
> +        if ((ret = init_video_convert(&video_convert_ctx,&video_in_params,
> +                                      &video_enc_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&in_video_frame, &video_in_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&converted_video_frame, &video_enc_params)) < 0)
> +            goto end;
>      }
>  
> -    while (encode_video || encode_audio) {
> -        /* select the stream to encode */
> -        if (encode_video &&
> -            (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
> -                                            audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
> -            encode_video = !write_video_frame(oc, &video_st);
> +    /* Create the output container for the encoded frames */
> +    if ((ret = init_muxer(&out_fmt_ctx, audio_enc_ctx, video_enc_ctx, fname)) < 0)
> +        goto end;
> +    out_fmt_ctx->opaque = &enc_timebases;
> +
> +    while (process_audio || process_video) {
> +
> +        frame_to_encode = NULL;
> +        media_type = media_type_of_earlier_frame(in_audio_frame, in_video_frame);
> +
> +        /* fill and convert the input frames */
> +        if (media_type == AVMEDIA_TYPE_AUDIO) {
> +            enc_ctx = audio_enc_ctx;
> +            fill_dummy_s16_frame(in_audio_frame);
> +            if ((ret = convert_frame(audio_convert_ctx, in_audio_frame,
> +                                     converted_audio_frame)) != 0)
> +                goto end;
> +            if (!frame_exceeds_stream_duration(converted_audio_frame))
> +                frame_to_encode = converted_audio_frame;
>          } else {
> -            encode_audio = !write_audio_frame(oc, &audio_st);
> +            enc_ctx = video_enc_ctx;
> +            fill_dummy_yuv420p_frame(in_video_frame);
> +            if ((ret = convert_frame(video_convert_ctx, in_video_frame,
> +                                     converted_video_frame)) != 0)
> +                goto end;
> +            if (!frame_exceeds_stream_duration(in_video_frame))
> +                frame_to_encode = converted_video_frame;
>          }
> -    }
> -
> -    av_write_trailer(oc);
>  
> -    /* Close each codec. */
> -    if (have_video)
> -        close_stream(oc, &video_st);
> -    if (have_audio)
> -        close_stream(oc, &audio_st);
> +        /* encode the converted frames and mux the encoded packets */
> +        if ((ret = encode_frame(enc_ctx, frame_to_encode, out_pkt)) == 0) {
> +            if ((ret = mux_encoded_pkt(out_pkt, out_fmt_ctx, media_type)) < 0)
> +               goto end;
> +        }
>  
> -    if (!(fmt->flags & AVFMT_NOFILE))
> -        /* Close the output file. */
> -        avio_closep(&oc->pb);
> +        /* check if the encoders have been fully flushed */
> +        process_audio &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_AUDIO));
> +        process_video &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_VIDEO));
>  
> -    /* free the stream */
> -    avformat_free_context(oc);
> +    }
>  
> -    return 0;
> +    av_write_trailer(out_fmt_ctx);
> +    ret = 0;
> +
> +end:
> +
> +    avcodec_free_context(&audio_enc_ctx);
> +    avcodec_free_context(&video_enc_ctx);
> +    av_frame_free(&in_audio_frame);
> +    av_frame_free(&in_video_frame);
> +    av_frame_free(&converted_audio_frame);
> +    av_frame_free(&converted_video_frame);
> +    swr_free(&audio_convert_ctx);
> +    sws_freeContext(video_convert_ctx);
> +    if (out_fmt_ctx)
> +        avio_closep(&out_fmt_ctx->pb);
> +    avformat_free_context(out_fmt_ctx);
> +    av_packet_free(&out_pkt);
> +
> +    return ret;
>  }
> -- 
> 2.32.0

This isn't a thorough review, since I'm not familiar enough with the mux
API to really say whether or not it was used correctly, but this is what
I noticed at first glance.

