[FFmpeg-devel] [PATCH 3/3] ffmpeg: support audio in complex filters

Thu May 3 14:36:04 CEST 2012

---
 ffmpeg.c |  185 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 157 insertions(+), 28 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 64ee5fd..2ff6910 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -60,6 +60,7 @@
 # include "libavfilter/buffersink.h"
 # include "libavfilter/buffersrc.h"
 # include "libavfilter/vsrc_buffer.h"
+# include "libavfilter/asrc_abuffer.h"
 
 #if HAVE_SYS_RESOURCE_H
 #include <sys/types.h>
@@ -183,6 +184,7 @@ typedef struct InputFilter {
 
 typedef struct OutputFilter {
     AVFilterContext     *filter;
+    AVCodecContext      *codec;  ///< Store output link information. Will be used as decoder codec context to prevent several copies in do_audio_out.
     struct OutputStream *ost;
     struct FilterGraph  *graph;
 
@@ -858,9 +860,8 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
     enum AVMediaType type = in->filter_ctx->input_pads[in->pad_idx].type;
     int i;
 
-    // TODO: support other filter types
-    if (type != AVMEDIA_TYPE_VIDEO) {
-        av_log(NULL, AV_LOG_FATAL, "Only video filters supported currently.\n");
+    if (type != AVMEDIA_TYPE_VIDEO && type != AVMEDIA_TYPE_AUDIO) {
+        av_log(NULL, AV_LOG_FATAL, "Only audio and video filters supported currently.\n");
         exit_program(1);
     }
 
@@ -926,10 +927,14 @@ static int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFil
     AVCodecContext *codec = ofilter->ost->st->codec;
     AVFilterContext *last_filter = out->filter_ctx;
     int pad_idx = out->pad_idx;
-    int ret;
-    enum PixelFormat *pix_fmts = choose_pixel_fmts(ofilter->ost);
-    AVBufferSinkParams *buffersink_params = av_buffersink_params_alloc();
+    int ret = 0;
+    enum AVMediaType type = ofilter->out_tmp->filter_ctx->output_pads[ofilter->out_tmp->pad_idx].type;
 
+    if (type == AVMEDIA_TYPE_VIDEO) {
+        enum PixelFormat *pix_fmts = choose_pixel_fmts(ofilter->ost);
+        AVBufferSinkParams *buffersink_params = av_buffersink_params_alloc();
+
+    /* TODO reindent */
 #if FF_API_OLD_VSINK_API
     ret = avfilter_graph_create_filter(&ofilter->filter, avfilter_get_by_name("buffersink"),
                                        "out", NULL, pix_fmts, fg->graph);
@@ -939,10 +944,33 @@ static int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFil
                                        "out", NULL, buffersink_params, fg->graph);
 #endif
     av_freep(&buffersink_params);
+    } else if (type == AVMEDIA_TYPE_AUDIO) {
+        /* constraints handed to abuffersink: packed samples, the formats below, any layout */
+        enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_U8,  AV_SAMPLE_FMT_S16,
+                                              AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
+                                              AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE };
+        const int packing_fmts[] = { AVFILTER_PACKED, -1 };
+        const int64_t *chlayouts = avfilter_all_channel_layouts;
+        AVABufferSinkParams *abuffersink_params = av_abuffersink_params_alloc();
+
+        if (!abuffersink_params)
+            return AVERROR(ENOMEM);
+        abuffersink_params->sample_fmts = sample_fmts;
+        abuffersink_params->packing_fmts = packing_fmts;
+        abuffersink_params->channel_layouts = chlayouts;
+        ret = avfilter_graph_create_filter(&ofilter->filter, avfilter_get_by_name("abuffersink"),
+                                           "out", NULL, abuffersink_params, fg->graph);
+        av_freep(&abuffersink_params);
+    }
 
     if (ret < 0)
         return ret;
 
+    if (!(ofilter->codec = avcodec_alloc_context3(NULL)))
+        return AVERROR(ENOMEM); /* the context is dereferenced on the next line */
+    ofilter->codec->bit_rate = 0;
+    if (type == AVMEDIA_TYPE_VIDEO) {
+    /* TODO reindent */
     if (codec->width || codec->height) {
         char args[255];
         snprintf(args, sizeof(args), "%d:%d:flags=0x%X",
@@ -956,6 +984,7 @@ static int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFil
             return ret;
         pad_idx = 0;
     }
+    }
 
     if ((ret = avfilter_link(last_filter, pad_idx, ofilter->filter, 0)) < 0)
         return ret;
@@ -983,7 +1012,10 @@ static int configure_complex_filter(FilterGraph *fg)
         InputStream     *ist = ifilter->ist;
         AVRational       sar;
         char            args[255];
+        enum AVMediaType type = cur->filter_ctx->input_pads[cur->pad_idx].type;
 
+        if (type == AVMEDIA_TYPE_VIDEO) {
+        /* TODO reindent */
         sar = ist->st->sample_aspect_ratio.num ? ist->st->sample_aspect_ratio :
                                                  ist->st->codec->sample_aspect_ratio;
         snprintf(args, sizeof(args), "%d:%d:%d:%d:%d:%d:%d", ist->st->codec->width,
@@ -994,6 +1026,19 @@ static int configure_complex_filter(FilterGraph *fg)
                                                 avfilter_get_by_name("buffer"), cur->name,
                                                 args, NULL, fg->graph)) < 0)
             return ret;
+        } else if (type == AVMEDIA_TYPE_AUDIO) {
+            AVCodecContext *icodec = ist->st->codec;
+            if (!icodec->channel_layout)
+                icodec->channel_layout = av_get_default_channel_layout(icodec->channels);
+            /* abuffer arguments: sample_rate:sample_fmt:channel_layout:packing */
+            snprintf(args, sizeof(args), "%d:%d:0x%"PRIx64":packed",
+                     icodec->sample_rate, icodec->sample_fmt, icodec->channel_layout);
+            if ((ret = avfilter_graph_create_filter(&ifilter->filter,
+                                                    avfilter_get_by_name("abuffer"), cur->name,
+                                                    args, NULL, fg->graph)) < 0)
+                return ret; /* as in the video branch: never link a source that was not created */
+        }
+
         if ((ret = avfilter_link(ifilter->filter, 0,
                                  cur->filter_ctx, cur->pad_idx)) < 0)
             return ret;
@@ -1175,8 +1220,10 @@ void av_noreturn exit_program(int ret)
         for (j = 0; j < filtergraphs[i]->nb_inputs; j++)
             av_freep(&filtergraphs[i]->inputs[j]);
         av_freep(&filtergraphs[i]->inputs);
-        for (j = 0; j < filtergraphs[i]->nb_outputs; j++)
+        for (j = 0; j < filtergraphs[i]->nb_outputs; j++) {
+            av_freep(&filtergraphs[i]->outputs[j]->codec);
             av_freep(&filtergraphs[i]->outputs[j]);
+        }
         av_freep(&filtergraphs[i]->outputs);
         av_freep(&filtergraphs[i]);
     }
@@ -1393,7 +1440,7 @@ static void get_default_channel_layouts(OutputStream *ost, InputStream *ist)
 {
     char layout_name[256];
     AVCodecContext *enc = ost->st->codec;
-    AVCodecContext *dec = ist->st->codec;
+    AVCodecContext *dec = ost->filter ? ost->filter->codec : ist->st->codec;
 
     if (dec->channel_layout &&
         av_get_channel_layout_nb_channels(dec->channel_layout) != dec->channels) {
@@ -1562,7 +1609,7 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
 
     int frame_bytes, resample_changed, ret;
     AVCodecContext *enc = ost->st->codec;
-    AVCodecContext *dec = ist->st->codec;
+    AVCodecContext *dec = ost->filter ? ost->filter->codec : ist->st->codec;
     int osize = av_get_bytes_per_sample(enc->sample_fmt);
     int isize = av_get_bytes_per_sample(dec->sample_fmt);
     uint8_t *buf[AV_NUM_DATA_POINTERS];
@@ -1571,6 +1618,7 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
     int i;
     int out_linesize = 0;
     int buf_linesize = decoded_frame->linesize[0];
+    int64_t ipts;
 
     av_assert0(planes <= AV_NUM_DATA_POINTERS);
 
@@ -1600,8 +1648,14 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
     if ((ost->audio_resample && !ost->swr) || resample_changed || ost->audio_channels_mapped) {
 
         if (resample_changed) {
-            av_log(NULL, AV_LOG_INFO, "Input stream #%d:%d frame changed from rate:%d fmt:%s ch:%d chl:0x%"PRIx64" to rate:%d fmt:%s ch:%d chl:0x%"PRIx64"\n",
-                   ist->file_index, ist->st->index,
+            char tmp[32];
+            if (ost->filter)
+                snprintf(tmp, sizeof(tmp), "filter %s", ost->filter->filter->name);
+            else
+                snprintf(tmp, sizeof(tmp), "stream #%d:%d", ist->file_index, ist->st->index);
+
+            av_log(NULL, AV_LOG_INFO, "Input %s frame changed from rate:%d fmt:%s ch:%d chl:0x%"PRIx64" to rate:%d fmt:%s ch:%d chl:0x%"PRIx64"\n",
+                   tmp,
                    ost->resample_sample_rate, av_get_sample_fmt_name(ost->resample_sample_fmt),
                    ost->resample_channels, ost->resample_channel_layout,
                    dec->sample_rate, av_get_sample_fmt_name(dec->sample_fmt),
@@ -1659,14 +1713,16 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
 
     av_assert0(ost->audio_resample || dec->sample_fmt==enc->sample_fmt);
 
+    ipts = ost->filter ? ost->filter->filter->inputs[0]->current_pts : ist->pts;
     if (audio_sync_method > 0) {
-        double delta = get_sync_ipts(ost, ist->pts) * enc->sample_rate - ost->sync_opts -
+        double delta = get_sync_ipts(ost, ipts) * enc->sample_rate - ost->sync_opts -
                        av_fifo_size(ost->fifo) / (enc->channels * osize);
         int idelta = delta * dec->sample_rate / enc->sample_rate;
         int byte_delta = idelta * isize * dec->channels;
 
         // FIXME resample delay
-        if (fabs(delta) > 50) {
+        // TODO resample delay should be ported to lavfi
+        if (!ost->filter && fabs(delta) > 50) {
             if (ist->is_start || fabs(delta) > audio_drift_threshold*enc->sample_rate) {
                 if (byte_delta < 0) {
                     byte_delta = FFMAX(byte_delta, -size);
@@ -1710,9 +1766,10 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
                 swr_set_compensation(ost->swr, comp, enc->sample_rate);
             }
         }
-    } else if (audio_sync_method == 0)
-        ost->sync_opts = lrintf(get_sync_ipts(ost, ist->pts) * enc->sample_rate) -
+    } else if (audio_sync_method == 0) {
+        ost->sync_opts = lrintf(get_sync_ipts(ost, ipts) * enc->sample_rate) -
                                 av_fifo_size(ost->fifo) / (enc->channels * osize); // FIXME wrong
+    }
 
     if (ost->audio_resample || ost->audio_channels_mapped) {
         buftmp = audio_buf;
@@ -2112,6 +2169,10 @@ static int poll_filters(void)
                              same_quant ? ost->last_quality :
                                           ost->st->codec->global_quality);
                 break;
+            case AVMEDIA_TYPE_AUDIO:
+                avfilter_fill_frame_from_audio_buffer_ref(filtered_frame, picref);
+                do_audio_out(of->ctx, ost, NULL, filtered_frame);
+                break;
             default:
                 // TODO support audio/subtitle filters
                 av_assert0(0);
@@ -2460,6 +2521,8 @@ static int transcode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
     AVCodecContext *avctx = ist->st->codec;
     int bps = av_get_bytes_per_sample(ist->st->codec->sample_fmt);
     int i, ret;
+    int decoded_data_size;
+    void *samples;
 
     if (!ist->decoded_frame && !(ist->decoded_frame = avcodec_alloc_frame()))
         return AVERROR(ENOMEM);
@@ -2497,9 +2560,9 @@ static int transcode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
 
 
     // preprocess audio (volume)
+    decoded_data_size = decoded_frame->nb_samples * avctx->channels * bps;
+    samples = decoded_frame->data[0];
     if (audio_volume != 256) {
-        int decoded_data_size = decoded_frame->nb_samples * avctx->channels * bps;
-        void *samples = decoded_frame->data[0];
         switch (avctx->sample_fmt) {
         case AV_SAMPLE_FMT_U8:
         {
@@ -2554,6 +2617,18 @@ static int transcode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
         }
     }
 
+    /* hand the decoded samples to every filter input attached to this stream */
+    for (i = 0; i < ist->nb_filters; i++) {
+        if (av_asrc_buffer_add_buffer(ist->filters[i]->filter,
+                                      samples, decoded_data_size,
+                                      ist->st->codec->sample_rate, ist->st->codec->sample_fmt,
+                                      ist->st->codec->channel_layout,
+                                      0, ist->pts, 0) < 0) {
+            av_log(NULL, AV_LOG_FATAL, "Failed to inject audio samples into filter network\n");
+            exit_program(1);
+        }
+    }
+
     rate_emu_sleep(ist);
 
     for (i = 0; i < nb_output_streams; i++) {
@@ -2921,6 +2996,37 @@ static InputStream *get_input_stream(OutputStream *ost)
     return NULL;
 }
 
+/**
+ * Return the codec context describing what feeds ost: the codec context of
+ * the input stream selected by ost->source_index if there is one, otherwise
+ * the codec context of the output filter attached to ost, otherwise NULL.
+ */
+static AVCodecContext *get_input_codec(OutputStream *ost)
+{
+    InputStream *ist = ost->source_index >= 0 ? input_streams[ost->source_index] : NULL;
+
+    if (ist)
+        return ist->st->codec;
+
+    return ost->filter ? ost->filter->codec : NULL;
+}
+
+/**
+ * Return the time base of what feeds ost: that of the input stream selected
+ * by ost->source_index if there is one, otherwise that of the first input
+ * link of the output filter attached to ost, otherwise { 0, 0 }.
+ */
+static AVRational get_input_stream_time_base(OutputStream *ost)
+{
+    InputStream *ist = ost->source_index >= 0 ? input_streams[ost->source_index] : NULL;
+
+    if (ist)
+        return ist->st->time_base;
+    if (ost->filter)
+        return ost->filter->filter->inputs[0]->time_base;
+    return (AVRational) { 0, 0 };
+}
+
 static int transcode_init(void)
 {
     int ret = 0, i, j, k;
@@ -2950,9 +3056,30 @@ static int transcode_init(void)
     }
 
     /* init complex filtergraphs */
-    for (i = 0; i < nb_filtergraphs; i++)
+    for (i = 0; i < nb_filtergraphs; i++) {
         if ((ret = avfilter_graph_config(filtergraphs[i]->graph, NULL)) < 0)
             return ret;
+        /*
+         * Fill link information into output codec context
+         * The codec context will be used as decoder codec context in do_audio_out
+         */
+        for (j = 0; j < filtergraphs[i]->nb_outputs; j++) {
+            OutputFilter *ofilter = filtergraphs[i]->outputs[j];
+            AVFilterLink *olink = ofilter->filter->inputs[0];
+            if (olink->type == AVMEDIA_TYPE_AUDIO) {
+                ofilter->codec->codec_id    = av_get_pcm_codec(olink->format, -1);
+                ofilter->codec->channels    = av_get_channel_layout_nb_channels(olink->channel_layout);
+                ofilter->codec->sample_fmt  = olink->format;
+                ofilter->codec->sample_rate = olink->sample_rate;
+                ofilter->codec->time_base   = olink->time_base;
+                ofilter->codec->channel_layout = olink->channel_layout;
+                if (ofilter->codec->codec_id == CODEC_ID_NONE)
+                    av_log(NULL, AV_LOG_ERROR,
+                        "Could not find PCM codec for sample format %s.\n",
+                        av_get_sample_fmt_name(olink->format));
+            }
+        }
+    }
 
     /* for each output stream, we compute the right encoding parameters */
     for (i = 0; i < nb_output_streams; i++) {
@@ -2964,10 +3091,9 @@ static int transcode_init(void)
             continue;
 
         codec  = ost->st->codec;
+        icodec = get_input_codec(ost);
 
         if (ist) {
-            icodec = ist->st->codec;
-
             ost->st->disposition          = ist->st->disposition;
             codec->bits_per_raw_sample    = icodec->bits_per_raw_sample;
             codec->chroma_sample_location = icodec->chroma_sample_location;
@@ -2975,6 +3101,7 @@ static int transcode_init(void)
 
         if (ost->stream_copy) {
             uint64_t extra_size;
+            AVRational ist_tb;
 
             av_assert0(ist && !ost->filter);
 
@@ -3006,15 +3133,15 @@ static int transcode_init(void)
             memcpy(codec->extradata, icodec->extradata, icodec->extradata_size);
             codec->extradata_size= icodec->extradata_size;
 
-            codec->time_base = ist->st->time_base;
+            codec->time_base = ist_tb = get_input_stream_time_base(ost);
             /*
              * Avi is a special case here because it supports variable fps but
              * having the fps and timebase differe significantly adds quite some
              * overhead
              */
             if(!strcmp(oc->oformat->name, "avi")) {
-                if (   copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > 2*av_q2d(ist->st->time_base)
-                                 && av_q2d(ist->st->time_base) < 1.0/500
+                if (   copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > 2*av_q2d(ist_tb)
+                                 && av_q2d(ist_tb) < 1.0/500
                     || copy_tb==0){
                     codec->time_base = icodec->time_base;
                     codec->time_base.num *= icodec->ticks_per_frame;
@@ -3025,8 +3152,8 @@ static int transcode_init(void)
                       && strcmp(oc->oformat->name, "mov") && strcmp(oc->oformat->name, "mp4") && strcmp(oc->oformat->name, "3gp")
                       && strcmp(oc->oformat->name, "3g2") && strcmp(oc->oformat->name, "psp") && strcmp(oc->oformat->name, "ipod")
             ) {
-                if(   copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > av_q2d(ist->st->time_base)
-                                && av_q2d(ist->st->time_base) < 1.0/500
+                if(   copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > av_q2d(ist_tb)
+                                && av_q2d(ist_tb) < 1.0/500
                    || copy_tb==0){
                     codec->time_base = icodec->time_base;
                     codec->time_base.num *= icodec->ticks_per_frame;
@@ -4907,12 +5034,14 @@ static void init_output_filter(OutputFilter *ofilter, OptionsContext *o,
 {
     OutputStream *ost;
 
-    if (ofilter->out_tmp->filter_ctx->output_pads[ofilter->out_tmp->pad_idx].type != AVMEDIA_TYPE_VIDEO) {
-        av_log(NULL, AV_LOG_FATAL, "Only video filters are supported currently.\n");
+    switch (ofilter->out_tmp->filter_ctx->output_pads[ofilter->out_tmp->pad_idx].type) {
+    case AVMEDIA_TYPE_VIDEO: ost = new_video_stream(o, oc, -1); break;
+    case AVMEDIA_TYPE_AUDIO: ost = new_audio_stream(o, oc, -1); break;
+    default: /* unsupported pad type: fatal, like the equivalent check in init_input_filter() */
+        av_log(NULL, AV_LOG_FATAL, "Only audio and video filters are currently supported\n");
+        exit_program(1);
     }
 
-    ost               = new_video_stream(o, oc, -1);
     ost->source_index = -1;
     ost->filter       = ofilter;
 
-- 
1.7.10