[FFmpeg-devel] [PATCH 3/3] ffmpeg: support audio in complex filters
Matthieu Bouron
matthieu.bouron at gmail.com
Sat May 5 11:14:19 CEST 2012
On Thu, May 03, 2012 at 03:42:02PM +0200, Matthieu Bouron wrote:
> On Thu, May 03, 2012 at 03:22:26PM +0200, Nicolas George wrote:
> > Le quintidi 15 floréal, an CCXX, Matthieu Bouron a écrit :
> > > ---
> > > ffmpeg.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------
> > > 1 file changed, 157 insertions(+), 28 deletions(-)
> >
> > Cool! Thanks.
> >
> > > + for (i = 0; i < ist->nb_filters; i++) {
> > > + if (av_asrc_buffer_add_buffer(ist->filters[i]->filter,
> > > + samples, decoded_data_size,
> > > + ist->st->codec->sample_rate,
> > > + ist->st->codec->sample_fmt,
> > > + ist->st->codec->channel_layout,
> > > + 0, ist->pts, 0) < 0) {
> >
> > This API is about to be deprecated. Would you consider rebasing your patch
> > on top of my proposal to merge vsrc_buffer and asrc_buffer? I intend to push
> > most patches in less than a few hours and re-submit the other ones.
>
> No problem, i'll wait for your patches.
Patch updated using av_buffersrc_add_frame.
>
> [...]
-------------- next part --------------
>From 5900423038868e47b1cd40eb525c35426cdfd622 Mon Sep 17 00:00:00 2001
From: Matthieu Bouron <matthieu.bouron at smartjog.com>
Date: Wed, 25 Apr 2012 18:46:23 +0200
Subject: [PATCH 3/3] ffmpeg: support audio in complex filters
---
ffmpeg.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 148 insertions(+), 26 deletions(-)
diff --git a/ffmpeg.c b/ffmpeg.c
index da0e523..a99c531 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -60,6 +60,7 @@
# include "libavfilter/buffersink.h"
# include "libavfilter/buffersrc.h"
# include "libavfilter/vsrc_buffer.h"
+# include "libavfilter/asrc_abuffer.h"
#if HAVE_SYS_RESOURCE_H
#include <sys/types.h>
@@ -183,6 +184,7 @@ typedef struct InputFilter {
typedef struct OutputFilter {
AVFilterContext *filter;
+ AVCodecContext *codec; ///< Store output link information. Will be used as decoder codec context to prevent several copies in do_audio_out.
struct OutputStream *ost;
struct FilterGraph *graph;
@@ -856,9 +858,8 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
enum AVMediaType type = in->filter_ctx->input_pads[in->pad_idx].type;
int i;
- // TODO: support other filter types
- if (type != AVMEDIA_TYPE_VIDEO) {
- av_log(NULL, AV_LOG_FATAL, "Only video filters supported currently.\n");
+ if (type != AVMEDIA_TYPE_VIDEO && type != AVMEDIA_TYPE_AUDIO) {
+ av_log(NULL, AV_LOG_FATAL, "Only audio and video filters supported currently.\n");
exit_program(1);
}
@@ -924,10 +925,14 @@ static int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFil
AVCodecContext *codec = ofilter->ost->st->codec;
AVFilterContext *last_filter = out->filter_ctx;
int pad_idx = out->pad_idx;
- int ret;
- enum PixelFormat *pix_fmts = choose_pixel_fmts(ofilter->ost);
- AVBufferSinkParams *buffersink_params = av_buffersink_params_alloc();
+ int ret = 0;
+ enum AVMediaType type = ofilter->out_tmp->filter_ctx->output_pads[ofilter->out_tmp->pad_idx].type;
+ if (type == AVMEDIA_TYPE_VIDEO) {
+ enum PixelFormat *pix_fmts = choose_pixel_fmts(ofilter->ost);
+ AVBufferSinkParams *buffersink_params = av_buffersink_params_alloc();
+
+ /* TODO reindent */
#if FF_API_OLD_VSINK_API
ret = avfilter_graph_create_filter(&ofilter->filter, avfilter_get_by_name("buffersink"),
"out", NULL, pix_fmts, fg->graph);
@@ -937,10 +942,33 @@ static int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFil
"out", NULL, buffersink_params, fg->graph);
#endif
av_freep(&buffersink_params);
+ } else if (type == AVMEDIA_TYPE_AUDIO) {
+ enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_U8,
+ AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_S32,
+ AV_SAMPLE_FMT_FLT,
+ AV_SAMPLE_FMT_DBL, -1 };
+ const int packing_fmts[] = { AVFILTER_PACKED, -1 };
+ const int64_t *chlayouts = avfilter_all_channel_layouts;
+ AVABufferSinkParams *abuffersink_params = av_abuffersink_params_alloc();
+ abuffersink_params->sample_fmts = sample_fmts;
+ abuffersink_params->packing_fmts = packing_fmts;
+ abuffersink_params->channel_layouts = chlayouts;
+
+ ret = avfilter_graph_create_filter(&ofilter->filter, avfilter_get_by_name("abuffersink"),
+ "out", NULL, abuffersink_params, fg->graph);
+
+ av_free(abuffersink_params);
+ }
if (ret < 0)
return ret;
+ ofilter->codec = avcodec_alloc_context3(NULL);
+ ofilter->codec->bit_rate = 0;
+
+ if (type == AVMEDIA_TYPE_VIDEO) {
+ /* TODO reindent */
if (codec->width || codec->height) {
char args[255];
snprintf(args, sizeof(args), "%d:%d:flags=0x%X",
@@ -954,6 +982,7 @@ static int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFil
return ret;
pad_idx = 0;
}
+ }
if ((ret = avfilter_link(last_filter, pad_idx, ofilter->filter, 0)) < 0)
return ret;
@@ -981,7 +1010,10 @@ static int configure_complex_filter(FilterGraph *fg)
InputStream *ist = ifilter->ist;
AVRational sar;
char args[255];
+ enum AVMediaType type = cur->filter_ctx->input_pads[cur->pad_idx].type;
+ if (type == AVMEDIA_TYPE_VIDEO) {
+ /* TODO reindent */
sar = ist->st->sample_aspect_ratio.num ? ist->st->sample_aspect_ratio :
ist->st->codec->sample_aspect_ratio;
snprintf(args, sizeof(args), "%d:%d:%d:%d:%d:%d:%d", ist->st->codec->width,
@@ -992,6 +1024,19 @@ static int configure_complex_filter(FilterGraph *fg)
avfilter_get_by_name("buffer"), cur->name,
args, NULL, fg->graph)) < 0)
return ret;
+ } else if (type == AVMEDIA_TYPE_AUDIO) {
+ AVCodecContext *icodec = ist->st->codec;
+ if (!icodec->channel_layout)
+ icodec->channel_layout = av_get_default_channel_layout(icodec->channels);
+
+ snprintf(args, sizeof(args), "%d:%d:0x%"PRIx64":packed",
+ icodec->sample_rate, icodec->sample_fmt, icodec->channel_layout);
+
+ ret = avfilter_graph_create_filter(&ifilter->filter,
+ avfilter_get_by_name("abuffer"), cur->name,
+ args, NULL, fg->graph);
+ }
+
if ((ret = avfilter_link(ifilter->filter, 0,
cur->filter_ctx, cur->pad_idx)) < 0)
return ret;
@@ -1173,8 +1218,10 @@ void av_noreturn exit_program(int ret)
for (j = 0; j < filtergraphs[i]->nb_inputs; j++)
av_freep(&filtergraphs[i]->inputs[j]);
av_freep(&filtergraphs[i]->inputs);
- for (j = 0; j < filtergraphs[i]->nb_outputs; j++)
+ for (j = 0; j < filtergraphs[i]->nb_outputs; j++) {
+ av_freep(&filtergraphs[i]->outputs[j]->codec);
av_freep(&filtergraphs[i]->outputs[j]);
+ }
av_freep(&filtergraphs[i]->outputs);
av_freep(&filtergraphs[i]);
}
@@ -1391,7 +1438,7 @@ static void get_default_channel_layouts(OutputStream *ost, InputStream *ist)
{
char layout_name[256];
AVCodecContext *enc = ost->st->codec;
- AVCodecContext *dec = ist->st->codec;
+ AVCodecContext *dec = ost->filter ? ost->filter->codec : ist->st->codec;
if (dec->channel_layout &&
av_get_channel_layout_nb_channels(dec->channel_layout) != dec->channels) {
@@ -1560,7 +1607,7 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
int frame_bytes, resample_changed, ret;
AVCodecContext *enc = ost->st->codec;
- AVCodecContext *dec = ist->st->codec;
+ AVCodecContext *dec = ost->filter ? ost->filter->codec : ist->st->codec;
int osize = av_get_bytes_per_sample(enc->sample_fmt);
int isize = av_get_bytes_per_sample(dec->sample_fmt);
uint8_t *buf[AV_NUM_DATA_POINTERS];
@@ -1569,6 +1616,7 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
int i;
int out_linesize = 0;
int buf_linesize = decoded_frame->linesize[0];
+ int64_t ipts;
av_assert0(planes <= AV_NUM_DATA_POINTERS);
@@ -1598,8 +1646,14 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
if ((ost->audio_resample && !ost->swr) || resample_changed || ost->audio_channels_mapped) {
if (resample_changed) {
- av_log(NULL, AV_LOG_INFO, "Input stream #%d:%d frame changed from rate:%d fmt:%s ch:%d chl:0x%"PRIx64" to rate:%d fmt:%s ch:%d chl:0x%"PRIx64"\n",
- ist->file_index, ist->st->index,
+ char tmp[32];
+ if (ost->filter)
+ snprintf(tmp, sizeof(tmp), "filter %s", ost->filter->filter->name);
+ else
+ snprintf(tmp, sizeof(tmp), "stream #%d:%d", ist->file_index, ist->st->index);
+
+ av_log(NULL, AV_LOG_INFO, "Input %s frame changed from rate:%d fmt:%s ch:%d chl:0x%"PRIx64" to rate:%d fmt:%s ch:%d chl:0x%"PRIx64"\n",
+ tmp,
ost->resample_sample_rate, av_get_sample_fmt_name(ost->resample_sample_fmt),
ost->resample_channels, ost->resample_channel_layout,
dec->sample_rate, av_get_sample_fmt_name(dec->sample_fmt),
@@ -1657,14 +1711,16 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
av_assert0(ost->audio_resample || dec->sample_fmt==enc->sample_fmt);
+ ipts = ost->filter ? ost->filter->filter->inputs[0]->current_pts : ist->pts;
if (audio_sync_method > 0) {
- double delta = get_sync_ipts(ost, ist->pts) * enc->sample_rate - ost->sync_opts -
+ double delta = get_sync_ipts(ost, ipts) * enc->sample_rate - ost->sync_opts -
av_fifo_size(ost->fifo) / (enc->channels * osize);
int idelta = delta * dec->sample_rate / enc->sample_rate;
int byte_delta = idelta * isize * dec->channels;
// FIXME resample delay
- if (fabs(delta) > 50) {
+ // TODO resample delay should be ported to lavfi
+ if (!ost->filter && fabs(delta) > 50) {
if (ist->is_start || fabs(delta) > audio_drift_threshold*enc->sample_rate) {
if (byte_delta < 0) {
byte_delta = FFMAX(byte_delta, -size);
@@ -1708,9 +1764,10 @@ static void do_audio_out(AVFormatContext *s, OutputStream *ost,
swr_set_compensation(ost->swr, comp, enc->sample_rate);
}
}
- } else if (audio_sync_method == 0)
- ost->sync_opts = lrintf(get_sync_ipts(ost, ist->pts) * enc->sample_rate) -
+ } else if (audio_sync_method == 0) {
+ ost->sync_opts = lrintf(get_sync_ipts(ost, ipts) * enc->sample_rate) -
av_fifo_size(ost->fifo) / (enc->channels * osize); // FIXME wrong
+ }
if (ost->audio_resample || ost->audio_channels_mapped) {
buftmp = audio_buf;
@@ -2109,6 +2166,10 @@ static int poll_filters(void)
same_quant ? ost->last_quality :
ost->st->codec->global_quality);
break;
+ case AVMEDIA_TYPE_AUDIO:
+ avfilter_fill_frame_from_audio_buffer_ref(filtered_frame, picref);
+ do_audio_out(of->ctx, ost, NULL, filtered_frame);
+ break;
default:
// TODO support audio/subtitle filters
av_assert0(0);
@@ -2551,6 +2612,13 @@ static int transcode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
}
}
+ for (i = 0; i < ist->nb_filters; i++) {
+ if (av_buffersrc_add_frame(ist->filters[i]->filter, decoded_frame, 0) < 0) {
+ av_log(NULL, AV_LOG_FATAL, "Failed to inject audio samples into filter network\n");
+ exit_program(1);
+ }
+ }
+
rate_emu_sleep(ist);
for (i = 0; i < nb_output_streams; i++) {
@@ -2918,6 +2986,37 @@ static InputStream *get_input_stream(OutputStream *ost)
return NULL;
}
+/**
+ * Pick the codec context describing the data that feeds an output stream.
+ *
+ * A stream with a non-negative source_index is looked up in input_streams
+ * first; if that yields nothing, the codec context stored on the attached
+ * output filter is used instead.
+ *
+ * @param ost output stream to inspect
+ * @return the input stream's codec context, the output filter's codec
+ *         context, or NULL when neither is available
+ */
+static AVCodecContext *get_input_codec(OutputStream *ost)
+{
+    InputStream *src = NULL;
+
+    if (ost->source_index >= 0)
+        src = input_streams[ost->source_index];
+    if (src)
+        return src->st->codec;
+
+    return ost->filter ? ost->filter->codec : NULL;
+}
+
+/**
+ * Pick the time base of whatever feeds an output stream.
+ *
+ * A stream with a non-negative source_index is looked up in input_streams
+ * first; if that yields nothing, the time base of the first input link of
+ * the attached output filter is used instead.
+ *
+ * @param ost output stream to inspect
+ * @return the input stream's time base, the filter link's time base, or
+ *         the rational 0/0 when neither is available
+ */
+static AVRational get_input_stream_time_base(OutputStream *ost)
+{
+    InputStream *src = NULL;
+
+    if (ost->source_index >= 0)
+        src = input_streams[ost->source_index];
+    if (src)
+        return src->st->time_base;
+
+    if (!ost->filter)
+        return (AVRational) { 0, 0 };
+
+    return ost->filter->filter->inputs[0]->time_base;
+}
+
static int transcode_init(void)
{
int ret = 0, i, j, k;
@@ -2947,9 +3046,30 @@ static int transcode_init(void)
}
/* init complex filtergraphs */
- for (i = 0; i < nb_filtergraphs; i++)
+ for (i = 0; i < nb_filtergraphs; i++) {
if ((ret = avfilter_graph_config(filtergraphs[i]->graph, NULL)) < 0)
return ret;
+ /*
+ * Fill link information into output codec context
+ * The codec context will be used as decoder codec context in do_audio_out
+ */
+ for (j = 0; j < filtergraphs[i]->nb_outputs; j++) {
+ OutputFilter *ofilter = filtergraphs[i]->outputs[j];
+ AVFilterLink *olink = ofilter->filter->inputs[0];
+ if (olink->type == AVMEDIA_TYPE_AUDIO) {
+ ofilter->codec->codec_id = av_get_pcm_codec(olink->format, -1);
+ ofilter->codec->channels = av_get_channel_layout_nb_channels(olink->channel_layout);
+ ofilter->codec->sample_fmt = olink->format;
+ ofilter->codec->sample_rate = olink->sample_rate;
+ ofilter->codec->time_base = olink->time_base;
+ ofilter->codec->channel_layout = olink->channel_layout;
+ if (ofilter->codec->codec_id == CODEC_ID_NONE)
+ av_log(NULL, AV_LOG_ERROR,
+ "Could not find PCM codec for sample format %s.\n",
+ av_get_sample_fmt_name(olink->format));
+ }
+ }
+ }
/* for each output stream, we compute the right encoding parameters */
for (i = 0; i < nb_output_streams; i++) {
@@ -2961,10 +3081,9 @@ static int transcode_init(void)
continue;
codec = ost->st->codec;
+ icodec = get_input_codec(ost);
if (ist) {
- icodec = ist->st->codec;
-
ost->st->disposition = ist->st->disposition;
codec->bits_per_raw_sample = icodec->bits_per_raw_sample;
codec->chroma_sample_location = icodec->chroma_sample_location;
@@ -2972,6 +3091,7 @@ static int transcode_init(void)
if (ost->stream_copy) {
uint64_t extra_size;
+ AVRational ist_tb;
av_assert0(ist && !ost->filter);
@@ -3003,15 +3123,15 @@ static int transcode_init(void)
memcpy(codec->extradata, icodec->extradata, icodec->extradata_size);
codec->extradata_size= icodec->extradata_size;
- codec->time_base = ist->st->time_base;
+ codec->time_base = ist_tb = get_input_stream_time_base(ost);
/*
* Avi is a special case here because it supports variable fps but
* having the fps and timebase differe significantly adds quite some
* overhead
*/
if(!strcmp(oc->oformat->name, "avi")) {
- if ( copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > 2*av_q2d(ist->st->time_base)
- && av_q2d(ist->st->time_base) < 1.0/500
+ if ( copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > 2*av_q2d(ist_tb)
+ && av_q2d(ist_tb) < 1.0/500
|| copy_tb==0){
codec->time_base = icodec->time_base;
codec->time_base.num *= icodec->ticks_per_frame;
@@ -3022,8 +3142,8 @@ static int transcode_init(void)
&& strcmp(oc->oformat->name, "mov") && strcmp(oc->oformat->name, "mp4") && strcmp(oc->oformat->name, "3gp")
&& strcmp(oc->oformat->name, "3g2") && strcmp(oc->oformat->name, "psp") && strcmp(oc->oformat->name, "ipod")
) {
- if( copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > av_q2d(ist->st->time_base)
- && av_q2d(ist->st->time_base) < 1.0/500
+ if( copy_tb<0 && av_q2d(icodec->time_base)*icodec->ticks_per_frame > av_q2d(ist_tb)
+ && av_q2d(ist_tb) < 1.0/500
|| copy_tb==0){
codec->time_base = icodec->time_base;
codec->time_base.num *= icodec->ticks_per_frame;
@@ -4904,12 +5024,14 @@ static void init_output_filter(OutputFilter *ofilter, OptionsContext *o,
{
OutputStream *ost;
- if (ofilter->out_tmp->filter_ctx->output_pads[ofilter->out_tmp->pad_idx].type != AVMEDIA_TYPE_VIDEO) {
- av_log(NULL, AV_LOG_FATAL, "Only video filters are supported currently.\n");
+ switch (ofilter->out_tmp->filter_ctx->output_pads[ofilter->out_tmp->pad_idx].type) {
+ case AVMEDIA_TYPE_VIDEO: ost = new_video_stream(o, oc, -1); break;
+ case AVMEDIA_TYPE_AUDIO: ost = new_audio_stream(o, oc, -1); break;
+ default:
+ av_log(NULL, AV_LOG_ERROR, "Only audio and video filters are currently supported\n");
exit_program(1);
}
- ost = new_video_stream(o, oc, -1);
ost->source_index = -1;
ost->filter = ofilter;
--
1.7.10
More information about the ffmpeg-devel
mailing list