[FFmpeg-devel] [PATCH v3] Improved the performance of 1 decode + N filter graphs and adaptive bitrate.
Shaofei Wang
shaofei.wang at intel.com
Wed Jan 16 23:17:07 EET 2019
With the new option "-abr_pipeline", multiple filter graphs are run
concurrently, which brings an improvement of about 4%~20% or more
in some 1:N scenarios with CPU or GPU acceleration.
Below are some test cases and comparison as reference.
(Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
(Software: Intel iHD driver - 16.9.00100, CentOS 7)
For 1:N transcode by GPU acceleration with vaapi:
./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
-hwaccel_output_format vaapi \
-i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
-vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
-vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null \
-abr_pipeline
test results:
2 encoders 5 encoders 10 encoders
Improved 6.1% 6.9% 5.5%
For 1:N transcode by GPU acceleration with QSV:
./ffmpeg -hwaccel qsv -c:v h264_qsv \
-i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
-vf "scale_qsv=1280:720:format=nv12" -c:v h264_qsv -f null /dev/null \
-vf "scale_qsv=720:480:format=nv12" -c:v h264_qsv -f null /dev/null
test results:
2 encoders 5 encoders 10 encoders
Improved 6% 4% 15%
For Intel GPU acceleration case, 1 decode to N scaling, by QSV:
./ffmpeg -hwaccel qsv -c:v h264_qsv \
-i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
-vf "scale_qsv=1280:720:format=nv12,hwdownload" -pix_fmt nv12 -f null /dev/null \
-vf "scale_qsv=720:480:format=nv12,hwdownload" -pix_fmt nv12 -f null /dev/null
test results:
2 scale 5 scale 10 scale
Improved 12% 21% 21%
For CPU only 1 decode to N scaling:
./ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
-vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
-vf "scale=720:480" -pix_fmt nv12 -f null /dev/null \
-abr_pipeline
test results:
2 scale 5 scale 10 scale
Improved 25% 107% 148%
Signed-off-by: Wang, Shaofei <shaofei.wang at intel.com>
Reviewed-by: Zhao, Jun <jun.zhao at intel.com>
---
fftools/ffmpeg.c | 228 ++++++++++++++++++++++++++++++++++++++++++++----
fftools/ffmpeg.h | 15 ++++
fftools/ffmpeg_filter.c | 4 +
fftools/ffmpeg_opt.c | 6 +-
4 files changed, 237 insertions(+), 16 deletions(-)
diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 544f1a1..7dbff15 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1523,6 +1523,109 @@ static int reap_filters(int flush)
return 0;
}
+/**
+ * Drain the buffersink of the output stream that is fed by one input filter.
+ *
+ * This is the per-filtergraph counterpart of reap_filters() used when
+ * -abr_pipeline is enabled: instead of walking every output stream it only
+ * services the first output stream whose graph has ifilter as inputs[0].
+ * The output stream is initialized on first use; if that initialization
+ * fails the program is terminated through exit_program(1).
+ *
+ * @param flush   when non-zero and the sink reports AVERROR_EOF, a NULL frame
+ *                is handed to do_video_out() for video sinks
+ * @param ifilter input filter whose graph output should be reaped
+ * @return 0 on success or when there is nothing to reap,
+ *         AVERROR(ENOMEM) if the scratch frame cannot be allocated
+ */
+static int pipeline_reap_filters(int flush, InputFilter * ifilter)
+{
+    AVFrame *filtered_frame = NULL;
+    OutputStream *ost = NULL;
+    OutputFile *of;
+    AVFilterContext *filter;
+    AVCodecContext *enc;
+    int ret = 0;
+    int i;
+
+    /* Find the output stream driven by ifilter. Streams that have no filter
+     * attached are skipped so that a NULL ost->filter is never dereferenced. */
+    for (i = 0; i < nb_output_streams; i++) {
+        OutputStream *cand = output_streams[i];
+
+        if (cand->filter && cand->filter->graph->inputs[0] == ifilter) {
+            ost = cand;
+            break;
+        }
+    }
+
+    /* No matching stream (indexing output_streams[nb_output_streams] would be
+     * out of bounds) or the graph is not configured yet: nothing to reap. */
+    if (!ost || !ost->filter->graph->graph)
+        return 0;
+
+    of     = output_files[ost->file_index];
+    enc    = ost->enc_ctx;
+    filter = ost->filter->filter;
+
+    if (!ost->initialized) {
+        char error[1024] = "";
+        ret = init_output_stream(ost, error, sizeof(error));
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR, "Error initializing output stream %d:%d -- %s\n",
+                   ost->file_index, ost->index, error);
+            exit_program(1);
+        }
+    }
+
+    /* The scratch frame is allocated once per output stream and reused. */
+    if (!ost->filtered_frame && !(ost->filtered_frame = av_frame_alloc()))
+        return AVERROR(ENOMEM);
+    filtered_frame = ost->filtered_frame;
+
+    while (1) {
+        double float_pts = AV_NOPTS_VALUE; // this is identical to filtered_frame.pts but with higher precision
+        ret = av_buffersink_get_frame_flags(filter, filtered_frame,
+                                            AV_BUFFERSINK_FLAG_NO_REQUEST);
+        if (ret < 0) {
+            if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
+                av_log(NULL, AV_LOG_WARNING,
+                       "Error in av_buffersink_get_frame_flags(): %s\n", av_err2str(ret));
+            } else if (flush && ret == AVERROR_EOF) {
+                if (av_buffersink_get_type(filter) == AVMEDIA_TYPE_VIDEO)
+                    do_video_out(of, ost, NULL, AV_NOPTS_VALUE);
+            }
+            break;
+        }
+        /* Frames arriving after the stream has finished are discarded. */
+        if (ost->finished) {
+            av_frame_unref(filtered_frame);
+            continue;
+        }
+        if (filtered_frame->pts != AV_NOPTS_VALUE) {
+            int64_t start_time = (of->start_time == AV_NOPTS_VALUE) ? 0 : of->start_time;
+            AVRational filter_tb = av_buffersink_get_time_base(filter);
+            AVRational tb = enc->time_base;
+            int extra_bits = av_clip(29 - av_log2(tb.den), 0, 16);
+
+            /* Rescale into a time base with extra fractional bits to obtain
+             * a higher-precision floating point timestamp. */
+            tb.den <<= extra_bits;
+            float_pts =
+                av_rescale_q(filtered_frame->pts, filter_tb, tb) -
+                av_rescale_q(start_time, AV_TIME_BASE_Q, tb);
+            float_pts /= 1 << extra_bits;
+            // avoid exact midpoints to reduce the chance of rounding differences, this can be removed in case the fps code is changed to work with integers
+            float_pts += FFSIGN(float_pts) * 1.0 / (1<<17);
+
+            filtered_frame->pts =
+                av_rescale_q(filtered_frame->pts, filter_tb, enc->time_base) -
+                av_rescale_q(start_time, AV_TIME_BASE_Q, enc->time_base);
+        }
+
+        switch (av_buffersink_get_type(filter)) {
+        case AVMEDIA_TYPE_VIDEO:
+            if (!ost->frame_aspect_ratio.num)
+                enc->sample_aspect_ratio = filtered_frame->sample_aspect_ratio;
+
+            if (debug_ts) {
+                av_log(NULL, AV_LOG_INFO, "filter -> pts:%s pts_time:%s exact:%f time_base:%d/%d\n",
+                       av_ts2str(filtered_frame->pts), av_ts2timestr(filtered_frame->pts, &enc->time_base),
+                       float_pts,
+                       enc->time_base.num, enc->time_base.den);
+            }
+
+            do_video_out(of, ost, filtered_frame, float_pts);
+            break;
+        case AVMEDIA_TYPE_AUDIO:
+            if (!(enc->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE) &&
+                enc->channels != filtered_frame->channels) {
+                av_log(NULL, AV_LOG_ERROR,
+                       "Audio filter graph output is not normalized and encoder does not support parameter changes\n");
+                break;
+            }
+            do_audio_out(of, ost, filtered_frame);
+            break;
+        default:
+            // TODO support subtitle filters
+            av_assert0(0);
+        }
+
+        av_frame_unref(filtered_frame);
+    }
+
+    return 0;
+}
+
static void print_final_stats(int64_t total_size)
{
uint64_t video_size = 0, audio_size = 0, extra_size = 0, other_size = 0;
@@ -2179,7 +2282,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
}
}
- ret = reap_filters(1);
+ ret = abr_pipeline ? pipeline_reap_filters(1, ifilter) : reap_filters(1);
+
if (ret < 0 && ret != AVERROR_EOF) {
av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", av_err2str(ret));
return ret;
@@ -2208,6 +2312,16 @@ static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
ifilter->eof = 1;
+#if HAVE_THREADS
+ if (abr_pipeline) {
+ ifilter->waited_frm = NULL;
+ pthread_mutex_lock(&ifilter->process_mutex);
+ ifilter->t_end = 1;
+ pthread_cond_signal(&ifilter->process_cond);
+ pthread_mutex_unlock(&ifilter->process_mutex);
+ pthread_join(ifilter->f_thread, NULL);
+ }
+#endif
if (ifilter->filter) {
ret = av_buffersrc_close(ifilter->filter, pts, AV_BUFFERSRC_FLAG_PUSH);
if (ret < 0)
@@ -2252,6 +2366,41 @@ static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacke
return 0;
}
+#if HAVE_THREADS
+/**
+ * Worker thread body serving one input filter in -abr_pipeline mode.
+ *
+ * The thread sleeps on process_cond until either a frame has been posted in
+ * waited_frm or t_end has been set. A posted frame is pushed into the filter
+ * with ifilter_send_frame() and the graph output is drained with
+ * pipeline_reap_filters(). The result is stored in t_error, then waited_frm
+ * is cleared and finish_cond is signalled under finish_mutex.
+ *
+ * The loop ends when t_end is observed or when processing a frame fails.
+ *
+ * @param arg the InputFilter this thread works for
+ * @return arg
+ */
+static void *filter_pipeline(void *arg)
+{
+    InputFilter *fl = arg;
+    AVFrame *frm;
+    int ret;
+    int end;
+
+    while (1) {
+        /* Take a snapshot of the shared state while the lock is held, so
+         * the decision below is based on exactly what the wait observed. */
+        pthread_mutex_lock(&fl->process_mutex);
+        while (fl->waited_frm == NULL && !fl->t_end)
+            pthread_cond_wait(&fl->process_cond, &fl->process_mutex);
+        end = fl->t_end;
+        frm = fl->waited_frm;
+        pthread_mutex_unlock(&fl->process_mutex);
+
+        if (end)
+            break;
+
+        ret = ifilter_send_frame(fl, frm);
+        /* EOF from the filter input is not a failure; the non-pipeline path
+         * of send_frame_to_filters() ignores it as well. */
+        if (ret == AVERROR_EOF)
+            ret = 0;
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "Failed to inject frame into filter network: %s\n", av_err2str(ret));
+        } else {
+            ret = pipeline_reap_filters(0, fl);
+        }
+        fl->t_error = ret;
+
+        /* Report completion of this frame; t_error is written before
+         * waited_frm is cleared under finish_mutex. */
+        pthread_mutex_lock(&fl->finish_mutex);
+        fl->waited_frm = NULL;
+        pthread_cond_signal(&fl->finish_cond);
+        pthread_mutex_unlock(&fl->finish_mutex);
+
+        if (ret < 0)
+            break;
+    }
+    return fl;
+}
+#endif
static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
{
int i, ret;
@@ -2259,22 +2408,71 @@ static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
av_assert1(ist->nb_filters > 0); /* ensure ret is initialized */
for (i = 0; i < ist->nb_filters; i++) {
- if (i < ist->nb_filters - 1) {
- f = ist->filter_frame;
- ret = av_frame_ref(f, decoded_frame);
- if (ret < 0)
+ if (!abr_pipeline) {
+ if (i < ist->nb_filters - 1) {
+ f = ist->filter_frame;
+ ret = av_frame_ref(f, decoded_frame);
+ if (ret < 0)
+ break;
+ } else
+ f = decoded_frame;
+
+ ret = ifilter_send_frame(ist->filters[i], f);
+ if (ret == AVERROR_EOF)
+ ret = 0; /* ignore */
+ if (ret < 0) {
+ av_log(NULL, AV_LOG_ERROR,
+ "Failed to inject frame into filter network: %s\n", av_err2str(ret));
+ break;
+ }
+ } else {
+#if HAVE_THREADS
+ if (i < ist->nb_filters - 1) {
+ f = &ist->filters[i]->input_frm;
+ ret = av_frame_ref(f, decoded_frame);
+ if (ret < 0)
+ break;
+ } else
+ f = decoded_frame;
+
+ if (!ist->filters[i]->b_abr_thread_init) {
+ if ((ret = pthread_create(&ist->filters[i]->f_thread, NULL, filter_pipeline, ist->filters[i]))) {
+ av_log(NULL, AV_LOG_ERROR, "pthread_create failed: %s. Try to increase `ulimit -v` or decrease `ulimit -s`.\n", strerror(ret));
+ return AVERROR(ret);
+ }
+ pthread_mutex_init(&ist->filters[i]->process_mutex, NULL);
+ pthread_mutex_init(&ist->filters[i]->finish_mutex, NULL);
+ pthread_cond_init(&ist->filters[i]->process_cond, NULL);
+ pthread_cond_init(&ist->filters[i]->finish_cond, NULL);
+ ist->filters[i]->t_end = 0;
+ ist->filters[i]->t_error = 0;
+ ist->filters[i]->b_abr_thread_init = 1;
+ }
+
+ pthread_mutex_lock(&ist->filters[i]->process_mutex);
+ ist->filters[i]->waited_frm = f;
+ pthread_cond_signal(&ist->filters[i]->process_cond);
+ pthread_mutex_unlock(&ist->filters[i]->process_mutex);
+#endif
+ }
+ }
+#if HAVE_THREADS
+ if (abr_pipeline) {
+ for (i = 0; i < ist->nb_filters; i++) {
+ pthread_mutex_lock(&ist->filters[i]->finish_mutex);
+ while(ist->filters[i]->waited_frm != NULL)
+ pthread_cond_wait(&ist->filters[i]->finish_cond, &ist->filters[i]->finish_mutex);
+ pthread_mutex_unlock(&ist->filters[i]->finish_mutex);
+ }
+ for (i = 0; i < ist->nb_filters; i++) {
+ if (ist->filters[i]->t_error < 0) {
+ ret = ist->filters[i]->t_error;
break;
- } else
- f = decoded_frame;
- ret = ifilter_send_frame(ist->filters[i], f);
- if (ret == AVERROR_EOF)
- ret = 0; /* ignore */
- if (ret < 0) {
- av_log(NULL, AV_LOG_ERROR,
- "Failed to inject frame into filter network: %s\n", av_err2str(ret));
- break;
+ }
}
}
+#endif
+
return ret;
}
@@ -4642,7 +4840,7 @@ static int transcode_step(void)
if (ret < 0)
return ret == AVERROR_EOF ? 0 : ret;
- return reap_filters(0);
+ return abr_pipeline ? 0 : reap_filters(0);
}
/*
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index eb1eaf6..110306a 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -253,6 +253,20 @@ typedef struct InputFilter {
AVBufferRef *hw_frames_ctx;
+ // for abr pipeline
+ int b_abr_thread_init;
+#if HAVE_THREADS
+ AVFrame *waited_frm;
+ AVFrame input_frm;
+ pthread_t f_thread;
+ pthread_cond_t process_cond;
+ pthread_cond_t finish_cond;
+ pthread_mutex_t process_mutex;
+ pthread_mutex_t finish_mutex;
+ int t_end;
+ int t_error;
+#endif
+
int eof;
} InputFilter;
@@ -606,6 +620,7 @@ extern int frame_bits_per_raw_sample;
extern AVIOContext *progress_avio;
extern float max_error_rate;
extern char *videotoolbox_pixfmt;
+extern int abr_pipeline;
extern int filter_nbthreads;
extern int filter_complex_nbthreads;
diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index 6518d50..8f14fbc 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c
@@ -197,6 +197,7 @@ DEF_CHOOSE_FORMAT(channel_layouts, uint64_t, channel_layout, channel_layouts, 0,
int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
{
FilterGraph *fg = av_mallocz(sizeof(*fg));
+ int i;
if (!fg)
exit_program(1);
@@ -225,6 +226,9 @@ int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
GROW_ARRAY(ist->filters, ist->nb_filters);
ist->filters[ist->nb_filters - 1] = fg->inputs[0];
+ if (abr_pipeline)
+ for (i = 0; i < ist->nb_filters; i++)
+ ist->filters[i]->b_abr_thread_init = 0;
GROW_ARRAY(filtergraphs, nb_filtergraphs);
filtergraphs[nb_filtergraphs - 1] = fg;
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index d4851a2..fa5a556 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -110,6 +110,7 @@ float max_error_rate = 2.0/3;
int filter_nbthreads = 0;
int filter_complex_nbthreads = 0;
int vstats_version = 2;
+int abr_pipeline = 0;
static int intra_only = 0;
@@ -3502,7 +3503,10 @@ const OptionDef options[] = {
"set the maximum number of queued packets from the demuxer" },
{ "find_stream_info", OPT_BOOL | OPT_PERFILE | OPT_INPUT | OPT_EXPERT, { &find_stream_info },
"read and decode the streams to fill missing information with heuristics" },
-
+#if HAVE_THREADS
+ { "abr_pipeline", OPT_BOOL, { &abr_pipeline },
+ "adaptive bitrate pipeline (1 decode to N filter graphs, and 1 to N transcode" },
+#endif
/* video options */
{ "vframes", OPT_VIDEO | HAS_ARG | OPT_PERFILE | OPT_OUTPUT, { .func_arg = opt_video_frames },
"set the number of video frames to output", "number" },
--
1.8.3.1
More information about the ffmpeg-devel
mailing list