[FFmpeg-devel] [PATCH] Parallelize vf_lut
Britt Cyr
cyr at google.com
Thu Feb 28 21:35:52 EET 2019
This will use ff_filter_get_nb_threads(ctx) threads, which was 4x
faster when I tested on a 4K video.
---
libavfilter/vf_lut.c | 106 ++++++++++++++++++++++++++++---------------
1 file changed, 70 insertions(+), 36 deletions(-)
diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
index c815ddc194..9e5527e4a1 100644
--- a/libavfilter/vf_lut.c
+++ b/libavfilter/vf_lut.c
@@ -72,6 +72,12 @@ typedef struct LutContext {
int negate_alpha; /* only used by negate */
} LutContext;
+typedef struct ThreadData {
+ AVFrame *in;
+ AVFrame *out;
+ AVFilterLink *link;
+} ThreadData;
+
#define Y 0
#define U 1
#define V 2
@@ -337,26 +343,13 @@ static int config_props(AVFilterLink *inlink)
return 0;
}
-static int filter_frame(AVFilterLink *inlink, AVFrame *in)
-{
- AVFilterContext *ctx = inlink->dst;
+static int lookup_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) {
LutContext *s = ctx->priv;
- AVFilterLink *outlink = ctx->outputs[0];
- AVFrame *out;
- int i, j, plane, direct = 0;
-
- if (av_frame_is_writable(in)) {
- direct = 1;
- out = in;
- } else {
- out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
- if (!out) {
- av_frame_free(&in);
- return AVERROR(ENOMEM);
- }
- av_frame_copy_props(out, in);
- }
-
+ int i, j, plane = 0;
+ const ThreadData *td = arg;
+ const AVFrame *in = td->in;
+ AVFrame *out = td->out;
+ const AVFilterLink *inlink = td->link;
if (s->is_rgb && s->is_16bit && !s->is_planar) {
/* packed, 16-bit */
uint16_t *inrow, *outrow, *inrow0, *outrow0;
@@ -366,11 +359,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
const int in_linesize = in->linesize[0] / 2;
const int out_linesize = out->linesize[0] / 2;
const int step = s->step;
+        const int row_min = (h *  jobnr)      / nb_jobs;
+        const int row_max = (h * (jobnr + 1)) / nb_jobs;
-        inrow0  = (uint16_t*) in ->data[0];
-        outrow0 = (uint16_t*) out->data[0];
+        inrow0  = (uint16_t*) in ->data[0]  + row_min * in_linesize;
+        outrow0 = (uint16_t*) out->data[0] + row_min * out_linesize;
-        for (i = 0; i < h; i ++) {
+        for (i = row_min; i < row_max; i ++) {
inrow = inrow0;
outrow = outrow0;
for (j = 0; j < w; j++) {
@@ -403,11 +398,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
const int in_linesize = in->linesize[0];
const int out_linesize = out->linesize[0];
const int step = s->step;
+        const int row_min = (h *  jobnr)      / nb_jobs;
+        const int row_max = (h * (jobnr + 1)) / nb_jobs;
-        inrow0  = in ->data[0];
-        outrow0 = out->data[0];
+        inrow0  = in ->data[0]  + row_min * in_linesize;
+        outrow0 = out->data[0] + row_min * out_linesize;
-        for (i = 0; i < h; i ++) {
+        for (i = row_min; i < row_max; i ++) {
inrow = inrow0;
outrow = outrow0;
for (j = 0; j < w; j++) {
@@ -435,11 +432,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
const uint16_t *tab = s->lut[plane];
const int in_linesize = in->linesize[plane] / 2;
const int out_linesize = out->linesize[plane] / 2;
+            const int row_min = (h *  jobnr)      / nb_jobs;
+            const int row_max = (h * (jobnr + 1)) / nb_jobs;
-            inrow  = (uint16_t *)in ->data[plane];
-            outrow = (uint16_t *)out->data[plane];
+            inrow  = (uint16_t *)in ->data[plane]  + row_min * in_linesize;
+            outrow = (uint16_t *)out->data[plane] + row_min * out_linesize;
-            for (i = row_min; i < row_max; i++) {
+            for (i = row_min; i < row_max; i++) {
for (j = 0; j < w; j++) {
#if HAVE_BIGENDIAN
outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]);
@@ -463,11 +462,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
const uint16_t *tab = s->lut[plane];
const int in_linesize = in->linesize[plane];
const int out_linesize = out->linesize[plane];
+            const int row_min = (h *  jobnr)      / nb_jobs;
+            const int row_max = (h * (jobnr + 1)) / nb_jobs;
-            inrow  = in ->data[plane];
-            outrow = out->data[plane];
+            inrow  = in ->data[plane]  + row_min * in_linesize;
+            outrow = out->data[plane] + row_min * out_linesize;
-            for (i = 0; i < h; i++) {
+            for (i = row_min; i < row_max; i++) {
for (j = 0; j < w; j++)
outrow[j] = tab[inrow[j]];
inrow += in_linesize;
@@ -476,9 +477,42 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
}
}
- if (!direct)
+ return 0;
+}
+
+static AVFrame *apply_lut(AVFilterLink *inlink, AVFrame *in) {
+ AVFilterContext *ctx = inlink->dst;
+ AVFilterLink *outlink = ctx->outputs[0];
+ AVFrame *out;
+ ThreadData td;
+
+ if (av_frame_is_writable(in)) {
+ out = in;
+ } else {
+ out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!out) {
+ av_frame_free(&in);
+ return NULL;
+ }
+ av_frame_copy_props(out, in);
+ }
+ td.in = in;
+ td.out = out;
+ td.link = inlink;
+ ctx->internal->execute(ctx, lookup_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
+
+ if (out != in)
av_frame_free(&in);
+ return out;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+ AVFilterLink *outlink = inlink->dst->outputs[0];
+ AVFrame *out = apply_lut(inlink, in);
+ if (!out)
+ return AVERROR(ENOMEM);
return ff_filter_frame(outlink, out);
}
@@ -497,18 +531,18 @@ static const AVFilterPad outputs[] = {
{ NULL }
};
-#define DEFINE_LUT_FILTER(name_, description_) \
- AVFilter ff_vf_##name_ = { \
- .name = #name_, \
- .description = NULL_IF_CONFIG_SMALL(description_), \
- .priv_size = sizeof(LutContext), \
- .priv_class = &name_ ## _class, \
- .init = name_##_init, \
- .uninit = uninit, \
- .query_formats = query_formats, \
- .inputs = inputs, \
- .outputs = outputs, \
- .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, \
+#define DEFINE_LUT_FILTER(name_, description_) \
+ AVFilter ff_vf_##name_ = { \
+ .name = #name_, \
+ .description = NULL_IF_CONFIG_SMALL(description_), \
+ .priv_size = sizeof(LutContext), \
+ .priv_class = &name_ ## _class, \
+ .init = name_##_init, \
+ .uninit = uninit, \
+ .query_formats = query_formats, \
+ .inputs = inputs, \
+ .outputs = outputs, \
+ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, \
}
#if CONFIG_LUT_FILTER
--
2.21.0.rc2.261.ga7da99ff1b-goog
More information about the ffmpeg-devel
mailing list