[FFmpeg-devel] [PATCH] Parallelize vf_lut
Michael Niedermayer
michael at niedermayer.cc
Wed Feb 27 17:28:05 EET 2019
On Mon, Feb 25, 2019 at 03:25:30PM -0500, Britt Cyr wrote:
> ---
> libavfilter/vf_lut.c | 106 ++++++++++++++++++++++++++++---------------
> 1 file changed, 70 insertions(+), 36 deletions(-)
>
> diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
> index c815ddc194..14386938be 100644
> --- a/libavfilter/vf_lut.c
> +++ b/libavfilter/vf_lut.c
> @@ -72,6 +72,12 @@ typedef struct LutContext {
> int negate_alpha; /* only used by negate */
> } LutContext;
>
> +typedef struct ThreadData {
> + AVFrame *in;
> + AVFrame *out;
> + AVFilterLink *link;
> +} ThreadData;
The indentation depth is inconsistent.
[...]
> @@ -366,11 +359,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> const int in_linesize = in->linesize[0] / 2;
> const int out_linesize = out->linesize[0] / 2;
> const int step = s->step;
> + const int row_min = jobnr / nb_jobs * h;
> + const int row_max = (jobnr + 1) / nb_jobs * h;
>
> inrow0 = (uint16_t*) in ->data[0];
> outrow0 = (uint16_t*) out->data[0];
>
> - for (i = 0; i < h; i ++) {
> + for (i = row_min; i < row_max; i ++) {
> inrow = inrow0;
> outrow = outrow0;
> for (j = 0; j < w; j++) {
> @@ -403,11 +398,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> const int in_linesize = in->linesize[0];
> const int out_linesize = out->linesize[0];
> const int step = s->step;
> + const int row_min = jobnr / nb_jobs * h;
> + const int row_max = (jobnr + 1) / nb_jobs * h;
>
> inrow0 = in ->data[0];
> outrow0 = out->data[0];
>
> - for (i = 0; i < h; i ++) {
> + for (i = row_min; i < row_max; i ++) {
> inrow = inrow0;
> outrow = outrow0;
> for (j = 0; j < w; j++) {
> @@ -435,11 +432,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> const uint16_t *tab = s->lut[plane];
> const int in_linesize = in->linesize[plane] / 2;
> const int out_linesize = out->linesize[plane] / 2;
> + const int row_min = jobnr / nb_jobs * h;
> + const int row_max = (jobnr + 1) / nb_jobs * h;
>
> inrow = (uint16_t *)in ->data[plane];
> outrow = (uint16_t *)out->data[plane];
>
> - for (i = 0; i < h; i++) {
> + for (i = row_min; i < row_max; i++) {
> for (j = 0; j < w; j++) {
> #if HAVE_BIGENDIAN
> outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]);
> @@ -463,11 +462,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> const uint16_t *tab = s->lut[plane];
> const int in_linesize = in->linesize[plane];
> const int out_linesize = out->linesize[plane];
> + const int row_min = jobnr / nb_jobs * h;
> + const int row_max = (jobnr + 1) / nb_jobs * h;
>
> inrow = in ->data[plane];
> outrow = out->data[plane];
>
> - for (i = 0; i < h; i++) {
> + for (i = row_min; i < row_max; i++) {
> for (j = 0; j < w; j++)
> outrow[j] = tab[inrow[j]];
> inrow += in_linesize;
Unrelated to your patch, I just spotted this because the patch makes it obvious:
replicating this code 4 times is a bit ugly.
> @@ -476,9 +477,42 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> }
> }
>
> - if (!direct)
> + return 0;
> +}
> +
> +static AVFrame *apply_lut(AVFilterLink *inlink, AVFrame *in) {
> + AVFilterContext *ctx = inlink->dst;
> + AVFilterLink *outlink = ctx->outputs[0];
> + AVFrame *out;
> + ThreadData td;
> +
> + if (av_frame_is_writable(in)) {
> + out = in;
> + } else {
> + out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> + if (!out) {
> + av_frame_free(&in);
> + return NULL;
> + }
> + av_frame_copy_props(out, in);
> + }
> + td.in = in;
> + td.out = out;
> + td.link = inlink;
> + ctx->internal->execute(ctx, lookup_slice, &td, NULL, FFMIN(outlink->h, 1));
How many tasks does this run in parallel, and how much faster is it?
thanks
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
It's not that you shouldn't use gotos, but rather that you should write
readable code, and code with gotos often, but not always, is less readable
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20190227/325cbee5/attachment.sig>
More information about the ffmpeg-devel
mailing list