[FFmpeg-devel] [PATCH 1/2] avfilter/vf_zscale: add slice threading

Fri May 31 23:17:15 EEST 2019

On 5/31/19, Pavel Koshevoy <pkoshevoy at gmail.com> wrote:
> On Fri, May 31, 2019 at 2:03 PM Paul B Mahol <onemda at gmail.com> wrote:
>>
>> On 5/31/19, Pavel Koshevoy <pkoshevoy at gmail.com> wrote:
>> > On Fri, May 31, 2019 at 1:44 PM Pavel Koshevoy <pkoshevoy at gmail.com>
>> > wrote:
>> >>
>> >>
>> >>
>> >>
>> >> On Fri, May 31, 2019 at 4:46 AM Paul B Mahol <onemda at gmail.com> wrote:
>> >> >
>> >> > Signed-off-by: Paul B Mahol <onemda at gmail.com>
>> >> > ---
>> >> >  libavfilter/vf_zscale.c | 335
>> >> > +++++++++++++++++++++++++---------------
>> >> >  1 file changed, 214 insertions(+), 121 deletions(-)
>> >> >
>> >> > diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
>> >> > index f0309272fa..c53bb08ccc 100644
>> >> > --- a/libavfilter/vf_zscale.c
>> >> > +++ b/libavfilter/vf_zscale.c
>> >> > @@ -74,6 +74,16 @@ enum var_name {
>> >> >      VARS_NB
>> >> >  };
>> >> >
>> >> > +typedef struct ZScaleThreadContext {
>> >> > +    void *tmp;
>> >> > +    size_t tmp_size;
>> >> > +
>> >> > +    zimg_image_format src_format, dst_format;
>> >> > +    zimg_image_format alpha_src_format, alpha_dst_format;
>> >> > +    zimg_graph_builder_params alpha_params, params;
>> >> > +    zimg_filter_graph *alpha_graph, *graph;
>> >> > +} ZScaleThreadContext;
>> >> > +
>> >> >  typedef struct ZScaleContext {
>> >> >      const AVClass *class;
>> >> >
>> >> > @@ -100,6 +110,8 @@ typedef struct ZScaleContext {
>> >> >      double nominal_peak_luminance;
>> >> >      int approximate_gamma;
>> >> >
>> >> > +    int nb_threads;
>> >> > +
>> >> >      char *w_expr;               ///< width  expression string
>> >> >      char *h_expr;               ///< height expression string
>> >> >
>> >> > @@ -110,13 +122,7 @@ typedef struct ZScaleContext {
>> >> >
>> >> >      int force_original_aspect_ratio;
>> >> >
>> >> > -    void *tmp;
>> >> > -    size_t tmp_size;
>> >> > -
>> >> > -    zimg_image_format src_format, dst_format;
>> >> > -    zimg_image_format alpha_src_format, alpha_dst_format;
>> >> > -    zimg_graph_builder_params alpha_params, params;
>> >> > -    zimg_filter_graph *alpha_graph, *graph;
>> >> > +    ZScaleThreadContext *ztd;
>> >> >
>> >> >      enum AVColorSpace in_colorspace, out_colorspace;
>> >> >      enum AVColorTransferCharacteristic in_trc, out_trc;
>> >> > @@ -204,6 +210,12 @@ static int config_props(AVFilterLink *outlink)
>> >> >      int ret;
>> >> >      int factor_w, factor_h;
>> >> >
>> >> > +    s->nb_threads = ff_filter_get_nb_threads(ctx);
>> >> > +    av_freep(&s->ztd);
>> >> > +    s->ztd = av_calloc(s->nb_threads, sizeof(*s->ztd));
>> >> > +    if (!s->ztd)
>> >> > +        return AVERROR(ENOMEM);
>> >> > +
>> >> >      var_values[VAR_IN_W]  = var_values[VAR_IW] = inlink->w;
>> >> >      var_values[VAR_IN_H]  = var_values[VAR_IH] = inlink->h;
>> >> >      var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN;
>> >> > @@ -458,10 +470,12 @@ static int convert_range(enum AVColorRange
>> >> > color_range)
>> >> >  }
>> >> >
>> >> >  static void format_init(zimg_image_format *format, AVFrame *frame,
>> >> > const AVPixFmtDescriptor *desc,
>> >> > -                        int colorspace, int primaries, int transfer,
>> >> > int range, int location)
>> >> > +                        int colorspace, int primaries, int transfer,
>> >> > int range, int location,
>> >> > +                        int width, int height,
>> >> > +                        double active_top, double active_height, int
>> >> > set_active)
>> >> >  {
>> >> > -    format->width = frame->width;
>> >> > -    format->height = frame->height;
>> >> > +    format->width = width;
>> >> > +    format->height = height;
>> >> >      format->subsample_w = desc->log2_chroma_w;
>> >> >      format->subsample_h = desc->log2_chroma_h;
>> >> >      format->depth = desc->comp[0].depth;
>> >> > @@ -472,6 +486,10 @@ static void format_init(zimg_image_format
>> >> > *format,
>> >> > AVFrame *frame, const AVPixFm
>> >> >      format->transfer_characteristics = transfer == - 1 ?
>> >> > convert_trc(frame->color_trc) : transfer;
>> >> >      format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
>> >> > ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) :
>> >> > range;
>> >> >      format->chroma_location = location == -1 ?
>> >> > convert_chroma_location(frame->chroma_location) : location;
>> >> > +    if (!set_active)
>> >> > +        return;
>> >> > +    format->active_region.top = active_top;
>> >> > +    format->active_region.height = active_height;
>> >> >  }
>> >> >
>> >> >  static int graph_build(zimg_filter_graph **graph,
>> >> > zimg_graph_builder_params *params,
>> >> > @@ -502,16 +520,163 @@ static int graph_build(zimg_filter_graph
>> >> > **graph,
>> >> > zimg_graph_builder_params *par
>> >> >      return 0;
>> >> >  }
>> >> >
>> >> > +typedef struct ThreadData {
>> >> > +    AVFrame *in, *out;
>> >> > +    const AVPixFmtDescriptor *desc, *odesc;
>> >> > +} ThreadData;
>> >> > +
>> >> > +static int prepare_graph(AVFilterContext *ctx, void *arg, int jobnr,
>> >> > int nb_jobs)
>> >> > +{
>> >> > +    ZScaleContext *s = ctx->priv;
>> >> > +    ThreadData *td = arg;
>> >> > +    AVFrame *in = td->in;
>> >> > +    AVFrame *out = td->out;
>> >> > +    const AVPixFmtDescriptor *desc = td->desc;
>> >> > +    const AVPixFmtDescriptor *odesc = td->odesc;
>> >> > +    const int in_slice_start = (in->height * jobnr) / nb_jobs;
>> >> > +    const int in_slice_end = (in->height * (jobnr+1)) / nb_jobs;
>> >> > +    const int out_slice_start = (out->height * jobnr) / nb_jobs;
>> >> > +    const int out_slice_end = (out->height * (jobnr+1)) / nb_jobs;
>> >> > +    const double scale_h = (double)in->height / (double)out->height;
>> >> > +    double active_top = out_slice_start * scale_h;
>> >> > +    double active_height = (out_slice_end - out_slice_start) *
>> >> > scale_h;
>> >> > +    int ret;
>> >> > +
>> >> > +    zimg_image_format_default(&s->ztd[jobnr].src_format,
>> >> > ZIMG_API_VERSION);
>> >> > +    zimg_image_format_default(&s->ztd[jobnr].dst_format,
>> >> > ZIMG_API_VERSION);
>> >> > +    zimg_graph_builder_params_default(&s->ztd[jobnr].params,
>> >> > ZIMG_API_VERSION);
>> >> > +
>> >> > +    s->ztd[jobnr].params.dither_type = s->dither;
>> >> > +    s->ztd[jobnr].params.cpu_type = ZIMG_CPU_AUTO;
>> >> > +    s->ztd[jobnr].params.resample_filter = s->filter;
>> >> > +    s->ztd[jobnr].params.resample_filter_uv = s->filter;
>> >> > +    s->ztd[jobnr].params.nominal_peak_luminance =
>> >> > s->nominal_peak_luminance;
>> >> > +    s->ztd[jobnr].params.allow_approximate_gamma =
>> >> > s->approximate_gamma;
>> >> > +
>> >> > +    format_init(&s->ztd[jobnr].src_format, in, desc,
>> >> > s->colorspace_in,
>> >> > +                s->primaries_in, s->trc_in, s->range_in,
>> >> > s->chromal_in,
>> >> > +                in->width, in->height,
>> >> > +                active_top, active_height, 1);
>> >> > +    format_init(&s->ztd[jobnr].dst_format, out, odesc,
>> >> > s->colorspace,
>> >> > +                s->primaries, s->trc, s->range, s->chromal,
>> >> > +                out->width, out_slice_end - out_slice_start,
>> >> > +                0, 0, 0);
>> >> > +
>> >> > +    ret = graph_build(&s->ztd[jobnr].graph, &s->ztd[jobnr].params,
>> >> > &s->ztd[jobnr].src_format, &s->ztd[jobnr].dst_format,
>> >> > +                      &s->ztd[jobnr].tmp, &s->ztd[jobnr].tmp_size);
>> >> > +    if (ret)
>> >> > +        return ret;
>> >> > +
>> >> > +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
>> >> > AV_PIX_FMT_FLAG_ALPHA) {
>> >> > +        zimg_image_format_default(&s->ztd[jobnr].alpha_src_format,
>> >> > ZIMG_API_VERSION);
>> >> > +        zimg_image_format_default(&s->ztd[jobnr].alpha_dst_format,
>> >> > ZIMG_API_VERSION);
>> >> > +
>> >> > zimg_graph_builder_params_default(&s->ztd[jobnr].alpha_params,
>> >> > ZIMG_API_VERSION);
>> >> > +
>> >> > +        s->ztd[jobnr].alpha_params.dither_type = s->dither;
>> >> > +        s->ztd[jobnr].alpha_params.cpu_type = ZIMG_CPU_AUTO;
>> >> > +        s->ztd[jobnr].alpha_params.resample_filter = s->filter;
>> >> > +
>> >> > +        s->ztd[jobnr].alpha_src_format.width = in->width;
>> >> > +        s->ztd[jobnr].alpha_src_format.height = in->height;
>> >> > +        s->ztd[jobnr].alpha_src_format.depth = desc->comp[0].depth;
>> >> > +        s->ztd[jobnr].alpha_src_format.pixel_type = (desc->flags &
>> >> > AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ?
>> >> > ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>> >> > +        s->ztd[jobnr].alpha_src_format.color_family =
>> >> > ZIMG_COLOR_GREY;
>> >> > +        s->ztd[jobnr].alpha_src_format.active_region.left = 0;
>> >> > +        s->ztd[jobnr].alpha_src_format.active_region.top =
>> >> > in_slice_start;
>> >> > +        s->ztd[jobnr].alpha_src_format.active_region.width =
>> >> > in->width;
>> >> > +        s->ztd[jobnr].alpha_src_format.active_region.height =
>> >> > in_slice_end - in_slice_start;
>> >> > +
>> >> > +        s->ztd[jobnr].alpha_dst_format.width = out->width;
>> >> > +        s->ztd[jobnr].alpha_dst_format.height = out->height;
>> >> > +        s->ztd[jobnr].alpha_dst_format.depth = odesc->comp[0].depth;
>> >> > +        s->ztd[jobnr].alpha_dst_format.pixel_type = (odesc->flags &
>> >> > AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8
>> >> > ?
>> >> > ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>> >> > +        s->ztd[jobnr].alpha_dst_format.color_family =
>> >> > ZIMG_COLOR_GREY;
>> >> > +
>> >> > +        zimg_filter_graph_free(s->ztd[jobnr].alpha_graph);
>> >> > +        s->ztd[jobnr].alpha_graph =
>> >> > zimg_filter_graph_build(&s->ztd[jobnr].alpha_src_format,
>> >> > &s->ztd[jobnr].alpha_dst_format, &s->ztd[jobnr].alpha_params);
>> >> > +        if (!s->ztd[jobnr].alpha_graph) {
>> >> > +            return print_zimg_error(ctx);
>> >> > +        }
>> >> > +    }
>> >> > +
>> >> > +    return 0;
>> >> > +}
>> >> > +
>> >> > +static int zscale_slice(AVFilterContext *ctx, void *arg, int jobnr,
>> >> > int
>> >> > nb_jobs)
>> >> > +{
>> >> > +    ZScaleContext *s = ctx->priv;
>> >> > +    ThreadData *td = arg;
>> >> > +    AVFrame *in = td->in;
>> >> > +    AVFrame *out = td->out;
>> >> > +    const AVPixFmtDescriptor *desc = td->desc;
>> >> > +    const AVPixFmtDescriptor *odesc = td->odesc;
>> >> > +    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
>> >> > +    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
>> >> > +    int ret = AVERROR(EINVAL);
>> >> > +
>> >> > +    for (int plane = 0; plane < 3; plane++) {
>> >> > +        const int height = plane > 0 ? AV_CEIL_RSHIFT(out->height,
>> >> > odesc->log2_chroma_h) : out->height;
>> >> > +        const int out_slice_start = (height * jobnr) / nb_jobs;
>> >> > +        int p = desc->comp[plane].plane;
>> >> > +
>> >> > +        src_buf.plane[plane].data   = in->data[p];
>> >> > +        src_buf.plane[plane].stride = in->linesize[p];
>> >> > +        src_buf.plane[plane].mask   = -1;
>> >> > +
>> >> > +        p = odesc->comp[plane].plane;
>> >> > +        dst_buf.plane[plane].data   = out->data[p] + out_slice_start
>> >> > *
>> >> > out->linesize[p];
>> >> > +        dst_buf.plane[plane].stride = out->linesize[p];
>> >> > +        dst_buf.plane[plane].mask   = -1;
>> >> > +    }
>> >> > +
>> >> > +    if (s->ztd[jobnr].graph)
>> >> > +        ret = zimg_filter_graph_process(s->ztd[jobnr].graph,
>> >> > &src_buf,
>> >> > &dst_buf, s->ztd[jobnr].tmp, 0, 0, 0, 0);
>> >> > +    if (ret)
>> >> > +        return print_zimg_error(ctx);
>> >> > +
>> >> > +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
>> >> > AV_PIX_FMT_FLAG_ALPHA) {
>> >> > +        const int out_slice_start = (out->height * jobnr) / nb_jobs;
>> >> > +
>> >> > +        src_buf.plane[0].data   = in->data[3];
>> >> > +        src_buf.plane[0].stride = in->linesize[3];
>> >> > +        src_buf.plane[0].mask   = -1;
>> >> > +
>> >> > +        dst_buf.plane[0].data   = out->data[3] + out_slice_start *
>> >> > out->linesize[3];
>> >> > +        dst_buf.plane[0].stride = out->linesize[3];
>> >> > +        dst_buf.plane[0].mask   = -1;
>> >> > +
>> >> > +        ret = zimg_filter_graph_process(s->ztd[jobnr].alpha_graph,
>> >> > &src_buf, &dst_buf, s->ztd[jobnr].tmp, 0, 0, 0, 0);
>> >> > +        if (ret)
>> >> > +            return print_zimg_error(ctx);
>> >> > +    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
>> >> > +        int x, y;
>> >> > +
>> >> > +        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
>> >> > +            for (y = 0; y < out->height; y++) {
>> >> > +                for (x = 0; x < out->width; x++) {
>> >> > +                    AV_WN32(out->data[3] + x * odesc->comp[3].step +
>> >> > y
>> >> > * out->linesize[3],
>> >> > +                            av_float2int(1.0f));
>> >> > +                }
>> >> > +            }
>> >> > +        } else {
>> >> > +            for (y = 0; y < out->height; y++)
>> >> > +                memset(out->data[3] + y * out->linesize[3], 0xff,
>> >> > out->width);
>> >> > +        }
>> >> > +    }
>> >> > +
>> >> > +    return 0;
>> >> > +}
>> >> > +
>> >> >  static int filter_frame(AVFilterLink *link, AVFrame *in)
>> >> >  {
>> >> > -    ZScaleContext *s = link->dst->priv;
>> >> > -    AVFilterLink *outlink = link->dst->outputs[0];
>> >> > +    AVFilterContext *ctx = link->dst;
>> >> > +    ZScaleContext *s = ctx->priv;
>> >> > +    AVFilterLink *outlink = ctx->outputs[0];
>> >> >      const AVPixFmtDescriptor *desc =
>> >> > av_pix_fmt_desc_get(link->format);
>> >> >      const AVPixFmtDescriptor *odesc =
>> >> > av_pix_fmt_desc_get(outlink->format);
>> >> > -    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
>> >> > -    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
>> >> >      char buf[32];
>> >> > -    int ret = 0, plane;
>> >> > +    int ret = 0;
>> >> > +    ThreadData td;
>> >> >      AVFrame *out;
>> >> >
>> >> >      out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
>> >> > @@ -552,41 +717,28 @@ static int filter_frame(AVFilterLink *link,
>> >> > AVFrame *in)
>> >> >              return ret;
>> >> >          }
>> >> >
>> >> > -        zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
>> >> > -        zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
>> >> > -        zimg_graph_builder_params_default(&s->params,
>> >> > ZIMG_API_VERSION);
>> >> > -
>> >> > -        s->params.dither_type = s->dither;
>> >> > -        s->params.cpu_type = ZIMG_CPU_AUTO;
>> >> > -        s->params.resample_filter = s->filter;
>> >> > -        s->params.resample_filter_uv = s->filter;
>> >> > -        s->params.nominal_peak_luminance =
>> >> > s->nominal_peak_luminance;
>> >> > -        s->params.allow_approximate_gamma = s->approximate_gamma;
>> >> > -
>> >> > -        format_init(&s->src_format, in, desc, s->colorspace_in,
>> >> > -                    s->primaries_in, s->trc_in, s->range_in,
>> >> > s->chromal_in);
>> >> > -        format_init(&s->dst_format, out, odesc, s->colorspace,
>> >> > -                    s->primaries, s->trc, s->range, s->chromal);
>> >> > +        td.out = out;
>> >> > +        td.in = in;
>> >> > +        td.desc = desc;
>> >> > +        td.odesc = odesc;
>> >> > +        ret = ctx->internal->execute(ctx, prepare_graph, &td, NULL,
>> >> > FFMIN3(in->height, out->height, s->nb_threads));
>> >> > +        if (ret)
>> >> > +            goto fail;
>> >> >
>> >> >          if (s->colorspace != -1)
>> >> > -            out->colorspace =
>> >> > (int)s->dst_format.matrix_coefficients;
>> >> > +            out->colorspace =
>> >> > (int)s->ztd[0].dst_format.matrix_coefficients;
>> >> >
>> >> >          if (s->primaries != -1)
>> >> > -            out->color_primaries =
>> >> > (int)s->dst_format.color_primaries;
>> >> > +            out->color_primaries =
>> >> > (int)s->ztd[0].dst_format.color_primaries;
>> >> >
>> >> >          if (s->range != -1)
>> >> > -            out->color_range = (int)s->dst_format.pixel_range + 1;
>> >> > +            out->color_range = (int)s->ztd[0].dst_format.pixel_range
>> >> > +
>> >> > 1;
>> >> >
>> >> >          if (s->trc != -1)
>> >> > -            out->color_trc =
>> >> > (int)s->dst_format.transfer_characteristics;
>> >> > +            out->color_trc =
>> >> > (int)s->ztd[0].dst_format.transfer_characteristics;
>> >> >
>> >> >          if (s->chromal != -1)
>> >> > -            out->chroma_location =
>> >> > (int)s->dst_format.chroma_location -
>> >> > 1;
>> >> > -
>> >> > -        ret = graph_build(&s->graph, &s->params, &s->src_format,
>> >> > &s->dst_format,
>> >> > -                          &s->tmp, &s->tmp_size);
>> >> > -        if (ret < 0)
>> >> > -            goto fail;
>> >> > +            out->chroma_location =
>> >> > (int)s->ztd[0].dst_format.chroma_location - 1;
>> >> >
>> >> >          s->in_colorspace  = in->colorspace;
>> >> >          s->in_trc         = in->color_trc;
>> >> > @@ -596,101 +748,38 @@ static int filter_frame(AVFilterLink *link,
>> >> > AVFrame *in)
>> >> >          s->out_trc        = out->color_trc;
>> >> >          s->out_primaries  = out->color_primaries;
>> >> >          s->out_range      = out->color_range;
>> >> > -
>> >> > -        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
>> >> > AV_PIX_FMT_FLAG_ALPHA) {
>> >> > -            zimg_image_format_default(&s->alpha_src_format,
>> >> > ZIMG_API_VERSION);
>> >> > -            zimg_image_format_default(&s->alpha_dst_format,
>> >> > ZIMG_API_VERSION);
>> >> > -            zimg_graph_builder_params_default(&s->alpha_params,
>> >> > ZIMG_API_VERSION);
>> >> > -
>> >> > -            s->alpha_params.dither_type = s->dither;
>> >> > -            s->alpha_params.cpu_type = ZIMG_CPU_AUTO;
>> >> > -            s->alpha_params.resample_filter = s->filter;
>> >> > -
>> >> > -            s->alpha_src_format.width = in->width;
>> >> > -            s->alpha_src_format.height = in->height;
>> >> > -            s->alpha_src_format.depth = desc->comp[0].depth;
>> >> > -            s->alpha_src_format.pixel_type = (desc->flags &
>> >> > AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ?
>> >> > ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>> >> > -            s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
>> >> > -
>> >> > -            s->alpha_dst_format.width = out->width;
>> >> > -            s->alpha_dst_format.height = out->height;
>> >> > -            s->alpha_dst_format.depth = odesc->comp[0].depth;
>> >> > -            s->alpha_dst_format.pixel_type = (odesc->flags &
>> >> > AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8
>> >> > ?
>> >> > ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>> >> > -            s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
>> >> > -
>> >> > -            zimg_filter_graph_free(s->alpha_graph);
>> >> > -            s->alpha_graph =
>> >> > zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format,
>> >> > &s->alpha_params);
>> >> > -            if (!s->alpha_graph) {
>> >> > -                ret = print_zimg_error(link->dst);
>> >> > -                goto fail;
>> >> > -            }
>> >> > -        }
>> >> >      }
>> >> >
>> >> >      if (s->colorspace != -1)
>> >> > -        out->colorspace = (int)s->dst_format.matrix_coefficients;
>> >> > +        out->colorspace =
>> >> > (int)s->ztd[0].dst_format.matrix_coefficients;
>> >> >
>> >> >      if (s->primaries != -1)
>> >> > -        out->color_primaries = (int)s->dst_format.color_primaries;
>> >> > +        out->color_primaries =
>> >> > (int)s->ztd[0].dst_format.color_primaries;
>> >> >
>> >> >      if (s->range != -1)
>> >> > -        out->color_range = (int)s->dst_format.pixel_range;
>> >> > +        out->color_range = (int)s->ztd[0].dst_format.pixel_range;
>> >> >
>> >> >      if (s->trc != -1)
>> >> > -        out->color_trc =
>> >> > (int)s->dst_format.transfer_characteristics;
>> >> > +        out->color_trc =
>> >> > (int)s->ztd[0].dst_format.transfer_characteristics;
>> >> > +
>> >> > +    if (s->chromal != -1)
>> >> > +        out->chroma_location =
>> >> > (int)s->ztd[0].dst_format.chroma_location - 1;
>> >> >
>> >> >      av_reduce(&out->sample_aspect_ratio.num,
>> >> > &out->sample_aspect_ratio.den,
>> >> >                (int64_t)in->sample_aspect_ratio.num * outlink->h *
>> >> > link->w,
>> >> >                (int64_t)in->sample_aspect_ratio.den * outlink->w *
>> >> > link->h,
>> >> >                INT_MAX);
>> >> >
>> >> > -    for (plane = 0; plane < 3; plane++) {
>> >> > -        int p = desc->comp[plane].plane;
>> >> > -        src_buf.plane[plane].data   = in->data[p];
>> >> > -        src_buf.plane[plane].stride = in->linesize[p];
>> >> > -        src_buf.plane[plane].mask   = -1;
>> >> > -
>> >> > -        p = odesc->comp[plane].plane;
>> >> > -        dst_buf.plane[plane].data   = out->data[p];
>> >> > -        dst_buf.plane[plane].stride = out->linesize[p];
>> >> > -        dst_buf.plane[plane].mask   = -1;
>> >> > -    }
>> >> > -
>> >> > -    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf,
>> >> > s->tmp, 0, 0, 0, 0);
>> >> > -    if (ret) {
>> >> > -        ret = print_zimg_error(link->dst);
>> >> > +    if (!s->ztd[0].graph) {
>> >> > +        ret = AVERROR(EINVAL);
>> >> >          goto fail;
>> >> >      }
>> >> >
>> >> > -    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
>> >> > AV_PIX_FMT_FLAG_ALPHA) {
>> >> > -        src_buf.plane[0].data   = in->data[3];
>> >> > -        src_buf.plane[0].stride = in->linesize[3];
>> >> > -        src_buf.plane[0].mask   = -1;
>> >> > -
>> >> > -        dst_buf.plane[0].data   = out->data[3];
>> >> > -        dst_buf.plane[0].stride = out->linesize[3];
>> >> > -        dst_buf.plane[0].mask   = -1;
>> >> > -
>> >> > -        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf,
>> >> > &dst_buf, s->tmp, 0, 0, 0, 0);
>> >> > -        if (ret) {
>> >> > -            ret = print_zimg_error(link->dst);
>> >> > -            goto fail;
>> >> > -        }
>> >> > -    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
>> >> > -        int x, y;
>> >> > -
>> >> > -        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
>> >> > -            for (y = 0; y < out->height; y++) {
>> >> > -                for (x = 0; x < out->width; x++) {
>> >> > -                    AV_WN32(out->data[3] + x * odesc->comp[3].step +
>> >> > y
>> >> > * out->linesize[3],
>> >> > -                            av_float2int(1.0f));
>> >> > -                }
>> >> > -            }
>> >> > -        } else {
>> >> > -            for (y = 0; y < outlink->h; y++)
>> >> > -                memset(out->data[3] + y * out->linesize[3], 0xff,
>> >> > outlink->w);
>> >> > -        }
>> >> > -    }
>> >> > +    td.out = out;
>> >> > +    td.in = in;
>> >> > +    td.desc = desc;
>> >> > +    td.odesc = odesc;
>> >> > +    ret = ctx->internal->execute(ctx, zscale_slice, &td, NULL,
>> >> > FFMIN3(in->height, out->height, s->nb_threads));
>> >> >
>> >> >  fail:
>> >> >      av_frame_free(&in);
>> >> > @@ -706,10 +795,13 @@ static void uninit(AVFilterContext *ctx)
>> >> >  {
>> >> >      ZScaleContext *s = ctx->priv;
>> >> >
>> >> > -    zimg_filter_graph_free(s->graph);
>> >> > -    zimg_filter_graph_free(s->alpha_graph);
>> >> > -    av_freep(&s->tmp);
>> >> > -    s->tmp_size = 0;
>> >> > +    for (int i = 0; i < s->nb_threads; i++) {
>> >> > +        zimg_filter_graph_free(s->ztd[i].graph);
>> >> > +        zimg_filter_graph_free(s->ztd[i].alpha_graph);
>> >> > +        av_freep(&s->ztd[i].tmp);
>> >> > +        s->ztd[i].tmp_size = 0;
>> >> > +    }
>> >> > +    av_freep(&s->ztd);
>> >> >  }
>> >> >
>> >> >  static int process_command(AVFilterContext *ctx, const char *cmd,
>> >> > const
>> >> > char *args,
>> >> > @@ -890,4 +982,5 @@ AVFilter ff_vf_zscale = {
>> >> >      .inputs          = avfilter_vf_zscale_inputs,
>> >> >      .outputs         = avfilter_vf_zscale_outputs,
>> >> >      .process_command = process_command,
>> >> > +    .flags           = AVFILTER_FLAG_SLICE_THREADS,
>> >> >  };
>> >> > --
>> >> > 2.17.1
>> >>
>> >>
>> >> I've had to use zscale to convert 10-bit 4k60p video from HLG HDR to
>> >> SDR
>> >> (bt709).   It was ~36x slower than real time.  What I ended up
>> >> doing
>> >> to speed it up was to generate CLUT image (16-bit yuv444 65x65x65
>> >> sampling
>> >> of input color space), lay it out as a 2D image (512x537), and run it
>> >> through zscale to generate the HDR->SDR transform CLUT.  Then I used
>> >> the
>> >> CLUT instead of zscale for every frame...  that got me to about 3.5x
>> >> slower than realtime converting 60fps 10-bit 4k HLG to SDR  (and
>> >> I
>> >> don't know any assembly, so I didn't attempt to optimize the CLUT
>> >> trilinear optimization with SIMD, so maybe it could be faster still).
>> >> I
>> >> then ported to CUDA and was able to convert 4k60p HLG->SDR faster than
>> >> realtime on a Pascal GPU.
>> >>
>> >
>> > I meant trilinear interpolation
>> >
>> >
>> >> So, I'm not sure that adding slice threading to zscale is the best
>> >> optimization for it.  I think capturing the effect of zscale in a CLUT
>> >> would be a more significant optimization.
>> >>
>> >> Just my 2 cents, hope this helps.
>>
>> Your logic is completely flawed.
>> You can not rescale images with LUT tables.
>
>
> I was not resizing the image from 4K to 1080p ... the output was still
> 4K.  I was converting from 10-bit in whatever HDR input colorspace
> (HLG, or HDR10), to 8-bit SDR output colorspace.  You most definitely
> can approximate that transformation with a CLUT.
>

Seen lut3d filter?

> Since zscale is capable of resizing and colorspace conversion --
> perhaps this functionality should be split into separate filters so
> each can be optimized differently.

Your logic is completely flawed yet again.
zscale is a wrapper around another library.

>
> Pavel.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".