[FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x

Paul B Mahol onemda at gmail.com
Sun Feb 20 20:51:46 EET 2022


On Sat, Feb 19, 2022 at 04:58:09PM +0300, Victoria Zhislina wrote:
> By ffmpeg threading support implementation via frame slicing and doing
> zimg_filter_graph_build that used to take 30-60% of each frame processing
> only if necessary (some parameters changed)
> the performance increase vs original version
> in video downscale and color conversion  >4x is seen
> on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> 
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina at intel.com>
> ---
>  libavfilter/vf_zscale.c | 417 +++++++++++++++++++++++++++-------------
>  1 file changed, 288 insertions(+), 129 deletions(-)
> 
> diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> index 1288c5efc1..61418d4a4a 100644
> --- a/libavfilter/vf_zscale.c
> +++ b/libavfilter/vf_zscale.c
> @@ -1,6 +1,7 @@
>  /*
>   * Copyright (c) 2015 Paul B Mahol
> - *
> + * 2022 Victoria Zhislina, Intel
> +
>   * This file is part of FFmpeg.
>   *
>   * FFmpeg is free software; you can redistribute it and/or
> @@ -44,6 +45,8 @@
>  #include "libavutil/imgutils.h"
>  
>  #define ZIMG_ALIGNMENT 32
> +#define MIN_TILESIZE 64
> +#define MAX_THREADS 64
>  
>  static const char *const var_names[] = {
>      "in_w",   "iw",
> @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
>  
>      int force_original_aspect_ratio;
>  
> -    void *tmp;
> -    size_t tmp_size;
> +    void *tmp[MAX_THREADS]; //separate for each thread;
> +    int nb_threads;
> +    int slice_h;
>  
>      zimg_image_format src_format, dst_format;
>      zimg_image_format alpha_src_format, alpha_dst_format;
> +    zimg_image_format src_format_tmp, dst_format_tmp;
> +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
>      zimg_graph_builder_params alpha_params, params;
> -    zimg_filter_graph *alpha_graph, *graph;
> +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
>  
>      enum AVColorSpace in_colorspace, out_colorspace;
>      enum AVColorTransferCharacteristic in_trc, out_trc;
> @@ -128,10 +135,36 @@ typedef struct ZScaleContext {
>      enum AVChromaLocation in_chromal, out_chromal;
>  } ZScaleContext;
>  
> +typedef struct ThreadData {
> +    const AVPixFmtDescriptor *desc, *odesc;
> +    AVFrame *in, *out;
> +} ThreadData;
> +
>  static av_cold int init(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
>      int ret;
> +    int i;

remove that line above

> +
> +    for (i = 0; i < MAX_THREADS; i++) {

for (int i ...

> +        s->tmp[i] = NULL;
> +        s->graph[i] = NULL;
> +        s->alpha_graph[i] = NULL;
> +    }
> +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
>  
>      if (s->size_str && (s->w_expr || s->h_expr)) {
>          av_log(ctx, AV_LOG_ERROR,
> @@ -158,7 +191,6 @@ static av_cold int init(AVFilterContext *ctx)
>          av_opt_set(s, "w", "iw", 0);
>      if (!s->h_expr)
>          av_opt_set(s, "h", "ih", 0);
> -
>      return 0;
>  }
>  
> @@ -471,6 +503,51 @@ static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_r
>      return AVCOL_RANGE_UNSPECIFIED;
>  }
>  
> +/* returns 0 if image formats are the same and 1 otherwise */
> +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> +{
> +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> +#if ZIMG_API_VERSION >= 0x204
> +        (img_fmt0->alpha != img_fmt1->alpha) ||
> +#endif
> +        (img_fmt0->color_family != img_fmt1->color_family) ||
> +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> +        (img_fmt0->depth != img_fmt1->depth) ||
> +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> +        (img_fmt0->height != img_fmt1->height) ||
> +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> +        (img_fmt0->width != img_fmt1->width));
> +}
> +
> +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> +{
> +    /* the parameters that could be changed inside a single ffmpeg zscale invocation  are checked only
> +    and NaN values that are default for some params are treated properly*/
> +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> +        (parm0->dither_type != parm1->dither_type) ||
> +        (parm0->resample_filter != parm1->resample_filter) ||
> +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> +
> +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> +
> +    return ret;
> +}
> +
>  static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
>                          int colorspace, int primaries, int transfer, int range, int location)
>  {
> @@ -483,46 +560,94 @@ static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFm
>      format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
>      format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
>      format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> -    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
> +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
>      format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
>      format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
>  }
>  
> -static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
> -                       zimg_image_format *src_format, zimg_image_format *dst_format,
> -                       void **tmp, size_t *tmp_size)
> +static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
> +                        ZScaleContext *s, int job_nr)
>  {
>      int ret;
>      size_t size;
> +    zimg_image_format src_format;
> +    zimg_image_format dst_format;
> +    zimg_image_format alpha_src_format;
> +    zimg_image_format alpha_dst_format;
> +
> +    src_format = s->src_format;
> +    dst_format = s->dst_format;
> +    /* The input slice is specified through the active_region field,
> +    unlike the output slice.
> +    according to zimg requirements input and output slices should have even dimentions */
> +    src_format.active_region.width = in->width;
> +    src_format.active_region.height = s->slice_h;
> +    src_format.active_region.left = 0;
> +    src_format.active_region.top = job_nr * src_format.active_region.height;
> +    //dst now is the single tile only!!
> +    dst_format.width = out->width;
> +    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +    //the last slice could differ from the previous ones due to the slices division "tail"
> +    if (job_nr == (s->nb_threads - 1)) {
> +        src_format.active_region.height = src_format.height - src_format.active_region.top;
> +        dst_format.height = out->height - job_nr * dst_format.height;
> +    }
>  
> -    zimg_filter_graph_free(*graph);
> -    *graph = zimg_filter_graph_build(src_format, dst_format, params);
> -    if (!*graph)
> +    if (s->graph[job_nr]) {
> +        zimg_filter_graph_free(s->graph[job_nr]);
> +    }
> +    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
> +    if (!s->graph[job_nr])
>          return print_zimg_error(NULL);
>  
> -    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
> +    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
>      if (ret)
>          return print_zimg_error(NULL);
>  
> -    if (size > *tmp_size) {
> -        av_freep(tmp);
> -        *tmp = av_malloc(size);
> -        if (!*tmp)
> -            return AVERROR(ENOMEM);
> -
> -        *tmp_size = size;
> -    }
> +    if (s->tmp[job_nr])

    not needed, remove line above

> +        av_freep(&s->tmp[job_nr]);
> +    s->tmp[job_nr] = av_malloc(size);
> +    if (!s->tmp[job_nr])
> +        return AVERROR(ENOMEM);
> +
> +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        alpha_src_format = s->alpha_src_format;
> +        alpha_dst_format = s->alpha_dst_format;
> +        /* The input slice is specified through the active_region field, unlike the output slice.
> +        according to zimg requirements input and output slices should have even dimentions */
> +        alpha_src_format.active_region.width = in->width;
> +        alpha_src_format.active_region.height = s->slice_h;
> +        alpha_src_format.active_region.left = 0;
> +        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
> +        //dst now is the single tile only!!
> +        alpha_dst_format.width = out->width;
> +        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +        //the last slice could differ from the previous ones due to the slices division "tail"
> +        if (job_nr == (s->nb_threads - 1)) {
> +            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
> +            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
> +        }
>  
> +        if (s->alpha_graph[job_nr]) {
> +            zimg_filter_graph_free(s->alpha_graph[job_nr]);
> +        }
> +        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
> +        if (!s->alpha_graph[job_nr])
> +            return print_zimg_error(NULL);
> +     }
>      return 0;
>  }
>  
>  static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
>  {
>      AVFrame *aligned = NULL;
> -    int ret = 0, plane;
> +    int ret = 0, plane, planes;
>  
>      /* Realign any unaligned input frame. */
> -    for (plane = 0; plane < 3; plane++) {
> +    planes = av_pix_fmt_count_planes(desc->nb_components);
> +    for (plane = 0; plane < planes; plane++) {
>          int p = desc->comp[plane].plane;
>          if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
>              if (!(aligned = av_frame_alloc())) {
> @@ -572,20 +697,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
>          frame->chroma_location = (int)s->dst_format.chroma_location + 1;
>  }
>  
> +static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
> +{
> +    ThreadData *td = data;
> +    int ret = 0;
> +    int p;
> +    int out_sampl;
> +    int need_gb;
> +    ZScaleContext *s = ctx->priv;
> +    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> +    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
> +    int  dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe;
> +
> +    /* create zimg filter graphs for each thread
> +     only if not created earlier or there is some change in frame parameters */
> +    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
> +        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
> +        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
> +    if(td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
> +        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
> +            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
> +            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
> +
> +    if (need_gb){
> +        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
> +        if (ret < 0)
> +            return print_zimg_error(ctx);
> +    }
> +    out_sampl = FFMAX3(td->out->linesize[0], td->out->linesize[1], td->out->linesize[2]);
> +    for (int i = 0; i < 3; i++) {
> +        p = td->desc->comp[i].plane;
> +
> +        src_buf.plane[i].data = td->in->data[p];
> +        src_buf.plane[i].stride = td->in->linesize[p];
> +        src_buf.plane[i].mask = -1;
> +
> +        p = td->odesc->comp[i].plane;
> +        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * dst_tile_height * td->out->linesize[p] / out_sampl * job_nr;
> +        dst_buf.plane[i].stride = td->out->linesize[p];
> +        dst_buf.plane[i].mask = -1;
> +    }
> +    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +    if (ret)
> +        return  print_zimg_error(ctx);
> +
> +    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        src_buf.plane[0].data = td->in->data[3];
> +        src_buf.plane[0].stride = td->in->linesize[3];
> +        src_buf.plane[0].mask = -1;
> +
> +        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height  * job_nr;
> +        dst_buf.plane[0].stride = td->out->linesize[3];
> +        dst_buf.plane[0].mask = -1;
> +
> +        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +        if (ret)
> +            return print_zimg_error(ctx);
> +    }
> +    return 0;
> +}
> +
>  static int filter_frame(AVFilterLink *link, AVFrame *in)
>  {
> -    ZScaleContext *s = link->dst->priv;
> -    AVFilterLink *outlink = link->dst->outputs[0];
> +    AVFilterContext *ctx = link->dst;
> +    ZScaleContext *s = ctx->priv;
> +    AVFilterLink *outlink = ctx->outputs[0];
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
>      const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> -    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> -    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
>      char buf[32];
> -    int ret = 0, plane;
> +    int ret = 0;
>      AVFrame *out = NULL;
> -
> -    if ((ret = realign_frame(desc, &in)) < 0)
> -        goto fail;
> +    ThreadData td;
>  
>      if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
>          ret =  AVERROR(ENOMEM);
> @@ -596,35 +778,53 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      out->width  = outlink->w;
>      out->height = outlink->h;
>  
> -    if(   in->width  != link->w
> -       || in->height != link->h
> -       || in->format != link->format
> -       || s->in_colorspace != in->colorspace
> -       || s->in_trc  != in->color_trc
> -       || s->in_primaries != in->color_primaries
> -       || s->in_range != in->color_range
> -       || s->out_colorspace != out->colorspace
> -       || s->out_trc  != out->color_trc
> -       || s->out_primaries != out->color_primaries
> -       || s->out_range != out->color_range
> -       || s->in_chromal != in->chroma_location
> -       || s->out_chromal != out->chroma_location) {
> +    //we need to use this filter if something is different for an input and output only
> +    //otherwise - just copy the input frame to the output
> +    if ((link->w != outlink->w) ||
> +        (link->h != outlink->h) ||
> +        (s->src_format.chroma_location != s->dst_format.chroma_location) ||
> +        (s->src_format.color_family !=s->dst_format.color_family) ||
> +        (s->src_format.color_primaries !=s->dst_format.color_primaries) ||
> +        (s->src_format.depth !=s->dst_format.depth) ||
> +        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients) ||
> +        (s->src_format.field_parity !=s->dst_format.field_parity) ||
> +        (s->src_format.pixel_range !=s->dst_format.pixel_range) ||
> +        (s->src_format.pixel_type !=s->dst_format.pixel_type) ||
> +        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
> +    ){
> +        if ((ret = realign_frame(desc, &in)) < 0)
> +            goto fail;
> +
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
>          av_opt_set(s, "w", buf, 0);
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
>          av_opt_set(s, "h", buf, 0);
>  
> +

remove the unneeded blank line added above

>          link->dst->inputs[0]->format = in->format;
>          link->dst->inputs[0]->w      = in->width;
>          link->dst->inputs[0]->h      = in->height;
>  
> -        if ((ret = config_props(outlink)) < 0)
> -            goto fail;
> +        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
> +        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
> +        s->in_colorspace = in->colorspace;
> +        s->in_trc = in->color_trc;
> +        s->in_primaries = in->color_primaries;
> +        s->in_range = in->color_range;
> +        s->out_colorspace = out->colorspace;
> +        s->out_trc = out->color_trc;
> +        s->out_primaries = out->color_primaries;
> +        s->out_range = out->color_range;
>  
>          zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
>          zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
>          zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
>  
> +        format_init(&s->src_format, in, desc, s->colorspace_in,
> +            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> +        format_init(&s->dst_format, out, odesc, s->colorspace,
> +            s->primaries, s->trc, s->range, s->chromal);
> +
>          s->params.dither_type = s->dither;
>          s->params.cpu_type = ZIMG_CPU_AUTO;
>          s->params.resample_filter = s->filter;
> @@ -634,27 +834,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>          s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
>          s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
>  
> -        format_init(&s->src_format, in, desc, s->colorspace_in,
> -                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> -        format_init(&s->dst_format, out, odesc, s->colorspace,
> -                    s->primaries, s->trc, s->range, s->chromal);
> -
> -        update_output_color_information(s, out);
> -
> -        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
> -                          &s->tmp, &s->tmp_size);
> -        if (ret < 0)
> -            goto fail;
> -
> -        s->in_colorspace  = in->colorspace;
> -        s->in_trc         = in->color_trc;
> -        s->in_primaries   = in->color_primaries;
> -        s->in_range       = in->color_range;
> -        s->out_colorspace = out->colorspace;
> -        s->out_trc        = out->color_trc;
> -        s->out_primaries  = out->color_primaries;
> -        s->out_range      = out->color_range;
> -
>          if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
>              zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
>              zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> @@ -670,76 +849,54 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>              s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
>  
> -            s->alpha_dst_format.width = out->width;
> -            s->alpha_dst_format.height = out->height;
>              s->alpha_dst_format.depth = odesc->comp[0].depth;
>              s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
> -
> -            zimg_filter_graph_free(s->alpha_graph);
> -            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
> -            if (!s->alpha_graph) {
> -                ret = print_zimg_error(link->dst);
> -                goto fail;
> -            }
>          }
> -    }
> -
> -    update_output_color_information(s, out);
> -
> -    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> -              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> -              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> -              INT_MAX);
> -
> -    for (plane = 0; plane < 3; plane++) {
> -        int p = desc->comp[plane].plane;
> -        src_buf.plane[plane].data   = in->data[p];
> -        src_buf.plane[plane].stride = in->linesize[p];
> -        src_buf.plane[plane].mask   = -1;
> -
> -        p = odesc->comp[plane].plane;
> -        dst_buf.plane[plane].data   = out->data[p];
> -        dst_buf.plane[plane].stride = out->linesize[p];
> -        dst_buf.plane[plane].mask   = -1;
> -    }
>  
> -    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -    if (ret) {
> -        ret = print_zimg_error(link->dst);
> -        goto fail;
> -    }
> +        update_output_color_information(s, out);
> +        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> +                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> +                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> +                  INT_MAX);
>  
> -    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        src_buf.plane[0].data   = in->data[3];
> -        src_buf.plane[0].stride = in->linesize[3];
> -        src_buf.plane[0].mask   = -1;
> +        td.in = in;
> +        td.out = out;
> +        td.desc = desc;
> +        td.odesc = odesc;
>  
> -        dst_buf.plane[0].data   = out->data[3];
> -        dst_buf.plane[0].stride = out->linesize[3];
> -        dst_buf.plane[0].mask   = -1;
> +        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
>  
> -        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -        if (ret) {
> -            ret = print_zimg_error(link->dst);
> -            goto fail;
> +        s->src_format_tmp = s->src_format;
> +        s->dst_format_tmp = s->dst_format;
> +        s->params_tmp = s->params;
> +        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +            s->alpha_src_format_tmp = s->alpha_src_format;
> +            s->alpha_dst_format_tmp = s->alpha_dst_format;
> +            s->alpha_params_tmp = s->alpha_params;
>          }
> -    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        int x, y;
> -
> -        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> -            for (y = 0; y < out->height; y++) {
> -                for (x = 0; x < out->width; x++) {
> -                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> -                            av_float2int(1.0f));
> +
> +        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
> +            int x, y;
> +            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> +                for (y = 0; y < out->height; y++) {
> +                    for (x = 0; x < out->width; x++) {
> +                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> +                                av_float2int(1.0f));
> +                    }
>                  }
> +            } else {
> +                for (y = 0; y < outlink->h; y++)
> +                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>              }
> -        } else {
> -            for (y = 0; y < outlink->h; y++)
> -                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>          }
>      }
> -
> +    else {
> +        /*no need for any filtering */
> +        ret = av_frame_copy(out, in);

You could just return the 'in' frame; there is no need to do a full frame copy in the no-op scenario.

> +        if (ret < 0)
> +            return ret;
> +    }
>  fail:
>      av_frame_free(&in);
>      if (ret) {
> @@ -753,11 +910,12 @@ fail:
>  static av_cold void uninit(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
> -
> -    zimg_filter_graph_free(s->graph);
> -    zimg_filter_graph_free(s->alpha_graph);
> -    av_freep(&s->tmp);
> -    s->tmp_size = 0;
> +    int i;
> +    for (i = 0; i < s->nb_threads; i++) {

    for (int i = 0; i < s->nb_threads; i++) {

> +        if (s->tmp[i]) av_freep(&s->tmp[i]);

use just:
           av_freep(&s->tmp[i]);

> +        zimg_filter_graph_free(s->graph[i]);
> +        zimg_filter_graph_free(s->alpha_graph[i]);
> +    }
>  }
>  
>  static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
> @@ -941,4 +1099,5 @@ const AVFilter ff_vf_zscale = {
>      FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
>      FILTER_QUERY_FUNC(query_formats),
>      .process_command = process_command,
> +    .flags           = AVFILTER_FLAG_SLICE_THREADS,
>  };
> -- 
> 2.31.1.windows.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".


More information about the ffmpeg-devel mailing list