[FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x

Paul B Mahol onemda at gmail.com
Fri Feb 18 18:00:03 EET 2022


On Fri, Feb 18, 2022 at 12:48:10PM -0300, James Almer wrote:
> 
> 
> On 2/18/2022 12:24 PM, Victoria Zhislina wrote:
> > By implementing ffmpeg threading support via frame slicing, and by calling
> > zimg_filter_graph_build (which used to take 30-60% of each frame's processing
> > time) only when necessary (i.e. when some parameters have changed),
> > a performance increase of >4x over the original version
> > is seen in video downscaling and color conversion
> > on a 64-core Intel Xeon, and 3x on an i7-6700K (4 cores with HT).
> > 
> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina at intel.com>
> > ---
> >   libavfilter/vf_zscale.c | 787 ++++++++++++++++++++++++----------------
> >   1 file changed, 475 insertions(+), 312 deletions(-)
> > 
> > diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> > index 1288c5efc1..ea2565025f 100644
> > --- a/libavfilter/vf_zscale.c
> > +++ b/libavfilter/vf_zscale.c
> > @@ -1,6 +1,7 @@
> >   /*
> >    * Copyright (c) 2015 Paul B Mahol
> > - *
> > + * 2022 Victoria Zhislina, Intel
> > +
> >    * This file is part of FFmpeg.
> >    *
> >    * FFmpeg is free software; you can redistribute it and/or
> > @@ -44,6 +45,8 @@
> >   #include "libavutil/imgutils.h"
> >   #define ZIMG_ALIGNMENT 32
> > +#define MIN_TILESIZE 64
> > +#define MAX_THREADS 64
> >   static const char *const var_names[] = {
> >       "in_w",   "iw",
> > @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
> >       int force_original_aspect_ratio;
> > -    void *tmp;
> > -    size_t tmp_size;
> > +    void *tmp[MAX_THREADS]; //separate for each thread;
> > +    int nb_threads;
> > +    int slice_h;
> >       zimg_image_format src_format, dst_format;
> >       zimg_image_format alpha_src_format, alpha_dst_format;
> > +    zimg_image_format src_format_tmp, dst_format_tmp;
> > +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
> >       zimg_graph_builder_params alpha_params, params;
> > -    zimg_filter_graph *alpha_graph, *graph;
> > +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> > +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
> >       enum AVColorSpace in_colorspace, out_colorspace;
> >       enum AVColorTransferCharacteristic in_trc, out_trc;
> > @@ -128,10 +135,181 @@ typedef struct ZScaleContext {
> >       enum AVChromaLocation in_chromal, out_chromal;
> >   } ZScaleContext;
> > +
> > +typedef struct ThreadData {
> > +    const AVPixFmtDescriptor *desc, *odesc;
> > +    AVFrame *in, *out;
> > +} ThreadData;
> > +
> > +static int convert_chroma_location(enum AVChromaLocation chroma_location)
> > +{
> > +    switch (chroma_location) {
> > +    case AVCHROMA_LOC_UNSPECIFIED:
> > +    case AVCHROMA_LOC_LEFT:
> > +        return ZIMG_CHROMA_LEFT;
> > +    case AVCHROMA_LOC_CENTER:
> > +        return ZIMG_CHROMA_CENTER;
> > +    case AVCHROMA_LOC_TOPLEFT:
> > +        return ZIMG_CHROMA_TOP_LEFT;
> > +    case AVCHROMA_LOC_TOP:
> > +        return ZIMG_CHROMA_TOP;
> > +    case AVCHROMA_LOC_BOTTOMLEFT:
> > +        return ZIMG_CHROMA_BOTTOM_LEFT;
> > +    case AVCHROMA_LOC_BOTTOM:
> > +        return ZIMG_CHROMA_BOTTOM;
> > +    }
> > +    return ZIMG_CHROMA_LEFT;
> > +}
> > +
> > +static int convert_matrix(enum AVColorSpace colorspace)
> > +{
> > +    switch (colorspace) {
> > +    case AVCOL_SPC_RGB:
> > +        return ZIMG_MATRIX_RGB;
> > +    case AVCOL_SPC_BT709:
> > +        return ZIMG_MATRIX_709;
> > +    case AVCOL_SPC_UNSPECIFIED:
> > +        return ZIMG_MATRIX_UNSPECIFIED;
> > +    case AVCOL_SPC_FCC:
> > +        return ZIMG_MATRIX_FCC;
> > +    case AVCOL_SPC_BT470BG:
> > +        return ZIMG_MATRIX_470BG;
> > +    case AVCOL_SPC_SMPTE170M:
> > +        return ZIMG_MATRIX_170M;
> > +    case AVCOL_SPC_SMPTE240M:
> > +        return ZIMG_MATRIX_240M;
> > +    case AVCOL_SPC_YCGCO:
> > +        return ZIMG_MATRIX_YCGCO;
> > +    case AVCOL_SPC_BT2020_NCL:
> > +        return ZIMG_MATRIX_2020_NCL;
> > +    case AVCOL_SPC_BT2020_CL:
> > +        return ZIMG_MATRIX_2020_CL;
> > +    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> > +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> > +    case AVCOL_SPC_CHROMA_DERIVED_CL:
> > +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> > +    case AVCOL_SPC_ICTCP:
> > +        return ZIMG_MATRIX_ICTCP;
> > +    }
> > +    return ZIMG_MATRIX_UNSPECIFIED;
> > +}
> > +
> > +static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> > +{
> > +    switch (color_trc) {
> > +    case AVCOL_TRC_UNSPECIFIED:
> > +        return ZIMG_TRANSFER_UNSPECIFIED;
> > +    case AVCOL_TRC_BT709:
> > +        return ZIMG_TRANSFER_709;
> > +    case AVCOL_TRC_GAMMA22:
> > +        return ZIMG_TRANSFER_470_M;
> > +    case AVCOL_TRC_GAMMA28:
> > +        return ZIMG_TRANSFER_470_BG;
> > +    case AVCOL_TRC_SMPTE170M:
> > +        return ZIMG_TRANSFER_601;
> > +    case AVCOL_TRC_SMPTE240M:
> > +        return ZIMG_TRANSFER_240M;
> > +    case AVCOL_TRC_LINEAR:
> > +        return ZIMG_TRANSFER_LINEAR;
> > +    case AVCOL_TRC_LOG:
> > +        return ZIMG_TRANSFER_LOG_100;
> > +    case AVCOL_TRC_LOG_SQRT:
> > +        return ZIMG_TRANSFER_LOG_316;
> > +    case AVCOL_TRC_IEC61966_2_4:
> > +        return ZIMG_TRANSFER_IEC_61966_2_4;
> > +    case AVCOL_TRC_BT2020_10:
> > +        return ZIMG_TRANSFER_2020_10;
> > +    case AVCOL_TRC_BT2020_12:
> > +        return ZIMG_TRANSFER_2020_12;
> > +    case AVCOL_TRC_SMPTE2084:
> > +        return ZIMG_TRANSFER_ST2084;
> > +    case AVCOL_TRC_ARIB_STD_B67:
> > +        return ZIMG_TRANSFER_ARIB_B67;
> > +    case AVCOL_TRC_IEC61966_2_1:
> > +        return ZIMG_TRANSFER_IEC_61966_2_1;
> > +    }
> > +    return ZIMG_TRANSFER_UNSPECIFIED;
> > +}
> > +
> > +static int convert_primaries(enum AVColorPrimaries color_primaries)
> > +{
> > +    switch (color_primaries) {
> > +    case AVCOL_PRI_UNSPECIFIED:
> > +        return ZIMG_PRIMARIES_UNSPECIFIED;
> > +    case AVCOL_PRI_BT709:
> > +        return ZIMG_PRIMARIES_709;
> > +    case AVCOL_PRI_BT470M:
> > +        return ZIMG_PRIMARIES_470_M;
> > +    case AVCOL_PRI_BT470BG:
> > +        return ZIMG_PRIMARIES_470_BG;
> > +    case AVCOL_PRI_SMPTE170M:
> > +        return ZIMG_PRIMARIES_170M;
> > +    case AVCOL_PRI_SMPTE240M:
> > +        return ZIMG_PRIMARIES_240M;
> > +    case AVCOL_PRI_FILM:
> > +        return ZIMG_PRIMARIES_FILM;
> > +    case AVCOL_PRI_BT2020:
> > +        return ZIMG_PRIMARIES_2020;
> > +    case AVCOL_PRI_SMPTE428:
> > +        return ZIMG_PRIMARIES_ST428;
> > +    case AVCOL_PRI_SMPTE431:
> > +        return ZIMG_PRIMARIES_ST431_2;
> > +    case AVCOL_PRI_SMPTE432:
> > +        return ZIMG_PRIMARIES_ST432_1;
> > +    case AVCOL_PRI_JEDEC_P22:
> > +        return ZIMG_PRIMARIES_EBU3213_E;
> > +    }
> > +    return ZIMG_PRIMARIES_UNSPECIFIED;
> > +}
> > +
> > +static int convert_range(enum AVColorRange color_range)
> > +{
> > +    switch (color_range) {
> > +    case AVCOL_RANGE_UNSPECIFIED:
> > +    case AVCOL_RANGE_MPEG:
> > +        return ZIMG_RANGE_LIMITED;
> > +    case AVCOL_RANGE_JPEG:
> > +        return ZIMG_RANGE_FULL;
> > +    }
> > +    return ZIMG_RANGE_LIMITED;
> > +}
> > +
> > +static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> > +{
> > +    switch (color_range) {
> > +    case ZIMG_RANGE_LIMITED:
> > +        return AVCOL_RANGE_MPEG;
> > +    case ZIMG_RANGE_FULL:
> > +        return AVCOL_RANGE_JPEG;
> > +    }
> > +    return AVCOL_RANGE_UNSPECIFIED;
> > +}
> > +
> >   static av_cold int init(AVFilterContext *ctx)
> >   {
> >       ZScaleContext *s = ctx->priv;
> >       int ret;
> > +    int i;
> > +
> > +    for (i = 0; i < MAX_THREADS; i++) {
> > +        s->tmp[i] = NULL;
> > +        s->graph[i] = NULL;
> > +        s->alpha_graph[i] = NULL;
> > +    }
> > +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> > +
> > +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> > +
> > +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> > +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> > +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> > +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
> >       if (s->size_str && (s->w_expr || s->h_expr)) {
> >           av_log(ctx, AV_LOG_ERROR,
> > @@ -158,7 +336,6 @@ static av_cold int init(AVFilterContext *ctx)
> >           av_opt_set(s, "w", "iw", 0);
> >       if (!s->h_expr)
> >           av_opt_set(s, "h", "ih", 0);
> > -
> >       return 0;
> >   }
> > @@ -194,6 +371,153 @@ static int query_formats(AVFilterContext *ctx)
> >       return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
> >   }
> > +/* returns 0 if image formats are the same and 1 otherwise */
> > +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> > +{
> > +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> > +#if ZIMG_API_VERSION >= 0x204
> > +        (img_fmt0->alpha != img_fmt1->alpha) ||
> > +#endif
> > +        (img_fmt0->color_family != img_fmt1->color_family) ||
> > +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> > +        (img_fmt0->depth != img_fmt1->depth) ||
> > +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> > +        (img_fmt0->height != img_fmt1->height) ||
> > +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> > +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> > +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> > +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> > +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> > +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> > +        (img_fmt0->width != img_fmt1->width));
> > +}
> > +
> > +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> > +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> > +{
> > +    /* Only the parameters that can change within a single ffmpeg zscale invocation are checked;
> > +    NaN values, which are the default for some parameters, are handled properly. */
> > +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> > +        (parm0->dither_type != parm1->dither_type) ||
> > +        (parm0->resample_filter != parm1->resample_filter) ||
> > +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> > +
> > +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> > +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> > +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> > +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> > +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> > +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> > +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> > +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> > +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> > +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> > +
> > +    return ret;
> > +}
> > +
> > +static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> > +    int colorspace, int primaries, int transfer, int range, int location)
> > +{
> > +    format->width = frame->width;
> > +    format->height = frame->height;
> > +    format->subsample_w = desc->log2_chroma_w;
> > +    format->subsample_h = desc->log2_chroma_h;
> > +    format->depth = desc->comp[0].depth;
> > +    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> > +    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> > +    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> > +    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> > +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
> > +    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> > +    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> > +}
> 
> Why are you moving all these functions up in the file? They make the patch
> much harder to read.
> 
> If moving them is necessary, then please split this patch in two. One moving
> the functions, then one applying the actual changes to them and the rest of
> the file. It will make reviewing much easier.

Also, please remove the trailing whitespace in the patch.


More information about the ffmpeg-devel mailing list