[FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x

Victoria Zhislina niva213 at gmail.com
Thu Feb 10 12:08:04 EET 2022


Implement ffmpeg slice threading support (frame slicing) and call
zimg_filter_graph_build, which used to take 30-60% of each frame's processing
time, only when necessary (i.e. when some parameters have changed).
Compared with the original version, video downscaling and color conversion
become >4x faster on a 64-core Intel Xeon and 3x faster on an
i7-6700K (4 cores with HT).

Signed-off-by: Victoria Zhislina <Victoria.Zhislina at intel.com>
---
 libavfilter/vf_zscale.c | 786 ++++++++++++++++++++++++----------------
 1 file changed, 475 insertions(+), 311 deletions(-)

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 1288c5efc1..ce4c0b2c76 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2015 Paul B Mahol
- *
+ * 2022 Victoria Zhislina, Intel - performance optimization
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -44,6 +45,8 @@
 #include "libavutil/imgutils.h"
 
 #define ZIMG_ALIGNMENT 32
+#define MIN_TILESIZE 64
+#define MAX_THREADS 64
 
 static const char *const var_names[] = {
     "in_w",   "iw",
@@ -113,13 +116,17 @@ typedef struct ZScaleContext {
 
     int force_original_aspect_ratio;
 
-    void *tmp;
-    size_t tmp_size;
+    void *tmp[MAX_THREADS]; //separate for each thread;
+	int nb_threads;
+    int slice_h;
 
     zimg_image_format src_format, dst_format;
     zimg_image_format alpha_src_format, alpha_dst_format;
+    zimg_image_format src_format_tmp, dst_format_tmp;
+    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
     zimg_graph_builder_params alpha_params, params;
-    zimg_filter_graph *alpha_graph, *graph;
+    zimg_graph_builder_params alpha_params_tmp, params_tmp;
+    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
 
     enum AVColorSpace in_colorspace, out_colorspace;
     enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -128,10 +135,181 @@ typedef struct ZScaleContext {
     enum AVChromaLocation in_chromal, out_chromal;
 } ZScaleContext;
 
+
+typedef struct ThreadData { /* per-frame data that filter_slice() receives through its data pointer */
+    const AVPixFmtDescriptor *desc, *odesc; /* descriptors used for the input (desc) and output (odesc) planes */
+    AVFrame *in, *out;                      /* frame that is read and frame that is written */
+} ThreadData;
+
+static int convert_chroma_location(enum AVChromaLocation chroma_location)
+{ /* translate an FFmpeg chroma location into the matching ZIMG_CHROMA_* value */
+    switch (chroma_location) {
+    case AVCHROMA_LOC_UNSPECIFIED: /* unspecified is handled like left */
+    case AVCHROMA_LOC_LEFT:
+        return ZIMG_CHROMA_LEFT;
+    case AVCHROMA_LOC_CENTER:
+        return ZIMG_CHROMA_CENTER;
+    case AVCHROMA_LOC_TOPLEFT:
+        return ZIMG_CHROMA_TOP_LEFT;
+    case AVCHROMA_LOC_TOP:
+        return ZIMG_CHROMA_TOP;
+    case AVCHROMA_LOC_BOTTOMLEFT:
+        return ZIMG_CHROMA_BOTTOM_LEFT;
+    case AVCHROMA_LOC_BOTTOM:
+        return ZIMG_CHROMA_BOTTOM;
+    }
+    return ZIMG_CHROMA_LEFT; /* any value not listed above also falls back to left */
+}
+
+static int convert_matrix(enum AVColorSpace colorspace)
+{ /* translate an FFmpeg color space into the matching ZIMG_MATRIX_* value */
+    switch (colorspace) {
+    case AVCOL_SPC_RGB:
+        return ZIMG_MATRIX_RGB;
+    case AVCOL_SPC_BT709:
+        return ZIMG_MATRIX_709;
+    case AVCOL_SPC_UNSPECIFIED:
+        return ZIMG_MATRIX_UNSPECIFIED;
+    case AVCOL_SPC_FCC:
+        return ZIMG_MATRIX_FCC;
+    case AVCOL_SPC_BT470BG:
+        return ZIMG_MATRIX_470BG;
+    case AVCOL_SPC_SMPTE170M:
+        return ZIMG_MATRIX_170M;
+    case AVCOL_SPC_SMPTE240M:
+        return ZIMG_MATRIX_240M;
+    case AVCOL_SPC_YCGCO:
+        return ZIMG_MATRIX_YCGCO;
+    case AVCOL_SPC_BT2020_NCL:
+        return ZIMG_MATRIX_2020_NCL;
+    case AVCOL_SPC_BT2020_CL:
+        return ZIMG_MATRIX_2020_CL;
+    case AVCOL_SPC_CHROMA_DERIVED_NCL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
+    case AVCOL_SPC_CHROMA_DERIVED_CL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
+    case AVCOL_SPC_ICTCP:
+        return ZIMG_MATRIX_ICTCP;
+    }
+    return ZIMG_MATRIX_UNSPECIFIED; /* color spaces without a case above are reported as unspecified */
+}
+
+static int convert_trc(enum AVColorTransferCharacteristic color_trc)
+{ /* translate an FFmpeg transfer characteristic into the matching ZIMG_TRANSFER_* value */
+    switch (color_trc) {
+    case AVCOL_TRC_UNSPECIFIED:
+        return ZIMG_TRANSFER_UNSPECIFIED;
+    case AVCOL_TRC_BT709:
+        return ZIMG_TRANSFER_709;
+    case AVCOL_TRC_GAMMA22:
+        return ZIMG_TRANSFER_470_M;
+    case AVCOL_TRC_GAMMA28:
+        return ZIMG_TRANSFER_470_BG;
+    case AVCOL_TRC_SMPTE170M:
+        return ZIMG_TRANSFER_601;
+    case AVCOL_TRC_SMPTE240M:
+        return ZIMG_TRANSFER_240M;
+    case AVCOL_TRC_LINEAR:
+        return ZIMG_TRANSFER_LINEAR;
+    case AVCOL_TRC_LOG:
+        return ZIMG_TRANSFER_LOG_100;
+    case AVCOL_TRC_LOG_SQRT:
+        return ZIMG_TRANSFER_LOG_316;
+    case AVCOL_TRC_IEC61966_2_4:
+        return ZIMG_TRANSFER_IEC_61966_2_4;
+    case AVCOL_TRC_BT2020_10:
+        return ZIMG_TRANSFER_2020_10;
+    case AVCOL_TRC_BT2020_12:
+        return ZIMG_TRANSFER_2020_12;
+    case AVCOL_TRC_SMPTE2084:
+        return ZIMG_TRANSFER_ST2084;
+    case AVCOL_TRC_ARIB_STD_B67:
+        return ZIMG_TRANSFER_ARIB_B67;
+    case AVCOL_TRC_IEC61966_2_1:
+        return ZIMG_TRANSFER_IEC_61966_2_1;
+    }
+    return ZIMG_TRANSFER_UNSPECIFIED; /* transfer characteristics without a case above are reported as unspecified */
+}
+
+static int convert_primaries(enum AVColorPrimaries color_primaries)
+{ /* translate FFmpeg color primaries into the matching ZIMG_PRIMARIES_* value */
+    switch (color_primaries) {
+    case AVCOL_PRI_UNSPECIFIED:
+        return ZIMG_PRIMARIES_UNSPECIFIED;
+    case AVCOL_PRI_BT709:
+        return ZIMG_PRIMARIES_709;
+    case AVCOL_PRI_BT470M:
+        return ZIMG_PRIMARIES_470_M;
+    case AVCOL_PRI_BT470BG:
+        return ZIMG_PRIMARIES_470_BG;
+    case AVCOL_PRI_SMPTE170M:
+        return ZIMG_PRIMARIES_170M;
+    case AVCOL_PRI_SMPTE240M:
+        return ZIMG_PRIMARIES_240M;
+    case AVCOL_PRI_FILM:
+        return ZIMG_PRIMARIES_FILM;
+    case AVCOL_PRI_BT2020:
+        return ZIMG_PRIMARIES_2020;
+    case AVCOL_PRI_SMPTE428:
+        return ZIMG_PRIMARIES_ST428;
+    case AVCOL_PRI_SMPTE431:
+        return ZIMG_PRIMARIES_ST431_2;
+    case AVCOL_PRI_SMPTE432:
+        return ZIMG_PRIMARIES_ST432_1;
+    case AVCOL_PRI_JEDEC_P22: /* the JEDEC P22 entry is served by zimg's EBU 3213-E constant */
+        return ZIMG_PRIMARIES_EBU3213_E;
+    }
+    return ZIMG_PRIMARIES_UNSPECIFIED; /* primaries without a case above are reported as unspecified */
+}
+
+/* Map an FFmpeg color range to the zimg pixel range.
+ * Only AVCOL_RANGE_JPEG selects full range; every other value
+ * (unspecified, MPEG or anything unknown) selects limited range.
+ */
+static int convert_range(enum AVColorRange color_range)
+{
+    if (color_range == AVCOL_RANGE_JPEG)
+        return ZIMG_RANGE_FULL;
+
+    return ZIMG_RANGE_LIMITED;
+}
+
+/* Map a zimg pixel range back to the FFmpeg color range; a value that is
+ * neither limited nor full yields AVCOL_RANGE_UNSPECIFIED. */
+static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
+{
+    if (color_range == ZIMG_RANGE_LIMITED)
+        return AVCOL_RANGE_MPEG;
+    if (color_range == ZIMG_RANGE_FULL)
+        return AVCOL_RANGE_JPEG;
+    return AVCOL_RANGE_UNSPECIFIED;
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
     int ret;
+    int i;
+
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->tmp[i] = NULL;
+        s->graph[i] = NULL;
+        s->alpha_graph[i] = NULL;
+    }
+    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
 
     if (s->size_str && (s->w_expr || s->h_expr)) {
         av_log(ctx, AV_LOG_ERROR,
@@ -194,6 +372,153 @@ static int query_formats(AVFilterContext *ctx)
     return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
 }
 
+/* Tell whether two zimg image formats differ: 0 if all compared fields match, 1 otherwise. */
+static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
+{
+    const zimg_image_format *a = img_fmt0, *b = img_fmt1;
+
+    return a->width != b->width || a->height != b->height ||
+           a->subsample_w != b->subsample_w || a->subsample_h != b->subsample_h ||
+           a->depth != b->depth ||
+           a->pixel_type != b->pixel_type ||
+           a->pixel_range != b->pixel_range ||
+           a->color_family != b->color_family ||
+           a->color_primaries != b->color_primaries ||
+           a->matrix_coefficients != b->matrix_coefficients ||
+           a->transfer_characteristics != b->transfer_characteristics ||
+           a->chroma_location != b->chroma_location ||
+#if ZIMG_API_VERSION >= 0x204
+           a->alpha != b->alpha ||
+#endif
+           a->field_parity != b->field_parity;
+}
+
+/* Tell whether two sets of zimg graph builder parameters differ: 0 if the same, 1 otherwise. */
+static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
+{
+    /* Only the parameters that can change inside a single ffmpeg zscale invocation are checked.
+     * NaN is the default of some of them, so a pair of NaN values is treated as "unchanged". */
+    int differ = parm0->allow_approximate_gamma != parm1->allow_approximate_gamma ||
+                 parm0->dither_type != parm1->dither_type ||
+                 parm0->resample_filter != parm1->resample_filter ||
+                 parm0->resample_filter_uv != parm1->resample_filter_uv;
+
+    if (!(isnan(parm0->nominal_peak_luminance) && isnan(parm1->nominal_peak_luminance)))
+        differ |= parm0->nominal_peak_luminance != parm1->nominal_peak_luminance;
+    if (!(isnan(parm0->filter_param_a) && isnan(parm1->filter_param_a)))
+        differ |= parm0->filter_param_a != parm1->filter_param_a;
+    if (!(isnan(parm0->filter_param_a_uv) && isnan(parm1->filter_param_a_uv)))
+        differ |= parm0->filter_param_a_uv != parm1->filter_param_a_uv;
+    if (!(isnan(parm0->filter_param_b) && isnan(parm1->filter_param_b)))
+        differ |= parm0->filter_param_b != parm1->filter_param_b;
+    if (!(isnan(parm0->filter_param_b_uv) && isnan(parm1->filter_param_b_uv)))
+        differ |= parm0->filter_param_b_uv != parm1->filter_param_b_uv;
+
+    return differ;
+}
+
+static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
+    int colorspace, int primaries, int transfer, int range, int location)
+{ /* fill *format from the frame, its pixel format descriptor and the five override values */
+    format->width = frame->width;
+    format->height = frame->height;
+    format->subsample_w = desc->log2_chroma_w;
+    format->subsample_h = desc->log2_chroma_h;
+    format->depth = desc->comp[0].depth; /* depth of the first component is used for the whole format */
+    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV; /* the RGB flag also forces the matrix and range below */
+    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
+    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries; /* -1 means no override: take the frame's value */
+    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
+    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
+    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
+}
+
+static int print_zimg_error(AVFilterContext *ctx)
+{
+    char err_msg[1024];
+    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
+
+    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
+
+    return AVERROR_EXTERNAL;
+}
+
+/* Restrict a full-frame source/destination format pair to the slice of job_nr.
+ * The input slice is selected through active_region, the output format is
+ * shrunk to one tile. Every slice but the last gets an even height (a zimg
+ * requirement); the last one also takes the rows left over by the division. */
+static void slice_formats_init(zimg_image_format *src, zimg_image_format *dst,
+    const AVFrame *in, const AVFrame *out, const ZScaleContext *s, int job_nr)
+{
+    const unsigned int tile_h = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+    src->active_region.left   = 0;
+    src->active_region.width  = in->width;
+    src->active_region.height = s->slice_h;
+    src->active_region.top    = job_nr * src->active_region.height;
+    dst->width  = out->width;
+    dst->height = tile_h;
+
+    if (job_nr == s->nb_threads - 1) {
+        src->active_region.height = src->height - src->active_region.top;
+        dst->height = out->height - job_nr * tile_h;
+    }
+}
+
+/**
+ * (Re)build the zimg filter graph(s) and the scratch buffer of slice job_nr.
+ * The colour graph is always rebuilt; the alpha graph only when both the
+ * input and the output pixel format carry an alpha plane. A previous graph
+ * or scratch buffer stored for this job is released before it is replaced.
+ *
+ * @param in, out          frames supplying the full input/output dimensions
+ * @param desc, out_desc   pixel format descriptors of the input and output
+ * @param s                filter context; graph, alpha_graph and tmp are updated
+ * @param job_nr           slice index, must be below MAX_THREADS
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
+    ZScaleContext *s, int job_nr)
+{
+    zimg_image_format src_format = s->src_format;
+    zimg_image_format dst_format = s->dst_format;
+    size_t size, alpha_size = 0;
+
+    /* job_nr indexes the MAX_THREADS-sized arrays, nb_threads is used as a divisor */
+    if (job_nr < 0 || job_nr >= MAX_THREADS || s->nb_threads <= 0)
+        return AVERROR(EINVAL);
+
+    slice_formats_init(&src_format, &dst_format, in, out, s, job_nr);
+    zimg_filter_graph_free(s->graph[job_nr]);
+    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
+    if (!s->graph[job_nr])
+        return print_zimg_error(NULL);
+    if (zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size))
+        return print_zimg_error(NULL);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        zimg_image_format alpha_src_format = s->alpha_src_format;
+        zimg_image_format alpha_dst_format = s->alpha_dst_format;
+
+        slice_formats_init(&alpha_src_format, &alpha_dst_format, in, out, s, job_nr);
+        zimg_filter_graph_free(s->alpha_graph[job_nr]);
+        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
+        if (!s->alpha_graph[job_nr])
+            return print_zimg_error(NULL);
+        if (zimg_filter_graph_get_tmp_size(s->alpha_graph[job_nr], &alpha_size))
+            return print_zimg_error(NULL);
+    }
+
+    /* filter_slice() hands the same scratch buffer to both graphs, so size it for the larger one */
+    av_freep(&s->tmp[job_nr]);
+    s->tmp[job_nr] = av_malloc(FFMAX(size, alpha_size));
+    if (!s->tmp[job_nr])
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
 static int config_props(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -317,212 +642,15 @@ fail:
     return ret;
 }
 
-static int print_zimg_error(AVFilterContext *ctx)
-{
-    char err_msg[1024];
-    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
-
-    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
-
-    return AVERROR_EXTERNAL;
-}
-
-static int convert_chroma_location(enum AVChromaLocation chroma_location)
-{
-    switch (chroma_location) {
-    case AVCHROMA_LOC_UNSPECIFIED:
-    case AVCHROMA_LOC_LEFT:
-        return ZIMG_CHROMA_LEFT;
-    case AVCHROMA_LOC_CENTER:
-        return ZIMG_CHROMA_CENTER;
-    case AVCHROMA_LOC_TOPLEFT:
-        return ZIMG_CHROMA_TOP_LEFT;
-    case AVCHROMA_LOC_TOP:
-        return ZIMG_CHROMA_TOP;
-    case AVCHROMA_LOC_BOTTOMLEFT:
-        return ZIMG_CHROMA_BOTTOM_LEFT;
-    case AVCHROMA_LOC_BOTTOM:
-        return ZIMG_CHROMA_BOTTOM;
-    }
-    return ZIMG_CHROMA_LEFT;
-}
-
-static int convert_matrix(enum AVColorSpace colorspace)
-{
-    switch (colorspace) {
-    case AVCOL_SPC_RGB:
-        return ZIMG_MATRIX_RGB;
-    case AVCOL_SPC_BT709:
-        return ZIMG_MATRIX_709;
-    case AVCOL_SPC_UNSPECIFIED:
-        return ZIMG_MATRIX_UNSPECIFIED;
-    case AVCOL_SPC_FCC:
-        return ZIMG_MATRIX_FCC;
-    case AVCOL_SPC_BT470BG:
-        return ZIMG_MATRIX_470BG;
-    case AVCOL_SPC_SMPTE170M:
-        return ZIMG_MATRIX_170M;
-    case AVCOL_SPC_SMPTE240M:
-        return ZIMG_MATRIX_240M;
-    case AVCOL_SPC_YCGCO:
-        return ZIMG_MATRIX_YCGCO;
-    case AVCOL_SPC_BT2020_NCL:
-        return ZIMG_MATRIX_2020_NCL;
-    case AVCOL_SPC_BT2020_CL:
-        return ZIMG_MATRIX_2020_CL;
-    case AVCOL_SPC_CHROMA_DERIVED_NCL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
-    case AVCOL_SPC_CHROMA_DERIVED_CL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
-    case AVCOL_SPC_ICTCP:
-        return ZIMG_MATRIX_ICTCP;
-    }
-    return ZIMG_MATRIX_UNSPECIFIED;
-}
-
-static int convert_trc(enum AVColorTransferCharacteristic color_trc)
-{
-    switch (color_trc) {
-    case AVCOL_TRC_UNSPECIFIED:
-        return ZIMG_TRANSFER_UNSPECIFIED;
-    case AVCOL_TRC_BT709:
-        return ZIMG_TRANSFER_709;
-    case AVCOL_TRC_GAMMA22:
-        return ZIMG_TRANSFER_470_M;
-    case AVCOL_TRC_GAMMA28:
-        return ZIMG_TRANSFER_470_BG;
-    case AVCOL_TRC_SMPTE170M:
-        return ZIMG_TRANSFER_601;
-    case AVCOL_TRC_SMPTE240M:
-        return ZIMG_TRANSFER_240M;
-    case AVCOL_TRC_LINEAR:
-        return ZIMG_TRANSFER_LINEAR;
-    case AVCOL_TRC_LOG:
-        return ZIMG_TRANSFER_LOG_100;
-    case AVCOL_TRC_LOG_SQRT:
-        return ZIMG_TRANSFER_LOG_316;
-    case AVCOL_TRC_IEC61966_2_4:
-        return ZIMG_TRANSFER_IEC_61966_2_4;
-    case AVCOL_TRC_BT2020_10:
-        return ZIMG_TRANSFER_2020_10;
-    case AVCOL_TRC_BT2020_12:
-        return ZIMG_TRANSFER_2020_12;
-    case AVCOL_TRC_SMPTE2084:
-        return ZIMG_TRANSFER_ST2084;
-    case AVCOL_TRC_ARIB_STD_B67:
-        return ZIMG_TRANSFER_ARIB_B67;
-    case AVCOL_TRC_IEC61966_2_1:
-        return ZIMG_TRANSFER_IEC_61966_2_1;
-    }
-    return ZIMG_TRANSFER_UNSPECIFIED;
-}
-
-static int convert_primaries(enum AVColorPrimaries color_primaries)
-{
-    switch (color_primaries) {
-    case AVCOL_PRI_UNSPECIFIED:
-        return ZIMG_PRIMARIES_UNSPECIFIED;
-    case AVCOL_PRI_BT709:
-        return ZIMG_PRIMARIES_709;
-    case AVCOL_PRI_BT470M:
-        return ZIMG_PRIMARIES_470_M;
-    case AVCOL_PRI_BT470BG:
-        return ZIMG_PRIMARIES_470_BG;
-    case AVCOL_PRI_SMPTE170M:
-        return ZIMG_PRIMARIES_170M;
-    case AVCOL_PRI_SMPTE240M:
-        return ZIMG_PRIMARIES_240M;
-    case AVCOL_PRI_FILM:
-        return ZIMG_PRIMARIES_FILM;
-    case AVCOL_PRI_BT2020:
-        return ZIMG_PRIMARIES_2020;
-    case AVCOL_PRI_SMPTE428:
-        return ZIMG_PRIMARIES_ST428;
-    case AVCOL_PRI_SMPTE431:
-        return ZIMG_PRIMARIES_ST431_2;
-    case AVCOL_PRI_SMPTE432:
-        return ZIMG_PRIMARIES_ST432_1;
-    case AVCOL_PRI_JEDEC_P22:
-        return ZIMG_PRIMARIES_EBU3213_E;
-    }
-    return ZIMG_PRIMARIES_UNSPECIFIED;
-}
-
-static int convert_range(enum AVColorRange color_range)
-{
-    switch (color_range) {
-    case AVCOL_RANGE_UNSPECIFIED:
-    case AVCOL_RANGE_MPEG:
-        return ZIMG_RANGE_LIMITED;
-    case AVCOL_RANGE_JPEG:
-        return ZIMG_RANGE_FULL;
-    }
-    return ZIMG_RANGE_LIMITED;
-}
-
-static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
-{
-    switch (color_range) {
-    case ZIMG_RANGE_LIMITED:
-        return AVCOL_RANGE_MPEG;
-    case ZIMG_RANGE_FULL:
-        return AVCOL_RANGE_JPEG;
-    }
-    return AVCOL_RANGE_UNSPECIFIED;
-}
-
-static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
-                        int colorspace, int primaries, int transfer, int range, int location)
-{
-    format->width = frame->width;
-    format->height = frame->height;
-    format->subsample_w = desc->log2_chroma_w;
-    format->subsample_h = desc->log2_chroma_h;
-    format->depth = desc->comp[0].depth;
-    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
-    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
-    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
-    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
-    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
-    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
-    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
-}
-
-static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
-                       zimg_image_format *src_format, zimg_image_format *dst_format,
-                       void **tmp, size_t *tmp_size)
-{
-    int ret;
-    size_t size;
-
-    zimg_filter_graph_free(*graph);
-    *graph = zimg_filter_graph_build(src_format, dst_format, params);
-    if (!*graph)
-        return print_zimg_error(NULL);
-
-    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
-    if (ret)
-        return print_zimg_error(NULL);
-
-    if (size > *tmp_size) {
-        av_freep(tmp);
-        *tmp = av_malloc(size);
-        if (!*tmp)
-            return AVERROR(ENOMEM);
-
-        *tmp_size = size;
-    }
-
-    return 0;
-}
 
 static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
 {
     AVFrame *aligned = NULL;
-    int ret = 0, plane;
+    int ret = 0, plane, planes;
 
     /* Realign any unaligned input frame. */
-    for (plane = 0; plane < 3; plane++) {
+    planes = av_pix_fmt_count_planes(desc->nb_components);
+    for (plane = 0; plane < planes; plane++) {
         int p = desc->comp[plane].plane;
         if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
             if (!(aligned = av_frame_alloc())) {
@@ -554,6 +682,7 @@ fail:
     return ret;
 }
 
+
 static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
 {
     if (s->colorspace != -1)
@@ -572,20 +701,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
         frame->chroma_location = (int)s->dst_format.chroma_location + 1;
 }
 
+/**
+ * Process one horizontal slice (job_nr out of n_jobs) of the frames in data.
+ * The zimg graphs of the slice are rebuilt only when the current formats or
+ * builder parameters differ from the *_tmp copies stored in the context.
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
+{
+    ThreadData *td = data;
+    ZScaleContext *s = ctx->priv;
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    const int has_alpha = td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA;
+    /* first output row of this slice; every tile height is rounded down to an even value */
+    const int dst_top = (((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe) * job_nr;
+    int need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
+        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
+        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
+    if (has_alpha)
+        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
+            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
+            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
+
+    if (need_gb) {
+        /* graphs_build() reports zimg failures itself; keep its code, which may also be ENOMEM */
+        const int ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
+        if (ret < 0)
+            return ret;
+    }
+
+    for (int i = 0; i < 3; i++) {
+        int p = td->desc->comp[i].plane;
+        src_buf.plane[i].data = td->in->data[p];
+        src_buf.plane[i].stride = td->in->linesize[p];
+        src_buf.plane[i].mask = -1;
+
+        p = td->odesc->comp[i].plane;
+        /* planes 1 and 2 are vertically subsampled by log2_chroma_h; do the offset math in ptrdiff_t */
+        dst_buf.plane[i].data = td->out->data[p] + (ptrdiff_t)td->out->linesize[p] *
+            (dst_top >> ((p == 1 || p == 2) ? td->odesc->log2_chroma_h : 0));
+        dst_buf.plane[i].stride = td->out->linesize[p];
+        dst_buf.plane[i].mask = -1;
+    }
+    if (zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0))
+        return print_zimg_error(ctx);
+
+    if (has_alpha) {
+        src_buf.plane[0].data = td->in->data[3];
+        src_buf.plane[0].stride = td->in->linesize[3];
+        src_buf.plane[0].mask = -1;
+        dst_buf.plane[0].data = td->out->data[3] + (ptrdiff_t)td->out->linesize[3] * dst_top;
+        dst_buf.plane[0].stride = td->out->linesize[3];
+        dst_buf.plane[0].mask = -1;
+
+        if (zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0))
+            return print_zimg_error(ctx);
+    }
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    ZScaleContext *s = link->dst->priv;
-    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFilterContext *ctx = link->dst;
+    ZScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
-    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
     char buf[32];
-    int ret = 0, plane;
+    int ret = 0;
     AVFrame *out = NULL;
-
-    if ((ret = realign_frame(desc, &in)) < 0)
-        goto fail;
+    ThreadData td;
 
     if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
         ret =  AVERROR(ENOMEM);
@@ -596,35 +782,60 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     out->width  = outlink->w;
     out->height = outlink->h;
 
-    if(   in->width  != link->w
-       || in->height != link->h
-       || in->format != link->format
-       || s->in_colorspace != in->colorspace
-       || s->in_trc  != in->color_trc
-       || s->in_primaries != in->color_primaries
-       || s->in_range != in->color_range
-       || s->out_colorspace != out->colorspace
-       || s->out_trc  != out->color_trc
-       || s->out_primaries != out->color_primaries
-       || s->out_range != out->color_range
-       || s->in_chromal != in->chroma_location
-       || s->out_chromal != out->chroma_location) {
+    // the filter only has to run when the input and output differ in some property;
+    // otherwise the input frame is just copied to the output
+    if ((link->w != outlink->w) ||
+        (link->h != outlink->h) ||
+        (s->src_format.chroma_location != s->dst_format.chroma_location)||
+        (s->src_format.color_family !=s->dst_format.color_family)||
+        (s->src_format.color_primaries !=s->dst_format.color_primaries)||
+        (s->src_format.depth !=s->dst_format.depth)||
+        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients)||
+        (s->src_format.field_parity !=s->dst_format.field_parity)||
+        (s->src_format.pixel_range !=s->dst_format.pixel_range)||
+        (s->src_format.pixel_type !=s->dst_format.pixel_type)||
+        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
+    ){
+        if ((ret = realign_frame(desc, &in)) < 0)
+            goto fail;
+
         snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
         av_opt_set(s, "w", buf, 0);
         snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
         av_opt_set(s, "h", buf, 0);
 
+ 
         link->dst->inputs[0]->format = in->format;
         link->dst->inputs[0]->w      = in->width;
         link->dst->inputs[0]->h      = in->height;
 
-        if ((ret = config_props(outlink)) < 0)
-            goto fail;
+        update_output_color_information(s, out);
+    
+        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
+        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
+        s->in_colorspace = in->colorspace;
+        s->in_trc = in->color_trc;
+        s->in_primaries = in->color_primaries;
+        s->in_range = in->color_range;
+        s->out_colorspace = out->colorspace;
+        s->out_trc = out->color_trc;
+        s->out_primaries = out->color_primaries;
+        s->out_range = out->color_range;
+    
+        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+                  INT_MAX);
 
         zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
         zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
         zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
 
+        format_init(&s->src_format, in, desc, s->colorspace_in,
+            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
+        format_init(&s->dst_format, out, odesc, s->colorspace,
+            s->primaries, s->trc, s->range, s->chromal);
+
         s->params.dither_type = s->dither;
         s->params.cpu_type = ZIMG_CPU_AUTO;
         s->params.resample_filter = s->filter;
@@ -634,27 +845,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
         s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
 
-        format_init(&s->src_format, in, desc, s->colorspace_in,
-                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
-        format_init(&s->dst_format, out, odesc, s->colorspace,
-                    s->primaries, s->trc, s->range, s->chromal);
-
-        update_output_color_information(s, out);
-
-        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
-                          &s->tmp, &s->tmp_size);
-        if (ret < 0)
-            goto fail;
-
-        s->in_colorspace  = in->colorspace;
-        s->in_trc         = in->color_trc;
-        s->in_primaries   = in->color_primaries;
-        s->in_range       = in->color_range;
-        s->out_colorspace = out->colorspace;
-        s->out_trc        = out->color_trc;
-        s->out_primaries  = out->color_primaries;
-        s->out_range      = out->color_range;
-
         if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
             zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
             zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
@@ -670,76 +860,48 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
             s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
 
-            s->alpha_dst_format.width = out->width;
-            s->alpha_dst_format.height = out->height;
             s->alpha_dst_format.depth = odesc->comp[0].depth;
             s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
-            zimg_filter_graph_free(s->alpha_graph);
-            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
-            if (!s->alpha_graph) {
-                ret = print_zimg_error(link->dst);
-                goto fail;
-            }
         }
-    }
 
-    update_output_color_information(s, out);
+        td.in = in;
+        td.out = out;
+        td.desc = desc;
+        td.odesc = odesc;
 
-    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
-              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
-              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
-              INT_MAX);
-
-    for (plane = 0; plane < 3; plane++) {
-        int p = desc->comp[plane].plane;
-        src_buf.plane[plane].data   = in->data[p];
-        src_buf.plane[plane].stride = in->linesize[p];
-        src_buf.plane[plane].mask   = -1;
-
-        p = odesc->comp[plane].plane;
-        dst_buf.plane[plane].data   = out->data[p];
-        dst_buf.plane[plane].stride = out->linesize[p];
-        dst_buf.plane[plane].mask   = -1;
-    }
-
-    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-    if (ret) {
-        ret = print_zimg_error(link->dst);
-        goto fail;
-    }
+        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
 
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        src_buf.plane[0].data   = in->data[3];
-        src_buf.plane[0].stride = in->linesize[3];
-        src_buf.plane[0].mask   = -1;
-
-        dst_buf.plane[0].data   = out->data[3];
-        dst_buf.plane[0].stride = out->linesize[3];
-        dst_buf.plane[0].mask   = -1;
-
-        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-        if (ret) {
-            ret = print_zimg_error(link->dst);
-            goto fail;
+        s->src_format_tmp = s->src_format;
+        s->dst_format_tmp = s->dst_format;
+        s->params_tmp = s->params;
+        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+            s->alpha_src_format_tmp = s->alpha_src_format;
+            s->alpha_dst_format_tmp = s->alpha_dst_format;
+            s->alpha_params_tmp = s->alpha_params;
         }
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        int x, y;
-
-        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
-            for (y = 0; y < out->height; y++) {
-                for (x = 0; x < out->width; x++) {
-                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                            av_float2int(1.0f));
+
+        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
+            int x, y;
+            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+                for (y = 0; y < out->height; y++) {
+                    for (x = 0; x < out->width; x++) {
+                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                                av_float2int(1.0f));
+                    }
                 }
+            } else {
+                for (y = 0; y < outlink->h; y++)
+                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
             }
-        } else {
-            for (y = 0; y < outlink->h; y++)
-                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
         }
     }
-
+    else {
+        /*no need for any filtering */
+        ret = av_frame_copy(out, in);
+        if (ret < 0)
+            return ret;
+    }
 fail:
     av_frame_free(&in);
     if (ret) {
@@ -753,11 +915,12 @@ fail:
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
 
-    zimg_filter_graph_free(s->graph);
-    zimg_filter_graph_free(s->alpha_graph);
-    av_freep(&s->tmp);
-    s->tmp_size = 0;
+    for (int i = 0; i < s->nb_threads; i++) {      /* release the per-thread scratch buffer and graphs */
+        av_freep(&s->tmp[i]);                      /* NULL-safe; also resets the slot to NULL */
+        zimg_filter_graph_free(s->graph[i]);       /* accepts NULL, so no guard is needed */
+        zimg_filter_graph_free(s->alpha_graph[i]);
+    }
 }
 
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -941,4 +1104,5 @@ const AVFilter ff_vf_zscale = {
     FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
     FILTER_QUERY_FUNC(query_formats),
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };
-- 
2.31.1.windows.1



More information about the ffmpeg-devel mailing list