[FFmpeg-devel] [PATCH]avfilter: add Intel IPP library based x86 optimized video scaling filter

Sun May 23 15:31:11 EEST 2021

>From b01b4130d7301a289b3ad771eaeaf1ca2839045a Mon Sep 17 00:00:00 2001
From: Zhislina <Victoria.Zhislina at intel.com>
Date: Mon, 3 May 2021 21:53:54 +0300
Subject: [PATCH] avfilter: add Intel IPP library based x86 optimized video
 scaling filter.

Uses vector instructions from SSE2 (default) up to AVX-512 on suitable CPUs
and multithreading (slices) to provide a severalfold performance increase over libswscale.

Introduces the supersampling interpolation method for video downscaling.
Adds an antialiasing option for linear, cubic and lanczos interpolation.

Filter usage sample command lines.
Downscaling using supersampling with 16 threads:
ffmpeg -i input4K.webm -filter:v "scale_ipp=2560:1440:flags=super"
-threads 16 -c:v libsomeencoder 2560x1440output.file

Upscaling using default bilinear filtering with antialiasing:
ffmpeg -i input1280x720.webm -filter:v
"scale_ipp=1920:1080:ipp_antialiasing=1" -c:v libsomeencoder 1920x1080output.file

Signed-off-by: Zhislina <Victoria.Zhislina at intel.com>
---
 Changelog                  |    1 +
 configure                  |   15 +-
 doc/filters.texi           |   48 ++
 libavfilter/Makefile       |    1 +
 libavfilter/allfilters.c   |    1 +
 libavfilter/version.h      |    4 +-
 libavfilter/vf_scale_ipp.c | 1247 ++++++++++++++++++++++++++++++++++++
 7 files changed, 1314 insertions(+), 3 deletions(-)
 create mode 100644 libavfilter/vf_scale_ipp.c

diff --git a/Changelog b/Changelog
index ad950354d0..48b2f368ea 100644
--- a/Changelog
+++ b/Changelog
@@ -4,6 +4,7 @@ releases are sorted from youngest to oldest.
 version <next>:
 - ADPCM IMA Westwood encoder
 - Westwood AUD muxer
+- Intel IPP accelerated video scaling filter
 
 
 version 4.4:
diff --git a/configure b/configure
index 6580859ef2..e28f4ba8a9 100755
--- a/configure
+++ b/configure
@@ -240,6 +240,7 @@ External library support:
   --enable-libgsm          enable GSM de/encoding via libgsm [no]
   --enable-libiec61883     enable iec61883 via libiec61883 [no]
   --enable-libilbc         enable iLBC de/encoding via libilbc [no]
+  --enable-libipp          enable Intel IPP library based scaling [no]
   --enable-libjack         enable JACK audio sound server [no]
   --enable-libklvanc       enable Kernel Labs VANC processing [no]
   --enable-libkvazaar      enable HEVC encoding via libkvazaar [no]
@@ -1766,6 +1767,7 @@ EXTERNAL_LIBRARY_NONFREE_LIST="
     libfdk_aac
     openssl
     libtls
+    libipp
 "
 
 EXTERNAL_LIBRARY_VERSION3_LIST="
@@ -3643,6 +3645,7 @@ rubberband_filter_deps="librubberband"
 sab_filter_deps="gpl swscale"
 scale2ref_filter_deps="swscale"
 scale_filter_deps="swscale"
+scale_ipp_filter_deps="libipp"
 scale_qsv_filter_deps="libmfx"
 scdet_filter_select="scene_sad"
 select_filter_select="scene_sad"
@@ -6412,6 +6415,17 @@ if enabled libmfx; then
    check_cc MFX_CODEC_VP9 "mfx/mfxvp9.h mfx/mfxstructures.h" "MFX_CODEC_VP9"
 fi
 
+if enabled libipp; then
+   ipp_header_for_check='ippcore.h'
+   case $target_os in
+       mingw32*|mingw64*)
+           ipp_header_for_check='_mingw.h ippcore.h'
+           ;;
+   esac
+   check_lib libipp "$ipp_header_for_check" ippInit -Wl,--start-group -lippi -lipps -lippcore -lippvm -Wl,--end-group ||
+   die "ERROR: Intel IPP not found"
+fi
+
 enabled libmodplug        && require_pkg_config libmodplug libmodplug libmodplug/modplug.h ModPlug_Load
 enabled libmp3lame        && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame $libm_extralibs
 enabled libmysofa         && { check_pkg_config libmysofa libmysofa mysofa.h mysofa_neighborhood_init_withstepdefine ||
@@ -6490,7 +6504,6 @@ enabled libvpx            && {
         die "libvpx enabled but no supported decoders found"
     fi
 }
-
 enabled libwebp           && {
     enabled libwebp_encoder      && require_pkg_config libwebp "libwebp >= 0.2.0" webp/encode.h WebPGetEncoderVersion
     enabled libwebp_anim_encoder && check_pkg_config libwebp_anim_encoder "libwebpmux >= 0.4.0" webp/mux.h WebPAnimEncoderOptionsInit; }
diff --git a/doc/filters.texi b/doc/filters.texi
index 36e35a175b..85480b6013 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -17706,6 +17706,54 @@ If the specified expression is not valid, it is kept at its current
 value.
 @end table
 
+@section scale_ipp
+
+Use the Intel Performance Primitives library (libipp) to perform x86-optimized frame scaling.
+Setting the output width, height and the output display aspect ratio
+works in the same way as for the @ref{scale} filter.
+
+The filter supports only the YUV420 (AV_PIX_FMT_YUV420P) and YUV420P10 (AV_PIX_FMT_YUV420P10LE) image formats;
+no input-output format conversion is provided.
+
+Scaling of interlaced images is not supported.
+
+The following additional options are accepted:
+@table @option
+
+@item interpolation, flags
+The interpolation algorithm used for resizing. One of the following:
+@table @option
+@item nn
+Nearest neighbour.
+
+@item linear
+@item cubic
+2-parameter cubic (B=0, C=1/2)
+
+@item super
+Supersampling (can be used for downscaling only).
+Default interpolation value.
+
+@item lanczos
+@end table
+
+@item ipp_antialiasing
+Enables internal IPP anti-aliasing (@code{0} by default).
+Provides smoothing of jagged edges, but decreases performance.
+Can be used with the linear, cubic and lanczos interpolation algorithms only.
+
+@item ipp_threading
+Enables IPP scaling filter threading usage (@code{1} by default).
+By default the number of threads used is selected automatically based on the number of logical processors available,
+the output image size and the interpolation used for scaling.
+Set @var{ipp_threading} to @code{0} to switch IPP scaling filter threading off.
+
+@item threads
+The maximum allowed number of threads executing the IPP scaling filter. The actual number of threads used will be equal to
+@var{threads} or, in some cases, fewer, depending on the number of logical processors available,
+the output image size and the interpolation used for scaling.
+@end table
+
 @section scale_npp
 
 Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5a287364b0..101719c192 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -393,6 +393,7 @@ OBJS-$(CONFIG_SAB_FILTER)                    += vf_sab.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o scale_eval.o
 OBJS-$(CONFIG_SCALE_CUDA_FILTER)             += vf_scale_cuda.o scale_eval.o \
                                                 vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o
+OBJS-$(CONFIG_SCALE_IPP_FILTER)              += vf_scale_ipp.o scale_eval.o
 OBJS-$(CONFIG_SCALE_NPP_FILTER)              += vf_scale_npp.o scale_eval.o
 OBJS-$(CONFIG_SCALE_QSV_FILTER)              += vf_scale_qsv.o
 OBJS-$(CONFIG_SCALE_VAAPI_FILTER)            += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 931d7dbb0d..2b5014da0c 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -375,6 +375,7 @@ extern const AVFilter ff_vf_rotate;
 extern const AVFilter ff_vf_sab;
 extern const AVFilter ff_vf_scale;
 extern const AVFilter ff_vf_scale_cuda;
+extern const AVFilter ff_vf_scale_ipp;
 extern const AVFilter ff_vf_scale_npp;
 extern const AVFilter ff_vf_scale_qsv;
 extern const AVFilter ff_vf_scale_vaapi;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index f12bc876ae..1be88c6c79 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,8 +30,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR   0
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MINOR   1
+#define LIBAVFILTER_VERSION_MICRO 100
 
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vf_scale_ipp.c b/libavfilter/vf_scale_ipp.c
new file mode 100644
index 0000000000..bfd5fed084
--- /dev/null
+++ b/libavfilter/vf_scale_ipp.c
@@ -0,0 +1,1247 @@
+/*
+ * Copyright (c) 2021 Intel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Intel IPP library based x86 optimized scale video filter.
+ * Uses SIMD instructions up to AVX512 for suitable CPUs
+ * and multithreading for scaling optimization
+ */
+#include <stdio.h>
+#include <string.h>
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/x86/cpu.h"
+#include "scale_eval.h"
+#include "video.h"
+
+#include "ippcore.h"
+#include "ippi.h"
+#include "ipps.h"
+
+
+/* NULL-terminated list of variable names; presumably the names usable in the
+ * w/h expressions (w_expr/h_expr) -- TODO confirm against the expression
+ * parser call. Entries are listed in the same order as enum var_name. */
+static const char *const var_names[] = {"in_w",  "iw", "in_h",  "ih",
+                                        "out_w", "ow", "out_h", "oh",
+                                        "a", // antialiasing
+                                        NULL};
+
+/* Indices matching the entries of var_names[], one per name, in the same
+ * order. VARS_NB is the number of variables and sizes
+ * IPPScaleContext.var_values[]. */
+enum var_name {
+    VAR_IN_W,
+    VAR_IW,
+    VAR_IN_H,
+    VAR_IH,
+    VAR_OUT_W,
+    VAR_OW,
+    VAR_OUT_H,
+    VAR_OH,
+    VAR_A,
+    VARS_NB
+};
+
+/* Forward declarations so the function-pointer typedefs below can refer to
+ * the filter context before it is defined. */
+struct IPPparallelResizeInfo;
+struct IPPScaleContext;
+// IPP wrapper function ptr: resizes all three planes of a YUV420 frame.
+// src_yuv/dst_yuv hold one pointer per plane, src_step/dst_step one stride per plane.
+typedef IppStatus (*ippResize_YUV420_ptr)(struct IPPScaleContext *scale,  const Ipp8u* src_yuv[], int* src_step, Ipp8u* dst_yuv[], int* dst_step);
+//Exact ippi resize function ptr: single-plane resize without border arguments
+typedef IppStatus (IPP_STDCALL *ippiResize_C1R_L_ptr)(const Ipp8u* src_yuv, IppSizeL src_step, Ipp8u* dst_yuv, IppSizeL dst_step, IppiPointL dstOffset, IppiSizeL dst_size, const IppiResizeSpec* ipp_spec, Ipp8u* pbuffer_y);
+// Single-plane resize variant that additionally takes a border type and border value
+typedef IppStatus (IPP_STDCALL *ippiResize_border_C1R_L_ptr)(const Ipp8u* src_yuv, IppSizeL src_step, Ipp8u* dst_yuv, IppSizeL dst_step, IppiPointL dstOffset, IppiSizeL dst_size, IppiBorderType border, const Ipp8u* pBorderValue, const IppiResizeSpec* ipp_spec, Ipp8u* pbuffer_y);
+
+
+// Generic indexed callback type: i is an index, arg an opaque argument
+typedef IppStatus(*functype_l)(IppSizeL i, void* arg);
+
+/* Description of how one destination plane is split into tiles. */
+typedef struct
+{
+    IppiPointL        split;                 /* number of tiles along x and along y */
+    IppiSizeL         tile_size;             /* size of a regular tile */
+    IppiSizeL         last_tile_size;        /* size of the last tile (last column / last row) */
+    IppSizeL          tile_buffer_size;      /* work buffer size for a regular tile */
+    IppSizeL          last_tile_buffer_size; /* work buffer size for the last tile */
+    IppSizeL          num_tiles;             /* total number of tiles, i.e. of work buffers */
+} IppResizeInfo_LT;
+
+/* Pixel formats handled by this filter, terminated by AV_PIX_FMT_NONE:
+ * planar YUV 4:2:0 at 8 bits and at 10 bits (little endian). */
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV420P10LE,
+    AV_PIX_FMT_NONE
+};
+
+/* Private context of the scale_ipp filter: user options, the selected IPP
+ * entry points, per-plane resize specs, tiling layout and work buffers. */
+typedef struct IPPScaleContext {
+   const AVClass *class;
+   AVDictionary *opts;
+
+   /*
+    * New dimensions. Special values are:
+    *   0 = original width/height
+    *  -1 = keep original aspect
+    *  -N = try to keep aspect but make sure it is divisible by N
+    */
+   int w, h;
+   char *size_str;
+   unsigned int flags; /// scaling flags
+   
+   int interlaced; // not in use currently
+   
+   char *w_expr; ///< width  expression string
+   char *h_expr; ///< height expression string
+   AVExpr *w_pexpr;
+   AVExpr *h_pexpr;
+   double var_values[VARS_NB]; // one value per enum var_name entry
+   
+   char *flags_str;
+   
+   int force_original_aspect_ratio;
+   int force_divisible_by;
+   
+   int interpolation;    // passed to ippiResizeGetSize_L as the interpolation type
+   int ipp_antialiasing; // passed to ippiResizeGetSize_L as the antialiasing flag
+   
+   int ipp_threading;    // 1 = tile-based threading requested, 0 = single threaded
+   Ipp32u ipp_threads;   // number of threads used to derive the tiling
+   int ipp_data_type; //ipp8u or ipp16u
+   
+   //ipp library functions pointers to switch between interpolation types and  8/16 bits data
+   ippResize_YUV420_ptr ipp_resize_yuv420_func;
+   ippiResize_C1R_L_ptr ippi_resize_c1r_func;
+   ippiResize_border_C1R_L_ptr ippi_resize_border_c1r_func;
+   
+   // slice-threading job callbacks for the luma and chroma planes
+   avfilter_action_func *ippi_resize_y420_lt_ptr;
+   avfilter_action_func *ippi_resize_uv420_lt_ptr;
+   
+   IppiResizeSpec *ipp_spec;
+   IppiResizeSpec *ipp_spec_uv; //2 times smaller components
+   
+   Ipp8u* src_yuv[3]; //all components source ptr
+   Ipp8u* dst_yuv[3]; //all components dst ptr
+   int src_step[3];   // source stride of each plane
+   int dst_step[3];   // destination stride of each plane
+   
+   IppiSizeL src_size;
+   IppiSizeL src_size_uv; // source size halved in both dimensions
+   IppiSizeL dst_size;
+   IppiSizeL dst_size_uv; // destination size halved in both dimensions
+   IppResizeInfo_LT tile_resize_info;    // tiling of the Y plane
+   IppResizeInfo_LT tile_resize_info_uv; // tiling of each chroma plane
+   
+   unsigned char *init_buffer;
+   unsigned char **pbuffer_y; // per-tile work buffers, Y plane
+   unsigned char **pbuffer_u; // per-tile work buffers, U plane
+   unsigned char **pbuffer_v; // per-tile work buffers, V plane
+} IPPScaleContext;
+
+/* NOTE(review): allfilters.c declares this symbol as `extern const AVFilter`;
+ * the declaration here is not const-qualified -- confirm the two agree. */
+AVFilter ff_vf_scale_ipp;
+
+/* Round size up to the next multiple of align; the mask arithmetic is only
+ * correct when align is a power of two. */
+#define IPP_ALIGNED_SIZE(size, align) (((size)+(align)-1)&~((align)-1))
+/* Heuristic constants for image tiling */
+/* Pixel-count thresholds per interpolation type; presumably the
+ * min_item_number below which an image is not split -- TODO confirm at the
+ * call site. */
+#define RESIZE_NEAREST_MIN_PIX_NUMBER     (256 * 128)
+#define RESIZE_LINEAR_MIN_PIX_NUMBER      (256 * 224)
+#define RESIZE_CUBIC_MIN_PIX_NUMBER       (256 * 224)
+#define RESIZE_LANCZOS_MIN_PIX_NUMBER     RESIZE_CUBIC_MIN_PIX_NUMBER
+#define RESIZE_SUPER_MIN_PIX_NUMBER       (256 * 192)
+
+/* Smallest tile dimensions { width, height } per interpolation type. */
+static const IppiSizeL RESIZE_NEAREST_MIN_TILE_SIZE = { 1,1 };
+static const IppiSizeL RESIZE_LINEAR_MIN_TILE_SIZE = { 8,8 };
+static const IppiSizeL RESIZE_CUBIC_MIN_TILE_SIZE = { 16,16 };
+static const IppiSizeL RESIZE_LANCZOS_MIN_TILE_SIZE = { 24,24 };
+static const IppiSizeL RESIZE_SUPER_MIN_TILE_SIZE = { 16,16 };
+
+/**
+ * Query the IPP work-buffer sizes needed for a regular tile and for the last
+ * tile, and store them, rounded up to a multiple of 64, in tile_resize_info.
+ *
+ * @param ipp_spec          resize spec handed to ippiResizeGetBufferSize_L()
+ * @param tile_resize_info  tiling description; split, tile_size and
+ *                          last_tile_size are read, tile_buffer_size and
+ *                          last_tile_buffer_size are written on success
+ * @return the negative IPP status of the first failing query, otherwise the
+ *         status of the last query
+ */
+static IppStatus ipp_resize_get_tilebuffer_size(IppiResizeSpec *ipp_spec, IppResizeInfo_LT* tile_resize_info)
+{
+    IppStatus status;
+    IppSizeL  s0 = 0, s1 = 0;
+    IppiSizeL regular_tile = tile_resize_info->tile_size;
+
+    /* compute work buffer for each thread/tile */
+    /* if split is done by rows and cols considering the last col could be wider
+    and/or the last row could be higher we need to use max buffer size for all tiles */
+    if ((tile_resize_info->split.y > 1) && (tile_resize_info->split.x > 1)) {
+        regular_tile.width  = IPP_MAX(tile_resize_info->tile_size.width, tile_resize_info->last_tile_size.width);
+        regular_tile.height = IPP_MAX(tile_resize_info->tile_size.height, tile_resize_info->last_tile_size.height);
+    }
+    /* otherwise: a single row or column, the regular tile size is enough */
+
+    status = ippiResizeGetBufferSize_L(ipp_spec, regular_tile, 1, &s0);
+    /* do not let the second query overwrite a failure of the first one */
+    if (status < 0)
+        return status;
+
+    status = ippiResizeGetBufferSize_L(ipp_spec, tile_resize_info->last_tile_size, 1, &s1);
+    if (status < 0)
+        return status;
+
+    tile_resize_info->tile_buffer_size = IPP_ALIGNED_SIZE(s0, 64);
+    tile_resize_info->last_tile_buffer_size = IPP_ALIGNED_SIZE(s1, 64);
+    return status;
+}
+
+/**
+ * Increase the number of tiles along one axis until a tile holds no more
+ * than threshold items, where a tile holds tileLength * multiplier items.
+ *
+ * @param splitSize   total length of the axis being split
+ * @param multiplier  extent of a tile along the other axis
+ * @param threshold   maximum wanted number of items per tile
+ * @param tileLength  in/out: tile length along the split axis
+ * @param residual    in/out: remainder left after dividing splitSize
+ * @param num_tiles   in/out: number of tiles along the split axis
+ *
+ * The outputs are left untouched when the incoming tile already fits.
+ */
+static void split(IppSizeL splitSize, IppSizeL multiplier, IppSizeL threshold,  IppSizeL *tileLength, IppSizeL *residual, IppSizeL *num_tiles)
+{
+    IppSizeL count;
+    IppSizeL length = *tileLength;
+    IppSizeL rest   = *residual;
+
+    if (length * multiplier <= threshold)
+        return;
+
+    count = *num_tiles;
+    while (length * multiplier > threshold) {
+        /* one more tile would make the tile length zero: stop here */
+        if (splitSize / (count + 1) == 0)
+            break;
+        count++;
+        length = splitSize / count;
+        rest   = splitSize % count;
+    }
+
+    /* a remainder larger than a tile is turned into additional tiles */
+    if (rest > length) {
+        count += rest / length;
+        length = splitSize / count;
+        rest   = splitSize % count;
+    }
+
+    *num_tiles  = count;
+    *residual   = rest;
+    *tileLength = length;
+}
+
+/**
+ * Choose a tile size for splitting roiSize among numThreads threads.
+ *
+ * The whole ROI is returned as a single tile when it holds no more than
+ * min_item_number pixels or when at most one thread is available. Otherwise
+ * the ROI is split first by rows and then by columns with split().
+ *
+ * @param roiSize          size of the region to split
+ * @param min_item_number  smallest number of pixels worth giving to a tile
+ * @param minTileSize      smallest tile width/height to aim for
+ * @param pTileSize        out: the chosen tile size
+ * @param numThreads       number of threads; 0 is handled like 1
+ */
+static void get_tilesize_simple(IppiSizeL roiSize, IppSizeL min_item_number, IppiSizeL minTileSize, IppiSizeL *pTileSize, Ipp32u numThreads)
+{
+    IppiSizeL tile_size;
+    IppiSizeL residualSize = { 0 };
+    IppSizeL  cols = 1, rows = 1;
+    IppSizeL  desired_item_number;
+
+    /* numThreads <= 1 also covers 0, which would otherwise be used as a
+     * divisor below */
+    if (roiSize.width * roiSize.height <= min_item_number || numThreads <= 1) {
+        pTileSize->width = roiSize.width;
+        pTileSize->height = roiSize.height;
+        return;
+    }
+
+    tile_size.width = roiSize.width;
+    tile_size.height = roiSize.height;
+
+    desired_item_number = tile_size.width * tile_size.height / numThreads;
+    desired_item_number = IPP_MAX(min_item_number, desired_item_number);
+    desired_item_number = IPP_MAX(tile_size.width * minTileSize.height, desired_item_number);
+
+    /* splitting by row */
+    split(roiSize.height, tile_size.width, desired_item_number,  &tile_size.height, &residualSize.height, &rows);
+
+    desired_item_number = tile_size.width * tile_size.height * rows / numThreads;
+    desired_item_number = IPP_MAX(min_item_number, desired_item_number);
+    desired_item_number = IPP_MAX(tile_size.height * minTileSize.width, desired_item_number);
+
+    /* splitting by col */
+    split(roiSize.width, tile_size.height, desired_item_number, &tile_size.width, &residualSize.width, &cols);
+
+    pTileSize->width = tile_size.width;
+    pTileSize->height = tile_size.height;
+}
+
+/**
+ * Derive the tile grid for roiSize from a requested tile size.
+ *
+ * @param roiSize    size of the region to cover
+ * @param tile_size  requested tile size; clamped to roiSize
+ * @param pSplit     out: number of whole tiles along x and along y
+ * @param pTileSize  out: size of a regular tile
+ * @param pLastSize  out: size of the last tile, which absorbs the remainder
+ *
+ * NOTE(review): the modulo operations divide by the clamped tile dimensions,
+ * so a zero-sized ROI or a zero requested tile divides by zero -- confirm
+ * callers always pass positive sizes.
+ */
+static void split_to_tiles(IppiSizeL roiSize, IppiSizeL tile_size, IppiPointL *pSplit, IppiSizeL *pTileSize, IppiSizeL *pLastSize)
+{
+    IppSizeL width = roiSize.width;
+    IppSizeL height = roiSize.height;
+    IppSizeL width_tile = tile_size.width;
+    IppSizeL height_tile = tile_size.height;
+    IppSizeL width_last, height_last;
+    IppSizeL addition_x, addition_y, addition_last_x, addition_last_y;
+    /* a tile cannot be larger than the region itself */
+    if (width_tile > width)   width_tile = width;
+    if (height_tile > height) height_tile = height;
+    /* pixels left over after placing whole tiles */
+    width_last = width % width_tile;
+    height_last = height % height_tile;
+    (*pSplit).x = (IppSizeL)(width / width_tile);
+    (*pSplit).y = (IppSizeL)(height / height_tile);
+    /* this store is repeated or overwritten in both branches below */
+    (*pTileSize).height = height_tile;
+
+    if ((height_last < (*pSplit).y) && height_last) {
+        /* non-zero height remainder smaller than the number of tile rows:
+         * regular tiles keep the clamped size, the last tile takes the whole
+         * remainder in both dimensions */
+        (*pTileSize).width = width_tile;
+        (*pTileSize).height = height_tile;
+        (*pLastSize).width = width_tile + width_last;
+        (*pLastSize).height = height_tile + height_last;
+    } else {
+        /* spread the remainder evenly over all tiles; what still does not
+         * divide evenly goes to the last tile */
+        addition_x = width_last / (*pSplit).x;
+        addition_y = height_last / (*pSplit).y;
+        (*pTileSize).width = width_tile + addition_x;
+        (*pTileSize).height = height_tile + addition_y;
+        addition_last_x = width_last % ((*pSplit).x);
+        addition_last_y = height_last % ((*pSplit).y);
+        (*pLastSize).width = (*pTileSize).width + addition_last_x;
+        (*pLastSize).height = height_tile + addition_y + addition_last_y;
+    }
+}
+
+/**
+ * Compute the offset and size of the tile with the given flat index.
+ * Tiles are numbered row by row: row = index / split_image.x,
+ * column = index % split_image.x.
+ *
+ * @param index        flat tile index
+ * @param split_image  number of tiles along x and along y
+ * @param tile_size    size of a regular tile
+ * @param tail_size    size of the last tile
+ * @param pTileOffset  out: offset of the tile
+ * @param pTileSize    out: size of the tile
+ * @return ippStsNullPtrErr if an output pointer is NULL, ippStsSizeErr if the
+ *         row is outside the grid, ippStsNoErr otherwise
+ */
+static IppStatus get_tile_params_by_index(IppSizeL index, IppiPointL split_image, IppiSizeL tile_size, IppiSizeL tail_size, IppiPointL *pTileOffset, IppiSizeL *pTileSize)
+{
+    IppSizeL i, j;
+    IppSizeL first_greater_index = 1;
+    int k;
+    int add;
+    if (pTileOffset == NULL || pTileSize == NULL) return ippStsNullPtrErr;
+    i = index / split_image.x; /* tile row */
+    j = index % split_image.x; /* tile column */
+
+    if (i >= split_image.y)
+        return ippStsSizeErr;
+
+    (*pTileOffset).x = j * tile_size.width;
+    (*pTileOffset).y = i * tile_size.height;
+    /* only the last column uses the tail width */
+    (*pTileSize).width = (j < split_image.x - 1) ? tile_size.width : tail_size.width;
+    /* number of extra rows the last tile has compared to a regular tile */
+    first_greater_index = tail_size.height - tile_size.height;
+    /* NOTE(review): k is derived from the flat index, not from the row i --
+     * confirm this is intended when there is more than one tile column */
+    k = split_image.y - index;
+    add = split_image.y - first_greater_index;
+    if ((first_greater_index < split_image.y) && (tail_size.height > tile_size.height) && (first_greater_index > 0)) {
+        /* the extra rows are handed out one per tile row to the last
+         * first_greater_index tile rows instead of all going to the last */
+        if (i < split_image.y - first_greater_index) {
+            (*pTileSize).height = tile_size.height;
+        } else {
+            (*pTileSize).height = (tile_size.height + 1);
+            /* shift down by the extra rows already given to earlier tiles */
+            if (k < first_greater_index)
+                (*pTileOffset).y = i * tile_size.height + (i - add) * 1;
+            else
+                (*pTileOffset).y = i * tile_size.height;
+        }
+    } else {
+        /* only the last row uses the tail height */
+        (*pTileSize).height = (i < split_image.y - 1) ? tile_size.height : tail_size.height;
+    }
+    return ippStsNoErr;
+}
+
+/**
+ * Wrapper around get_tile_params_by_index() that drops the status.
+ * The offset is computed into a zero-initialized local and each coordinate is
+ * narrowed to int before being stored, so pTileOffset is always written.
+ */
+static void get_tile_by_index(int index, IppiPointL split_image, IppiSizeL tile_size, IppiSizeL tail_size, IppiPointL *pTileOffset, IppiSizeL *pTileSize)
+{
+    IppiPointL offset = { 0 };
+
+    /* status intentionally unused; on failure the offset stays { 0 } */
+    (void)get_tile_params_by_index(index, split_image, tile_size, tail_size, &offset, pTileSize);
+
+    pTileOffset->y = (int)offset.y;
+    pTileOffset->x = (int)offset.x;
+}
+
+/**
+ * Return the address of the sample at column w, row h of a plane.
+ *
+ * @param pData          start of the plane
+ * @param dataStep       row stride, applied as a byte offset
+ * @param w              column, counted in samples
+ * @param h              row
+ * @param ipp_data_type  ipp8u for one-byte samples; any other value is
+ *                       treated as Ipp16u samples
+ */
+static Ipp8u* get_image_pointer_8u(const Ipp8u* pData, IppSizeL dataStep, IppSizeL w, IppSizeL h, int ipp_data_type)
+{
+    if (ipp_data_type == ipp8u)
+        return (Ipp8u*)(pData + w) + h * dataStep;
+
+    /* Ipp16u samples: step over columns in 16-bit units, over rows in bytes */
+    return (Ipp8u*)((Ipp16u*)(pData) + w) + h * dataStep;
+}
+
+/**
+ * Allocate the table of per-tile IPP work buffers for one plane.
+ *
+ * @param pbuffer           out: receives an array of num_tiles buffer pointers
+ * @param tile_resize_info  tiling description; num_tiles, tile_buffer_size
+ *                          and last_tile_buffer_size are read
+ * @param ipp_data_type     ipp8u selects ippsMalloc_8u(), anything else
+ *                          ippsMalloc_16u()
+ * @return 0 on success, AVERROR(ENOMEM) if any allocation fails
+ *
+ * On failure the buffers allocated so far stay in *pbuffer (the remaining
+ * entries are NULL thanks to av_calloc()).
+ * NOTE(review): presumably the caller's cleanup path releases them -- confirm.
+ */
+static int alloc_internal_ipp_buffers(unsigned char **pbuffer[], IppResizeInfo_LT* tile_resize_info, int ipp_data_type)
+{
+    int buffer_size = tile_resize_info->tile_buffer_size;
+
+    *pbuffer = av_calloc(tile_resize_info->num_tiles, sizeof(**pbuffer));
+    /* the table itself may fail to allocate; do not index into NULL */
+    if (!*pbuffer)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < tile_resize_info->num_tiles; i++) {
+        /* the last tile may need a different buffer size */
+        if (i == (tile_resize_info->num_tiles - 1))
+            buffer_size = tile_resize_info->last_tile_buffer_size;
+        if (ipp_data_type == ipp8u)
+            (*pbuffer)[i] = ippsMalloc_8u(buffer_size);
+        else
+            (*pbuffer)[i] = (unsigned char*)ippsMalloc_16u(buffer_size);
+
+        if (!(*pbuffer)[i])
+            return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+/**
+ * Threading job: resize one tile of a chroma plane.
+ * Jobs below the chroma tile count work on the U plane (index 1); all other
+ * jobs work on the V plane (index 2) with the tile index restarted from zero.
+ * arg and nb_jobs are not used.
+ *
+ * @return the status of ippiResizeGetSrcOffset_L() if it is negative,
+ *         otherwise the status of the resize call
+ */
+static int ipp_resize_uv420_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *s = ctx->priv;
+    IppResizeInfo_LT *tiles = &s->tile_resize_info_uv;
+    const int is_u  = jobnr < tiles->num_tiles;
+    const int plane = is_u ? 1 : 2;
+    const IppSizeL tile_idx = is_u ? jobnr : jobnr - tiles->num_tiles;
+    unsigned char **work_buffers = is_u ? s->pbuffer_u : s->pbuffer_v;
+    IppiSizeL  tile_dims = { 0 };
+    IppiPointL dst_offset, src_offset;
+    const Ipp8u *src_tile = 0;
+    Ipp8u       *dst_tile = 0;
+    IppStatus status;
+
+    get_tile_by_index(tile_idx, tiles->split, tiles->tile_size, tiles->last_tile_size, &dst_offset, &tile_dims);
+
+    status = ippiResizeGetSrcOffset_L(s->ipp_spec_uv, dst_offset, &src_offset);
+    if (status < 0)
+        return status;
+
+    src_tile = get_image_pointer_8u(s->src_yuv[plane], s->src_step[plane], src_offset.x, src_offset.y, s->ipp_data_type);
+    dst_tile = get_image_pointer_8u(s->dst_yuv[plane], s->dst_step[plane], dst_offset.x, dst_offset.y, s->ipp_data_type);
+
+    status = s->ippi_resize_c1r_func(src_tile, (IppSizeL)s->src_step[plane], dst_tile, (IppSizeL)s->dst_step[plane], dst_offset, tile_dims, s->ipp_spec_uv, work_buffers[tile_idx]);
+    return status;
+}
+
+/**
+ * Threading job covering all planes: jobs below the luma tile count resize
+ * one Y tile; the remaining jobs are passed to ipp_resize_uv420_lt_func()
+ * with the job number restarted from zero. arg is not used.
+ *
+ * @return the status of ippiResizeGetSrcOffset_L() if it is negative,
+ *         otherwise the status of the resize call (or of the chroma job)
+ */
+static int ipp_resize_y420_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *s = ctx->priv;
+    IppResizeInfo_LT *tiles = &s->tile_resize_info;
+    /* destination tile placement and the matching source offset */
+    IppiPointL dst_offset = { 0 }, src_offset = { 0 };
+    IppiSizeL  tile_dims = { 0 };
+    const Ipp8u *src_tile = 0;
+    Ipp8u       *dst_tile = 0;
+    IppStatus status;
+
+    if (jobnr >= tiles->num_tiles) {
+        /* chroma job: start from zero job */
+        status = ipp_resize_uv420_lt_func(ctx, NULL, jobnr- tiles->num_tiles, nb_jobs);
+        return status;
+    }
+
+    get_tile_by_index(jobnr, tiles->split, tiles->tile_size, tiles->last_tile_size, &dst_offset, &tile_dims);
+
+    status = ippiResizeGetSrcOffset_L(s->ipp_spec, dst_offset, &src_offset);
+    if (status < 0)
+        return status;
+
+    /* pointers to the first sample of the source and destination tiles */
+    src_tile = get_image_pointer_8u(s->src_yuv[0], s->src_step[0], src_offset.x, src_offset.y, s->ipp_data_type);
+    dst_tile = get_image_pointer_8u(s->dst_yuv[0], s->dst_step[0], dst_offset.x, dst_offset.y, s->ipp_data_type);
+
+    status = s->ippi_resize_c1r_func(src_tile, (IppSizeL)s->src_step[0], dst_tile, (IppSizeL)s->dst_step[0], dst_offset, tile_dims, s->ipp_spec, s->pbuffer_y[jobnr]);
+    return status;
+}
+
+/**
+ * Threading job: resize one tile of a chroma plane through the border-aware
+ * resize function, using ippBorderRepl and no border value.
+ * Jobs below the chroma tile count work on the U plane (index 1); all other
+ * jobs work on the V plane (index 2) with the tile index restarted from zero.
+ * arg and nb_jobs are not used.
+ *
+ * @return the status of ippiResizeGetSrcOffset_L() if it is negative,
+ *         otherwise the status of the resize call
+ */
+static int ipp_resize_uv420_border_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *s = ctx->priv;
+    IppResizeInfo_LT *tiles = &s->tile_resize_info_uv;
+    const int is_u  = jobnr < tiles->num_tiles;
+    const int plane = is_u ? 1 : 2;
+    const IppSizeL tile_idx = is_u ? jobnr : jobnr - tiles->num_tiles;
+    unsigned char **work_buffers = is_u ? s->pbuffer_u : s->pbuffer_v;
+    IppiSizeL  tile_dims = { 0 };
+    IppiPointL dst_offset, src_offset;
+    const Ipp8u *src_tile = 0;
+    Ipp8u       *dst_tile = 0;
+    IppStatus status;
+
+    get_tile_by_index(tile_idx, tiles->split, tiles->tile_size, tiles->last_tile_size, &dst_offset, &tile_dims);
+
+    status = ippiResizeGetSrcOffset_L(s->ipp_spec_uv, dst_offset, &src_offset);
+    if (status < 0)
+        return status;
+
+    /* pointers to the first sample of the source and destination tiles */
+    src_tile = get_image_pointer_8u(s->src_yuv[plane], s->src_step[plane], src_offset.x, src_offset.y, s->ipp_data_type);
+    dst_tile = get_image_pointer_8u(s->dst_yuv[plane], s->dst_step[plane], dst_offset.x, dst_offset.y, s->ipp_data_type);
+
+    status = s->ippi_resize_border_c1r_func(src_tile, (IppSizeL)s->src_step[plane], dst_tile, (IppSizeL)s->dst_step[plane], dst_offset, tile_dims, ippBorderRepl, NULL, s->ipp_spec_uv, work_buffers[tile_idx]);
+    return status;
+}
+
+/**
+ * Threading job covering all planes, border-aware variant: jobs below the
+ * luma tile count resize one Y tile with ippBorderRepl; the remaining jobs
+ * are passed to ipp_resize_uv420_border_lt_func() with the job number
+ * restarted from zero. arg is not used.
+ *
+ * @return the status of ippiResizeGetSrcOffset_L() if it is negative,
+ *         otherwise the status of the resize call (or of the chroma job)
+ */
+static int ipp_resize_y420_border_lt_func(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    IPPScaleContext *s = ctx->priv;
+    IppResizeInfo_LT *tiles = &s->tile_resize_info;
+    /* destination tile placement and the matching source offset */
+    IppiPointL dst_offset = { 0 }, src_offset = { 0 };
+    IppiSizeL  tile_dims = { 0 };
+    const Ipp8u *src_tile = 0;
+    Ipp8u       *dst_tile = 0;
+    IppStatus status;
+
+    if (jobnr >= tiles->num_tiles) {
+        /* chroma job: start from zero */
+        status = ipp_resize_uv420_border_lt_func(ctx, NULL, jobnr - tiles->num_tiles, nb_jobs);
+        return status;
+    }
+
+    get_tile_by_index(jobnr, tiles->split, tiles->tile_size, tiles->last_tile_size, &dst_offset, &tile_dims);
+
+    status = ippiResizeGetSrcOffset_L(s->ipp_spec, dst_offset, &src_offset);
+    if (status < 0)
+        return status;
+
+    /* pointers to the first sample of the source and destination tiles */
+    src_tile = get_image_pointer_8u(s->src_yuv[0], s->src_step[0], src_offset.x, src_offset.y, s->ipp_data_type);
+    dst_tile = get_image_pointer_8u(s->dst_yuv[0], s->dst_step[0], dst_offset.x, dst_offset.y, s->ipp_data_type);
+
+    status = s->ippi_resize_border_c1r_func(src_tile, (IppSizeL)s->src_step[0], dst_tile, (IppSizeL)s->dst_step[0], dst_offset, tile_dims, ippBorderRepl, NULL, s->ipp_spec, s->pbuffer_y[jobnr]);
+    return status;
+}
+
+/*
+ * Select the status to report for a three-plane resize: the first error
+ * (negative status) if there is one, otherwise the first warning (non-zero
+ * status), otherwise ippStsNoErr. Unlike a bitwise OR of the three values,
+ * the result is always one of the statuses actually returned by IPP.
+ */
+static IppStatus ipp_merge_plane_status(IppStatus sts_y, IppStatus sts_u, IppStatus sts_v)
+{
+    if (sts_y < 0)
+        return sts_y;
+    if (sts_u < 0)
+        return sts_u;
+    if (sts_v < 0)
+        return sts_v;
+    if (sts_y)
+        return sts_y;
+    if (sts_u)
+        return sts_u;
+    return sts_v;
+}
+
+/**
+ * Single threaded version - all channels resize.
+ * Resizes the Y, U and V planes one after another over the whole destination
+ * size; all three calls share the work buffer pbuffer_y[0].
+ *
+ * @param scale     filter context providing the resize function, specs,
+ *                  destination sizes and work buffer
+ * @param src_yuv   source plane pointers (Y, U, V)
+ * @param src_step  source strides, one per plane
+ * @param dst_yuv   destination plane pointers (Y, U, V)
+ * @param dst_step  destination strides, one per plane
+ * @return ippStsNoErr if all planes succeeded, otherwise the first error or,
+ *         if there is none, the first warning
+ */
+static IppStatus ipp_resize_yuv420_l(IPPScaleContext *scale, const Ipp8u* src_yuv[], int* src_step, Ipp8u* dst_yuv[], int* dst_step)
+{
+    IppiPointL dst_roi_offset = { 0, 0 };
+    IppStatus ipp_sts, ipp_sts_u, ipp_sts_v;
+    ipp_sts = scale->ippi_resize_c1r_func(src_yuv[0], (IppSizeL)src_step[0], dst_yuv[0], (IppSizeL)dst_step[0], dst_roi_offset, scale->dst_size, scale->ipp_spec, scale->pbuffer_y[0]);
+
+    ipp_sts_u = scale->ippi_resize_c1r_func(src_yuv[1], (IppSizeL)src_step[1], dst_yuv[1], (IppSizeL)dst_step[1], dst_roi_offset, scale->dst_size_uv, scale->ipp_spec_uv, scale->pbuffer_y[0]);
+    ipp_sts_v = scale->ippi_resize_c1r_func(src_yuv[2], (IppSizeL)src_step[2], dst_yuv[2], (IppSizeL)dst_step[2], dst_roi_offset, scale->dst_size_uv, scale->ipp_spec_uv, scale->pbuffer_y[0]);
+
+    return ipp_merge_plane_status(ipp_sts, ipp_sts_u, ipp_sts_v);
+}
+
+/**
+ * Single threaded version - all channels resize, border-aware variant.
+ * Same as ipp_resize_yuv420_l() but calls the resize function that takes a
+ * border type, with ippBorderRepl and no border value.
+ *
+ * @return ippStsNoErr if all planes succeeded, otherwise the first error or,
+ *         if there is none, the first warning
+ */
+static IppStatus ipp_resize_yuv420_border_l(IPPScaleContext *scale, const Ipp8u* src_yuv[], int* src_step, Ipp8u* dst_yuv[], int* dst_step)
+{
+    IppiPointL dst_roi_offset = { 0, 0 };
+    IppStatus ipp_sts, ipp_sts_u, ipp_sts_v;
+    ipp_sts = scale->ippi_resize_border_c1r_func(src_yuv[0], (IppSizeL)src_step[0], dst_yuv[0], (IppSizeL)dst_step[0], dst_roi_offset, scale->dst_size, ippBorderRepl, NULL, scale->ipp_spec, scale->pbuffer_y[0]);
+
+    ipp_sts_u = scale->ippi_resize_border_c1r_func(src_yuv[1], (IppSizeL)src_step[1], dst_yuv[1], (IppSizeL)dst_step[1], dst_roi_offset, scale->dst_size_uv, ippBorderRepl, NULL, scale->ipp_spec_uv, scale->pbuffer_y[0]);
+    ipp_sts_v = scale->ippi_resize_border_c1r_func(src_yuv[2], (IppSizeL)src_step[2], dst_yuv[2], (IppSizeL)dst_step[2], dst_roi_offset, scale->dst_size_uv, ippBorderRepl, NULL, scale->ipp_spec_uv, scale->pbuffer_y[0]);
+
+    return ipp_merge_plane_status(ipp_sts, ipp_sts_u, ipp_sts_v);
+}
+
+/**
+ * Init ipp functions pointers and get sizes for internal and initialization
+ * buffers.
+ *
+ * Selects the IPP resize entry points matching the configured interpolation
+ * and sample depth, allocates the Y and UV resize specs, the shared
+ * initialization buffer and the per-tile work buffers, and computes the tile
+ * split used by the threaded path.
+ *
+ * @param inlink  input link; supplies the source width/height and pixel format
+ * @param outlink output link; supplies the destination width/height
+ * @return ippStsNoErr on success; otherwise the failing (non-zero) IPP status,
+ *         AVERROR(ENOMEM) on allocation failure or AVERROR(EINVAL) when the
+ *         configured interpolation is not one of the handled types
+ */
+static IppStatus ipp_init_resize(AVFilterLink *inlink, AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    IPPScaleContext *scale = ctx->priv;
+    IppSizeL spec_size=0, init_size=0, buffer_size=0;
+    IppSizeL spec_size_uv=0, init_size_uv=0,buffer_size_uv=0;
+    IppStatus ipp_sts = ippStsNoErr, ipp_sts_uv = ippStsNoErr;
+    IppSizeL min_item_number=0;
+    IppiSizeL minTileSize = { 0,0 };
+
+    //if scale->ipp_threading==0  don't use threading independently on other params
+    if (scale->ipp_threading == 1) {
+        if (ctx->nb_threads > 1) {
+            scale->ipp_threads = ff_filter_get_nb_threads(ctx);
+        } else {
+            if (ctx->nb_threads <= 0)
+                scale->ipp_threads = av_cpu_count();
+            if (ctx->nb_threads == 1)
+                scale->ipp_threading = 0;
+        }
+    }
+
+    // NOTE(review): chroma dimensions are rounded down here; for odd luma
+    // sizes 4:2:0 chroma planes are normally ceil(size / 2) - confirm whether
+    // odd sizes can reach this point.
+    scale->src_size.width = inlink->w;
+    scale->src_size.height = inlink->h;
+    scale->src_size_uv.width = inlink->w >> 1;
+    scale->src_size_uv.height = inlink->h >> 1;
+
+    scale->dst_size.width = outlink->w;
+    scale->dst_size.height = outlink->h;
+    scale->dst_size_uv.width = outlink->w >> 1;
+    scale->dst_size_uv.height = outlink->h >> 1;
+
+    scale->ipp_data_type = ipp8u;
+    if (inlink->format == AV_PIX_FMT_YUV420P10LE) {
+        scale->ipp_data_type = ipp16u;
+    }
+
+    scale->ippi_resize_c1r_func = NULL;
+    scale->ippi_resize_border_c1r_func = NULL;
+
+    // NOTE(review): buffers that already exist are reused as they are when
+    // this function runs again; confirm the required sizes cannot grow
+    // between calls.
+    ipp_sts = ippiResizeGetSize_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->interpolation, scale->ipp_antialiasing,
+                 &spec_size, &init_size);
+    if (ipp_sts) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeGetSize Y error: %d\n", ipp_sts);
+        return ipp_sts;
+    }
+    if (!scale->ipp_spec)
+        scale->ipp_spec = (IppiResizeSpec *)ippsMalloc_8u(spec_size);
+    if (!scale->ipp_spec) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize Y spec \n");
+        return AVERROR(ENOMEM);
+    }
+
+    ipp_sts_uv = ippiResizeGetSize_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, scale->interpolation, scale->ipp_antialiasing,
+                   &spec_size_uv, &init_size_uv);
+    if (ipp_sts_uv) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeGetSize UV error: %d\n", ipp_sts_uv);
+        // report the UV status: the Y status is zero at this point
+        return ipp_sts_uv;
+    }
+    if (!scale->ipp_spec_uv)
+        scale->ipp_spec_uv = (IppiResizeSpec *)ippsMalloc_8u(spec_size_uv);
+    if (!scale->ipp_spec_uv) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize UV spec \n");
+        return AVERROR(ENOMEM);
+    }
+
+    // allocate initialization buffer, use the max Y capacity
+    init_size = init_size > init_size_uv ? init_size : init_size_uv;
+    if (init_size &&
+        ((scale->interpolation == ippCubic) || (scale->interpolation == ippLanczos) ||
+         ((scale->ipp_antialiasing == 1) && (scale->interpolation == ippLinear)))) {
+        if (!scale->init_buffer){
+            if (scale->ipp_data_type == ipp8u)
+                scale->init_buffer = ippsMalloc_8u(init_size);
+            else
+                scale->init_buffer = (unsigned char*)ippsMalloc_16u(init_size);
+        }
+        if (!scale->init_buffer) {
+            av_log(ctx, AV_LOG_ERROR,
+                "Cannot allocate memory for resize init buffer");
+            return AVERROR(ENOMEM);
+        }
+    }
+    // init ipp resizer
+    if (scale->interpolation == ippSuper) {
+        ipp_sts = ippiResizeSuperInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->ipp_spec);
+        ipp_sts_uv = ippiResizeSuperInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type,  scale->ipp_spec_uv);
+        scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_lt_func;
+        scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_lt_func;
+        scale->ipp_resize_yuv420_func = ipp_resize_yuv420_l;
+        min_item_number =  RESIZE_SUPER_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_SUPER_MIN_TILE_SIZE;
+        if (scale->ipp_data_type == ipp8u)
+            scale->ippi_resize_c1r_func = ippiResizeSuper_8u_C1R_L;
+        else
+            scale->ippi_resize_c1r_func = (ippiResize_C1R_L_ptr)ippiResizeSuper_16u_C1R_L;
+    } else if (scale->interpolation == ippLinear) {
+        if (scale->ipp_antialiasing == 1) {
+            ipp_sts = ippiResizeAntialiasingLinearInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type,  scale->ipp_spec,
+                scale->init_buffer);
+            ipp_sts_uv = ippiResizeAntialiasingLinearInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type,  scale->ipp_spec_uv,
+                scale->init_buffer);
+            if (scale->ipp_data_type == ipp8u)
+                scale->ippi_resize_border_c1r_func = ippiResizeAntialiasing_8u_C1R_L;
+            if (scale->ipp_data_type == ipp16u)
+                scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+        } else {
+            ipp_sts = ippiResizeLinearInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type,  scale->ipp_spec);
+            ipp_sts_uv = ippiResizeLinearInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type,  scale->ipp_spec_uv);
+            if (scale->ipp_data_type == ipp8u)
+                scale->ippi_resize_border_c1r_func = ippiResizeLinear_8u_C1R_L;
+            if (scale->ipp_data_type == ipp16u)
+                scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeLinear_16u_C1R_L;
+        }
+        scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_border_lt_func;
+        scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_border_lt_func;
+        scale->ipp_resize_yuv420_func = ipp_resize_yuv420_border_l;
+        min_item_number = RESIZE_LINEAR_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_LINEAR_MIN_TILE_SIZE;
+    } else if (scale->interpolation == ippLanczos) {
+        // here the simplest Lanczos filtering with numLobes =2 is used.
+        if (scale->ipp_antialiasing == 1) {
+            ipp_sts = ippiResizeAntialiasingLanczosInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 2, scale->ipp_spec,
+                scale->init_buffer);
+            ipp_sts_uv = ippiResizeAntialiasingLanczosInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 2, scale->ipp_spec_uv,
+                scale->init_buffer);
+            if (scale->ipp_data_type == ipp8u)
+                scale->ippi_resize_border_c1r_func = ippiResizeAntialiasing_8u_C1R_L;
+            else
+                scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+        } else {
+            ipp_sts = ippiResizeLanczosInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 2, scale->ipp_spec,
+                scale->init_buffer);
+            ipp_sts_uv = ippiResizeLanczosInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 2, scale->ipp_spec_uv,
+                scale->init_buffer);
+            if (scale->ipp_data_type == ipp8u)
+                scale->ippi_resize_border_c1r_func = ippiResizeLanczos_8u_C1R_L;
+            else
+                scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeLanczos_16u_C1R_L;
+        }
+        scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_border_lt_func;
+        scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_border_lt_func;
+        scale->ipp_resize_yuv420_func = ipp_resize_yuv420_border_l;
+        min_item_number = RESIZE_LANCZOS_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_LANCZOS_MIN_TILE_SIZE;
+    } else if (scale->interpolation == ippCubic) {
+        // cubic  IPPI_INTER_CUBIC2P_CATMULLROM is used  (B=0, C=1/2)
+        if (scale->ipp_antialiasing == 1) {
+            ipp_sts = ippiResizeAntialiasingCubicInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 0, 0.5, scale->ipp_spec,
+                scale->init_buffer);
+            ipp_sts_uv = ippiResizeAntialiasingCubicInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 0, 0.5, scale->ipp_spec_uv,
+                scale->init_buffer);
+            if (scale->ipp_data_type == ipp8u)
+                scale->ippi_resize_border_c1r_func = ippiResizeAntialiasing_8u_C1R_L;
+            else
+                scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeAntialiasing_16u_C1R_L;
+        } else {
+            ipp_sts = ippiResizeCubicInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, 0, 0.5, scale->ipp_spec,
+                scale->init_buffer);
+            ipp_sts_uv = ippiResizeCubicInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, 0, 0.5, scale->ipp_spec_uv,
+                scale->init_buffer);
+            if (scale->ipp_data_type == ipp8u)
+                scale->ippi_resize_border_c1r_func = ippiResizeCubic_8u_C1R_L;
+            else
+                scale->ippi_resize_border_c1r_func = (ippiResize_border_C1R_L_ptr)ippiResizeCubic_16u_C1R_L;
+        }
+        scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_border_lt_func;
+        scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_border_lt_func;
+        scale->ipp_resize_yuv420_func = ipp_resize_yuv420_border_l;
+        min_item_number = RESIZE_CUBIC_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_CUBIC_MIN_TILE_SIZE;
+    } else if (scale->interpolation == ippNearest) {
+        ipp_sts = ippiResizeNearestInit_L(scale->src_size, scale->dst_size, scale->ipp_data_type, scale->ipp_spec);
+        ipp_sts_uv = ippiResizeNearestInit_L(scale->src_size_uv, scale->dst_size_uv, scale->ipp_data_type, scale->ipp_spec_uv);
+        scale->ipp_resize_yuv420_func = ipp_resize_yuv420_l;
+        scale->ippi_resize_y420_lt_ptr = ipp_resize_y420_lt_func;
+        scale->ippi_resize_uv420_lt_ptr = ipp_resize_uv420_lt_func;
+        min_item_number = RESIZE_NEAREST_MIN_PIX_NUMBER;
+        minTileSize = RESIZE_NEAREST_MIN_TILE_SIZE;
+        if (scale->ipp_data_type == ipp8u)
+            scale->ippi_resize_c1r_func = ippiResizeNearest_8u_C1R_L;
+        else
+            scale->ippi_resize_c1r_func = (ippiResize_C1R_L_ptr)ippiResizeNearest_16u_C1R_L;
+    } else {
+        // no resize function was selected above, so scaling cannot proceed
+        av_log(ctx, AV_LOG_ERROR, "Unsupported interpolation type: %d\n", scale->interpolation);
+        return AVERROR(EINVAL);
+    }
+    if (ipp_sts) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeInit Y error: %d\n", ipp_sts);
+        return ipp_sts;
+    }
+    if (ipp_sts_uv) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeInit UV error: %d\n", ipp_sts_uv);
+        // report the UV status: the Y status is zero at this point
+        return ipp_sts_uv;
+    }
+
+    if (scale->ipp_threading) {
+        /*Split to tiles !!*/
+        IppiSizeL tile_size, tileSizeUV;
+        int num_threads_y, num_threads_uv;
+        // 2/3 of the threads work on Y, 1/3 = 1/6 + 1/6 of threads work on U and V
+        num_threads_uv = (scale->ipp_threads > 6) ? scale->ipp_threads / 6 : 1;
+        num_threads_y =  (scale->ipp_threads > 3) ? scale->ipp_threads - 2 * num_threads_uv : 1;
+
+        get_tilesize_simple(scale->dst_size, min_item_number, minTileSize, &tile_size, num_threads_y);
+        split_to_tiles(scale->dst_size, tile_size, &scale->tile_resize_info.split, &scale->tile_resize_info.tile_size, &scale->tile_resize_info.last_tile_size);
+
+        //need to do it for UV planes (not just divide Y tile Size by 2) to deal with uneven tile sizes and num_tiles difference
+        get_tilesize_simple(scale->dst_size_uv, min_item_number, minTileSize, &tileSizeUV, num_threads_uv);
+        split_to_tiles(scale->dst_size_uv, tileSizeUV, &scale->tile_resize_info_uv.split, &scale->tile_resize_info_uv.tile_size, &scale->tile_resize_info_uv.last_tile_size);
+
+        scale->tile_resize_info.num_tiles = scale->tile_resize_info.split.x * scale->tile_resize_info.split.y;
+        scale->tile_resize_info_uv.num_tiles = scale->tile_resize_info_uv.split.x * scale->tile_resize_info_uv.split.y;
+        // explicit casts keep the arguments in line with the %lld conversions
+        av_log(ctx, AV_LOG_INFO, "Intel IPP uses  %lld tiles (threads) for Y and %lld tiles for UV scale \n",
+               (long long)scale->tile_resize_info.num_tiles,
+               (long long)scale->tile_resize_info_uv.num_tiles * 2);
+
+        ipp_sts = ipp_resize_get_tilebuffer_size(scale->ipp_spec, &scale->tile_resize_info);
+        ipp_sts_uv = ipp_resize_get_tilebuffer_size(scale->ipp_spec_uv, &scale->tile_resize_info_uv);
+    } else {
+        ipp_sts = ippiResizeGetBufferSize_L(scale->ipp_spec, scale->dst_size, 1, &buffer_size);
+        ipp_sts_uv = ippiResizeGetBufferSize_L(scale->ipp_spec_uv, scale->dst_size_uv, 1, &buffer_size_uv);
+        scale->tile_resize_info.last_tile_buffer_size = buffer_size;
+        scale->tile_resize_info_uv.last_tile_buffer_size = buffer_size_uv;
+    }
+    // do not allocate work buffers from sizes that could not be determined
+    if (ipp_sts) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeGetBufferSize Y error: %d\n", ipp_sts);
+        return ipp_sts;
+    }
+    if (ipp_sts_uv) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResizeGetBufferSize UV error: %d\n", ipp_sts_uv);
+        return ipp_sts_uv;
+    }
+
+    if (!scale->pbuffer_y) {
+        int err = alloc_internal_ipp_buffers(&scale->pbuffer_y, &scale->tile_resize_info, scale->ipp_data_type);
+        if (err) {
+            av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer Y");
+            return AVERROR(ENOMEM);
+        }
+    }
+    if (!scale->pbuffer_u) {
+        int err = alloc_internal_ipp_buffers(&scale->pbuffer_u, &scale->tile_resize_info_uv, scale->ipp_data_type);
+        if (err) {
+            av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer U");
+            return AVERROR(ENOMEM);
+        }
+    }
+    if (!scale->pbuffer_v) {
+        int err = alloc_internal_ipp_buffers(&scale->pbuffer_v, &scale->tile_resize_info_uv, scale->ipp_data_type);
+        if (err) {
+            av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory for resize buffer V");
+            return AVERROR(ENOMEM);
+        }
+    }
+    return ippStsNoErr;
+}
+
+/**
+ * Output link configuration callback.
+ *
+ * Evaluates the width/height expressions, applies the aspect-ratio and
+ * divisibility constraints, stores the resulting size and sample aspect ratio
+ * on the output link, downgrades option combinations that are rejected below
+ * (antialiasing with super/nearest, supersampling when upscaling) and finally
+ * initializes the IPP resize state.
+ *
+ * @param outlink output link being configured
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int ippscale_config_props(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    IPPScaleContext *s = ctx->priv;
+    int w, h;
+    int ret;
+
+    if ((ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
+                                      &w, &h)) < 0)
+        return ret;
+
+    ff_scale_adjust_dimensions(inlink, &w, &h, s->force_original_aspect_ratio,
+                               s->force_divisible_by);
+
+    // the products below are formed again in int when the output sample
+    // aspect ratio is computed, so they must fit
+    if (((int64_t)h * inlink->w) > INT_MAX || ((int64_t)w * inlink->h) > INT_MAX) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Rescaled value for width or height is too big.\n");
+        return AVERROR(EINVAL);
+    }
+
+    outlink->w = w;
+    outlink->h = h;
+
+    if (((s->interpolation == ippSuper) || (s->interpolation == ippNearest)) && s->ipp_antialiasing) {
+        s->ipp_antialiasing = 0;
+        av_log(ctx, AV_LOG_WARNING,
+               "Supersampling and Nearest neighbor interpolations don't support antialiasing, "
+               "antialiasing is disabled.\n");
+    }
+
+    if ((s->interpolation == ippSuper) &&
+        ((outlink->w > inlink->w) || (outlink->h > inlink->h))) {
+        s->interpolation = ippCubic;
+        av_log(ctx, AV_LOG_WARNING,
+               "Supersampling not supported for upscaling, using cubic "
+               "instead.\n");
+    }
+
+    // logged after the fallbacks so the message shows the interpolation in use
+    av_log(ctx, AV_LOG_INFO, "Intel IPP based scaling  w:%d h:%d -> w:%d h:%d, interpolation %d \n", inlink->w, inlink->h,
+           outlink->w, outlink->h, s->interpolation);
+
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio =
+            av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h},
+                     inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    ret = ipp_init_resize(inlink, outlink);
+    if (ret) {
+        av_log(ctx, AV_LOG_ERROR, "Intel IPP resize initialization failed: %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Validate the parsed width/height expressions.
+ *
+ * Fails when neither expression has been parsed or when an expression refers
+ * to its own output dimension; a mutual width<->height reference only
+ * produces a warning.
+ *
+ * @return 0 when the expressions are usable, AVERROR(EINVAL) otherwise
+ */
+static int check_exprs(AVFilterContext *ctx)
+{
+    IPPScaleContext *s = ctx->priv;
+    unsigned w_refs[VARS_NB] = {0};
+    unsigned h_refs[VARS_NB] = {0};
+
+    if (!s->w_pexpr && !s->h_pexpr)
+        return AVERROR(EINVAL);
+
+    // count how often each variable is referenced by each expression
+    if (s->w_pexpr)
+        av_expr_count_vars(s->w_pexpr, w_refs, VARS_NB);
+    if (s->h_pexpr)
+        av_expr_count_vars(s->h_pexpr, h_refs, VARS_NB);
+
+    if (w_refs[VAR_OUT_W] || w_refs[VAR_OW]) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Width expression cannot be self-referencing: '%s'.\n",
+               s->w_expr);
+        return AVERROR(EINVAL);
+    }
+
+    if (h_refs[VAR_OUT_H] || h_refs[VAR_OH]) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Height expression cannot be self-referencing: '%s'.\n",
+               s->h_expr);
+        return AVERROR(EINVAL);
+    }
+
+    // width depending on height while height depends on width
+    if (w_refs[VAR_OUT_H] || w_refs[VAR_OH]) {
+        if (h_refs[VAR_OUT_W] || h_refs[VAR_OW]) {
+            av_log(ctx, AV_LOG_WARNING,
+                   "Circular references detected for width '%s' and height '%s' - "
+                   "possibly invalid.\n",
+                   s->w_expr, s->h_expr);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse a new size expression and install it in *pexpr_ptr.
+ *
+ * When str_expr is given, the option named var is first set to args and the
+ * previous string is kept for rollback. If *pexpr_ptr already held a parsed
+ * expression, the output link is reconfigured with the new one. On any
+ * failure both the option string and the previous parsed expression are
+ * restored.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int scale_parse_expr(AVFilterContext *ctx, char *str_expr,
+                            AVExpr **pexpr_ptr, const char *var,
+                            const char *args)
+{
+    IPPScaleContext *scale = ctx->priv;
+    char *saved_str = NULL;
+    AVExpr *saved_pexpr;
+    int had_expr;
+    int ret;
+
+    if (str_expr) {
+        saved_str = av_strdup(str_expr);
+        if (!saved_str)
+            return AVERROR(ENOMEM);
+        av_opt_set(scale, var, args, 0);
+    }
+
+    // detach the current expression so it can be restored on failure
+    saved_pexpr = *pexpr_ptr;
+    had_expr = saved_pexpr != NULL;
+    *pexpr_ptr = NULL;
+
+    ret = av_expr_parse(pexpr_ptr, args, var_names, NULL, NULL, NULL, NULL, 0, ctx);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot parse expression for %s: '%s'\n", var,
+               args);
+        goto restore;
+    }
+
+    ret = check_exprs(ctx);
+    if (ret < 0)
+        goto restore;
+
+    if (had_expr) {
+        ret = ippscale_config_props(ctx->outputs[0]);
+        if (ret < 0)
+            goto restore;
+    }
+
+    // the new expression is in place, the saved state is no longer needed
+    av_expr_free(saved_pexpr);
+    av_free(saved_str);
+
+    return 0;
+
+restore:
+    av_expr_free(*pexpr_ptr);
+    *pexpr_ptr = NULL;
+    if (saved_str) {
+        av_opt_set(scale, var, saved_str, 0);
+        av_free(saved_str);
+    }
+    *pexpr_ptr = saved_pexpr;
+
+    return ret;
+}
+
+// Feature masks handed to ippSetCpuFeatures(). Each level is built by OR-ing
+// additional ippCPUID_* bits onto a lower level.
+#if defined (_M_AMD64) || defined (__x86_64__)
+
+#define IPP_SSE2   (ippCPUID_MMX | ippCPUID_SSE | ippCPUID_SSE2)
+#define IPP_SSE3   (IPP_SSE2 | ippCPUID_SSE3)
+#define IPP_SSSE3  (IPP_SSE3 | ippCPUID_SSSE3)
+#define IPP_SSE4   IPP_SSSE3
+#define IPP_SSE42  (IPP_SSSE3 | ippCPUID_SSE41 | ippCPUID_SSE42)
+#define IPP_AVX    (IPP_SSE42 | ippCPUID_AVX | ippAVX_ENABLEDBYOS | ippCPUID_F16C)
+#define IPP_AVX2   (IPP_AVX | ippCPUID_MOVBE | ippCPUID_AVX2 | ippCPUID_PREFETCHW)
+#define IPP_AVX512 (IPP_AVX2 | ippCPUID_AVX512F | ippCPUID_AVX512CD | ippCPUID_AVX512VL | ippCPUID_AVX512BW | ippCPUID_AVX512DQ | ippAVX512_ENABLEDBYOS)
+
+#else
+// 32-bit architecture: SSE3, SSSE3 and SSE4 map to the SSE2 mask and
+// AVX512 maps to the AVX2 mask
+#define IPP_SSE2       (ippCPUID_MMX | ippCPUID_SSE | ippCPUID_SSE2)
+#define IPP_SSE3       IPP_SSE2
+#define IPP_SSE3_MOVBE (IPP_SSE2 | ippCPUID_SSE3 | ippCPUID_SSSE3 | ippCPUID_MOVBE)
+#define IPP_SSSE3      IPP_SSE2
+#define IPP_SSE4       IPP_SSE2
+#define IPP_SSE42      (IPP_SSE2 | ippCPUID_SSE3 | ippCPUID_SSSE3 | ippCPUID_SSE41 | ippCPUID_SSE42)
+#define IPP_AVX        (IPP_SSE42 | ippCPUID_AVX | ippAVX_ENABLEDBYOS | ippCPUID_F16C)
+#define IPP_AVX2       (IPP_AVX | ippCPUID_AVX2 | ippCPUID_MOVBE | ippCPUID_PREFETCHW)
+#define IPP_AVX512     IPP_AVX2
+
+#endif
+
+
+/**
+ * Select the IPP code path from the CPU flags reported by libavutil.
+ *
+ * The highest matching level (AVX512 down to SSE3) is passed to
+ * ippSetCpuFeatures(); when none matches, ippInit() is used instead.
+ *
+ * @return ippStsNoErr when the feature set was accepted, ippStsErr when
+ *         ippSetCpuFeatures() reported anything else, or the ippInit() result
+ */
+static IppStatus ipp_init_preferred_cpu(void)
+{
+    IppStatus status = ippStsNoErr;
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_AVX512(cpu_flags))
+        status = ippSetCpuFeatures(IPP_AVX512);
+    else if (EXTERNAL_AVX2(cpu_flags))
+        status = ippSetCpuFeatures(IPP_AVX2);
+    else if (EXTERNAL_AVX(cpu_flags))
+        status = ippSetCpuFeatures(IPP_AVX);
+    else if (EXTERNAL_SSE42(cpu_flags))
+        status = ippSetCpuFeatures(IPP_SSE42);
+    else if (EXTERNAL_SSE4(cpu_flags))
+        status = ippSetCpuFeatures(IPP_SSE4);
+    else if (EXTERNAL_SSSE3(cpu_flags))
+        status = ippSetCpuFeatures(IPP_SSSE3);
+    else if (EXTERNAL_SSE3(cpu_flags))
+        status = ippSetCpuFeatures(IPP_SSE3);
+    else
+        return ippInit();
+
+    // any status other than "no error" is collapsed into the generic error
+    return status != ippStsNoErr ? ippStsErr : ippStsNoErr;
+}
+
+/**
+ * Filter init callback.
+ *
+ * Turns the size options into width/height expressions, parses them, takes
+ * ownership of the remaining option dictionary, resets the IPP resource
+ * pointers and selects the IPP CPU dispatch level.
+ *
+ * @param ctx  filter context
+ * @param opts option dictionary; ownership is moved into the filter context
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int ippscale_init(AVFilterContext *ctx, AVDictionary **opts)
+{
+    IPPScaleContext *scale = ctx->priv;
+    IppStatus ipp_sts;
+    int ret;
+
+    // a lone "w" value is treated as a size string
+    if (scale->w_expr && !scale->h_expr)
+        FFSWAP(char *, scale->w_expr, scale->size_str);
+
+    if (scale->size_str) {
+        char buf[32];
+        if ((ret = av_parse_video_size(&scale->w, &scale->h, scale->size_str)) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid size '%s'\n", scale->size_str);
+            return ret;
+        }
+        snprintf(buf, sizeof(buf), "%d", scale->w);
+        av_opt_set(scale, "w", buf, 0);
+        snprintf(buf, sizeof(buf), "%d", scale->h);
+        av_opt_set(scale, "h", buf, 0);
+    }
+    if (!scale->w_expr)
+        av_opt_set(scale, "w", "iw", 0);
+    if (!scale->h_expr)
+        av_opt_set(scale, "h", "ih", 0);
+
+    ret = scale_parse_expr(ctx, NULL, &scale->w_pexpr, "width", scale->w_expr);
+    if (ret < 0)
+        return ret;
+
+    ret = scale_parse_expr(ctx, NULL, &scale->h_pexpr, "height", scale->h_expr);
+    if (ret < 0)
+        return ret;
+
+    scale->flags = 0;
+    scale->opts = *opts;
+    *opts = NULL;
+
+    scale->ipp_spec = NULL;
+    scale->ipp_spec_uv = NULL;
+    scale->init_buffer = NULL;
+    scale->pbuffer_y = NULL;
+    scale->pbuffer_u = NULL;
+    scale->pbuffer_v = NULL;
+    scale->tile_resize_info.num_tiles = 1;
+    scale->tile_resize_info_uv.num_tiles = 1;
+
+    // IPP status values are not AVERROR codes: translate a failure instead of
+    // handing the raw value back to libavfilter
+    ipp_sts = ipp_init_preferred_cpu();
+    if (ipp_sts < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Intel IPP CPU dispatch initialization failed: %d\n", (int)ipp_sts);
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Filter uninit callback: releases the per-tile work buffers of the three
+ * planes, the IPP init buffer and resize specs, the parsed size expressions
+ * and the stored option dictionary.
+ */
+static av_cold void ippscale_uninit(AVFilterContext *ctx)
+{
+    IPPScaleContext *s = ctx->priv;
+    int tile;
+
+    // Y plane work buffers, one per luma tile
+    if (s->pbuffer_y) {
+        for (tile = 0; tile < s->tile_resize_info.num_tiles; tile++) {
+            if (s->pbuffer_y[tile])
+                ippsFree(s->pbuffer_y[tile]);
+        }
+        av_free(s->pbuffer_y);
+    }
+
+    // U plane work buffers, one per chroma tile
+    if (s->pbuffer_u) {
+        for (tile = 0; tile < s->tile_resize_info_uv.num_tiles; tile++) {
+            if (s->pbuffer_u[tile])
+                ippsFree(s->pbuffer_u[tile]);
+        }
+        av_free(s->pbuffer_u);
+    }
+
+    // V plane work buffers, one per chroma tile
+    if (s->pbuffer_v) {
+        for (tile = 0; tile < s->tile_resize_info_uv.num_tiles; tile++) {
+            if (s->pbuffer_v[tile])
+                ippsFree(s->pbuffer_v[tile]);
+        }
+        av_free(s->pbuffer_v);
+    }
+
+    if (s->init_buffer)
+        ippsFree(s->init_buffer);
+    if (s->ipp_spec)
+        ippsFree(s->ipp_spec);
+    if (s->ipp_spec_uv)
+        ippsFree(s->ipp_spec_uv);
+
+    av_expr_free(s->w_pexpr);
+    av_expr_free(s->h_pexpr);
+    s->w_pexpr = NULL;
+    s->h_pexpr = NULL;
+
+    av_dict_free(&s->opts);
+}
+
+/**
+ * Format negotiation callback: offers the pixel formats listed in
+ * supported_formats on all links.
+ *
+ * @return the ff_set_common_formats() result, or AVERROR(ENOMEM) when the
+ *         format list cannot be built
+ */
+static int ippscale_query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = ff_make_format_list(supported_formats);
+
+    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
+}
+
+/**
+ * Scale one input frame into a newly allocated output frame.
+ *
+ * Takes ownership of the input frame: it is freed on every path. On success
+ * the scaled frame is stored in *frame_out; on failure *frame_out is NULL and
+ * no output frame is left allocated.
+ *
+ * @param link      input link the frame arrived on
+ * @param in        frame to scale
+ * @param frame_out receives the scaled frame on success
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out)
+{
+    AVFilterContext *ctx = link->dst;
+    IPPScaleContext *scale = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = NULL;
+    IppStatus ipp_sts = 0;
+    int ret;
+
+    *frame_out = NULL;
+
+    link->dst->inputs[0]->format = in->format;
+    link->dst->inputs[0]->w = in->width;
+    link->dst->inputs[0]->h = in->height;
+
+    link->dst->inputs[0]->sample_aspect_ratio.den = in->sample_aspect_ratio.den;
+    link->dst->inputs[0]->sample_aspect_ratio.num = in->sample_aspect_ratio.num;
+
+    if (in->interlaced_frame) {
+        // tbd do deinterlaced path
+        av_log(ctx, AV_LOG_ERROR, "Interlaced input format is not supported. \n");
+        ret = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ret = av_frame_copy_props(out, in);
+    if (ret < 0)
+        goto fail;
+    out->width = outlink->w;
+    out->height = outlink->h;
+
+    // the whole frame processing
+    scale->src_yuv[0] = in->data[0]; scale->src_yuv[1] = in->data[1]; scale->src_yuv[2] = in->data[2];
+    scale->dst_yuv[0] = out->data[0]; scale->dst_yuv[1] = out->data[1]; scale->dst_yuv[2] = out->data[2];
+    scale->src_step[0] = in->linesize[0]; scale->src_step[1] = in->linesize[1];  scale->src_step[2] = in->linesize[2];
+    scale->dst_step[0] = out->linesize[0]; scale->dst_step[1] = out->linesize[1];  scale->dst_step[2] = out->linesize[2];
+
+    if(scale->ipp_threading){
+        // one job per Y tile plus one per U and per V tile; per-job statuses are not collected here
+        ctx->internal->execute(ctx, scale->ippi_resize_y420_lt_ptr, &scale, NULL, scale->tile_resize_info.num_tiles+2*scale->tile_resize_info_uv.num_tiles);
+    } else {
+        // keep the status so that a failed resize is actually reported
+        ipp_sts = scale->ipp_resize_yuv420_func(scale, (const Ipp8u**)in->data, in->linesize, out->data, out->linesize);
+    }
+
+    // the single-threaded helpers OR the per-plane statuses together, so the
+    // result is negative as soon as one plane reported an error
+    if (ipp_sts < 0) {
+        av_log(ctx, AV_LOG_ERROR, "ippiResize_C1R error: ipp_sts %d \n",
+               ipp_sts);
+        ret = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    av_frame_free(&in);
+    *frame_out = out;
+    return 0;
+
+fail:
+    av_frame_free(&in);
+    av_frame_free(&out);
+    return ret;
+}
+
+/**
+ * Input pad filter_frame callback: scales the incoming frame and forwards the
+ * result to the output link.
+ *
+ * @return the ff_filter_frame() result on success, otherwise the error
+ *         reported by scale_frame()
+ */
+static int ippscale_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    AVFilterContext *ctx = link->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = NULL;
+    int ret;
+
+    ret = scale_frame(link, in, &out);
+    if (ret < 0) {
+        // never forward a frame whose scaling failed
+        av_frame_free(&out);
+        return ret;
+    }
+    if (out)
+        return ff_filter_frame(outlink, out);
+
+    return ret;
+}
+
+// Option table helpers: field offset inside the private context and the
+// option flag sets (TFLAGS additionally carries AV_OPT_FLAG_RUNTIME_PARAM).
+// The flag sets are parenthesized so they expand safely inside any expression.
+#define OFFSET(x) offsetof(IPPScaleContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+#define TFLAGS                                                                 \
+  (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM |                     \
+      AV_OPT_FLAG_RUNTIME_PARAM)
+
+// User-visible options of the scale_ipp filter.
+static const AVOption ippscale_options[] = {
+    {"w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"width", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, .flags = TFLAGS},
+    {"h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING,.flags = TFLAGS},
+    {"height", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING,.flags = TFLAGS},
+    {"size", "set video size", OFFSET(size_str),AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS},
+    {"s", "set video size", OFFSET(size_str),AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS},
+    {"force_original_aspect_ratio", "Change w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio),AV_OPT_TYPE_INT, {.i64 = 0}, 0,2, FLAGS, "force_oar"},
+    {"disable",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
+    {"decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
+    {"increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
+    {"force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
+
+    {"ipp_threading", "Enables IPP scaling filter threading usage (ON by default)", OFFSET(ipp_threading),AV_OPT_TYPE_BOOL, {.i64 = 1},0,1, FLAGS},
+    {"ipp_antialiasing", "Enables internal IPP anti-aliasing (OFF by default)", OFFSET(ipp_antialiasing),AV_OPT_TYPE_BOOL, {.i64 = 0},0,1, FLAGS},
+
+    {"interpolation","Interpolation algorithm used for resizing", OFFSET(interpolation), AV_OPT_TYPE_INT,{.i64 = IPPI_INTER_LINEAR},0, INT_MAX,  FLAGS,  "interpolation"},
+    //same as interpolation above, for compatibility with the swscale  filter
+    {"flags", "Interpolation algorithm used for resizing", OFFSET(interpolation), AV_OPT_TYPE_INT,{.i64 = IPPI_INTER_LINEAR},0, INT_MAX,  FLAGS,  "interpolation"},
+    {"nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_NN}, 0, 0, FLAGS, "interpolation"},
+    {"linear", "linear", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LINEAR}, 0, 0, FLAGS, "interpolation"},
+    {"bilinear", "linear by X and Y, same as linear", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LINEAR}, 0, 0, FLAGS, "interpolation"},
+    {"cubic", "cubic (B=0, C=1/2)", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_CUBIC2P_CATMULLROM}, 0, 0, FLAGS, "interpolation"},
+    {"super", "supersampling", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_SUPER}, 0, 0, FLAGS, "interpolation"},
+    {"lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, {.i64 = IPPI_INTER_LANCZOS}, 0, 0, FLAGS, "interpolation"},
+    {NULL}};
+
+// AVClass tying the option table above to the filter's private context.
+static const AVClass ippscale_class = {
+    .class_name = "ippscale",
+    .option     = ippscale_options,
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+// Single video input pad; frames are handled by ippscale_filter_frame().
+static const AVFilterPad ippscale_inputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .filter_frame = ippscale_filter_frame,
+    },
+    { NULL }
+};
+
+// Single video output pad; its size is set up by ippscale_config_props().
+static const AVFilterPad ippscale_outputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .config_props = ippscale_config_props,
+    },
+    { NULL }
+};
+
+// Filter definition for "scale_ipp".
+// Generic timeline support is deliberately not advertised: the generic
+// pass-through forwards input frames unchanged, which cannot match an output
+// link whose dimensions differ from the input.
+AVFilter ff_vf_scale_ipp = {
+    .name = "scale_ipp",
+    .description = NULL_IF_CONFIG_SMALL(
+        "Intel Performance Primitives library based video scaling"),
+    .init_dict = ippscale_init,
+    .uninit = ippscale_uninit,
+    .query_formats = ippscale_query_formats,
+    .priv_size = sizeof(IPPScaleContext),
+    .priv_class = &ippscale_class,
+
+    .inputs = ippscale_inputs,
+    .outputs = ippscale_outputs,
+    .flags = AVFILTER_FLAG_SLICE_THREADS,
+};
-- 
2.31.1.windows.1


Victoria Zhislina
Cloud Software Engineering, Senior Application Engineer, Intel Corp.
Nizhny Novgorod, Russia; Office: (831)-2969-444 ext.2231; INET: 312-2231


--------------------------------------------------------------------
Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park,
17 Krylatskaya Str., Bldg 4, Moscow 121614,
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.