[FFmpeg-devel] [PATCH 20/24] sws: add a function for scaling dst slices

Anton Khirnov anton at khirnov.net
Mon May 31 10:55:11 EEST 2021


Currently existing sws_scale() accepts as input a user-determined slice
of input data and produces an indeterminate number of output lines.
Since the calling code does not know the amount of output, it cannot
easily parallelize scaling by calling sws_scale() simultaneously on
different parts of the frame.

Add a new function - sws_scale_dst_slice() - that accepts as input the
entire input frame and produces a specified slice of the output. This
function can be called simultaneously on different slices of the output
frame (using different sws contexts) to implement slice threading.
---
 doc/APIchanges       |   3 +
 libswscale/swscale.c | 186 +++++++++++++++++++++++++++++--------------
 libswscale/swscale.h |  30 +++++++
 libswscale/version.h |   2 +-
 4 files changed, 162 insertions(+), 59 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index c46f4d5304..e5cdd8ee77 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -14,6 +14,9 @@ libavutil:     2021-04-27
 
 API changes, most recent first:
 
+2021-xx-xx - xxxxxxxxxx - sws 6.1.100 - swscale.h
+  Add sws_scale_dst_slice().
+
 2021-04-27 - cb3ac722f4 - lavc 59.0.100 - avcodec.h
   Constified AVCodecParserContext.parser.
 
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 3b03fd16dd..748e606935 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -236,13 +236,16 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
         av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
 static int swscale(SwsContext *c, const uint8_t *src[],
-                   int srcStride[], int srcSliceY,
-                   int srcSliceH, uint8_t *dst[], int dstStride[])
+                   int srcStride[], int srcSliceY, int srcSliceH,
+                   uint8_t *dst[], int dstStride[],
+                   int dstSliceY, int dstSliceH)
 {
+    const int scale_dst = dstSliceY > 0 || dstSliceH < c->dstH;
+
     /* load a few things into local vars to make the code more readable?
      * and faster */
     const int dstW                   = c->dstW;
-    const int dstH                   = c->dstH;
+    int dstH                         = c->dstH;
 
     const enum AVPixelFormat dstFormat = c->dstFormat;
     const int flags                  = c->flags;
@@ -331,10 +334,15 @@ static int swscale(SwsContext *c, const uint8_t *src[],
         }
     }
 
-    /* Note the user might start scaling the picture in the middle so this
-     * will not get executed. This is not really intended but works
-     * currently, so people might do it. */
-    if (srcSliceY == 0) {
+    if (scale_dst) {
+        dstY         = dstSliceY;
+        dstH         = dstY + dstSliceH;
+        lastInLumBuf = -1;
+        lastInChrBuf = -1;
+    } else if (srcSliceY == 0) {
+        /* Note the user might start scaling the picture in the middle so this
+         * will not get executed. This is not really intended but works
+         * currently, so people might do it. */
         dstY         = 0;
         lastInLumBuf = -1;
         lastInChrBuf = -1;
@@ -352,8 +360,8 @@ static int swscale(SwsContext *c, const uint8_t *src[],
             srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
 
     ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
-            dstY, dstH, dstY >> c->chrDstVSubSample,
-            AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
+            dstY, dstSliceH, dstY >> c->chrDstVSubSample,
+            AV_CEIL_RSHIFT(dstSliceH, c->chrDstVSubSample), scale_dst);
     if (srcSliceY == 0) {
         hout_slice->plane[0].sliceY = lastInLumBuf + 1;
         hout_slice->plane[1].sliceY = lastInChrBuf + 1;
@@ -373,7 +381,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 
         // First line needed as input
         const int firstLumSrcY  = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
-        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), dstH - 1)]);
+        const int firstLumSrcY2 = FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1 << c->chrDstVSubSample) - 1), c->dstH - 1)]);
         // First line needed as input
         const int firstChrSrcY  = FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
 
@@ -477,7 +485,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
             c->chrDither8 = ff_dither_8x8_128[chrDstY & 7];
             c->lumDither8 = ff_dither_8x8_128[dstY    & 7];
         }
-        if (dstY >= dstH - 2) {
+        if (dstY >= c->dstH - 2) {
             /* hmm looks like we can't use MMX here without overwriting
              * this array's tail */
             ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
@@ -491,21 +499,22 @@ static int swscale(SwsContext *c, const uint8_t *src[],
             desc[i].process(c, &desc[i], dstY, 1);
     }
     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !needAlpha) {
+        int offset = lastDstY - dstSliceY;
         int length = dstW;
         int height = dstY - lastDstY;
 
         if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
             const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
-            fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
+            fillPlane16(dst[3], dstStride[3], length, height, offset,
                     1, desc->comp[3].depth,
                     isBE(dstFormat));
         } else if (is32BPS(dstFormat)) {
             const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
-            fillPlane32(dst[3], dstStride[3], length, height, lastDstY,
+            fillPlane32(dst[3], dstStride[3], length, height, offset,
                     1, desc->comp[3].depth,
                     isBE(dstFormat), desc->flags & AV_PIX_FMT_FLAG_FLOAT);
         } else
-            fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
+            fillPlane(dst[3], dstStride[3], length, height, offset, 255);
     }
 
 #if HAVE_MMXEXT_INLINE
@@ -809,33 +818,42 @@ static void update_palette(SwsContext *c, const uint32_t *pal)
     }
 }
 
+static int scale_internal(SwsContext *c,
+                          const uint8_t * const srcSlice[], const int srcStride[],
+                          int srcSliceY, int srcSliceH,
+                          uint8_t *const dstSlice[], const int dstStride[],
+                          int dstSliceY, int dstSliceH);
+
 static int scale_gamma(SwsContext *c,
                        const uint8_t * const srcSlice[], const int srcStride[],
                        int srcSliceY, int srcSliceH,
-                       uint8_t * const dst[], const int dstStride[])
+                       uint8_t * const dstSlice[], const int dstStride[],
+                       int dstSliceY, int dstSliceH)
 {
-    int ret = sws_scale(c->cascaded_context[0],
-                        srcSlice, srcStride, srcSliceY, srcSliceH,
-                        c->cascaded_tmp, c->cascaded_tmpStride);
+    int ret = scale_internal(c->cascaded_context[0],
+                             srcSlice, srcStride, srcSliceY, srcSliceH,
+                             c->cascaded_tmp, c->cascaded_tmpStride, 0, c->srcH);
 
     if (ret < 0)
         return ret;
 
     if (c->cascaded_context[2])
-        ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
-                        c->cascaded_tmpStride, srcSliceY, srcSliceH, c->cascaded1_tmp,
-                        c->cascaded1_tmpStride);
+        ret = scale_internal(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
+                             c->cascaded_tmpStride, srcSliceY, srcSliceH,
+                             c->cascaded1_tmp, c->cascaded1_tmpStride, 0, c->dstH);
     else
-        ret = sws_scale(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
-                        c->cascaded_tmpStride, srcSliceY, srcSliceH, dst, dstStride);
+        ret = scale_internal(c->cascaded_context[1], (const uint8_t * const *)c->cascaded_tmp,
+                             c->cascaded_tmpStride, srcSliceY, srcSliceH,
+                             dstSlice, dstStride, dstSliceY, dstSliceH);
 
     if (ret < 0)
         return ret;
 
     if (c->cascaded_context[2]) {
-        ret = sws_scale(c->cascaded_context[2], (const uint8_t * const *)c->cascaded1_tmp,
-                        c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret,
-                        c->cascaded_context[1]->dstY, dst, dstStride);
+        ret = scale_internal(c->cascaded_context[2], (const uint8_t * const *)c->cascaded1_tmp,
+                             c->cascaded1_tmpStride, c->cascaded_context[1]->dstY - ret,
+                             c->cascaded_context[1]->dstY,
+                             dstSlice, dstStride, dstSliceY, dstSliceH);
     }
     return ret;
 }
@@ -843,56 +861,64 @@ static int scale_gamma(SwsContext *c,
 static int scale_cascaded(SwsContext *c,
                           const uint8_t * const srcSlice[], const int srcStride[],
                           int srcSliceY, int srcSliceH,
-                          uint8_t * const dst[], const int dstStride[])
+                          uint8_t * const dstSlice[], const int dstStride[],
+                          int dstSliceY, int dstSliceH)
 {
-    int ret = sws_scale(c->cascaded_context[0],
-                        srcSlice, srcStride, srcSliceY, srcSliceH,
-                        c->cascaded_tmp, c->cascaded_tmpStride);
+    int ret = scale_internal(c->cascaded_context[0],
+                             srcSlice, srcStride, srcSliceY, srcSliceH,
+                             c->cascaded_tmp, c->cascaded_tmpStride,
+                             0, c->cascaded_context[0]->dstH);
     if (ret < 0)
         return ret;
-    ret = sws_scale(c->cascaded_context[1],
-                    (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride,
-                    0, c->cascaded_context[0]->dstH, dst, dstStride);
+    ret = scale_internal(c->cascaded_context[1],
+                         (const uint8_t * const * )c->cascaded_tmp, c->cascaded_tmpStride,
+                         0, c->cascaded_context[0]->dstH,
+                         dstSlice, dstStride, dstSliceY, dstSliceH);
     return ret;
 }
 
-/**
- * swscale wrapper, so we don't need to export the SwsContext.
- * Assumes planar YUV to be in YUV order instead of YVU.
- */
-int attribute_align_arg sws_scale(struct SwsContext *c,
-                                  const uint8_t * const srcSlice[],
-                                  const int srcStride[], int srcSliceY,
-                                  int srcSliceH, uint8_t *const dst[],
-                                  const int dstStride[])
+static int scale_internal(SwsContext *c,
+                          const uint8_t * const srcSlice[], const int srcStride[],
+                          int srcSliceY, int srcSliceH,
+                          uint8_t *const dstSlice[], const int dstStride[],
+                          int dstSliceY, int dstSliceH)
 {
-    const int frame_start = !c->sliceDir;
+    const int scale_dst = dstSliceY > 0 || dstSliceH < c->dstH;
+    const int frame_start = scale_dst || !c->sliceDir;
     int i, ret;
     const uint8_t *src2[4];
     uint8_t *dst2[4];
-    int macro_height = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
+    int macro_height_src = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
+    int macro_height_dst = isBayer(c->dstFormat) ? 2 : (1 << c->chrDstVSubSample);
     // copy strides, so they can safely be modified
     int srcStride2[4];
     int dstStride2[4];
     int srcSliceY_internal = srcSliceY;
 
-    if (!srcStride || !dstStride || !dst || !srcSlice) {
+    if (!srcStride || !dstStride || !dstSlice || !srcSlice) {
         av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n");
         return AVERROR(EINVAL);
     }
 
-    if ((srcSliceY & (macro_height-1)) ||
-        ((srcSliceH& (macro_height-1)) && srcSliceY + srcSliceH != c->srcH) ||
+    if ((srcSliceY  & (macro_height_src - 1)) ||
+        ((srcSliceH & (macro_height_src - 1)) && srcSliceY + srcSliceH != c->srcH) ||
         srcSliceY + srcSliceH > c->srcH) {
         av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", srcSliceY, srcSliceH);
         return AVERROR(EINVAL);
     }
 
+    if ((dstSliceY  & (macro_height_dst - 1)) ||
+        ((dstSliceH & (macro_height_dst - 1)) && dstSliceY + dstSliceH != c->dstH) ||
+        dstSliceY + dstSliceH > c->dstH) {
+        av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", dstSliceY, dstSliceH);
+        return AVERROR(EINVAL);
+    }
+
     if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
         av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
         return AVERROR(EINVAL);
     }
-    if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
+    if (!check_image_pointers((const uint8_t* const*)dstSlice, c->dstFormat, dstStride)) {
         av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
         return AVERROR(EINVAL);
     }
@@ -902,10 +928,12 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
         return 0;
 
     if (c->gamma_flag && c->cascaded_context[0])
-        return scale_gamma(c, srcSlice, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+        return scale_gamma(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+                           dstSlice, dstStride, dstSliceY, dstSliceH);
 
     if (c->cascaded_context[0] && srcSliceY == 0 && srcSliceH == c->cascaded_context[0]->srcH)
-        return scale_cascaded(c, srcSlice, srcStride, srcSliceY, srcSliceH, dst, dstStride);
+        return scale_cascaded(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+                              dstSlice, dstStride, dstSliceY, dstSliceH);
 
     if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0])
         for (i = 0; i < 4; i++)
@@ -915,18 +943,19 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
         update_palette(c, (const uint32_t *)srcSlice[1]);
 
     memcpy(src2,       srcSlice,  sizeof(src2));
-    memcpy(dst2,       dst,       sizeof(dst2));
+    memcpy(dst2,       dstSlice,  sizeof(dst2));
     memcpy(srcStride2, srcStride, sizeof(srcStride2));
     memcpy(dstStride2, dstStride, sizeof(dstStride2));
 
-    if (frame_start) {
+    if (frame_start && !scale_dst) {
         if (srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
             av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
             return AVERROR(EINVAL);
         }
 
         c->sliceDir = (srcSliceY == 0) ? 1 : -1;
-    }
+    } else if (scale_dst)
+        c->sliceDir = 1;
 
     if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
         uint8_t *base;
@@ -985,10 +1014,27 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
     reset_ptr(src2, c->srcFormat);
     reset_ptr((void*)dst2, c->dstFormat);
 
-    if (c->swscale)
-        ret = c->swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH, dst2, dstStride2);
-    else
-        ret = swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH, dst2, dstStride2);
+    if (c->swscale) {
+        int offset  = srcSliceY_internal;
+        int slice_h = srcSliceH;
+
+        // for dst slice scaling, offset the src pointers to match the dst slice
+        if (scale_dst) {
+            av_assert0(offset == 0);
+            for (i = 0; i < 4 && src2[i]; i++) {
+                if (!src2[i] || (i > 0 && usePal(c->srcFormat)))
+                    break;
+                src2[i] += (dstSliceY >> ((i == 1 || i == 2) ? c->chrSrcVSubSample : 0)) * srcStride2[i];
+            }
+            offset  = 0;
+            slice_h = dstSliceH;
+        }
+
+        ret = c->swscale(c, src2, srcStride2, offset, slice_h, dst2, dstStride2);
+    } else {
+        ret = swscale(c, src2, srcStride2, srcSliceY_internal, srcSliceH,
+                      dst2, dstStride2, dstSliceY, dstSliceH);
+    }
 
     if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
         int dstY = c->dstY ? c->dstY : srcSliceY + srcSliceH;
@@ -1002,8 +1048,32 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
     }
 
     /* reset slice direction at end of frame */
-    if (srcSliceY_internal + srcSliceH == c->srcH)
+    if ((srcSliceY_internal + srcSliceH == c->srcH) || scale_dst)
         c->sliceDir = 0;
 
     return ret;
 }
+
+/**
+ * swscale wrapper, so we don't need to export the SwsContext.
+ * Assumes planar YUV to be in YUV order instead of YVU.
+ */
+int attribute_align_arg sws_scale(struct SwsContext *c,
+                                  const uint8_t * const srcSlice[],
+                                  const int srcStride[], int srcSliceY,
+                                  int srcSliceH, uint8_t *const dst[],
+                                  const int dstStride[])
+{
+    return scale_internal(c, srcSlice, srcStride, srcSliceY, srcSliceH,
+                          dst, dstStride, 0, c->dstH);
+}
+
+int attribute_align_arg
+sws_scale_dst_slice(struct SwsContext *c,
+                    const uint8_t *const src[], const int srcStride[],
+                    uint8_t *const dstSlice[], const int dstStride[],
+                    int dstSliceY, int dstSliceH)
+{
+    return scale_internal(c, src, srcStride, 0, c->srcH,
+                          dstSlice, dstStride, dstSliceY, dstSliceH);
+}
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 50d6d46553..9a284bca03 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -218,6 +218,36 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
               const int srcStride[], int srcSliceY, int srcSliceH,
               uint8_t *const dst[], const int dstStride[]);
 
+/**
+ * Scale the image in src to produce a slice of the output image. A slice is a
+ * sequence of consecutive rows in an image.
+ *
+ * Unlike sws_scale() - which consumes a fixed slice of input data to produce a
+ * variable amount of output data - this function takes in the entire input
+ * image to produce a fixed slice of output.
+ *
+ * @param c         the scaling context previously created with
+ *                  sws_getContext()
+ * @param src       the array containing the pointers to the planes of
+ *                  the source image
+ * @param srcStride the array containing the strides for each plane of
+ *                  the source image
+ * @param dstSlice  the array containing the pointers to the planes of
+ *                  the destination slice
+ * @param dstStride the array containing the strides for each plane of
+ *                  the destination image
+ * @param dstSliceY the position in the destination image of the slice to
+ *                  output, that is the number (counted starting from
+ *                  zero) in the image of the first row of the slice
+ * @param dstSliceH the height of the destination slice, that is the number
+ *                  of rows in the slice
+ * @return          0 on success, a negative error code on failure
+ */
+int sws_scale_dst_slice(struct SwsContext *c,
+                        const uint8_t *const src[], const int srcStride[],
+                        uint8_t *const dstSlice[], const int dstStride[],
+                        int dstSliceY, int dstSliceH);
+
 /**
  * @param dstRange flag indicating the while-black range of the output (1=jpeg / 0=mpeg)
  * @param srcRange flag indicating the while-black range of the input (1=jpeg / 0=mpeg)
diff --git a/libswscale/version.h b/libswscale/version.h
index f86a8049c1..0d3b7cf363 100644
--- a/libswscale/version.h
+++ b/libswscale/version.h
@@ -27,7 +27,7 @@
 #include "libavutil/version.h"
 
 #define LIBSWSCALE_VERSION_MAJOR   6
-#define LIBSWSCALE_VERSION_MINOR   0
+#define LIBSWSCALE_VERSION_MINOR   1
 #define LIBSWSCALE_VERSION_MICRO 100
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
-- 
2.30.2



More information about the ffmpeg-devel mailing list