[MPlayer-dev-eng] [PATCH] vf_ass: reduce useless up/downsampling.

Nicolas George nicolas.george at normalesup.org
Wed Oct 3 12:19:06 CEST 2012


For each line of pixels, compute the leftmost and rightmost
affected pixels and only up/downsample this interval.

The extents are computed only once, when the ASS layout changes.

Speed += ~15% for a typical subtitle with a long and a short line.
---
 libmpcodecs/vf_ass.c |  232 +++++++++++++++++++++++++++-----------------------
 1 file changed, 127 insertions(+), 105 deletions(-)


Note that this filter still has a few policy flaws, such as unchecked
mallocs, but these are unrelated to this patch.


diff --git a/libmpcodecs/vf_ass.c b/libmpcodecs/vf_ass.c
index d69cc5f..939cc92 100644
--- a/libmpcodecs/vf_ass.c
+++ b/libmpcodecs/vf_ass.c
@@ -52,8 +52,7 @@
 #define rgba2u(c)  ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )
 #define rgba2v(c)  ( (( 450*_r(c) - 376*_g(c) -  73*_b(c)) >> 10) + 128 )
 
-typedef void (*copy_from_image_func)(struct vf_instance *vf,
-                                     int first_row, int last_row);
+typedef void (*copy_from_image_func)(struct vf_instance *vf);
 typedef void (*copy_to_image_func)(struct vf_instance *vf);
 
 static const struct vf_priv_s {
@@ -61,21 +60,24 @@ static const struct vf_priv_s {
 
     int is_planar;
     unsigned int outfmt;
+    int sub_y;
 
     // 1 = auto-added filter: insert only if chain does not support EOSD already
     // 0 = insert always
     int auto_insert;
 
     unsigned char *planes[3];
-    unsigned char *dirty_rows;
+    struct dirty_row_extent {
+        int xmin, xmax;
+    } *dirty_rows;
 
     copy_from_image_func copy_from_image;
     copy_to_image_func copy_to_image;
 } vf_priv_dflt;
 
-static void copy_from_image_yuv420p(struct vf_instance *, int, int);
+static void copy_from_image_yuv420p(struct vf_instance *);
 static void copy_to_image_yuv420p(struct vf_instance *);
-static void copy_from_image_yuv422(struct vf_instance *, int, int);
+static void copy_from_image_yuv422(struct vf_instance *);
 static void copy_to_image_yuv422(struct vf_instance *);
 
 static int config(struct vf_instance *vf,
@@ -91,12 +93,14 @@ static int config(struct vf_instance *vf,
         vf->priv->is_planar = 1;
         vf->priv->copy_from_image = copy_from_image_yuv420p;
         vf->priv->copy_to_image = copy_to_image_yuv420p;
+        vf->priv->sub_y = 1;
         break;
     case IMGFMT_UYVY:
     case IMGFMT_YUY2:
         vf->priv->is_planar = 0;
         vf->priv->copy_from_image = copy_from_image_yuv422;
         vf->priv->copy_to_image = copy_to_image_yuv422;
+        vf->priv->sub_y = 0;
         break;
     default:
         return 0;
@@ -115,7 +119,8 @@ static int config(struct vf_instance *vf,
         vf->priv->planes[0] = malloc(vf->priv->outw * vf->priv->outh);
     vf->priv->planes[1]  = malloc(vf->priv->outw * vf->priv->outh);
     vf->priv->planes[2]  = malloc(vf->priv->outw * vf->priv->outh);
-    vf->priv->dirty_rows = malloc(vf->priv->outh);
+    vf->priv->dirty_rows = calloc(vf->priv->outh,
+                                  sizeof(*vf->priv->dirty_rows));
 
     res.w    = vf->priv->outw;
     res.h    = vf->priv->outh;
@@ -272,50 +277,74 @@ static int prepare_image(struct vf_instance *vf, mp_image_t *mpi)
     return 0;
 }
 
+static void compute_dirty_extents(struct vf_instance *vf,
+                                  struct mp_eosd_image_list *images)
+{
+    struct mp_eosd_image *img;
+    int xmin, xmax, ymin, ymax, y;
+    struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
+
+    for (y = 0; y < vf->priv->outh; y++) {
+        dirty_rows[y].xmin = vf->priv->outw;
+        dirty_rows[y].xmax = 0;
+    }
+
+    img = eosd_image_first(images);
+    while (img) {
+        xmin = FFMAX(0,              img->dst_x);
+        xmax = FFMIN(vf->priv->outw, img->dst_x + img->w);
+        ymin = FFMAX(0,              img->dst_y);
+        ymax = FFMIN(vf->priv->outh, img->dst_y + img->h);
+        xmin = (xmin + 0) & ~1;
+        xmax = (xmax + 1) & ~1;
+        if (vf->priv->sub_y) {
+            ymin = (ymin + 0) & ~1;
+            ymax = (ymax + 1) & ~1;
+        }
+        for (y = ymin; y < ymax; y++) {
+            dirty_rows[y].xmin = FFMIN(dirty_rows[y].xmin, xmin);
+            dirty_rows[y].xmax = FFMAX(dirty_rows[y].xmax, xmax);
+        }
+        img = eosd_image_next(images);
+    }
+}
+
 /**
  * \brief Copy specified rows from render_context.dmpi to render_context.planes, upsampling to 4:4:4
  */
-static void copy_from_image_yuv420p(struct vf_instance *vf, int first_row,
-                            int last_row)
+static void copy_from_image_yuv420p(struct vf_instance *vf)
 {
-    int pl;
-    int i, j, k;
-    unsigned char val;
-    int chroma_rows;
-
-    first_row  -= (first_row % 2);
-    last_row   += (last_row  % 2);
-    chroma_rows = (last_row - first_row) / 2;
-
-    assert(first_row >= 0);
-    assert(first_row <= last_row);
-    assert(last_row  <= vf->priv->outh);
+    int pl, y, x;
 
     for (pl = 1; pl < 3; ++pl) {
         int dst_stride = vf->priv->outw;
         int src_stride = vf->dmpi->stride[pl];
 
-        unsigned char *src      = vf->dmpi->planes[pl] + (first_row / 2) * src_stride;
-        unsigned char *dst      = vf->priv->planes[pl] +  first_row      * dst_stride;
+        unsigned char *src      = vf->dmpi->planes[pl];
+        unsigned char *dst      = vf->priv->planes[pl];
         unsigned char *dst_next = dst + dst_stride;
-        for (i = 0; i < chroma_rows; ++i) {
-            if ((vf->priv->dirty_rows[first_row + i * 2    ] == 0) ||
-                (vf->priv->dirty_rows[first_row + i * 2 + 1] == 0)) {
-                for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k += 2) {
-                    val = *(src + j);
-                    *(dst + k    ) = val;
-                    *(dst + k + 1) = val;
-                    *(dst_next + k    ) = val;
-                    *(dst_next + k + 1) = val;
-                }
+        struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
+
+        for (y = 0; y < vf->priv->outh; y += 2) {
+            int xmin = dirty_rows->xmin;
+            int xmax = dirty_rows->xmax;
+            int width = (xmax - xmin) >> 1;
+            unsigned char *srccur  = src      + (xmin >> 1);
+            unsigned char *dstcur1 = dst      + xmin;
+            unsigned char *dstcur2 = dst_next + xmin;
+
+            for (x = 0; x < width; x++) {
+                dstcur1[0] = dstcur1[1] = dstcur2[0] = dstcur2[1] = *srccur;
+                srccur++;
+                dstcur1 += 2;
+                dstcur2 += 2;
             }
             src += src_stride;
             dst      = dst_next + dst_stride;
             dst_next = dst      + dst_stride;
+            dirty_rows += 2;
         }
     }
-    for (i = first_row; i < last_row; ++i)
-        vf->priv->dirty_rows[i] = 1;
 }
 
 /**
@@ -323,8 +352,8 @@ static void copy_from_image_yuv420p(struct vf_instance *vf, int first_row,
  */
 static void copy_to_image_yuv420p(struct vf_instance *vf)
 {
-    int pl;
-    int i, j, k;
+    int pl, x, y;
+
     for (pl = 1; pl < 3; ++pl) {
         int dst_stride = vf->dmpi->stride[pl];
         int src_stride = vf->priv->outw;
@@ -332,17 +361,22 @@ static void copy_to_image_yuv420p(struct vf_instance *vf)
         unsigned char *dst      = vf->dmpi->planes[pl];
         unsigned char *src      = vf->priv->planes[pl];
         unsigned char *src_next = vf->priv->planes[pl] + src_stride;
-        for (i = 0; i < vf->priv->outh / 2; ++i) {
-            if ((vf->priv->dirty_rows[i * 2] == 1)) {
-                assert(vf->priv->dirty_rows[i * 2 + 1] == 1);
-                for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k += 2) {
-                    unsigned val = 0;
-                    val += *(src + k);
-                    val += *(src + k + 1);
-                    val += *(src_next + k);
-                    val += *(src_next + k + 1);
-                    *(dst + j) = val >> 2;
-                }
+        struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
+
+        for (y = 0; y < vf->priv->outh; y += 2) {
+            int xmin = dirty_rows->xmin;
+            int xmax = dirty_rows->xmax;
+            int width = (xmax - xmin) >> 1;
+            unsigned char *srccur1 = src      + xmin;
+            unsigned char *srccur2 = src_next + xmin;
+            unsigned char *dstcur  = dst      + (xmin >> 1);
+
+            for (x = 0; x < width; x++) {
+                *dst = (srccur1[0] + srccur1[1] +
+                        srccur2[0] + srccur2[1] + 2) >> 2;
+                srccur1 += 2;
+                srccur2 += 2;
+                dstcur++;
             }
             dst += dst_stride;
             src      = src_next + src_stride;
@@ -351,83 +385,69 @@ static void copy_to_image_yuv420p(struct vf_instance *vf)
     }
 }
 
-static void copy_from_image_yuv422(struct vf_instance *vf,
-                                   int first_row, int last_row)
+static void copy_from_image_yuv422(struct vf_instance *vf)
 {
-    unsigned char *dirty_rows = vf->priv->dirty_rows;
+    struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
     int src_stride = vf->dmpi->stride[0];
     int dst_stride = vf->priv->outw;
-    unsigned char *src = vf->dmpi->planes[0] + first_row * src_stride;
+    unsigned char *src = vf->dmpi->planes[0];
     unsigned char **dst = vf->priv->planes;
-    int dst_off = first_row * dst_stride;
+    int dst_off = 0;
     int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
-    int i, j, k;
-
-    for (i = first_row; i < last_row; ++i) {
-        int next_off = dst_off + dst_stride;
-        if (!dirty_rows[i]) {
-            if (is_uyvy) {
-                for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
-                    dst[0][j    ] = src[k + 1];
-                    dst[0][j + 1] = src[k + 3];
-                    dst[1][j    ] = src[k    ];
-                    dst[1][j + 1] = src[k    ];
-                    dst[2][j    ] = src[k + 2];
-                    dst[2][j + 1] = src[k + 2];
-                }
-            } else {
-                for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
-                    dst[0][j    ] = src[k    ];
-                    dst[0][j + 1] = src[k + 2];
-                    dst[1][j    ] = src[k + 1];
-                    dst[1][j + 1] = src[k + 1];
-                    dst[2][j    ] = src[k + 3];
-                    dst[2][j + 1] = src[k + 3];
-                }
-            }
+    int x, y;
+
+    for (y = 0; y < vf->priv->outh; y++) {
+        int xmin = dirty_rows[y].xmin;
+        int xmax = dirty_rows[y].xmax;
+        int width = (xmax - xmin) >> 1;
+        unsigned char *srccur = src + (xmin << 1);
+        int dstcur = dst_off + xmin;
+
+        for (x = 0; x < width; x++) {
+            dst[0][dstcur + 0] = srccur[0 + is_uyvy];
+            dst[0][dstcur + 1] = srccur[2 + is_uyvy];
+            dst[1][dstcur + 0] =
+            dst[1][dstcur + 1] = srccur[1 - is_uyvy];
+            dst[2][dstcur + 0] =
+            dst[2][dstcur + 1] = srccur[3 - is_uyvy];
+            srccur += 4;
+            dstcur += 2;
         }
-        src += src_stride;
-        dst_off = next_off;
+        src     += src_stride;
+        dst_off += dst_stride;
     }
-    for (i = first_row; i < last_row; ++i)
-        dirty_rows[i] = 1;
 }
 
 static void copy_to_image_yuv422(struct vf_instance *vf)
 {
-    unsigned char *dirty_rows = vf->priv->dirty_rows;
+    struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
     int src_stride = vf->priv->outw;
     int dst_stride = vf->dmpi->stride[0];
-    int height = vf->priv->outh;
     unsigned char **src = vf->priv->planes;
     unsigned char *dst = vf->dmpi->planes[0];
     int src_off = 0;
     int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
-    int i, j, k;
+    int x, y;
+
+    for (y = 0; y < vf->priv->outh; y++) {
+        int xmin = dirty_rows[y].xmin;
+        int xmax = dirty_rows[y].xmax;
+        int width = (xmax - xmin) >> 1;
+        int srccur = src_off + xmin;
+        unsigned char *dstcur = dst + (xmin << 1);
 
-    for (i = 0; i < height; ++i) {
-        int next_off = src_off + src_stride;
-        if (*dirty_rows++) {
 #define AVERAGE(a, b) (((unsigned)(a) + (unsigned)(b)) >> 1)
-            if (is_uyvy) {
-                for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
-                    dst[k    ] = AVERAGE(src[1][j], src[1][j + 1]);
-                    dst[k + 1] = src[0][j];
-                    dst[k + 2] = AVERAGE(src[2][j], src[2][j + 1]);
-                    dst[k + 3] = src[0][j + 1];
-                }
-            } else {
-                for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
-                    dst[k    ] = src[0][j];
-                    dst[k + 1] = AVERAGE(src[1][j], src[1][j + 1]);
-                    dst[k + 2] = src[0][j + 1];
-                    dst[k + 3] = AVERAGE(src[2][j], src[2][j + 1]);
-                }
-            }
-#undef AVERAGE
+        for (x = 0; x < width; x++) {
+            dstcur[0 + is_uyvy] = src[0][srccur + 0];
+            dstcur[2 + is_uyvy] = src[0][srccur + 1];
+            dstcur[1 - is_uyvy] = AVERAGE(src[1][srccur], src[1][srccur + 1]);
+            dstcur[3 - is_uyvy] = AVERAGE(src[2][srccur], src[2][srccur + 1]);
+            srccur += 2;
+            dstcur += 4;
         }
-        src_off = next_off;
-        dst += dst_stride;
+#undef AVERAGE
+        src_off += src_stride;
+        dst     += dst_stride;
     }
 }
 
@@ -475,12 +495,12 @@ static void render_frame(struct vf_instance *vf, mp_image_t *mpi,
     copy_from_image_func copy_from_image = vf->priv->copy_from_image;
     copy_to_image_func copy_to_image = vf->priv->copy_to_image;
 
+    copy_from_image(vf);
     img = eosd_image_first(images);
     if (!img)
         return;
         memset(vf->priv->dirty_rows, 0, vf->priv->outh);        // reset dirty rows
         while (img) {
-            copy_from_image(vf, img->dst_y, img->dst_y + img->h);
             my_draw_bitmap(vf, img->bitmap, img->w, img->h, img->stride,
                            img->dst_x, img->dst_y, img->color);
             img = eosd_image_next(images);
@@ -493,6 +513,8 @@ static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
     struct mp_eosd_image_list images;
     eosd_render_frame(pts, &images);
     prepare_image(vf, mpi);
+    if (images.changed)
+        compute_dirty_extents(vf, &images);
     render_frame(vf, mpi, &images);
     return vf_next_put_image(vf, vf->dmpi, pts);
 }
-- 
1.7.10.4



More information about the MPlayer-dev-eng mailing list