[MPlayer-dev-eng] [PATCH] vf_ass: reduce useless up/downsampling.

Fri Oct 5 13:40:42 CEST 2012

On Wed, Oct 3, 2012 at 6:19 PM, Nicolas George <
nicolas.george at normalesup.org> wrote:

> For each line of pixels, compute the leftmost and rightmost
> affected pixels and only up/downsample this interval.
>
> The extents are computed only once, when the ASS layout changes.
>
> Speed += ~15% for a typical subtitle with a long and a short line.
> ---
>  libmpcodecs/vf_ass.c |  232
> +++++++++++++++++++++++++++-----------------------
>  1 file changed, 127 insertions(+), 105 deletions(-)
>
>
> Note that this filter still has a few policy flaws, such as unchecked
> mallocs, but these are unrelated to this patch.
>
>
> diff --git a/libmpcodecs/vf_ass.c b/libmpcodecs/vf_ass.c
> index d69cc5f..939cc92 100644
> --- a/libmpcodecs/vf_ass.c
> +++ b/libmpcodecs/vf_ass.c
> @@ -52,8 +52,7 @@
>  #define rgba2u(c)  ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )
>  #define rgba2v(c)  ( (( 450*_r(c) - 376*_g(c) -  73*_b(c)) >> 10) + 128 )
>
> -typedef void (*copy_from_image_func)(struct vf_instance *vf,
> -                                     int first_row, int last_row);
> +typedef void (*copy_from_image_func)(struct vf_instance *vf);
>  typedef void (*copy_to_image_func)(struct vf_instance *vf);
>
>  static const struct vf_priv_s {
> @@ -61,21 +60,24 @@ static const struct vf_priv_s {
>
>      int is_planar;
>      unsigned int outfmt;
> +    int sub_y;
>
>      // 1 = auto-added filter: insert only if chain does not support EOSD
> already
>      // 0 = insert always
>      int auto_insert;
>
>      unsigned char *planes[3];
> -    unsigned char *dirty_rows;
> +    struct dirty_row_extent {
> +        int xmin, xmax;
> +    } *dirty_rows;
>
>      copy_from_image_func copy_from_image;
>      copy_to_image_func copy_to_image;
>  } vf_priv_dflt;
>
> -static void copy_from_image_yuv420p(struct vf_instance *, int, int);
> +static void copy_from_image_yuv420p(struct vf_instance *);
>  static void copy_to_image_yuv420p(struct vf_instance *);
> -static void copy_from_image_yuv422(struct vf_instance *, int, int);
> +static void copy_from_image_yuv422(struct vf_instance *);
>  static void copy_to_image_yuv422(struct vf_instance *);
>
>  static int config(struct vf_instance *vf,
> @@ -91,12 +93,14 @@ static int config(struct vf_instance *vf,
>          vf->priv->is_planar = 1;
>          vf->priv->copy_from_image = copy_from_image_yuv420p;
>          vf->priv->copy_to_image = copy_to_image_yuv420p;
> +        vf->priv->sub_y = 1;
>          break;
>      case IMGFMT_UYVY:
>      case IMGFMT_YUY2:
>          vf->priv->is_planar = 0;
>          vf->priv->copy_from_image = copy_from_image_yuv422;
>          vf->priv->copy_to_image = copy_to_image_yuv422;
> +        vf->priv->sub_y = 0;
>          break;
>      default:
>          return 0;
> @@ -115,7 +119,8 @@ static int config(struct vf_instance *vf,
>          vf->priv->planes[0] = malloc(vf->priv->outw * vf->priv->outh);
>      vf->priv->planes[1]  = malloc(vf->priv->outw * vf->priv->outh);
>      vf->priv->planes[2]  = malloc(vf->priv->outw * vf->priv->outh);
> -    vf->priv->dirty_rows = malloc(vf->priv->outh);
> +    vf->priv->dirty_rows = calloc(vf->priv->outh,
> +                                  sizeof(*vf->priv->dirty_rows));
>
>      res.w    = vf->priv->outw;
>      res.h    = vf->priv->outh;
> @@ -272,50 +277,74 @@ static int prepare_image(struct vf_instance *vf,
> mp_image_t *mpi)
>      return 0;
>  }
>
> +static void compute_dirty_extents(struct vf_instance *vf,
> +                                  struct mp_eosd_image_list *images)
> +{
> +    struct mp_eosd_image *img;
> +    int xmin, xmax, ymin, ymax, y;
> +    struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> +
> +    for (y = 0; y < vf->priv->outh; y++) {
> +        dirty_rows[y].xmin = vf->priv->outw;
> +        dirty_rows[y].xmax = 0;
> +    }
> +
> +    img = eosd_image_first(images);
> +    while (img) {
> +        xmin = FFMAX(0,              img->dst_x);
> +        xmax = FFMIN(vf->priv->outw, img->dst_x + img->w);
> +        ymin = FFMAX(0,              img->dst_y);
> +        ymax = FFMIN(vf->priv->outh, img->dst_y + img->h);
> +        xmin = (xmin + 0) & ~1;
> +        xmax = (xmax + 1) & ~1;
> +        if (vf->priv->sub_y) {
> +            ymin = (ymin + 0) & ~1;
> +            ymax = (ymax + 1) & ~1;
> +        }
> +        for (y = ymin; y < ymax; y++) {
> +            dirty_rows[y].xmin = FFMIN(dirty_rows[y].xmin, xmin);
> +            dirty_rows[y].xmax = FFMAX(dirty_rows[y].xmax, xmax);
> +        }
> +        img = eosd_image_next(images);
> +    }
> +}
> +
>  /**
>   * \brief Copy specified rows from render_context.dmpi to
> render_context.planes, upsampling to 4:4:4
>   */
> -static void copy_from_image_yuv420p(struct vf_instance *vf, int first_row,
> -                            int last_row)
> +static void copy_from_image_yuv420p(struct vf_instance *vf)
>  {
> -    int pl;
> -    int i, j, k;
> -    unsigned char val;
> -    int chroma_rows;
> -
> -    first_row  -= (first_row % 2);
> -    last_row   += (last_row  % 2);
> -    chroma_rows = (last_row - first_row) / 2;
> -
> -    assert(first_row >= 0);
> -    assert(first_row <= last_row);
> -    assert(last_row  <= vf->priv->outh);
> +    int pl, y, x;
>
>      for (pl = 1; pl < 3; ++pl) {
>          int dst_stride = vf->priv->outw;
>          int src_stride = vf->dmpi->stride[pl];
>
> -        unsigned char *src      = vf->dmpi->planes[pl] + (first_row / 2)
> * src_stride;
> -        unsigned char *dst      = vf->priv->planes[pl] +  first_row
>  * dst_stride;
> +        unsigned char *src      = vf->dmpi->planes[pl];
> +        unsigned char *dst      = vf->priv->planes[pl];
>          unsigned char *dst_next = dst + dst_stride;
> -        for (i = 0; i < chroma_rows; ++i) {
> -            if ((vf->priv->dirty_rows[first_row + i * 2    ] == 0) ||
> -                (vf->priv->dirty_rows[first_row + i * 2 + 1] == 0)) {
> -                for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k +=
> 2) {
> -                    val = *(src + j);
> -                    *(dst + k    ) = val;
> -                    *(dst + k + 1) = val;
> -                    *(dst_next + k    ) = val;
> -                    *(dst_next + k + 1) = val;
> -                }
> +        struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> +
> +        for (y = 0; y < vf->priv->outh; y += 2) {
> +            int xmin = dirty_rows->xmin;
> +            int xmax = dirty_rows->xmax;
> +            int width = (xmax - xmin) >> 1;
> +            unsigned char *srccur  = src      + (xmin >> 1);
> +            unsigned char *dstcur1 = dst      + xmin;
> +            unsigned char *dstcur2 = dst_next + xmin;
> +
> +            for (x = 0; x < width; x++) {
> +                dstcur1[0] = dstcur1[1] = dstcur2[0] = dstcur2[1] =
> *srccur;
> +                srccur++;
> +                dstcur1 += 2;
> +                dstcur2 += 2;
>              }
>              src += src_stride;
>              dst      = dst_next + dst_stride;
>              dst_next = dst      + dst_stride;
> +            dirty_rows += 2;
>          }
>      }
> -    for (i = first_row; i < last_row; ++i)
> -        vf->priv->dirty_rows[i] = 1;
>  }
>
>  /**
> @@ -323,8 +352,8 @@ static void copy_from_image_yuv420p(struct vf_instance
> *vf, int first_row,
>   */
>  static void copy_to_image_yuv420p(struct vf_instance *vf)
>  {
> -    int pl;
> -    int i, j, k;
> +    int pl, x, y;
> +
>      for (pl = 1; pl < 3; ++pl) {
>          int dst_stride = vf->dmpi->stride[pl];
>          int src_stride = vf->priv->outw;
> @@ -332,17 +361,22 @@ static void copy_to_image_yuv420p(struct vf_instance
> *vf)
>          unsigned char *dst      = vf->dmpi->planes[pl];
>          unsigned char *src      = vf->priv->planes[pl];
>          unsigned char *src_next = vf->priv->planes[pl] + src_stride;
> -        for (i = 0; i < vf->priv->outh / 2; ++i) {
> -            if ((vf->priv->dirty_rows[i * 2] == 1)) {
> -                assert(vf->priv->dirty_rows[i * 2 + 1] == 1);
> -                for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k +=
> 2) {
> -                    unsigned val = 0;
> -                    val += *(src + k);
> -                    val += *(src + k + 1);
> -                    val += *(src_next + k);
> -                    val += *(src_next + k + 1);
> -                    *(dst + j) = val >> 2;
> -                }
> +        struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> +
> +        for (y = 0; y < vf->priv->outh; y += 2) {
> +            int xmin = dirty_rows->xmin;
> +            int xmax = dirty_rows->xmax;
> +            int width = (xmax - xmin) >> 1;
> +            unsigned char *srccur1 = src      + xmin;
> +            unsigned char *srccur2 = src_next + xmin;
> +            unsigned char *dstcur  = dst      + (xmin >> 1);
> +
> +            for (x = 0; x < width; x++) {
> +                *dst = (srccur1[0] + srccur1[1] +
> +                        srccur2[0] + srccur2[1] + 2) >> 2;
> +                srccur1 += 2;
> +                srccur2 += 2;
> +                dstcur++;
>              }
>              dst += dst_stride;
>              src      = src_next + src_stride;
> @@ -351,83 +385,69 @@ static void copy_to_image_yuv420p(struct vf_instance
> *vf)
>      }
>  }
>
> -static void copy_from_image_yuv422(struct vf_instance *vf,
> -                                   int first_row, int last_row)
> +static void copy_from_image_yuv422(struct vf_instance *vf)
>  {
> -    unsigned char *dirty_rows = vf->priv->dirty_rows;
> +    struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
>      int src_stride = vf->dmpi->stride[0];
>      int dst_stride = vf->priv->outw;
> -    unsigned char *src = vf->dmpi->planes[0] + first_row * src_stride;
> +    unsigned char *src = vf->dmpi->planes[0];
>      unsigned char **dst = vf->priv->planes;
> -    int dst_off = first_row * dst_stride;
> +    int dst_off = 0;
>      int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
> -    int i, j, k;
> -
> -    for (i = first_row; i < last_row; ++i) {
> -        int next_off = dst_off + dst_stride;
> -        if (!dirty_rows[i]) {
> -            if (is_uyvy) {
> -                for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
> -                    dst[0][j    ] = src[k + 1];
> -                    dst[0][j + 1] = src[k + 3];
> -                    dst[1][j    ] = src[k    ];
> -                    dst[1][j + 1] = src[k    ];
> -                    dst[2][j    ] = src[k + 2];
> -                    dst[2][j + 1] = src[k + 2];
> -                }
> -            } else {
> -                for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
> -                    dst[0][j    ] = src[k    ];
> -                    dst[0][j + 1] = src[k + 2];
> -                    dst[1][j    ] = src[k + 1];
> -                    dst[1][j + 1] = src[k + 1];
> -                    dst[2][j    ] = src[k + 3];
> -                    dst[2][j + 1] = src[k + 3];
> -                }
> -            }
> +    int x, y;
> +
> +    for (y = 0; y < vf->priv->outh; y++) {
> +        int xmin = dirty_rows[y].xmin;
> +        int xmax = dirty_rows[y].xmax;
> +        int width = (xmax - xmin) >> 1;
> +        unsigned char *srccur = src + (xmin << 1);
> +        int dstcur = dst_off + xmin;
> +
> +        for (x = 0; x < width; x++) {
> +            dst[0][dstcur + 0] = srccur[0 + is_uyvy];
> +            dst[0][dstcur + 1] = srccur[2 + is_uyvy];
> +            dst[1][dstcur + 0] =
> +            dst[1][dstcur + 1] = srccur[1 - is_uyvy];
> +            dst[2][dstcur + 0] =
> +            dst[2][dstcur + 1] = srccur[3 - is_uyvy];
> +            srccur += 4;
> +            dstcur += 2;
>          }
> -        src += src_stride;
> -        dst_off = next_off;
> +        src     += src_stride;
> +        dst_off += dst_stride;
>      }
> -    for (i = first_row; i < last_row; ++i)
> -        dirty_rows[i] = 1;
>  }
>
>  static void copy_to_image_yuv422(struct vf_instance *vf)
>  {
> -    unsigned char *dirty_rows = vf->priv->dirty_rows;
> +    struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
>      int src_stride = vf->priv->outw;
>      int dst_stride = vf->dmpi->stride[0];
> -    int height = vf->priv->outh;
>      unsigned char **src = vf->priv->planes;
>      unsigned char *dst = vf->dmpi->planes[0];
>      int src_off = 0;
>      int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
> -    int i, j, k;
> +    int x, y;
> +
> +    for (y = 0; y < vf->priv->outh; y++) {
> +        int xmin = dirty_rows[y].xmin;
> +        int xmax = dirty_rows[y].xmax;
> +        int width = (xmax - xmin) >> 1;
> +        int srccur = src_off + xmin;
> +        unsigned char *dstcur = dst + (xmin << 1);
>
> -    for (i = 0; i < height; ++i) {
> -        int next_off = src_off + src_stride;
> -        if (*dirty_rows++) {
>  #define AVERAGE(a, b) (((unsigned)(a) + (unsigned)(b)) >> 1)
> -            if (is_uyvy) {
> -                for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
> -                    dst[k    ] = AVERAGE(src[1][j], src[1][j + 1]);
> -                    dst[k + 1] = src[0][j];
> -                    dst[k + 2] = AVERAGE(src[2][j], src[2][j + 1]);
> -                    dst[k + 3] = src[0][j + 1];
> -                }
> -            } else {
> -                for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
> -                    dst[k    ] = src[0][j];
> -                    dst[k + 1] = AVERAGE(src[1][j], src[1][j + 1]);
> -                    dst[k + 2] = src[0][j + 1];
> -                    dst[k + 3] = AVERAGE(src[2][j], src[2][j + 1]);
> -                }
> -            }
> -#undef AVERAGE
> +        for (x = 0; x < width; x++) {
> +            dstcur[0 + is_uyvy] = src[0][srccur + 0];
> +            dstcur[2 + is_uyvy] = src[0][srccur + 1];
> +            dstcur[1 - is_uyvy] = AVERAGE(src[1][srccur], src[1][srccur +
> 1]);
> +            dstcur[3 - is_uyvy] = AVERAGE(src[2][srccur], src[2][srccur +
> 1]);
> +            srccur += 2;
> +            dstcur += 4;
>          }
> -        src_off = next_off;
> -        dst += dst_stride;
> +#undef AVERAGE
> +        src_off += src_stride;
> +        dst     += dst_stride;
>      }
>  }
>
> @@ -475,12 +495,12 @@ static void render_frame(struct vf_instance *vf,
> mp_image_t *mpi,
>      copy_from_image_func copy_from_image = vf->priv->copy_from_image;
>      copy_to_image_func copy_to_image = vf->priv->copy_to_image;
>
> +    copy_from_image(vf);
>      img = eosd_image_first(images);
>      if (!img)
>          return;
>          memset(vf->priv->dirty_rows, 0, vf->priv->outh);        // reset
> dirty rows
>          while (img) {
> -            copy_from_image(vf, img->dst_y, img->dst_y + img->h);
>              my_draw_bitmap(vf, img->bitmap, img->w, img->h, img->stride,
>                             img->dst_x, img->dst_y, img->color);
>              img = eosd_image_next(images);
> @@ -493,6 +513,8 @@ static int put_image(struct vf_instance *vf,
> mp_image_t *mpi, double pts)
>      struct mp_eosd_image_list images;
>      eosd_render_frame(pts, &images);
>      prepare_image(vf, mpi);
> +    if (images.changed)
> +        compute_dirty_extents(vf, &images);
>      render_frame(vf, mpi, &images);
>      return vf_next_put_image(vf, vf->dmpi, pts);
>  }
> --
> 1.7.10.4

This patch seems to break the original vf_ass. At least it doesn't
work correctly for me. Comparison images have been attached.
shot0001.jpg from current vf_ass,
shot0002.jpg from vf_ass with your patch.

However, you have given me some good ideas. I would like to rework
the whole vf_ass substantially to improve its performance, using the idea
originally intended for vf_ass2. I will send the patch as soon as I think
it is worth such a big change.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: shot0001.jpg
Type: image/jpeg
Size: 40486 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/mplayer-dev-eng/attachments/20121005/e2b52153/attachment-0002.jpg>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: shot0002.jpg
Type: image/jpeg
Size: 44001 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/mplayer-dev-eng/attachments/20121005/e2b52153/attachment-0003.jpg>