[MPlayer-dev-eng] [PATCH] vf_ass: reduce useless up/downsampling.
Xidorn Quan
quanxunzhen at gmail.com
Fri Oct 5 13:40:42 CEST 2012
On Wed, Oct 3, 2012 at 6:19 PM, Nicolas George <
nicolas.george at normalesup.org> wrote:
> For each line of pixels, compute the leftmost and rightmost
> affected pixels and only up/downsample this interval.
>
> The extents are computed only once when the ASS layout changes.
>
> Speed += ~15% for a typical subtitle with a long and a short line.
> ---
> libmpcodecs/vf_ass.c | 232
> +++++++++++++++++++++++++++-----------------------
> 1 file changed, 127 insertions(+), 105 deletions(-)
>
>
> Note that this filter still has a few policy flaws, such as unchecked
> mallocs, but this is unrelated to this patch.
>
>
> diff --git a/libmpcodecs/vf_ass.c b/libmpcodecs/vf_ass.c
> index d69cc5f..939cc92 100644
> --- a/libmpcodecs/vf_ass.c
> +++ b/libmpcodecs/vf_ass.c
> @@ -52,8 +52,7 @@
> #define rgba2u(c) ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )
> #define rgba2v(c) ( (( 450*_r(c) - 376*_g(c) - 73*_b(c)) >> 10) + 128 )
>
> -typedef void (*copy_from_image_func)(struct vf_instance *vf,
> - int first_row, int last_row);
> +typedef void (*copy_from_image_func)(struct vf_instance *vf);
> typedef void (*copy_to_image_func)(struct vf_instance *vf);
>
> static const struct vf_priv_s {
> @@ -61,21 +60,24 @@ static const struct vf_priv_s {
>
> int is_planar;
> unsigned int outfmt;
> + int sub_y;
>
> // 1 = auto-added filter: insert only if chain does not support EOSD
> already
> // 0 = insert always
> int auto_insert;
>
> unsigned char *planes[3];
> - unsigned char *dirty_rows;
> + struct dirty_row_extent {
> + int xmin, xmax;
> + } *dirty_rows;
>
> copy_from_image_func copy_from_image;
> copy_to_image_func copy_to_image;
> } vf_priv_dflt;
>
> -static void copy_from_image_yuv420p(struct vf_instance *, int, int);
> +static void copy_from_image_yuv420p(struct vf_instance *);
> static void copy_to_image_yuv420p(struct vf_instance *);
> -static void copy_from_image_yuv422(struct vf_instance *, int, int);
> +static void copy_from_image_yuv422(struct vf_instance *);
> static void copy_to_image_yuv422(struct vf_instance *);
>
> static int config(struct vf_instance *vf,
> @@ -91,12 +93,14 @@ static int config(struct vf_instance *vf,
> vf->priv->is_planar = 1;
> vf->priv->copy_from_image = copy_from_image_yuv420p;
> vf->priv->copy_to_image = copy_to_image_yuv420p;
> + vf->priv->sub_y = 1;
> break;
> case IMGFMT_UYVY:
> case IMGFMT_YUY2:
> vf->priv->is_planar = 0;
> vf->priv->copy_from_image = copy_from_image_yuv422;
> vf->priv->copy_to_image = copy_to_image_yuv422;
> + vf->priv->sub_y = 0;
> break;
> default:
> return 0;
> @@ -115,7 +119,8 @@ static int config(struct vf_instance *vf,
> vf->priv->planes[0] = malloc(vf->priv->outw * vf->priv->outh);
> vf->priv->planes[1] = malloc(vf->priv->outw * vf->priv->outh);
> vf->priv->planes[2] = malloc(vf->priv->outw * vf->priv->outh);
> - vf->priv->dirty_rows = malloc(vf->priv->outh);
> + vf->priv->dirty_rows = calloc(vf->priv->outh,
> + sizeof(*vf->priv->dirty_rows));
>
> res.w = vf->priv->outw;
> res.h = vf->priv->outh;
> @@ -272,50 +277,74 @@ static int prepare_image(struct vf_instance *vf,
> mp_image_t *mpi)
> return 0;
> }
>
> +static void compute_dirty_extents(struct vf_instance *vf,
> + struct mp_eosd_image_list *images)
> +{
> + struct mp_eosd_image *img;
> + int xmin, xmax, ymin, ymax, y;
> + struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> +
> + for (y = 0; y < vf->priv->outh; y++) {
> + dirty_rows[y].xmin = vf->priv->outw;
> + dirty_rows[y].xmax = 0;
> + }
> +
> + img = eosd_image_first(images);
> + while (img) {
> + xmin = FFMAX(0, img->dst_x);
> + xmax = FFMIN(vf->priv->outw, img->dst_x + img->w);
> + ymin = FFMAX(0, img->dst_y);
> + ymax = FFMIN(vf->priv->outh, img->dst_y + img->h);
> + xmin = (xmin + 0) & ~1;
> + xmax = (xmax + 1) & ~1;
> + if (vf->priv->sub_y) {
> + ymin = (ymin + 0) & ~1;
> + ymax = (ymax + 1) & ~1;
> + }
> + for (y = ymin; y < ymax; y++) {
> + dirty_rows[y].xmin = FFMIN(dirty_rows[y].xmin, xmin);
> + dirty_rows[y].xmax = FFMAX(dirty_rows[y].xmax, xmax);
> + }
> + img = eosd_image_next(images);
> + }
> +}
> +
> /**
> * \brief Copy specified rows from render_context.dmpi to
> render_context.planes, upsampling to 4:4:4
> */
> -static void copy_from_image_yuv420p(struct vf_instance *vf, int first_row,
> - int last_row)
> +static void copy_from_image_yuv420p(struct vf_instance *vf)
> {
> - int pl;
> - int i, j, k;
> - unsigned char val;
> - int chroma_rows;
> -
> - first_row -= (first_row % 2);
> - last_row += (last_row % 2);
> - chroma_rows = (last_row - first_row) / 2;
> -
> - assert(first_row >= 0);
> - assert(first_row <= last_row);
> - assert(last_row <= vf->priv->outh);
> + int pl, y, x;
>
> for (pl = 1; pl < 3; ++pl) {
> int dst_stride = vf->priv->outw;
> int src_stride = vf->dmpi->stride[pl];
>
> - unsigned char *src = vf->dmpi->planes[pl] + (first_row / 2)
> * src_stride;
> - unsigned char *dst = vf->priv->planes[pl] + first_row
> * dst_stride;
> + unsigned char *src = vf->dmpi->planes[pl];
> + unsigned char *dst = vf->priv->planes[pl];
> unsigned char *dst_next = dst + dst_stride;
> - for (i = 0; i < chroma_rows; ++i) {
> - if ((vf->priv->dirty_rows[first_row + i * 2 ] == 0) ||
> - (vf->priv->dirty_rows[first_row + i * 2 + 1] == 0)) {
> - for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k +=
> 2) {
> - val = *(src + j);
> - *(dst + k ) = val;
> - *(dst + k + 1) = val;
> - *(dst_next + k ) = val;
> - *(dst_next + k + 1) = val;
> - }
> + struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> +
> + for (y = 0; y < vf->priv->outh; y += 2) {
> + int xmin = dirty_rows->xmin;
> + int xmax = dirty_rows->xmax;
> + int width = (xmax - xmin) >> 1;
> + unsigned char *srccur = src + (xmin >> 1);
> + unsigned char *dstcur1 = dst + xmin;
> + unsigned char *dstcur2 = dst_next + xmin;
> +
> + for (x = 0; x < width; x++) {
> + dstcur1[0] = dstcur1[1] = dstcur2[0] = dstcur2[1] =
> *srccur;
> + srccur++;
> + dstcur1 += 2;
> + dstcur2 += 2;
> }
> src += src_stride;
> dst = dst_next + dst_stride;
> dst_next = dst + dst_stride;
> + dirty_rows += 2;
> }
> }
> - for (i = first_row; i < last_row; ++i)
> - vf->priv->dirty_rows[i] = 1;
> }
>
> /**
> @@ -323,8 +352,8 @@ static void copy_from_image_yuv420p(struct vf_instance
> *vf, int first_row,
> */
> static void copy_to_image_yuv420p(struct vf_instance *vf)
> {
> - int pl;
> - int i, j, k;
> + int pl, x, y;
> +
> for (pl = 1; pl < 3; ++pl) {
> int dst_stride = vf->dmpi->stride[pl];
> int src_stride = vf->priv->outw;
> @@ -332,17 +361,22 @@ static void copy_to_image_yuv420p(struct vf_instance
> *vf)
> unsigned char *dst = vf->dmpi->planes[pl];
> unsigned char *src = vf->priv->planes[pl];
> unsigned char *src_next = vf->priv->planes[pl] + src_stride;
> - for (i = 0; i < vf->priv->outh / 2; ++i) {
> - if ((vf->priv->dirty_rows[i * 2] == 1)) {
> - assert(vf->priv->dirty_rows[i * 2 + 1] == 1);
> - for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k +=
> 2) {
> - unsigned val = 0;
> - val += *(src + k);
> - val += *(src + k + 1);
> - val += *(src_next + k);
> - val += *(src_next + k + 1);
> - *(dst + j) = val >> 2;
> - }
> + struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> +
> + for (y = 0; y < vf->priv->outh; y += 2) {
> + int xmin = dirty_rows->xmin;
> + int xmax = dirty_rows->xmax;
> + int width = (xmax - xmin) >> 1;
> + unsigned char *srccur1 = src + xmin;
> + unsigned char *srccur2 = src_next + xmin;
> + unsigned char *dstcur = dst + (xmin >> 1);
> +
> + for (x = 0; x < width; x++) {
> + *dst = (srccur1[0] + srccur1[1] +
> + srccur2[0] + srccur2[1] + 2) >> 2;
> + srccur1 += 2;
> + srccur2 += 2;
> + dstcur++;
> }
> dst += dst_stride;
> src = src_next + src_stride;
> @@ -351,83 +385,69 @@ static void copy_to_image_yuv420p(struct vf_instance
> *vf)
> }
> }
>
> -static void copy_from_image_yuv422(struct vf_instance *vf,
> - int first_row, int last_row)
> +static void copy_from_image_yuv422(struct vf_instance *vf)
> {
> - unsigned char *dirty_rows = vf->priv->dirty_rows;
> + struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> int src_stride = vf->dmpi->stride[0];
> int dst_stride = vf->priv->outw;
> - unsigned char *src = vf->dmpi->planes[0] + first_row * src_stride;
> + unsigned char *src = vf->dmpi->planes[0];
> unsigned char **dst = vf->priv->planes;
> - int dst_off = first_row * dst_stride;
> + int dst_off = 0;
> int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
> - int i, j, k;
> -
> - for (i = first_row; i < last_row; ++i) {
> - int next_off = dst_off + dst_stride;
> - if (!dirty_rows[i]) {
> - if (is_uyvy) {
> - for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
> - dst[0][j ] = src[k + 1];
> - dst[0][j + 1] = src[k + 3];
> - dst[1][j ] = src[k ];
> - dst[1][j + 1] = src[k ];
> - dst[2][j ] = src[k + 2];
> - dst[2][j + 1] = src[k + 2];
> - }
> - } else {
> - for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
> - dst[0][j ] = src[k ];
> - dst[0][j + 1] = src[k + 2];
> - dst[1][j ] = src[k + 1];
> - dst[1][j + 1] = src[k + 1];
> - dst[2][j ] = src[k + 3];
> - dst[2][j + 1] = src[k + 3];
> - }
> - }
> + int x, y;
> +
> + for (y = 0; y < vf->priv->outh; y++) {
> + int xmin = dirty_rows[y].xmin;
> + int xmax = dirty_rows[y].xmax;
> + int width = (xmax - xmin) >> 1;
> + unsigned char *srccur = src + (xmin << 1);
> + int dstcur = dst_off + xmin;
> +
> + for (x = 0; x < width; x++) {
> + dst[0][dstcur + 0] = srccur[0 + is_uyvy];
> + dst[0][dstcur + 1] = srccur[2 + is_uyvy];
> + dst[1][dstcur + 0] =
> + dst[1][dstcur + 1] = srccur[1 - is_uyvy];
> + dst[2][dstcur + 0] =
> + dst[2][dstcur + 1] = srccur[3 - is_uyvy];
> + srccur += 4;
> + dstcur += 2;
> }
> - src += src_stride;
> - dst_off = next_off;
> + src += src_stride;
> + dst_off += dst_stride;
> }
> - for (i = first_row; i < last_row; ++i)
> - dirty_rows[i] = 1;
> }
>
> static void copy_to_image_yuv422(struct vf_instance *vf)
> {
> - unsigned char *dirty_rows = vf->priv->dirty_rows;
> + struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
> int src_stride = vf->priv->outw;
> int dst_stride = vf->dmpi->stride[0];
> - int height = vf->priv->outh;
> unsigned char **src = vf->priv->planes;
> unsigned char *dst = vf->dmpi->planes[0];
> int src_off = 0;
> int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
> - int i, j, k;
> + int x, y;
> +
> + for (y = 0; y < vf->priv->outh; y++) {
> + int xmin = dirty_rows[y].xmin;
> + int xmax = dirty_rows[y].xmax;
> + int width = (xmax - xmin) >> 1;
> + int srccur = src_off + xmin;
> + unsigned char *dstcur = dst + (xmin << 1);
>
> - for (i = 0; i < height; ++i) {
> - int next_off = src_off + src_stride;
> - if (*dirty_rows++) {
> #define AVERAGE(a, b) (((unsigned)(a) + (unsigned)(b)) >> 1)
> - if (is_uyvy) {
> - for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
> - dst[k ] = AVERAGE(src[1][j], src[1][j + 1]);
> - dst[k + 1] = src[0][j];
> - dst[k + 2] = AVERAGE(src[2][j], src[2][j + 1]);
> - dst[k + 3] = src[0][j + 1];
> - }
> - } else {
> - for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
> - dst[k ] = src[0][j];
> - dst[k + 1] = AVERAGE(src[1][j], src[1][j + 1]);
> - dst[k + 2] = src[0][j + 1];
> - dst[k + 3] = AVERAGE(src[2][j], src[2][j + 1]);
> - }
> - }
> -#undef AVERAGE
> + for (x = 0; x < width; x++) {
> + dstcur[0 + is_uyvy] = src[0][srccur + 0];
> + dstcur[2 + is_uyvy] = src[0][srccur + 1];
> + dstcur[1 - is_uyvy] = AVERAGE(src[1][srccur], src[1][srccur +
> 1]);
> + dstcur[3 - is_uyvy] = AVERAGE(src[2][srccur], src[2][srccur +
> 1]);
> + srccur += 2;
> + dstcur += 4;
> }
> - src_off = next_off;
> - dst += dst_stride;
> +#undef AVERAGE
> + src_off += src_stride;
> + dst += dst_stride;
> }
> }
>
> @@ -475,12 +495,12 @@ static void render_frame(struct vf_instance *vf,
> mp_image_t *mpi,
> copy_from_image_func copy_from_image = vf->priv->copy_from_image;
> copy_to_image_func copy_to_image = vf->priv->copy_to_image;
>
> + copy_from_image(vf);
> img = eosd_image_first(images);
> if (!img)
> return;
> memset(vf->priv->dirty_rows, 0, vf->priv->outh); // reset
> dirty rows
> while (img) {
> - copy_from_image(vf, img->dst_y, img->dst_y + img->h);
> my_draw_bitmap(vf, img->bitmap, img->w, img->h, img->stride,
> img->dst_x, img->dst_y, img->color);
> img = eosd_image_next(images);
> @@ -493,6 +513,8 @@ static int put_image(struct vf_instance *vf,
> mp_image_t *mpi, double pts)
> struct mp_eosd_image_list images;
> eosd_render_frame(pts, &images);
> prepare_image(vf, mpi);
> + if (images.changed)
> + compute_dirty_extents(vf, &images);
> render_frame(vf, mpi, &images);
> return vf_next_put_image(vf, vf->dmpi, pts);
> }
> --
> 1.7.10.4
This patch seems to break the original vf_ass. At least it doesn't
work correctly for me. Comparison images have been attached:
shot0001.jpg is from the current vf_ass,
shot0002.jpg is from vf_ass with your patch.
However, you have given me some good ideas. I would like to modify
the whole vf_ass substantially to improve its performance, using the idea
originally intended for vf_ass2. I will send the patch as soon as I think
it is worth such a big change.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: shot0001.jpg
Type: image/jpeg
Size: 40486 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/mplayer-dev-eng/attachments/20121005/e2b52153/attachment-0002.jpg>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: shot0002.jpg
Type: image/jpeg
Size: 44001 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/mplayer-dev-eng/attachments/20121005/e2b52153/attachment-0003.jpg>
More information about the MPlayer-dev-eng
mailing list