[MPlayer-dev-eng] [PATCH] vf_ass: reduce useless up/downsampling.
Nicolas George
nicolas.george at normalesup.org
Wed Oct 3 12:19:06 CEST 2012
For each line of pixels, compute the leftmost and rightmost
affected pixels and only up/downsample this interval.
The extents are computed only once, when the ASS layout changes.
Speed += ~15% for a typical subtitle with a long and a short line.
---
libmpcodecs/vf_ass.c | 232 +++++++++++++++++++++++++++-----------------------
1 file changed, 127 insertions(+), 105 deletions(-)
Note that this filter still has a few policy flaws, such as unchecked
mallocs, but these are unrelated to this patch.
diff --git a/libmpcodecs/vf_ass.c b/libmpcodecs/vf_ass.c
index d69cc5f..939cc92 100644
--- a/libmpcodecs/vf_ass.c
+++ b/libmpcodecs/vf_ass.c
@@ -52,8 +52,7 @@
#define rgba2u(c) ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )
#define rgba2v(c) ( (( 450*_r(c) - 376*_g(c) - 73*_b(c)) >> 10) + 128 )
-typedef void (*copy_from_image_func)(struct vf_instance *vf,
- int first_row, int last_row);
+typedef void (*copy_from_image_func)(struct vf_instance *vf);
typedef void (*copy_to_image_func)(struct vf_instance *vf);
static const struct vf_priv_s {
@@ -61,21 +60,24 @@ static const struct vf_priv_s {
int is_planar;
unsigned int outfmt;
+ int sub_y;
// 1 = auto-added filter: insert only if chain does not support EOSD already
// 0 = insert always
int auto_insert;
unsigned char *planes[3];
- unsigned char *dirty_rows;
+ struct dirty_row_extent {
+ int xmin, xmax;
+ } *dirty_rows;
copy_from_image_func copy_from_image;
copy_to_image_func copy_to_image;
} vf_priv_dflt;
-static void copy_from_image_yuv420p(struct vf_instance *, int, int);
+static void copy_from_image_yuv420p(struct vf_instance *);
static void copy_to_image_yuv420p(struct vf_instance *);
-static void copy_from_image_yuv422(struct vf_instance *, int, int);
+static void copy_from_image_yuv422(struct vf_instance *);
static void copy_to_image_yuv422(struct vf_instance *);
static int config(struct vf_instance *vf,
@@ -91,12 +93,14 @@ static int config(struct vf_instance *vf,
vf->priv->is_planar = 1;
vf->priv->copy_from_image = copy_from_image_yuv420p;
vf->priv->copy_to_image = copy_to_image_yuv420p;
+ vf->priv->sub_y = 1;
break;
case IMGFMT_UYVY:
case IMGFMT_YUY2:
vf->priv->is_planar = 0;
vf->priv->copy_from_image = copy_from_image_yuv422;
vf->priv->copy_to_image = copy_to_image_yuv422;
+ vf->priv->sub_y = 0;
break;
default:
return 0;
@@ -115,7 +119,8 @@ static int config(struct vf_instance *vf,
vf->priv->planes[0] = malloc(vf->priv->outw * vf->priv->outh);
vf->priv->planes[1] = malloc(vf->priv->outw * vf->priv->outh);
vf->priv->planes[2] = malloc(vf->priv->outw * vf->priv->outh);
- vf->priv->dirty_rows = malloc(vf->priv->outh);
+ vf->priv->dirty_rows = calloc(vf->priv->outh,
+ sizeof(*vf->priv->dirty_rows));
res.w = vf->priv->outw;
res.h = vf->priv->outh;
@@ -272,50 +277,74 @@ static int prepare_image(struct vf_instance *vf, mp_image_t *mpi)
return 0;
}
+static void compute_dirty_extents(struct vf_instance *vf,
+ struct mp_eosd_image_list *images)
+{
+ struct mp_eosd_image *img;
+ int xmin, xmax, ymin, ymax, y;
+ struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
+
+ for (y = 0; y < vf->priv->outh; y++) {
+ dirty_rows[y].xmin = vf->priv->outw;
+ dirty_rows[y].xmax = 0;
+ }
+
+ img = eosd_image_first(images);
+ while (img) {
+ xmin = FFMAX(0, img->dst_x);
+ xmax = FFMIN(vf->priv->outw, img->dst_x + img->w);
+ ymin = FFMAX(0, img->dst_y);
+ ymax = FFMIN(vf->priv->outh, img->dst_y + img->h);
+ xmin = (xmin + 0) & ~1;
+ xmax = (xmax + 1) & ~1;
+ if (vf->priv->sub_y) {
+ ymin = (ymin + 0) & ~1;
+ ymax = (ymax + 1) & ~1;
+ }
+ for (y = ymin; y < ymax; y++) {
+ dirty_rows[y].xmin = FFMIN(dirty_rows[y].xmin, xmin);
+ dirty_rows[y].xmax = FFMAX(dirty_rows[y].xmax, xmax);
+ }
+ img = eosd_image_next(images);
+ }
+}
+
/**
* \brief Copy specified rows from render_context.dmpi to render_context.planes, upsampling to 4:4:4
*/
-static void copy_from_image_yuv420p(struct vf_instance *vf, int first_row,
- int last_row)
+static void copy_from_image_yuv420p(struct vf_instance *vf)
{
- int pl;
- int i, j, k;
- unsigned char val;
- int chroma_rows;
-
- first_row -= (first_row % 2);
- last_row += (last_row % 2);
- chroma_rows = (last_row - first_row) / 2;
-
- assert(first_row >= 0);
- assert(first_row <= last_row);
- assert(last_row <= vf->priv->outh);
+ int pl, y, x;
for (pl = 1; pl < 3; ++pl) {
int dst_stride = vf->priv->outw;
int src_stride = vf->dmpi->stride[pl];
- unsigned char *src = vf->dmpi->planes[pl] + (first_row / 2) * src_stride;
- unsigned char *dst = vf->priv->planes[pl] + first_row * dst_stride;
+ unsigned char *src = vf->dmpi->planes[pl];
+ unsigned char *dst = vf->priv->planes[pl];
unsigned char *dst_next = dst + dst_stride;
- for (i = 0; i < chroma_rows; ++i) {
- if ((vf->priv->dirty_rows[first_row + i * 2 ] == 0) ||
- (vf->priv->dirty_rows[first_row + i * 2 + 1] == 0)) {
- for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k += 2) {
- val = *(src + j);
- *(dst + k ) = val;
- *(dst + k + 1) = val;
- *(dst_next + k ) = val;
- *(dst_next + k + 1) = val;
- }
+ struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
+
+ for (y = 0; y < vf->priv->outh; y += 2) {
+ int xmin = dirty_rows->xmin;
+ int xmax = dirty_rows->xmax;
+ int width = (xmax - xmin) >> 1;
+ unsigned char *srccur = src + (xmin >> 1);
+ unsigned char *dstcur1 = dst + xmin;
+ unsigned char *dstcur2 = dst_next + xmin;
+
+ for (x = 0; x < width; x++) {
+ dstcur1[0] = dstcur1[1] = dstcur2[0] = dstcur2[1] = *srccur;
+ srccur++;
+ dstcur1 += 2;
+ dstcur2 += 2;
}
src += src_stride;
dst = dst_next + dst_stride;
dst_next = dst + dst_stride;
+ dirty_rows += 2;
}
}
- for (i = first_row; i < last_row; ++i)
- vf->priv->dirty_rows[i] = 1;
}
/**
@@ -323,8 +352,8 @@ static void copy_from_image_yuv420p(struct vf_instance *vf, int first_row,
*/
static void copy_to_image_yuv420p(struct vf_instance *vf)
{
- int pl;
- int i, j, k;
+ int pl, x, y;
+
for (pl = 1; pl < 3; ++pl) {
int dst_stride = vf->dmpi->stride[pl];
int src_stride = vf->priv->outw;
@@ -332,17 +361,22 @@ static void copy_to_image_yuv420p(struct vf_instance *vf)
unsigned char *dst = vf->dmpi->planes[pl];
unsigned char *src = vf->priv->planes[pl];
unsigned char *src_next = vf->priv->planes[pl] + src_stride;
- for (i = 0; i < vf->priv->outh / 2; ++i) {
- if ((vf->priv->dirty_rows[i * 2] == 1)) {
- assert(vf->priv->dirty_rows[i * 2 + 1] == 1);
- for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k += 2) {
- unsigned val = 0;
- val += *(src + k);
- val += *(src + k + 1);
- val += *(src_next + k);
- val += *(src_next + k + 1);
- *(dst + j) = val >> 2;
- }
+ struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
+
+ for (y = 0; y < vf->priv->outh; y += 2) {
+ int xmin = dirty_rows->xmin;
+ int xmax = dirty_rows->xmax;
+ int width = (xmax - xmin) >> 1;
+ unsigned char *srccur1 = src + xmin;
+ unsigned char *srccur2 = src_next + xmin;
+ unsigned char *dstcur = dst + (xmin >> 1);
+
+ for (x = 0; x < width; x++) {
+ *dst = (srccur1[0] + srccur1[1] +
+ srccur2[0] + srccur2[1] + 2) >> 2;
+ srccur1 += 2;
+ srccur2 += 2;
+ dstcur++;
}
dst += dst_stride;
src = src_next + src_stride;
@@ -351,83 +385,69 @@ static void copy_to_image_yuv420p(struct vf_instance *vf)
}
}
-static void copy_from_image_yuv422(struct vf_instance *vf,
- int first_row, int last_row)
+static void copy_from_image_yuv422(struct vf_instance *vf)
{
- unsigned char *dirty_rows = vf->priv->dirty_rows;
+ struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
int src_stride = vf->dmpi->stride[0];
int dst_stride = vf->priv->outw;
- unsigned char *src = vf->dmpi->planes[0] + first_row * src_stride;
+ unsigned char *src = vf->dmpi->planes[0];
unsigned char **dst = vf->priv->planes;
- int dst_off = first_row * dst_stride;
+ int dst_off = 0;
int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
- int i, j, k;
-
- for (i = first_row; i < last_row; ++i) {
- int next_off = dst_off + dst_stride;
- if (!dirty_rows[i]) {
- if (is_uyvy) {
- for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[0][j ] = src[k + 1];
- dst[0][j + 1] = src[k + 3];
- dst[1][j ] = src[k ];
- dst[1][j + 1] = src[k ];
- dst[2][j ] = src[k + 2];
- dst[2][j + 1] = src[k + 2];
- }
- } else {
- for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[0][j ] = src[k ];
- dst[0][j + 1] = src[k + 2];
- dst[1][j ] = src[k + 1];
- dst[1][j + 1] = src[k + 1];
- dst[2][j ] = src[k + 3];
- dst[2][j + 1] = src[k + 3];
- }
- }
+ int x, y;
+
+ for (y = 0; y < vf->priv->outh; y++) {
+ int xmin = dirty_rows[y].xmin;
+ int xmax = dirty_rows[y].xmax;
+ int width = (xmax - xmin) >> 1;
+ unsigned char *srccur = src + (xmin << 1);
+ int dstcur = dst_off + xmin;
+
+ for (x = 0; x < width; x++) {
+ dst[0][dstcur + 0] = srccur[0 + is_uyvy];
+ dst[0][dstcur + 1] = srccur[2 + is_uyvy];
+ dst[1][dstcur + 0] =
+ dst[1][dstcur + 1] = srccur[1 - is_uyvy];
+ dst[2][dstcur + 0] =
+ dst[2][dstcur + 1] = srccur[3 - is_uyvy];
+ srccur += 4;
+ dstcur += 2;
}
- src += src_stride;
- dst_off = next_off;
+ src += src_stride;
+ dst_off += dst_stride;
}
- for (i = first_row; i < last_row; ++i)
- dirty_rows[i] = 1;
}
static void copy_to_image_yuv422(struct vf_instance *vf)
{
- unsigned char *dirty_rows = vf->priv->dirty_rows;
+ struct dirty_row_extent *dirty_rows = vf->priv->dirty_rows;
int src_stride = vf->priv->outw;
int dst_stride = vf->dmpi->stride[0];
- int height = vf->priv->outh;
unsigned char **src = vf->priv->planes;
unsigned char *dst = vf->dmpi->planes[0];
int src_off = 0;
int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
- int i, j, k;
+ int x, y;
+
+ for (y = 0; y < vf->priv->outh; y++) {
+ int xmin = dirty_rows[y].xmin;
+ int xmax = dirty_rows[y].xmax;
+ int width = (xmax - xmin) >> 1;
+ int srccur = src_off + xmin;
+ unsigned char *dstcur = dst + (xmin << 1);
- for (i = 0; i < height; ++i) {
- int next_off = src_off + src_stride;
- if (*dirty_rows++) {
#define AVERAGE(a, b) (((unsigned)(a) + (unsigned)(b)) >> 1)
- if (is_uyvy) {
- for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[k ] = AVERAGE(src[1][j], src[1][j + 1]);
- dst[k + 1] = src[0][j];
- dst[k + 2] = AVERAGE(src[2][j], src[2][j + 1]);
- dst[k + 3] = src[0][j + 1];
- }
- } else {
- for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[k ] = src[0][j];
- dst[k + 1] = AVERAGE(src[1][j], src[1][j + 1]);
- dst[k + 2] = src[0][j + 1];
- dst[k + 3] = AVERAGE(src[2][j], src[2][j + 1]);
- }
- }
-#undef AVERAGE
+ for (x = 0; x < width; x++) {
+ dstcur[0 + is_uyvy] = src[0][srccur + 0];
+ dstcur[2 + is_uyvy] = src[0][srccur + 1];
+ dstcur[1 - is_uyvy] = AVERAGE(src[1][srccur], src[1][srccur + 1]);
+ dstcur[3 - is_uyvy] = AVERAGE(src[2][srccur], src[2][srccur + 1]);
+ srccur += 2;
+ dstcur += 4;
}
- src_off = next_off;
- dst += dst_stride;
+#undef AVERAGE
+ src_off += src_stride;
+ dst += dst_stride;
}
}
@@ -475,12 +495,12 @@ static void render_frame(struct vf_instance *vf, mp_image_t *mpi,
copy_from_image_func copy_from_image = vf->priv->copy_from_image;
copy_to_image_func copy_to_image = vf->priv->copy_to_image;
+ copy_from_image(vf);
img = eosd_image_first(images);
if (!img)
return;
memset(vf->priv->dirty_rows, 0, vf->priv->outh); // reset dirty rows
while (img) {
- copy_from_image(vf, img->dst_y, img->dst_y + img->h);
my_draw_bitmap(vf, img->bitmap, img->w, img->h, img->stride,
img->dst_x, img->dst_y, img->color);
img = eosd_image_next(images);
@@ -493,6 +513,8 @@ static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
struct mp_eosd_image_list images;
eosd_render_frame(pts, &images);
prepare_image(vf, mpi);
+ if (images.changed)
+ compute_dirty_extents(vf, &images);
render_frame(vf, mpi, &images);
return vf_next_put_image(vf, vf->dmpi, pts);
}
--
1.7.10.4
More information about the MPlayer-dev-eng
mailing list