[MPlayer-dev-eng] [PATCH] new vf_ass implementation
Xidorn Quan
quanxunzhen at gmail.com
Wed Oct 31 06:00:55 CET 2012
On Mon, Oct 29, 2012 at 8:54 AM, Xidorn Quan <quanxunzhen at gmail.com> wrote:
> On Mon, Oct 29, 2012 at 12:10 AM, Nicolas George <
> nicolas.george at normalesup.org> wrote:
>
>> Le septidi 17 vendémiaire, an CCXXI, Xidorn Quan a écrit :
>> > I modified some code so that it can run faster for planar YUV format.
>> > The filter with this new patch is now never slower than the current
>> > one, though it is only ~3% faster in the worst case (subtitle is
>> > changed every frame, planar YUV format).
>>
>> I will trust you about the benchmark. I still believe it would have been
>> nice to know how much each different optimization contributes to the speed
>> enhancement, but it would probably be clumsy.
>>
>>
>> > Index: libmpcodecs/vf_ass.c
>> > ===================================================================
>> > --- libmpcodecs/vf_ass.c (revision 35233)
>> > +++ libmpcodecs/vf_ass.c (working copy)
>> > @@ -1,5 +1,6 @@
>> > /*
>> > * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov at gmail.com>
>> > + * Copyright (C) 2012 Xidorn Quan <quanxunzhen at gmail.com>
>> > *
>> > * This file is part of MPlayer.
>> > *
>> > @@ -52,9 +53,13 @@
>> > #define rgba2u(c) ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) +
>> 128 )
>> > #define rgba2v(c) ( (( 450*_r(c) - 376*_g(c) - 73*_b(c)) >> 10) +
>> 128 )
>> >
>> > -typedef void (*copy_from_image_func)(struct vf_instance *vf,
>> > - int first_row, int last_row);
>> > -typedef void (*copy_to_image_func)(struct vf_instance *vf);
>> > +#define rshr(v, b) (((v) + (1 << ((b) - 1))) >> (b))
>> > +#define rshr8(v) rshr(v, 8)
>> > +#define rshr24(v) rshr(v, 24)
>>
>> Could have a more explicit name.
>>
>
> Is "round_shr" a better name?
>
>
>> > +/* map 0 - 0xFF => 0 - 0x101 */
>>
>> Nitpick: "=>" means "implies"; a single arrow "->" would be more correct.
>>
>
> OK.
>
>
>> > +#define map_16bit(v) rshr8(0x102 * (v))
>> > +/* map 0 - 0xFF => 0 - 0x10101 */
>> > +#define map_24bit(v) rshr8(0x10203 * (v))
>> >
>> > static const struct vf_priv_s {
>> > int outh, outw;
>> > @@ -66,56 +71,295 @@
>> > // 0 = insert always
>> > int auto_insert;
>> >
>> > - unsigned char *planes[3];
>> > - unsigned char *dirty_rows;
>>
>> > + uint8_t *planes[MP_MAX_PLANES];
>>
>> I would be happier with a comment that states that the contents of these
>> frames are with pre-multiplied alpha.
>>
>
> OK, I'll add a comment.
>
>
>> (Also, I am a bit uncertain about the loss of precision caused by the
>> pre-multiplication, but that is probably negligible.)
>>
>
> I agree that there could be slightly different between the output of
> this and the original code, but it should be really unnoticable and
> trivial.
>
>
>> > + uint8_t *alphas[MP_MAX_PLANES];
>>
>> As far as I can see, this is not alpha, this is transparency. Alpha is
>> opacity: alpha=1 means fully opaque. libass makes the mistake everywhere,
>> but I believe we should prevent it from propagating in new code.
>>
>
> Well, you are right. It was alpha but I changed it for performance.
> I'll rename it.
>
>
>> > + struct dirty_rows_extent {
>> > + int xmin, xmax;
>> > + } *dirty_rows;
>> >
>> > - copy_from_image_func copy_from_image;
>> > - copy_to_image_func copy_to_image;
>> > + // called for every eosd image when subtitle is changed
>> > + void (*draw_image)(vf_instance_t *, struct mp_eosd_image *);
>> > + // called for every time subtitle is changed
>> > + void (*prepare_buffer)(vf_instance_t *);
>> > + // called for every frame
>> > + void (*render_frame)(vf_instance_t *);
>> > } vf_priv_dflt;
>> >
>> > -static void copy_from_image_yuv420p(struct vf_instance *, int, int);
>> > -static void copy_to_image_yuv420p(struct vf_instance *);
>> > -static void copy_from_image_yuv422(struct vf_instance *, int, int);
>> > -static void copy_to_image_yuv422(struct vf_instance *);
>> > +static void draw_image_yuv(vf_instance_t *vf, struct mp_eosd_image
>> *img)
>> > +{
>> > + uint32_t color = img->color;
>> > + uint32_t opacity = 0xFF - _a(color);
>> > + uint8_t y = rgba2y(color),
>> > + u = rgba2u(color),
>> > + v = rgba2v(color);
>> > + int outw = vf->priv->outw,
>> > + outh = vf->priv->outh;
>> > + uint8_t *alpha = vf->priv->alphas[0],
>> > + *dst_y = vf->priv->planes[0],
>> > + *dst_u = vf->priv->planes[1],
>> > + *dst_v = vf->priv->planes[2];
>> > + struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
>> > + int src_x = img->dst_x, src_w = img->w,
>> > + src_y = img->dst_y, src_h = img->h,
>> > + stride = img->stride;
>> > + uint8_t *src = img->bitmap;
>> > + int i, j;
>> >
>> > + opacity = map_24bit(opacity);
>> > + for (i = 0; i < src_h; i++) {
>> > + struct dirty_rows_extent *dirty_row = &dirty_rows[src_y + i];
>> > + dirty_row->xmin = FFMIN(dirty_row->xmin, src_x);
>> > + dirty_row->xmax = FFMAX(dirty_row->xmax, src_x + src_w);
>> > +
>> > + for (j = 0; j < src_w; j++) {
>> > + uint32_t k = src[i * stride + j];
>> > + if (k) {
>> > + size_t p = (src_y + i) * outw + src_x + j;
>> > + k *= opacity;
>> > + alpha[p] = rshr24((0xFFFFFF - k) * alpha[p]);
>> > + dst_y[p] = rshr24((0xFFFFFF - k) * dst_y[p] + k * y);
>> > + dst_u[p] = rshr24((0xFFFFFF - k) * dst_u[p] + k * u);
>> > + dst_v[p] = rshr24((0xFFFFFF - k) * dst_v[p] + k * v);
>> > + }
>> > + }
>> > + }
>> > +}
>> > +
>> > +static void prepare_buffer_422(vf_instance_t *vf)
>> > +{
>> > + uint8_t *dst_u = vf->priv->planes[1],
>> > + *dst_v = vf->priv->planes[2];
>> > + int outw = vf->priv->outw,
>> > + outh = vf->priv->outh;
>> > + struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
>> > + int i, j;
>> > +
>> > + for (i = 0; i < outh; i++) {
>> > + size_t xmin = dirty_rows[i].xmin,
>> > + xmax = dirty_rows[i].xmax;
>>
>> size_t seems exaggerated.
>>
>
> I'll consider this. There was a document said that using a integer
> type with the same width as pointers' in pointer algebra is better for
> performance. But here I just find xmin and xmax seem to be unrelated
> with pointer computing.
>
>
>> > + xmin -= xmin % 2;
>>
>> xmin = dirty_rows[i].xmin & ~1
>>
>> is probably faster.
>>
>
> I think compilers will optimize this for us if they find it faster.
>
> > + for (j = xmin; j < xmax; j += 2) {
>> > + size_t p = i * outw + j;
>> > + dst_u[p] = (dst_u[p] + dst_u[p + 1]) / 2;
>> > + dst_v[p] = (dst_v[p] + dst_v[p + 1]) / 2;
>> > + }
>> > + }
>> > +}
>> > +
>> > +static void render_frame_yuv422(vf_instance_t *vf)
>> > +{
>> > + uint8_t *alpha = vf->priv->alphas[0];
>> > + uint8_t *src_y = vf->priv->planes[0],
>> > + *src_u = vf->priv->planes[1],
>> > + *src_v = vf->priv->planes[2];
>> > + int outw = vf->priv->outw,
>> > + outh = vf->priv->outh;
>> > + struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
>> > + uint8_t *dest = vf->dmpi->planes[0];
>> > + int stride = vf->dmpi->stride[0];
>> > + int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
>> > + int i, j;
>> > +
>> > + for (i = 0; i < outh; i++) {
>> > + size_t xmin = dirty_rows[i].xmin,
>> > + xmax = dirty_rows[i].xmax;
>> > + xmin -= xmin % 2;
>>
>> Same as above for size_t and % 2.
>>
>> > + for (j = xmin; j < xmax; j += 2) {
>> > + size_t src = i * outw + j,
>> > + dst = i * stride + j * 2;
>>
>> Did you try to eliminate the multiplication from the inner loop?
>>
>
> We shouldn't worry about it. Modern compilers can realize that all of
> those variables except j are loop invariable, and they are able to
> eliminate the multiplication themselves. I leave the code to compilers
> because I believe that they can do better than us in this aspect.
>
> > + uint_fast16_t a0 = alpha[src],
>> > + a1 = alpha[src + 1];
>> > + uint8_t y0, y1, u, v;
>> > +
>> > + if (a0 == 0xFF && a1 == 0xFF)
>> > + continue;
>> > +
>> > + y0 = dest[dst + is_uyvy + 0];
>> > + y1 = dest[dst + is_uyvy + 2];
>> > + u = dest[dst - is_uyvy + 1];
>> > + v = dest[dst - is_uyvy + 3];
>> > +
>> > + a0 = map_16bit(a0);
>> > + a1 = map_16bit(a1);
>> > + y0 = ((a0 * y0) >> 8) + src_y[src];
>> > + y1 = ((a1 * y1) >> 8) + src_y[src + 1];
>> > +
>> > + a0 = (a0 + a1) / 2;
>> > + u = ((a0 * u) >> 8) + src_u[src];
>> > + v = ((a0 * v) >> 8) + src_v[src];
>> > +
>> > + dest[dst + is_uyvy + 0] = y0;
>> > + dest[dst + is_uyvy + 2] = y1;
>> > + dest[dst - is_uyvy + 1] = u;
>> > + dest[dst - is_uyvy + 3] = v;
>> > + }
>> > + }
>> > +}
>> > +
>> > +static void prepare_buffer_420p(vf_instance_t *vf)
>> > +{
>> > + int outw = vf->priv->outw,
>> > + outh = vf->priv->outh;
>> > + uint8_t *dst_u = vf->priv->planes[1],
>> > + *dst_v = vf->priv->planes[2];
>> > + uint8_t *src_a = vf->priv->alphas[0],
>> > + *dst_a = vf->priv->alphas[1];
>> > + struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
>> > + int i, j;
>> > +
>> > + for (i = 0; i < outh; i += 2) {
>> > + size_t xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i +
>> 1].xmin),
>> > + xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i +
>> 1].xmax);
>> > + xmin -= xmin % 2;
>> > + for (j = xmin; j < xmax; j += 2) {
>> > + size_t p = i * outw / 4 + j / 2,
>> > + q1 = i * outw + j,
>> > + q2 = q1 + outw;
>> > + dst_a[p] = (src_a[q1] + src_a[q1 + 1] +
>> > + src_a[q2] + src_a[q2 + 1] + 2) / 4;
>> > + dst_u[p] = (dst_u[q1] + dst_u[q1 + 1] +
>> > + dst_u[q2] + dst_u[q2 + 1] + 2) / 4;
>> > + dst_v[p] = (dst_v[q1] + dst_v[q1 + 1] +
>> > + dst_v[q2] + dst_v[q2 + 1] + 2) / 4;
>> > + }
>> > + }
>> > +}
>> > +
>> > +static void render_frame_yuv420p(vf_instance_t *vf)
>> > +{
>> > + uint8_t **planes = vf->priv->planes;
>> > + uint8_t **dest = vf->dmpi->planes;
>> > + struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
>> > + uint8_t *alpha;
>> > + uint8_t *src_y = vf->priv->planes[0],
>> > + *src_u = vf->priv->planes[1],
>> > + *src_v = vf->priv->planes[2];
>> > + uint8_t *dst_y = vf->dmpi->planes[0],
>> > + *dst_u = vf->dmpi->planes[1],
>> > + *dst_v = vf->dmpi->planes[2];
>> > + int stride;
>> > + int outw = vf->priv->outw,
>> > + outh = vf->priv->outh;
>> > + int i, j;
>> > +
>> > + // y
>> > + alpha = vf->priv->alphas[0];
>> > + stride = vf->dmpi->stride[0];
>> > + for (i = 0; i < outh; i++) {
>> > + size_t xmin = dirty_rows[i].xmin,
>> > + xmax = dirty_rows[i].xmax;
>> > + for (j = xmin; j < xmax; j++) {
>> > + size_t s = i * outw + j,
>> > + d = i * stride + j;
>> > + if (alpha[s] != 0xFF)
>> > + dst_y[d] = ((map_16bit(alpha[s]) * dst_y[d]) >> 8) +
>> src_y[s];
>> > + }
>> > + }
>> > +
>> > + // u & v
>> > + alpha = vf->priv->alphas[1];
>> > + stride = vf->dmpi->stride[1];
>> > + for (i = 0; i < outh / 2; i++) {
>> > + size_t xmin = FFMIN(dirty_rows[i * 2].xmin, dirty_rows[i * 2 +
>> 1].xmin),
>> > + xmax = FFMAX(dirty_rows[i * 2].xmax, dirty_rows[i * 2 +
>> 1].xmax);
>> > + for (j = xmin / 2; j < (xmax + 1) / 2; j++) {
>> > + size_t s = i * outw / 2 + j,
>> > + d = i * stride + j;
>> > + if (alpha[s] != 0xFF) {
>> > + uint_fast16_t a = map_16bit(alpha[s]);
>> > + dst_u[d] = ((a * dst_u[d]) >> 8) + src_u[s];
>> > + dst_v[d] = ((a * dst_v[d]) >> 8) + src_v[s];
>> > + }
>> > + }
>> > + }
>> > +}
>> > +
>> > +static void clean_buffer(vf_instance_t *vf)
>> > +{
>> > + int outw = vf->priv->outw,
>> > + outh = vf->priv->outh;
>> > + struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
>> > + uint8_t **planes = vf->priv->planes;
>> > + uint8_t *alpha = vf->priv->alphas[0];
>> > + int i, j;
>> > +
>> > + for (i = 0; i < MP_MAX_PLANES; i++) {
>> > + uint8_t *plane = planes[i];
>> > + if (!plane)
>> > + break;
>> > + for (j = 0; j < outh; j++) {
>> > + size_t xmin = dirty_rows[j].xmin;
>> > + ssize_t width = dirty_rows[j].xmax - xmin;
>> > + if (width > 0)
>> > + memset(plane + j * outw + xmin, 0, width);
>> > + }
>> > + }
>> > + for (i = 0; i < outh; i++) {
>> > + size_t xmin = dirty_rows[i].xmin;
>> > + ssize_t width = dirty_rows[i].xmax - xmin;
>>
>> If I am not mistaken, this subtraction is technically invalid: since xmin
>> is
>> size_t, the subtraction is made as an unsigned and then cast to as signed.
>>
>> Using an int for both seems more reasonable. Or comparing them before
>> subtracting them.
>>
>
> I'll check it.
>
>
>> > + if (width > 0)
>> > + memset(alpha + i * outw + xmin, -1, width);
>> > + }
>> > + for (i = 0; i < outh; i++) {
>> > + dirty_rows[i].xmin = outw;
>> > + dirty_rows[i].xmax = 0;
>> > + }
>> > +}
>> > +
>> > static int config(struct vf_instance *vf,
>> > int width, int height, int d_width, int d_height,
>> > unsigned int flags, unsigned int outfmt)
>> > {
>> > struct mp_eosd_settings res = {0};
>> > + struct dirty_rows_extent *dirty_rows;
>> > + int outw, outh;
>> > + int planes, alphas;
>> > + int i;
>> >
>> > switch (outfmt) {
>> > case IMGFMT_YV12:
>> > case IMGFMT_I420:
>> > case IMGFMT_IYUV:
>> > vf->priv->is_planar = 1;
>> > - vf->priv->copy_from_image = copy_from_image_yuv420p;
>> > - vf->priv->copy_to_image = copy_to_image_yuv420p;
>> > + planes = 3;
>> > + alphas = 2;
>> > + vf->priv->draw_image = draw_image_yuv;
>> > + vf->priv->render_frame = render_frame_yuv420p;
>> > + vf->priv->prepare_buffer = prepare_buffer_420p;
>> > break;
>> > case IMGFMT_UYVY:
>> > case IMGFMT_YUY2:
>> > vf->priv->is_planar = 0;
>> > - vf->priv->copy_from_image = copy_from_image_yuv422;
>> > - vf->priv->copy_to_image = copy_to_image_yuv422;
>> > + planes = 3;
>> > + alphas = 1;
>> > + vf->priv->draw_image = draw_image_yuv;
>> > + vf->priv->render_frame = render_frame_yuv422;
>> > + vf->priv->prepare_buffer = prepare_buffer_422;
>> > break;
>> > default:
>> > return 0;
>> > }
>> >
>> > vf->priv->outfmt = outfmt;
>> > - vf->priv->outh = height + ass_top_margin + ass_bottom_margin;
>> > - vf->priv->outw = width;
>> > + vf->priv->outh = outh = height + ass_top_margin +
>> ass_bottom_margin;
>> > + vf->priv->outw = outw = width;
>> >
>> > if (!opt_screen_size_x && !opt_screen_size_y) {
>> > d_width = d_width * vf->priv->outw / width;
>> > d_height = d_height * vf->priv->outh / height;
>> > }
>> >
>> > - if (!vf->priv->is_planar)
>> > - vf->priv->planes[0] = malloc(vf->priv->outw * vf->priv->outh);
>> > - vf->priv->planes[1] = malloc(vf->priv->outw * vf->priv->outh);
>> > - vf->priv->planes[2] = malloc(vf->priv->outw * vf->priv->outh);
>> > - vf->priv->dirty_rows = malloc(vf->priv->outh);
>> > + for (i = 0; i < planes; i++)
>> > + vf->priv->planes[i] = malloc(outw * outh);
>> > + for (i = 0; i < alphas; i++)
>> > + vf->priv->alphas[i] = malloc(outw * outh);
>> > + dirty_rows = malloc(outh * sizeof(*dirty_rows));
>> > + for (i = 0; i < outh; i++) {
>> > + dirty_rows[i].xmin = 0;
>> > + dirty_rows[i].xmax = outw;
>> > + }
>> > + vf->priv->dirty_rows = dirty_rows;
>> > + clean_buffer(vf);
>>
>> Please correct me if I am wrong: clean_buffer() will initialize the planes
>> between the bounds stated by dirty_rows, and then reset dirty_rows, so for
>> the initial clear you init dirty_rows to cover the full image.
>>
>> If that is so, maybe a comment to explain, because it is a bit convoluted.
>>
>
> Your understanding is completely right. I'll add comments.
>
>
>> >
>> > res.w = vf->priv->outw;
>> > res.h = vf->priv->outh;
>> > @@ -272,228 +516,26 @@
>> > return 0;
>> > }
>> >
>> > -/**
>> > - * \brief Copy specified rows from render_context.dmpi to
>> render_context.planes, upsampling to 4:4:4
>> > - */
>> > -static void copy_from_image_yuv420p(struct vf_instance *vf, int
>> first_row,
>> > - int last_row)
>> > +static void prepare_eosd(vf_instance_t *vf, struct mp_eosd_image_list
>> *imgs)
>> > {
>> > - int pl;
>> > - int i, j, k;
>> > - unsigned char val;
>> > - int chroma_rows;
>> > + struct mp_eosd_image *img = eosd_image_first(imgs);
>> > + void (*draw_image)(vf_instance_t *, struct mp_eosd_image *);
>> >
>> > - first_row -= (first_row % 2);
>> > - last_row += (last_row % 2);
>> > - chroma_rows = (last_row - first_row) / 2;
>> > -
>> > - assert(first_row >= 0);
>> > - assert(first_row <= last_row);
>> > - assert(last_row <= vf->priv->outh);
>> > -
>> > - for (pl = 1; pl < 3; ++pl) {
>> > - int dst_stride = vf->priv->outw;
>> > - int src_stride = vf->dmpi->stride[pl];
>> > -
>> > - unsigned char *src = vf->dmpi->planes[pl] + (first_row /
>> 2) * src_stride;
>> > - unsigned char *dst = vf->priv->planes[pl] + first_row
>> * dst_stride;
>> > - unsigned char *dst_next = dst + dst_stride;
>> > - for (i = 0; i < chroma_rows; ++i) {
>> > - if ((vf->priv->dirty_rows[first_row + i * 2 ] == 0) ||
>> > - (vf->priv->dirty_rows[first_row + i * 2 + 1] == 0)) {
>> > - for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k
>> += 2) {
>> > - val = *(src + j);
>> > - *(dst + k ) = val;
>> > - *(dst + k + 1) = val;
>> > - *(dst_next + k ) = val;
>> > - *(dst_next + k + 1) = val;
>> > - }
>> > - }
>> > - src += src_stride;
>> > - dst = dst_next + dst_stride;
>> > - dst_next = dst + dst_stride;
>> > - }
>> > - }
>> > - for (i = first_row; i < last_row; ++i)
>> > - vf->priv->dirty_rows[i] = 1;
>> > + clean_buffer(vf);
>> > + draw_image = vf->priv->draw_image;
>> > + for (; img; img = eosd_image_next(imgs))
>> > + draw_image(vf, img);
>>
>> I am not sure the extra variable is really useful.
>>
>
> It can slightly improve the performance because vf->priv->draw_image
> is not treated as a loop invariable by compilers but draw_image is.
>
>
>> > + vf->priv->prepare_buffer(vf);
>> > }
>> >
>> > -/**
>> > - * \brief Copy all previously copied rows back to render_context.dmpi
>> > - */
>> > -static void copy_to_image_yuv420p(struct vf_instance *vf)
>> > -{
>> > - int pl;
>> > - int i, j, k;
>> > - for (pl = 1; pl < 3; ++pl) {
>> > - int dst_stride = vf->dmpi->stride[pl];
>> > - int src_stride = vf->priv->outw;
>> > -
>> > - unsigned char *dst = vf->dmpi->planes[pl];
>> > - unsigned char *src = vf->priv->planes[pl];
>> > - unsigned char *src_next = vf->priv->planes[pl] + src_stride;
>> > - for (i = 0; i < vf->priv->outh / 2; ++i) {
>> > - if ((vf->priv->dirty_rows[i * 2] == 1)) {
>> > - assert(vf->priv->dirty_rows[i * 2 + 1] == 1);
>> > - for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k
>> += 2) {
>> > - unsigned val = 0;
>> > - val += *(src + k);
>> > - val += *(src + k + 1);
>> > - val += *(src_next + k);
>> > - val += *(src_next + k + 1);
>> > - *(dst + j) = val >> 2;
>> > - }
>> > - }
>> > - dst += dst_stride;
>> > - src = src_next + src_stride;
>> > - src_next = src + src_stride;
>> > - }
>> > - }
>> > -}
>> > -
>> > -static void copy_from_image_yuv422(struct vf_instance *vf,
>> > - int first_row, int last_row)
>> > -{
>> > - unsigned char *dirty_rows = vf->priv->dirty_rows;
>> > - int src_stride = vf->dmpi->stride[0];
>> > - int dst_stride = vf->priv->outw;
>> > - unsigned char *src = vf->dmpi->planes[0] + first_row * src_stride;
>> > - unsigned char **dst = vf->priv->planes;
>> > - int dst_off = first_row * dst_stride;
>> > - int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
>> > - int i, j, k;
>> > -
>> > - for (i = first_row; i < last_row; ++i) {
>> > - int next_off = dst_off + dst_stride;
>> > - if (!dirty_rows[i]) {
>> > - if (is_uyvy) {
>> > - for (j = dst_off, k = 0; j < next_off; j += 2, k += 4)
>> {
>> > - dst[0][j ] = src[k + 1];
>> > - dst[0][j + 1] = src[k + 3];
>> > - dst[1][j ] = src[k ];
>> > - dst[1][j + 1] = src[k ];
>> > - dst[2][j ] = src[k + 2];
>> > - dst[2][j + 1] = src[k + 2];
>> > - }
>> > - } else {
>> > - for (j = dst_off, k = 0; j < next_off; j += 2, k += 4)
>> {
>> > - dst[0][j ] = src[k ];
>> > - dst[0][j + 1] = src[k + 2];
>> > - dst[1][j ] = src[k + 1];
>> > - dst[1][j + 1] = src[k + 1];
>> > - dst[2][j ] = src[k + 3];
>> > - dst[2][j + 1] = src[k + 3];
>> > - }
>> > - }
>> > - }
>> > - src += src_stride;
>> > - dst_off = next_off;
>> > - }
>> > - for (i = first_row; i < last_row; ++i)
>> > - dirty_rows[i] = 1;
>> > -}
>> > -
>> > -static void copy_to_image_yuv422(struct vf_instance *vf)
>> > -{
>> > - unsigned char *dirty_rows = vf->priv->dirty_rows;
>> > - int src_stride = vf->priv->outw;
>> > - int dst_stride = vf->dmpi->stride[0];
>> > - int height = vf->priv->outh;
>> > - unsigned char **src = vf->priv->planes;
>> > - unsigned char *dst = vf->dmpi->planes[0];
>> > - int src_off = 0;
>> > - int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
>> > - int i, j, k;
>> > -
>> > - for (i = 0; i < height; ++i) {
>> > - int next_off = src_off + src_stride;
>> > - if (*dirty_rows++) {
>> > -#define AVERAGE(a, b) (((unsigned)(a) + (unsigned)(b)) >> 1)
>> > - if (is_uyvy) {
>> > - for (j = src_off, k = 0; j < next_off; j += 2, k += 4)
>> {
>> > - dst[k ] = AVERAGE(src[1][j], src[1][j + 1]);
>> > - dst[k + 1] = src[0][j];
>> > - dst[k + 2] = AVERAGE(src[2][j], src[2][j + 1]);
>> > - dst[k + 3] = src[0][j + 1];
>> > - }
>> > - } else {
>> > - for (j = src_off, k = 0; j < next_off; j += 2, k += 4)
>> {
>> > - dst[k ] = src[0][j];
>> > - dst[k + 1] = AVERAGE(src[1][j], src[1][j + 1]);
>> > - dst[k + 2] = src[0][j + 1];
>> > - dst[k + 3] = AVERAGE(src[2][j], src[2][j + 1]);
>> > - }
>> > - }
>> > -#undef AVERAGE
>> > - }
>> > - src_off = next_off;
>> > - dst += dst_stride;
>> > - }
>> > -}
>> > -
>> > -static void my_draw_bitmap(struct vf_instance *vf, unsigned char
>> *bitmap,
>> > - int bitmap_w, int bitmap_h, int stride,
>> > - int dst_x, int dst_y, unsigned color)
>> > -{
>> > - unsigned char y = rgba2y(color);
>> > - unsigned char u = rgba2u(color);
>> > - unsigned char v = rgba2v(color);
>> > - unsigned opacity = 255 - _a(color);
>> > - unsigned char *src, *dsty, *dstu, *dstv;
>> > - int i, j;
>> > - mp_image_t *dmpi = vf->dmpi;
>> > - int stride_y = vf->priv->is_planar ? dmpi->stride[0] :
>> vf->priv->outw;
>> > -
>> > - opacity = (0x10203 * opacity + 0x80) >> 8; /* 0x10203 =
>> (1<<32)/(255*255) */
>> > - /* 0 <= opacity <= 0x10101 */
>> > - src = bitmap;
>> > - dsty = vf->priv->is_planar ? dmpi->planes[0] : vf->priv->planes[0];
>> > - dsty += dst_x + dst_y * stride_y;
>> > - dstu = vf->priv->planes[1] + dst_x + dst_y * vf->priv->outw;
>> > - dstv = vf->priv->planes[2] + dst_x + dst_y * vf->priv->outw;
>> > - for (i = 0; i < bitmap_h; ++i) {
>> > - for (j = 0; j < bitmap_w; ++j) {
>> > - unsigned k = src[j];
>> > - if (!k)
>> > - continue;
>> > - k *= opacity; /* 0 <= k <= 0xFFFFFF */
>> > - dsty[j] = (k * y + (0xFFFFFF - k) * dsty[j] + 0x800000) >>
>> 24;
>> > - dstu[j] = (k * u + (0xFFFFFF - k) * dstu[j] + 0x800000) >>
>> 24;
>> > - dstv[j] = (k * v + (0xFFFFFF - k) * dstv[j] + 0x800000) >>
>> 24;
>> > - }
>> > - src += stride;
>> > - dsty += stride_y;
>> > - dstu += vf->priv->outw;
>> > - dstv += vf->priv->outw;
>> > - }
>> > -}
>> > -
>> > -static void render_frame(struct vf_instance *vf, mp_image_t *mpi,
>> > - struct mp_eosd_image_list *images)
>> > -{
>> > - struct mp_eosd_image *img;
>> > - copy_from_image_func copy_from_image = vf->priv->copy_from_image;
>> > - copy_to_image_func copy_to_image = vf->priv->copy_to_image;
>> > -
>> > - img = eosd_image_first(images);
>> > - if (!img)
>> > - return;
>> > - memset(vf->priv->dirty_rows, 0, vf->priv->outh); //
>> reset dirty rows
>> > - while (img) {
>> > - copy_from_image(vf, img->dst_y, img->dst_y + img->h);
>> > - my_draw_bitmap(vf, img->bitmap, img->w, img->h,
>> img->stride,
>> > - img->dst_x, img->dst_y, img->color);
>> > - img = eosd_image_next(images);
>> > - }
>> > - copy_to_image(vf);
>> > -}
>> > -
>> > static int put_image(struct vf_instance *vf, mp_image_t *mpi, double
>> pts)
>> > {
>> > struct mp_eosd_image_list images;
>> > eosd_render_frame(pts, &images);
>> > prepare_image(vf, mpi);
>> > - render_frame(vf, mpi, &images);
>> > + if (images.changed)
>> > + prepare_eosd(vf, &images);
>> > + vf->priv->render_frame(vf);
>> > return vf_next_put_image(vf, vf->dmpi, pts);
>> > }
>> >
>> > @@ -523,10 +565,13 @@
>> >
>> > static void uninit(struct vf_instance *vf)
>> > {
>> > - if (!vf->priv->is_planar)
>> > - free(vf->priv->planes[0]);
>> > - free(vf->priv->planes[1]);
>> > - free(vf->priv->planes[2]);
>> > + int i;
>> > + for (i = 0; i < MP_MAX_PLANES; i++)
>> > + if (vf->priv->planes[i])
>> > + free(vf->priv->planes[i]);
>> > + for (i = 0; i < MP_MAX_PLANES; i++)
>> > + if (vf->priv->alphas[i])
>> > + free(vf->priv->alphas[i]);
>>
>> No need to check, free(NULL) is a nop.
>>
>
> OK.
>
> > free(vf->priv->dirty_rows);
>> > }
>> >
>> > @@ -579,7 +624,7 @@
>> > const vf_info_t vf_info_ass = {
>> > "Render ASS/SSA subtitles",
>> > "ass",
>> > - "Evgeniy Stepanov",
>> > + "Evgeniy Stepanov, Xidorn Quan",
>> > "",
>> > vf_open,
>> > &vf_opts
>>
>> Sorry for the delay, and thanks for the works.
>>
>
> Thanks for your detailed reviewing. I will modify and send it soon.
>
The modified patch has been attached.
If there are no further comments, I'll apply it in two days.
Regards,
Xidorn Quan
-------------- next part --------------
Index: libmpcodecs/vf_ass.c
===================================================================
--- libmpcodecs/vf_ass.c (revision 35279)
+++ libmpcodecs/vf_ass.c (working copy)
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov at gmail.com>
+ * Copyright (C) 2012 Xidorn Quan <quanxunzhen at gmail.com>
*
* This file is part of MPlayer.
*
@@ -52,9 +53,10 @@
#define rgba2u(c) ( ((-152*_r(c) - 298*_g(c) + 450*_b(c)) >> 10) + 128 )
#define rgba2v(c) ( (( 450*_r(c) - 376*_g(c) - 73*_b(c)) >> 10) + 128 )
-typedef void (*copy_from_image_func)(struct vf_instance *vf,
- int first_row, int last_row);
-typedef void (*copy_to_image_func)(struct vf_instance *vf);
+/* map 0 - 0xFF -> 0 - 0x101 */
+#define MAP_16BIT(v) RSHIFT(0x102 * (v), 8)
+/* map 0 - 0xFF -> 0 - 0x10101 */
+#define MAP_24BIT(v) RSHIFT(0x10203 * (v), 8)
static const struct vf_priv_s {
int outh, outw;
@@ -66,56 +68,296 @@
// 0 = insert always
int auto_insert;
- unsigned char *planes[3];
- unsigned char *dirty_rows;
+ // planar data to be directly rendered on frames
+ uint8_t *planes[MP_MAX_PLANES];
+ // alpha here is actually transparency, not opacity
+ uint8_t *alphas[MP_MAX_PLANES];
+ struct dirty_rows_extent {
+ int xmin, xmax;
+ } *dirty_rows;
- copy_from_image_func copy_from_image;
- copy_to_image_func copy_to_image;
+ // called for every eosd image when subtitle is changed
+ void (*draw_image)(vf_instance_t *, struct mp_eosd_image *);
+ // called for every time subtitle is changed
+ void (*prepare_buffer)(vf_instance_t *);
+ // called for every frame
+ void (*render_frame)(vf_instance_t *);
} vf_priv_dflt;
-static void copy_from_image_yuv420p(struct vf_instance *, int, int);
-static void copy_to_image_yuv420p(struct vf_instance *);
-static void copy_from_image_yuv422(struct vf_instance *, int, int);
-static void copy_to_image_yuv422(struct vf_instance *);
+static void draw_image_yuv(vf_instance_t *vf, struct mp_eosd_image *img)
+{
+ uint32_t color = img->color;
+ uint32_t opacity = 0xFF - _a(color);
+ uint8_t y = rgba2y(color),
+ u = rgba2u(color),
+ v = rgba2v(color);
+ int outw = vf->priv->outw,
+ outh = vf->priv->outh;
+ uint8_t *alpha = vf->priv->alphas[0],
+ *dst_y = vf->priv->planes[0],
+ *dst_u = vf->priv->planes[1],
+ *dst_v = vf->priv->planes[2];
+ struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
+ int src_x = img->dst_x, src_w = img->w,
+ src_y = img->dst_y, src_h = img->h,
+ stride = img->stride;
+ uint8_t *src = img->bitmap;
+ int i, j;
+ opacity = MAP_24BIT(opacity);
+ for (i = 0; i < src_h; i++) {
+ struct dirty_rows_extent *dirty_row = &dirty_rows[src_y + i];
+ dirty_row->xmin = FFMIN(dirty_row->xmin, src_x);
+ dirty_row->xmax = FFMAX(dirty_row->xmax, src_x + src_w);
+
+ for (j = 0; j < src_w; j++) {
+ uint32_t k = src[i * stride + j];
+ if (k) {
+ size_t p = (src_y + i) * outw + src_x + j;
+ k *= opacity;
+ alpha[p] = RSHIFT((0xFFFFFF - k) * alpha[p], 24);
+ dst_y[p] = RSHIFT((0xFFFFFF - k) * dst_y[p] + k * y, 24);
+ dst_u[p] = RSHIFT((0xFFFFFF - k) * dst_u[p] + k * u, 24);
+ dst_v[p] = RSHIFT((0xFFFFFF - k) * dst_v[p] + k * v, 24);
+ }
+ }
+ }
+}
+
+static void prepare_buffer_422(vf_instance_t *vf)
+{
+ uint8_t *dst_u = vf->priv->planes[1],
+ *dst_v = vf->priv->planes[2];
+ int outw = vf->priv->outw,
+ outh = vf->priv->outh;
+ struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
+ int i, j;
+
+ for (i = 0; i < outh; i++) {
+ int xmin = dirty_rows[i].xmin & ~1,
+ xmax = dirty_rows[i].xmax;
+ for (j = xmin; j < xmax; j += 2) {
+ size_t p = i * outw + j;
+ dst_u[p] = (dst_u[p] + dst_u[p + 1]) / 2;
+ dst_v[p] = (dst_v[p] + dst_v[p + 1]) / 2;
+ }
+ }
+}
+
+static void render_frame_yuv422(vf_instance_t *vf)
+{
+ uint8_t *alpha = vf->priv->alphas[0];
+ uint8_t *src_y = vf->priv->planes[0],
+ *src_u = vf->priv->planes[1],
+ *src_v = vf->priv->planes[2];
+ int outw = vf->priv->outw,
+ outh = vf->priv->outh;
+ struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
+ uint8_t *dest = vf->dmpi->planes[0];
+ int stride = vf->dmpi->stride[0];
+ int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
+ int i, j;
+
+ for (i = 0; i < outh; i++) {
+ int xmin = dirty_rows[i].xmin & ~1,
+ xmax = dirty_rows[i].xmax;
+ for (j = xmin; j < xmax; j += 2) {
+ size_t src = i * outw + j,
+ dst = i * stride + j * 2;
+ uint_fast16_t a0 = alpha[src],
+ a1 = alpha[src + 1];
+ uint8_t y0, y1, u, v;
+
+ if (a0 == 0xFF && a1 == 0xFF)
+ continue;
+
+ y0 = dest[dst + is_uyvy + 0];
+ y1 = dest[dst + is_uyvy + 2];
+ u = dest[dst - is_uyvy + 1];
+ v = dest[dst - is_uyvy + 3];
+
+ a0 = MAP_16BIT(a0);
+ a1 = MAP_16BIT(a1);
+ y0 = ((a0 * y0) >> 8) + src_y[src];
+ y1 = ((a1 * y1) >> 8) + src_y[src + 1];
+
+ a0 = (a0 + a1) / 2;
+ u = ((a0 * u) >> 8) + src_u[src];
+ v = ((a0 * v) >> 8) + src_v[src];
+
+ dest[dst + is_uyvy + 0] = y0;
+ dest[dst + is_uyvy + 2] = y1;
+ dest[dst - is_uyvy + 1] = u;
+ dest[dst - is_uyvy + 3] = v;
+ }
+ }
+}
+
+static void prepare_buffer_420p(vf_instance_t *vf)
+{
+ int outw = vf->priv->outw,
+ outh = vf->priv->outh;
+ uint8_t *dst_u = vf->priv->planes[1],
+ *dst_v = vf->priv->planes[2];
+ uint8_t *src_a = vf->priv->alphas[0],
+ *dst_a = vf->priv->alphas[1];
+ struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
+ int i, j;
+
+ for (i = 0; i < outh; i += 2) {
+ int xmin = FFMIN(dirty_rows[i].xmin, dirty_rows[i + 1].xmin) & ~1,
+ xmax = FFMAX(dirty_rows[i].xmax, dirty_rows[i + 1].xmax);
+ for (j = xmin; j < xmax; j += 2) {
+ size_t p = i * outw / 4 + j / 2,
+ q1 = i * outw + j,
+ q2 = q1 + outw;
+ dst_a[p] = (src_a[q1] + src_a[q1 + 1] +
+ src_a[q2] + src_a[q2 + 1] + 2) / 4;
+ dst_u[p] = (dst_u[q1] + dst_u[q1 + 1] +
+ dst_u[q2] + dst_u[q2 + 1] + 2) / 4;
+ dst_v[p] = (dst_v[q1] + dst_v[q1 + 1] +
+ dst_v[q2] + dst_v[q2 + 1] + 2) / 4;
+ }
+ }
+}
+
+static void render_frame_yuv420p(vf_instance_t *vf)
+{
+ uint8_t **planes = vf->priv->planes;
+ uint8_t **dest = vf->dmpi->planes;
+ struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
+ uint8_t *alpha;
+ uint8_t *src_y = vf->priv->planes[0],
+ *src_u = vf->priv->planes[1],
+ *src_v = vf->priv->planes[2];
+ uint8_t *dst_y = vf->dmpi->planes[0],
+ *dst_u = vf->dmpi->planes[1],
+ *dst_v = vf->dmpi->planes[2];
+ int stride;
+ int outw = vf->priv->outw,
+ outh = vf->priv->outh;
+ int i, j;
+
+ // y
+ alpha = vf->priv->alphas[0];
+ stride = vf->dmpi->stride[0];
+ for (i = 0; i < outh; i++) {
+ int xmin = dirty_rows[i].xmin,
+ xmax = dirty_rows[i].xmax;
+ for (j = xmin; j < xmax; j++) {
+ size_t s = i * outw + j,
+ d = i * stride + j;
+ if (alpha[s] != 0xFF)
+ dst_y[d] = ((MAP_16BIT(alpha[s]) * dst_y[d]) >> 8) + src_y[s];
+ }
+ }
+
+ // u & v
+ alpha = vf->priv->alphas[1];
+ stride = vf->dmpi->stride[1];
+ for (i = 0; i < outh / 2; i++) {
+ int xmin = FFMIN(dirty_rows[i * 2].xmin, dirty_rows[i * 2 + 1].xmin),
+ xmax = FFMAX(dirty_rows[i * 2].xmax, dirty_rows[i * 2 + 1].xmax);
+ for (j = xmin / 2; j < (xmax + 1) / 2; j++) {
+ size_t s = i * outw / 2 + j,
+ d = i * stride + j;
+ if (alpha[s] != 0xFF) {
+ uint_fast16_t a = MAP_16BIT(alpha[s]);
+ dst_u[d] = ((a * dst_u[d]) >> 8) + src_u[s];
+ dst_v[d] = ((a * dst_v[d]) >> 8) + src_v[s];
+ }
+ }
+ }
+}
+
+static void clean_buffer(vf_instance_t *vf)
+{
+ int outw = vf->priv->outw,
+ outh = vf->priv->outh;
+ struct dirty_rows_extent *dirty_rows = vf->priv->dirty_rows;
+ uint8_t **planes = vf->priv->planes;
+ uint8_t *alpha = vf->priv->alphas[0];
+ int i, j;
+
+ for (i = 0; i < MP_MAX_PLANES; i++) {
+ uint8_t *plane = planes[i];
+ if (!plane)
+ break;
+ for (j = 0; j < outh; j++) {
+ int xmin = dirty_rows[j].xmin;
+ int width = dirty_rows[j].xmax - xmin;
+ if (width > 0)
+ memset(plane + j * outw + xmin, 0, width);
+ }
+ }
+ for (i = 0; i < outh; i++) {
+ int xmin = dirty_rows[i].xmin;
+ int width = dirty_rows[i].xmax - xmin;
+ if (width > 0)
+ memset(alpha + i * outw + xmin, -1, width);
+ }
+ for (i = 0; i < outh; i++) {
+ dirty_rows[i].xmin = outw;
+ dirty_rows[i].xmax = 0;
+ }
+}
+
static int config(struct vf_instance *vf,
int width, int height, int d_width, int d_height,
unsigned int flags, unsigned int outfmt)
{
struct mp_eosd_settings res = {0};
+ struct dirty_rows_extent *dirty_rows;
+ int outw, outh;
+ int planes, alphas;
+ int i;
switch (outfmt) {
case IMGFMT_YV12:
case IMGFMT_I420:
case IMGFMT_IYUV:
vf->priv->is_planar = 1;
- vf->priv->copy_from_image = copy_from_image_yuv420p;
- vf->priv->copy_to_image = copy_to_image_yuv420p;
+ planes = 3;
+ alphas = 2;
+ vf->priv->draw_image = draw_image_yuv;
+ vf->priv->render_frame = render_frame_yuv420p;
+ vf->priv->prepare_buffer = prepare_buffer_420p;
break;
case IMGFMT_UYVY:
case IMGFMT_YUY2:
vf->priv->is_planar = 0;
- vf->priv->copy_from_image = copy_from_image_yuv422;
- vf->priv->copy_to_image = copy_to_image_yuv422;
+ planes = 3;
+ alphas = 1;
+ vf->priv->draw_image = draw_image_yuv;
+ vf->priv->render_frame = render_frame_yuv422;
+ vf->priv->prepare_buffer = prepare_buffer_422;
break;
default:
return 0;
}
vf->priv->outfmt = outfmt;
- vf->priv->outh = height + ass_top_margin + ass_bottom_margin;
- vf->priv->outw = width;
+ vf->priv->outh = outh = height + ass_top_margin + ass_bottom_margin;
+ vf->priv->outw = outw = width;
if (!opt_screen_size_x && !opt_screen_size_y) {
d_width = d_width * vf->priv->outw / width;
d_height = d_height * vf->priv->outh / height;
}
- if (!vf->priv->is_planar)
- vf->priv->planes[0] = malloc(vf->priv->outw * vf->priv->outh);
- vf->priv->planes[1] = malloc(vf->priv->outw * vf->priv->outh);
- vf->priv->planes[2] = malloc(vf->priv->outw * vf->priv->outh);
- vf->priv->dirty_rows = malloc(vf->priv->outh);
+ for (i = 0; i < planes; i++)
+ vf->priv->planes[i] = av_malloc(outw * outh);
+ for (i = 0; i < alphas; i++)
+ vf->priv->alphas[i] = av_malloc(outw * outh);
+ dirty_rows = av_malloc(outh * sizeof(*dirty_rows));
+ // mark all rows dirty here
+ // so that they can be properly cleaned in clear_buffer()
+ for (i = 0; i < outh; i++) {
+ dirty_rows[i].xmin = 0;
+ dirty_rows[i].xmax = outw;
+ }
+ vf->priv->dirty_rows = dirty_rows;
+ clean_buffer(vf);
res.w = vf->priv->outw;
res.h = vf->priv->outh;
@@ -272,228 +514,26 @@
return 0;
}
-/**
- * \brief Copy specified rows from render_context.dmpi to render_context.planes, upsampling to 4:4:4
- */
-static void copy_from_image_yuv420p(struct vf_instance *vf, int first_row,
- int last_row)
+static void prepare_eosd(vf_instance_t *vf, struct mp_eosd_image_list *imgs)
{
- int pl;
- int i, j, k;
- unsigned char val;
- int chroma_rows;
+ struct mp_eosd_image *img = eosd_image_first(imgs);
+ void (*draw_image)(vf_instance_t *, struct mp_eosd_image *);
- first_row -= (first_row % 2);
- last_row += (last_row % 2);
- chroma_rows = (last_row - first_row) / 2;
-
- assert(first_row >= 0);
- assert(first_row <= last_row);
- assert(last_row <= vf->priv->outh);
-
- for (pl = 1; pl < 3; ++pl) {
- int dst_stride = vf->priv->outw;
- int src_stride = vf->dmpi->stride[pl];
-
- unsigned char *src = vf->dmpi->planes[pl] + (first_row / 2) * src_stride;
- unsigned char *dst = vf->priv->planes[pl] + first_row * dst_stride;
- unsigned char *dst_next = dst + dst_stride;
- for (i = 0; i < chroma_rows; ++i) {
- if ((vf->priv->dirty_rows[first_row + i * 2 ] == 0) ||
- (vf->priv->dirty_rows[first_row + i * 2 + 1] == 0)) {
- for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k += 2) {
- val = *(src + j);
- *(dst + k ) = val;
- *(dst + k + 1) = val;
- *(dst_next + k ) = val;
- *(dst_next + k + 1) = val;
- }
- }
- src += src_stride;
- dst = dst_next + dst_stride;
- dst_next = dst + dst_stride;
- }
- }
- for (i = first_row; i < last_row; ++i)
- vf->priv->dirty_rows[i] = 1;
+ clean_buffer(vf);
+ draw_image = vf->priv->draw_image;
+ for (; img; img = eosd_image_next(imgs))
+ draw_image(vf, img);
+ vf->priv->prepare_buffer(vf);
}
-/**
- * \brief Copy all previously copied rows back to render_context.dmpi
- */
-static void copy_to_image_yuv420p(struct vf_instance *vf)
-{
- int pl;
- int i, j, k;
- for (pl = 1; pl < 3; ++pl) {
- int dst_stride = vf->dmpi->stride[pl];
- int src_stride = vf->priv->outw;
-
- unsigned char *dst = vf->dmpi->planes[pl];
- unsigned char *src = vf->priv->planes[pl];
- unsigned char *src_next = vf->priv->planes[pl] + src_stride;
- for (i = 0; i < vf->priv->outh / 2; ++i) {
- if ((vf->priv->dirty_rows[i * 2] == 1)) {
- assert(vf->priv->dirty_rows[i * 2 + 1] == 1);
- for (j = 0, k = 0; j < vf->dmpi->chroma_width; ++j, k += 2) {
- unsigned val = 0;
- val += *(src + k);
- val += *(src + k + 1);
- val += *(src_next + k);
- val += *(src_next + k + 1);
- *(dst + j) = val >> 2;
- }
- }
- dst += dst_stride;
- src = src_next + src_stride;
- src_next = src + src_stride;
- }
- }
-}
-
-static void copy_from_image_yuv422(struct vf_instance *vf,
- int first_row, int last_row)
-{
- unsigned char *dirty_rows = vf->priv->dirty_rows;
- int src_stride = vf->dmpi->stride[0];
- int dst_stride = vf->priv->outw;
- unsigned char *src = vf->dmpi->planes[0] + first_row * src_stride;
- unsigned char **dst = vf->priv->planes;
- int dst_off = first_row * dst_stride;
- int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
- int i, j, k;
-
- for (i = first_row; i < last_row; ++i) {
- int next_off = dst_off + dst_stride;
- if (!dirty_rows[i]) {
- if (is_uyvy) {
- for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[0][j ] = src[k + 1];
- dst[0][j + 1] = src[k + 3];
- dst[1][j ] = src[k ];
- dst[1][j + 1] = src[k ];
- dst[2][j ] = src[k + 2];
- dst[2][j + 1] = src[k + 2];
- }
- } else {
- for (j = dst_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[0][j ] = src[k ];
- dst[0][j + 1] = src[k + 2];
- dst[1][j ] = src[k + 1];
- dst[1][j + 1] = src[k + 1];
- dst[2][j ] = src[k + 3];
- dst[2][j + 1] = src[k + 3];
- }
- }
- }
- src += src_stride;
- dst_off = next_off;
- }
- for (i = first_row; i < last_row; ++i)
- dirty_rows[i] = 1;
-}
-
-static void copy_to_image_yuv422(struct vf_instance *vf)
-{
- unsigned char *dirty_rows = vf->priv->dirty_rows;
- int src_stride = vf->priv->outw;
- int dst_stride = vf->dmpi->stride[0];
- int height = vf->priv->outh;
- unsigned char **src = vf->priv->planes;
- unsigned char *dst = vf->dmpi->planes[0];
- int src_off = 0;
- int is_uyvy = vf->priv->outfmt == IMGFMT_UYVY;
- int i, j, k;
-
- for (i = 0; i < height; ++i) {
- int next_off = src_off + src_stride;
- if (*dirty_rows++) {
-#define AVERAGE(a, b) (((unsigned)(a) + (unsigned)(b)) >> 1)
- if (is_uyvy) {
- for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[k ] = AVERAGE(src[1][j], src[1][j + 1]);
- dst[k + 1] = src[0][j];
- dst[k + 2] = AVERAGE(src[2][j], src[2][j + 1]);
- dst[k + 3] = src[0][j + 1];
- }
- } else {
- for (j = src_off, k = 0; j < next_off; j += 2, k += 4) {
- dst[k ] = src[0][j];
- dst[k + 1] = AVERAGE(src[1][j], src[1][j + 1]);
- dst[k + 2] = src[0][j + 1];
- dst[k + 3] = AVERAGE(src[2][j], src[2][j + 1]);
- }
- }
-#undef AVERAGE
- }
- src_off = next_off;
- dst += dst_stride;
- }
-}
-
-static void my_draw_bitmap(struct vf_instance *vf, unsigned char *bitmap,
- int bitmap_w, int bitmap_h, int stride,
- int dst_x, int dst_y, unsigned color)
-{
- unsigned char y = rgba2y(color);
- unsigned char u = rgba2u(color);
- unsigned char v = rgba2v(color);
- unsigned opacity = 255 - _a(color);
- unsigned char *src, *dsty, *dstu, *dstv;
- int i, j;
- mp_image_t *dmpi = vf->dmpi;
- int stride_y = vf->priv->is_planar ? dmpi->stride[0] : vf->priv->outw;
-
- opacity = (0x10203 * opacity + 0x80) >> 8; /* 0x10203 = (1<<32)/(255*255) */
- /* 0 <= opacity <= 0x10101 */
- src = bitmap;
- dsty = vf->priv->is_planar ? dmpi->planes[0] : vf->priv->planes[0];
- dsty += dst_x + dst_y * stride_y;
- dstu = vf->priv->planes[1] + dst_x + dst_y * vf->priv->outw;
- dstv = vf->priv->planes[2] + dst_x + dst_y * vf->priv->outw;
- for (i = 0; i < bitmap_h; ++i) {
- for (j = 0; j < bitmap_w; ++j) {
- unsigned k = src[j];
- if (!k)
- continue;
- k *= opacity; /* 0 <= k <= 0xFFFFFF */
- dsty[j] = (k * y + (0xFFFFFF - k) * dsty[j] + 0x800000) >> 24;
- dstu[j] = (k * u + (0xFFFFFF - k) * dstu[j] + 0x800000) >> 24;
- dstv[j] = (k * v + (0xFFFFFF - k) * dstv[j] + 0x800000) >> 24;
- }
- src += stride;
- dsty += stride_y;
- dstu += vf->priv->outw;
- dstv += vf->priv->outw;
- }
-}
-
-static void render_frame(struct vf_instance *vf, mp_image_t *mpi,
- struct mp_eosd_image_list *images)
-{
- struct mp_eosd_image *img;
- copy_from_image_func copy_from_image = vf->priv->copy_from_image;
- copy_to_image_func copy_to_image = vf->priv->copy_to_image;
-
- img = eosd_image_first(images);
- if (!img)
- return;
- memset(vf->priv->dirty_rows, 0, vf->priv->outh); // reset dirty rows
- while (img) {
- copy_from_image(vf, img->dst_y, img->dst_y + img->h);
- my_draw_bitmap(vf, img->bitmap, img->w, img->h, img->stride,
- img->dst_x, img->dst_y, img->color);
- img = eosd_image_next(images);
- }
- copy_to_image(vf);
-}
-
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
{
struct mp_eosd_image_list images;
eosd_render_frame(pts, &images);
prepare_image(vf, mpi);
- render_frame(vf, mpi, &images);
+ if (images.changed)
+ prepare_eosd(vf, &images);
+ vf->priv->render_frame(vf);
return vf_next_put_image(vf, vf->dmpi, pts);
}
@@ -523,11 +563,12 @@
static void uninit(struct vf_instance *vf)
{
- if (!vf->priv->is_planar)
- free(vf->priv->planes[0]);
- free(vf->priv->planes[1]);
- free(vf->priv->planes[2]);
- free(vf->priv->dirty_rows);
+ int i;
+ for (i = 0; i < MP_MAX_PLANES; i++)
+ av_free(vf->priv->planes[i]);
+ for (i = 0; i < MP_MAX_PLANES; i++)
+ av_free(vf->priv->alphas[i]);
+ av_free(vf->priv->dirty_rows);
}
static const unsigned int fmt_list[] = {
@@ -579,7 +620,7 @@
const vf_info_t vf_info_ass = {
"Render ASS/SSA subtitles",
"ass",
- "Evgeniy Stepanov",
+ "Evgeniy Stepanov, Xidorn Quan",
"",
vf_open,
&vf_opts
More information about the MPlayer-dev-eng
mailing list