[FFmpeg-devel] [PATCH] lavfi: new colorspace conversion filter.

Wed Apr 6 01:01:27 CEST 2016

On Thu, Mar 31, 2016 at 08:29:37PM -0400, Ronald S. Bultje wrote:
> The intent here is similar to colormatrix, but it's LGPLv2.1-or-later
> (instead of GPLv2.0) and supports gamma/chromaticity correction.
> ---
>  doc/filters.texi                     | 183 +++++++
>  libavfilter/Makefile                 |   1 +
>  libavfilter/allfilters.c             |   1 +
>  libavfilter/colorspacedsp.c          | 130 +++++
>  libavfilter/colorspacedsp.h          |  51 ++
>  libavfilter/colorspacedsp_template.c | 256 ++++++++++
>  libavfilter/vf_colorspace.c          | 909 +++++++++++++++++++++++++++++++++++

note: don't forget Changelog and minor lavfi bump

note2: sorry in advance if my comments aren't very deeply related to the
algorithms themselves.

[...]
> +#include "colorspacedsp.h"
> +
> +#define SS_W 0
> +#define SS_H 0
> +
> +#define BIT_DEPTH 8
> +#include "colorspacedsp_template.c"
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 10
> +#include "colorspacedsp_template.c"
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 12
> +#include "colorspacedsp_template.c"
> +
> +#undef SS_W
> +#undef SS_H
> +
> +#define SS_W 1
> +#define SS_H 0
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 8
> +#include "colorspacedsp_template.c"
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 10
> +#include "colorspacedsp_template.c"
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 12
> +#include "colorspacedsp_template.c"
> +
> +#undef SS_W
> +#undef SS_H
> +
> +#define SS_W 1
> +#define SS_H 1
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 8
> +#include "colorspacedsp_template.c"
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 10
> +#include "colorspacedsp_template.c"
> +
> +#undef BIT_DEPTH
> +#define BIT_DEPTH 12
> +#include "colorspacedsp_template.c"
> +

Note: a cleaner way to do this (IMO) is to do put the settings into the
template file, and do something like:

   #define TEMPLATE_444
   #include "colorspacedsp_template.c"
   #undef TEMPLATE_444

   #define TEMPLATE_422
   #include "colorspacedsp_template.c"
   #undef TEMPLATE_422

   #define TEMPLATE_420
   #include "colorspacedsp_template.c"
   #undef TEMPLATE_420

it's simpler for the caller, and in the template you see the exact
settings in use for each "profile".

See libswresample/rematrix{,_template}.c for a complete example.

[...]
> +typedef void (*yuv2rgb_fn)(int16_t *rgb[3], ptrdiff_t rgb_stride,
> +                           uint8_t *yuv[3], ptrdiff_t yuv_stride[3],
> +                           int w, int h, const int16_t yuv2rgb_coeffs[3][3][8],
> +                           const int16_t yuv_offset[8]);
> +typedef void (*rgb2yuv_fn)(uint8_t *yuv[3], ptrdiff_t yuv_stride[3],
> +                           int16_t *rgb[3], ptrdiff_t rgb_stride,
> +                           int w, int h, const int16_t rgb2yuv_coeffs[3][3][8],
> +                           const int16_t yuv_offset[8]);
> +typedef void (*yuv2yuv_fn)(uint8_t *yuv_out[3], ptrdiff_t yuv_out_stride[3],
> +                           uint8_t *yuv_in[3], ptrdiff_t yuv_in_stride[3],
> +                           int w, int h, const int16_t yuv2yuv_coeffs[3][3][8],
> +                           const int16_t yuv_offset[2][8]);

I suppose you didn't make use of const for the input because of the
pain of the multiple dimensions?

uint8_t * const in[N] doesn't do the trick?

[...]
> +static void fn(yuv2rgb)(int16_t *rgb[3], ptrdiff_t rgb_stride,
> +                        uint8_t *_yuv[3], ptrdiff_t yuv_stride[3],
> +                        int w, int h, const int16_t yuv2rgb_coeffs[3][3][8],
> +                        const int16_t yuv_offset[8])
> +{
> +    pixel **yuv = (pixel **) _yuv;
> +    const pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
> +    int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
> +    int y, x;

> +    int cy = yuv2rgb_coeffs[0][0][0];
> +    int crv = yuv2rgb_coeffs[0][2][0];
> +    int cgu = yuv2rgb_coeffs[1][1][0];
> +    int cgv = yuv2rgb_coeffs[1][2][0];
> +    int cbu = yuv2rgb_coeffs[2][1][0];
> +    int sh = BIT_DEPTH - 1;
> +    int uv_offset = 128 << (BIT_DEPTH - 8);

nit: if these aren't going to change in this big function, you might want
to set them const. It could help the compilers, and particularly the last
two.

> +
> +    av_assert2(yuv2rgb_coeffs[0][1][0] == 0);
> +    av_assert2(yuv2rgb_coeffs[2][2][0] == 0);
> +    av_assert2(yuv2rgb_coeffs[1][0][0] == cy && yuv2rgb_coeffs[2][0][0] == cy);
> +

> +#if SS_W == 1
> +    w = (w + 1) >> 1;
> +#if SS_H == 1
> +    h = (h + 1) >> 1;
> +#endif
> +#endif

this should save some ifdefery, still be a noop when SS_X are 0, and
generate the same instruction (if i'm not mistaken) when not 0:

    w = AV_CEIL_RSHIFT(w, SS_W);
    h = AV_CEIL_RSHIFT(h, SS_H);

but maybe that's not what you want.

> +    for (y = 0; y < h; y++) {
> +        for (x = 0; x < w; x++) {
> +            int y00 = yuv0[x << SS_W] - yuv_offset[0];
> +#if SS_W == 1
> +            int y01 = yuv0[2 * x + 1] - yuv_offset[0];
> +#if SS_H == 1
> +            int y10 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x] - yuv_offset[0];
> +            int y11 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x + 1] - yuv_offset[0];
> +#endif
> +#endif
> +            int u = yuv1[x] - uv_offset, v = yuv2[x] - uv_offset;
> +
> +            rgb0[x << SS_W]              = av_clip_int16((y00 * cy + crv * v + 8192) >> sh);
> +#if SS_W == 1
> +            rgb0[2 * x + 1]              = av_clip_int16((y01 * cy + crv * v + 8192) >> sh);
> +#if SS_H == 1
> +            rgb0[2 * x + rgb_stride]     = av_clip_int16((y10 * cy + crv * v + 8192) >> sh);
> +            rgb0[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + crv * v + 8192) >> sh);
> +#endif
> +#endif

mmmh... 8192? I might be completely mistaken, but assuming this is for
rounding, isn't this supposed to be...  1<<(sh-1) or something like that?
If not, I probably need an explanation, even if obvious.

[...]
> +        yuv0 += (yuv_stride[0] * (1 << SS_H)) / sizeof(pixel);
> +        yuv1 += yuv_stride[1] / sizeof(pixel);
> +        yuv2 += yuv_stride[2] / sizeof(pixel);
> +        rgb0 += rgb_stride * (1 << SS_H);
> +        rgb1 += rgb_stride * (1 << SS_H);
> +        rgb2 += rgb_stride * (1 << SS_H);

i know we will have some asm for this, but compiler will probably like to
have these increment variables in some const before the loop instead of
computing them every time.

I don't think we will have ASM for all the architectures soon so having a
fast C is still relevant for such important code.

[...]
> +// FIXME deal with odd width/heights (or just forbid it)
> +// FIXME simd
> +// FIXME add some asserts in random parts of the table generation code to ensure
> +// that we never overflow, e.g. if coeffs are 14bit, they can't exceed [-2.0,2.0>
> +// range, and I'm not entirely sure that's always true (e.g. yuv2yuv for bt2020
> +// to/from 601, blue might go off quite a bit? If it exceeds, change the bitrange.
> +// FIXME bt2020cl support (linearization between yuv/rgb step instead of between rgb/xyz)
> +// FIXME test that the values in (de)lin_lut don't exceed their container storage
> +// type size
> +
> +/*
> + * All constants explained in e.g. https://linuxtv.org/downloads/v4l-dvb-apis/ch02s06.html
> + */
> +static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
> +    [AVCOL_SPC_FCC]        = { 0.30,   0.59,   0.11   },
> +    [AVCOL_SPC_BT470BG]    = { 0.299,  0.587,  0.114  },
> +    [AVCOL_SPC_SMPTE170M]  = { 0.299,  0.587,  0.114  },
> +    [AVCOL_SPC_BT709]      = { 0.2126, 0.7152, 0.0722 },
> +    [AVCOL_SPC_SMPTE240M]  = { 0.212,  0.701,  0.087  },
> +    [AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 },
> +    [AVCOL_SPC_BT2020_CL]  = { 0.2627, 0.6780, 0.0593 },
> +};
> +

Maybe add bitextact in the FIXME/TODO list above, or is it mentioned
somewhere else already? Would be nice at least for FATE integration.

[...]
> +static int fill_gamma_table(ColorSpaceContext *s)
> +{
> +    int n;
> +    double in_alpha = s->in_txchr->alpha, in_beta = s->in_txchr->beta;
> +    double in_gamma = s->in_txchr->gamma, in_delta = s->in_txchr->delta;
> +    double in_ialpha = 1.0 / in_alpha, in_igamma = 1.0 / in_gamma, in_idelta = 1.0 / in_delta;
> +    double out_alpha = s->out_txchr->alpha, out_beta = s->out_txchr->beta;
> +    double out_gamma = s->out_txchr->gamma, out_delta = s->out_txchr->delta;
> +

> +    s->lin_lut = av_malloc(sizeof(*s->lin_lut) * 32768 * 2);

av_malloc_array()?

[...]
> +static int create_filtergraph(AVFilterContext *ctx,
> +                              const AVFrame *in, const AVFrame *out)

omg this function... :)

[...]
> +AVFilter ff_vf_colorspace = {
> +    .name            = "colorspace",
> +    .description     = NULL_IF_CONFIG_SMALL("Convert between colorspaces."),
> +    .init            = init,
> +    .uninit          = uninit,
> +    .query_formats   = query_formats,
> +    .priv_size       = sizeof(ColorSpaceContext),
> +    .priv_class      = &colorspace_class,
> +    .inputs          = inputs,
> +    .outputs         = outputs,

I think you can safely add the AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC flag
to this filter.

Is threading hard to add?

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20160406/a21789ef/attachment.sig>