[FFmpeg-devel] [PATCH] ppm rgb48 support

Sun Feb 22 15:16:48 CET 2009

On Sun, Feb 22, 2009 at 11:00:43PM +1100, Peter Ross wrote:
> On Fri, Feb 20, 2009 at 05:14:44PM +0100, Ivo wrote:
> > On Friday 20 February 2009 15:24, Michael Niedermayer wrote:
> > > On Sat, Feb 21, 2009 at 12:01:17AM +1100, Peter Ross wrote:
> > > > Nb: RGB48 support for swscaler was posted some time ago.
> > > > http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2007-May/029784.html
> > >
> > > IIRC there where comments and the issues where never fixed
> > 
> > True. I didn't have the time at that moment to fix them right away, so I put 
> > it on my todo-list and it's still there. Kind of lost interest too, though 
> > we'll probably need >8 bits per component support in the end as digital 
> > cinema is getting more popular. And medical images would need it too. 
> > Peter, if you want, you can take my last patch and fix the few remaining 
> > issues. There's not much left IIRC.
> 
> Enclosed is an updated version of the rgb48 swscaler patch.
> 
> Additions:
>   - compacted rgb48togray16_template() to produce less .text
>   - added CONFIG_HDR_PIXFMT to disable rgb48 code
> 
> -- Peter
> (A907 E02F A6E5 0CD2 34CD 20D2 6760 79C5 AC40 DD6B)

> Index: libswscale/yuv2rgb.c
> ===================================================================
> --- libswscale/yuv2rgb.c	(revision 28699)
> +++ libswscale/yuv2rgb.c	(working copy)

like kostya said, this part is too late, yuv2rgb has been replaced by kostyas
LGPL yuv2rgb2

[...]

> @@ -1780,12 +1788,16 @@
>      const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
>      const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
>      const int dstId= fmt_depth(dstFormat) >> 2;
> +    const int srcCbe= srcFormat==PIX_FMT_RGB48BE; /* components big-endian */
> +    const int dstCbe= dstFormat==PIX_FMT_RGB48BE;
>      void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
>  
>      /* BGR -> BGR */
>      if (  (isBGR(srcFormat) && isBGR(dstFormat))
>         || (isRGB(srcFormat) && isRGB(dstFormat))){
> -        switch(srcId | (dstId<<4)){
> +        /* FIXME: use bytes instead of nibbles to avoid overflow when
> +         *        RGB64* is added */
> +        switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){

i would prefer
const int srcId= (fmt_depth(srcFormat) >> 2) - (srcFormat==PIX_FMT_RGB48BE);
because this leads to a smaller value for the switch and makes a jump table
a more practical choice if the compiler wants to use such table

[...]
> @@ -2023,7 +2075,80 @@
>      return srcSliceH;
>  }
>  
> +static int gray16torgb48(SwsContext *c, uint8_t *src[], int srcStride[],
> +                         int srcSliceY, int srcSliceH, uint8_t *dst[],
> +                         int dstStride[]) {

src and the strides should be const

> +    int length      = c->srcW,
> +        y           = srcSliceY,
> +        height      = srcSliceH,
> +        stride_s2   = srcStride[0] / 2,
> +        stride_d2   = dstStride[0] / 2,

> +        swap        = (c->srcFormat == PIX_FMT_GRAY16BE) ==
> +                      (c->dstFormat == PIX_FMT_RGB48BE),

swap = (c->srcFormat^c->dstFormat)&1;

and ive added a note to avutil.h to make sure this also stays true for future
pixfmts

> +        i, j;
> +    uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
>  
> +    for (i=0; i<height; i++) {

> +        if (swap)
> +            for (j=0; j<length; j++)
> +                d[3*j] = d[3*j+1] = d[3*j+2] = s[j];
> +        else

nitpick:
i prefer if(){ }else over if() else
because it makes future patches smaller when lines are added and it costs
no line for thr {}

[...]
> +static int rgb48togray16_template(SwsContext *c, uint8_t *src[],
> +                                         int srcStride[], int srcSliceY,
> +                                         int srcSliceH, uint8_t *dst[],
> +                                         int dstStride[], const int srcLE,
> +                                         const int dstLE) {
> +    int length      = c->srcW,
> +        y           = srcSliceY,
> +        height      = srcSliceH,
> +        stride_s2   = srcStride[0] / 2,
> +        stride_d2   = dstStride[0] / 2,
> +        i;
> +    uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
> +
> +    for (i=0; i<height; i++) {
> +             if (!srcLE && !dstLE) rgb48togray16row_template(s, d, length, 0, 0);
> +        else if (!srcLE &&  dstLE) rgb48togray16row_template(s, d, length, 0, 1);
> +        else if ( srcLE && !dstLE) rgb48togray16row_template(s, d, length, 1, 0);
> +        else                       rgb48togray16row_template(s, d, length, 1, 1);
> +        s += stride_s2;
> +        d += stride_d2;
> +    }
> +    return height;
> +}
> +
> +#define FUNC_RGB48TOGRAY16(name, sLE, dLE) \
> +    static int name(SwsContext *c, uint8_t *s[], int sS[], int sSY, int sSH, \
> +                    uint8_t *d[], int dS[]) { \
> +        return rgb48togray16_template(c, s, sS, sSY, sSH, d, dS, sLE, dLE); \
> +    }
> +
> +FUNC_RGB48TOGRAY16( rgb48letogray16le, 1, 1)
> +FUNC_RGB48TOGRAY16( rgb48betogray16le, 0, 1)
> +FUNC_RGB48TOGRAY16( rgb48letogray16be, 1, 0)
> +FUNC_RGB48TOGRAY16( rgb48betogray16be, 0, 0)

also please move the rgb48<->gray stuff (and others) into its own file(s),
currently sws is kinda bloated all in one file mess and i would like to
see this change thus i think its better if new additions are seperated into
files nicer.

> +
>  static void getSubSampleFactors(int *h, int *v, int format){
>      switch(format){
>      case PIX_FMT_UYVY422:

> @@ -2381,6 +2506,27 @@
>              else if (dstFormat == PIX_FMT_UYVY422)
>                  c->swScale= YUV422PToUyvyWrapper;
>          }
> +#if CONFIG_HDR_PIXFMT
> +        if (isGray16(srcFormat) && (dstFormat == PIX_FMT_RGB48BE ||
> +                                    dstFormat == PIX_FMT_RGB48LE    ))
> +        {
> +            c->swScale= gray16torgb48;
> +        }
> +        if (isGray16(dstFormat) && (srcFormat == PIX_FMT_RGB48BE ||
> +                                    srcFormat == PIX_FMT_RGB48LE    ))
> +        {

the { doesnt follow K&R style

[...]
> Index: libswscale/swscale_internal.h
> ===================================================================
> --- libswscale/swscale_internal.h	(revision 28699)
> +++ libswscale/swscale_internal.h	(working copy)
> @@ -260,6 +260,8 @@
>          || (x)==PIX_FMT_RGB4_BYTE   \
>          || (x)==PIX_FMT_MONOBLACK   \
>          || (x)==PIX_FMT_MONOWHITE   \
> +        || (x)==PIX_FMT_RGB48BE     \
> +        || (x)==PIX_FMT_RGB48LE     \
>      )
>  #define isBGR(x)        (           \
>             (x)==PIX_FMT_BGR32       \
> @@ -277,6 +279,9 @@
>  static inline int fmt_depth(int fmt)
>  {
>      switch(fmt) {
> +        case PIX_FMT_RGB48BE:
> +        case PIX_FMT_RGB48LE:
> +            return 48;
>          case PIX_FMT_BGRA:
>          case PIX_FMT_ABGR:
>          case PIX_FMT_RGBA:

changes to swscale_internal.h are ok

[..]
> +static inline void RENAME(rgb24to48)(const uint8_t *s, uint8_t *dst, long src_size)
> +{
> +    uint8_t *d = dst;
> +    const uint8_t *end = s + src_size;

> +#ifdef HAVE_MMX

its #if HAVE_MMX now

[...]
> +static inline void rgb1516to48(const uint8_t *src, uint8_t *d, long src_size,
> +                               const int be, const int g6, const int bgr) {
> +    const uint16_t *s = (const uint16_t *) src, *end = s + src_size/2;
> +
> +    for (; s<end; s++, d+=6) {
> +        unsigned int v = *s,
> +                     r = (v& (0x7c00<<g6)         ) << (1-g6),
> +                     g = (v&((0x03e0<<g6)+(g6<<5))) << (6-g6),
> +                     b = (v&  0x001f              ) << 11;
> +        if (bgr) {
> +            unsigned int t = r;
> +            r = b;
> +            b = t;
> +        }
> +        r += r>>5;
> +        r += r>>10;
> +        g += g>>( 5+g6   );
> +        g += g>>(10+g6+g6);
> +        b += b>>5;
> +        b += b>>10;

> +        if (be) {
> +            AV_WB16(&d[0], r);
> +            AV_WB16(&d[2], g);
> +            AV_WB16(&d[4], b);
> +        } else {
> +            AV_WL16(&d[0], r);
> +            AV_WL16(&d[2], g);
> +            AV_WL16(&d[4], b);
> +        }

on systems where these are implemented as 16bit writes (with bswap)
its fast but on systems where unaligend 16bit access isnt allowed
they would be implemented as byte read/write even when endianness
matches native one.
l/be2me_16 could be used with forced 16bit but this then would be
slow on a system that can write 2 bytes bswaped faster than doing a
bswap within 16bit + 16bit write.
well iam not asking you to do anything about this, just wanted to
mention it for the recored that it seems we need a aligned variant
of these macros. (AV_WLA/AV_WBA)

[...]
> Index: libswscale/swscale_template.c
> ===================================================================
> --- libswscale/swscale_template.c	(revision 28699)
> +++ libswscale/swscale_template.c	(working copy)
> @@ -1942,6 +1942,62 @@
>      }
>  }
>  
> +static inline void RENAME(rgb48beToY)(uint8_t *dst, uint8_t *src, int width)

const

> +{
> +    int i;

> +    for(i=0; i<width; i++)
> +    {

K&R style {

[...]
> @@ -59,6 +65,33 @@
>  void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
>  void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
>  void bgr8torgb8  (const uint8_t *src, uint8_t *dst, long src_size);
> +#if CONFIG_HDR_PIXFMT
> +void bgr32torgb48  (const uint8_t *src, uint8_t *dst, long src_size);
> +void bgr24torgb48  (const uint8_t *src, uint8_t *dst, long src_size);
> +void bgr16torgb48be(const uint8_t *src, uint8_t *dst, long src_size);
> +void bgr16torgb48le(const uint8_t *src, uint8_t *dst, long src_size);
> +void bgr15torgb48be(const uint8_t *src, uint8_t *dst, long src_size);
> +void bgr15torgb48le(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48betobgr32(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48letobgr32(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48betobgr24(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48letobgr24(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48betobgr16(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48letobgr16(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48betobgr15(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48letobgr15(const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48beto32   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48leto32   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48beto16   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48leto16   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48beto15   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb48leto15   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb32to48     (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb16to48be   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb16to48le   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb15to48be   (const uint8_t *src, uint8_t *dst, long src_size);
> +void rgb15to48le   (const uint8_t *src, uint8_t *dst, long src_size);
> +#endif

they need a ff_ prefix

also you are missing a changelog entry, i think rgb48 support deserves one

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

He who knows, does not speak. He who speaks, does not know. -- Lao Tsu
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090222/53eaeefe/attachment.pgp>