[FFmpeg-devel] [PATCH] Yet another stab at RGB48 support

Kostya kostya.shishkov
Fri May 15 07:56:28 CEST 2009


On Wed, May 13, 2009 at 01:59:19AM +0200, Michael Niedermayer wrote:
> On Sun, May 10, 2009 at 03:21:23PM +0300, Kostya wrote:
> > On Sat, May 09, 2009 at 07:31:20PM +0200, Diego Biurrun wrote:
> > > On Sat, May 09, 2009 at 02:28:18PM +0300, Kostya wrote:
> > > > $subj
> > > 
> > > Changelog update is missing.
> > > 
> > > $nits below
> > > 
> > [...]
> > reformatted, splitted some long lines.
> > Since this is diff against libswscale, no Changelog entry (but I
> > remember about it).
>
> [...]
> > +static int rgb48togray16_template(SwsContext *c, uint8_t *src[],
> > +                                  int srcStride[], int srcSliceY,
> > +                                  int srcSliceH, uint8_t *dst[],
> > +                                  int dstStride[], const int srcLE,
> > +                                  const int dstLE) {
> > +    int length      = c->srcW,
> > +        y           = srcSliceY,
> > +        height      = srcSliceH,
> > +        stride_s2   = srcStride[0] / 2,
> > +        stride_d2   = dstStride[0] / 2,
> > +        i;
> > +    uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
> > +
> > +    for (i = 0; i < height; i++) {
> > +             if (!srcLE && !dstLE) rgb48togray16row_template(s, d, length, 0, 0);
> > +        else if (!srcLE &&  dstLE) rgb48togray16row_template(s, d, length, 0, 1);
> > +        else if ( srcLE && !dstLE) rgb48togray16row_template(s, d, length, 1, 0);
> > +        else                       rgb48togray16row_template(s, d, length, 1, 1);
> > +        s += stride_s2;
> > +        d += stride_d2;
> > +    }
> > +    return height;
> > +}
> > +
> 
> > +#define FUNC_RGB48TOGRAY16(name, sLE, dLE) \
> > +    static int name(SwsContext *c, uint8_t *s[], int sS[], int sSY, int sSH, \
> > +                    uint8_t *d[], int dS[]) { \
> > +        return rgb48togray16_template(c, s, sS, sSY, sSH, d, dS, sLE, dLE); \
> > +    }
> > +
> > +FUNC_RGB48TOGRAY16( rgb48letogray16le, 1, 1)
> > +FUNC_RGB48TOGRAY16( rgb48betogray16le, 0, 1)
> > +FUNC_RGB48TOGRAY16( rgb48letogray16be, 1, 0)
> > +FUNC_RGB48TOGRAY16( rgb48betogray16be, 0, 0)
> 
> bloat, a single function does the trick too in this case

huh? What do you mean "single function"? 
That's the only place where low bits of 16-bit variable are used.

> [...]
> > +static inline void rgb1516to48(const uint8_t *src, uint8_t *d, long src_size,
> > +                               const int be, const int g6, const int bgr)
> > +{
> 
> > +    const uint16_t *s = (const uint16_t*) src, *end = s + src_size/2;
> 
> unreadable

split 

> [...]
> > +static void rgb48to48(const uint8_t *src, uint8_t *dst, long src_size)
> > +{
> > +    long i = 23 - src_size;
> > +    uint8_t *d = dst - i;
> > +    const uint8_t *s = src - i;
> > +#if HAVE_MMX
> > +    __asm__ volatile(
> > +    "test           %0,   %0            \n\t"
> > +    "jns            2f                  \n\t"
> > +    PREFETCH"     (%1,    %0)           \n\t"
> > +    ASMALIGN(4)
> > +    "1:                                 \n\t"
> > +    PREFETCH"    32(%1,   %0)           \n\t"
> > +    "movq          (%1,   %0), %%mm0    \n\t"
> > +    "movq         8(%1,   %0), %%mm1    \n\t"
> > +    "movq        16(%1,   %0), %%mm2    \n\t"
> > +    "movq        %%mm0, %%mm3           \n\t"
> > +    "movq        %%mm1, %%mm4           \n\t"
> > +    "movq        %%mm2, %%mm5           \n\t"
> > +    "psllw          $8, %%mm0           \n\t"
> > +    "psllw          $8, %%mm1           \n\t"
> > +    "psllw          $8, %%mm2           \n\t"
> > +    "psrlw          $8, %%mm3           \n\t"
> > +    "psrlw          $8, %%mm4           \n\t"
> > +    "psrlw          $8, %%mm5           \n\t"
> > +    "por         %%mm3, %%mm0           \n\t"
> > +    "por         %%mm4, %%mm1           \n\t"
> > +    "por         %%mm5, %%mm2           \n\t"
> > +    "movq        %%mm0,   (%2,  %0)     \n\t"
> > +    "movq        %%mm1,  8(%2,  %0)     \n\t"
> > +    "movq        %%mm2, 16(%2,  %0)     \n\t"
> > +    "add           $24,    %0           \n\t"
> > +    "js 1b                              \n\t"
> > +    "2:                                 \n\t"
> > +    SFENCE"                             \n\t"
> > +    EMMS"                               \n\t"
> > +    : "+&r"(i)
> > +    : "r" (s), "r" (d)
> > +    : "memory");
> > +#endif
> > +    for (; i < 21; i += 4) {
> > +        unsigned int x = *(uint32_t*)&s[i];
> > +        *(uint32_t*)&d[i] = ((x>>8) & 0x00FF00FF) + ((x<<8) & 0xFF00FF00);
> > +    }
> > +    if (i < 23) {
> > +        d[i]   = s[i+1];
> > +        d[i+1] = s[i];
> > +    }
> 
> is this some duplicate of a gray le <-> be function?

Haven't found that byteswapping function but should be, yes. 

> > +}
> > +
> > +static void rgb48leto24(const uint8_t *s, uint8_t *dst, long src_size)
> > +{
> > +    uint8_t *d = dst;
> > +    const uint8_t *end = s + src_size;
> > +#if HAVE_MMX
> > +    __asm__ volatile(
> > +    PREFETCH"     (%1)          \n\t"
> > +    "jmp 2f                     \n\t"
> > +    ASMALIGN(4)
> > +    "1:                         \n\t"
> > +    PREFETCH"   32(%1)          \n\t"
> > +    "movq         (%1), %%mm0   \n\t"
> > +    "movq        8(%1), %%mm1   \n\t"
> > +    "movq       16(%1), %%mm2   \n\t"
> > +    "movq       24(%1), %%mm3   \n\t"
> > +    "movq       32(%1), %%mm4   \n\t"
> > +    "movq       40(%1), %%mm5   \n\t"
> > +    "psrlw          $8, %%mm0   \n\t"
> > +    "psrlw          $8, %%mm1   \n\t"
> > +    "psrlw          $8, %%mm2   \n\t"
> > +    "psrlw          $8, %%mm3   \n\t"
> > +    "psrlw          $8, %%mm4   \n\t"
> > +    "psrlw          $8, %%mm5   \n\t"
> > +    "packuswb    %%mm1, %%mm0   \n\t"
> > +    "packuswb    %%mm3, %%mm2   \n\t"
> > +    "packuswb    %%mm5, %%mm4   \n\t"
> > +    "movq        %%mm0,   (%0)  \n\t"
> > +    "movq        %%mm2,  8(%0)  \n\t"
> > +    "movq        %%mm4, 16(%0)  \n\t"
> > +    "add           $24,    %0   \n\t"
> > +    "add           $48,    %1   \n\t"
> > +    "2:                         \n\t"
> > +    "cmp            %1,    %2   \n\t"
> > +    "ja 1b                      \n\t"
> > +    SFENCE"                     \n\t"
> > +    EMMS"                       \n\t"
> > +    : "+&r"(d), "+&r"(s)
> > +    : "r" (end-47)
> > +    : "memory");
> > +#endif
> > +    for (; s < end; s += 2, d++)
> > +        *d = *(s+1);
> > +}
> 
> same?

No. That was byte-swapping, this is byte-shaving. 
 
> > +
> > +static void rgb48beto24(const uint8_t *s, uint8_t *dst, long src_size)
> > +{
> > +    uint8_t *d = dst;
> > +    const uint8_t *end = s + src_size;
> > +#if HAVE_MMX
> > +    __asm__ volatile(
> > +    PREFETCH"     (%1)          \n\t"
> > +    "pcmpeqw     %%mm7, %%mm7   \n\t"
> > +    "psrlw          $8, %%mm7   \n\t"
> > +    "movq        %%mm7, %%mm6   \n\t"
> > +    "jmp 2f                     \n\t"
> > +    ASMALIGN(4)
> > +    "1:                         \n\t"
> > +    PREFETCH"   32(%1)          \n\t"
> > +    "movq         (%1), %%mm0   \n\t"
> > +    "movq        8(%1), %%mm1   \n\t"
> > +    "movq       16(%1), %%mm2   \n\t"
> > +    "movq       24(%1), %%mm3   \n\t"
> > +    "movq       32(%1), %%mm4   \n\t"
> > +    "movq       40(%1), %%mm5   \n\t"
> > +    "pand        %%mm6, %%mm0   \n\t"
> > +    "pand        %%mm7, %%mm1   \n\t"
> > +    "pand        %%mm6, %%mm2   \n\t"
> > +    "pand        %%mm7, %%mm3   \n\t"
> > +    "pand        %%mm6, %%mm4   \n\t"
> > +    "pand        %%mm7, %%mm5   \n\t"
> > +    "packuswb    %%mm1, %%mm0   \n\t"
> > +    "packuswb    %%mm3, %%mm2   \n\t"
> > +    "packuswb    %%mm5, %%mm4   \n\t"
> > +    "movq        %%mm0,   (%0)  \n\t"
> > +    "movq        %%mm2,  8(%0)  \n\t"
> > +    "movq        %%mm4, 16(%0)  \n\t"
> > +    "add           $24,    %0   \n\t"
> > +    "add           $48,    %1   \n\t"
> > +    "2:                         \n\t"
> > +    "cmp            %1,    %2   \n\t"
> > +    "ja 1b                      \n\t"
> > +    SFENCE"                     \n\t"
> > +    EMMS"                       \n\t"
> > +    : "+&r"(d), "+&r"(s)
> > +    : "r" (end-47)
> > +    : "memory");
> > +#endif
> > +    for (; s < end; s += 2, d++)
> > +        *d = *s;
> > +}
> > +
> 
> so seems this ...

same as the previous one 
 
> > +static void rgb24to48(const uint8_t *s, uint8_t *dst, long src_size)
> > +{
> > +    uint8_t *d = dst;
> > +    const uint8_t *end = s + src_size;
> > +#if HAVE_MMX
> > +    __asm__ volatile(
> > +    PREFETCH"     (%1)          \n\t"
> > +    "jmp 2f                     \n\t"
> > +    ASMALIGN(4)
> > +    "1:                         \n\t"
> > +    PREFETCH"   32(%1)          \n\t"
> > +    "movd         (%1), %%mm0   \n\t"
> > +    "movd        4(%1), %%mm1   \n\t"
> > +    "movd        8(%1), %%mm2   \n\t"
> > +    "movd       12(%1), %%mm3   \n\t"
> > +    "movd       16(%1), %%mm4   \n\t"
> > +    "movd       20(%1), %%mm5   \n\t"
> > +    "punpcklbw   %%mm0, %%mm0   \n\t"
> > +    "punpcklbw   %%mm1, %%mm1   \n\t"
> > +    "punpcklbw   %%mm2, %%mm2   \n\t"
> > +    "punpcklbw   %%mm3, %%mm3   \n\t"
> > +    "punpcklbw   %%mm4, %%mm4   \n\t"
> > +    "punpcklbw   %%mm5, %%mm5   \n\t"
> > +    MOVNTQ"      %%mm0,   (%0)  \n\t"
> > +    MOVNTQ"      %%mm1,  8(%0)  \n\t"
> > +    MOVNTQ"      %%mm2, 16(%0)  \n\t"
> > +    MOVNTQ"      %%mm3, 24(%0)  \n\t"
> > +    MOVNTQ"      %%mm4, 32(%0)  \n\t"
> > +    MOVNTQ"      %%mm5, 40(%0)  \n\t"
> > +    "add           $48,    %0   \n\t"
> > +    "add           $24,    %1   \n\t"
> > +    "2:                         \n\t"
> > +    "cmp            %1,    %2   \n\t"
> > +    "ja 1b                      \n\t"
> > +    SFENCE"                     \n\t"
> > +    EMMS"                       \n\t"
> > +   : "+&r"(d), "+&r"(s)
> > +   : "r" (end-23)
> > +   : "memory");
> > +#endif
> > +    for (; s < end; s++, d += 2) {
> > +        int x = *s;
> > +        *(uint16_t*)d = (x << 8) + x;
> > +    }
> > +}
> > +
> 
> and this
> 
> 
> > +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c)
> > +{
> > +    const enum PixelFormat srcFormat = c->srcFormat;
> > +    const enum PixelFormat dstFormat = c->dstFormat;
> > +    const int srcCbe = srcFormat==PIX_FMT_RGB48BE; /* components big-endian */
> > +    const int dstCbe = dstFormat==PIX_FMT_RGB48BE;
> > +    const int srcBpp = (fmt_depth(srcFormat) + 7) >> 3;
> > +    const int dstBpp = (fmt_depth(dstFormat) + 7) >> 3;
> > +    const int srcId = (fmt_depth(srcFormat) >> 2) - (srcBpp > 4); /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8, 48:11 */
> > +    const int dstId = (fmt_depth(dstFormat) >> 2) - (dstBpp > 4);
> > +    SwsRGBFunc conv = NULL;
> > +
> > +    if (  (isBGR(srcFormat) && isBGR(dstFormat))
> > +       || (isRGB(srcFormat) && isRGB(dstFormat))) {
> > +        switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> > +        case 0x013B: conv = rgb48beto15; break;
> > +        case 0x003B: conv = rgb48leto15; break;
> > +        case 0x014B: conv = rgb48beto16; break;
> > +        case 0x004B: conv = rgb48leto16; break;
> > +        case 0x016B: conv = rgb48beto24; break;
> > +        case 0x006B: conv = rgb48leto24; break;
> > +        case 0x018B: conv = rgb48beto32; break;
> > +        case 0x008B: conv = rgb48leto32; break;
> > +        case 0x10B3: conv = rgb15to48be; break;
> > +        case 0x00B3: conv = rgb15to48le; break;
> > +        case 0x10B4: conv = rgb16to48be; break;
> > +        case 0x00B4: conv = rgb16to48le; break;
> > +        case 0x10B6:
> > +        case 0x00B6: conv = rgb24to48; break;
> > +        case 0x10B8:
> > +        case 0x00B8: conv = rgb32to48; break;
> > +        case 0x10BB:
> > +        case 0x01BB: conv = rgb48to48; break;
> > +        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> > +                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> > +        }
> > +    } else if (  (isBGR(srcFormat) && isRGB(dstFormat))
> > +             || (isRGB(srcFormat) && isBGR(dstFormat))) {
> > +        switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> > +        case 0x013B: conv = rgb48betobgr15; break;
> > +        case 0x003B: conv = rgb48letobgr15; break;
> > +        case 0x014B: conv = rgb48betobgr16; break;
> > +        case 0x004B: conv = rgb48letobgr16; break;
> > +        case 0x016B: conv = rgb48betobgr24; break;
> > +        case 0x006B: conv = rgb48letobgr24; break;
> > +        case 0x018B: conv = rgb48betobgr32; break;
> > +        case 0x008B: conv = rgb48letobgr32; break;
> > +        case 0x10B3: conv = bgr15torgb48be; break;
> > +        case 0x00B3: conv = bgr15torgb48le; break;
> > +        case 0x10B4: conv = bgr16torgb48be; break;
> > +        case 0x00B4: conv = bgr16torgb48le; break;
> > +        case 0x10B6:
> > +        case 0x00B6: conv = bgr24torgb48; break;
> > +        case 0x10B8:
> > +        case 0x00B8: conv = bgr32torgb48; break;
> > +        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> > +                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> > +        }
> > +    } else {
> > +        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> > +               sws_format_name(srcFormat), sws_format_name(dstFormat));
> > +    }
> > +
> > +    return conv;
> > +}
> 
> code duplication

hmm? 
 
> [...]
> > Index: hires.h
> > ===================================================================
> > --- hires.h	(revision 0)
> > +++ hires.h	(revision 0)
> > @@ -0,0 +1,32 @@
> > +/*
> > + * colorspace conversion routines for depths >8 bits
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#ifndef SWSCALE_HIRES_H
> > +#define SWSCALE_HIRES_H
> > +
> > +#include "swscale.h"
> > +#include "swscale_internal.h"
> > +
> > +typedef void (*SwsRGBFunc)(const uint8_t *src, uint8_t *dst, long src_size);
> > +
> > +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c);
> > +SwsFunc sws_hires_gray16rgbConv(SwsContext *c);
> > +
> > +#endif /* SWSCALE_HIRES_H */
> 
> I think we dont need a new header for this
> 
> 
> > Index: swscale_template.c
> > ===================================================================
> > --- swscale_template.c	(revision 29274)
> > +++ swscale_template.c	(working copy)
> > @@ -2130,6 +2130,90 @@
> >      }
> >  }
> >  
> > +static inline void RENAME(rgb48beToY)(uint8_t *dst, const uint8_t *src, int width)
> > +{
> > +    int i;
> > +    for (i = 0; i < width; i++) {
> > +        int r = src[i*6+0];
> > +        int g = src[i*6+2];
> > +        int b = src[i*6+4];
> > +
> > +        dst[i] = ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT);
> > +    }
> > +}
> > +
> 
> > +static inline void RENAME(rgb48beToUV)(uint8_t *dstU, uint8_t *dstV,
> > +                                       uint8_t *src1, uint8_t *src2, int width)
> > +{
> > +    int i;
> > +    assert(src1==src2);
> > +    for (i = 0; i < width; i++) {
> > +        int r = src1[12*i + 0];
> > +        int g = src1[12*i + 2];
> > +        int b = src1[12*i + 4];
> > +
> > +        dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> > +        dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> > +    }
> > +}
> [...]
> > +
> > +static inline void RENAME(rgb48leToUV)(uint8_t *dstU, uint8_t *dstV,
> > +                                       uint8_t *src1, uint8_t *src2, int width)
> > +{
> > +    int i;
> > +    assert(src1==src2);
> > +    for (i = 0; i < width; i++) {
> > +        int r = src1[12*i + 1];
> > +        int g = src1[12*i + 3];
> > +        int b = src1[12*i + 5];
> > +
> > +        dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> > +        dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> > +    }
> > +}
> 
> we dont need a seperate function to add +1 to one of its arguments

got rid of that
 
> [...]
> -- 
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 48bpp.patch
Type: text/x-diff
Size: 19156 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090515/d3e3a825/attachment.patch>



More information about the ffmpeg-devel mailing list