[FFmpeg-devel] [PATCH] Yet another stab at RGB48 support
Michael Niedermayer
michaelni
Wed May 13 01:59:19 CEST 2009
On Sun, May 10, 2009 at 03:21:23PM +0300, Kostya wrote:
> On Sat, May 09, 2009 at 07:31:20PM +0200, Diego Biurrun wrote:
> > On Sat, May 09, 2009 at 02:28:18PM +0300, Kostya wrote:
> > > $subj
> >
> > Changelog update is missing.
> >
> > $nits below
> >
> [...]
> reformatted, splitted some long lines.
> Since this is diff against libswscale, no Changelog entry (but I
> remember about it).
[...]
> +static int rgb48togray16_template(SwsContext *c, uint8_t *src[],
> + int srcStride[], int srcSliceY,
> + int srcSliceH, uint8_t *dst[],
> + int dstStride[], const int srcLE,
> + const int dstLE) {
> + int length = c->srcW,
> + y = srcSliceY,
> + height = srcSliceH,
> + stride_s2 = srcStride[0] / 2,
> + stride_d2 = dstStride[0] / 2,
> + i;
> + uint16_t *s = (uint16_t *) src[0], *d = (uint16_t *) dst[0] + stride_d2 * y;
> +
> + for (i = 0; i < height; i++) {
> + if (!srcLE && !dstLE) rgb48togray16row_template(s, d, length, 0, 0);
> + else if (!srcLE && dstLE) rgb48togray16row_template(s, d, length, 0, 1);
> + else if ( srcLE && !dstLE) rgb48togray16row_template(s, d, length, 1, 0);
> + else rgb48togray16row_template(s, d, length, 1, 1);
> + s += stride_s2;
> + d += stride_d2;
> + }
> + return height;
> +}
> +
> +#define FUNC_RGB48TOGRAY16(name, sLE, dLE) \
> + static int name(SwsContext *c, uint8_t *s[], int sS[], int sSY, int sSH, \
> + uint8_t *d[], int dS[]) { \
> + return rgb48togray16_template(c, s, sS, sSY, sSH, d, dS, sLE, dLE); \
> + }
> +
> +FUNC_RGB48TOGRAY16( rgb48letogray16le, 1, 1)
> +FUNC_RGB48TOGRAY16( rgb48betogray16le, 0, 1)
> +FUNC_RGB48TOGRAY16( rgb48letogray16be, 1, 0)
> +FUNC_RGB48TOGRAY16( rgb48betogray16be, 0, 0)
bloat, a single function does the trick too in this case
[...]
> +static inline void rgb1516to48(const uint8_t *src, uint8_t *d, long src_size,
> + const int be, const int g6, const int bgr)
> +{
> + const uint16_t *s = (const uint16_t*) src, *end = s + src_size/2;
unreadable
[...]
> +static void rgb48to48(const uint8_t *src, uint8_t *dst, long src_size)
> +{
> + long i = 23 - src_size;
> + uint8_t *d = dst - i;
> + const uint8_t *s = src - i;
> +#if HAVE_MMX
> + __asm__ volatile(
> + "test %0, %0 \n\t"
> + "jns 2f \n\t"
> + PREFETCH" (%1, %0) \n\t"
> + ASMALIGN(4)
> + "1: \n\t"
> + PREFETCH" 32(%1, %0) \n\t"
> + "movq (%1, %0), %%mm0 \n\t"
> + "movq 8(%1, %0), %%mm1 \n\t"
> + "movq 16(%1, %0), %%mm2 \n\t"
> + "movq %%mm0, %%mm3 \n\t"
> + "movq %%mm1, %%mm4 \n\t"
> + "movq %%mm2, %%mm5 \n\t"
> + "psllw $8, %%mm0 \n\t"
> + "psllw $8, %%mm1 \n\t"
> + "psllw $8, %%mm2 \n\t"
> + "psrlw $8, %%mm3 \n\t"
> + "psrlw $8, %%mm4 \n\t"
> + "psrlw $8, %%mm5 \n\t"
> + "por %%mm3, %%mm0 \n\t"
> + "por %%mm4, %%mm1 \n\t"
> + "por %%mm5, %%mm2 \n\t"
> + "movq %%mm0, (%2, %0) \n\t"
> + "movq %%mm1, 8(%2, %0) \n\t"
> + "movq %%mm2, 16(%2, %0) \n\t"
> + "add $24, %0 \n\t"
> + "js 1b \n\t"
> + "2: \n\t"
> + SFENCE" \n\t"
> + EMMS" \n\t"
> + : "+&r"(i)
> + : "r" (s), "r" (d)
> + : "memory");
> +#endif
> + for (; i < 21; i += 4) {
> + unsigned int x = *(uint32_t*)&s[i];
> + *(uint32_t*)&d[i] = ((x>>8) & 0x00FF00FF) + ((x<<8) & 0xFF00FF00);
> + }
> + if (i < 23) {
> + d[i] = s[i+1];
> + d[i+1] = s[i];
> + }
is this some duplicate of a gray le <-> be function?
> +}
> +
> +static void rgb48leto24(const uint8_t *s, uint8_t *dst, long src_size)
> +{
> + uint8_t *d = dst;
> + const uint8_t *end = s + src_size;
> +#if HAVE_MMX
> + __asm__ volatile(
> + PREFETCH" (%1) \n\t"
> + "jmp 2f \n\t"
> + ASMALIGN(4)
> + "1: \n\t"
> + PREFETCH" 32(%1) \n\t"
> + "movq (%1), %%mm0 \n\t"
> + "movq 8(%1), %%mm1 \n\t"
> + "movq 16(%1), %%mm2 \n\t"
> + "movq 24(%1), %%mm3 \n\t"
> + "movq 32(%1), %%mm4 \n\t"
> + "movq 40(%1), %%mm5 \n\t"
> + "psrlw $8, %%mm0 \n\t"
> + "psrlw $8, %%mm1 \n\t"
> + "psrlw $8, %%mm2 \n\t"
> + "psrlw $8, %%mm3 \n\t"
> + "psrlw $8, %%mm4 \n\t"
> + "psrlw $8, %%mm5 \n\t"
> + "packuswb %%mm1, %%mm0 \n\t"
> + "packuswb %%mm3, %%mm2 \n\t"
> + "packuswb %%mm5, %%mm4 \n\t"
> + "movq %%mm0, (%0) \n\t"
> + "movq %%mm2, 8(%0) \n\t"
> + "movq %%mm4, 16(%0) \n\t"
> + "add $24, %0 \n\t"
> + "add $48, %1 \n\t"
> + "2: \n\t"
> + "cmp %1, %2 \n\t"
> + "ja 1b \n\t"
> + SFENCE" \n\t"
> + EMMS" \n\t"
> + : "+&r"(d), "+&r"(s)
> + : "r" (end-47)
> + : "memory");
> +#endif
> + for (; s < end; s += 2, d++)
> + *d = *(s+1);
> +}
same?
> +
> +static void rgb48beto24(const uint8_t *s, uint8_t *dst, long src_size)
> +{
> + uint8_t *d = dst;
> + const uint8_t *end = s + src_size;
> +#if HAVE_MMX
> + __asm__ volatile(
> + PREFETCH" (%1) \n\t"
> + "pcmpeqw %%mm7, %%mm7 \n\t"
> + "psrlw $8, %%mm7 \n\t"
> + "movq %%mm7, %%mm6 \n\t"
> + "jmp 2f \n\t"
> + ASMALIGN(4)
> + "1: \n\t"
> + PREFETCH" 32(%1) \n\t"
> + "movq (%1), %%mm0 \n\t"
> + "movq 8(%1), %%mm1 \n\t"
> + "movq 16(%1), %%mm2 \n\t"
> + "movq 24(%1), %%mm3 \n\t"
> + "movq 32(%1), %%mm4 \n\t"
> + "movq 40(%1), %%mm5 \n\t"
> + "pand %%mm6, %%mm0 \n\t"
> + "pand %%mm7, %%mm1 \n\t"
> + "pand %%mm6, %%mm2 \n\t"
> + "pand %%mm7, %%mm3 \n\t"
> + "pand %%mm6, %%mm4 \n\t"
> + "pand %%mm7, %%mm5 \n\t"
> + "packuswb %%mm1, %%mm0 \n\t"
> + "packuswb %%mm3, %%mm2 \n\t"
> + "packuswb %%mm5, %%mm4 \n\t"
> + "movq %%mm0, (%0) \n\t"
> + "movq %%mm2, 8(%0) \n\t"
> + "movq %%mm4, 16(%0) \n\t"
> + "add $24, %0 \n\t"
> + "add $48, %1 \n\t"
> + "2: \n\t"
> + "cmp %1, %2 \n\t"
> + "ja 1b \n\t"
> + SFENCE" \n\t"
> + EMMS" \n\t"
> + : "+&r"(d), "+&r"(s)
> + : "r" (end-47)
> + : "memory");
> +#endif
> + for (; s < end; s += 2, d++)
> + *d = *s;
> +}
> +
so seems this ...
> +static void rgb24to48(const uint8_t *s, uint8_t *dst, long src_size)
> +{
> + uint8_t *d = dst;
> + const uint8_t *end = s + src_size;
> +#if HAVE_MMX
> + __asm__ volatile(
> + PREFETCH" (%1) \n\t"
> + "jmp 2f \n\t"
> + ASMALIGN(4)
> + "1: \n\t"
> + PREFETCH" 32(%1) \n\t"
> + "movd (%1), %%mm0 \n\t"
> + "movd 4(%1), %%mm1 \n\t"
> + "movd 8(%1), %%mm2 \n\t"
> + "movd 12(%1), %%mm3 \n\t"
> + "movd 16(%1), %%mm4 \n\t"
> + "movd 20(%1), %%mm5 \n\t"
> + "punpcklbw %%mm0, %%mm0 \n\t"
> + "punpcklbw %%mm1, %%mm1 \n\t"
> + "punpcklbw %%mm2, %%mm2 \n\t"
> + "punpcklbw %%mm3, %%mm3 \n\t"
> + "punpcklbw %%mm4, %%mm4 \n\t"
> + "punpcklbw %%mm5, %%mm5 \n\t"
> + MOVNTQ" %%mm0, (%0) \n\t"
> + MOVNTQ" %%mm1, 8(%0) \n\t"
> + MOVNTQ" %%mm2, 16(%0) \n\t"
> + MOVNTQ" %%mm3, 24(%0) \n\t"
> + MOVNTQ" %%mm4, 32(%0) \n\t"
> + MOVNTQ" %%mm5, 40(%0) \n\t"
> + "add $48, %0 \n\t"
> + "add $24, %1 \n\t"
> + "2: \n\t"
> + "cmp %1, %2 \n\t"
> + "ja 1b \n\t"
> + SFENCE" \n\t"
> + EMMS" \n\t"
> + : "+&r"(d), "+&r"(s)
> + : "r" (end-23)
> + : "memory");
> +#endif
> + for (; s < end; s++, d += 2) {
> + int x = *s;
> + *(uint16_t*)d = (x << 8) + x;
> + }
> +}
> +
and this
> +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c)
> +{
> + const enum PixelFormat srcFormat = c->srcFormat;
> + const enum PixelFormat dstFormat = c->dstFormat;
> + const int srcCbe = srcFormat==PIX_FMT_RGB48BE; /* components big-endian */
> + const int dstCbe = dstFormat==PIX_FMT_RGB48BE;
> + const int srcBpp = (fmt_depth(srcFormat) + 7) >> 3;
> + const int dstBpp = (fmt_depth(dstFormat) + 7) >> 3;
> + const int srcId = (fmt_depth(srcFormat) >> 2) - (srcBpp > 4); /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8, 48:11 */
> + const int dstId = (fmt_depth(dstFormat) >> 2) - (dstBpp > 4);
> + SwsRGBFunc conv = NULL;
> +
> + if ( (isBGR(srcFormat) && isBGR(dstFormat))
> + || (isRGB(srcFormat) && isRGB(dstFormat))) {
> + switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> + case 0x013B: conv = rgb48beto15; break;
> + case 0x003B: conv = rgb48leto15; break;
> + case 0x014B: conv = rgb48beto16; break;
> + case 0x004B: conv = rgb48leto16; break;
> + case 0x016B: conv = rgb48beto24; break;
> + case 0x006B: conv = rgb48leto24; break;
> + case 0x018B: conv = rgb48beto32; break;
> + case 0x008B: conv = rgb48leto32; break;
> + case 0x10B3: conv = rgb15to48be; break;
> + case 0x00B3: conv = rgb15to48le; break;
> + case 0x10B4: conv = rgb16to48be; break;
> + case 0x00B4: conv = rgb16to48le; break;
> + case 0x10B6:
> + case 0x00B6: conv = rgb24to48; break;
> + case 0x10B8:
> + case 0x00B8: conv = rgb32to48; break;
> + case 0x10BB:
> + case 0x01BB: conv = rgb48to48; break;
> + default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> + sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> + }
> + } else if ( (isBGR(srcFormat) && isRGB(dstFormat))
> + || (isRGB(srcFormat) && isBGR(dstFormat))) {
> + switch(srcId | (dstId<<4) | (srcCbe<<8) | (dstCbe<<12)){
> + case 0x013B: conv = rgb48betobgr15; break;
> + case 0x003B: conv = rgb48letobgr15; break;
> + case 0x014B: conv = rgb48betobgr16; break;
> + case 0x004B: conv = rgb48letobgr16; break;
> + case 0x016B: conv = rgb48betobgr24; break;
> + case 0x006B: conv = rgb48letobgr24; break;
> + case 0x018B: conv = rgb48betobgr32; break;
> + case 0x008B: conv = rgb48letobgr32; break;
> + case 0x10B3: conv = bgr15torgb48be; break;
> + case 0x00B3: conv = bgr15torgb48le; break;
> + case 0x10B4: conv = bgr16torgb48be; break;
> + case 0x00B4: conv = bgr16torgb48le; break;
> + case 0x10B6:
> + case 0x00B6: conv = bgr24torgb48; break;
> + case 0x10B8:
> + case 0x00B8: conv = bgr32torgb48; break;
> + default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> + sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
> + }
> + } else {
> + av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
> + sws_format_name(srcFormat), sws_format_name(dstFormat));
> + }
> +
> + return conv;
> +}
code duplication
[...]
> Index: hires.h
> ===================================================================
> --- hires.h (revision 0)
> +++ hires.h (revision 0)
> @@ -0,0 +1,32 @@
> +/*
> + * colorspace conversion routines for depths >8 bits
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef SWSCALE_HIRES_H
> +#define SWSCALE_HIRES_H
> +
> +#include "swscale.h"
> +#include "swscale_internal.h"
> +
> +typedef void (*SwsRGBFunc)(const uint8_t *src, uint8_t *dst, long src_size);
> +
> +SwsRGBFunc sws_hires_getRGBFunc(SwsContext *c);
> +SwsFunc sws_hires_gray16rgbConv(SwsContext *c);
> +
> +#endif /* SWSCALE_HIRES_H */
I think we dont need a new header for this
> Index: swscale_template.c
> ===================================================================
> --- swscale_template.c (revision 29274)
> +++ swscale_template.c (working copy)
> @@ -2130,6 +2130,90 @@
> }
> }
>
> +static inline void RENAME(rgb48beToY)(uint8_t *dst, const uint8_t *src, int width)
> +{
> + int i;
> + for (i = 0; i < width; i++) {
> + int r = src[i*6+0];
> + int g = src[i*6+2];
> + int b = src[i*6+4];
> +
> + dst[i] = ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT);
> + }
> +}
> +
> +static inline void RENAME(rgb48beToUV)(uint8_t *dstU, uint8_t *dstV,
> + uint8_t *src1, uint8_t *src2, int width)
> +{
> + int i;
> + assert(src1==src2);
> + for (i = 0; i < width; i++) {
> + int r = src1[12*i + 0];
> + int g = src1[12*i + 2];
> + int b = src1[12*i + 4];
> +
> + dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> + dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> + }
> +}
[...]
> +
> +static inline void RENAME(rgb48leToUV)(uint8_t *dstU, uint8_t *dstV,
> + uint8_t *src1, uint8_t *src2, int width)
> +{
> + int i;
> + assert(src1==src2);
> + for (i = 0; i < width; i++) {
> + int r = src1[12*i + 1];
> + int g = src1[12*i + 3];
> + int b = src1[12*i + 5];
> +
> + dstU[i] = ((RU*r + GU*g + BU*b) >> RGB2YUV_SHIFT) + 128;
> + dstV[i] = ((RV*r + GV*g + BV*b) >> RGB2YUV_SHIFT) + 128;
> + }
> +}
we dont need a seperate function to add +1 to one of its arguments
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Republics decline into democracies and democracies degenerate into
despotisms. -- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090513/c874092c/attachment.pgp>
More information about the ffmpeg-devel
mailing list