>From 3856f6000cf7842a139e4a89501caf8fab4b4208 Mon Sep 17 00:00:00 2001 From: Keiji Costantini Date: Wed, 13 Aug 2008 17:41:47 +0200 Subject: [PATCH] Taken out swscale_C.c and required swscale_yuv.h from nodame branch - this is the playground --- swscale_C.c | 1032 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ swscale_yuv.h | 551 ++++++++++++++++++++++++++++++ 2 files changed, 1583 insertions(+), 0 deletions(-) create mode 100644 swscale_C.c create mode 100644 swscale_yuv.h diff --git a/swscale_C.c b/swscale_C.c new file mode 100644 index 0000000..6751f49 --- /dev/null +++ b/swscale_C.c @@ -0,0 +1,1032 @@ +/* + * Copyright (C) 2001-2003 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * the C code (not assembly, mmx, ...) of this file can be used + * under the LGPL license too + */ + +#include "swscale.h" +#include "swscale_internal.h" +#include "rgb2rgb.h" +#include "swscale_yuv.h" + +static inline void yuv2yuvX_C(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, + int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) +{ +yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, + chrFilter, chrSrc, chrFilterSize, + dest, uDest, vDest, dstW, chrDstW); +} + +static inline void yuv2nv12X_C(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, + int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, + uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) +{ +yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, + chrFilter, chrSrc, chrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat); +} + +static inline void yuv2yuv1_C(int16_t *lumSrc, int16_t *chrSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) +{ + int i; + for (i=0; i>7; + + if (val&256){ + if (val<0) val=0; + else val=255; + } + + dest[i]= val; + } + + if (uDest) + for (i=0; i>7; + int v=chrSrc[i + VOFW]>>7; + + if ((u|v)&256){ + if (u<0) u=0; + else if (u>255) u=255; + if (v<0) v=0; + else if (v>255) v=255; + } + + uDest[i]= u; + vDest[i]= v; + } +} + + +/** + * vertical scale YV12 to RGB + */ +static inline void yuv2packedX_C(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, + int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, + uint8_t *dest, long dstW, long dstY) +{ + yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, + chrFilter, chrSrc, chrFilterSize, + dest, dstW, dstY); +} + +/** + * vertical bilinear scale YV12 to RGB + */ +static inline void yuv2packed2_C(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, + uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) +{ + int yalpha1=yalpha^4095; + int uvalpha1=uvalpha^4095; + int i; + +#if 0 //isn't used + if (flags&SWS_FULL_CHR_H_INT) + { + switch(dstFormat) + { + case PIX_FMT_BGR32: + if (dstFormat==PIX_FMT_RGB32) + { + int i; +#ifdef WORDS_BIGENDIAN + dest++; +#endif + for (i=0;i>19)]; + int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); + int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); + dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; + dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; + dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; + dest+= 4; + } + } + else if (dstFormat==PIX_FMT_BGR24) + { + int i; + for (i=0;i>19)]; + int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); + int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); + dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; + dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; + dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; + dest+= 3; + } + } + else if (dstFormat==PIX_FMT_BGR565) + { + int i; + for (i=0;i>19)]; + int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); + int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); + + ((uint16_t*)dest)[i] = + clip_table16b[(Y + yuvtab_40cf[U]) >>13] | + clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | + clip_table16r[(Y + yuvtab_3343[V]) >>13]; + } + } + else if (dstFormat==PIX_FMT_BGR555) + { + int i; + for (i=0;i>19)]; + int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); + int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); + + ((uint16_t*)dest)[i] = + clip_table15b[(Y + yuvtab_40cf[U]) >>13] | + clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | + clip_table15r[(Y + yuvtab_3343[V]) >>13]; + } + } + }//FULL_UV_IPOL + else + { +#endif // if 0 +YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) +} + +/** + * YV12 to RGB without scaling or interpolating + */ +static inline void yuv2packed1_C(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, + uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) +{ + const int yalpha1=0; + int i; + + uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 + const int yalpha= 4096; //FIXME ... + + if (flags&SWS_FULL_CHR_H_INT) + { + yuv2packed2_C(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y); + return; + } + + if (uvalpha < 2048) + { + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C) + }else{ + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C) + } +} + +//FIXME yuy2* can read upto 7 samples to much + +static inline void yuy2ToY_C(uint8_t *dst, uint8_t *src, long width) +{ + int i; + for (i=0; i>8)&0xFF; + int r= (((uint32_t*)src)[i]>>16)&0xFF; + + dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + } +} + +static inline void bgr32ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ + int i; + assert(src1 == src2); + for (i=0; i>8; + const int r= l>>16; + + dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; + } +} + +static inline void bgr24ToY_C(uint8_t *dst, uint8_t *src, long width) +{ + int i; + for (i=0; i>RGB2YUV_SHIFT); + } +} + +static inline void bgr24ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) +{ + int i; + for (i=0; i>(RGB2YUV_SHIFT+1)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; + } + assert(src1 == src2); +} + +static inline void rgb16ToY_C(uint8_t *dst, uint8_t *src, int width) +{ + int i; + for (i=0; i>5)&0x3F; + int r= (d>>11)&0x1F; + + dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; + } +} + +static inline void rgb16ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ + int i; + assert(src1==src2); + for (i=0; i>5)&0x07C0F83F); + + int dh2= (dh>>11) + (dh<<21); + int d= dh2 + dl; + + int b= d&0x7F; + int r= (d>>11)&0x7F; + int g= d>>21; + dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128; + dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128; + } +} + +static inline void rgb15ToY_C(uint8_t *dst, uint8_t *src, int width) +{ + int i; + for (i=0; i>5)&0x1F; + int r= (d>>10)&0x1F; + + dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; + } +} + +static inline void rgb15ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ + int i; + assert(src1==src2); + for (i=0; i>5)&0x03E0F81F); + + int dh2= (dh>>11) + (dh<<21); + int d= dh2 + dl; + + int b= d&0x7F; + int r= (d>>10)&0x7F; + int g= d>>21; + dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128; + } +} + + +static inline void rgb32ToY_C(uint8_t *dst, uint8_t *src, int width) +{ + int i; + for (i=0; i>8)&0xFF; + int b= (((uint32_t*)src)[i]>>16)&0xFF; + + dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + } +} + +static inline void rgb32ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ + int i; + assert(src1==src2); + for (i=0; i>8; + const int b= l>>16; + + dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; + } +} + +static inline void rgb24ToY_C(uint8_t *dst, uint8_t *src, int width) +{ + int i; + for (i=0; i>RGB2YUV_SHIFT); + } +} + +static inline void rgb24ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ + int i; + assert(src1==src2); + for (i=0; i>(RGB2YUV_SHIFT+1)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128; + } +} + +static inline void bgr16ToY_C(uint8_t *dst, uint8_t *src, int width) +{ + int i; + for (i=0; i>5)&0x3F; + int b= (d>>11)&0x1F; + + dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; + } +} + +static inline void bgr16ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ + int i; + assert(src1 == src2); + for (i=0; i>16) + (d0<<16))&0x07E0F81F); + + int r= d&0x3F; + int b= (d>>11)&0x3F; + int g= d>>21; + dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128; + dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128; + } +} + +static inline void bgr15ToY_C(uint8_t *dst, uint8_t *src, int width) +{ + int i; + for (i=0; i>5)&0x1F; + int b= (d>>10)&0x1F; + + dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; + } +} + +static inline void bgr15ToUV_C(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ + int i; + assert(src1 == src2); + for (i=0; i>16) + (d0<<16))&0x03E07C1F); + + int r= d&0x3F; + int b= (d>>10)&0x3F; + int g= d>>21; + dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128; + } +} + +static inline void palToY_C(uint8_t *dst, uint8_t *src, int width, uint32_t *pal) +{ + int i; + for (i=0; i>8; + dstV[i]= p>>16; + } +} + +// Bilinear / Bicubic scaling +static inline void hScale_C(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, + int16_t *filter, int16_t *filterPos, long filterSize) +{ + int i; + for (i=0; i>7, 0, (1<<15)-1); // the cubic equation does overflow ... + //dst[i] = val>>7; + } +} + +static inline uint8_t *convert_to_Y_C(int srcFormat, uint8_t *formatConvBuffer, + uint8_t *src, int srcW, uint8_t *pal) +{ + + if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE) + { + yuy2ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_UYVY422 || srcFormat==PIX_FMT_GRAY16LE) + { + uyvyToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_RGB32) + { + bgr32ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_BGR24) + { + bgr24ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_BGR565) + { + bgr16ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_BGR555) + { + bgr15ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_BGR32) + { + rgb32ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_RGB24) + { + rgb24ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_RGB565) + { + rgb16ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_RGB555) + { + rgb15ToY_C(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE) + { + palToY_C(formatConvBuffer, src, srcW, (uint32_t*)pal); + src= formatConvBuffer; + } + return src; +} + + // *** horizontal scale Y line to temp buffer +static inline void hyscale_C(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc, + int flags, int16_t *hLumFilter, + int16_t *hLumFilterPos, int hLumFilterSize, + int srcFormat, uint8_t *formatConvBuffer, uint8_t *pal) +{ + src = convert_to_Y_C(srcFormat, formatConvBuffer, src, srcW, pal); + + if (!(flags&SWS_FAST_BILINEAR)) + { + hScale_C(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); + } + else // Fast Bilinear upscale / crap downscale + { + int i; + unsigned int xpos=0; + for (i=0;i>16; + register unsigned int xalpha=(xpos&0xFFFF)>>9; + dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; + xpos+=xInc; + } + } +} + +static inline uint8_t *convert_to_UV_C(int srcFormat, + uint8_t *formatConvBuffer, + uint8_t *src1, uint8_t *src2, + int srcW, uint32_t *pal) +{ + if (srcFormat==PIX_FMT_YUYV422) + { + yuy2ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_UYVY422) + { + uyvyToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_RGB32) + { + bgr32ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_BGR24) + { + bgr24ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_BGR565) + { + bgr16ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_BGR555) + { + bgr15ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_BGR32) + { + rgb32ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_RGB24) + { + rgb24ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_RGB565) + { + rgb16ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_RGB555) + { + rgb15ToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + else if (srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE) + { + palToUV_C(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, (uint32_t*)pal); + src1= formatConvBuffer; + src2= formatConvBuffer+VOFW; + } + return src1; +} + +inline static void hcscale_C(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2, + int srcW, int xInc, int flags, int16_t *hChrFilter, + int16_t *hChrFilterPos, int hChrFilterSize, + int srcFormat, uint8_t *formatConvBuffer, uint8_t *pal) +{ + src1 = convert_to_UV_C(srcFormat, formatConvBuffer, src1, src2, srcW, (uint32_t*)pal); + src2 = formatConvBuffer+VOFW; + + if (!(flags&SWS_FAST_BILINEAR)) + { + hScale_C(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); + hScale_C(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); + } + else // Fast Bilinear upscale / crap downscale + { + int i; + unsigned int xpos=0; + for (i=0;i>16; + register unsigned int xalpha=(xpos&0xFFFF)>>9; + dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); + dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); + /* slower + dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; + dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; + */ + xpos+=xInc; + } + } +} + +int swScale_C(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + /* load a few things into local vars to make the code more readable? and faster */ + const int srcW= c->srcW; + const int dstW= c->dstW; + const int dstH= c->dstH; + const int chrDstW= c->chrDstW; + const int chrSrcW= c->chrSrcW; + const int lumXInc= c->lumXInc; + const int chrXInc= c->chrXInc; + const int dstFormat= c->dstFormat; + const int srcFormat= c->srcFormat; + const int flags= c->flags; + int16_t *vLumFilterPos= c->vLumFilterPos; + int16_t *vChrFilterPos= c->vChrFilterPos; + int16_t *hLumFilterPos= c->hLumFilterPos; + int16_t *hChrFilterPos= c->hChrFilterPos; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; + int16_t *hLumFilter= c->hLumFilter; + int16_t *hChrFilter= c->hChrFilter; + const int vLumFilterSize= c->vLumFilterSize; + const int vChrFilterSize= c->vChrFilterSize; + const int hLumFilterSize= c->hLumFilterSize; + const int hChrFilterSize= c->hChrFilterSize; + int16_t **lumPixBuf= c->lumPixBuf; + int16_t **chrPixBuf= c->chrPixBuf; + const int vLumBufSize= c->vLumBufSize; + const int vChrBufSize= c->vChrBufSize; + uint8_t *formatConvBuffer= c->formatConvBuffer; + const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; + const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); + int lastDstY; + uint8_t *pal=NULL; + + /* vars whch will change and which we need to store back in the context */ + int dstY= c->dstY; + int lumBufIndex= c->lumBufIndex; + int chrBufIndex= c->chrBufIndex; + int lastInLumBuf= c->lastInLumBuf; + int lastInChrBuf= c->lastInChrBuf; + + if (isPacked(c->srcFormat)){ + pal= src[1]; + src[0]= + src[1]= + src[2]= src[0]; + srcStride[0]= + srcStride[1]= + srcStride[2]= srcStride[0]; + } + srcStride[1]<<= c->vChrDrop; + srcStride[2]<<= c->vChrDrop; + + //printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2], + // (int)dst[0], (int)dst[1], (int)dst[2]); + +#if 0 //self test FIXME move to a vfilter or something + { + static volatile int i=0; + i++; + if (srcFormat==PIX_FMT_YUV420P && i==1 && srcSliceH>= c->srcH) + selfTest(src, srcStride, c->srcW, c->srcH); + i--; + } +#endif + + //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], + //dstStride[0],dstStride[1],dstStride[2]); + + if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) + { + static int firstTime=1; //FIXME move this into the context perhaps + if (flags & SWS_PRINT_INFO && firstTime) + { + av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" + " ->cannot do aligned memory acesses anymore\n"); + firstTime=0; + } + } + + /* Note the user might start scaling the picture in the middle so this will not get executed + this is not really intended but works currently, so ppl might do it */ + if (srcSliceY ==0){ + lumBufIndex=0; + chrBufIndex=0; + dstY=0; + lastInLumBuf= -1; + lastInChrBuf= -1; + } + + lastDstY= dstY; + + for (;dstY < dstH; dstY++){ + unsigned char *dest =dst[0]+dstStride[0]*dstY; + const int chrDstY= dstY>>c->chrDstVSubSample; + unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; + unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; + + const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input + const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input + const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input + const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input + + //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n", + // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample); + //handle holes (FAST_BILINEAR & weird filters) + if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; + if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; + //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); + assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); + assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); + + // Do we have enough lines in this slice to output the dstY line + if (lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample)) + { + //Do horizontal scaling + while(lastInLumBuf < lastLumSrcY) + { + uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; + lumBufIndex++; + //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY); + assert(lumBufIndex < 2*vLumBufSize); + assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); + assert(lastInLumBuf + 1 - srcSliceY >= 0); + //printf("%d %d\n", lumBufIndex, vLumBufSize); + hyscale_C(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, + flags, hLumFilter, hLumFilterPos, hLumFilterSize, + c->srcFormat, formatConvBuffer, pal); + lastInLumBuf++; + } + while(lastInChrBuf < lastChrSrcY) + { + uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; + uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; + chrBufIndex++; + assert(chrBufIndex < 2*vChrBufSize); + assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); + assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); + //FIXME replace parameters through context struct (some at least) + + if (!(isGray(srcFormat) || isGray(dstFormat))) + hcscale_C(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, + flags, hChrFilter, hChrFilterPos, hChrFilterSize, + c->srcFormat, formatConvBuffer, pal); + lastInChrBuf++; + } + //wrap buf index around to stay inside the ring buffer + if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; + if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; + } + else // not enough lines left in this slice -> load the rest in the buffer + { + /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n", + firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY, + lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize, + vChrBufSize, vLumBufSize);*/ + + //Do horizontal scaling + while(lastInLumBuf+1 < srcSliceY + srcSliceH) + { + uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; + lumBufIndex++; + assert(lumBufIndex < 2*vLumBufSize); + assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); + assert(lastInLumBuf + 1 - srcSliceY >= 0); + hyscale_C(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, + flags, hLumFilter, hLumFilterPos, hLumFilterSize, + c->srcFormat, formatConvBuffer, pal); + lastInLumBuf++; + } + while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH)) + { + uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; + uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; + chrBufIndex++; + assert(chrBufIndex < 2*vChrBufSize); + assert(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH); + assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); + + if (!(isGray(srcFormat) || isGray(dstFormat))) + hcscale_C(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, + flags, hChrFilter, hChrFilterPos, hChrFilterSize, + c->srcFormat, formatConvBuffer, pal); + lastInChrBuf++; + } + //wrap buf index around to stay inside the ring buffer + if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; + if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; + break; //we can't output a dstY line so let's try with the next slice + } + + if (dstY < dstH-2) + { + int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){ + const int chrSkipMask= (1<chrDstVSubSample)-1; + if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi + yuv2nv12X_C(c, + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat); + } + else if (isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like + { + const int chrSkipMask= (1<chrDstVSubSample)-1; + if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi + if (vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 + { + int16_t *lumBuf = lumPixBuf[0]; + int16_t *chrBuf= chrPixBuf[0]; + yuv2yuv1_C(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW); + } + else //General YV12 + { + yuv2yuvX_C(c, + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, uDest, vDest, dstW, chrDstW); + } + } + else + { + assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); + assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); + if (vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB + { + int chrAlpha= vChrFilter[2*dstY+1]; + yuv2packed1_C(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), + dest, dstW, chrAlpha, dstFormat, flags, dstY); + } + else if (vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB + { + int lumAlpha= vLumFilter[2*dstY+1]; + int chrAlpha= vChrFilter[2*dstY+1]; + yuv2packed2_C(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), + dest, dstW, lumAlpha, chrAlpha, dstY); + } + else //General RGB + { + yuv2packedX_C(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + } + } + } + else // hmm looks like we can't use MMX here without overwriting this array's tail + { + int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){ + const int chrSkipMask= (1<chrDstVSubSample)-1; + if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi + yuv2nv12XinC( + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, uDest, dstW, chrDstW, dstFormat); + } + else if (isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 + { + const int chrSkipMask= (1<chrDstVSubSample)-1; + if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi + yuv2yuvXinC( + vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, + vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, uDest, vDest, dstW, chrDstW); + } + else + { + assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); + assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); + yuv2packedXinC(c, + vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, + vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, + dest, dstW, dstY); + } + } + } + /* store changed local vars back in the context */ + c->dstY= dstY; + c->lumBufIndex= lumBufIndex; + c->chrBufIndex= chrBufIndex; + c->lastInLumBuf= lastInLumBuf; + c->lastInChrBuf= lastInChrBuf; + + return dstY - lastDstY; +} diff --git a/swscale_yuv.h b/swscale_yuv.h new file mode 100644 index 0000000..cc36552 --- /dev/null +++ b/swscale_yuv.h @@ -0,0 +1,551 @@ +/* + * Copyright (C) 2001-2003 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef FFMPEG_SWSCALE_YUV_H +#define FFMPEG_SWSCALE_YUV_H + +#define isPacked(x) ( \ + (x)==PIX_FMT_PAL8 \ + || (x)==PIX_FMT_YUYV422 \ + || (x)==PIX_FMT_UYVY422 \ + || isRGB(x) \ + || isBGR(x) \ + ) + + +#define RGB2YUV_SHIFT 16 +#define BY ((int)( 0.098*(1<>19); + } + + if (uDest) + for (i=0; i>19); + vDest[i]= av_clip_uint8(v>>19); + } +} + +static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, + int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, + uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) +{ + //FIXME Optimize (just quickly writen not opti..) + int i; + for (i=0; i>19); + } + + if (!uDest) + return; + + if (dstFormat == PIX_FMT_NV12) + for (i=0; i>19); + uDest[2*i+1]= av_clip_uint8(v>>19); + } + else + for (i=0; i>19); + uDest[2*i+1]= av_clip_uint8(u>>19); + } +} + +#define YSCALE_YUV_2_PACKEDX_C(type) \ + for (i=0; i<(dstW>>1); i++){\ + int j;\ + int Y1 = 1<<18;\ + int Y2 = 1<<18;\ + int U = 1<<18;\ + int V = 1<<18;\ + type av_unused *r, *b, *g;\ + const int i2= 2*i;\ + \ + for (j=0; j>=19;\ + Y2>>=19;\ + U >>=19;\ + V >>=19;\ + if ((Y1|Y2|U|V)&256)\ + {\ + if (Y1>255) Y1=255; \ + else if (Y1<0)Y1=0; \ + if (Y2>255) Y2=255; \ + else if (Y2<0)Y2=0; \ + if (U>255) U=255; \ + else if (U<0) U=0; \ + if (V>255) V=255; \ + else if (V<0) V=0; \ + } + +#define YSCALE_YUV_2_RGBX_C(type) \ + YSCALE_YUV_2_PACKEDX_C(type) \ + r = (type *)c->table_rV[V]; \ + g = (type *)(c->table_gU[U] + c->table_gV[V]); \ + b = (type *)c->table_bU[U]; \ + +#define YSCALE_YUV_2_PACKED2_C \ + for (i=0; i<(dstW>>1); i++){ \ + const int i2= 2*i; \ + int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \ + int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \ + int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \ + int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \ + +#define YSCALE_YUV_2_RGB2_C(type) \ + YSCALE_YUV_2_PACKED2_C\ + type *r, *b, *g;\ + r = (type *)c->table_rV[V];\ + g = (type *)(c->table_gU[U] + c->table_gV[V]);\ + b = (type *)c->table_bU[U];\ + +#define YSCALE_YUV_2_PACKED1_C \ + for (i=0; i<(dstW>>1); i++){\ + const int i2= 2*i;\ + int Y1= buf0[i2 ]>>7;\ + int Y2= buf0[i2+1]>>7;\ + int U= (uvbuf1[i ])>>7;\ + int V= (uvbuf1[i+VOFW])>>7;\ + +#define YSCALE_YUV_2_RGB1_C(type) \ + YSCALE_YUV_2_PACKED1_C\ + type *r, *b, *g;\ + r = (type *)c->table_rV[V];\ + g = (type *)(c->table_gU[U] + c->table_gV[V]);\ + b = (type *)c->table_bU[U];\ + +#define YSCALE_YUV_2_PACKED1B_C \ + for (i=0; i<(dstW>>1); i++){\ + const int i2= 2*i;\ + int Y1= buf0[i2 ]>>7;\ + int Y2= buf0[i2+1]>>7;\ + int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\ + int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\ + +#define YSCALE_YUV_2_RGB1B_C(type) \ + YSCALE_YUV_2_PACKED1B_C\ + type *r, *b, *g;\ + r = (type *)c->table_rV[V];\ + g = (type *)(c->table_gU[U] + c->table_gV[V]);\ + b = (type *)c->table_bU[U];\ + +#define YSCALE_YUV_2_ANYRGB_C(func, func2)\ + switch(c->dstFormat)\ + {\ + case PIX_FMT_RGB32:\ + case PIX_FMT_BGR32:\ + func(uint32_t)\ + ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\ + ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\ + } \ + break;\ + case PIX_FMT_RGB24:\ + func(uint8_t)\ + ((uint8_t*)dest)[0]= r[Y1];\ + ((uint8_t*)dest)[1]= g[Y1];\ + ((uint8_t*)dest)[2]= b[Y1];\ + ((uint8_t*)dest)[3]= r[Y2];\ + ((uint8_t*)dest)[4]= g[Y2];\ + ((uint8_t*)dest)[5]= b[Y2];\ + dest+=6;\ + }\ + break;\ + case PIX_FMT_BGR24:\ + func(uint8_t)\ + ((uint8_t*)dest)[0]= b[Y1];\ + ((uint8_t*)dest)[1]= g[Y1];\ + ((uint8_t*)dest)[2]= r[Y1];\ + ((uint8_t*)dest)[3]= b[Y2];\ + ((uint8_t*)dest)[4]= g[Y2];\ + ((uint8_t*)dest)[5]= r[Y2];\ + dest+=6;\ + }\ + break;\ + case PIX_FMT_RGB565:\ + case PIX_FMT_BGR565:\ + {\ + const int dr1= dither_2x2_8[y&1 ][0];\ + const int dg1= dither_2x2_4[y&1 ][0];\ + const int db1= dither_2x2_8[(y&1)^1][0];\ + const int dr2= dither_2x2_8[y&1 ][1];\ + const int dg2= dither_2x2_4[y&1 ][1];\ + const int db2= dither_2x2_8[(y&1)^1][1];\ + func(uint16_t)\ + ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ + ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ + }\ + }\ + break;\ + case PIX_FMT_RGB555:\ + case PIX_FMT_BGR555:\ + {\ + const int dr1= dither_2x2_8[y&1 ][0];\ + const int dg1= dither_2x2_8[y&1 ][1];\ + const int db1= dither_2x2_8[(y&1)^1][0];\ + const int dr2= dither_2x2_8[y&1 ][1];\ + const int dg2= dither_2x2_8[y&1 ][0];\ + const int db2= dither_2x2_8[(y&1)^1][1];\ + func(uint16_t)\ + ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ + ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ + }\ + }\ + break;\ + case PIX_FMT_RGB8:\ + case PIX_FMT_BGR8:\ + {\ + const uint8_t * const d64= dither_8x8_73[y&7];\ + const uint8_t * const d32= dither_8x8_32[y&7];\ + func(uint8_t)\ + ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\ + ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\ + }\ + }\ + break;\ + case PIX_FMT_RGB4:\ + case PIX_FMT_BGR4:\ + {\ + const uint8_t * const d64= dither_8x8_73 [y&7];\ + const uint8_t * const d128=dither_8x8_220[y&7];\ + func(uint8_t)\ + ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\ + + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\ + }\ + }\ + break;\ + case PIX_FMT_RGB4_BYTE:\ + case PIX_FMT_BGR4_BYTE:\ + {\ + const uint8_t * const d64= dither_8x8_73 [y&7];\ + const uint8_t * const d128=dither_8x8_220[y&7];\ + func(uint8_t)\ + ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\ + ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\ + }\ + }\ + break;\ + case PIX_FMT_MONOBLACK:\ + {\ + const uint8_t * const d128=dither_8x8_220[y&7];\ + uint8_t *g= c->table_gU[128] + c->table_gV[128];\ + for (i=0; i>19) + d128[0]];\ + acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ + acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ + acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ + acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ + acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ + acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ + acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\ + ((uint8_t*)dest)[0]= acc;\ + dest++;\ + }\ +\ +/*\ +((uint8_t*)dest)-= dstW>>4;\ +{\ + int acc=0;\ + int left=0;\ + static int top[1024];\ + static int last_new[1024][1024];\ + static int last_in3[1024][1024];\ + static int drift[1024][1024];\ + int topLeft=0;\ + int shift=0;\ + int count=0;\ + const uint8_t * const d128=dither_8x8_220[y&7];\ + int error_new=0;\ + int error_in3=0;\ + int f=0;\ + \ + for (i=dstW>>1; i>19);\ + int in2 = (76309 * (in - 16) + 32768) >> 16;\ + int in3 = (in2 < 0) ? 0 : ((in2 > 255) ? 255 : in2);\ + int old= (left*7 + topLeft + top[i]*5 + top[i+1]*3)/20 + in3\ + + (last_new[y][i] - in3)*f/256;\ + int new= old> 128 ? 255 : 0;\ +\ + error_new+= FFABS(last_new[y][i] - new);\ + error_in3+= FFABS(last_in3[y][i] - in3);\ + f= error_new - error_in3*4;\ + if (f<0) f=0;\ + if (f>256) f=256;\ +\ + topLeft= top[i];\ + left= top[i]= old - new;\ + last_new[y][i]= new;\ + last_in3[y][i]= in3;\ +\ + acc+= acc + (new&1);\ + if ((i&7)==6){\ + ((uint8_t*)dest)[0]= acc;\ + ((uint8_t*)dest)++;\ + }\ + }\ +}\ +*/\ + }\ + break;\ + case PIX_FMT_YUYV422:\ + func2\ + ((uint8_t*)dest)[2*i2+0]= Y1;\ + ((uint8_t*)dest)[2*i2+1]= U;\ + ((uint8_t*)dest)[2*i2+2]= Y2;\ + ((uint8_t*)dest)[2*i2+3]= V;\ + } \ + break;\ + case PIX_FMT_UYVY422:\ + func2\ + ((uint8_t*)dest)[2*i2+0]= U;\ + ((uint8_t*)dest)[2*i2+1]= Y1;\ + ((uint8_t*)dest)[2*i2+2]= V;\ + ((uint8_t*)dest)[2*i2+3]= Y2;\ + } \ + break;\ + }\ + + +static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, + int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, + uint8_t *dest, int dstW, int y) +{ + int i; + switch(c->dstFormat) + { + case PIX_FMT_BGR32: + case PIX_FMT_RGB32: + YSCALE_YUV_2_RGBX_C(uint32_t) + ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1]; + ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2]; + } + break; + case PIX_FMT_RGB24: + YSCALE_YUV_2_RGBX_C(uint8_t) + ((uint8_t*)dest)[0]= r[Y1]; + ((uint8_t*)dest)[1]= g[Y1]; + ((uint8_t*)dest)[2]= b[Y1]; + ((uint8_t*)dest)[3]= r[Y2]; + ((uint8_t*)dest)[4]= g[Y2]; + ((uint8_t*)dest)[5]= b[Y2]; + dest+=6; + } + break; + case PIX_FMT_BGR24: + YSCALE_YUV_2_RGBX_C(uint8_t) + ((uint8_t*)dest)[0]= b[Y1]; + ((uint8_t*)dest)[1]= g[Y1]; + ((uint8_t*)dest)[2]= r[Y1]; + ((uint8_t*)dest)[3]= b[Y2]; + ((uint8_t*)dest)[4]= g[Y2]; + ((uint8_t*)dest)[5]= r[Y2]; + dest+=6; + } + break; + case PIX_FMT_RGB565: + case PIX_FMT_BGR565: + { + const int dr1= dither_2x2_8[y&1 ][0]; + const int dg1= dither_2x2_4[y&1 ][0]; + const int db1= dither_2x2_8[(y&1)^1][0]; + const int dr2= dither_2x2_8[y&1 ][1]; + const int dg2= dither_2x2_4[y&1 ][1]; + const int db2= dither_2x2_8[(y&1)^1][1]; + YSCALE_YUV_2_RGBX_C(uint16_t) + ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; + ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; + } + } + break; + case PIX_FMT_RGB555: + case PIX_FMT_BGR555: + { + const int dr1= dither_2x2_8[y&1 ][0]; + const int dg1= dither_2x2_8[y&1 ][1]; + const int db1= dither_2x2_8[(y&1)^1][0]; + const int dr2= dither_2x2_8[y&1 ][1]; + const int dg2= dither_2x2_8[y&1 ][0]; + const int db2= dither_2x2_8[(y&1)^1][1]; + YSCALE_YUV_2_RGBX_C(uint16_t) + ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; + ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; + } + } + break; + case PIX_FMT_RGB8: + case PIX_FMT_BGR8: + { + const uint8_t * const d64= dither_8x8_73[y&7]; + const uint8_t * const d32= dither_8x8_32[y&7]; + YSCALE_YUV_2_RGBX_C(uint8_t) + ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]]; + ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]]; + } + } + break; + case PIX_FMT_RGB4: + case PIX_FMT_BGR4: + { + const uint8_t * const d64= dither_8x8_73 [y&7]; + const uint8_t * const d128=dither_8x8_220[y&7]; + YSCALE_YUV_2_RGBX_C(uint8_t) + ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]] + +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4); + } + } + break; + case PIX_FMT_RGB4_BYTE: + case PIX_FMT_BGR4_BYTE: + { + const uint8_t * const d64= dither_8x8_73 [y&7]; + const uint8_t * const d128=dither_8x8_220[y&7]; + YSCALE_YUV_2_RGBX_C(uint8_t) + ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]; + ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]]; + } + } + break; + case PIX_FMT_MONOBLACK: + { + const uint8_t * const d128=dither_8x8_220[y&7]; + uint8_t *g= c->table_gU[128] + c->table_gV[128]; + int acc=0; + for (i=0; i>=19; + Y2>>=19; + if ((Y1|Y2)&256) + { + if (Y1>255) Y1=255; + else if (Y1<0)Y1=0; + if (Y2>255) Y2=255; + else if (Y2<0)Y2=0; + } + acc+= acc + g[Y1+d128[(i+0)&7]]; + acc+= acc + g[Y2+d128[(i+1)&7]]; + if ((i&7)==6){ + ((uint8_t*)dest)[0]= acc; + dest++; + } + } + } + break; + case PIX_FMT_YUYV422: + YSCALE_YUV_2_PACKEDX_C(void) + ((uint8_t*)dest)[2*i2+0]= Y1; + ((uint8_t*)dest)[2*i2+1]= U; + ((uint8_t*)dest)[2*i2+2]= Y2; + ((uint8_t*)dest)[2*i2+3]= V; + } + break; + case PIX_FMT_UYVY422: + YSCALE_YUV_2_PACKEDX_C(void) + ((uint8_t*)dest)[2*i2+0]= U; + ((uint8_t*)dest)[2*i2+1]= Y1; + ((uint8_t*)dest)[2*i2+2]= V; + ((uint8_t*)dest)[2*i2+3]= Y2; + } + break; + } +} + +#endif /* FFMPEG_SWSCALE_YUV_H */ -- 1.5.6.4