[FFmpeg-devel] [PATCH] yuv8->yuv16 vertical scaler.
Michael Niedermayer
michaelni
Thu Aug 13 18:10:10 CEST 2009
On Thu, Aug 13, 2009 at 11:22:08AM -0300, Ramiro Polla wrote:
> On Wed, Aug 12, 2009 at 10:43 PM, Michael Niedermayer<michaelni at gmx.at> wrote:
> > On Wed, Aug 12, 2009 at 09:38:38PM -0300, Ramiro Polla wrote:
> >> $subj
> >
> >>  swscale.c          |   66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  swscale_template.c |   20 ++++++++++++++++
> >>  2 files changed, 86 insertions(+)
> >> e97c6ae96069d3e1b576d547dd8674b278b94efd  0002-yuv8-yuv16-vertical-scaler.patch
> >> From 7bb251a34a9121dcdbb522b8446a3cd6aec4a2b3 Mon Sep 17 00:00:00 2001
> >> From: Ramiro Polla <ramiro.polla at gmail.com>
> >> Date: Wed, 12 Aug 2009 20:10:05 -0300
> >> Subject: [PATCH] yuv8->yuv16 vertical scaler.
> >>
> >> ---
> >>  swscale.c          |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  swscale_template.c |   20 +++++++++++++++
> >>  2 files changed, 86 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/swscale.c b/swscale.c
> >> index 6e0535c..1880bbf 100644
> >> --- a/swscale.c
> >> +++ b/swscale.c
> >> @@ -474,6 +474,72 @@ const char *sws_format_name(enum PixelFormat format)
> >>      }
> >>  }
> >>
> >> +static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> >> +                                   const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> >> +                                   const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
> >> +                                   enum PixelFormat dstFormat)
> >> +{
> >> +    //FIXME Optimize (just quickly written not optimized..)
> >> +    int i;
> >> +    for (i=0; i<dstW; i++)
> >> +    {
> >> +        int val=1<<18;
> >> +        int j;
> >> +        for (j=0; j<lumFilterSize; j++)
> >> +            val += lumSrc[j][i] * lumFilter[j];
> >> +
> >> +        if (isBE(dstFormat)) {
> >> +            dest[2*i+0]= av_clip_uint8(val>>19);
> >> +            dest[2*i+1]= 0;
> >> +        } else {
> >> +            dest[2*i+0]= 0;
> >> +            dest[2*i+1]= av_clip_uint8(val>>19);
> >> +        }
> >
> > It's
> >
> > dest[2*i+0]=
> > dest[2*i+1]= av_clip_uint8(val>>19);
> >
> > white-> white 0xFF -> 0xFFFF
> >
> > But you should not throw bits away; thus it really should be writing 16 bits.
>
> Hmm, that felt stupid.
>
> > Also, this touches on the "how to handle non-native endian formats" issue;
> > checking in the inner loop instead of a separate bswap loop may or may not
> > be wise ...
>
> Attached patch templates the code for BE and LE. It needs another
> patch for common.h which is also attached.
>
> Ramiro Polla
> common.h | 11 +++++++++++
> 1 file changed, 11 insertions(+)
> 1aecd7fd4cd19aabe79e0603cac34b09cb8fd286 av_clip_uint16.diff
> diff --git a/libavutil/common.h b/libavutil/common.h
> index 47666ab..9fac1c0 100644
> --- a/libavutil/common.h
> +++ b/libavutil/common.h
ok
[...]
> swscale.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> swscale_template.c | 20 ++++++++++
> 2 files changed, 121 insertions(+)
> b74deb46c8fb7701b24a0624654e5acae91280db yuv8_yuv16.diff
> diff --git a/swscale.c b/swscale.c
> index 2adf393..0576dde 100644
> --- a/swscale.c
> +++ b/swscale.c
> @@ -74,6 +74,7 @@ untested special converters
> #include "swscale.h"
> #include "swscale_internal.h"
> #include "rgb2rgb.h"
> +#include "libavutil/intreadwrite.h"
> #include "libavutil/x86_cpu.h"
> #include "libavutil/bswap.h"
>
> @@ -474,6 +475,106 @@ const char *sws_format_name(enum PixelFormat format)
> }
> }
>
> +static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> + const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
> + int big_endian)
> +{
> + //FIXME Optimize (just quickly written not optimized..)
> + int i;
> + for (i=0; i<dstW; i++)
> + {
> + int val=1<<10;
> + int j;
> + for (j=0; j<lumFilterSize; j++)
> + val += lumSrc[j][i] * lumFilter[j];
> +
> + if (big_endian) {
> + AV_WB16(&dest[i], av_clip_uint16(val>>11));
> + } else {
> + AV_WL16(&dest[i], av_clip_uint16(val>>11));
> + }
> + }
> +
> + if (uDest)
> + for (i=0; i<chrDstW; i++)
> + {
> + int u=1<<10;
> + int v=1<<10;
> + int j;
> + for (j=0; j<chrFilterSize; j++)
> + {
> + u += chrSrc[j][i] * chrFilter[j];
> + v += chrSrc[j][i + VOFW] * chrFilter[j];
> + }
> +
> + if (big_endian) {
> + AV_WB16(&uDest[i], av_clip_uint16(u>>11));
> + AV_WB16(&vDest[i], av_clip_uint16(v>>11));
> + } else {
> + AV_WL16(&uDest[i], av_clip_uint16(u>>11));
> + AV_WL16(&vDest[i], av_clip_uint16(v>>11));
> + }
> + }
> +
> + if (CONFIG_SWSCALE_ALPHA && aDest)
> + for (i=0; i<dstW; i++){
> + int val=1<<10;
> + int j;
> + for (j=0; j<lumFilterSize; j++)
> + val += alpSrc[j][i] * lumFilter[j];
> +
> + if (big_endian) {
> + AV_WB16(&aDest[i], av_clip_uint16(val>>11));
> + } else {
> + AV_WL16(&aDest[i], av_clip_uint16(val>>11));
> + }
> + }
> +
> +}
> +
> +static inline void yuv2yuvX16BEinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> + const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW)
> +{
> + yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
> + chrFilter, chrSrc, chrFilterSize,
> + alpSrc,
> + dest, uDest, vDest, aDest,
> + dstW, chrDstW, 1);
> +}
> +
> +static inline void yuv2yuvX16LEinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> + const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW)
> +{
> + yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
> + chrFilter, chrSrc, chrFilterSize,
> + alpSrc,
> + dest, uDest, vDest, aDest,
> + dstW, chrDstW, 0);
> +}
> +
> +static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
> + const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
> + const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
> + enum PixelFormat dstFormat)
> +{
> + if (isBE(dstFormat))
> + yuv2yuvX16BEinC(lumFilter, lumSrc, lumFilterSize,
> + chrFilter, chrSrc, chrFilterSize,
> + alpSrc,
> + dest, uDest, vDest, aDest,
> + dstW, chrDstW);
> + else
> + yuv2yuvX16LEinC(lumFilter, lumSrc, lumFilterSize,
> + chrFilter, chrSrc, chrFilterSize,
> + alpSrc,
> + dest, uDest, vDest, aDest,
> + dstW, chrDstW);
> +
> +}
Isn't there one layer (yuv2yuvX16BEinC/yuv2yuvX16LEinC) too much?
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
The greatest way to live with honor in this world is to be what we pretend
to be. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090813/36b7c92c/attachment.pgp>
More information about the ffmpeg-devel
mailing list