[Mplayer-cvslog] CVS: main/postproc swscale.c,1.34,1.35
Michael Niedermayer
michael at mplayer.dev.hu
Sun Nov 4 03:28:49 CET 2001
Update of /cvsroot/mplayer/main/postproc
In directory mplayer:/var/tmp.root/cvs-serv6450/postproc
Modified Files:
swscale.c
Log Message:
faster bgr15/16
Index: swscale.c
===================================================================
RCS file: /cvsroot/mplayer/main/postproc/swscale.c,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -r1.34 -r1.35
--- swscale.c 2 Nov 2001 19:21:02 -0000 1.34
+++ swscale.c 4 Nov 2001 02:28:23 -0000 1.35
@@ -61,6 +61,8 @@
static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL;
static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL;
static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL;
+static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
+static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL;
static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL;
static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
@@ -368,37 +370,24 @@
" jb 1b \n\t"
#define WRITEBGR16 \
- "movq %%mm2, %%mm1 \n\t" /* B */\
- "movq %%mm4, %%mm3 \n\t" /* G */\
- "movq %%mm5, %%mm6 \n\t" /* R */\
+ "pand bF8, %%mm2 \n\t" /* B */\
+ "pand bFC, %%mm4 \n\t" /* G */\
+ "pand bF8, %%mm5 \n\t" /* R */\
+ "psrlq $3, %%mm2 \n\t"\
+\
+ "movq %%mm2, %%mm1 \n\t"\
+ "movq %%mm4, %%mm3 \n\t"\
+\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm5, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm5, %%mm1 \n\t"\
\
- "punpcklbw %%mm7, %%mm3 \n\t" /* 0G0G0G0G */\
- "punpcklbw %%mm7, %%mm2 \n\t" /* 0B0B0B0B */\
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R */\
-\
- "psrlw $3, %%mm2 \n\t"\
- "psllw $3, %%mm3 \n\t"\
- "psllw $8, %%mm5 \n\t"\
-\
- "pand g16Mask, %%mm3 \n\t"\
- "pand r16Mask, %%mm5 \n\t"\
+ "psllq $3, %%mm3 \n\t"\
+ "psllq $3, %%mm4 \n\t"\
\
"por %%mm3, %%mm2 \n\t"\
- "por %%mm5, %%mm2 \n\t"\
-\
- "punpckhbw %%mm7, %%mm4 \n\t" /* 0G0G0G0G */\
- "punpckhbw %%mm7, %%mm1 \n\t" /* 0B0B0B0B */\
- "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R */\
-\
- "psrlw $3, %%mm1 \n\t"\
- "psllw $3, %%mm4 \n\t"\
- "psllw $8, %%mm6 \n\t"\
-\
- "pand g16Mask, %%mm4 \n\t"\
- "pand r16Mask, %%mm6 \n\t"\
-\
"por %%mm4, %%mm1 \n\t"\
- "por %%mm6, %%mm1 \n\t"\
\
MOVNTQ(%%mm2, (%4, %%eax, 2))\
MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
@@ -408,37 +397,25 @@
" jb 1b \n\t"
#define WRITEBGR15 \
- "movq %%mm2, %%mm1 \n\t" /* B */\
- "movq %%mm4, %%mm3 \n\t" /* G */\
- "movq %%mm5, %%mm6 \n\t" /* R */\
-\
- "punpcklbw %%mm7, %%mm3 \n\t" /* 0G0G0G0G */\
- "punpcklbw %%mm7, %%mm2 \n\t" /* 0B0B0B0B */\
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R */\
-\
- "psrlw $3, %%mm2 \n\t"\
- "psllw $2, %%mm3 \n\t"\
- "psllw $7, %%mm5 \n\t"\
+ "pand bF8, %%mm2 \n\t" /* B */\
+ "pand bF8, %%mm4 \n\t" /* G */\
+ "pand bF8, %%mm5 \n\t" /* R */\
+ "psrlq $3, %%mm2 \n\t"\
+ "psrlq $1, %%mm5 \n\t"\
+\
+ "movq %%mm2, %%mm1 \n\t"\
+ "movq %%mm4, %%mm3 \n\t"\
+\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm5, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm5, %%mm1 \n\t"\
\
- "pand g15Mask, %%mm3 \n\t"\
- "pand r15Mask, %%mm5 \n\t"\
+ "psllq $2, %%mm3 \n\t"\
+ "psllq $2, %%mm4 \n\t"\
\
"por %%mm3, %%mm2 \n\t"\
- "por %%mm5, %%mm2 \n\t"\
-\
- "punpckhbw %%mm7, %%mm4 \n\t" /* 0G0G0G0G */\
- "punpckhbw %%mm7, %%mm1 \n\t" /* 0B0B0B0B */\
- "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R */\
-\
- "psrlw $3, %%mm1 \n\t"\
- "psllw $2, %%mm4 \n\t"\
- "psllw $7, %%mm6 \n\t"\
-\
- "pand g15Mask, %%mm4 \n\t"\
- "pand r15Mask, %%mm6 \n\t"\
-\
"por %%mm4, %%mm1 \n\t"\
- "por %%mm6, %%mm1 \n\t"\
\
MOVNTQ(%%mm2, (%4, %%eax, 2))\
MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
@@ -446,6 +423,7 @@
"addl $8, %%eax \n\t"\
"cmpl %5, %%eax \n\t"\
" jb 1b \n\t"
+
// FIXME find a faster way to shuffle it to BGR24
#define WRITEBGR24 \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
More information about the MPlayer-cvslog mailing list