[Mplayer-cvslog] CVS: main/libavcodec/i386 dsputil_mmx.c,1.2,1.3
Nick Kurshev
nick at mplayer.dev.hu
Tue Jul 17 11:08:04 CEST 2001
Update of /cvsroot/mplayer/main/libavcodec/i386
In directory mplayer:/var/tmp.root/cvs-serv23114/main/libavcodec/i386
Modified Files:
dsputil_mmx.c
Log Message:
Minor improvements and 3dNow! - MMX2 support
Index: dsputil_mmx.c
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/i386/dsputil_mmx.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- dsputil_mmx.c 16 Jul 2001 09:16:20 -0000 1.2
+++ dsputil_mmx.c 17 Jul 2001 09:07:57 -0000 1.3
@@ -20,6 +20,8 @@
*/
/* pixel operations */
+static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 };
+static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 };
/*
cropTable[0..255] = idx;
@@ -31,7 +33,7 @@
const DCTELEM *p;
UINT8 *pix;
int i;
-
+
/* read the pixels */
p = block;
pix = pixels;
@@ -68,7 +70,7 @@
const DCTELEM *p;
UINT8 *pix;
int i;
-
+
/* read the pixels */
p = block;
pix = pixels;
@@ -104,21 +106,724 @@
emms();
}
-static void put_pixels_long(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+static void put_pixels(UINT8 *block, const UINT8 *pixels, int line_size, int h)
{
- UINT32 *p;
- const UINT32 *pix;
- p = (UINT32*)block;
- pix = (UINT32*)pixels;
- do {
+ int dh, hh;
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ hh=h>>2;
+ dh=h%4;
+ while(hh--) {
__asm __volatile(
+ "movq %4, %%mm0\n\t"
+ "movq %5, %%mm1\n\t"
+ "movq %6, %%mm2\n\t"
+ "movq %7, %%mm3\n\t"
+ "movq %%mm0, %0\n\t"
+ "movq %%mm1, %1\n\t"
+ "movq %%mm2, %2\n\t"
+ "movq %%mm3, %3\n\t"
+ :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
+ :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
+ :"memory");
+ pix = pix + line_size*4;
+ p = p + line_size*4;
+ }
+ while(dh--) {
+ __asm __volatile(
"movq %1, %%mm0\n\t"
"movq %%mm0, %0\n\t"
- :"=m"(p[0])
- :"m"(pix[0])
+ :"=m"(*p)
+ :"m"(*pix)
:"memory");
- pix = (UINT32*) ((char*)pix + line_size);
- p = (UINT32*) ((char*)p + line_size);
- } while (--h);
+ pix = pix + line_size;
+ p = p + line_size;
+ }
emms();
}
+
+
+#ifdef HAVE_3DNOW
+/* for Athlons PAVGUSB is preferred */
+#define PAVGB "pavgusb"
+#else
+/* Introduced only in MMX2 set */
+#define PAVGB "pavgb"
+#endif
+
+#if defined ( HAVE_3DNOW ) || defined ( HAVE_MMX2 )
+static void put_pixels_x2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ int dh, hh;
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ hh=h>>2;
+ dh=h%4;
+ while(hh--) {
+ __asm __volatile(
+ "movq %4, %%mm0\n\t"
+ "movq 1%4, %%mm1\n\t"
+ "movq %5, %%mm2\n\t"
+ "movq 1%5, %%mm3\n\t"
+ "movq %6, %%mm4\n\t"
+ "movq 1%6, %%mm5\n\t"
+ "movq %7, %%mm6\n\t"
+ "movq 1%7, %%mm7\n\t"
+ PAVGB" %%mm1, %%mm0\n\t"
+ PAVGB" %%mm3, %%mm2\n\t"
+ PAVGB" %%mm5, %%mm4\n\t"
+ PAVGB" %%mm7, %%mm6\n\t"
+ "movq %%mm0, %0\n\t"
+ "movq %%mm2, %1\n\t"
+ "movq %%mm4, %2\n\t"
+ "movq %%mm6, %3\n\t"
+ :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
+ :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
+ :"memory");
+ pix += line_size*4; p += line_size*4;
+ }
+ while(dh--) {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq 1%1, %%mm1\n\t"
+ PAVGB" %%mm1, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix)
+ :"memory");
+ pix += line_size; p += line_size;
+ }
+ emms();
+}
+#else
+static void put_pixels_x2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ __asm __volatile(
+ "pxor %%mm7, %%mm7\n\t"
+ "movq %0, %%mm4\n\t"
+ ::"m"(mm_wone[0]):"memory");
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq 1%1, %%mm1\n\t"
+ "movq %%mm0, %%mm2\n\t"
+ "movq %%mm1, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm1, %%mm0\n\t"
+ "paddusw %%mm3, %%mm2\n\t"
+ "paddusw %%mm4, %%mm0\n\t"
+ "paddusw %%mm4, %%mm2\n\t"
+ "psrlw $1, %%mm0\n\t"
+ "psrlw $1, %%mm2\n\t"
+ "packuswb %%mm2, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix)
+ :"memory");
+ pix += line_size; p += line_size;
+ } while (--h);
+ emms();
+}
+#endif
+
+#if defined ( HAVE_3DNOW ) || defined ( HAVE_MMX2 )
+static void put_pixels_y2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ int dh, hh;
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ hh=h>>1;
+ dh=h%2;
+ while(hh--) {
+ __asm __volatile(
+ "movq %2, %%mm0\n\t"
+ "movq %4, %%mm1\n\t"
+ "movq %3, %%mm2\n\t"
+ "movq %5, %%mm3\n\t"
+ PAVGB" %%mm1, %%mm0\n\t"
+ PAVGB" %%mm3, %%mm2\n\t"
+ "movq %%mm0, %0\n\t"
+ "movq %%mm2, %1\n\t"
+ :"=m"(*p), "=m"(*(p+line_size))
+ :"m"(*pix), "m"(*(pix+line_size)),
+ "m"(*pix1), "m"(*(pix1+line_size))
+ :"memory");
+ pix += line_size*2; p += line_size*2; pix1 += line_size*2;
+ }
+ while(dh--) {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %2, %%mm1\n\t"
+ PAVGB" %%mm1, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix),
+ "m"(*pix1)
+ :"memory");
+ pix += line_size; p += line_size; pix1 += line_size;
+ }
+ emms();
+}
+#else
+static void put_pixels_y2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ __asm __volatile(
+ "pxor %%mm7, %%mm7\n\t"
+ "movq %0, %%mm4\n\t"
+ ::"m"(mm_wone[0]):"memory");
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %2, %%mm1\n\t"
+ "movq %%mm0, %%mm2\n\t"
+ "movq %%mm1, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm1, %%mm0\n\t"
+ "paddusw %%mm3, %%mm2\n\t"
+ "paddusw %%mm4, %%mm0\n\t"
+ "paddusw %%mm4, %%mm2\n\t"
+ "psrlw $1, %%mm0\n\t"
+ "psrlw $1, %%mm2\n\t"
+ "packuswb %%mm2, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix),
+ "m"(*pix1)
+ :"memory");
+ pix += line_size; p += line_size; pix1 += line_size;
+ } while (--h);
+ emms();
+}
+#endif
+
+#if defined ( HAVE_3DNOW ) || defined ( HAVE_MMX2 )
+static void put_pixels_xy2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ int dh, hh;
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ hh=h>>1;
+ dh=h%2;
+ while(hh--) {
+ __asm __volatile(
+ "movq %2, %%mm0\n\t"
+ "movq %4, %%mm1\n\t"
+ "movq 1%2, %%mm2\n\t"
+ "movq 1%4, %%mm3\n\t"
+ "movq %3, %%mm4\n\t"
+ "movq %5, %%mm5\n\t"
+ "movq 1%3, %%mm6\n\t"
+ "movq 1%5, %%mm7\n\t"
+ PAVGB" %%mm1, %%mm0\n\t"
+ PAVGB" %%mm3, %%mm2\n\t"
+ PAVGB" %%mm5, %%mm4\n\t"
+ PAVGB" %%mm7, %%mm6\n\t"
+ PAVGB" %%mm2, %%mm0\n\t"
+ PAVGB" %%mm6, %%mm4\n\t"
+ "movq %%mm0, %0\n\t"
+ "movq %%mm4, %1\n\t"
+ :"=m"(*p), "=m"(*(p+line_size))
+ :"m"(*pix), "m"(*(pix+line_size)),
+ "m"(*pix1), "m"(*(pix1+line_size))
+ :"memory");
+ pix += line_size*2;
+ pix1 += line_size*2;
+ p += line_size*2;
+ }
+ while(dh--) {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %2, %%mm1\n\t"
+ "movq 1%1, %%mm2\n\t"
+ "movq 1%2, %%mm3\n\t"
+ PAVGB" %%mm1, %%mm0\n\t"
+ PAVGB" %%mm3, %%mm2\n\t"
+ PAVGB" %%mm2, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix),
+ "m"(*pix1)
+ :"memory");
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size;
+ }
+ emms();
+}
+#else
+static void put_pixels_xy2(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ __asm __volatile(
+ "pxor %%mm7, %%mm7\n\t"
+ "movq %0, %%mm6\n\t"
+ ::"m"(mm_wtwo[0]):"memory");
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %2, %%mm1\n\t"
+ "movq 1%1, %%mm4\n\t"
+ "movq 1%2, %%mm5\n\t"
+ "movq %%mm0, %%mm2\n\t"
+ "movq %%mm1, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm1, %%mm0\n\t"
+ "paddusw %%mm3, %%mm2\n\t"
+ "movq %%mm4, %%mm1\n\t"
+ "movq %%mm5, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm4\n\t"
+ "punpcklbw %%mm7, %%mm5\n\t"
+ "punpckhbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm5, %%mm4\n\t"
+ "paddusw %%mm3, %%mm1\n\t"
+ "paddusw %%mm6, %%mm4\n\t"
+ "paddusw %%mm6, %%mm1\n\t"
+ "paddusw %%mm4, %%mm0\n\t"
+ "paddusw %%mm1, %%mm2\n\t"
+ "psrlw $2, %%mm0\n\t"
+ "psrlw $2, %%mm2\n\t"
+ "packuswb %%mm2, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix),
+ "m"(*pix1)
+ :"memory");
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size;
+ } while(--h);
+ emms();
+}
+#endif
+void (* put_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{ put_pixels, put_pixels_x2, put_pixels_y2, put_pixels_xy2, };
+
+static void put_no_rnd_pixels_x2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory");
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq 1%1, %%mm1\n\t"
+ "movq %%mm0, %%mm2\n\t"
+ "movq %%mm1, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm1, %%mm0\n\t"
+ "paddusw %%mm3, %%mm2\n\t"
+ "psrlw $1, %%mm0\n\t"
+ "psrlw $1, %%mm2\n\t"
+ "packuswb %%mm2, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix)
+ :"memory");
+ pix += line_size;
+ p += line_size;
+ } while (--h);
+}
+
+static void put_no_rnd_pixels_y2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory");
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %2, %%mm1\n\t"
+ "movq %%mm0, %%mm2\n\t"
+ "movq %%mm1, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm1, %%mm0\n\t"
+ "paddusw %%mm3, %%mm2\n\t"
+ "psrlw $1, %%mm0\n\t"
+ "psrlw $1, %%mm2\n\t"
+ "packuswb %%mm2, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix),
+ "m"(*pix1)
+ :"memory");
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size;
+ } while(--h);
+}
+
+static void put_no_rnd_pixels_xy2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ __asm __volatile(
+ "pxor %%mm7, %%mm7\n\t"
+ "movq %0, %%mm6\n\t"
+ ::"m"(mm_wone[0]):"memory");
+ do {
+ __asm __volatile(
+ "movq %1, %%mm0\n\t"
+ "movq %2, %%mm1\n\t"
+ "movq 1%1, %%mm4\n\t"
+ "movq 1%2, %%mm5\n\t"
+ "movq %%mm0, %%mm2\n\t"
+ "movq %%mm1, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm1, %%mm0\n\t"
+ "paddusw %%mm3, %%mm2\n\t"
+ "movq %%mm4, %%mm1\n\t"
+ "movq %%mm5, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm4\n\t"
+ "punpcklbw %%mm7, %%mm5\n\t"
+ "punpckhbw %%mm7, %%mm1\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "paddusw %%mm5, %%mm4\n\t"
+ "paddusw %%mm3, %%mm1\n\t"
+ "paddusw %%mm6, %%mm4\n\t"
+ "paddusw %%mm6, %%mm1\n\t"
+ "paddusw %%mm4, %%mm0\n\t"
+ "paddusw %%mm1, %%mm2\n\t"
+ "psrlw $2, %%mm0\n\t"
+ "psrlw $2, %%mm2\n\t"
+ "packuswb %%mm2, %%mm0\n\t"
+ "movq %%mm0, %0\n\t"
+ :"=m"(*p)
+ :"m"(*pix),
+ "m"(*pix1)
+ :"memory");
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size;
+ } while(--h);
+}
+
+void (* put_no_rnd_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{ put_pixels, put_no_rnd_pixels_x2, put_no_rnd_pixels_y2, put_no_rnd_pixels_xy2, };
+
+/*
+ NK: Stuff below is not tested and not optimized!
+ I simply do not have the necessary samples for now.
+*/
+
+static void avg_pixels(UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ do {
+ p[0] = (( p[0] + pix[0] +1)>>1) ;
+ p[1] = (( p[1] + pix[1] +1)>>1) ;
+ p[2] = (( p[2] + pix[2] +1)>>1) ;
+ p[3] = (( p[3] + pix[3] +1)>>1) ;
+ p[4] = (( p[4] + pix[4] +1)>>1) ;
+ p[5] = (( p[5] + pix[5] +1)>>1) ;
+ p[6] = (( p[6] + pix[6] +1)>>1) ;
+ p[7] = (( p[7] + pix[7] +1)>>1) ;
+ pix += line_size;
+ p += line_size;
+ }
+ while (--h);
+}
+
+static void avg_pixels_x2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ do {
+ p[0] = (( p[0] + (( pix[0] + pix[1] +1)>>1) +1)>>1) ;
+ p[1] = (( p[1] + (( pix[1] + pix[2] +1)>>1) +1)>>1) ;
+ p[2] = (( p[2] + (( pix[2] + pix[3] +1)>>1) +1)>>1) ;
+ p[3] = (( p[3] + (( pix[3] + pix[4] +1)>>1) +1)>>1) ;
+ p[4] = (( p[4] + (( pix[4] + pix[5] +1)>>1) +1)>>1) ;
+ p[5] = (( p[5] + (( pix[5] + pix[6] +1)>>1) +1)>>1) ;
+ p[6] = (( p[6] + (( pix[6] + pix[7] +1)>>1) +1)>>1) ;
+ p[7] = (( p[7] + (( pix[7] + pix[8] +1)>>1) +1)>>1) ;
+ pix += line_size;
+ p += line_size;
+ } while (--h);
+}
+
+static void avg_pixels_y2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ do {
+ p[0] = (( p[0] + (( pix[0] + pix1[0] +1)>>1) +1)>>1) ;
+ p[1] = (( p[1] + (( pix[1] + pix1[1] +1)>>1) +1)>>1) ;
+ p[2] = (( p[2] + (( pix[2] + pix1[2] +1)>>1) +1)>>1) ;
+ p[3] = (( p[3] + (( pix[3] + pix1[3] +1)>>1) +1)>>1) ;
+ p[4] = (( p[4] + (( pix[4] + pix1[4] +1)>>1) +1)>>1) ;
+ p[5] = (( p[5] + (( pix[5] + pix1[5] +1)>>1) +1)>>1) ;
+ p[6] = (( p[6] + (( pix[6] + pix1[6] +1)>>1) +1)>>1) ;
+ p[7] = (( p[7] + (( pix[7] + pix1[7] +1)>>1) +1)>>1) ;
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size ;
+ } while(--h);
+}
+
+static void avg_pixels_xy2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ do {
+ p[0] = (( p[0] + (( pix[0] + pix[1] + pix1[0] + pix1[1] +2)>>2)+1)>>1) ;
+ p[1] = (( p[1] + (( pix[1] + pix[2] + pix1[1] + pix1[2] +2)>>2)+1)>>1) ;
+ p[2] = (( p[2] + (( pix[2] + pix[3] + pix1[2] + pix1[3] +2)>>2)+1)>>1) ;
+ p[3] = (( p[3] + (( pix[3] + pix[4] + pix1[3] + pix1[4] +2)>>2)+1)>>1) ;
+ p[4] = (( p[4] + (( pix[4] + pix[5] + pix1[4] + pix1[5] +2)>>2)+1)>>1) ;
+ p[5] = (( p[5] + (( pix[5] + pix[6] + pix1[5] + pix1[6] +2)>>2)+1)>>1) ;
+ p[6] = (( p[6] + (( pix[6] + pix[7] + pix1[6] + pix1[7] +2)>>2)+1)>>1) ;
+ p[7] = (( p[7] + (( pix[7] + pix[8] + pix1[7] + pix1[8] +2)>>2)+1)>>1) ;
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size ;
+ } while(--h);
+}
+
+void (*avg_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{ avg_pixels, avg_pixels_x2, avg_pixels_y2, avg_pixels_xy2, };
+
+static void avg_no_rnd_pixels( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ do {
+ p[0] = (( p[0] + pix[0] )>>1) ;
+ p[1] = (( p[1] + pix[1] )>>1) ;
+ p[2] = (( p[2] + pix[2] )>>1) ;
+ p[3] = (( p[3] + pix[3] )>>1) ;
+ p[4] = (( p[4] + pix[4] )>>1) ;
+ p[5] = (( p[5] + pix[5] )>>1) ;
+ p[6] = (( p[6] + pix[6] )>>1) ;
+ p[7] = (( p[7] + pix[7] )>>1) ;
+ pix += line_size;
+ p += line_size ;
+ } while (--h);
+}
+
+static void avg_no_rnd_pixels_x2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ do {
+ p[0] = (( p[0] + (( pix[0] + pix[1] )>>1) )>>1) ;
+ p[1] = (( p[1] + (( pix[1] + pix[2] )>>1) )>>1) ;
+ p[2] = (( p[2] + (( pix[2] + pix[3] )>>1) )>>1) ;
+ p[3] = (( p[3] + (( pix[3] + pix[4] )>>1) )>>1) ;
+ p[4] = (( p[4] + (( pix[4] + pix[5] )>>1) )>>1) ;
+ p[5] = (( p[5] + (( pix[5] + pix[6] )>>1) )>>1) ;
+ p[6] = (( p[6] + (( pix[6] + pix[7] )>>1) )>>1) ;
+ p[7] = (( p[7] + (( pix[7] + pix[8] )>>1) )>>1) ;
+ pix += line_size;
+ p += line_size;
+ } while (--h);
+}
+
+static void avg_no_rnd_pixels_y2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ do {
+ p[0] = (( p[0] + (( pix[0] + pix1[0] )>>1) )>>1) ;
+ p[1] = (( p[1] + (( pix[1] + pix1[1] )>>1) )>>1) ;
+ p[2] = (( p[2] + (( pix[2] + pix1[2] )>>1) )>>1) ;
+ p[3] = (( p[3] + (( pix[3] + pix1[3] )>>1) )>>1) ;
+ p[4] = (( p[4] + (( pix[4] + pix1[4] )>>1) )>>1) ;
+ p[5] = (( p[5] + (( pix[5] + pix1[5] )>>1) )>>1) ;
+ p[6] = (( p[6] + (( pix[6] + pix1[6] )>>1) )>>1) ;
+ p[7] = (( p[7] + (( pix[7] + pix1[7] )>>1) )>>1) ;
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size ;
+ } while(--h);
+}
+
+static void avg_no_rnd_pixels_xy2( UINT8 *block, const UINT8 *pixels, int line_size, int h)
+{
+ UINT8 *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ do {
+ p[0] = (( p[0] + (( pix[0] + pix[1] + pix1[0] + pix1[1] +1)>>2) )>>1) ;
+ p[1] = (( p[1] + (( pix[1] + pix[2] + pix1[1] + pix1[2] +1)>>2) )>>1) ;
+ p[2] = (( p[2] + (( pix[2] + pix[3] + pix1[2] + pix1[3] +1)>>2) )>>1) ;
+ p[3] = (( p[3] + (( pix[3] + pix[4] + pix1[3] + pix1[4] +1)>>2) )>>1) ;
+ p[4] = (( p[4] + (( pix[4] + pix[5] + pix1[4] + pix1[5] +1)>>2) )>>1) ;
+ p[5] = (( p[5] + (( pix[5] + pix[6] + pix1[5] + pix1[6] +1)>>2) )>>1) ;
+ p[6] = (( p[6] + (( pix[6] + pix[7] + pix1[6] + pix1[7] +1)>>2) )>>1) ;
+ p[7] = (( p[7] + (( pix[7] + pix[8] + pix1[7] + pix1[8] +1)>>2) )>>1) ;
+ pix += line_size;
+ pix1 += line_size;
+ p += line_size;
+ } while(--h);
+}
+
+void (* avg_no_rnd_pixels_tab[4])(UINT8 *block, const UINT8 *pixels, int line_size, int h) =
+{ avg_no_rnd_pixels, avg_no_rnd_pixels_x2, avg_no_rnd_pixels_y2, avg_no_rnd_pixels_xy2, };
+
+static void sub_pixels( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
+{
+ DCTELEM *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ do {
+ p[0] -= pix[0] ;
+ p[1] -= pix[1] ;
+ p[2] -= pix[2] ;
+ p[3] -= pix[3] ;
+ p[4] -= pix[4] ;
+ p[5] -= pix[5] ;
+ p[6] -= pix[6] ;
+ p[7] -= pix[7] ;
+ pix += line_size;
+ p += 8 ;
+ } while (--h);
+}
+
+static void sub_pixels_x2( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
+{
+ DCTELEM *p;
+ const UINT8 *pix;
+ p = block;
+ pix = pixels;
+ do {
+ p[0] -= (( pix[0] + pix[1] +1)>>1) ;
+ p[1] -= (( pix[1] + pix[2] +1)>>1) ;
+ p[2] -= (( pix[2] + pix[3] +1)>>1) ;
+ p[3] -= (( pix[3] + pix[4] +1)>>1) ;
+ p[4] -= (( pix[4] + pix[5] +1)>>1) ;
+ p[5] -= (( pix[5] + pix[6] +1)>>1) ;
+ p[6] -= (( pix[6] + pix[7] +1)>>1) ;
+ p[7] -= (( pix[7] + pix[8] +1)>>1) ;
+ pix += line_size;
+ p += 8;
+ } while (--h);
+}
+
+static void sub_pixels_y2( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
+{
+ DCTELEM *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels;
+ pix1 = pixels + line_size;
+ do {
+ p[0] -= (( pix[0] + pix1[0] +1)>>1) ;
+ p[1] -= (( pix[1] + pix1[1] +1)>>1) ;
+ p[2] -= (( pix[2] + pix1[2] +1)>>1) ;
+ p[3] -= (( pix[3] + pix1[3] +1)>>1) ;
+ p[4] -= (( pix[4] + pix1[4] +1)>>1) ;
+ p[5] -= (( pix[5] + pix1[5] +1)>>1) ;
+ p[6] -= (( pix[6] + pix1[6] +1)>>1) ;
+ p[7] -= (( pix[7] + pix1[7] +1)>>1) ;
+ pix += line_size;
+ pix1 += line_size;
+ p += 8 ;
+ } while(--h);
+}
+
+static void sub_pixels_xy2( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
+{
+ DCTELEM *p;
+ const UINT8 *pix;
+ const UINT8 *pix1;
+ p = block;
+ pix = pixels; pix1 = pixels + line_size;
+ do {
+ p[0] -= (( pix[0] + pix[1] + pix1[0] + pix1[1] +2)>>2) ;
+ p[1] -= (( pix[1] + pix[2] + pix1[1] + pix1[2] +2)>>2) ;
+ p[2] -= (( pix[2] + pix[3] + pix1[2] + pix1[3] +2)>>2) ;
+ p[3] -= (( pix[3] + pix[4] + pix1[3] + pix1[4] +2)>>2) ;
+ p[4] -= (( pix[4] + pix[5] + pix1[4] + pix1[5] +2)>>2) ;
+ p[5] -= (( pix[5] + pix[6] + pix1[5] + pix1[6] +2)>>2) ;
+ p[6] -= (( pix[6] + pix[7] + pix1[6] + pix1[7] +2)>>2) ;
+ p[7] -= (( pix[7] + pix[8] + pix1[7] + pix1[8] +2)>>2) ;
+ pix += line_size;
+ pix1 += line_size;
+ p += 8 ;
+ } while(--h);
+}
+
+void (* sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_size, int h) =
+{ sub_pixels, sub_pixels_x2, sub_pixels_y2, sub_pixels_xy2, };
More information about the MPlayer-cvslog
mailing list