[FFmpeg-cvslog] r23463 - in trunk/libavcodec/x86: dsputil_mmx.c dsputil_mmx_avg_template.c

conrad subversion
Fri Jun 4 06:46:26 CEST 2010


Author: conrad
Date: Fri Jun  4 06:46:26 2010
New Revision: 23463

Log:
Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora

Modified:
   trunk/libavcodec/x86/dsputil_mmx.c
   trunk/libavcodec/x86/dsputil_mmx_avg_template.c

Modified: trunk/libavcodec/x86/dsputil_mmx.c
==============================================================================
--- trunk/libavcodec/x86/dsputil_mmx.c	Fri Jun  4 03:15:41 2010	(r23462)
+++ trunk/libavcodec/x86/dsputil_mmx.c	Fri Jun  4 06:46:26 2010	(r23463)
@@ -2657,6 +2657,12 @@ void dsputil_init_mmx(DSPContext* c, AVC
                 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
             }
 
+            if (CONFIG_VP3_DECODER
+                && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
+            }
+
 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
             c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
             c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
@@ -2745,6 +2751,12 @@ void dsputil_init_mmx(DSPContext* c, AVC
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
             }
 
+            if (CONFIG_VP3_DECODER
+                && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
+            }
+
             SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
             SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);

Modified: trunk/libavcodec/x86/dsputil_mmx_avg_template.c
==============================================================================
--- trunk/libavcodec/x86/dsputil_mmx_avg_template.c	Fri Jun  4 03:15:41 2010	(r23462)
+++ trunk/libavcodec/x86/dsputil_mmx_avg_template.c	Fri Jun  4 06:46:26 2010	(r23463)
@@ -586,6 +586,49 @@ static void DEF(put_no_rnd_pixels8_x2)(u
         :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{ /* Horizontal half-pel "put" of 8-byte rows: block[x] = ~PAVGB(~pixels[x], ~pixels[x+1]); equals (a + b) >> 1 if PAVGB is a round-up byte average (macro defined outside this view - confirm). */
+    __asm__ volatile ( /* operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size, %4 = 3*line_size */
+        "pcmpeqb %%mm6, %%mm6           \n\t" /* mm6 = all ones; used with pxor to complement bytes */
+        "1:                             \n\t" /* loop top: each pass produces 4 output rows */
+        "movq  (%1),     %%mm0          \n\t" /* row 0, bytes 0..7 */
+        "movq  (%1, %3), %%mm2          \n\t" /* row 1, bytes 0..7 */
+        "movq 1(%1),     %%mm1          \n\t" /* row 0, bytes 1..8 (right neighbours) */
+        "movq 1(%1, %3), %%mm3          \n\t" /* row 1, bytes 1..8 */
+        "pxor  %%mm6, %%mm0             \n\t" /* complement all four inputs before averaging */
+        "pxor  %%mm6, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm3             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t" /* mm0 = PAVGB(~row0[x], ~row0[x+1]) */
+        PAVGB" %%mm3, %%mm2             \n\t" /* mm2 = PAVGB(~row1[x], ~row1[x+1]) */
+        "pxor  %%mm6, %%mm0             \n\t" /* complement the averages back */
+        "pxor  %%mm6, %%mm2             \n\t"
+        "movq  %%mm0, (%2)              \n\t" /* store output row 0 */
+        "movq  %%mm2, (%2, %3)          \n\t" /* store output row 1 */
+        "movq  (%1, %3,2), %%mm0        \n\t" /* row 2, bytes 0..7 */
+        "movq 1(%1, %3,2), %%mm1        \n\t" /* row 2, bytes 1..8 */
+        "movq  (%1, %4),   %%mm2        \n\t" /* row 3, bytes 0..7 (%4 = 3*line_size) */
+        "movq 1(%1, %4),   %%mm3        \n\t" /* row 3, bytes 1..8 */
+        "pxor  %%mm6, %%mm0             \n\t" /* same complement / average / complement sequence as rows 0-1 */
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm3             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "movq  %%mm0, (%2, %3,2)        \n\t" /* store output row 2 */
+        "movq  %%mm2, (%2, %4)          \n\t" /* store output row 3 */
+        "lea   (%1, %3,4), %1           \n\t" /* pixels += 4*line_size */
+        "lea   (%2, %3,4), %2           \n\t" /* block  += 4*line_size */
+        "subl  $4, %0                   \n\t" /* h -= 4 */
+        "jg 1b                          \n\t" /* repeat while h > 0 (signed); always runs at least one 4-row pass */
+        : "+g"(h), "+r"(pixels), "+r"(block) /* read-write: row counter and both pointers are advanced by the asm */
+        : "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) /* strides widened to x86_reg for use in addressing */
+        : "memory" /* NOTE(review): mm0-mm3 and mm6 are written but not listed as clobbers - confirm this matches file convention */
+    );
+}
+
 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(
@@ -650,6 +693,44 @@ static void DEF(put_no_rnd_pixels8_y2)(u
         :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{ /* Vertical half-pel "put" of 8-byte rows: output row y = ~PAVGB(~src row y, ~src row y+1); equals (a + b) >> 1 if PAVGB is a round-up byte average (macro defined outside this view - confirm). */
+    __asm__ volatile ( /* operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size, %4 = 3*line_size */
+        "movq     (%1), %%mm0           \n\t" /* mm0 = first source row */
+        "pcmpeqb %%mm6, %%mm6           \n\t" /* mm6 = all ones; used with pxor to complement bytes */
+        "add        %3, %1              \n\t" /* pixels now points at the second source row */
+        "pxor    %%mm6, %%mm0           \n\t" /* mm0 = ~(first row); kept complemented across the loop */
+        "1:                             \n\t" /* loop top: mm0 holds the complemented row just above (%1); 4 output rows per pass */
+        "movq  (%1),     %%mm1          \n\t" /* mm1 = row r1 */
+        "movq  (%1, %3), %%mm2          \n\t" /* mm2 = row r2 */
+        "pxor  %%mm6, %%mm1             \n\t" /* mm1 = ~r1 */
+        "pxor  %%mm6, %%mm2             \n\t" /* mm2 = ~r2 (stays complemented until used below) */
+        PAVGB" %%mm1, %%mm0             \n\t" /* mm0 = PAVGB(~r0, ~r1) */
+        PAVGB" %%mm2, %%mm1             \n\t" /* mm1 = PAVGB(~r1, ~r2) */
+        "pxor  %%mm6, %%mm0             \n\t" /* complement the averages back */
+        "pxor  %%mm6, %%mm1             \n\t"
+        "movq  %%mm0, (%2)              \n\t" /* store output row 0 */
+        "movq  %%mm1, (%2, %3)          \n\t" /* store output row 1 */
+        "movq  (%1, %3,2), %%mm1        \n\t" /* mm1 = row r3 */
+        "movq  (%1, %4),   %%mm0        \n\t" /* mm0 = row r4 (%4 = 3*line_size) */
+        "pxor  %%mm6, %%mm1             \n\t" /* mm1 = ~r3 */
+        "pxor  %%mm6, %%mm0             \n\t" /* mm0 = ~r4; not modified again, so it seeds the next pass */
+        PAVGB" %%mm1, %%mm2             \n\t" /* mm2 = PAVGB(~r2, ~r3) */
+        PAVGB" %%mm0, %%mm1             \n\t" /* mm1 = PAVGB(~r3, ~r4) */
+        "pxor  %%mm6, %%mm2             \n\t" /* complement the averages back */
+        "pxor  %%mm6, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3,2)         \n\t" /* store output row 2 */
+        "movq %%mm1, (%2, %4)           \n\t" /* store output row 3 */
+        "lea   (%1, %3,4), %1           \n\t" /* pixels += 4*line_size */
+        "lea   (%2, %3,4), %2           \n\t" /* block  += 4*line_size */
+        "subl $4, %0                    \n\t" /* h -= 4 */
+        "jg 1b                          \n\t" /* repeat while h > 0 (signed); always runs at least one 4-row pass */
+        :"+g"(h), "+r"(pixels), "+r" (block) /* read-write: row counter and both pointers are advanced by the asm */
+        :"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) /* strides widened to x86_reg for use in addressing */
+        :"memory" /* NOTE(review): mm0-mm2 and mm6 are written but not listed as clobbers - confirm this matches file convention */
+    );
+}
+
 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(



More information about the ffmpeg-cvslog mailing list