[Ffmpeg-cvslog] CVS: ffmpeg/libavcodec/i386 h264dsp_mmx.c, 1.15, 1.16

Loren Merritt lorenm
Sat Mar 4 22:29:29 CET 2006


On Fri, 3 Mar 2006, Diego Biurrun wrote:
> On Thu, Mar 02, 2006 at 09:21:10AM +0100, Loren Merritt CVS wrote:
>>
>> Modified Files:
>> 	h264dsp_mmx.c
>> Log Message:
>> 4% faster h264_qpel_mc
>
> On my K6-III with gcc 2.95.4:
>
> In file included from i386/dsputil_mmx.c:2492:
> i386/h264dsp_mmx.c: In function `put_h264_qpel4_h_lowpass_l2_3dnow':
> i386/h264dsp_mmx.c:1158: more than 10 operands in `asm'
[...]

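How's this? The attached patch moves the loop counter out of the asm block
into a C do{}while(--h) loop and, where possible, loads the constants in a
separate asm statement, so that each asm statement needs fewer operands.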

--Loren Merritt
-------------- next part --------------
Index: i386/h264dsp_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/h264dsp_mmx.c,v
retrieving revision 1.17
diff -u -r1.17 h264dsp_mmx.c
--- i386/h264dsp_mmx.c	4 Mar 2006 19:56:01 -0000	1.17
+++ i386/h264dsp_mmx.c	4 Mar 2006 21:09:56 -0000
@@ -518,12 +518,14 @@
 }\
 static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
     int h=4;\
-\
     asm volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
-        "movq %6, %%mm4             \n\t"\
-        "movq %7, %%mm5             \n\t"\
-        "1:                         \n\t"\
+        "movq %0, %%mm4             \n\t"\
+        "movq %1, %%mm5             \n\t"\
+        :: "m"(ff_pw_5), "m"(ff_pw_16)\
+    );\
+    do{\
+    asm volatile(\
         "movd  -1(%0), %%mm1        \n\t"\
         "movd    (%0), %%mm2        \n\t"\
         "movd   1(%0), %%mm3        \n\t"\
@@ -549,16 +551,14 @@
         "packuswb %%mm0, %%mm0      \n\t"\
         PAVGB" %%mm3, %%mm0         \n\t"\
         OP(%%mm0, (%1),%%mm6, d)\
-        "add %5, %0                 \n\t"\
-        "add %5, %1                 \n\t"\
-        "add %4, %2                 \n\t"\
-        "decl %3                    \n\t"\
-        " jnz 1b                    \n\t"\
-        : "+a"(src), "+c"(dst), "+d"(src2), "+m"(h)\
-        : "D"((long)src2Stride), "S"((long)dstStride),\
-          "m"(ff_pw_5), "m"(ff_pw_16)\
+        "add %4, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "add %3, %2                 \n\t"\
+        : "+a"(src), "+c"(dst), "+d"(src2)\
+        : "D"((long)src2Stride), "S"((long)dstStride)\
         : "memory"\
     );\
+    }while(--h);\
 }\
 static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     src -= 2*srcStride;\
@@ -714,8 +714,11 @@
     int h=8;\
     asm volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
-        "movq %6, %%mm6             \n\t"\
-        "1:                         \n\t"\
+        "movq %0, %%mm6             \n\t"\
+        :: "m"(ff_pw_5)\
+    );\
+    do{\
+    asm volatile(\
         "movq    (%0), %%mm0        \n\t"\
         "movq   1(%0), %%mm2        \n\t"\
         "movq %%mm0, %%mm1          \n\t"\
@@ -748,7 +751,7 @@
         "punpcklbw %%mm7, %%mm5     \n\t"\
         "paddw %%mm3, %%mm2         \n\t"\
         "paddw %%mm5, %%mm4         \n\t"\
-        "movq %7, %%mm5             \n\t"\
+        "movq %5, %%mm5             \n\t"\
         "paddw %%mm5, %%mm2         \n\t"\
         "paddw %%mm5, %%mm4         \n\t"\
         "paddw %%mm2, %%mm0         \n\t"\
@@ -759,16 +762,15 @@
         "packuswb %%mm1, %%mm0      \n\t"\
         PAVGB" %%mm4, %%mm0         \n\t"\
         OP(%%mm0, (%1),%%mm5, q)\
-        "add %5, %0                 \n\t"\
-        "add %5, %1                 \n\t"\
-        "add %4, %2                 \n\t"\
-        "decl %3                    \n\t"\
-        " jnz 1b                    \n\t"\
-        : "+a"(src), "+c"(dst), "+d"(src2), "+m"(h)\
+        "add %4, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "add %3, %2                 \n\t"\
+        : "+a"(src), "+c"(dst), "+d"(src2)\
         : "D"((long)src2Stride), "S"((long)dstStride),\
-          "m"(ff_pw_5), "m"(ff_pw_16)\
+          "m"(ff_pw_16)\
         : "memory"\
     );\
+    }while(--h);\
 }\
 \
 static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\



More information about the ffmpeg-cvslog mailing list