[FFmpeg-devel] [RFC] use MANGLE in some case for h264dsp_mmx.c

Reimar Döffinger Reimar.Doeffinger
Sun Nov 22 12:17:12 CET 2009


Hello,
I know Måns has a similar bunch of patches, but here is what I have.
It is probably not exactly the minimum, but it is close to the minimum amount of MANGLE use
needed to make h264dsp_mmx.c compile on x86_32 with --enable-pic.
Of course you could say it is pointless since the generated code will
not be fully PIC anyway, but I guess the idea is to get as close to PIC
as possible without a huge maintenance effort or a huge speed loss.
-------------- next part --------------
Index: libavcodec/x86/h264dsp_mmx.c
===================================================================
--- libavcodec/x86/h264dsp_mmx.c	(revision 20575)
+++ libavcodec/x86/h264dsp_mmx.c	(working copy)
@@ -278,7 +278,7 @@
         "movdqa   0x70(%1), %%xmm7 \n"
         H264_IDCT8_1D_SSE2(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)
         TRANSPOSE8(%%xmm4, %%xmm1, %%xmm7, %%xmm3, %%xmm5, %%xmm0, %%xmm2, %%xmm6, (%1))
-        "paddw          %4, %%xmm4 \n"
+        "paddw "MANGLE(ff_pw_32)", %%xmm4 \n"
         "movdqa     %%xmm4, 0x00(%1) \n"
         "movdqa     %%xmm2, 0x40(%1) \n"
         H264_IDCT8_1D_SSE2(%%xmm4, %%xmm0, %%xmm6, %%xmm3, %%xmm2, %%xmm5, %%xmm7, %%xmm1)
@@ -297,7 +297,7 @@
         STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
         STORE_DIFF_8P(%%xmm1, (%0,%3),   %%xmm6, %%xmm7)
         :"+r"(dst)
-        :"r"(block), "r"((x86_reg)stride), "r"((x86_reg)3L*stride), "m"(ff_pw_32)
+        :"r"(block), "r"((x86_reg)stride), "r"((x86_reg)3L*stride)
     );
 }
 
@@ -906,8 +906,8 @@
         "psubw "#B", "#T"           \n\t"\
         "psubw "#E", "#T"           \n\t"\
         "punpcklbw "#Z", "#F"       \n\t"\
-        "pmullw %4, "#T"            \n\t"\
-        "paddw %5, "#A"             \n\t"\
+        "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
+        "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
         "add %2, %0                 \n\t"\
         "paddw "#F", "#A"           \n\t"\
         "paddw "#A", "#T"           \n\t"\
@@ -921,11 +921,11 @@
         "mov"#d" (%0), "#F"         \n\t"\
         "paddw "#D", "#T"           \n\t"\
         "psllw $2, "#T"             \n\t"\
-        "paddw %4, "#A"             \n\t"\
+        "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
         "psubw "#B", "#T"           \n\t"\
         "psubw "#E", "#T"           \n\t"\
         "punpcklbw "#Z", "#F"       \n\t"\
-        "pmullw %3, "#T"            \n\t"\
+        "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
         "paddw "#F", "#A"           \n\t"\
         "add %2, %0                 \n\t"\
         "paddw "#A", "#T"           \n\t"\
@@ -943,8 +943,8 @@
 \
     __asm__ volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
-        "movq %5, %%mm4             \n\t"\
-        "movq %6, %%mm5             \n\t"\
+        "movq "MANGLE(ff_pw_5 )", %%mm4\n\t"\
+        "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
         "1:                         \n\t"\
         "movd  -1(%0), %%mm1        \n\t"\
         "movd    (%0), %%mm2        \n\t"\
@@ -974,7 +974,7 @@
         "decl %2                    \n\t"\
         " jnz 1b                    \n\t"\
         : "+a"(src), "+c"(dst), "+g"(h)\
-        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
         : "memory"\
     );\
 }\
@@ -1047,7 +1047,7 @@
         QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
          \
         : "+a"(src), "+c"(dst)\
-        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
         : "memory"\
     );\
 }\
@@ -1079,7 +1079,7 @@
             QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
              \
             : "+a"(src)\
-            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+            : "c"(tmp), "S"((x86_reg)srcStride)\
             : "memory"\
         );\
         tmp += 4;\
@@ -1117,7 +1117,7 @@
     int h=8;\
     __asm__ volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
-        "movq %5, %%mm6             \n\t"\
+        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
         "1:                         \n\t"\
         "movq    (%0), %%mm0        \n\t"\
         "movq   1(%0), %%mm2        \n\t"\
@@ -1151,7 +1151,7 @@
         "punpcklbw %%mm7, %%mm5     \n\t"\
         "paddw %%mm3, %%mm2         \n\t"\
         "paddw %%mm5, %%mm4         \n\t"\
-        "movq %6, %%mm5             \n\t"\
+        "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
         "paddw %%mm5, %%mm2         \n\t"\
         "paddw %%mm5, %%mm4         \n\t"\
         "paddw %%mm2, %%mm0         \n\t"\
@@ -1165,7 +1165,7 @@
         "decl %2                    \n\t"\
         " jnz 1b                    \n\t"\
         : "+a"(src), "+c"(dst), "+g"(h)\
-        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
         : "memory"\
     );\
 }\
@@ -1211,7 +1211,7 @@
         "punpcklbw %%mm7, %%mm5     \n\t"\
         "paddw %%mm3, %%mm2         \n\t"\
         "paddw %%mm5, %%mm4         \n\t"\
-        "movq %5, %%mm5             \n\t"\
+        "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
         "paddw %%mm5, %%mm2         \n\t"\
         "paddw %%mm5, %%mm4         \n\t"\
         "paddw %%mm2, %%mm0         \n\t"\
@@ -1226,8 +1226,7 @@
         "add %4, %1                 \n\t"\
         "add %3, %2                 \n\t"\
         : "+a"(src), "+c"(dst), "+d"(src2)\
-        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
-          "m"(ff_pw_16)\
+        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
         : "memory"\
     );\
     }while(--h);\
@@ -1265,7 +1264,7 @@
         QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
          \
         : "+a"(src), "+c"(dst)\
-        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
         : "memory"\
      );\
      if(h==16){\
@@ -1280,7 +1279,7 @@
             QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
             \
            : "+a"(src), "+c"(dst)\
-           : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+           : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
            : "memory"\
         );\
      }\
@@ -1318,7 +1317,7 @@
             QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
             QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
             : "+a"(src)\
-            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+            : "c"(tmp), "S"((x86_reg)srcStride)\
             : "memory"\
         );\
         if(size==16){\
@@ -1332,7 +1331,7 @@
                 QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
                 QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
                 : "+a"(src)\
-                : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+                : "c"(tmp), "S"((x86_reg)srcStride)\
                 : "memory"\
             );\
         }\
@@ -1596,7 +1595,7 @@
         "psllw   $2,     %%xmm2     \n\t"\
         "movq    (%2),   %%xmm3     \n\t"\
         "psubw   %%xmm1, %%xmm2     \n\t"\
-        "paddw   %5,     %%xmm0     \n\t"\
+        "paddw   "MANGLE(ff_pw_16)",     %%xmm0\n\t"\
         "pmullw  %%xmm6, %%xmm2     \n\t"\
         "paddw   %%xmm0, %%xmm2     \n\t"\
         "psraw   $5,     %%xmm2     \n\t"\
@@ -1607,8 +1606,7 @@
         "add %4, %1                 \n\t"\
         "add %3, %2                 \n\t"\
         : "+a"(src), "+c"(dst), "+d"(src2)\
-        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
-          "m"(ff_pw_16)\
+        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
         : "memory"\
     );\
     }while(--h);\
@@ -1619,7 +1617,7 @@
     int h=8;\
     __asm__ volatile(\
         "pxor %%xmm7, %%xmm7        \n\t"\
-        "movdqa %5, %%xmm6          \n\t"\
+        "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\
         "1:                         \n\t"\
         "lddqu   -2(%0), %%xmm1     \n\t"\
         "movdqa  %%xmm1, %%xmm0     \n\t"\
@@ -1639,7 +1637,7 @@
         "paddw   %%xmm4, %%xmm1     \n\t"\
         "psllw   $2,     %%xmm2     \n\t"\
         "psubw   %%xmm1, %%xmm2     \n\t"\
-        "paddw   %6,     %%xmm0     \n\t"\
+        "paddw   "MANGLE(ff_pw_16)",     %%xmm0\n\t"\
         "pmullw  %%xmm6, %%xmm2     \n\t"\
         "paddw   %%xmm0, %%xmm2     \n\t"\
         "psraw   $5,     %%xmm2     \n\t"\
@@ -1650,8 +1648,7 @@
         "decl %2                    \n\t"\
         " jnz 1b                    \n\t"\
         : "+a"(src), "+c"(dst), "+g"(h)\
-        : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\
-          "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
         : "memory"\
     );\
 }\
@@ -1695,7 +1692,7 @@
         QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
          \
         : "+a"(src), "+c"(dst)\
-        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+        : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
         : "memory"\
     );\
     if(h==16){\
@@ -1710,7 +1707,7 @@
             QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
             \
             : "+a"(src), "+c"(dst)\
-            : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+            : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
             : "memory"\
         );\
     }\
@@ -1753,7 +1750,7 @@
             QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
             QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
             : "+a"(src)
-            : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+            : "c"(tmp), "S"((x86_reg)srcStride)
             : "memory"
         );
         if(size==16){
@@ -1767,7 +1764,7 @@
                 QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48)
                 QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48)
                 : "+a"(src)
-                : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+                : "c"(tmp), "S"((x86_reg)srcStride)
                 : "memory"
             );
         }



More information about the ffmpeg-devel mailing list