[FFmpeg-devel] [RFC] use MANGLE in some cases for h264dsp_mmx.c
Reimar Döffinger
Reimar.Doeffinger
Sun Nov 22 12:17:12 CET 2009
Hello,
I know Måns has a similar bunch of patches, but here is what I have.
It is not exactly the minimum, but probably close to the minimum, amount of
MANGLE usage needed to make h264dsp_mmx.c compile on x86_32 with --enable-pic.
Of course you could say it is pointless, since the generated code will
not be fully PIC anyway, but I guess the idea is to get as close to PIC
as possible without a huge maintenance effort or a huge speed loss.
-------------- next part --------------
Index: libavcodec/x86/h264dsp_mmx.c
===================================================================
--- libavcodec/x86/h264dsp_mmx.c (revision 20575)
+++ libavcodec/x86/h264dsp_mmx.c (working copy)
@@ -278,7 +278,7 @@
"movdqa 0x70(%1), %%xmm7 \n"
H264_IDCT8_1D_SSE2(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)
TRANSPOSE8(%%xmm4, %%xmm1, %%xmm7, %%xmm3, %%xmm5, %%xmm0, %%xmm2, %%xmm6, (%1))
- "paddw %4, %%xmm4 \n"
+ "paddw "MANGLE(ff_pw_32)", %%xmm4 \n"
"movdqa %%xmm4, 0x00(%1) \n"
"movdqa %%xmm2, 0x40(%1) \n"
H264_IDCT8_1D_SSE2(%%xmm4, %%xmm0, %%xmm6, %%xmm3, %%xmm2, %%xmm5, %%xmm7, %%xmm1)
@@ -297,7 +297,7 @@
STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
STORE_DIFF_8P(%%xmm1, (%0,%3), %%xmm6, %%xmm7)
:"+r"(dst)
- :"r"(block), "r"((x86_reg)stride), "r"((x86_reg)3L*stride), "m"(ff_pw_32)
+ :"r"(block), "r"((x86_reg)stride), "r"((x86_reg)3L*stride)
);
}
@@ -906,8 +906,8 @@
"psubw "#B", "#T" \n\t"\
"psubw "#E", "#T" \n\t"\
"punpcklbw "#Z", "#F" \n\t"\
- "pmullw %4, "#T" \n\t"\
- "paddw %5, "#A" \n\t"\
+ "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
+ "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
"add %2, %0 \n\t"\
"paddw "#F", "#A" \n\t"\
"paddw "#A", "#T" \n\t"\
@@ -921,11 +921,11 @@
"mov"#d" (%0), "#F" \n\t"\
"paddw "#D", "#T" \n\t"\
"psllw $2, "#T" \n\t"\
- "paddw %4, "#A" \n\t"\
+ "paddw "MANGLE(ff_pw_16)", "#A"\n\t"\
"psubw "#B", "#T" \n\t"\
"psubw "#E", "#T" \n\t"\
"punpcklbw "#Z", "#F" \n\t"\
- "pmullw %3, "#T" \n\t"\
+ "pmullw "MANGLE(ff_pw_5)", "#T"\n\t"\
"paddw "#F", "#A" \n\t"\
"add %2, %0 \n\t"\
"paddw "#A", "#T" \n\t"\
@@ -943,8 +943,8 @@
\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
- "movq %5, %%mm4 \n\t"\
- "movq %6, %%mm5 \n\t"\
+ "movq "MANGLE(ff_pw_5 )", %%mm4\n\t"\
+ "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
"1: \n\t"\
"movd -1(%0), %%mm1 \n\t"\
"movd (%0), %%mm2 \n\t"\
@@ -974,7 +974,7 @@
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(src), "+c"(dst), "+g"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
: "memory"\
);\
}\
@@ -1047,7 +1047,7 @@
QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
\
: "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
: "memory"\
);\
}\
@@ -1079,7 +1079,7 @@
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
\
: "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "c"(tmp), "S"((x86_reg)srcStride)\
: "memory"\
);\
tmp += 4;\
@@ -1117,7 +1117,7 @@
int h=8;\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\
- "movq %5, %%mm6 \n\t"\
+ "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
"1: \n\t"\
"movq (%0), %%mm0 \n\t"\
"movq 1(%0), %%mm2 \n\t"\
@@ -1151,7 +1151,7 @@
"punpcklbw %%mm7, %%mm5 \n\t"\
"paddw %%mm3, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
- "movq %6, %%mm5 \n\t"\
+ "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
"paddw %%mm5, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
@@ -1165,7 +1165,7 @@
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(src), "+c"(dst), "+g"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
: "memory"\
);\
}\
@@ -1211,7 +1211,7 @@
"punpcklbw %%mm7, %%mm5 \n\t"\
"paddw %%mm3, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
- "movq %5, %%mm5 \n\t"\
+ "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
"paddw %%mm5, %%mm2 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"paddw %%mm2, %%mm0 \n\t"\
@@ -1226,8 +1226,7 @@
"add %4, %1 \n\t"\
"add %3, %2 \n\t"\
: "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_16)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
: "memory"\
);\
}while(--h);\
@@ -1265,7 +1264,7 @@
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
\
: "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
: "memory"\
);\
if(h==16){\
@@ -1280,7 +1279,7 @@
QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
\
: "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
: "memory"\
);\
}\
@@ -1318,7 +1317,7 @@
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
: "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "c"(tmp), "S"((x86_reg)srcStride)\
: "memory"\
);\
if(size==16){\
@@ -1332,7 +1331,7 @@
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
: "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "c"(tmp), "S"((x86_reg)srcStride)\
: "memory"\
);\
}\
@@ -1596,7 +1595,7 @@
"psllw $2, %%xmm2 \n\t"\
"movq (%2), %%xmm3 \n\t"\
"psubw %%xmm1, %%xmm2 \n\t"\
- "paddw %5, %%xmm0 \n\t"\
+ "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\
"pmullw %%xmm6, %%xmm2 \n\t"\
"paddw %%xmm0, %%xmm2 \n\t"\
"psraw $5, %%xmm2 \n\t"\
@@ -1607,8 +1606,7 @@
"add %4, %1 \n\t"\
"add %3, %2 \n\t"\
: "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_16)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
: "memory"\
);\
}while(--h);\
@@ -1619,7 +1617,7 @@
int h=8;\
__asm__ volatile(\
"pxor %%xmm7, %%xmm7 \n\t"\
- "movdqa %5, %%xmm6 \n\t"\
+ "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\
"1: \n\t"\
"lddqu -2(%0), %%xmm1 \n\t"\
"movdqa %%xmm1, %%xmm0 \n\t"\
@@ -1639,7 +1637,7 @@
"paddw %%xmm4, %%xmm1 \n\t"\
"psllw $2, %%xmm2 \n\t"\
"psubw %%xmm1, %%xmm2 \n\t"\
- "paddw %6, %%xmm0 \n\t"\
+ "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\
"pmullw %%xmm6, %%xmm2 \n\t"\
"paddw %%xmm0, %%xmm2 \n\t"\
"psraw $5, %%xmm2 \n\t"\
@@ -1650,8 +1648,7 @@
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(src), "+c"(dst), "+g"(h)\
- : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
: "memory"\
);\
}\
@@ -1695,7 +1692,7 @@
QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
\
: "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
: "memory"\
);\
if(h==16){\
@@ -1710,7 +1707,7 @@
QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
\
: "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride)\
: "memory"\
);\
}\
@@ -1753,7 +1750,7 @@
QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
: "+a"(src)
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+ : "c"(tmp), "S"((x86_reg)srcStride)
: "memory"
);
if(size==16){
@@ -1767,7 +1764,7 @@
QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48)
QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48)
: "+a"(src)
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+ : "c"(tmp), "S"((x86_reg)srcStride)
: "memory"
);
}
More information about the ffmpeg-devel
mailing list