[FFmpeg-cvslog] x86: yadif: fix asm with suncc
Mans Rullgard
git at videolan.org
Tue Aug 14 15:39:42 CEST 2012
ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Sun Aug 12 19:45:46 2012 +0100| [480178a29587df8ed6d5e93bfe79e4a08a61f9e1] | committer: Mans Rullgard
x86: yadif: fix asm with suncc
Under some circumstances, suncc will use a single register for the
address of all memory operands, inserting lea instructions loading
the correct address prior to each memory operand being used in the
code. In the yadif code, the branch in the asm block bypasses such
an lea instruction, causing an incorrect address to be used in the
following load.
This patch replaces the tmpX arrays with a single array and uses a
register operand to hold its address. Although this prevents using
offsets from the stack pointer to access these locations, the code
still builds as 32-bit PIC even with old compilers.
Signed-off-by: Mans Rullgard <mans at mansr.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=480178a29587df8ed6d5e93bfe79e4a08a61f9e1
---
libavfilter/x86/yadif_template.c | 32 +++++++++++++-------------------
1 file changed, 13 insertions(+), 19 deletions(-)
diff --git a/libavfilter/x86/yadif_template.c b/libavfilter/x86/yadif_template.c
index 1de0a58..3e45f4f 100644
--- a/libavfilter/x86/yadif_template.c
+++ b/libavfilter/x86/yadif_template.c
@@ -107,10 +107,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
uint8_t *next, int w, int prefs,
int mrefs, int parity, int mode)
{
- DECLARE_ALIGNED(16, uint8_t, tmp0)[16];
- DECLARE_ALIGNED(16, uint8_t, tmp1)[16];
- DECLARE_ALIGNED(16, uint8_t, tmp2)[16];
- DECLARE_ALIGNED(16, uint8_t, tmp3)[16];
+ DECLARE_ALIGNED(16, uint8_t, tmp)[16*4];
int x;
#define FILTER\
@@ -124,9 +121,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
MOVQ" "MM"3, "MM"4 \n\t"\
"paddw "MM"2, "MM"3 \n\t"\
"psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
- MOVQ" "MM"0, %[tmp0] \n\t" /* c */\
- MOVQ" "MM"3, %[tmp1] \n\t" /* d */\
- MOVQ" "MM"1, %[tmp2] \n\t" /* e */\
+ MOVQ" "MM"0, (%[tmp]) \n\t" /* c */\
+ MOVQ" "MM"3, 16(%[tmp]) \n\t" /* d */\
+ MOVQ" "MM"1, 32(%[tmp]) \n\t" /* e */\
"psubw "MM"4, "MM"2 \n\t"\
PABS( MM"4", MM"2") /* temporal_diff0 */\
LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
@@ -148,7 +145,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
"paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
"psrlw $1, "MM"3 \n\t"\
"pmaxsw "MM"3, "MM"2 \n\t"\
- MOVQ" "MM"2, %[tmp3] \n\t" /* diff */\
+ MOVQ" "MM"2, 48(%[tmp]) \n\t" /* diff */\
\
"paddw "MM"0, "MM"1 \n\t"\
"paddw "MM"0, "MM"0 \n\t"\
@@ -179,7 +176,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
CHECK2\
\
/* if(p->mode<2) ... */\
- MOVQ" %[tmp3], "MM"6 \n\t" /* diff */\
+ MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\
"cmpl $2, %[mode] \n\t"\
"jge 1f \n\t"\
LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
@@ -190,9 +187,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
"paddw "MM"5, "MM"3 \n\t"\
"psrlw $1, "MM"2 \n\t" /* b */\
"psrlw $1, "MM"3 \n\t" /* f */\
- MOVQ" %[tmp0], "MM"4 \n\t" /* c */\
- MOVQ" %[tmp1], "MM"5 \n\t" /* d */\
- MOVQ" %[tmp2], "MM"7 \n\t" /* e */\
+ MOVQ" (%[tmp]), "MM"4 \n\t" /* c */\
+ MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\
+ MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\
"psubw "MM"4, "MM"2 \n\t" /* b-c */\
"psubw "MM"7, "MM"3 \n\t" /* f-e */\
MOVQ" "MM"5, "MM"0 \n\t"\
@@ -211,7 +208,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
"pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
"1: \n\t"\
\
- MOVQ" %[tmp1], "MM"2 \n\t" /* d */\
+ MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\
MOVQ" "MM"2, "MM"3 \n\t"\
"psubw "MM"6, "MM"2 \n\t" /* d-diff */\
"paddw "MM"6, "MM"3 \n\t" /* d+diff */\
@@ -219,16 +216,13 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
"pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
"packuswb "MM"1, "MM"1 \n\t"\
\
- :[tmp0]"=m"(tmp0),\
- [tmp1]"=m"(tmp1),\
- [tmp2]"=m"(tmp2),\
- [tmp3]"=m"(tmp3)\
- :[prev] "r"(prev),\
+ ::[prev] "r"(prev),\
[cur] "r"(cur),\
[next] "r"(next),\
[prefs]"r"((x86_reg)prefs),\
[mrefs]"r"((x86_reg)mrefs),\
- [mode] "g"(mode)\
+ [mode] "g"(mode),\
+ [tmp] "r"(tmp)\
);\
__asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
dst += STEP;\
More information about the ffmpeg-cvslog mailing list