[FFmpeg-cvslog] libpostproc: mmx code uses stack below %esp, fix that

Yuriy Kaminskiy git at videolan.org
Tue Sep 20 21:38:38 CEST 2011


ffmpeg | branch: master | Yuriy Kaminskiy <yumkam at mail.ru> | Tue Sep 20 21:32:35 2011 +0200| [2b2617da6b26d575c4fd9a6b2fb0906ece861210] | committer: Michael Niedermayer

libpostproc: mmx code uses stack below %esp, fix that

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2b2617da6b26d575c4fd9a6b2fb0906ece861210
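
For context, the pattern the patch applies looks roughly like the sketch below. It is not the libpostproc code itself; the function name, buffer size and instruction sequence are illustrative only. Instead of carving scratch space out of the stack below %esp inside the asm block, an aligned temporary is declared in C and handed to the asm through an ordinary "r" input operand:

    #include <stdint.h>

    /* Hypothetical illustration of the fix: scratch space is a normal,
     * 8-byte-aligned C object instead of memory below %esp. */
    static void copy8_via_scratch(uint8_t *dst, const uint8_t *src)
    {
    #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
        /* aligned scratch buffer, analogous to DECLARE_ALIGNED(8, uint64_t, tmp)[4] */
        uint64_t tmp[4] __attribute__((aligned(8)));

        __asm__ volatile(
            /* old, unsafe style: "lea -40(%%esp), %%ecx" and stores through
             * %%ecx write below the stack pointer; a signal handler arriving
             * in between may clobber that memory.                          */

            /* new style: spill through the C-provided buffer instead.      */
            "movq (%1), %%mm0       \n\t"   /* load 8 source bytes           */
            "movq %%mm0, (%2)       \n\t"   /* spill to tmp[0]               */
            "movq (%2), %%mm1       \n\t"   /* reload from tmp[0]            */
            "movq %%mm1, (%0)       \n\t"   /* store result                  */
            "emms                   \n\t"
            :
            : "r"(dst), "r"(src), "r"(tmp)  /* tmp passed as an operand      */
            : "memory"
        );
    #else
        for (int i = 0; i < 8; i++)
            dst[i] = src[i];
    #endif
    }

Writing below %esp is unsafe on 32-bit x86 because that ABI defines no red zone: an asynchronous signal can arrive between any two instructions, and its handler frame is pushed immediately below the current stack pointer, overwriting the "scratch" data. Passing an aligned C array as an extra operand avoids this and also removes the need for the ALIGN_MASK trick and the extra clobbered register.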
---

 libpostproc/postprocess_template.c |   61 ++++++++++++++++-------------------
 1 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 918d005..21a8626 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -25,8 +25,6 @@
 
 #include "libavutil/x86_cpu.h"
 
-#define ALIGN_MASK "$-8"
-
 #undef REAL_PAVGB
 #undef PAVGB
 #undef PMINUB
@@ -767,11 +765,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 }
 */
 #elif HAVE_MMX
+    DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
     src+= stride*4;
     __asm__ volatile(
         "pxor %%mm7, %%mm7                      \n\t"
-        "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars
-        "and "ALIGN_MASK", %%"REG_c"            \n\t" // align
 //      0       1       2       3       4       5       6       7
 //      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 edx+%1  edx+2%1
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1
@@ -813,8 +810,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - H3
         "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - 2L3
         "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - 2H3
-        "movq %%mm0, (%%"REG_c")                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-        "movq %%mm1, 8(%%"REG_c")               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+        "movq %%mm0, (%3)                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+        "movq %%mm1, 8(%3)                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
         "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
         "movq %%mm0, %%mm1                      \n\t"
@@ -823,8 +820,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 
         "psubw %%mm0, %%mm2                     \n\t" // L3 - L4
         "psubw %%mm1, %%mm3                     \n\t" // H3 - H4
-        "movq %%mm2, 16(%%"REG_c")              \n\t" // L3 - L4
-        "movq %%mm3, 24(%%"REG_c")              \n\t" // H3 - H4
+        "movq %%mm2, 16(%3)                     \n\t" // L3 - L4
+        "movq %%mm3, 24(%3)                     \n\t" // H3 - H4
         "paddw %%mm4, %%mm4                     \n\t" // 2L2
         "paddw %%mm5, %%mm5                     \n\t" // 2H2
         "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
@@ -872,8 +869,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6 - 2L7
         "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6 - 2H7
 
-        "movq (%%"REG_c"), %%mm2                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-        "movq 8(%%"REG_c"), %%mm3               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+        "movq (%3), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+        "movq 8(%3), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
 #if HAVE_MMX2
         "movq %%mm7, %%mm6                      \n\t" // 0
@@ -951,8 +948,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psrlw $6, %%mm4                        \n\t"
         "psrlw $6, %%mm5                        \n\t"
 
-        "movq 16(%%"REG_c"), %%mm0              \n\t" // L3 - L4
-        "movq 24(%%"REG_c"), %%mm1              \n\t" // H3 - H4
+        "movq 16(%3), %%mm0                     \n\t" // L3 - L4
+        "movq 24(%3), %%mm1                     \n\t" // H3 - H4
 
         "pxor %%mm2, %%mm2                      \n\t"
         "pxor %%mm3, %%mm3                      \n\t"
@@ -995,8 +992,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "movq %%mm0, (%0, %1)                   \n\t"
 
         : "+r" (src)
-        : "r" ((x86_reg)stride), "m" (c->pQPb)
-        : "%"REG_a, "%"REG_c
+        : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
+        : "%"REG_a
     );
 #else //HAVE_MMX2 || HAVE_AMD3DNOW
     const int l1= stride;
@@ -1044,6 +1041,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 {
 #if HAVE_MMX2 || HAVE_AMD3DNOW
+    DECLARE_ALIGNED(8, uint64_t, tmp)[3];
     __asm__ volatile(
         "pxor %%mm6, %%mm6                      \n\t"
         "pcmpeqb %%mm7, %%mm7                   \n\t"
@@ -1137,13 +1135,11 @@ FIND_MIN_MAX((%0, %1, 8))
         "movd %%mm6, %%ecx                      \n\t"
         "cmpb "MANGLE(deringThreshold)", %%cl   \n\t"
         " jb 1f                                 \n\t"
-        "lea -24(%%"REG_SP"), %%"REG_c"         \n\t"
-        "and "ALIGN_MASK", %%"REG_c"            \n\t"
         PAVGB(%%mm0, %%mm7)                           // a=(max + min)/2
         "punpcklbw %%mm7, %%mm7                 \n\t"
         "punpcklbw %%mm7, %%mm7                 \n\t"
         "punpcklbw %%mm7, %%mm7                 \n\t"
-        "movq %%mm7, (%%"REG_c")                \n\t"
+        "movq %%mm7, (%4)                       \n\t"
 
         "movq (%0), %%mm0                       \n\t" // L10
         "movq %%mm0, %%mm1                      \n\t" // L10
@@ -1207,8 +1203,8 @@ FIND_MIN_MAX((%0, %1, 8))
         PAVGB(t0, lx)                                 /* (src[-1] + src[+1])/2 */\
         PAVGB(sx, lx)                                 /* (src[-1] + 2src[0] + src[+1])/4 */\
         PAVGB(lx, pplx)                                     \
-        "movq " #lx ", 8(%%"REG_c")             \n\t"\
-        "movq (%%"REG_c"), " #lx "              \n\t"\
+        "movq " #lx ", 8(%4)                    \n\t"\
+        "movq (%4), " #lx "                     \n\t"\
         "psubusb " #lx ", " #t1 "               \n\t"\
         "psubusb " #lx ", " #t0 "               \n\t"\
         "psubusb " #lx ", " #sx "               \n\t"\
@@ -1235,7 +1231,7 @@ FIND_MIN_MAX((%0, %1, 8))
         "pandn " #dst ", " #ppsx "              \n\t"\
         "por " #pplx ", " #ppsx "               \n\t"\
         "movq " #ppsx ", " #dst "               \n\t"\
-        "movq 8(%%"REG_c"), " #lx "             \n\t"
+        "movq 8(%4), " #lx "                    \n\t"
 
 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
    REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
@@ -1265,7 +1261,7 @@ DERING_CORE((%%REGd, %1, 2),(%0, %1, 8)    ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,
 DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
 
         "1:                        \n\t"
-        : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2)
+        : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "r"(tmp)
         : "%"REG_a, "%"REG_d, "%"REG_c
     );
 #else //HAVE_MMX2 || HAVE_AMD3DNOW
@@ -2762,10 +2758,9 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 
     if(eq_mask != -1LL){
         uint8_t *temp_src= src;
+        DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
         __asm__ volatile(
             "pxor %%mm7, %%mm7                      \n\t"
-            "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars
-            "and "ALIGN_MASK", %%"REG_c"            \n\t" // align
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %1+8%1  ecx+4%1
 
@@ -2806,8 +2801,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - H3
             "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - 2L3
             "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - 2H3
-            "movq %%mm0, (%%"REG_c")                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-            "movq %%mm1, 8(%%"REG_c")               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+            "movq %%mm0, (%4)                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+            "movq %%mm1, 8(%4)                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
             "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
             "movq %%mm0, %%mm1                      \n\t"
@@ -2816,8 +2811,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 
             "psubw %%mm0, %%mm2                     \n\t" // L3 - L4
             "psubw %%mm1, %%mm3                     \n\t" // H3 - H4
-            "movq %%mm2, 16(%%"REG_c")              \n\t" // L3 - L4
-            "movq %%mm3, 24(%%"REG_c")              \n\t" // H3 - H4
+            "movq %%mm2, 16(%4)                     \n\t" // L3 - L4
+            "movq %%mm3, 24(%4)                     \n\t" // H3 - H4
             "paddw %%mm4, %%mm4                     \n\t" // 2L2
             "paddw %%mm5, %%mm5                     \n\t" // 2H2
             "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
@@ -2865,8 +2860,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6 - 2L7
             "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6 - 2H7
 
-            "movq (%%"REG_c"), %%mm2                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-            "movq 8(%%"REG_c"), %%mm3               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+            "movq (%4), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+            "movq 8(%4), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
 #if HAVE_MMX2
             "movq %%mm7, %%mm6                      \n\t" // 0
@@ -2944,8 +2939,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psrlw $6, %%mm4                        \n\t"
             "psrlw $6, %%mm5                        \n\t"
 
-            "movq 16(%%"REG_c"), %%mm0              \n\t" // L3 - L4
-            "movq 24(%%"REG_c"), %%mm1              \n\t" // H3 - H4
+            "movq 16(%4), %%mm0                     \n\t" // L3 - L4
+            "movq 24(%4), %%mm1                     \n\t" // H3 - H4
 
             "pxor %%mm2, %%mm2                      \n\t"
             "pxor %%mm3, %%mm3                      \n\t"
@@ -2990,8 +2985,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "movq %%mm0, (%0, %1)                   \n\t"
 
             : "+r" (temp_src)
-            : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask)
-            : "%"REG_a, "%"REG_c
+            : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
+            : "%"REG_a
         );
     }
 /*if(step==16){


