[FFmpeg-cvslog] asm: FF_-prefix internal macros used in inline assembly

Matthieu Bouron git at videolan.org
Mon Jun 27 17:25:27 CEST 2016


ffmpeg | branch: master | Matthieu Bouron <matthieu.bouron at stupeflix.com> | Mon Jun 27 17:21:04 2016 +0200| [9eb3da2f9942cf1b1148d242bccfc383f666feb6] | committer: Matthieu Bouron

asm: FF_-prefix internal macros used in inline assembly

See merge commit '39d6d3618d48625decaff7d9bdbb45b44ef2a805'.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9eb3da2f9942cf1b1148d242bccfc383f666feb6
---

 libavcodec/x86/cabac.h                     |   20 +-
 libavcodec/x86/h264_i386.h                 |   12 +-
 libavcodec/x86/hpeldsp_rnd_template.c      |   56 +--
 libavcodec/x86/me_cmp_init.c               |   44 +-
 libavcodec/x86/mpegvideo.c                 |   88 ++--
 libavcodec/x86/mpegvideoenc_template.c     |   36 +-
 libavcodec/x86/rnd_template.c              |   44 +-
 libavcodec/x86/snowdsp.c                   |  180 ++++----
 libavcodec/x86/vc1dsp_mmx.c                |    6 +-
 libavfilter/x86/vf_noise.c                 |   40 +-
 libavutil/x86/asm.h                        |   66 +--
 libavutil/x86/cpu.c                        |    4 +-
 libpostproc/postprocess_template.c         |  644 ++++++++++++++--------------
 libswscale/x86/hscale_fast_bilinear_simd.c |  124 +++---
 libswscale/x86/rgb2rgb_template.c          |  386 ++++++++---------
 libswscale/x86/swscale.c                   |   30 +-
 libswscale/x86/swscale_template.c          |  446 +++++++++----------
 17 files changed, 1113 insertions(+), 1113 deletions(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 4795f5b..cfd3b75 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -45,7 +45,7 @@
 #define END_CHECK(end) ""
 #else
 #define END_CHECK(end) \
-        "cmp    "end"       , %%"REG_c"                                 \n\t"\
+        "cmp    "end"       , %%"FF_REG_c"                              \n\t"\
         "jge    1f                                                      \n\t"
 #endif
 
@@ -92,11 +92,11 @@
         "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "test   "lowword"   , "lowword"                                 \n\t"\
         "jnz    2f                                                      \n\t"\
-        "mov    "byte"      , %%"REG_c"                                 \n\t"\
+        "mov    "byte"      , %%"FF_REG_c"                              \n\t"\
         END_CHECK(end)\
-        "add"OPSIZE" $2     , "byte"                                    \n\t"\
+        "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\
         "1:                                                             \n\t"\
-        "movzwl (%%"REG_c") , "tmp"                                     \n\t"\
+        "movzwl (%%"FF_REG_c") , "tmp"                                  \n\t"\
         "lea    -1("low")   , %%ecx                                     \n\t"\
         "xor    "low"       , %%ecx                                     \n\t"\
         "shr    $15         , %%ecx                                     \n\t"\
@@ -153,11 +153,11 @@
         "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "test   "lowword"   , "lowword"                                 \n\t"\
         " jnz   2f                                                      \n\t"\
-        "mov    "byte"      , %%"REG_c"                                 \n\t"\
+        "mov    "byte"      , %%"FF_REG_c"                              \n\t"\
         END_CHECK(end)\
-        "add"OPSIZE" $2     , "byte"                                    \n\t"\
+        "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\
         "1:                                                             \n\t"\
-        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
+        "movzwl (%%"FF_REG_c") , "tmp"                                  \n\t"\
         "lea    -1("low")   , %%ecx                                     \n\t"\
         "xor    "low"       , %%ecx                                     \n\t"\
         "shr    $15         , %%ecx                                     \n\t"\
@@ -203,7 +203,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
           "i"(offsetof(CABACContext, bytestream_end))
           TABLES_ARG
           ,"1"(c->low), "2"(c->range)
-        : "%"REG_c, "memory"
+        : "%"FF_REG_c, "memory"
     );
     return bit & 1;
 }
@@ -240,7 +240,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
         "addl          %%edx, %%eax     \n\t"
         "cmp         %c5(%2), %1        \n\t"
         "jge              1f            \n\t"
-        "add"OPSIZE"      $2, %c4(%2)   \n\t"
+        "add"FF_OPSIZE"   $2, %c4(%2)   \n\t"
 #endif
         "1:                             \n\t"
         "movl          %%eax, %c3(%2)   \n\t"
@@ -281,7 +281,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
         "addl          %%ecx, %%eax     \n\t"
         "cmp         %c5(%2), %1        \n\t"
         "jge              1f            \n\t"
-        "add"OPSIZE"      $2, %c4(%2)   \n\t"
+        "add"FF_OPSIZE"   $2, %c4(%2)   \n\t"
         "1:                             \n\t"
         "movl          %%eax, %c3(%2)   \n\t"
 
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 4dfbc30..19cd128 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -91,13 +91,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "sub  %10, %1                           \n\t"
         "mov  %2, %0                            \n\t"
         "movl %7, %%ecx                         \n\t"
-        "add  %1, %%"REG_c"                     \n\t"
+        "add  %1, %%"FF_REG_c"                  \n\t"
         "movl %%ecx, (%0)                       \n\t"
 
         "test $1, %4                            \n\t"
         " jnz 5f                                \n\t"
 
-        "add"OPSIZE"  $4, %2                    \n\t"
+        "add"FF_OPSIZE"  $4, %2                 \n\t"
 
         "4:                                     \n\t"
         "add  $1, %1                            \n\t"
@@ -105,7 +105,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         " jb 3b                                 \n\t"
         "mov  %2, %0                            \n\t"
         "movl %7, %%ecx                         \n\t"
-        "add  %1, %%"REG_c"                     \n\t"
+        "add  %1, %%"FF_REG_c"                  \n\t"
         "movl %%ecx, (%0)                       \n\t"
         "5:                                     \n\t"
         "add  %9, %k0                           \n\t"
@@ -116,7 +116,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
           "i"(offsetof(CABACContext, bytestream)),
           "i"(offsetof(CABACContext, bytestream_end))
           TABLES_ARG
-        : "%"REG_c, "memory"
+        : "%"FF_REG_c, "memory"
     );
     return coeff_count;
 }
@@ -183,7 +183,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "test $1, %4                            \n\t"
         " jnz 5f                                \n\t"
 
-        "add"OPSIZE"  $4, %2                    \n\t"
+        "add"FF_OPSIZE"  $4, %2                 \n\t"
 
         "4:                                     \n\t"
         "add $1, %6                             \n\t"
@@ -202,7 +202,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
           "i"(offsetof(CABACContext, bytestream)),
           "i"(offsetof(CABACContext, bytestream_end)),
           "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
-        : "%"REG_c, "memory"
+        : "%"FF_REG_c, "memory"
     );
     return coeff_count;
 }
diff --git a/libavcodec/x86/hpeldsp_rnd_template.c b/libavcodec/x86/hpeldsp_rnd_template.c
index e20d065..2bff2d2 100644
--- a/libavcodec/x86/hpeldsp_rnd_template.c
+++ b/libavcodec/x86/hpeldsp_rnd_template.c
@@ -32,7 +32,7 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
-        "lea    (%3, %3), %%"REG_a"     \n\t"
+        "lea    (%3, %3), %%"FF_REG_a"  \n\t"
         ".p2align 3                     \n\t"
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
@@ -42,8 +42,8 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
         "movq   %%mm4, (%2)             \n\t"
         "movq   %%mm5, (%2, %3)         \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   1(%1), %%mm1            \n\t"
         "movq   (%1, %3), %%mm2         \n\t"
@@ -51,20 +51,20 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
         "movq   %%mm4, (%2)             \n\t"
         "movq   %%mm5, (%2, %3)         \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
         :"+g"(h), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
-        :REG_a, "memory");
+        :FF_REG_a, "memory");
 }
 
 av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
-        "lea        (%3, %3), %%"REG_a" \n\t"
+        "lea    (%3, %3), %%"FF_REG_a"  \n\t"
         ".p2align 3                     \n\t"
         "1:                             \n\t"
         "movq   (%1), %%mm0             \n\t"
@@ -81,8 +81,8 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
         "movq   %%mm4, 8(%2)            \n\t"
         "movq   %%mm5, 8(%2, %3)        \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
         "movq   (%1), %%mm0             \n\t"
         "movq   1(%1), %%mm1            \n\t"
         "movq   (%1, %3), %%mm2         \n\t"
@@ -97,42 +97,42 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
         PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
         "movq   %%mm4, 8(%2)            \n\t"
         "movq   %%mm5, 8(%2, %3)        \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
         :"+g"(h), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
-        :REG_a, "memory");
+        :FF_REG_a, "memory");
 }
 
 av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
-        "lea (%3, %3), %%"REG_a"        \n\t"
+        "lea (%3, %3), %%"FF_REG_a"     \n\t"
         "movq (%1), %%mm0               \n\t"
         ".p2align 3                     \n\t"
         "1:                             \n\t"
         "movq   (%1, %3), %%mm1         \n\t"
-        "movq   (%1, %%"REG_a"),%%mm2   \n\t"
+        "movq   (%1, %%"FF_REG_a"),%%mm2\n\t"
         PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
         "movq   %%mm4, (%2)             \n\t"
         "movq   %%mm5, (%2, %3)         \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
         "movq   (%1, %3), %%mm1         \n\t"
-        "movq   (%1, %%"REG_a"),%%mm0   \n\t"
+        "movq   (%1, %%"FF_REG_a"),%%mm0\n\t"
         PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
         "movq   %%mm4, (%2)             \n\t"
         "movq   %%mm5, (%2, %3)         \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
         :"+g"(h), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
-        :REG_a, "memory");
+        :FF_REG_a, "memory");
 }
 
 av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -166,12 +166,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
-        "lea    (%3, %3), %%"REG_a"     \n\t"
+        "lea    (%3, %3), %%"FF_REG_a"  \n\t"
         "movq   (%1), %%mm0             \n\t"
         ".p2align 3                     \n\t"
         "1:                             \n\t"
         "movq   (%1, %3), %%mm1         \n\t"
-        "movq   (%1, %%"REG_a"), %%mm2  \n\t"
+        "movq   (%1, %%"FF_REG_a"), %%mm2 \n\t"
         PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
         "movq   (%2), %%mm3             \n\t"
         PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6)
@@ -179,11 +179,11 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
         PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
         "movq   %%mm0, (%2)             \n\t"
         "movq   %%mm1, (%2, %3)         \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
 
         "movq   (%1, %3), %%mm1         \n\t"
-        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
+        "movq   (%1, %%"FF_REG_a"), %%mm0 \n\t"
         PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
         "movq   (%2), %%mm3             \n\t"
         PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6)
@@ -191,12 +191,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
         PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
         "movq   %%mm2, (%2)             \n\t"
         "movq   %%mm1, (%2, %3)         \n\t"
-        "add    %%"REG_a", %1           \n\t"
-        "add    %%"REG_a", %2           \n\t"
+        "add    %%"FF_REG_a", %1        \n\t"
+        "add    %%"FF_REG_a", %2        \n\t"
 
         "subl   $4, %0                  \n\t"
         "jnz    1b                      \n\t"
         :"+g"(h), "+S"(pixels), "+D"(block)
         :"r"((x86_reg)line_size)
-        :REG_a, "memory");
+        :FF_REG_a, "memory");
 }
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index 49f50d0..dc3e6f8 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -283,15 +283,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
     __asm__ volatile (
         ".p2align 4                     \n\t"
         "1:                             \n\t"
-        "movq (%1, %%"REG_a"), %%mm0    \n\t"
-        "movq (%2, %%"REG_a"), %%mm2    \n\t"
-        "movq (%2, %%"REG_a"), %%mm4    \n\t"
-        "add %3, %%"REG_a"              \n\t"
+        "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+        "movq (%2, %%"FF_REG_a"), %%mm2 \n\t"
+        "movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
+        "add %3, %%"FF_REG_a"           \n\t"
         "psubusb %%mm0, %%mm2           \n\t"
         "psubusb %%mm4, %%mm0           \n\t"
-        "movq (%1, %%"REG_a"), %%mm1    \n\t"
-        "movq (%2, %%"REG_a"), %%mm3    \n\t"
-        "movq (%2, %%"REG_a"), %%mm5    \n\t"
+        "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
+        "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
+        "movq (%2, %%"FF_REG_a"), %%mm5 \n\t"
         "psubusb %%mm1, %%mm3           \n\t"
         "psubusb %%mm5, %%mm1           \n\t"
         "por %%mm2, %%mm0               \n\t"
@@ -306,7 +306,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
         "paddw %%mm3, %%mm2             \n\t"
         "paddw %%mm2, %%mm0             \n\t"
         "paddw %%mm0, %%mm6             \n\t"
-        "add %3, %%"REG_a"              \n\t"
+        "add %3, %%"FF_REG_a"           \n\t"
         " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk2 - len), "r" (stride));
@@ -319,18 +319,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
     __asm__ volatile (
         ".p2align 4                     \n\t"
         "1:                             \n\t"
-        "movq (%1, %%"REG_a"), %%mm0    \n\t"
-        "movq (%2, %%"REG_a"), %%mm1    \n\t"
-        "movq (%1, %%"REG_a"), %%mm2    \n\t"
-        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+        "movq (%2, %%"FF_REG_a"), %%mm1 \n\t"
+        "movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
+        "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
         "punpcklbw %%mm7, %%mm0         \n\t"
         "punpcklbw %%mm7, %%mm1         \n\t"
         "punpckhbw %%mm7, %%mm2         \n\t"
         "punpckhbw %%mm7, %%mm3         \n\t"
         "paddw %%mm0, %%mm1             \n\t"
         "paddw %%mm2, %%mm3             \n\t"
-        "movq (%3, %%"REG_a"), %%mm4    \n\t"
-        "movq (%3, %%"REG_a"), %%mm2    \n\t"
+        "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
+        "movq (%3, %%"FF_REG_a"), %%mm2 \n\t"
         "paddw %%mm5, %%mm1             \n\t"
         "paddw %%mm5, %%mm3             \n\t"
         "psrlw $1, %%mm1                \n\t"
@@ -344,7 +344,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
         "punpckhbw %%mm7, %%mm1         \n\t"
         "paddw %%mm1, %%mm0             \n\t"
         "paddw %%mm0, %%mm6             \n\t"
-        "add %4, %%"REG_a"              \n\t"
+        "add %4, %%"FF_REG_a"           \n\t"
         " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len),
@@ -356,8 +356,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
 {
     x86_reg len = -stride * h;
     __asm__ volatile (
-        "movq  (%1, %%"REG_a"), %%mm0   \n\t"
-        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
+        "movq  (%1, %%"FF_REG_a"), %%mm0\n\t"
+        "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
         "movq %%mm0, %%mm1              \n\t"
         "movq %%mm2, %%mm3              \n\t"
         "punpcklbw %%mm7, %%mm0         \n\t"
@@ -368,8 +368,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
         "paddw %%mm3, %%mm1             \n\t"
         ".p2align 4                     \n\t"
         "1:                             \n\t"
-        "movq  (%2, %%"REG_a"), %%mm2   \n\t"
-        "movq 1(%2, %%"REG_a"), %%mm4   \n\t"
+        "movq  (%2, %%"FF_REG_a"), %%mm2\n\t"
+        "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t"
         "movq %%mm2, %%mm3              \n\t"
         "movq %%mm4, %%mm5              \n\t"
         "punpcklbw %%mm7, %%mm2         \n\t"
@@ -383,8 +383,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
         "paddw %%mm3, %%mm1             \n\t"
         "paddw %%mm5, %%mm0             \n\t"
         "paddw %%mm5, %%mm1             \n\t"
-        "movq (%3, %%"REG_a"), %%mm4    \n\t"
-        "movq (%3, %%"REG_a"), %%mm5    \n\t"
+        "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
+        "movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
         "psrlw $2, %%mm0                \n\t"
         "psrlw $2, %%mm1                \n\t"
         "packuswb %%mm1, %%mm0          \n\t"
@@ -398,7 +398,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
         "paddw %%mm4, %%mm6             \n\t"
         "movq  %%mm2, %%mm0             \n\t"
         "movq  %%mm3, %%mm1             \n\t"
-        "add %4, %%"REG_a"              \n\t"
+        "add %4, %%"FF_REG_a"           \n\t"
         " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
diff --git a/libavcodec/x86/mpegvideo.c b/libavcodec/x86/mpegvideo.c
index 1811326..35a8264 100644
--- a/libavcodec/x86/mpegvideo.c
+++ b/libavcodec/x86/mpegvideo.c
@@ -188,13 +188,13 @@ __asm__ volatile(
                 "movd %2, %%mm6                 \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
-                "mov %3, %%"REG_a"              \n\t"
+                "mov %3, %%"FF_REG_a"           \n\t"
                 ".p2align 4                     \n\t"
                 "1:                             \n\t"
-                "movq (%0, %%"REG_a"), %%mm0    \n\t"
-                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
-                "movq (%1, %%"REG_a"), %%mm4    \n\t"
-                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+                "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
+                "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+                "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
                 "pxor %%mm2, %%mm2              \n\t"
@@ -209,8 +209,8 @@ __asm__ volatile(
                 "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
                 "pxor %%mm4, %%mm4              \n\t"
                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
-                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                 "psraw $3, %%mm0                \n\t"
                 "psraw $3, %%mm1                \n\t"
                 "psubw %%mm7, %%mm0             \n\t"
@@ -223,13 +223,13 @@ __asm__ volatile(
                 "psubw %%mm3, %%mm1             \n\t"
                 "pandn %%mm0, %%mm4             \n\t"
                 "pandn %%mm1, %%mm5             \n\t"
-                "movq %%mm4, (%0, %%"REG_a")    \n\t"
-                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+                "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+                "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
 
-                "add $16, %%"REG_a"             \n\t"
+                "add $16, %%"FF_REG_a"          \n\t"
                 "js 1b                          \n\t"
                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
-                : "%"REG_a, "memory"
+                : "%"FF_REG_a, "memory"
         );
     block[0]= block0;
 }
@@ -251,13 +251,13 @@ __asm__ volatile(
                 "movd %2, %%mm6                 \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
-                "mov %3, %%"REG_a"              \n\t"
+                "mov %3, %%"FF_REG_a"           \n\t"
                 ".p2align 4                     \n\t"
                 "1:                             \n\t"
-                "movq (%0, %%"REG_a"), %%mm0    \n\t"
-                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
-                "movq (%1, %%"REG_a"), %%mm4    \n\t"
-                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+                "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
+                "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+                "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
                 "pxor %%mm2, %%mm2              \n\t"
@@ -276,8 +276,8 @@ __asm__ volatile(
                 "pmullw %%mm5, %%mm1            \n\t" // (abs(block[i])*2 + 1)*q
                 "pxor %%mm4, %%mm4              \n\t"
                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
-                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                 "psraw $4, %%mm0                \n\t"
                 "psraw $4, %%mm1                \n\t"
                 "psubw %%mm7, %%mm0             \n\t"
@@ -290,13 +290,13 @@ __asm__ volatile(
                 "psubw %%mm3, %%mm1             \n\t"
                 "pandn %%mm0, %%mm4             \n\t"
                 "pandn %%mm1, %%mm5             \n\t"
-                "movq %%mm4, (%0, %%"REG_a")    \n\t"
-                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+                "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+                "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
 
-                "add $16, %%"REG_a"             \n\t"
+                "add $16, %%"FF_REG_a"          \n\t"
                 "js 1b                          \n\t"
                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
-                : "%"REG_a, "memory"
+                : "%"FF_REG_a, "memory"
         );
 }
 
@@ -326,13 +326,13 @@ __asm__ volatile(
                 "movd %2, %%mm6                 \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
-                "mov %3, %%"REG_a"              \n\t"
+                "mov %3, %%"FF_REG_a"           \n\t"
                 ".p2align 4                     \n\t"
                 "1:                             \n\t"
-                "movq (%0, %%"REG_a"), %%mm0    \n\t"
-                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
-                "movq (%1, %%"REG_a"), %%mm4    \n\t"
-                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+                "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
+                "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+                "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
                 "pxor %%mm2, %%mm2              \n\t"
@@ -347,8 +347,8 @@ __asm__ volatile(
                 "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
                 "pxor %%mm4, %%mm4              \n\t"
                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
-                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                 "psraw $4, %%mm0                \n\t"
                 "psraw $4, %%mm1                \n\t"
                 "pxor %%mm2, %%mm0              \n\t"
@@ -357,13 +357,13 @@ __asm__ volatile(
                 "psubw %%mm3, %%mm1             \n\t"
                 "pandn %%mm0, %%mm4             \n\t"
                 "pandn %%mm1, %%mm5             \n\t"
-                "movq %%mm4, (%0, %%"REG_a")    \n\t"
-                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+                "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+                "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
 
-                "add $16, %%"REG_a"             \n\t"
+                "add $16, %%"FF_REG_a"          \n\t"
                 "jng 1b                         \n\t"
                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
-                : "%"REG_a, "memory"
+                : "%"FF_REG_a, "memory"
         );
     block[0]= block0;
         //Note, we do not do mismatch control for intra as errors cannot accumulate
@@ -390,13 +390,13 @@ __asm__ volatile(
                 "movd %2, %%mm6                 \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
                 "packssdw %%mm6, %%mm6          \n\t"
-                "mov %3, %%"REG_a"              \n\t"
+                "mov %3, %%"FF_REG_a"           \n\t"
                 ".p2align 4                     \n\t"
                 "1:                             \n\t"
-                "movq (%0, %%"REG_a"), %%mm0    \n\t"
-                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
-                "movq (%1, %%"REG_a"), %%mm4    \n\t"
-                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+                "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
+                "movq (%1, %%"FF_REG_a"), %%mm4 \n\t"
+                "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
                 "pxor %%mm2, %%mm2              \n\t"
@@ -415,8 +415,8 @@ __asm__ volatile(
                 "paddw %%mm5, %%mm1             \n\t" // (abs(block[i])*2 + 1)*q
                 "pxor %%mm4, %%mm4              \n\t"
                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
-                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
-                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                 "psrlw $5, %%mm0                \n\t"
                 "psrlw $5, %%mm1                \n\t"
                 "pxor %%mm2, %%mm0              \n\t"
@@ -427,10 +427,10 @@ __asm__ volatile(
                 "pandn %%mm1, %%mm5             \n\t"
                 "pxor %%mm4, %%mm7              \n\t"
                 "pxor %%mm5, %%mm7              \n\t"
-                "movq %%mm4, (%0, %%"REG_a")    \n\t"
-                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+                "movq %%mm4, (%0, %%"FF_REG_a") \n\t"
+                "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
 
-                "add $16, %%"REG_a"             \n\t"
+                "add $16, %%"FF_REG_a"          \n\t"
                 "jng 1b                         \n\t"
                 "movd 124(%0, %3), %%mm0        \n\t"
                 "movq %%mm7, %%mm6              \n\t"
@@ -445,7 +445,7 @@ __asm__ volatile(
                 "movd %%mm0, 124(%0, %3)        \n\t"
 
                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
-                : "%"REG_a, "memory"
+                : "%"FF_REG_a, "memory"
         );
 }
 
diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c
index da76459..b251274 100644
--- a/libavcodec/x86/mpegvideoenc_template.c
+++ b/libavcodec/x86/mpegvideoenc_template.c
@@ -150,32 +150,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
     if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
 
         __asm__ volatile(
-            "movd %%"REG_a", "MM"3              \n\t" // last_non_zero_p1
+            "movd %%"FF_REG_a", "MM"3           \n\t" // last_non_zero_p1
             SPREADW(MM"3")
             "pxor "MM"7, "MM"7                  \n\t" // 0
             "pxor "MM"4, "MM"4                  \n\t" // 0
             MOVQ" (%2), "MM"5                   \n\t" // qmat[0]
             "pxor "MM"6, "MM"6                  \n\t"
             "psubw (%3), "MM"6                  \n\t" // -bias[0]
-            "mov $-128, %%"REG_a"               \n\t"
+            "mov $-128, %%"FF_REG_a"            \n\t"
             ".p2align 4                         \n\t"
             "1:                                 \n\t"
-            MOVQ" (%1, %%"REG_a"), "MM"0        \n\t" // block[i]
+            MOVQ" (%1, %%"FF_REG_a"), "MM"0     \n\t" // block[i]
             SAVE_SIGN(MM"1", MM"0")                   // ABS(block[i])
             "psubusw "MM"6, "MM"0               \n\t" // ABS(block[i]) + bias[0]
             "pmulhw "MM"5, "MM"0                \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
             "por "MM"0, "MM"4                   \n\t"
             RESTORE_SIGN(MM"1", MM"0")                // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-            MOVQ" "MM"0, (%5, %%"REG_a")        \n\t"
+            MOVQ" "MM"0, (%5, %%"FF_REG_a")     \n\t"
             "pcmpeqw "MM"7, "MM"0               \n\t" // out==0 ? 0xFF : 0x00
-            MOVQ" (%4, %%"REG_a"), "MM"1        \n\t"
-            MOVQ" "MM"7, (%1, %%"REG_a")        \n\t" // 0
+            MOVQ" (%4, %%"FF_REG_a"), "MM"1     \n\t"
+            MOVQ" "MM"7, (%1, %%"FF_REG_a")     \n\t" // 0
             "pandn "MM"1, "MM"0                 \n\t"
             PMAXW(MM"0", MM"3")
-            "add $"MMREG_WIDTH", %%"REG_a"      \n\t"
+            "add $"MMREG_WIDTH", %%"FF_REG_a"   \n\t"
             " js 1b                             \n\t"
             PMAX(MM"3", MM"0")
-            "movd "MM"3, %%"REG_a"              \n\t"
+            "movd "MM"3, %%"FF_REG_a"           \n\t"
             "movzbl %%al, %%eax                 \n\t" // last_non_zero_p1
             : "+a" (last_non_zero_p1)
             : "r" (block+64), "r" (qmat), "r" (bias),
@@ -185,31 +185,31 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
         );
     }else{ // FMT_H263
         __asm__ volatile(
-            "movd %%"REG_a", "MM"3              \n\t" // last_non_zero_p1
+            "movd %%"FF_REG_a", "MM"3           \n\t" // last_non_zero_p1
             SPREADW(MM"3")
             "pxor "MM"7, "MM"7                  \n\t" // 0
             "pxor "MM"4, "MM"4                  \n\t" // 0
-            "mov $-128, %%"REG_a"               \n\t"
+            "mov $-128, %%"FF_REG_a"            \n\t"
             ".p2align 4                         \n\t"
             "1:                                 \n\t"
-            MOVQ" (%1, %%"REG_a"), "MM"0        \n\t" // block[i]
+            MOVQ" (%1, %%"FF_REG_a"), "MM"0     \n\t" // block[i]
             SAVE_SIGN(MM"1", MM"0")                   // ABS(block[i])
-            MOVQ" (%3, %%"REG_a"), "MM"6        \n\t" // bias[0]
+            MOVQ" (%3, %%"FF_REG_a"), "MM"6     \n\t" // bias[0]
             "paddusw "MM"6, "MM"0               \n\t" // ABS(block[i]) + bias[0]
-            MOVQ" (%2, %%"REG_a"), "MM"5        \n\t" // qmat[i]
+            MOVQ" (%2, %%"FF_REG_a"), "MM"5     \n\t" // qmat[i]
             "pmulhw "MM"5, "MM"0                \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
             "por "MM"0, "MM"4                   \n\t"
             RESTORE_SIGN(MM"1", MM"0")                // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-            MOVQ" "MM"0, (%5, %%"REG_a")        \n\t"
+            MOVQ" "MM"0, (%5, %%"FF_REG_a")     \n\t"
             "pcmpeqw "MM"7, "MM"0               \n\t" // out==0 ? 0xFF : 0x00
-            MOVQ" (%4, %%"REG_a"), "MM"1        \n\t"
-            MOVQ" "MM"7, (%1, %%"REG_a")        \n\t" // 0
+            MOVQ" (%4, %%"FF_REG_a"), "MM"1     \n\t"
+            MOVQ" "MM"7, (%1, %%"FF_REG_a")     \n\t" // 0
             "pandn "MM"1, "MM"0                 \n\t"
             PMAXW(MM"0", MM"3")
-            "add $"MMREG_WIDTH", %%"REG_a"      \n\t"
+            "add $"MMREG_WIDTH", %%"FF_REG_a"   \n\t"
             " js 1b                             \n\t"
             PMAX(MM"3", MM"0")
-            "movd "MM"3, %%"REG_a"              \n\t"
+            "movd "MM"3, %%"FF_REG_a"           \n\t"
             "movzbl %%al, %%eax                 \n\t" // last_non_zero_p1
             : "+a" (last_non_zero_p1)
             : "r" (block+64), "r" (qmat+64), "r" (bias+64),
diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c
index ddca4eb..09946bd 100644
--- a/libavcodec/x86/rnd_template.c
+++ b/libavcodec/x86/rnd_template.c
@@ -46,12 +46,12 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
         "punpckhbw %%mm7, %%mm5         \n\t"
         "paddusw %%mm0, %%mm4           \n\t"
         "paddusw %%mm1, %%mm5           \n\t"
-        "xor    %%"REG_a", %%"REG_a"    \n\t"
+        "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t"
         "add    %3, %1                  \n\t"
         ".p2align 3                     \n\t"
         "1:                             \n\t"
-        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
-        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
+        "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t"
+        "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t"
         "movq   %%mm0, %%mm1            \n\t"
         "movq   %%mm2, %%mm3            \n\t"
         "punpcklbw %%mm7, %%mm0         \n\t"
@@ -67,11 +67,11 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
         "psrlw  $2, %%mm4               \n\t"
         "psrlw  $2, %%mm5               \n\t"
         "packuswb  %%mm5, %%mm4         \n\t"
-        "movq   %%mm4, (%2, %%"REG_a")  \n\t"
-        "add    %3, %%"REG_a"           \n\t"
+        "movq   %%mm4, (%2, %%"FF_REG_a")  \n\t"
+        "add    %3, %%"FF_REG_a"           \n\t"
 
-        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
-        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
+        "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
+        "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t"
         "movq   %%mm2, %%mm3            \n\t"
         "movq   %%mm4, %%mm5            \n\t"
         "punpcklbw %%mm7, %%mm2         \n\t"
@@ -87,14 +87,14 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
         "psrlw  $2, %%mm0               \n\t"
         "psrlw  $2, %%mm1               \n\t"
         "packuswb  %%mm1, %%mm0         \n\t"
-        "movq   %%mm0, (%2, %%"REG_a")  \n\t"
-        "add    %3, %%"REG_a"           \n\t"
+        "movq   %%mm0, (%2, %%"FF_REG_a")  \n\t"
+        "add    %3, %%"FF_REG_a"        \n\t"
 
         "subl   $2, %0                  \n\t"
         "jnz    1b                      \n\t"
         :"+g"(h), "+S"(pixels)
         :"D"(block), "r"((x86_reg)line_size)
-        :REG_a, "memory");
+        :FF_REG_a, "memory");
 }
 
 // avg_pixels
@@ -115,12 +115,12 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
         "punpckhbw %%mm7, %%mm5         \n\t"
         "paddusw %%mm0, %%mm4           \n\t"
         "paddusw %%mm1, %%mm5           \n\t"
-        "xor    %%"REG_a", %%"REG_a"    \n\t"
+        "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t"
         "add    %3, %1                  \n\t"
         ".p2align 3                     \n\t"
         "1:                             \n\t"
-        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
-        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
+        "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t"
+        "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t"
         "movq   %%mm0, %%mm1            \n\t"
         "movq   %%mm2, %%mm3            \n\t"
         "punpcklbw %%mm7, %%mm0         \n\t"
@@ -135,16 +135,16 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
         "paddusw %%mm1, %%mm5           \n\t"
         "psrlw  $2, %%mm4               \n\t"
         "psrlw  $2, %%mm5               \n\t"
-                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
+                "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t"
         "packuswb  %%mm5, %%mm4         \n\t"
                 "pcmpeqd %%mm2, %%mm2   \n\t"
                 "paddb %%mm2, %%mm2     \n\t"
                 PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2)
-                "movq   %%mm5, (%2, %%"REG_a")  \n\t"
-        "add    %3, %%"REG_a"                \n\t"
+                "movq   %%mm5, (%2, %%"FF_REG_a")  \n\t"
+        "add    %3, %%"FF_REG_a"        \n\t"
 
-        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
-        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
+        "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
+        "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t"
         "movq   %%mm2, %%mm3            \n\t"
         "movq   %%mm4, %%mm5            \n\t"
         "punpcklbw %%mm7, %%mm2         \n\t"
@@ -159,17 +159,17 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
         "paddusw %%mm5, %%mm1           \n\t"
         "psrlw  $2, %%mm0               \n\t"
         "psrlw  $2, %%mm1               \n\t"
-                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
+                "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t"
         "packuswb  %%mm1, %%mm0         \n\t"
                 "pcmpeqd %%mm2, %%mm2   \n\t"
                 "paddb %%mm2, %%mm2     \n\t"
                 PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2)
-                "movq   %%mm1, (%2, %%"REG_a")  \n\t"
-        "add    %3, %%"REG_a"           \n\t"
+                "movq   %%mm1, (%2, %%"FF_REG_a")  \n\t"
+        "add    %3, %%"FF_REG_a"           \n\t"
 
         "subl   $2, %0                  \n\t"
         "jnz    1b                      \n\t"
         :"+g"(h), "+S"(pixels)
         :"D"(block), "r"((x86_reg)line_size)
-        :REG_a, "memory");
+        :FF_REG_a, "memory");
 }
diff --git a/libavcodec/x86/snowdsp.c b/libavcodec/x86/snowdsp.c
index e2ad511..218e686 100644
--- a/libavcodec/x86/snowdsp.c
+++ b/libavcodec/x86/snowdsp.c
@@ -390,10 +390,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
 
 #if HAVE_7REGS
 #define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\
-        ""op" ("r",%%"REG_d"), %%"t0"      \n\t"\
-        ""op" 16("r",%%"REG_d"), %%"t1"    \n\t"\
-        ""op" 32("r",%%"REG_d"), %%"t2"    \n\t"\
-        ""op" 48("r",%%"REG_d"), %%"t3"    \n\t"
+        ""op" ("r",%%"FF_REG_d"), %%"t0"      \n\t"\
+        ""op" 16("r",%%"FF_REG_d"), %%"t1"    \n\t"\
+        ""op" 32("r",%%"FF_REG_d"), %%"t2"    \n\t"\
+        ""op" 48("r",%%"FF_REG_d"), %%"t3"    \n\t"
 
 #define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
         snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
@@ -408,10 +408,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
         "psubw %%"s3", %%"t3" \n\t"
 
 #define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
-        "movdqa %%"s0", ("w",%%"REG_d")      \n\t"\
-        "movdqa %%"s1", 16("w",%%"REG_d")    \n\t"\
-        "movdqa %%"s2", 32("w",%%"REG_d")    \n\t"\
-        "movdqa %%"s3", 48("w",%%"REG_d")    \n\t"
+        "movdqa %%"s0", ("w",%%"FF_REG_d")    \n\t"\
+        "movdqa %%"s1", 16("w",%%"FF_REG_d")  \n\t"\
+        "movdqa %%"s2", 32("w",%%"FF_REG_d")  \n\t"\
+        "movdqa %%"s3", 48("w",%%"FF_REG_d")  \n\t"
 
 #define snow_vertical_compose_sra(n,t0,t1,t2,t3)\
         "psraw $"n", %%"t0" \n\t"\
@@ -477,14 +477,14 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
         "psrlw $13, %%xmm5                           \n\t"
         "paddw %%xmm7, %%xmm5                        \n\t"
         snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6")
-        "movq   (%2,%%"REG_d"), %%xmm1        \n\t"
-        "movq  8(%2,%%"REG_d"), %%xmm3        \n\t"
+        "movq   (%2,%%"FF_REG_d"), %%xmm1            \n\t"
+        "movq  8(%2,%%"FF_REG_d"), %%xmm3            \n\t"
         "paddw %%xmm7, %%xmm1                        \n\t"
         "paddw %%xmm7, %%xmm3                        \n\t"
         "pavgw %%xmm1, %%xmm0                        \n\t"
         "pavgw %%xmm3, %%xmm2                        \n\t"
-        "movq 16(%2,%%"REG_d"), %%xmm1        \n\t"
-        "movq 24(%2,%%"REG_d"), %%xmm3        \n\t"
+        "movq 16(%2,%%"FF_REG_d"), %%xmm1            \n\t"
+        "movq 24(%2,%%"FF_REG_d"), %%xmm3            \n\t"
         "paddw %%xmm7, %%xmm1                        \n\t"
         "paddw %%xmm7, %%xmm3                        \n\t"
         "pavgw %%xmm1, %%xmm4                        \n\t"
@@ -504,17 +504,17 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
         snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6")
 
         "2:                                          \n\t"
-        "sub $64, %%"REG_d"                          \n\t"
+        "sub $64, %%"FF_REG_d"                       \n\t"
         "jge 1b                                      \n\t"
         :"+d"(i)
         :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
 }
 
 #define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
-        ""op" ("r",%%"REG_d"), %%"t0"   \n\t"\
-        ""op" 8("r",%%"REG_d"), %%"t1"  \n\t"\
-        ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\
-        ""op" 24("r",%%"REG_d"), %%"t3" \n\t"
+        ""op" ("r",%%"FF_REG_d"), %%"t0"   \n\t"\
+        ""op" 8("r",%%"FF_REG_d"), %%"t1"  \n\t"\
+        ""op" 16("r",%%"FF_REG_d"), %%"t2" \n\t"\
+        ""op" 24("r",%%"FF_REG_d"), %%"t3" \n\t"
 
 #define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
         snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
@@ -523,10 +523,10 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
         snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
 
 #define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
-        "movq %%"s0", ("w",%%"REG_d")   \n\t"\
-        "movq %%"s1", 8("w",%%"REG_d")  \n\t"\
-        "movq %%"s2", 16("w",%%"REG_d") \n\t"\
-        "movq %%"s3", 24("w",%%"REG_d") \n\t"
+        "movq %%"s0", ("w",%%"FF_REG_d")   \n\t"\
+        "movq %%"s1", 8("w",%%"FF_REG_d")  \n\t"\
+        "movq %%"s2", 16("w",%%"FF_REG_d") \n\t"\
+        "movq %%"s3", 24("w",%%"FF_REG_d") \n\t"
 
 #define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
         "movq %%"s0", %%"t0" \n\t"\
@@ -571,14 +571,14 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
         "psrlw $13, %%mm5                            \n\t"
         "paddw %%mm7, %%mm5                          \n\t"
         snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6")
-        "movq   (%2,%%"REG_d"), %%mm1         \n\t"
-        "movq  8(%2,%%"REG_d"), %%mm3         \n\t"
+        "movq   (%2,%%"FF_REG_d"), %%mm1             \n\t"
+        "movq  8(%2,%%"FF_REG_d"), %%mm3             \n\t"
         "paddw %%mm7, %%mm1                          \n\t"
         "paddw %%mm7, %%mm3                          \n\t"
         "pavgw %%mm1, %%mm0                          \n\t"
         "pavgw %%mm3, %%mm2                          \n\t"
-        "movq 16(%2,%%"REG_d"), %%mm1         \n\t"
-        "movq 24(%2,%%"REG_d"), %%mm3         \n\t"
+        "movq 16(%2,%%"FF_REG_d"), %%mm1             \n\t"
+        "movq 24(%2,%%"FF_REG_d"), %%mm3             \n\t"
         "paddw %%mm7, %%mm1                          \n\t"
         "paddw %%mm7, %%mm3                          \n\t"
         "pavgw %%mm1, %%mm4                          \n\t"
@@ -598,7 +598,7 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
         snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6")
 
         "2:                                          \n\t"
-        "sub $32, %%"REG_d"                          \n\t"
+        "sub $32, %%"FF_REG_d"                       \n\t"
         "jge 1b                                      \n\t"
         :"+d"(i)
         :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
@@ -610,39 +610,39 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
     IDWTELEM * * dst_array = sb->line + src_y;\
     x86_reg tmp;\
     __asm__ volatile(\
-             "mov  %7, %%"REG_c"             \n\t"\
+             "mov  %7, %%"FF_REG_c"          \n\t"\
              "mov  %6, %2                    \n\t"\
-             "mov  %4, %%"REG_S"             \n\t"\
+             "mov  %4, %%"FF_REG_S"          \n\t"\
              "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
              "pcmpeqd %%xmm3, %%xmm3         \n\t"\
              "psllw $15, %%xmm3              \n\t"\
              "psrlw $12, %%xmm3              \n\t" /* FRAC_BITS >> 1 */\
              "1:                             \n\t"\
-             "mov %1, %%"REG_D"              \n\t"\
-             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
-             "add %3, %%"REG_D"              \n\t"
+             "mov %1, %%"FF_REG_D"           \n\t"\
+             "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\
+             "add %3, %%"FF_REG_D"           \n\t"
 
 #define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
-             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
-             "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
+             "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+             "movq (%%"FF_REG_d"), %%"out_reg1"                           \n\t"\
+             "movq (%%"FF_REG_d", %%"FF_REG_c"), %%"out_reg2"             \n\t"\
              "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
              "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
-             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
-             "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
+             "movq "s_offset"(%%"FF_REG_S"), %%xmm0    \n\t"\
+             "movq "s_offset"+16(%%"FF_REG_S"), %%xmm4 \n\t"\
              "punpcklbw %%xmm7, %%xmm0       \n\t"\
              "punpcklbw %%xmm7, %%xmm4       \n\t"\
              "pmullw %%xmm0, %%"out_reg1"    \n\t"\
              "pmullw %%xmm4, %%"out_reg2"    \n\t"
 
 #define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
-             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
-             "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
+             "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+             "movq (%%"FF_REG_d"), %%"out_reg1"                           \n\t"\
+             "movq 8(%%"FF_REG_d"), %%"out_reg2"                          \n\t"\
              "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
              "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
-             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
-             "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
+             "movq "s_offset"(%%"FF_REG_S"), %%xmm0   \n\t"\
+             "movq "s_offset"+8(%%"FF_REG_S"), %%xmm4 \n\t"\
              "punpcklbw %%xmm7, %%xmm0       \n\t"\
              "punpcklbw %%xmm7, %%xmm4       \n\t"\
              "pmullw %%xmm0, %%"out_reg1"    \n\t"\
@@ -659,12 +659,12 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
              "paddusw %%xmm6, %%xmm5         \n\t"
 
 #define snow_inner_add_yblock_sse2_end_common1\
-             "add $32, %%"REG_S"             \n\t"\
-             "add %%"REG_c", %0              \n\t"\
-             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
-             "add %%"REG_c", (%%"REG_a")     \n\t"
+             "add $32, %%"FF_REG_S"                            \n\t"\
+             "add %%"FF_REG_c", %0                             \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", (%%"FF_REG_a")                 \n\t"
 
 #define snow_inner_add_yblock_sse2_end_common2\
              "jnz 1b                         \n\t"\
@@ -672,18 +672,18 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
              :\
              "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
              XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\
-             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+             "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d"");
 
 #define snow_inner_add_yblock_sse2_end_8\
-             "sal $1, %%"REG_c"              \n\t"\
-             "add"OPSIZE" $"PTR_SIZE"*2, %1  \n\t"\
+             "sal $1, %%"FF_REG_c"                \n\t"\
+             "add"FF_OPSIZE" $"FF_PTR_SIZE"*2, %1 \n\t"\
              snow_inner_add_yblock_sse2_end_common1\
-             "sar $1, %%"REG_c"              \n\t"\
+             "sar $1, %%"FF_REG_c"           \n\t"\
              "sub $2, %2                     \n\t"\
              snow_inner_add_yblock_sse2_end_common2
 
 #define snow_inner_add_yblock_sse2_end_16\
-             "add"OPSIZE" $"PTR_SIZE"*1, %1  \n\t"\
+             "add"FF_OPSIZE" $"FF_PTR_SIZE"*1, %1 \n\t"\
              snow_inner_add_yblock_sse2_end_common1\
              "dec %2                         \n\t"\
              snow_inner_add_yblock_sse2_end_common2
@@ -696,28 +696,28 @@ snow_inner_add_yblock_sse2_accum_8("2", "8")
 snow_inner_add_yblock_sse2_accum_8("1", "128")
 snow_inner_add_yblock_sse2_accum_8("0", "136")
 
-             "mov %0, %%"REG_d"              \n\t"
-             "movdqa (%%"REG_D"), %%xmm0     \n\t"
+             "mov %0, %%"FF_REG_d"           \n\t"
+             "movdqa (%%"FF_REG_D"), %%xmm0  \n\t"
              "movdqa %%xmm1, %%xmm2          \n\t"
 
              "punpckhwd %%xmm7, %%xmm1       \n\t"
              "punpcklwd %%xmm7, %%xmm2       \n\t"
              "paddd %%xmm2, %%xmm0           \n\t"
-             "movdqa 16(%%"REG_D"), %%xmm2   \n\t"
+             "movdqa 16(%%"FF_REG_D"), %%xmm2\n\t"
              "paddd %%xmm1, %%xmm2           \n\t"
              "paddd %%xmm3, %%xmm0           \n\t"
              "paddd %%xmm3, %%xmm2           \n\t"
 
-             "mov %1, %%"REG_D"              \n\t"
-             "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
-             "add %3, %%"REG_D"              \n\t"
+             "mov %1, %%"FF_REG_D"           \n\t"
+             "mov "FF_PTR_SIZE"(%%"FF_REG_D"), %%"FF_REG_D"; \n\t"
+             "add %3, %%"FF_REG_D"           \n\t"
 
-             "movdqa (%%"REG_D"), %%xmm4     \n\t"
+             "movdqa (%%"FF_REG_D"), %%xmm4  \n\t"
              "movdqa %%xmm5, %%xmm6          \n\t"
              "punpckhwd %%xmm7, %%xmm5       \n\t"
              "punpcklwd %%xmm7, %%xmm6       \n\t"
              "paddd %%xmm6, %%xmm4           \n\t"
-             "movdqa 16(%%"REG_D"), %%xmm6   \n\t"
+             "movdqa 16(%%"FF_REG_D"), %%xmm6\n\t"
              "paddd %%xmm5, %%xmm6           \n\t"
              "paddd %%xmm3, %%xmm4           \n\t"
              "paddd %%xmm3, %%xmm6           \n\t"
@@ -726,13 +726,13 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
              "psrad $8, %%xmm2               \n\t" /* FRAC_BITS. */
              "packssdw %%xmm2, %%xmm0        \n\t"
              "packuswb %%xmm7, %%xmm0        \n\t"
-             "movq %%xmm0, (%%"REG_d")       \n\t"
+             "movq %%xmm0, (%%"FF_REG_d")    \n\t"
 
              "psrad $8, %%xmm4               \n\t" /* FRAC_BITS. */
              "psrad $8, %%xmm6               \n\t" /* FRAC_BITS. */
              "packssdw %%xmm6, %%xmm4        \n\t"
              "packuswb %%xmm7, %%xmm4        \n\t"
-             "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t"
+             "movq %%xmm4, (%%"FF_REG_d",%%"FF_REG_c"); \n\t"
 snow_inner_add_yblock_sse2_end_8
 }
 
@@ -744,18 +744,18 @@ snow_inner_add_yblock_sse2_accum_16("2", "16")
 snow_inner_add_yblock_sse2_accum_16("1", "512")
 snow_inner_add_yblock_sse2_accum_16("0", "528")
 
-             "mov %0, %%"REG_d"              \n\t"
+             "mov %0, %%"FF_REG_d"           \n\t"
              "psrlw $4, %%xmm1               \n\t"
              "psrlw $4, %%xmm5               \n\t"
-             "paddw   (%%"REG_D"), %%xmm1    \n\t"
-             "paddw 16(%%"REG_D"), %%xmm5    \n\t"
+             "paddw   (%%"FF_REG_D"), %%xmm1 \n\t"
+             "paddw 16(%%"FF_REG_D"), %%xmm5 \n\t"
              "paddw %%xmm3, %%xmm1           \n\t"
              "paddw %%xmm3, %%xmm5           \n\t"
              "psraw $4, %%xmm1               \n\t" /* FRAC_BITS. */
              "psraw $4, %%xmm5               \n\t" /* FRAC_BITS. */
              "packuswb %%xmm5, %%xmm1        \n\t"
 
-             "movdqu %%xmm1, (%%"REG_d")       \n\t"
+             "movdqu %%xmm1, (%%"FF_REG_d")  \n\t"
 
 snow_inner_add_yblock_sse2_end_16
 }
@@ -764,30 +764,30 @@ snow_inner_add_yblock_sse2_end_16
     IDWTELEM * * dst_array = sb->line + src_y;\
     x86_reg tmp;\
     __asm__ volatile(\
-             "mov  %7, %%"REG_c"             \n\t"\
+             "mov  %7, %%"FF_REG_c"          \n\t"\
              "mov  %6, %2                    \n\t"\
-             "mov  %4, %%"REG_S"             \n\t"\
+             "mov  %4, %%"FF_REG_S"          \n\t"\
              "pxor %%mm7, %%mm7              \n\t" /* 0 */\
              "pcmpeqd %%mm3, %%mm3           \n\t"\
              "psllw $15, %%mm3               \n\t"\
              "psrlw $12, %%mm3               \n\t" /* FRAC_BITS >> 1 */\
              "1:                             \n\t"\
-             "mov %1, %%"REG_D"              \n\t"\
-             "mov (%%"REG_D"), %%"REG_D"     \n\t"\
-             "add %3, %%"REG_D"              \n\t"
+             "mov %1, %%"FF_REG_D"           \n\t"\
+             "mov (%%"FF_REG_D"), %%"FF_REG_D" \n\t"\
+             "add %3, %%"FF_REG_D"           \n\t"
 
 #define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
-             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
-             "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\
-             "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\
+             "mov "FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_REG_d"; \n\t"\
+             "movd "d_offset"(%%"FF_REG_d"), %%"out_reg1"                 \n\t"\
+             "movd "d_offset"+4(%%"FF_REG_d"), %%"out_reg2"               \n\t"\
              "punpcklbw %%mm7, %%"out_reg1" \n\t"\
              "punpcklbw %%mm7, %%"out_reg2" \n\t"\
-             "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\
-             "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
+             "movd "s_offset"(%%"FF_REG_S"), %%mm0   \n\t"\
+             "movd "s_offset"+4(%%"FF_REG_S"), %%mm4 \n\t"\
              "punpcklbw %%mm7, %%mm0       \n\t"\
              "punpcklbw %%mm7, %%mm4       \n\t"\
-             "pmullw %%mm0, %%"out_reg1"    \n\t"\
-             "pmullw %%mm4, %%"out_reg2"    \n\t"
+             "pmullw %%mm0, %%"out_reg1"   \n\t"\
+             "pmullw %%mm4, %%"out_reg2"   \n\t"
 
 #define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \
              snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\
@@ -795,32 +795,32 @@ snow_inner_add_yblock_sse2_end_16
              "paddusw %%mm6, %%mm5         \n\t"
 
 #define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\
-             "mov %0, %%"REG_d"              \n\t"\
+             "mov %0, %%"FF_REG_d"           \n\t"\
              "psrlw $4, %%mm1                \n\t"\
              "psrlw $4, %%mm5                \n\t"\
-             "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\
-             "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\
+             "paddw "read_offset"(%%"FF_REG_D"), %%mm1   \n\t"\
+             "paddw "read_offset"+8(%%"FF_REG_D"), %%mm5 \n\t"\
              "paddw %%mm3, %%mm1             \n\t"\
              "paddw %%mm3, %%mm5             \n\t"\
              "psraw $4, %%mm1                \n\t"\
              "psraw $4, %%mm5                \n\t"\
              "packuswb %%mm5, %%mm1          \n\t"\
-             "movq %%mm1, "write_offset"(%%"REG_d") \n\t"
+             "movq %%mm1, "write_offset"(%%"FF_REG_d") \n\t"
 
 #define snow_inner_add_yblock_mmx_end(s_step)\
-             "add $"s_step", %%"REG_S"             \n\t"\
-             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
-             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
-             "add %%"REG_c", (%%"REG_a")     \n\t"\
-             "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t"\
-             "add %%"REG_c", %0              \n\t"\
+             "add $"s_step", %%"FF_REG_S"                      \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a"); \n\t"\
+             "add %%"FF_REG_c", (%%"FF_REG_a")                 \n\t"\
+             "add"FF_OPSIZE " $"FF_PTR_SIZE"*1, %1             \n\t"\
+             "add %%"FF_REG_c", %0                             \n\t"\
              "dec %2                         \n\t"\
              "jnz 1b                         \n\t"\
              :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
              :\
              "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
-             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+             "%"FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_REG_d"");
 
 static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
                       int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index da32a3e..45c8a68 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -84,7 +84,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
 {\
     rnd = 8-rnd;\
     __asm__ volatile(\
-        "mov       $8, %%"REG_c"           \n\t"\
+        "mov       $8, %%"FF_REG_c"        \n\t"\
         LOAD_ROUNDER_MMX("%5")\
         "movq      "MANGLE(ff_pw_9)", %%mm6\n\t"\
         "1:                                \n\t"\
@@ -119,13 +119,13 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
         "movq      %%mm3, (%1)             \n\t"\
         "add       %6, %0                  \n\t"\
         "add       %4, %1                  \n\t"\
-        "dec       %%"REG_c"               \n\t"\
+        "dec       %%"FF_REG_c"            \n\t"\
         "jnz 1b                            \n\t"\
         : "+r"(src),  "+r"(dst)\
         : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
           "g"(stride-offset)\
           NAMED_CONSTRAINTS_ADD(ff_pw_9)\
-        : "%"REG_c, "memory"\
+        : "%"FF_REG_c, "memory"\
     );\
 }
 
diff --git a/libavfilter/x86/vf_noise.c b/libavfilter/x86/vf_noise.c
index 0a86cb0..f7a4d00 100644
--- a/libavfilter/x86/vf_noise.c
+++ b/libavfilter/x86/vf_noise.c
@@ -32,22 +32,22 @@ static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
     noise += shift;
 
     __asm__ volatile(
-            "mov %3, %%"REG_a"               \n\t"
+            "mov %3, %%"FF_REG_a"            \n\t"
             "pcmpeqb %%mm7, %%mm7            \n\t"
             "psllw $15, %%mm7                \n\t"
             "packsswb %%mm7, %%mm7           \n\t"
             ".p2align 4                      \n\t"
             "1:                              \n\t"
-            "movq (%0, %%"REG_a"), %%mm0     \n\t"
-            "movq (%1, %%"REG_a"), %%mm1     \n\t"
+            "movq (%0, %%"FF_REG_a"), %%mm0  \n\t"
+            "movq (%1, %%"FF_REG_a"), %%mm1  \n\t"
             "pxor %%mm7, %%mm0               \n\t"
             "paddsb %%mm1, %%mm0             \n\t"
             "pxor %%mm7, %%mm0               \n\t"
-            "movq %%mm0, (%2, %%"REG_a")     \n\t"
-            "add $8, %%"REG_a"               \n\t"
+            "movq %%mm0, (%2, %%"FF_REG_a")  \n\t"
+            "add $8, %%"FF_REG_a"            \n\t"
             " js 1b                          \n\t"
             :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-            : "%"REG_a
+            : "%"FF_REG_a
     );
     if (mmx_len != len)
         ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
@@ -60,13 +60,13 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
     x86_reg mmx_len = len & (~7);
 
     __asm__ volatile(
-            "mov %5, %%"REG_a"              \n\t"
+            "mov %5, %%"FF_REG_a"           \n\t"
             ".p2align 4                     \n\t"
             "1:                             \n\t"
-            "movq (%1, %%"REG_a"), %%mm1    \n\t"
-            "movq (%0, %%"REG_a"), %%mm0    \n\t"
-            "paddb (%2, %%"REG_a"), %%mm1   \n\t"
-            "paddb (%3, %%"REG_a"), %%mm1   \n\t"
+            "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
+            "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+            "paddb (%2, %%"FF_REG_a"), %%mm1\n\t"
+            "paddb (%3, %%"FF_REG_a"), %%mm1\n\t"
             "movq %%mm0, %%mm2              \n\t"
             "movq %%mm1, %%mm3              \n\t"
             "punpcklbw %%mm0, %%mm0         \n\t"
@@ -82,12 +82,12 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
             "psrlw $8, %%mm1                \n\t"
             "psrlw $8, %%mm3                \n\t"
             "packuswb %%mm3, %%mm1          \n\t"
-            "movq %%mm1, (%4, %%"REG_a")    \n\t"
-            "add $8, %%"REG_a"              \n\t"
+            "movq %%mm1, (%4, %%"FF_REG_a") \n\t"
+            "add $8, %%"FF_REG_a"           \n\t"
             " js 1b                         \n\t"
             :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
                "r" (dst+mmx_len), "g" (-mmx_len)
-            : "%"REG_a
+            : "%"FF_REG_a
         );
 
     if (mmx_len != len){
@@ -104,22 +104,22 @@ static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
     noise += shift;
 
     __asm__ volatile(
-            "mov %3, %%"REG_a"                \n\t"
+            "mov %3, %%"FF_REG_a"             \n\t"
             "pcmpeqb %%mm7, %%mm7             \n\t"
             "psllw $15, %%mm7                 \n\t"
             "packsswb %%mm7, %%mm7            \n\t"
             ".p2align 4                       \n\t"
             "1:                               \n\t"
-            "movq (%0, %%"REG_a"), %%mm0      \n\t"
-            "movq (%1, %%"REG_a"), %%mm1      \n\t"
+            "movq (%0, %%"FF_REG_a"), %%mm0   \n\t"
+            "movq (%1, %%"FF_REG_a"), %%mm1   \n\t"
             "pxor %%mm7, %%mm0                \n\t"
             "paddsb %%mm1, %%mm0              \n\t"
             "pxor %%mm7, %%mm0                \n\t"
-            "movntq %%mm0, (%2, %%"REG_a")    \n\t"
-            "add $8, %%"REG_a"                \n\t"
+            "movntq %%mm0, (%2, %%"FF_REG_a") \n\t"
+            "add $8, %%"FF_REG_a"             \n\t"
             " js 1b                           \n\t"
             :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-            : "%"REG_a
+            : "%"FF_REG_a
             );
     if (mmx_len != len)
         ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h
index 109b65e..9bff42d 100644
--- a/libavutil/x86/asm.h
+++ b/libavutil/x86/asm.h
@@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
 typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
 
 #if ARCH_X86_64
-#    define OPSIZE "q"
-#    define REG_a "rax"
-#    define REG_b "rbx"
-#    define REG_c "rcx"
-#    define REG_d "rdx"
-#    define REG_D "rdi"
-#    define REG_S "rsi"
-#    define PTR_SIZE "8"
+#    define FF_OPSIZE "q"
+#    define FF_REG_a "rax"
+#    define FF_REG_b "rbx"
+#    define FF_REG_c "rcx"
+#    define FF_REG_d "rdx"
+#    define FF_REG_D "rdi"
+#    define FF_REG_S "rsi"
+#    define FF_PTR_SIZE "8"
 typedef int64_t x86_reg;
 
-/* REG_SP is defined in Solaris sys headers, so use REG_sp */
-#    define REG_sp "rsp"
-#    define REG_BP "rbp"
-#    define REGBP   rbp
-#    define REGa    rax
-#    define REGb    rbx
-#    define REGc    rcx
-#    define REGd    rdx
-#    define REGSP   rsp
+/* REG_SP is defined in Solaris sys headers, hence the lower-case FF_REG_sp */
+#    define FF_REG_sp "rsp"
+#    define FF_REG_BP "rbp"
+#    define FF_REGBP   rbp
+#    define FF_REGa    rax
+#    define FF_REGb    rbx
+#    define FF_REGc    rcx
+#    define FF_REGd    rdx
+#    define FF_REGSP   rsp
 
 #elif ARCH_X86_32
 
-#    define OPSIZE "l"
-#    define REG_a "eax"
-#    define REG_b "ebx"
-#    define REG_c "ecx"
-#    define REG_d "edx"
-#    define REG_D "edi"
-#    define REG_S "esi"
-#    define PTR_SIZE "4"
+#    define FF_OPSIZE "l"
+#    define FF_REG_a "eax"
+#    define FF_REG_b "ebx"
+#    define FF_REG_c "ecx"
+#    define FF_REG_d "edx"
+#    define FF_REG_D "edi"
+#    define FF_REG_S "esi"
+#    define FF_PTR_SIZE "4"
 typedef int32_t x86_reg;
 
-#    define REG_sp "esp"
-#    define REG_BP "ebp"
-#    define REGBP   ebp
-#    define REGa    eax
-#    define REGb    ebx
-#    define REGc    ecx
-#    define REGd    edx
-#    define REGSP   esp
+#    define FF_REG_sp "esp"
+#    define FF_REG_BP "ebp"
+#    define FF_REGBP   ebp
+#    define FF_REGa    eax
+#    define FF_REGb    ebx
+#    define FF_REGc    ecx
+#    define FF_REGd    edx
+#    define FF_REGSP   esp
 #else
 typedef int x86_reg;
 #endif
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index b9f239b..f3a49c6 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -41,9 +41,9 @@
 /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
 #define cpuid(index, eax, ebx, ecx, edx)                        \
     __asm__ volatile (                                          \
-        "mov    %%"REG_b", %%"REG_S" \n\t"                      \
+        "mov    %%"FF_REG_b", %%"FF_REG_S" \n\t"                \
         "cpuid                       \n\t"                      \
-        "xchg   %%"REG_b", %%"REG_S                             \
+        "xchg   %%"FF_REG_b", %%"FF_REG_S                       \
         : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)        \
         : "0" (index), "2"(0))
 
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index b01be58..2a25ce4 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -118,12 +118,12 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
         );
 
     __asm__ volatile(
-        "lea (%2, %3), %%"REG_a"                \n\t"
+        "lea (%2, %3), %%"FF_REG_a"             \n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2
 
         "movq (%2), %%mm0                       \n\t"
-        "movq (%%"REG_a"), %%mm1                \n\t"
+        "movq (%%"FF_REG_a"), %%mm1             \n\t"
         "movq %%mm0, %%mm3                      \n\t"
         "movq %%mm0, %%mm4                      \n\t"
         PMAXUB(%%mm1, %%mm4)
@@ -132,7 +132,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
         "paddb %%mm7, %%mm0                     \n\t"
         "pcmpgtb %%mm6, %%mm0                   \n\t"
 
-        "movq (%%"REG_a",%3), %%mm2             \n\t"
+        "movq (%%"FF_REG_a",%3), %%mm2          \n\t"
         PMAXUB(%%mm2, %%mm4)
         PMINUB(%%mm2, %%mm3, %%mm5)
         "psubb %%mm2, %%mm1                     \n\t"
@@ -140,7 +140,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
         "pcmpgtb %%mm6, %%mm1                   \n\t"
         "paddb %%mm1, %%mm0                     \n\t"
 
-        "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %3, 2), %%mm1      \n\t"
         PMAXUB(%%mm1, %%mm4)
         PMINUB(%%mm1, %%mm3, %%mm5)
         "psubb %%mm1, %%mm2                     \n\t"
@@ -148,7 +148,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
         "pcmpgtb %%mm6, %%mm2                   \n\t"
         "paddb %%mm2, %%mm0                     \n\t"
 
-        "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
+        "lea (%%"FF_REG_a", %3, 4), %%"FF_REG_a"\n\t"
 
         "movq (%2, %3, 4), %%mm2                \n\t"
         PMAXUB(%%mm2, %%mm4)
@@ -158,7 +158,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
         "pcmpgtb %%mm6, %%mm1                   \n\t"
         "paddb %%mm1, %%mm0                     \n\t"
 
-        "movq (%%"REG_a"), %%mm1                \n\t"
+        "movq (%%"FF_REG_a"), %%mm1             \n\t"
         PMAXUB(%%mm1, %%mm4)
         PMINUB(%%mm1, %%mm3, %%mm5)
         "psubb %%mm1, %%mm2                     \n\t"
@@ -166,7 +166,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
         "pcmpgtb %%mm6, %%mm2                   \n\t"
         "paddb %%mm2, %%mm0                     \n\t"
 
-        "movq (%%"REG_a", %3), %%mm2            \n\t"
+        "movq (%%"FF_REG_a", %3), %%mm2         \n\t"
         PMAXUB(%%mm2, %%mm4)
         PMINUB(%%mm2, %%mm3, %%mm5)
         "psubb %%mm2, %%mm1                     \n\t"
@@ -174,7 +174,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
         "pcmpgtb %%mm6, %%mm1                   \n\t"
         "paddb %%mm1, %%mm0                     \n\t"
 
-        "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %3, 2), %%mm1      \n\t"
         PMAXUB(%%mm1, %%mm4)
         PMINUB(%%mm1, %%mm3, %%mm5)
         "psubb %%mm1, %%mm2                     \n\t"
@@ -207,7 +207,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
 
         : "=r" (numEq), "=r" (dcOk)
         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
-        : "%"REG_a
+        : "%"FF_REG_a
         );
 
     numEq= (-numEq) &0xFF;
@@ -248,9 +248,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
         "por %%mm2, %%mm6                       \n\t"// First Line to Filter
 
         "movq (%0, %1, 8), %%mm5                \n\t"
-        "lea (%0, %1, 4), %%"REG_a"             \n\t"
-        "lea (%0, %1, 8), %%"REG_c"             \n\t"
-        "sub %1, %%"REG_c"                      \n\t"
+        "lea (%0, %1, 4), %%"FF_REG_a"          \n\t"
+        "lea (%0, %1, 8), %%"FF_REG_c"          \n\t"
+        "sub %1, %%"FF_REG_c"                   \n\t"
         "add %1, %0                             \n\t" // %0 points to line 1 not 0
         "movq (%0, %1, 8), %%mm7                \n\t"
         "movq %%mm5, %%mm1                      \n\t"
@@ -279,7 +279,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 
         "movq (%0, %1, 4), %%mm2                \n\t" //     1
         "movq %%mm2, %%mm5                      \n\t" //     1
-        PAVGB((%%REGa), %%mm2)                        //    11        /2
+        PAVGB((%%FF_REGa), %%mm2)                     //    11        /2
         PAVGB((%0, %1, 2), %%mm2)                     //   211        /4
         "movq %%mm2, %%mm3                      \n\t" //   211        /4
         "movq (%0), %%mm4                       \n\t" // 1
@@ -291,15 +291,15 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
         PAVGB(%%mm6, %%mm0)                           //1 1        /2
         "movq %%mm4, %%mm3                      \n\t" // 1
         PAVGB((%0,%1,2), %%mm3)                       // 1 1        /2
-        PAVGB((%%REGa,%1,2), %%mm5)                   //     11        /2
-        PAVGB((%%REGa), %%mm5)                        //    211 /4
+        PAVGB((%%FF_REGa,%1,2), %%mm5)                //     11        /2
+        PAVGB((%%FF_REGa), %%mm5)                     //    211 /4
         PAVGB(%%mm5, %%mm3)                           // 2 2211 /8
         PAVGB(%%mm0, %%mm3)                           //4242211 /16
         "movq %%mm3, (%0,%1)                    \n\t" //  X
         // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
         PAVGB(%%mm4, %%mm6)                                   //11        /2
-        "movq (%%"REG_c"), %%mm0                \n\t" //       1
-        PAVGB((%%REGa, %1, 2), %%mm0)                 //      11/2
+        "movq (%%"FF_REG_c"), %%mm0             \n\t" //       1
+        PAVGB((%%FF_REGa, %1, 2), %%mm0)              //      11/2
         "movq %%mm0, %%mm3                      \n\t" //      11/2
         PAVGB(%%mm1, %%mm0)                           //  2   11/4
         PAVGB(%%mm6, %%mm0)                           //222   11/8
@@ -307,17 +307,17 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
         "movq (%0, %1, 2), %%mm2                \n\t" //   1
         "movq %%mm0, (%0, %1, 2)                \n\t" //   X
         // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
-        "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
-        PAVGB((%%REGc), %%mm0)                        //       11        /2
+        "movq (%%"FF_REG_a", %1, 4), %%mm0      \n\t" //        1
+        PAVGB((%%FF_REGc), %%mm0)                     //       11        /2
         PAVGB(%%mm0, %%mm6)                           //11     11        /4
         PAVGB(%%mm1, %%mm4)                           // 11                /2
         PAVGB(%%mm2, %%mm1)                           //  11                /2
         PAVGB(%%mm1, %%mm6)                           //1122   11        /8
         PAVGB(%%mm5, %%mm6)                           //112242211        /16
-        "movq (%%"REG_a"), %%mm5                \n\t" //    1
-        "movq %%mm6, (%%"REG_a")                \n\t" //    X
+        "movq (%%"FF_REG_a"), %%mm5             \n\t" //    1
+        "movq %%mm6, (%%"FF_REG_a")             \n\t" //    X
         // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
-        "movq (%%"REG_a", %1, 4), %%mm6         \n\t" //        1
+        "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t" //        1
         PAVGB(%%mm7, %%mm6)                           //        11        /2
         PAVGB(%%mm4, %%mm6)                           // 11     11        /4
         PAVGB(%%mm3, %%mm6)                           // 11   2211        /8
@@ -330,29 +330,29 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
         PAVGB(%%mm7, %%mm1)                           //  11     2        /4
         PAVGB(%%mm4, %%mm5)                           //    11                /2
         PAVGB(%%mm5, %%mm0)                           //    11 11        /4
-        "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //      1
+        "movq (%%"FF_REG_a", %1, 2), %%mm6      \n\t" //      1
         PAVGB(%%mm6, %%mm1)                           //  11  4  2        /8
         PAVGB(%%mm0, %%mm1)                           //  11224222        /16
-        "movq %%mm1, (%%"REG_a", %1, 2)         \n\t" //      X
+        "movq %%mm1, (%%"FF_REG_a", %1, 2)      \n\t" //      X
         // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
-        PAVGB((%%REGc), %%mm2)                        //   112 4        /8
-        "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
+        PAVGB((%%FF_REGc), %%mm2)                     //   112 4        /8
+        "movq (%%"FF_REG_a", %1, 4), %%mm0      \n\t" //        1
         PAVGB(%%mm0, %%mm6)                           //      1 1        /2
         PAVGB(%%mm7, %%mm6)                           //      1 12        /4
         PAVGB(%%mm2, %%mm6)                           //   1122424        /4
-        "movq %%mm6, (%%"REG_c")                \n\t" //       X
+        "movq %%mm6, (%%"FF_REG_c")             \n\t" //       X
         // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
         PAVGB(%%mm7, %%mm5)                           //    11   2        /4
         PAVGB(%%mm7, %%mm5)                           //    11   6        /8
 
         PAVGB(%%mm3, %%mm0)                           //      112        /4
         PAVGB(%%mm0, %%mm5)                           //    112246        /16
-        "movq %%mm5, (%%"REG_a", %1, 4)         \n\t" //        X
+        "movq %%mm5, (%%"FF_REG_a", %1, 4)      \n\t" //        X
         "sub %1, %0                             \n\t"
 
         :
         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
-        : "%"REG_a, "%"REG_c
+        : "%"FF_REG_a, "%"FF_REG_c
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     const int l1= stride;
@@ -411,18 +411,18 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
 
     __asm__ volatile(
         "pxor %%mm7, %%mm7                      \n\t" // 0
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
-        "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
+        "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t" // line 3
         "movq (%0, %1, 4), %%mm1                \n\t" // line 4
         "movq %%mm1, %%mm2                      \n\t" // line 4
         "psubusb %%mm0, %%mm1                   \n\t"
         "psubusb %%mm2, %%mm0                   \n\t"
         "por %%mm1, %%mm0                       \n\t" // |l2 - l3|
-        "movq (%%"REG_c"), %%mm3                \n\t" // line 5
-        "movq (%%"REG_c", %1), %%mm4            \n\t" // line 6
+        "movq (%%"FF_REG_c"), %%mm3             \n\t" // line 5
+        "movq (%%"FF_REG_c", %1), %%mm4         \n\t" // line 6
         "movq %%mm3, %%mm5                      \n\t" // line 5
         "psubusb %%mm4, %%mm3                   \n\t"
         "psubusb %%mm5, %%mm4                   \n\t"
@@ -454,44 +454,44 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
         "pxor %%mm2, %%mm0                      \n\t"
         "movq %%mm0, (%0, %1, 4)                \n\t" // line 4
 
-        "movq (%%"REG_c"), %%mm0                \n\t" // line 5
+        "movq (%%"FF_REG_c"), %%mm0             \n\t" // line 5
         "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
         "paddusb %%mm3, %%mm0                   \n\t"
         "pxor %%mm2, %%mm0                      \n\t"
-        "movq %%mm0, (%%"REG_c")                \n\t" // line 5
+        "movq %%mm0, (%%"FF_REG_c")             \n\t" // line 5
 
         PAVGB(%%mm7, %%mm1)                           // d/4
 
-        "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
+        "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t" // line 3
         "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
         "psubusb %%mm1, %%mm0                   \n\t"
         "pxor %%mm2, %%mm0                      \n\t"
-        "movq %%mm0, (%%"REG_a", %1, 2)         \n\t" // line 3
+        "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t" // line 3
 
-        "movq (%%"REG_c", %1), %%mm0            \n\t" // line 6
+        "movq (%%"FF_REG_c", %1), %%mm0         \n\t" // line 6
         "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
         "paddusb %%mm1, %%mm0                   \n\t"
         "pxor %%mm2, %%mm0                      \n\t"
-        "movq %%mm0, (%%"REG_c", %1)            \n\t" // line 6
+        "movq %%mm0, (%%"FF_REG_c", %1)         \n\t" // line 6
 
         PAVGB(%%mm7, %%mm1)                           // d/8
 
-        "movq (%%"REG_a", %1), %%mm0            \n\t" // line 2
+        "movq (%%"FF_REG_a", %1), %%mm0         \n\t" // line 2
         "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l2-1 : l2
         "psubusb %%mm1, %%mm0                   \n\t"
         "pxor %%mm2, %%mm0                      \n\t"
-        "movq %%mm0, (%%"REG_a", %1)            \n\t" // line 2
+        "movq %%mm0, (%%"FF_REG_a", %1)         \n\t" // line 2
 
-        "movq (%%"REG_c", %1, 2), %%mm0         \n\t" // line 7
+        "movq (%%"FF_REG_c", %1, 2), %%mm0      \n\t" // line 7
         "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
         "paddusb %%mm1, %%mm0                   \n\t"
         "pxor %%mm2, %%mm0                      \n\t"
-        "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
+        "movq %%mm0, (%%"FF_REG_c", %1, 2)      \n\t" // line 7
 
         :
         : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
           NAMED_CONSTRAINTS_ADD(b01)
-        : "%"REG_a, "%"REG_c
+        : "%"FF_REG_a, "%"FF_REG_c
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
 
@@ -553,8 +553,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 
 #if 0 //slightly more accurate and slightly slower
         "pxor %%mm7, %%mm7                      \n\t" // 0
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t"
 //      0       1       2       3       4       5       6       7
 //      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 ecx+%1  ecx+2%1
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1
@@ -567,8 +567,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         PAVGB(%%mm1, %%mm0)                           // ~(l2 + 2l0)/4
         PAVGB(%%mm2, %%mm0)                           // ~(5l2 + 2l0)/8
 
-        "movq (%%"REG_a"), %%mm1                \n\t" // l1
-        "movq (%%"REG_a", %1, 2), %%mm3         \n\t" // l3
+        "movq (%%"FF_REG_a"), %%mm1             \n\t" // l1
+        "movq (%%"FF_REG_a", %1, 2), %%mm3      \n\t" // l3
         "movq %%mm1, %%mm4                      \n\t" // l1
         PAVGB(%%mm7, %%mm1)                           // ~l1/2
         PAVGB(%%mm3, %%mm1)                           // ~(l1 + 2l3)/4
@@ -586,7 +586,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         PAVGB(%%mm2, %%mm0)                           // ~(l4 + 2l2)/4
         PAVGB(%%mm4, %%mm0)                           // ~(5l4 + 2l2)/8
 
-        "movq (%%"REG_c"), %%mm2                \n\t" // l5
+        "movq (%%"FF_REG_c"), %%mm2             \n\t" // l5
         "movq %%mm3, %%mm5                      \n\t" // l3
         PAVGB(%%mm7, %%mm3)                           // ~l3/2
         PAVGB(%%mm2, %%mm3)                           // ~(l3 + 2l5)/4
@@ -599,13 +599,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "pcmpeqb %%mm7, %%mm0                   \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
 // mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0
 
-        "movq (%%"REG_c", %1), %%mm6            \n\t" // l6
+        "movq (%%"FF_REG_c", %1), %%mm6         \n\t" // l6
         "movq %%mm6, %%mm5                      \n\t" // l6
         PAVGB(%%mm7, %%mm6)                           // ~l6/2
         PAVGB(%%mm4, %%mm6)                           // ~(l6 + 2l4)/4
         PAVGB(%%mm5, %%mm6)                           // ~(5l6 + 2l4)/8
 
-        "movq (%%"REG_c", %1, 2), %%mm5         \n\t" // l7
+        "movq (%%"FF_REG_c", %1, 2), %%mm5      \n\t" // l7
         "movq %%mm2, %%mm4                      \n\t" // l5
         PAVGB(%%mm7, %%mm2)                           // ~l5/2
         PAVGB(%%mm5, %%mm2)                           // ~(l5 + 2l7)/4
@@ -632,7 +632,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "paddusb %%mm1, %%mm3                   \n\t"
 //        "paddusb "MANGLE(b01)", %%mm3           \n\t"
 
-        "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
+        "movq (%%"FF_REG_a", %1, 2), %%mm6      \n\t" //l3
         "movq (%0, %1, 4), %%mm5                \n\t" //l4
         "movq (%0, %1, 4), %%mm4                \n\t" //l4
         "psubusb %%mm6, %%mm5                   \n\t"
@@ -646,7 +646,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubusb "MANGLE(b01)", %%mm3           \n\t"
         PAVGB(%%mm7, %%mm3)
 
-        "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+        "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
         "movq (%0, %1, 4), %%mm2                \n\t"
         "pxor %%mm6, %%mm0                      \n\t"
         "pxor %%mm6, %%mm2                      \n\t"
@@ -654,36 +654,36 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "paddb %%mm3, %%mm2                     \n\t"
         "pxor %%mm6, %%mm0                      \n\t"
         "pxor %%mm6, %%mm2                      \n\t"
-        "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
+        "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t"
         "movq %%mm2, (%0, %1, 4)                \n\t"
 #endif //0
 
-        "lea (%0, %1), %%"REG_a"                \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
         "pcmpeqb %%mm6, %%mm6                   \n\t" // -1
 //      0       1       2       3       4       5       6       7
 //      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 ecx+%1  ecx+2%1
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1
 
 
-        "movq (%%"REG_a", %1, 2), %%mm1         \n\t" // l3
+        "movq (%%"FF_REG_a", %1, 2), %%mm1      \n\t" // l3
         "movq (%0, %1, 4), %%mm0                \n\t" // l4
         "pxor %%mm6, %%mm1                      \n\t" // -l3-1
         PAVGB(%%mm1, %%mm0)                           // -q+128 = (l4-l3+256)/2
 // mm1=-l3-1, mm0=128-q
 
-        "movq (%%"REG_a", %1, 4), %%mm2         \n\t" // l5
-        "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
+        "movq (%%"FF_REG_a", %1, 4), %%mm2      \n\t" // l5
+        "movq (%%"FF_REG_a", %1), %%mm3         \n\t" // l2
         "pxor %%mm6, %%mm2                      \n\t" // -l5-1
         "movq %%mm2, %%mm5                      \n\t" // -l5-1
         "movq "MANGLE(b80)", %%mm4              \n\t" // 128
-        "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t"
         PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
         PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
         PAVGB(%%mm2, %%mm4)                           // ~(l2-l5)/4 +(l4-l3)/8 + 128
         PAVGB(%%mm0, %%mm4)                           // ~(l2-l5)/8 +5(l4-l3)/16 + 128
 // mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1
 
-        "movq (%%"REG_a"), %%mm2                \n\t" // l1
+        "movq (%%"FF_REG_a"), %%mm2             \n\t" // l1
         "pxor %%mm6, %%mm2                      \n\t" // -l1-1
         PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
         PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
@@ -693,8 +693,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
 // mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1
 
-        PAVGB((%%REGc, %1), %%mm5)                    // (l6-l5+256)/2
-        "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
+        PAVGB((%%FF_REGc, %1), %%mm5)                 // (l6-l5+256)/2
+        "movq (%%"FF_REG_c", %1, 2), %%mm1      \n\t" // l7
         "pxor %%mm6, %%mm1                      \n\t" // -l7-1
         PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
         "movq "MANGLE(b80)", %%mm2              \n\t" // 128
@@ -743,7 +743,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "pxor %%mm1, %%mm7                      \n\t" // SIGN(d*q)
 
         "pand %%mm7, %%mm4                      \n\t"
-        "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+        "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
         "movq (%0, %1, 4), %%mm2                \n\t"
         "pxor %%mm1, %%mm0                      \n\t"
         "pxor %%mm1, %%mm2                      \n\t"
@@ -751,13 +751,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubb %%mm4, %%mm2                     \n\t"
         "pxor %%mm1, %%mm0                      \n\t"
         "pxor %%mm1, %%mm2                      \n\t"
-        "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
+        "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t"
         "movq %%mm2, (%0, %1, 4)                \n\t"
 
         :
         : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
           NAMED_CONSTRAINTS_ADD(b80,b00,b01)
-        : "%"REG_a, "%"REG_c
+        : "%"FF_REG_a, "%"FF_REG_c
     );
 
 /*
@@ -830,12 +830,12 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "punpckhbw %%mm7, %%mm1                 \n\t" // high part of line 0
 
         "movq (%0, %1), %%mm2                   \n\t"
-        "lea (%0, %1, 2), %%"REG_a"             \n\t"
+        "lea (%0, %1, 2), %%"FF_REG_a"          \n\t"
         "movq %%mm2, %%mm3                      \n\t"
         "punpcklbw %%mm7, %%mm2                 \n\t" // low part of line 1
         "punpckhbw %%mm7, %%mm3                 \n\t" // high part of line 1
 
-        "movq (%%"REG_a"), %%mm4                \n\t"
+        "movq (%%"FF_REG_a"), %%mm4             \n\t"
         "movq %%mm4, %%mm5                      \n\t"
         "punpcklbw %%mm7, %%mm4                 \n\t" // low part of line 2
         "punpckhbw %%mm7, %%mm5                 \n\t" // high part of line 2
@@ -852,7 +852,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2
         "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2
 
-        "movq (%%"REG_a", %1), %%mm2            \n\t"
+        "movq (%%"FF_REG_a", %1), %%mm2         \n\t"
         "movq %%mm2, %%mm3                      \n\t"
         "punpcklbw %%mm7, %%mm2                 \n\t" // L3
         "punpckhbw %%mm7, %%mm3                 \n\t" // H3
@@ -864,7 +864,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "movq %%mm0, (%3)                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
         "movq %%mm1, 8(%3)                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
-        "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+        "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
         "movq %%mm0, %%mm1                      \n\t"
         "punpcklbw %%mm7, %%mm0                 \n\t" // L4
         "punpckhbw %%mm7, %%mm1                 \n\t" // H4
@@ -878,7 +878,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
         "psubw %%mm3, %%mm5                     \n\t" // 2H2 - H3 + H4
 
-        "lea (%%"REG_a", %1), %0                \n\t"
+        "lea (%%"FF_REG_a", %1), %0             \n\t"
         "psllw $2, %%mm2                        \n\t" // 4L3 - 4L4
         "psllw $2, %%mm3                        \n\t" // 4H3 - 4H4
         "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4
@@ -893,10 +893,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - 2L5
         "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - 2H5
 
-        "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+        "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
         "punpcklbw %%mm7, %%mm6                 \n\t" // L6
         "psubw %%mm6, %%mm2                     \n\t" // L5 - L6
-        "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+        "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
         "punpckhbw %%mm7, %%mm6                 \n\t" // H6
         "psubw %%mm6, %%mm3                     \n\t" // H5 - H6
 
@@ -1045,7 +1045,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         : "+r" (src)
         : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
           NAMED_CONSTRAINTS_ADD(w05,w20)
-        : "%"REG_a
+        : "%"FF_REG_a
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     const int l1= stride;
@@ -1104,8 +1104,8 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
         "packuswb %%mm0, %%mm0                  \n\t"
         "movq %%mm0, %3                         \n\t"
 
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t"
 
 //        0        1        2        3        4        5        6        7        8        9
 //        %0        eax        eax+%1        eax+2%1        %0+4%1        edx        edx+%1        edx+2%1        %0+8%1        edx+4%1
@@ -1128,13 +1128,13 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 #endif
 #define FIND_MIN_MAX(addr)  REAL_FIND_MIN_MAX(addr)
 
-FIND_MIN_MAX((%%REGa))
-FIND_MIN_MAX((%%REGa, %1))
-FIND_MIN_MAX((%%REGa, %1, 2))
+FIND_MIN_MAX((%%FF_REGa))
+FIND_MIN_MAX((%%FF_REGa, %1))
+FIND_MIN_MAX((%%FF_REGa, %1, 2))
 FIND_MIN_MAX((%0, %1, 4))
-FIND_MIN_MAX((%%REGd))
-FIND_MIN_MAX((%%REGd, %1))
-FIND_MIN_MAX((%%REGd, %1, 2))
+FIND_MIN_MAX((%%FF_REGd))
+FIND_MIN_MAX((%%FF_REGd, %1))
+FIND_MIN_MAX((%%FF_REGd, %1, 2))
 FIND_MIN_MAX((%0, %1, 8))
 
         "movq %%mm7, %%mm4                      \n\t"
@@ -1218,13 +1218,13 @@ FIND_MIN_MAX((%0, %1, 8))
         "paddb %%mm2, %%mm0                     \n\t"
         "paddb %%mm3, %%mm0                     \n\t"
 
-        "movq (%%"REG_a"), %%mm2                \n\t" // L11
+        "movq (%%"FF_REG_a"), %%mm2             \n\t" // L11
         "movq %%mm2, %%mm3                      \n\t" // L11
         "movq %%mm2, %%mm4                      \n\t" // L11
         "psllq $8, %%mm3                        \n\t"
         "psrlq $8, %%mm4                        \n\t"
-        "movd -4(%%"REG_a"), %%mm5              \n\t"
-        "movd 8(%%"REG_a"), %%mm6               \n\t"
+        "movd -4(%%"FF_REG_a"), %%mm5           \n\t"
+        "movd 8(%%"FF_REG_a"), %%mm6            \n\t"
         "psrlq $24, %%mm5                       \n\t"
         "psllq $56, %%mm6                       \n\t"
         "por %%mm5, %%mm3                       \n\t" // L01
@@ -1304,20 +1304,20 @@ FIND_MIN_MAX((%0, %1, 8))
 1110111
 
 */
-//DERING_CORE(dst          ,src            ,ppsx ,psx  ,sx   ,pplx ,plx  ,lx   ,t0   ,t1)
-DERING_CORE((%%REGa)       ,(%%REGa, %1)   ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1)   ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1, 2),(%0, %1, 4)    ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 4)    ,(%%REGd)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGd)       ,(%%REGd, %1)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1)   ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1, 2),(%0, %1, 8)    ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+//DERING_CORE(dst             ,src               ,ppsx ,psx  ,sx   ,pplx ,plx  ,lx   ,t0   ,t1)
+DERING_CORE((%%FF_REGa)       ,(%%FF_REGa, %1)   ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGa, %1)   ,(%%FF_REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGa, %1, 2),(%0, %1, 4)       ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 4)       ,(%%FF_REGd)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGd)       ,(%%FF_REGd, %1)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGd, %1)   ,(%%FF_REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
+DERING_CORE((%%FF_REGd, %1, 2),(%0, %1, 8)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 8)       ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
 
         "1:                        \n\t"
         : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
           NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08)
-        : "%"REG_a, "%"REG_d, "%"REG_sp
+        : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_sp
     );
 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
     int y;
@@ -1452,27 +1452,27 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     src+= 4*stride;
     __asm__ volatile(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_c"\n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
 
         "movq (%0), %%mm0                       \n\t"
-        "movq (%%"REG_a", %1), %%mm1            \n\t"
+        "movq (%%"FF_REG_a", %1), %%mm1         \n\t"
         PAVGB(%%mm1, %%mm0)
-        "movq %%mm0, (%%"REG_a")                \n\t"
+        "movq %%mm0, (%%"FF_REG_a")             \n\t"
         "movq (%0, %1, 4), %%mm0                \n\t"
         PAVGB(%%mm0, %%mm1)
-        "movq %%mm1, (%%"REG_a", %1, 2)         \n\t"
-        "movq (%%"REG_c", %1), %%mm1            \n\t"
+        "movq %%mm1, (%%"FF_REG_a", %1, 2)      \n\t"
+        "movq (%%"FF_REG_c", %1), %%mm1         \n\t"
         PAVGB(%%mm1, %%mm0)
-        "movq %%mm0, (%%"REG_c")                \n\t"
+        "movq %%mm0, (%%"FF_REG_c")             \n\t"
         "movq (%0, %1, 8), %%mm0                \n\t"
         PAVGB(%%mm0, %%mm1)
-        "movq %%mm1, (%%"REG_c", %1, 2)         \n\t"
+        "movq %%mm1, (%%"FF_REG_c", %1, 2)      \n\t"
 
         : : "r" (src), "r" ((x86_reg)stride)
-        : "%"REG_a, "%"REG_c
+        : "%"FF_REG_a, "%"FF_REG_c
     );
 #else
     int a, b, x;
@@ -1505,10 +1505,10 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     src+= stride*3;
     __asm__ volatile(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
-        "lea (%%"REG_d", %1, 4), %%"REG_c"      \n\t"
-        "add %1, %%"REG_c"                      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t"
+        "lea (%%"FF_REG_d", %1, 4), %%"FF_REG_c"\n\t"
+        "add %1, %%"FF_REG_c"                   \n\t"
 #if TEMPLATE_PP_SSE2
         "pxor %%xmm7, %%xmm7                    \n\t"
 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
@@ -1554,17 +1554,17 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
 #endif //TEMPLATE_PP_SSE2
 #define DEINT_CUBIC(a,b,c,d,e)  REAL_DEINT_CUBIC(a,b,c,d,e)
 
-DEINT_CUBIC((%0)        , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
-DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%0, %1, 8))
-DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
-DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc, %1, 2))
+DEINT_CUBIC((%0)           , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)    , (%%FF_REGd, %1))
+DEINT_CUBIC((%%FF_REGa, %1), (%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1), (%0, %1, 8))
+DEINT_CUBIC((%0, %1, 4)    , (%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)    , (%%FF_REGc))
+DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8)    , (%%FF_REGd, %1, 4), (%%FF_REGc)    , (%%FF_REGc, %1, 2))
 
         : : "r" (src), "r" ((x86_reg)stride)
         :
 #if TEMPLATE_PP_SSE2
         XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",)
 #endif
-        "%"REG_a, "%"REG_d, "%"REG_c
+        "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c
     );
 #undef REAL_DEINT_CUBIC
 #else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@@ -1592,8 +1592,8 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp
 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     src+= stride*4;
     __asm__ volatile(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t"
         "pxor %%mm7, %%mm7                      \n\t"
         "movq (%2), %%mm0                       \n\t"
 //      0       1       2       3       4       5       6       7       8       9       10
@@ -1629,14 +1629,14 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp
 
 #define DEINT_FF(a,b,c,d)  REAL_DEINT_FF(a,b,c,d)
 
-DEINT_FF((%0)        , (%%REGa)       , (%%REGa, %1), (%%REGa, %1, 2))
-DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd)       )
-DEINT_FF((%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%%REGd, %1, 2))
-DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
+DEINT_FF((%0)           , (%%FF_REGa)       , (%%FF_REGa, %1), (%%FF_REGa, %1, 2))
+DEINT_FF((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)    , (%%FF_REGd)       )
+DEINT_FF((%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1), (%%FF_REGd, %1, 2))
+DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)    , (%%FF_REGd, %1, 4))
 
         "movq %%mm0, (%2)                       \n\t"
         : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     int x;
@@ -1671,8 +1671,8 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp
 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
     src+= stride*4;
     __asm__ volatile(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t"
         "pxor %%mm7, %%mm7                      \n\t"
         "movq (%2), %%mm0                       \n\t"
         "movq (%3), %%mm1                       \n\t"
@@ -1714,19 +1714,19 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp
 
 #define DEINT_L5(t1,t2,a,b,c)  REAL_DEINT_L5(t1,t2,a,b,c)
 
-DEINT_L5(%%mm0, %%mm1, (%0)           , (%%REGa)       , (%%REGa, %1)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGa)       , (%%REGa, %1)   , (%%REGa, %1, 2))
-DEINT_L5(%%mm0, %%mm1, (%%REGa, %1)   , (%%REGa, %1, 2), (%0, %1, 4)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4)    , (%%REGd)       )
-DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)    , (%%REGd)       , (%%REGd, %1)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGd)       , (%%REGd, %1)   , (%%REGd, %1, 2))
-DEINT_L5(%%mm0, %%mm1, (%%REGd, %1)   , (%%REGd, %1, 2), (%0, %1, 8)   )
-DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
+DEINT_L5(%%mm0, %%mm1, (%0)              , (%%FF_REGa)       , (%%FF_REGa, %1)   )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGa)       , (%%FF_REGa, %1)   , (%%FF_REGa, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%FF_REGa, %1)   , (%%FF_REGa, %1, 2), (%0, %1, 4)   )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGa, %1, 2), (%0, %1, 4)       , (%%FF_REGd)       )
+DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)       , (%%FF_REGd)       , (%%FF_REGd, %1)   )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGd)       , (%%FF_REGd, %1)   , (%%FF_REGd, %1, 2))
+DEINT_L5(%%mm0, %%mm1, (%%FF_REGd, %1)   , (%%FF_REGd, %1, 2), (%0, %1, 8)   )
+DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8)       , (%%FF_REGd, %1, 4))
 
         "movq %%mm0, (%2)                       \n\t"
         "movq %%mm1, (%3)                       \n\t"
         : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
     int x;
@@ -1772,49 +1772,49 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     src+= 4*stride;
     __asm__ volatile(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
 
         "movq (%2), %%mm0                       \n\t" // L0
-        "movq (%%"REG_a"), %%mm1                \n\t" // L2
+        "movq (%%"FF_REG_a"), %%mm1             \n\t" // L2
         PAVGB(%%mm1, %%mm0)                           // L0+L2
         "movq (%0), %%mm2                       \n\t" // L1
         PAVGB(%%mm2, %%mm0)
         "movq %%mm0, (%0)                       \n\t"
-        "movq (%%"REG_a", %1), %%mm0            \n\t" // L3
+        "movq (%%"FF_REG_a", %1), %%mm0         \n\t" // L3
         PAVGB(%%mm0, %%mm2)                           // L1+L3
         PAVGB(%%mm1, %%mm2)                           // 2L2 + L1 + L3
-        "movq %%mm2, (%%"REG_a")                \n\t"
-        "movq (%%"REG_a", %1, 2), %%mm2         \n\t" // L4
+        "movq %%mm2, (%%"FF_REG_a")             \n\t"
+        "movq (%%"FF_REG_a", %1, 2), %%mm2      \n\t" // L4
         PAVGB(%%mm2, %%mm1)                           // L2+L4
         PAVGB(%%mm0, %%mm1)                           // 2L3 + L2 + L4
-        "movq %%mm1, (%%"REG_a", %1)            \n\t"
+        "movq %%mm1, (%%"FF_REG_a", %1)         \n\t"
         "movq (%0, %1, 4), %%mm1                \n\t" // L5
         PAVGB(%%mm1, %%mm0)                           // L3+L5
         PAVGB(%%mm2, %%mm0)                           // 2L4 + L3 + L5
-        "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
-        "movq (%%"REG_d"), %%mm0                \n\t" // L6
+        "movq %%mm0, (%%"FF_REG_a", %1, 2)      \n\t"
+        "movq (%%"FF_REG_d"), %%mm0             \n\t" // L6
         PAVGB(%%mm0, %%mm2)                           // L4+L6
         PAVGB(%%mm1, %%mm2)                           // 2L5 + L4 + L6
         "movq %%mm2, (%0, %1, 4)                \n\t"
-        "movq (%%"REG_d", %1), %%mm2            \n\t" // L7
+        "movq (%%"FF_REG_d", %1), %%mm2         \n\t" // L7
         PAVGB(%%mm2, %%mm1)                           // L5+L7
         PAVGB(%%mm0, %%mm1)                           // 2L6 + L5 + L7
-        "movq %%mm1, (%%"REG_d")                \n\t"
-        "movq (%%"REG_d", %1, 2), %%mm1         \n\t" // L8
+        "movq %%mm1, (%%"FF_REG_d")             \n\t"
+        "movq (%%"FF_REG_d", %1, 2), %%mm1      \n\t" // L8
         PAVGB(%%mm1, %%mm0)                           // L6+L8
         PAVGB(%%mm2, %%mm0)                           // 2L7 + L6 + L8
-        "movq %%mm0, (%%"REG_d", %1)            \n\t"
+        "movq %%mm0, (%%"FF_REG_d", %1)         \n\t"
         "movq (%0, %1, 8), %%mm0                \n\t" // L9
         PAVGB(%%mm0, %%mm2)                           // L7+L9
         PAVGB(%%mm1, %%mm2)                           // 2L8 + L7 + L9
-        "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
+        "movq %%mm2, (%%"FF_REG_d", %1, 2)      \n\t"
         "movq %%mm1, (%2)                       \n\t"
 
         : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     int a, b, c, x;
@@ -1874,57 +1874,57 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
     src+= 4*stride;
 #if TEMPLATE_PP_MMXEXT
     __asm__ volatile(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
 
         "movq (%0), %%mm0                       \n\t"
-        "movq (%%"REG_a", %1), %%mm2            \n\t"
-        "movq (%%"REG_a"), %%mm1                \n\t"
+        "movq (%%"FF_REG_a", %1), %%mm2         \n\t"
+        "movq (%%"FF_REG_a"), %%mm1             \n\t"
         "movq %%mm0, %%mm3                      \n\t"
         "pmaxub %%mm1, %%mm0                    \n\t"
         "pminub %%mm3, %%mm1                    \n\t"
         "pmaxub %%mm2, %%mm1                    \n\t"
         "pminub %%mm1, %%mm0                    \n\t"
-        "movq %%mm0, (%%"REG_a")                \n\t"
+        "movq %%mm0, (%%"FF_REG_a")             \n\t"
 
         "movq (%0, %1, 4), %%mm0                \n\t"
-        "movq (%%"REG_a", %1, 2), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %1, 2), %%mm1      \n\t"
         "movq %%mm2, %%mm3                      \n\t"
         "pmaxub %%mm1, %%mm2                    \n\t"
         "pminub %%mm3, %%mm1                    \n\t"
         "pmaxub %%mm0, %%mm1                    \n\t"
         "pminub %%mm1, %%mm2                    \n\t"
-        "movq %%mm2, (%%"REG_a", %1, 2)         \n\t"
+        "movq %%mm2, (%%"FF_REG_a", %1, 2)      \n\t"
 
-        "movq (%%"REG_d"), %%mm2                \n\t"
-        "movq (%%"REG_d", %1), %%mm1            \n\t"
+        "movq (%%"FF_REG_d"), %%mm2             \n\t"
+        "movq (%%"FF_REG_d", %1), %%mm1         \n\t"
         "movq %%mm2, %%mm3                      \n\t"
         "pmaxub %%mm0, %%mm2                    \n\t"
         "pminub %%mm3, %%mm0                    \n\t"
         "pmaxub %%mm1, %%mm0                    \n\t"
         "pminub %%mm0, %%mm2                    \n\t"
-        "movq %%mm2, (%%"REG_d")                \n\t"
+        "movq %%mm2, (%%"FF_REG_d")             \n\t"
 
-        "movq (%%"REG_d", %1, 2), %%mm2         \n\t"
+        "movq (%%"FF_REG_d", %1, 2), %%mm2      \n\t"
         "movq (%0, %1, 8), %%mm0                \n\t"
         "movq %%mm2, %%mm3                      \n\t"
         "pmaxub %%mm0, %%mm2                    \n\t"
         "pminub %%mm3, %%mm0                    \n\t"
         "pmaxub %%mm1, %%mm0                    \n\t"
         "pminub %%mm0, %%mm2                    \n\t"
-        "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
+        "movq %%mm2, (%%"FF_REG_d", %1, 2)      \n\t"
 
 
         : : "r" (src), "r" ((x86_reg)stride)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 
 #else // MMX without MMX2
     __asm__ volatile(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
         "pxor %%mm7, %%mm7                      \n\t"
@@ -1954,13 +1954,13 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
         "movq %%mm0, " #b "                     \n\t"
 #define MEDIAN(a,b,c)  REAL_MEDIAN(a,b,c)
 
-MEDIAN((%0)        , (%%REGa)       , (%%REGa, %1))
-MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
-MEDIAN((%0, %1, 4) , (%%REGd)       , (%%REGd, %1))
-MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
+MEDIAN((%0)           , (%%FF_REGa)       , (%%FF_REGa, %1))
+MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4))
+MEDIAN((%0, %1, 4)    , (%%FF_REGd)       , (%%FF_REGd, %1))
+MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8))
 
         : : "r" (src), "r" ((x86_reg)stride)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 #endif //TEMPLATE_PP_MMXEXT
 #else //TEMPLATE_PP_MMX
@@ -1992,17 +1992,17 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_t *src, int srcStride)
 {
     __asm__(
-        "lea (%0, %1), %%"REG_a"                \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
         "movq (%0), %%mm0                       \n\t" // 12345678
-        "movq (%%"REG_a"), %%mm1                \n\t" // abcdefgh
+        "movq (%%"FF_REG_a"), %%mm1             \n\t" // abcdefgh
         "movq %%mm0, %%mm2                      \n\t" // 12345678
         "punpcklbw %%mm1, %%mm0                 \n\t" // 1a2b3c4d
         "punpckhbw %%mm1, %%mm2                 \n\t" // 5e6f7g8h
 
-        "movq (%%"REG_a", %1), %%mm1            \n\t"
-        "movq (%%"REG_a", %1, 2), %%mm3         \n\t"
+        "movq (%%"FF_REG_a", %1), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %1, 2), %%mm3      \n\t"
         "movq %%mm1, %%mm4                      \n\t"
         "punpcklbw %%mm3, %%mm1                 \n\t"
         "punpckhbw %%mm3, %%mm4                 \n\t"
@@ -2029,16 +2029,16 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
         "psrlq $32, %%mm1                       \n\t"
         "movd %%mm1, 112(%3)                    \n\t"
 
-        "lea (%%"REG_a", %1, 4), %%"REG_a"      \n\t"
+        "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_a"\n\t"
 
         "movq (%0, %1, 4), %%mm0                \n\t" // 12345678
-        "movq (%%"REG_a"), %%mm1                \n\t" // abcdefgh
+        "movq (%%"FF_REG_a"), %%mm1             \n\t" // abcdefgh
         "movq %%mm0, %%mm2                      \n\t" // 12345678
         "punpcklbw %%mm1, %%mm0                 \n\t" // 1a2b3c4d
         "punpckhbw %%mm1, %%mm2                 \n\t" // 5e6f7g8h
 
-        "movq (%%"REG_a", %1), %%mm1            \n\t"
-        "movq (%%"REG_a", %1, 2), %%mm3         \n\t"
+        "movq (%%"FF_REG_a", %1), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %1, 2), %%mm3      \n\t"
         "movq %%mm1, %%mm4                      \n\t"
         "punpcklbw %%mm3, %%mm1                 \n\t"
         "punpckhbw %%mm3, %%mm4                 \n\t"
@@ -2067,7 +2067,7 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
 
 
         :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2)
-        : "%"REG_a
+        : "%"FF_REG_a
     );
 }
 
@@ -2077,8 +2077,8 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t *src)
 {
     __asm__(
-        "lea (%0, %1), %%"REG_a"                \n\t"
-        "lea (%%"REG_a",%1,4), %%"REG_d"        \n\t"
+        "lea (%0, %1), %%"FF_REG_a"             \n\t"
+        "lea (%%"FF_REG_a",%1,4), %%"FF_REG_d"  \n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
         "movq (%2), %%mm0                       \n\t" // 12345678
@@ -2102,16 +2102,16 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t
 
         "movd %%mm0, (%0)                       \n\t"
         "psrlq $32, %%mm0                       \n\t"
-        "movd %%mm0, (%%"REG_a")                \n\t"
-        "movd %%mm3, (%%"REG_a", %1)            \n\t"
+        "movd %%mm0, (%%"FF_REG_a")             \n\t"
+        "movd %%mm3, (%%"FF_REG_a", %1)         \n\t"
         "psrlq $32, %%mm3                       \n\t"
-        "movd %%mm3, (%%"REG_a", %1, 2)         \n\t"
+        "movd %%mm3, (%%"FF_REG_a", %1, 2)      \n\t"
         "movd %%mm2, (%0, %1, 4)                \n\t"
         "psrlq $32, %%mm2                       \n\t"
-        "movd %%mm2, (%%"REG_d")                \n\t"
-        "movd %%mm1, (%%"REG_d", %1)            \n\t"
+        "movd %%mm2, (%%"FF_REG_d")             \n\t"
+        "movd %%mm1, (%%"FF_REG_d", %1)         \n\t"
         "psrlq $32, %%mm1                       \n\t"
-        "movd %%mm1, (%%"REG_d", %1, 2)         \n\t"
+        "movd %%mm1, (%%"FF_REG_d", %1, 2)      \n\t"
 
 
         "movq 64(%2), %%mm0                     \n\t" // 12345678
@@ -2135,19 +2135,19 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t
 
         "movd %%mm0, 4(%0)                      \n\t"
         "psrlq $32, %%mm0                       \n\t"
-        "movd %%mm0, 4(%%"REG_a")               \n\t"
-        "movd %%mm3, 4(%%"REG_a", %1)           \n\t"
+        "movd %%mm0, 4(%%"FF_REG_a")            \n\t"
+        "movd %%mm3, 4(%%"FF_REG_a", %1)        \n\t"
         "psrlq $32, %%mm3                       \n\t"
-        "movd %%mm3, 4(%%"REG_a", %1, 2)        \n\t"
+        "movd %%mm3, 4(%%"FF_REG_a", %1, 2)     \n\t"
         "movd %%mm2, 4(%0, %1, 4)               \n\t"
         "psrlq $32, %%mm2                       \n\t"
-        "movd %%mm2, 4(%%"REG_d")               \n\t"
-        "movd %%mm1, 4(%%"REG_d", %1)           \n\t"
+        "movd %%mm2, 4(%%"FF_REG_d")            \n\t"
+        "movd %%mm1, 4(%%"FF_REG_d", %1)        \n\t"
         "psrlq $32, %%mm1                       \n\t"
-        "movd %%mm1, 4(%%"REG_d", %1, 2)        \n\t"
+        "movd %%mm1, 4(%%"FF_REG_d", %1, 2)     \n\t"
 
         :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 }
 #endif //TEMPLATE_PP_MMX
@@ -2166,9 +2166,9 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
 //#define L1_DIFF //u should change the thresholds too if u try that one
 #if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
     __asm__ volatile(
-        "lea (%2, %2, 2), %%"REG_a"             \n\t" // 3*stride
-        "lea (%2, %2, 4), %%"REG_d"             \n\t" // 5*stride
-        "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+        "lea (%2, %2, 2), %%"FF_REG_a"          \n\t" // 3*stride
+        "lea (%2, %2, 4), %%"FF_REG_d"          \n\t" // 5*stride
+        "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride
 //      0       1       2       3       4       5       6       7       8       9
 //      %x      %x+%2   %x+2%2  %x+eax  %x+4%2  %x+edx  %x+2eax %x+ecx  %x+8%2
 //FIXME reorder?
@@ -2179,21 +2179,21 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
         "psadbw (%1, %2), %%mm1                 \n\t" // |L1-R1|
         "movq (%0, %2, 2), %%mm2                \n\t" // L2
         "psadbw (%1, %2, 2), %%mm2              \n\t" // |L2-R2|
-        "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
-        "psadbw (%1, %%"REG_a"), %%mm3          \n\t" // |L3-R3|
+        "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
+        "psadbw (%1, %%"FF_REG_a"), %%mm3       \n\t" // |L3-R3|
 
         "movq (%0, %2, 4), %%mm4                \n\t" // L4
         "paddw %%mm1, %%mm0                     \n\t"
         "psadbw (%1, %2, 4), %%mm4              \n\t" // |L4-R4|
-        "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
+        "movq (%0, %%"FF_REG_d"), %%mm5         \n\t" // L5
         "paddw %%mm2, %%mm0                     \n\t"
-        "psadbw (%1, %%"REG_d"), %%mm5          \n\t" // |L5-R5|
-        "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
+        "psadbw (%1, %%"FF_REG_d"), %%mm5       \n\t" // |L5-R5|
+        "movq (%0, %%"FF_REG_a", 2), %%mm6      \n\t" // L6
         "paddw %%mm3, %%mm0                     \n\t"
-        "psadbw (%1, %%"REG_a", 2), %%mm6       \n\t" // |L6-R6|
-        "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
+        "psadbw (%1, %%"FF_REG_a", 2), %%mm6    \n\t" // |L6-R6|
+        "movq (%0, %%"FF_REG_c"), %%mm7         \n\t" // L7
         "paddw %%mm4, %%mm0                     \n\t"
-        "psadbw (%1, %%"REG_c"), %%mm7          \n\t" // |L7-R7|
+        "psadbw (%1, %%"FF_REG_c"), %%mm7       \n\t" // |L7-R7|
         "paddw %%mm5, %%mm6                     \n\t"
         "paddw %%mm7, %%mm6                     \n\t"
         "paddw %%mm6, %%mm0                     \n\t"
@@ -2239,14 +2239,14 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
 
 #define L2_DIFF_CORE(a, b)  REAL_L2_DIFF_CORE(a, b)
 
-L2_DIFF_CORE((%0)          , (%1))
-L2_DIFF_CORE((%0, %2)      , (%1, %2))
-L2_DIFF_CORE((%0, %2, 2)   , (%1, %2, 2))
-L2_DIFF_CORE((%0, %%REGa)  , (%1, %%REGa))
-L2_DIFF_CORE((%0, %2, 4)   , (%1, %2, 4))
-L2_DIFF_CORE((%0, %%REGd)  , (%1, %%REGd))
-L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
-L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
+L2_DIFF_CORE((%0)             , (%1))
+L2_DIFF_CORE((%0, %2)         , (%1, %2))
+L2_DIFF_CORE((%0, %2, 2)      , (%1, %2, 2))
+L2_DIFF_CORE((%0, %%FF_REGa)  , (%1, %%FF_REGa))
+L2_DIFF_CORE((%0, %2, 4)      , (%1, %2, 4))
+L2_DIFF_CORE((%0, %%FF_REGd)  , (%1, %%FF_REGd))
+L2_DIFF_CORE((%0, %%FF_REGa,2), (%1, %%FF_REGa,2))
+L2_DIFF_CORE((%0, %%FF_REGc)  , (%1, %%FF_REGc))
 
 #endif //L1_DIFF
 
@@ -2255,94 +2255,94 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
         "paddd %%mm0, %%mm4                     \n\t"
         "movd %%mm4, %%ecx                      \n\t"
         "shll $2, %%ecx                         \n\t"
-        "mov %3, %%"REG_d"                      \n\t"
-        "addl -4(%%"REG_d"), %%ecx              \n\t"
-        "addl 4(%%"REG_d"), %%ecx               \n\t"
-        "addl -1024(%%"REG_d"), %%ecx           \n\t"
+        "mov %3, %%"FF_REG_d"                   \n\t"
+        "addl -4(%%"FF_REG_d"), %%ecx           \n\t"
+        "addl 4(%%"FF_REG_d"), %%ecx            \n\t"
+        "addl -1024(%%"FF_REG_d"), %%ecx        \n\t"
         "addl $4, %%ecx                         \n\t"
-        "addl 1024(%%"REG_d"), %%ecx            \n\t"
+        "addl 1024(%%"FF_REG_d"), %%ecx         \n\t"
         "shrl $3, %%ecx                         \n\t"
-        "movl %%ecx, (%%"REG_d")                \n\t"
+        "movl %%ecx, (%%"FF_REG_d")             \n\t"
 
-//        "mov %3, %%"REG_c"                      \n\t"
-//        "mov %%"REG_c", test                    \n\t"
+//        "mov %3, %%"FF_REG_c"                   \n\t"
+//        "mov %%"FF_REG_c", test                 \n\t"
 //        "jmp 4f                                 \n\t"
-        "cmpl 512(%%"REG_d"), %%ecx             \n\t"
+        "cmpl 512(%%"FF_REG_d"), %%ecx          \n\t"
         " jb 2f                                 \n\t"
-        "cmpl 516(%%"REG_d"), %%ecx             \n\t"
+        "cmpl 516(%%"FF_REG_d"), %%ecx          \n\t"
         " jb 1f                                 \n\t"
 
-        "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-        "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+        "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride
+        "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride
         "movq (%0), %%mm0                       \n\t" // L0
         "movq (%0, %2), %%mm1                   \n\t" // L1
         "movq (%0, %2, 2), %%mm2                \n\t" // L2
-        "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+        "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
         "movq (%0, %2, 4), %%mm4                \n\t" // L4
-        "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
-        "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
-        "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
+        "movq (%0, %%"FF_REG_d"), %%mm5         \n\t" // L5
+        "movq (%0, %%"FF_REG_a", 2), %%mm6      \n\t" // L6
+        "movq (%0, %%"FF_REG_c"), %%mm7         \n\t" // L7
         "movq %%mm0, (%1)                       \n\t" // L0
         "movq %%mm1, (%1, %2)                   \n\t" // L1
         "movq %%mm2, (%1, %2, 2)                \n\t" // L2
-        "movq %%mm3, (%1, %%"REG_a")            \n\t" // L3
+        "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // L3
         "movq %%mm4, (%1, %2, 4)                \n\t" // L4
-        "movq %%mm5, (%1, %%"REG_d")            \n\t" // L5
-        "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" // L6
-        "movq %%mm7, (%1, %%"REG_c")            \n\t" // L7
+        "movq %%mm5, (%1, %%"FF_REG_d")         \n\t" // L5
+        "movq %%mm6, (%1, %%"FF_REG_a", 2)      \n\t" // L6
+        "movq %%mm7, (%1, %%"FF_REG_c")         \n\t" // L7
         "jmp 4f                                 \n\t"
 
         "1:                                     \n\t"
-        "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-        "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+        "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride
+        "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride
         "movq (%0), %%mm0                       \n\t" // L0
         PAVGB((%1), %%mm0)                            // L0
         "movq (%0, %2), %%mm1                   \n\t" // L1
         PAVGB((%1, %2), %%mm1)                        // L1
         "movq (%0, %2, 2), %%mm2                \n\t" // L2
         PAVGB((%1, %2, 2), %%mm2)                     // L2
-        "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
-        PAVGB((%1, %%REGa), %%mm3)                    // L3
+        "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
+        PAVGB((%1, %%FF_REGa), %%mm3)                 // L3
         "movq (%0, %2, 4), %%mm4                \n\t" // L4
         PAVGB((%1, %2, 4), %%mm4)                     // L4
-        "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
-        PAVGB((%1, %%REGd), %%mm5)                    // L5
-        "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
-        PAVGB((%1, %%REGa, 2), %%mm6)                 // L6
-        "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
-        PAVGB((%1, %%REGc), %%mm7)                    // L7
+        "movq (%0, %%"FF_REG_d"), %%mm5         \n\t" // L5
+        PAVGB((%1, %%FF_REGd), %%mm5)                 // L5
+        "movq (%0, %%"FF_REG_a", 2), %%mm6      \n\t" // L6
+        PAVGB((%1, %%FF_REGa, 2), %%mm6)              // L6
+        "movq (%0, %%"FF_REG_c"), %%mm7         \n\t" // L7
+        PAVGB((%1, %%FF_REGc), %%mm7)                 // L7
         "movq %%mm0, (%1)                       \n\t" // R0
         "movq %%mm1, (%1, %2)                   \n\t" // R1
         "movq %%mm2, (%1, %2, 2)                \n\t" // R2
-        "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+        "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // R3
         "movq %%mm4, (%1, %2, 4)                \n\t" // R4
-        "movq %%mm5, (%1, %%"REG_d")            \n\t" // R5
-        "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" // R6
-        "movq %%mm7, (%1, %%"REG_c")            \n\t" // R7
+        "movq %%mm5, (%1, %%"FF_REG_d")         \n\t" // R5
+        "movq %%mm6, (%1, %%"FF_REG_a", 2)      \n\t" // R6
+        "movq %%mm7, (%1, %%"FF_REG_c")         \n\t" // R7
         "movq %%mm0, (%0)                       \n\t" // L0
         "movq %%mm1, (%0, %2)                   \n\t" // L1
         "movq %%mm2, (%0, %2, 2)                \n\t" // L2
-        "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+        "movq %%mm3, (%0, %%"FF_REG_a")         \n\t" // L3
         "movq %%mm4, (%0, %2, 4)                \n\t" // L4
-        "movq %%mm5, (%0, %%"REG_d")            \n\t" // L5
-        "movq %%mm6, (%0, %%"REG_a", 2)         \n\t" // L6
-        "movq %%mm7, (%0, %%"REG_c")            \n\t" // L7
+        "movq %%mm5, (%0, %%"FF_REG_d")         \n\t" // L5
+        "movq %%mm6, (%0, %%"FF_REG_a", 2)      \n\t" // L6
+        "movq %%mm7, (%0, %%"FF_REG_c")         \n\t" // L7
         "jmp 4f                                 \n\t"
 
         "2:                                     \n\t"
-        "cmpl 508(%%"REG_d"), %%ecx             \n\t"
+        "cmpl 508(%%"FF_REG_d"), %%ecx          \n\t"
         " jb 3f                                 \n\t"
 
-        "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-        "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+        "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride
+        "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride
         "movq (%0), %%mm0                       \n\t" // L0
         "movq (%0, %2), %%mm1                   \n\t" // L1
         "movq (%0, %2, 2), %%mm2                \n\t" // L2
-        "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+        "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
         "movq (%1), %%mm4                       \n\t" // R0
         "movq (%1, %2), %%mm5                   \n\t" // R1
         "movq (%1, %2, 2), %%mm6                \n\t" // R2
-        "movq (%1, %%"REG_a"), %%mm7            \n\t" // R3
+        "movq (%1, %%"FF_REG_a"), %%mm7         \n\t" // R3
         PAVGB(%%mm4, %%mm0)
         PAVGB(%%mm5, %%mm1)
         PAVGB(%%mm6, %%mm2)
@@ -2354,20 +2354,20 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
         "movq %%mm0, (%1)                       \n\t" // R0
         "movq %%mm1, (%1, %2)                   \n\t" // R1
         "movq %%mm2, (%1, %2, 2)                \n\t" // R2
-        "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+        "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // R3
         "movq %%mm0, (%0)                       \n\t" // L0
         "movq %%mm1, (%0, %2)                   \n\t" // L1
         "movq %%mm2, (%0, %2, 2)                \n\t" // L2
-        "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+        "movq %%mm3, (%0, %%"FF_REG_a")         \n\t" // L3
 
         "movq (%0, %2, 4), %%mm0                \n\t" // L4
-        "movq (%0, %%"REG_d"), %%mm1            \n\t" // L5
-        "movq (%0, %%"REG_a", 2), %%mm2         \n\t" // L6
-        "movq (%0, %%"REG_c"), %%mm3            \n\t" // L7
+        "movq (%0, %%"FF_REG_d"), %%mm1         \n\t" // L5
+        "movq (%0, %%"FF_REG_a", 2), %%mm2      \n\t" // L6
+        "movq (%0, %%"FF_REG_c"), %%mm3         \n\t" // L7
         "movq (%1, %2, 4), %%mm4                \n\t" // R4
-        "movq (%1, %%"REG_d"), %%mm5            \n\t" // R5
-        "movq (%1, %%"REG_a", 2), %%mm6         \n\t" // R6
-        "movq (%1, %%"REG_c"), %%mm7            \n\t" // R7
+        "movq (%1, %%"FF_REG_d"), %%mm5         \n\t" // R5
+        "movq (%1, %%"FF_REG_a", 2), %%mm6      \n\t" // R6
+        "movq (%1, %%"FF_REG_c"), %%mm7         \n\t" // R7
         PAVGB(%%mm4, %%mm0)
         PAVGB(%%mm5, %%mm1)
         PAVGB(%%mm6, %%mm2)
@@ -2377,26 +2377,26 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
         PAVGB(%%mm6, %%mm2)
         PAVGB(%%mm7, %%mm3)
         "movq %%mm0, (%1, %2, 4)                \n\t" // R4
-        "movq %%mm1, (%1, %%"REG_d")            \n\t" // R5
-        "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" // R6
-        "movq %%mm3, (%1, %%"REG_c")            \n\t" // R7
+        "movq %%mm1, (%1, %%"FF_REG_d")         \n\t" // R5
+        "movq %%mm2, (%1, %%"FF_REG_a", 2)      \n\t" // R6
+        "movq %%mm3, (%1, %%"FF_REG_c")         \n\t" // R7
         "movq %%mm0, (%0, %2, 4)                \n\t" // L4
-        "movq %%mm1, (%0, %%"REG_d")            \n\t" // L5
-        "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" // L6
-        "movq %%mm3, (%0, %%"REG_c")            \n\t" // L7
+        "movq %%mm1, (%0, %%"FF_REG_d")         \n\t" // L5
+        "movq %%mm2, (%0, %%"FF_REG_a", 2)      \n\t" // L6
+        "movq %%mm3, (%0, %%"FF_REG_c")         \n\t" // L7
         "jmp 4f                                 \n\t"
 
         "3:                                     \n\t"
-        "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
-        "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+        "lea (%%"FF_REG_a", %2, 2), %%"FF_REG_d"\n\t" // 5*stride
+        "lea (%%"FF_REG_d", %2, 2), %%"FF_REG_c"\n\t" // 7*stride
         "movq (%0), %%mm0                       \n\t" // L0
         "movq (%0, %2), %%mm1                   \n\t" // L1
         "movq (%0, %2, 2), %%mm2                \n\t" // L2
-        "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+        "movq (%0, %%"FF_REG_a"), %%mm3         \n\t" // L3
         "movq (%1), %%mm4                       \n\t" // R0
         "movq (%1, %2), %%mm5                   \n\t" // R1
         "movq (%1, %2, 2), %%mm6                \n\t" // R2
-        "movq (%1, %%"REG_a"), %%mm7            \n\t" // R3
+        "movq (%1, %%"FF_REG_a"), %%mm7         \n\t" // R3
         PAVGB(%%mm4, %%mm0)
         PAVGB(%%mm5, %%mm1)
         PAVGB(%%mm6, %%mm2)
@@ -2412,20 +2412,20 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
         "movq %%mm0, (%1)                       \n\t" // R0
         "movq %%mm1, (%1, %2)                   \n\t" // R1
         "movq %%mm2, (%1, %2, 2)                \n\t" // R2
-        "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+        "movq %%mm3, (%1, %%"FF_REG_a")         \n\t" // R3
         "movq %%mm0, (%0)                       \n\t" // L0
         "movq %%mm1, (%0, %2)                   \n\t" // L1
         "movq %%mm2, (%0, %2, 2)                \n\t" // L2
-        "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+        "movq %%mm3, (%0, %%"FF_REG_a")         \n\t" // L3
 
         "movq (%0, %2, 4), %%mm0                \n\t" // L4
-        "movq (%0, %%"REG_d"), %%mm1            \n\t" // L5
-        "movq (%0, %%"REG_a", 2), %%mm2         \n\t" // L6
-        "movq (%0, %%"REG_c"), %%mm3            \n\t" // L7
+        "movq (%0, %%"FF_REG_d"), %%mm1         \n\t" // L5
+        "movq (%0, %%"FF_REG_a", 2), %%mm2      \n\t" // L6
+        "movq (%0, %%"FF_REG_c"), %%mm3         \n\t" // L7
         "movq (%1, %2, 4), %%mm4                \n\t" // R4
-        "movq (%1, %%"REG_d"), %%mm5            \n\t" // R5
-        "movq (%1, %%"REG_a", 2), %%mm6         \n\t" // R6
-        "movq (%1, %%"REG_c"), %%mm7            \n\t" // R7
+        "movq (%1, %%"FF_REG_d"), %%mm5         \n\t" // R5
+        "movq (%1, %%"FF_REG_a", 2), %%mm6      \n\t" // R6
+        "movq (%1, %%"FF_REG_c"), %%mm7         \n\t" // R7
         PAVGB(%%mm4, %%mm0)
         PAVGB(%%mm5, %%mm1)
         PAVGB(%%mm6, %%mm2)
@@ -2439,19 +2439,19 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
         PAVGB(%%mm6, %%mm2)
         PAVGB(%%mm7, %%mm3)
         "movq %%mm0, (%1, %2, 4)                \n\t" // R4
-        "movq %%mm1, (%1, %%"REG_d")            \n\t" // R5
-        "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" // R6
-        "movq %%mm3, (%1, %%"REG_c")            \n\t" // R7
+        "movq %%mm1, (%1, %%"FF_REG_d")         \n\t" // R5
+        "movq %%mm2, (%1, %%"FF_REG_a", 2)      \n\t" // R6
+        "movq %%mm3, (%1, %%"FF_REG_c")         \n\t" // R7
         "movq %%mm0, (%0, %2, 4)                \n\t" // L4
-        "movq %%mm1, (%0, %%"REG_d")            \n\t" // L5
-        "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" // L6
-        "movq %%mm3, (%0, %%"REG_c")            \n\t" // L7
+        "movq %%mm1, (%0, %%"FF_REG_d")         \n\t" // L5
+        "movq %%mm2, (%0, %%"FF_REG_a", 2)      \n\t" // L6
+        "movq %%mm3, (%0, %%"FF_REG_c")         \n\t" // L7
 
         "4:                                     \n\t"
 
         :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
           NAMED_CONSTRAINTS_ADD(b80)
-        : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
+        : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c, "memory"
     );
 #else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
 {
@@ -2556,19 +2556,19 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
         );
 
     __asm__ volatile(
-        "lea (%2, %3), %%"REG_a"                \n\t"
+        "lea (%2, %3), %%"FF_REG_a"             \n\t"
 //      0       1       2       3       4       5       6       7       8       9
 //      %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2
 
         "movq (%2), %%mm0                       \n\t"
-        "movq (%%"REG_a"), %%mm1                \n\t"
+        "movq (%%"FF_REG_a"), %%mm1             \n\t"
         "movq %%mm1, %%mm3                      \n\t"
         "movq %%mm1, %%mm4                      \n\t"
         "psubb %%mm1, %%mm0                     \n\t" // mm0 = difference
         "paddb %%mm7, %%mm0                     \n\t"
         "pcmpgtb %%mm6, %%mm0                   \n\t"
 
-        "movq (%%"REG_a",%3), %%mm2             \n\t"
+        "movq (%%"FF_REG_a",%3), %%mm2          \n\t"
         PMAXUB(%%mm2, %%mm4)
         PMINUB(%%mm2, %%mm3, %%mm5)
         "psubb %%mm2, %%mm1                     \n\t"
@@ -2576,7 +2576,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
         "pcmpgtb %%mm6, %%mm1                   \n\t"
         "paddb %%mm1, %%mm0                     \n\t"
 
-        "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %3, 2), %%mm1      \n\t"
         PMAXUB(%%mm1, %%mm4)
         PMINUB(%%mm1, %%mm3, %%mm5)
         "psubb %%mm1, %%mm2                     \n\t"
@@ -2584,7 +2584,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
         "pcmpgtb %%mm6, %%mm2                   \n\t"
         "paddb %%mm2, %%mm0                     \n\t"
 
-        "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
+        "lea (%%"FF_REG_a", %3, 4), %%"FF_REG_a"\n\t"
 
         "movq (%2, %3, 4), %%mm2                \n\t"
         PMAXUB(%%mm2, %%mm4)
@@ -2594,7 +2594,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
         "pcmpgtb %%mm6, %%mm1                   \n\t"
         "paddb %%mm1, %%mm0                     \n\t"
 
-        "movq (%%"REG_a"), %%mm1                \n\t"
+        "movq (%%"FF_REG_a"), %%mm1             \n\t"
         PMAXUB(%%mm1, %%mm4)
         PMINUB(%%mm1, %%mm3, %%mm5)
         "psubb %%mm1, %%mm2                     \n\t"
@@ -2602,7 +2602,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
         "pcmpgtb %%mm6, %%mm2                   \n\t"
         "paddb %%mm2, %%mm0                     \n\t"
 
-        "movq (%%"REG_a", %3), %%mm2            \n\t"
+        "movq (%%"FF_REG_a", %3), %%mm2         \n\t"
         PMAXUB(%%mm2, %%mm4)
         PMINUB(%%mm2, %%mm3, %%mm5)
         "psubb %%mm2, %%mm1                     \n\t"
@@ -2610,7 +2610,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
         "pcmpgtb %%mm6, %%mm1                   \n\t"
         "paddb %%mm1, %%mm0                     \n\t"
 
-        "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %3, 2), %%mm1      \n\t"
         PMAXUB(%%mm1, %%mm4)
         PMINUB(%%mm1, %%mm3, %%mm5)
         "psubb %%mm1, %%mm2                     \n\t"
@@ -2626,7 +2626,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
         "pcmpgtb %%mm6, %%mm1                   \n\t"
         "paddb %%mm1, %%mm0                     \n\t"
 
-        "movq (%%"REG_a", %3, 4), %%mm1         \n\t"
+        "movq (%%"FF_REG_a", %3, 4), %%mm1      \n\t"
         "psubb %%mm1, %%mm2                     \n\t"
         "paddb %%mm7, %%mm2                     \n\t"
         "pcmpgtb %%mm6, %%mm2                   \n\t"
@@ -2651,7 +2651,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 
         : "=m" (eq_mask), "=m" (dc_mask)
         : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
-        : "%"REG_a
+        : "%"FF_REG_a
     );
 
     both_masks = dc_mask & eq_mask;
@@ -2851,12 +2851,12 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "punpckhbw %%mm7, %%mm1                 \n\t" // high part of line 0
 
             "movq (%0, %1), %%mm2                   \n\t"
-            "lea (%0, %1, 2), %%"REG_a"             \n\t"
+            "lea (%0, %1, 2), %%"FF_REG_a"          \n\t"
             "movq %%mm2, %%mm3                      \n\t"
             "punpcklbw %%mm7, %%mm2                 \n\t" // low part of line 1
             "punpckhbw %%mm7, %%mm3                 \n\t" // high part of line 1
 
-            "movq (%%"REG_a"), %%mm4                \n\t"
+            "movq (%%"FF_REG_a"), %%mm4             \n\t"
             "movq %%mm4, %%mm5                      \n\t"
             "punpcklbw %%mm7, %%mm4                 \n\t" // low part of line 2
             "punpckhbw %%mm7, %%mm5                 \n\t" // high part of line 2
@@ -2873,7 +2873,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2
             "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2
 
-            "movq (%%"REG_a", %1), %%mm2            \n\t"
+            "movq (%%"FF_REG_a", %1), %%mm2         \n\t"
             "movq %%mm2, %%mm3                      \n\t"
             "punpcklbw %%mm7, %%mm2                 \n\t" // L3
             "punpckhbw %%mm7, %%mm3                 \n\t" // H3
@@ -2885,7 +2885,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "movq %%mm0, (%4)                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
             "movq %%mm1, 8(%4)                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
-            "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+            "movq (%%"FF_REG_a", %1, 2), %%mm0      \n\t"
             "movq %%mm0, %%mm1                      \n\t"
             "punpcklbw %%mm7, %%mm0                 \n\t" // L4
             "punpckhbw %%mm7, %%mm1                 \n\t" // H4
@@ -2899,7 +2899,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
             "psubw %%mm3, %%mm5                     \n\t" // 2H2 - H3 + H4
 
-            "lea (%%"REG_a", %1), %0                \n\t"
+            "lea (%%"FF_REG_a", %1), %0             \n\t"
             "psllw $2, %%mm2                        \n\t" // 4L3 - 4L4
             "psllw $2, %%mm3                        \n\t" // 4H3 - 4H4
             "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4
@@ -2914,10 +2914,10 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - 2L5
             "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - 2H5
 
-            "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+            "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
             "punpcklbw %%mm7, %%mm6                 \n\t" // L6
             "psubw %%mm6, %%mm2                     \n\t" // L5 - L6
-            "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+            "movq (%%"FF_REG_a", %1, 4), %%mm6      \n\t"
             "punpckhbw %%mm7, %%mm6                 \n\t" // H6
             "psubw %%mm6, %%mm3                     \n\t" // H5 - H6
 
@@ -3068,7 +3068,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             : "+r" (temp_src)
             : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
               NAMED_CONSTRAINTS_ADD(w05,w20)
-            : "%"REG_a
+            : "%"FF_REG_a
         );
     }
 /*if(step==16){
@@ -3099,10 +3099,10 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
     if(levelFix){
 #if TEMPLATE_PP_MMX && HAVE_6REGS
     __asm__ volatile(
-        "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset
-        "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale
-        "lea (%2,%4), %%"REG_a"         \n\t"
-        "lea (%3,%5), %%"REG_d"         \n\t"
+        "movq (%%"FF_REG_a"), %%mm2     \n\t" // packedYOffset
+        "movq 8(%%"FF_REG_a"), %%mm3    \n\t" // packedYScale
+        "lea (%2,%4), %%"FF_REG_a"      \n\t"
+        "lea (%3,%5), %%"FF_REG_d"      \n\t"
         "pxor %%mm4, %%mm4              \n\t"
 #if TEMPLATE_PP_MMXEXT
 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
@@ -3159,11 +3159,11 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
    REAL_SCALED_CPY(src1, src2, dst1, dst2)
 
 SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
-SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
-SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
-        "lea (%%"REG_a",%4,4), %%"REG_a"        \n\t"
-        "lea (%%"REG_d",%5,4), %%"REG_d"        \n\t"
-SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
+SCALED_CPY((%2, %4, 2), (%%FF_REGa, %4, 2), (%3, %5, 2), (%%FF_REGd, %5, 2))
+SCALED_CPY((%2, %4, 4), (%%FF_REGa, %4, 4), (%3, %5, 4), (%%FF_REGd, %5, 4))
+        "lea (%%"FF_REG_a",%4,4), %%"FF_REG_a"        \n\t"
+        "lea (%%"FF_REG_d",%5,4), %%"FF_REG_d"        \n\t"
+SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), (%%FF_REGd, %5), (%%FF_REGd, %5, 2))
 
 
         : "=&a" (packedOffsetAndScale)
@@ -3172,7 +3172,7 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
         "r"(dst),
         "r" ((x86_reg)srcStride),
         "r" ((x86_reg)dstStride)
-        : "%"REG_d
+        : "%"FF_REG_d
     );
 #else //TEMPLATE_PP_MMX && HAVE_6REGS
     for(i=0; i<8; i++)
@@ -3182,8 +3182,8 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
     }else{
 #if TEMPLATE_PP_MMX && HAVE_6REGS
     __asm__ volatile(
-        "lea (%0,%2), %%"REG_a"                 \n\t"
-        "lea (%1,%3), %%"REG_d"                 \n\t"
+        "lea (%0,%2), %%"FF_REG_a"      \n\t"
+        "lea (%1,%3), %%"FF_REG_d"      \n\t"
 
 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \
         "movq " #src1 ", %%mm0          \n\t"\
@@ -3194,18 +3194,18 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
    REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
 
-SIMPLE_CPY((%0)       , (%0, %2)       , (%1)       , (%1, %3))
-SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
-SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
-        "lea (%%"REG_a",%2,4), %%"REG_a"        \n\t"
-        "lea (%%"REG_d",%3,4), %%"REG_d"        \n\t"
-SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
+SIMPLE_CPY((%0)       , (%0, %2)          , (%1)       , (%1, %3))
+SIMPLE_CPY((%0, %2, 2), (%%FF_REGa, %2, 2), (%1, %3, 2), (%%FF_REGd, %3, 2))
+SIMPLE_CPY((%0, %2, 4), (%%FF_REGa, %2, 4), (%1, %3, 4), (%%FF_REGd, %3, 4))
+        "lea (%%"FF_REG_a",%2,4), %%"FF_REG_a"        \n\t"
+        "lea (%%"FF_REG_d",%3,4), %%"FF_REG_d"        \n\t"
+SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), (%%FF_REGd, %3), (%%FF_REGd, %3, 2))
 
         : : "r" (src),
         "r" (dst),
         "r" ((x86_reg)srcStride),
         "r" ((x86_reg)dstStride)
-        : "%"REG_a, "%"REG_d
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 #else //TEMPLATE_PP_MMX && HAVE_6REGS
     for(i=0; i<8; i++)
diff --git a/libswscale/x86/hscale_fast_bilinear_simd.c b/libswscale/x86/hscale_fast_bilinear_simd.c
index b37b63c..2cba5f0 100644
--- a/libswscale/x86/hscale_fast_bilinear_simd.c
+++ b/libswscale/x86/hscale_fast_bilinear_simd.c
@@ -55,9 +55,9 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
         "jmp                         9f                 \n\t"
         // Begin
         "0:                                             \n\t"
-        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
-        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
-        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
+        "movq    (%%"FF_REG_d", %%"FF_REG_a"), %%mm3    \n\t"
+        "movd    (%%"FF_REG_c", %%"FF_REG_S"), %%mm0    \n\t"
+        "movd   1(%%"FF_REG_c", %%"FF_REG_S"), %%mm1    \n\t"
         "punpcklbw                %%mm7, %%mm1          \n\t"
         "punpcklbw                %%mm7, %%mm0          \n\t"
         "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
@@ -65,14 +65,14 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
         "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
         "2:                                             \n\t"
         "psubw                    %%mm1, %%mm0          \n\t"
-        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
+        "movl   8(%%"FF_REG_b", %%"FF_REG_a"), %%esi    \n\t"
         "pmullw                   %%mm3, %%mm0          \n\t"
         "psllw                       $7, %%mm1          \n\t"
         "paddw                    %%mm1, %%mm0          \n\t"
 
-        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
+        "movq                     %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t"
 
-        "add                         $8, %%"REG_a"      \n\t"
+        "add                         $8, %%"FF_REG_a"   \n\t"
         // End
         "9:                                             \n\t"
         "lea       " LOCAL_MANGLE(0b) ", %0             \n\t"
@@ -94,22 +94,22 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
         "jmp                         9f                 \n\t"
         // Begin
         "0:                                             \n\t"
-        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
-        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
+        "movq    (%%"FF_REG_d", %%"FF_REG_a"), %%mm3    \n\t"
+        "movd    (%%"FF_REG_c", %%"FF_REG_S"), %%mm0    \n\t"
         "punpcklbw                %%mm7, %%mm0          \n\t"
         "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
         "1:                                             \n\t"
         "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
         "2:                                             \n\t"
         "psubw                    %%mm1, %%mm0          \n\t"
-        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
+        "movl   8(%%"FF_REG_b", %%"FF_REG_a"), %%esi    \n\t"
         "pmullw                   %%mm3, %%mm0          \n\t"
         "psllw                       $7, %%mm1          \n\t"
         "paddw                    %%mm1, %%mm0          \n\t"
 
-        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
+        "movq                     %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t"
 
-        "add                         $8, %%"REG_a"      \n\t"
+        "add                         $8, %%"FF_REG_a"   \n\t"
         // End
         "9:                                             \n\t"
         "lea       " LOCAL_MANGLE(0b) ", %0             \n\t"
@@ -206,39 +206,39 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
 
     __asm__ volatile(
 #if ARCH_X86_64
-        "mov               -8(%%rsp), %%"REG_a" \n\t"
-        "mov               %%"REG_a", %5        \n\t"  // retsave
+        "mov               -8(%%rsp), %%"FF_REG_a"    \n\t"
+        "mov            %%"FF_REG_a", %5              \n\t"  // retsave
 #else
 #if defined(PIC)
-        "mov               %%"REG_b", %5        \n\t"  // ebxsave
+        "mov            %%"FF_REG_b", %5              \n\t"  // ebxsave
 #endif
 #endif
-        "pxor                  %%mm7, %%mm7     \n\t"
-        "mov                      %0, %%"REG_c" \n\t"
-        "mov                      %1, %%"REG_D" \n\t"
-        "mov                      %2, %%"REG_d" \n\t"
-        "mov                      %3, %%"REG_b" \n\t"
-        "xor               %%"REG_a", %%"REG_a" \n\t" // i
-        PREFETCH"        (%%"REG_c")            \n\t"
-        PREFETCH"      32(%%"REG_c")            \n\t"
-        PREFETCH"      64(%%"REG_c")            \n\t"
+        "pxor                  %%mm7, %%mm7           \n\t"
+        "mov                      %0, %%"FF_REG_c"    \n\t"
+        "mov                      %1, %%"FF_REG_D"    \n\t"
+        "mov                      %2, %%"FF_REG_d"    \n\t"
+        "mov                      %3, %%"FF_REG_b"    \n\t"
+        "xor            %%"FF_REG_a", %%"FF_REG_a"    \n\t" // i
+        PREFETCH"      (%%"FF_REG_c")                 \n\t"
+        PREFETCH"    32(%%"FF_REG_c")                 \n\t"
+        PREFETCH"    64(%%"FF_REG_c")                 \n\t"
 
 #if ARCH_X86_64
 #define CALL_MMXEXT_FILTER_CODE \
-        "movl            (%%"REG_b"), %%esi     \n\t"\
-        "call                    *%4            \n\t"\
-        "movl (%%"REG_b", %%"REG_a"), %%esi     \n\t"\
-        "add               %%"REG_S", %%"REG_c" \n\t"\
-        "add               %%"REG_a", %%"REG_D" \n\t"\
-        "xor               %%"REG_a", %%"REG_a" \n\t"\
+        "movl               (%%"FF_REG_b"), %%esi        \n\t"\
+        "call                          *%4               \n\t"\
+        "movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi        \n\t"\
+        "add                  %%"FF_REG_S", %%"FF_REG_c" \n\t"\
+        "add                  %%"FF_REG_a", %%"FF_REG_D" \n\t"\
+        "xor                  %%"FF_REG_a", %%"FF_REG_a" \n\t"\
 
 #else
 #define CALL_MMXEXT_FILTER_CODE \
-        "movl (%%"REG_b"), %%esi        \n\t"\
-        "call         *%4                       \n\t"\
-        "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
-        "add               %%"REG_a", %%"REG_D" \n\t"\
-        "xor               %%"REG_a", %%"REG_a" \n\t"\
+        "movl               (%%"FF_REG_b"), %%esi        \n\t"\
+        "call                          *%4               \n\t"\
+        "addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\
+        "add                  %%"FF_REG_a", %%"FF_REG_D" \n\t"\
+        "xor                  %%"FF_REG_a", %%"FF_REG_a" \n\t"\
 
 #endif /* ARCH_X86_64 */
 
@@ -252,11 +252,11 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
         CALL_MMXEXT_FILTER_CODE
 
 #if ARCH_X86_64
-        "mov                      %5, %%"REG_a" \n\t"
-        "mov               %%"REG_a", -8(%%rsp) \n\t"
+        "mov                      %5, %%"FF_REG_a" \n\t"
+        "mov            %%"FF_REG_a", -8(%%rsp)    \n\t"
 #else
 #if defined(PIC)
-        "mov                      %5, %%"REG_b" \n\t"
+        "mov                      %5, %%"FF_REG_b" \n\t"
 #endif
 #endif
         :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
@@ -268,9 +268,9 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
           ,"m" (ebxsave)
 #endif
 #endif
-        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+        : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
 #if ARCH_X86_64 || !defined(PIC)
-         ,"%"REG_b
+         ,"%"FF_REG_b
 #endif
     );
 
@@ -295,33 +295,33 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
 #endif
     __asm__ volatile(
 #if ARCH_X86_64
-        "mov          -8(%%rsp), %%"REG_a"  \n\t"
-        "mov          %%"REG_a", %7         \n\t"  // retsave
+        "mov          -8(%%rsp), %%"FF_REG_a"    \n\t"
+        "mov       %%"FF_REG_a", %7              \n\t"  // retsave
 #else
 #if defined(PIC)
-        "mov          %%"REG_b", %7         \n\t"  // ebxsave
+        "mov       %%"FF_REG_b", %7              \n\t"  // ebxsave
 #endif
 #endif
-        "pxor             %%mm7, %%mm7      \n\t"
-        "mov                 %0, %%"REG_c"  \n\t"
-        "mov                 %1, %%"REG_D"  \n\t"
-        "mov                 %2, %%"REG_d"  \n\t"
-        "mov                 %3, %%"REG_b"  \n\t"
-        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-        PREFETCH"   (%%"REG_c")             \n\t"
-        PREFETCH" 32(%%"REG_c")             \n\t"
-        PREFETCH" 64(%%"REG_c")             \n\t"
+        "pxor             %%mm7, %%mm7           \n\t"
+        "mov                 %0, %%"FF_REG_c"    \n\t"
+        "mov                 %1, %%"FF_REG_D"    \n\t"
+        "mov                 %2, %%"FF_REG_d"    \n\t"
+        "mov                 %3, %%"FF_REG_b"    \n\t"
+        "xor          %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
+        PREFETCH"   (%%"FF_REG_c")               \n\t"
+        PREFETCH" 32(%%"FF_REG_c")               \n\t"
+        PREFETCH" 64(%%"FF_REG_c")               \n\t"
 
         CALL_MMXEXT_FILTER_CODE
         CALL_MMXEXT_FILTER_CODE
         CALL_MMXEXT_FILTER_CODE
         CALL_MMXEXT_FILTER_CODE
-        "xor          %%"REG_a", %%"REG_a"  \n\t" // i
-        "mov                 %5, %%"REG_c"  \n\t" // src2
-        "mov                 %6, %%"REG_D"  \n\t" // dst2
-        PREFETCH"   (%%"REG_c")             \n\t"
-        PREFETCH" 32(%%"REG_c")             \n\t"
-        PREFETCH" 64(%%"REG_c")             \n\t"
+        "xor          %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
+        "mov                    %5, %%"FF_REG_c" \n\t" // src2
+        "mov                    %6, %%"FF_REG_D" \n\t" // dst2
+        PREFETCH"   (%%"FF_REG_c")               \n\t"
+        PREFETCH" 32(%%"FF_REG_c")               \n\t"
+        PREFETCH" 64(%%"FF_REG_c")               \n\t"
 
         CALL_MMXEXT_FILTER_CODE
         CALL_MMXEXT_FILTER_CODE
@@ -329,11 +329,11 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
         CALL_MMXEXT_FILTER_CODE
 
 #if ARCH_X86_64
-        "mov                 %7, %%"REG_a"  \n\t"
-        "mov          %%"REG_a", -8(%%rsp)  \n\t"
+        "mov                    %7, %%"FF_REG_a" \n\t"
+        "mov          %%"FF_REG_a", -8(%%rsp)    \n\t"
 #else
 #if defined(PIC)
-        "mov %7, %%"REG_b"    \n\t"
+        "mov %7, %%"FF_REG_b"    \n\t"
 #endif
 #endif
         :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
@@ -345,9 +345,9 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
           ,"m" (ebxsave)
 #endif
 #endif
-        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+        : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
 #if ARCH_X86_64 || !defined(PIC)
-         ,"%"REG_b
+         ,"%"FF_REG_b
 #endif
     );
 
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 95d4f8f..c655ae1 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -1101,43 +1101,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     unsigned i;
     x86_reg mmx_size= 23 - src_size;
     __asm__ volatile (
-        "test             %%"REG_a", %%"REG_a"          \n\t"
+        "test             %%"FF_REG_a", %%"FF_REG_a"    \n\t"
         "jns                     2f                     \n\t"
         "movq     "MANGLE(mask24r)", %%mm5              \n\t"
         "movq     "MANGLE(mask24g)", %%mm6              \n\t"
         "movq     "MANGLE(mask24b)", %%mm7              \n\t"
         ".p2align                 4                     \n\t"
         "1:                                             \n\t"
-        PREFETCH" 32(%1, %%"REG_a")                     \n\t"
-        "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-        "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
-        "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
+        PREFETCH" 32(%1, %%"FF_REG_a")                  \n\t"
+        "movq    (%1, %%"FF_REG_a"), %%mm0              \n\t" // BGR BGR BG
+        "movq    (%1, %%"FF_REG_a"), %%mm1              \n\t" // BGR BGR BG
+        "movq   2(%1, %%"FF_REG_a"), %%mm2              \n\t" // R BGR BGR B
         "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
         "pand                 %%mm5, %%mm0              \n\t"
         "pand                 %%mm6, %%mm1              \n\t"
         "pand                 %%mm7, %%mm2              \n\t"
         "por                  %%mm0, %%mm1              \n\t"
         "por                  %%mm2, %%mm1              \n\t"
-        "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
-        MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
-        "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
-        "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
+        "movq   6(%1, %%"FF_REG_a"), %%mm0              \n\t" // BGR BGR BG
+        MOVNTQ"               %%mm1,(%2, %%"FF_REG_a")  \n\t" // RGB RGB RG
+        "movq   8(%1, %%"FF_REG_a"), %%mm1              \n\t" // R BGR BGR B
+        "movq  10(%1, %%"FF_REG_a"), %%mm2              \n\t" // GR BGR BGR
         "pand                 %%mm7, %%mm0              \n\t"
         "pand                 %%mm5, %%mm1              \n\t"
         "pand                 %%mm6, %%mm2              \n\t"
         "por                  %%mm0, %%mm1              \n\t"
         "por                  %%mm2, %%mm1              \n\t"
-        "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
-        MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
-        "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
-        "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
+        "movq  14(%1, %%"FF_REG_a"), %%mm0              \n\t" // R BGR BGR B
+        MOVNTQ"               %%mm1, 8(%2, %%"FF_REG_a")\n\t" // B RGB RGB R
+        "movq  16(%1, %%"FF_REG_a"), %%mm1              \n\t" // GR BGR BGR
+        "movq  18(%1, %%"FF_REG_a"), %%mm2              \n\t" // BGR BGR BG
         "pand                 %%mm6, %%mm0              \n\t"
         "pand                 %%mm7, %%mm1              \n\t"
         "pand                 %%mm5, %%mm2              \n\t"
         "por                  %%mm0, %%mm1              \n\t"
         "por                  %%mm2, %%mm1              \n\t"
-        MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
-        "add                    $24, %%"REG_a"          \n\t"
+        MOVNTQ"               %%mm1, 16(%2, %%"FF_REG_a") \n\t"
+        "add                    $24, %%"FF_REG_a"       \n\t"
         " js                     1b                     \n\t"
         "2:                                             \n\t"
         : "+a" (mmx_size)
@@ -1173,20 +1173,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
     for (y=0; y<height; y++) {
         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
         __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            "xor                 %%"FF_REG_a", %%"FF_REG_a" \n\t"
             ".p2align                    4              \n\t"
             "1:                                         \n\t"
-            PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
-            PREFETCH"    32(%2, %%"REG_a")              \n\t"
-            PREFETCH"    32(%3, %%"REG_a")              \n\t"
-            "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
+            PREFETCH" 32(%1, %%"FF_REG_a", 2)           \n\t"
+            PREFETCH" 32(%2, %%"FF_REG_a")              \n\t"
+            PREFETCH" 32(%3, %%"FF_REG_a")              \n\t"
+            "movq       (%2, %%"FF_REG_a"), %%mm0       \n\t" // U(0)
             "movq                    %%mm0, %%mm2       \n\t" // U(0)
-            "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
+            "movq       (%3, %%"FF_REG_a"), %%mm1       \n\t" // V(0)
             "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
             "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
 
-            "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
-            "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
+            "movq     (%1, %%"FF_REG_a",2), %%mm3       \n\t" // Y(0)
+            "movq    8(%1, %%"FF_REG_a",2), %%mm5       \n\t" // Y(8)
             "movq                    %%mm3, %%mm4       \n\t" // Y(0)
             "movq                    %%mm5, %%mm6       \n\t" // Y(8)
             "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
@@ -1194,16 +1194,16 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
             "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
             "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
 
-            MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
-            MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
+            MOVNTQ"                  %%mm3,   (%0, %%"FF_REG_a", 4)    \n\t"
+            MOVNTQ"                  %%mm4,  8(%0, %%"FF_REG_a", 4)    \n\t"
+            MOVNTQ"                  %%mm5, 16(%0, %%"FF_REG_a", 4)    \n\t"
+            MOVNTQ"                  %%mm6, 24(%0, %%"FF_REG_a", 4)    \n\t"
 
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
+            "add                        $8, %%"FF_REG_a" \n\t"
+            "cmp                        %4, %%"FF_REG_a" \n\t"
+            " jb                        1b               \n\t"
             ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-            : "%"REG_a
+            : "%"FF_REG_a
         );
         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
             usrc += chromStride;
@@ -1238,20 +1238,20 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
     for (y=0; y<height; y++) {
         //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
         __asm__ volatile(
-            "xor                %%"REG_a", %%"REG_a"    \n\t"
+            "xor             %%"FF_REG_a", %%"FF_REG_a" \n\t"
             ".p2align                   4               \n\t"
             "1:                                         \n\t"
-            PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
-            PREFETCH"   32(%2, %%"REG_a")               \n\t"
-            PREFETCH"   32(%3, %%"REG_a")               \n\t"
-            "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
+            PREFETCH" 32(%1, %%"FF_REG_a", 2)           \n\t"
+            PREFETCH" 32(%2, %%"FF_REG_a")              \n\t"
+            PREFETCH" 32(%3, %%"FF_REG_a")              \n\t"
+            "movq      (%2, %%"FF_REG_a"), %%mm0        \n\t" // U(0)
             "movq                   %%mm0, %%mm2        \n\t" // U(0)
-            "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
+            "movq      (%3, %%"FF_REG_a"), %%mm1        \n\t" // V(0)
             "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
             "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
 
-            "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
-            "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
+            "movq    (%1, %%"FF_REG_a",2), %%mm3        \n\t" // Y(0)
+            "movq   8(%1, %%"FF_REG_a",2), %%mm5        \n\t" // Y(8)
             "movq                   %%mm0, %%mm4        \n\t" // Y(0)
             "movq                   %%mm2, %%mm6        \n\t" // Y(8)
             "punpcklbw              %%mm3, %%mm0        \n\t" // YUYV YUYV(0)
@@ -1259,16 +1259,16 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
             "punpcklbw              %%mm5, %%mm2        \n\t" // YUYV YUYV(8)
             "punpckhbw              %%mm5, %%mm6        \n\t" // YUYV YUYV(12)
 
-            MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
-            MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
+            MOVNTQ"                 %%mm0,   (%0, %%"FF_REG_a", 4)     \n\t"
+            MOVNTQ"                 %%mm4,  8(%0, %%"FF_REG_a", 4)     \n\t"
+            MOVNTQ"                 %%mm2, 16(%0, %%"FF_REG_a", 4)     \n\t"
+            MOVNTQ"                 %%mm6, 24(%0, %%"FF_REG_a", 4)     \n\t"
 
-            "add                       $8, %%"REG_a"    \n\t"
-            "cmp                       %4, %%"REG_a"    \n\t"
+            "add                       $8, %%"FF_REG_a" \n\t"
+            "cmp                       %4, %%"FF_REG_a" \n\t"
             " jb                       1b               \n\t"
             ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
-            : "%"REG_a
+            : "%"FF_REG_a
         );
         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
             usrc += chromStride;
@@ -1326,14 +1326,14 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2) {
         __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            "xor              %%"FF_REG_a", %%"FF_REG_a"\n\t"
             "pcmpeqw                 %%mm7, %%mm7       \n\t"
             "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
             ".p2align                    4              \n\t"
             "1:                \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
+            PREFETCH" 64(%0, %%"FF_REG_a", 4)           \n\t"
+            "movq    (%0, %%"FF_REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
+            "movq   8(%0, %%"FF_REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
             "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
             "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
             "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
@@ -1343,10 +1343,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
             "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
 
-            MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
+            MOVNTQ"                  %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
 
-            "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
+            "movq  16(%0, %%"FF_REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
+            "movq  24(%0, %%"FF_REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
             "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
             "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
             "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
@@ -1356,7 +1356,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
             "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
 
-            MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
+            MOVNTQ"                  %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
 
             "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
             "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
@@ -1367,28 +1367,28 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
             "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
 
-            MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
-            MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
+            MOVNTQ"                  %%mm0, (%3, %%"FF_REG_a")     \n\t"
+            MOVNTQ"                  %%mm2, (%2, %%"FF_REG_a")     \n\t"
 
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
-            " jb                        1b              \n\t"
+            "add                        $8, %%"FF_REG_a" \n\t"
+            "cmp                        %4, %%"FF_REG_a" \n\t"
+            " jb                        1b               \n\t"
             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
         );
 
         ydst += lumStride;
         src  += srcStride;
 
         __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            "xor              %%"FF_REG_a", %%"FF_REG_a"\n\t"
             ".p2align                    4              \n\t"
             "1:                                         \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
-            "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
+            PREFETCH" 64(%0, %%"FF_REG_a", 4)           \n\t"
+            "movq    (%0, %%"FF_REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
+            "movq   8(%0, %%"FF_REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
+            "movq  16(%0, %%"FF_REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
+            "movq  24(%0, %%"FF_REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
             "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
             "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
             "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
@@ -1396,15 +1396,15 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
             "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
 
-            MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
-            MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
+            MOVNTQ"                  %%mm0,  (%1, %%"FF_REG_a", 2) \n\t"
+            MOVNTQ"                  %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
 
-            "add                        $8, %%"REG_a"   \n\t"
-            "cmp                        %4, %%"REG_a"   \n\t"
+            "add                        $8, %%"FF_REG_a"\n\t"
+            "cmp                        %4, %%"FF_REG_a"\n\t"
             " jb                        1b              \n\t"
 
             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
         );
         udst += chromStride;
         vdst += chromStride;
@@ -1438,23 +1438,23 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
 
         if (mmxSize) {
         __asm__ volatile(
-            "mov           %4, %%"REG_a"            \n\t"
+            "mov                       %4, %%"FF_REG_a" \n\t"
             "movq        "MANGLE(mmx_ff)", %%mm0    \n\t"
-            "movq         (%0, %%"REG_a"), %%mm4    \n\t"
+            "movq      (%0, %%"FF_REG_a"), %%mm4    \n\t"
             "movq                   %%mm4, %%mm2    \n\t"
             "psllq                     $8, %%mm4    \n\t"
             "pand                   %%mm0, %%mm2    \n\t"
             "por                    %%mm2, %%mm4    \n\t"
-            "movq         (%1, %%"REG_a"), %%mm5    \n\t"
+            "movq      (%1, %%"FF_REG_a"), %%mm5    \n\t"
             "movq                   %%mm5, %%mm3    \n\t"
             "psllq                     $8, %%mm5    \n\t"
             "pand                   %%mm0, %%mm3    \n\t"
             "por                    %%mm3, %%mm5    \n\t"
             "1:                                     \n\t"
-            "movq         (%0, %%"REG_a"), %%mm0    \n\t"
-            "movq         (%1, %%"REG_a"), %%mm1    \n\t"
-            "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
-            "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
+            "movq      (%0, %%"FF_REG_a"), %%mm0    \n\t"
+            "movq      (%1, %%"FF_REG_a"), %%mm1    \n\t"
+            "movq     1(%0, %%"FF_REG_a"), %%mm2    \n\t"
+            "movq     1(%1, %%"FF_REG_a"), %%mm3    \n\t"
             PAVGB"                  %%mm0, %%mm5    \n\t"
             PAVGB"                  %%mm0, %%mm3    \n\t"
             PAVGB"                  %%mm0, %%mm5    \n\t"
@@ -1469,19 +1469,19 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
             "punpckhbw              %%mm3, %%mm7    \n\t"
             "punpcklbw              %%mm2, %%mm4    \n\t"
             "punpckhbw              %%mm2, %%mm6    \n\t"
-            MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
-            MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
-            "add                       $8, %%"REG_a"            \n\t"
-            "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
-            "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
-            " js                       1b                       \n\t"
+            MOVNTQ"                 %%mm5,  (%2, %%"FF_REG_a", 2)  \n\t"
+            MOVNTQ"                 %%mm7, 8(%2, %%"FF_REG_a", 2)  \n\t"
+            MOVNTQ"                 %%mm4,  (%3, %%"FF_REG_a", 2)  \n\t"
+            MOVNTQ"                 %%mm6, 8(%3, %%"FF_REG_a", 2)  \n\t"
+            "add                       $8, %%"FF_REG_a"            \n\t"
+            "movq    -1(%0, %%"FF_REG_a"), %%mm4    \n\t"
+            "movq    -1(%1, %%"FF_REG_a"), %%mm5    \n\t"
+            " js                       1b           \n\t"
             :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
                "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
                "g" (-mmxSize)
                NAMED_CONSTRAINTS_ADD(mmx_ff)
-            : "%"REG_a
+            : "%"FF_REG_a
         );
         } else {
             mmxSize = 1;
@@ -1532,14 +1532,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
     const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2) {
         __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
+            "xor          %%"FF_REG_a", %%"FF_REG_a" \n\t"
             "pcmpeqw             %%mm7, %%mm7   \n\t"
             "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
             ".p2align                4          \n\t"
             "1:                                 \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
+            PREFETCH" 64(%0, %%"FF_REG_a", 4)          \n\t"
+            "movq       (%0, %%"FF_REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
+            "movq      8(%0, %%"FF_REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
             "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
             "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
             "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
@@ -1549,10 +1549,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
             "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
 
-            MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
+            MOVNTQ"              %%mm2,  (%1, %%"FF_REG_a", 2) \n\t"
 
-            "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
+            "movq     16(%0, %%"FF_REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
+            "movq     24(%0, %%"FF_REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
             "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
             "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
             "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
@@ -1562,7 +1562,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
             "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
 
-            MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
+            MOVNTQ"              %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
 
             "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
             "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
@@ -1573,28 +1573,28 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
             "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
 
-            MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
-            MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
+            MOVNTQ"              %%mm0, (%3, %%"FF_REG_a") \n\t"
+            MOVNTQ"              %%mm2, (%2, %%"FF_REG_a") \n\t"
 
-            "add                    $8, %%"REG_a"   \n\t"
-            "cmp                    %4, %%"REG_a"   \n\t"
-            " jb                    1b          \n\t"
+            "add                    $8, %%"FF_REG_a" \n\t"
+            "cmp                    %4, %%"FF_REG_a" \n\t"
+            " jb                    1b               \n\t"
             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
         );
 
         ydst += lumStride;
         src  += srcStride;
 
         __asm__ volatile(
-            "xor                 %%"REG_a", %%"REG_a"   \n\t"
-            ".p2align                    4              \n\t"
-            "1:                                 \n\t"
-            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
-            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // YUYV YUYV(0)
-            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // YUYV YUYV(4)
-            "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // YUYV YUYV(8)
-            "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // YUYV YUYV(12)
+            "xor          %%"FF_REG_a", %%"FF_REG_a"  \n\t"
+            ".p2align                4                \n\t"
+            "1:                                       \n\t"
+            PREFETCH" 64(%0, %%"FF_REG_a", 4)         \n\t"
+            "movq       (%0, %%"FF_REG_a", 4), %%mm0  \n\t" // YUYV YUYV(0)
+            "movq      8(%0, %%"FF_REG_a", 4), %%mm1  \n\t" // YUYV YUYV(4)
+            "movq     16(%0, %%"FF_REG_a", 4), %%mm2  \n\t" // YUYV YUYV(8)
+            "movq     24(%0, %%"FF_REG_a", 4), %%mm3  \n\t" // YUYV YUYV(12)
             "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
             "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
             "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
@@ -1602,15 +1602,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
             "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
             "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
 
-            MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
-            MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
+            MOVNTQ"              %%mm0,  (%1, %%"FF_REG_a", 2) \n\t"
+            MOVNTQ"              %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
 
-            "add                    $8, %%"REG_a"   \n\t"
-            "cmp                    %4, %%"REG_a"   \n\t"
-            " jb                    1b          \n\t"
+            "add                    $8, %%"FF_REG_a" \n\t"
+            "cmp                    %4, %%"FF_REG_a" \n\t"
+            " jb                    1b               \n\t"
 
             ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
         );
         udst += chromStride;
         vdst += chromStride;
@@ -1655,20 +1655,20 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         int i;
         for (i=0; i<2; i++) {
             __asm__ volatile(
-                "mov                        %2, %%"REG_a"   \n\t"
+                "mov                        %2, %%"FF_REG_a"\n\t"
                 "movq          "BGR2Y_IDX"(%3), %%mm6       \n\t"
                 "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
                 "pxor                    %%mm7, %%mm7       \n\t"
-                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
+                "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
                 ".p2align                    4              \n\t"
                 "1:                                         \n\t"
-                PREFETCH"    64(%0, %%"REG_d")              \n\t"
-                "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-                "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
+                PREFETCH" 64(%0, %%"FF_REG_d")              \n\t"
+                "movd       (%0, %%"FF_REG_d"), %%mm0       \n\t"
+                "movd      3(%0, %%"FF_REG_d"), %%mm1       \n\t"
                 "punpcklbw               %%mm7, %%mm0       \n\t"
                 "punpcklbw               %%mm7, %%mm1       \n\t"
-                "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
-                "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
+                "movd      6(%0, %%"FF_REG_d"), %%mm2       \n\t"
+                "movd      9(%0, %%"FF_REG_d"), %%mm3       \n\t"
                 "punpcklbw               %%mm7, %%mm2       \n\t"
                 "punpcklbw               %%mm7, %%mm3       \n\t"
                 "pmaddwd                 %%mm6, %%mm0       \n\t"
@@ -1686,12 +1686,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
                 "packssdw                %%mm2, %%mm0       \n\t"
                 "psraw                      $7, %%mm0       \n\t"
 
-                "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-                "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
+                "movd     12(%0, %%"FF_REG_d"), %%mm4       \n\t"
+                "movd     15(%0, %%"FF_REG_d"), %%mm1       \n\t"
                 "punpcklbw               %%mm7, %%mm4       \n\t"
                 "punpcklbw               %%mm7, %%mm1       \n\t"
-                "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
-                "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
+                "movd     18(%0, %%"FF_REG_d"), %%mm2       \n\t"
+                "movd     21(%0, %%"FF_REG_d"), %%mm3       \n\t"
                 "punpcklbw               %%mm7, %%mm2       \n\t"
                 "punpcklbw               %%mm7, %%mm3       \n\t"
                 "pmaddwd                 %%mm6, %%mm4       \n\t"
@@ -1706,40 +1706,40 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
                 "packssdw                %%mm3, %%mm2       \n\t"
                 "pmaddwd                 %%mm5, %%mm4       \n\t"
                 "pmaddwd                 %%mm5, %%mm2       \n\t"
-                "add                       $24, %%"REG_d"   \n\t"
+                "add                       $24, %%"FF_REG_d"\n\t"
                 "packssdw                %%mm2, %%mm4       \n\t"
                 "psraw                      $7, %%mm4       \n\t"
 
                 "packuswb                %%mm4, %%mm0       \n\t"
                 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
 
-                MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
-                "add                        $8,      %%"REG_a"  \n\t"
-                " js                        1b                  \n\t"
+                MOVNTQ"                  %%mm0, (%1, %%"FF_REG_a") \n\t"
+                "add                        $8,      %%"FF_REG_a"  \n\t"
+                " js                        1b                     \n\t"
                 : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
                   NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
-                : "%"REG_a, "%"REG_d
+                : "%"FF_REG_a, "%"FF_REG_d
             );
             ydst += lumStride;
             src  += srcStride;
         }
         src -= srcStride*2;
         __asm__ volatile(
-            "mov                        %4, %%"REG_a"   \n\t"
+            "mov                        %4, %%"FF_REG_a"\n\t"
             "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
             "movq          "BGR2U_IDX"(%5), %%mm6       \n\t"
             "pxor                    %%mm7, %%mm7       \n\t"
-            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
-            "add                 %%"REG_d", %%"REG_d"   \n\t"
+            "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
+            "add              %%"FF_REG_d", %%"FF_REG_d"\n\t"
             ".p2align                    4              \n\t"
             "1:                                         \n\t"
-            PREFETCH"    64(%0, %%"REG_d")              \n\t"
-            PREFETCH"    64(%1, %%"REG_d")              \n\t"
+            PREFETCH" 64(%0, %%"FF_REG_d")              \n\t"
+            PREFETCH" 64(%1, %%"FF_REG_d")              \n\t"
 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
-            "movq          (%0, %%"REG_d"), %%mm0       \n\t"
-            "movq          (%1, %%"REG_d"), %%mm1       \n\t"
-            "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
-            "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
+            "movq       (%0, %%"FF_REG_d"), %%mm0       \n\t"
+            "movq       (%1, %%"FF_REG_d"), %%mm1       \n\t"
+            "movq      6(%0, %%"FF_REG_d"), %%mm2       \n\t"
+            "movq      6(%1, %%"FF_REG_d"), %%mm3       \n\t"
             PAVGB"                   %%mm1, %%mm0       \n\t"
             PAVGB"                   %%mm3, %%mm2       \n\t"
             "movq                    %%mm0, %%mm1       \n\t"
@@ -1751,10 +1751,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "punpcklbw               %%mm7, %%mm0       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
 #else
-            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
-            "movd          (%1, %%"REG_d"), %%mm1       \n\t"
-            "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
+            "movd       (%0, %%"FF_REG_d"), %%mm0       \n\t"
+            "movd       (%1, %%"FF_REG_d"), %%mm1       \n\t"
+            "movd      3(%0, %%"FF_REG_d"), %%mm2       \n\t"
+            "movd      3(%1, %%"FF_REG_d"), %%mm3       \n\t"
             "punpcklbw               %%mm7, %%mm0       \n\t"
             "punpcklbw               %%mm7, %%mm1       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
@@ -1762,10 +1762,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "paddw                   %%mm1, %%mm0       \n\t"
             "paddw                   %%mm3, %%mm2       \n\t"
             "paddw                   %%mm2, %%mm0       \n\t"
-            "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
-            "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
+            "movd      6(%0, %%"FF_REG_d"), %%mm4       \n\t"
+            "movd      6(%1, %%"FF_REG_d"), %%mm1       \n\t"
+            "movd      9(%0, %%"FF_REG_d"), %%mm2       \n\t"
+            "movd      9(%1, %%"FF_REG_d"), %%mm3       \n\t"
             "punpcklbw               %%mm7, %%mm4       \n\t"
             "punpcklbw               %%mm7, %%mm1       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
@@ -1795,10 +1795,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "psraw                      $7, %%mm0       \n\t"
 
 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
-            "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
-            "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
-            "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
-            "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
+            "movq     12(%0, %%"FF_REG_d"), %%mm4       \n\t"
+            "movq     12(%1, %%"FF_REG_d"), %%mm1       \n\t"
+            "movq     18(%0, %%"FF_REG_d"), %%mm2       \n\t"
+            "movq     18(%1, %%"FF_REG_d"), %%mm3       \n\t"
             PAVGB"                   %%mm1, %%mm4       \n\t"
             PAVGB"                   %%mm3, %%mm2       \n\t"
             "movq                    %%mm4, %%mm1       \n\t"
@@ -1810,10 +1810,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "punpcklbw               %%mm7, %%mm4       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
 #else
-            "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
-            "movd        12(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd        15(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd        15(%1, %%"REG_d"), %%mm3       \n\t"
+            "movd     12(%0, %%"FF_REG_d"), %%mm4       \n\t"
+            "movd     12(%1, %%"FF_REG_d"), %%mm1       \n\t"
+            "movd     15(%0, %%"FF_REG_d"), %%mm2       \n\t"
+            "movd     15(%1, %%"FF_REG_d"), %%mm3       \n\t"
             "punpcklbw               %%mm7, %%mm4       \n\t"
             "punpcklbw               %%mm7, %%mm1       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
@@ -1821,10 +1821,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "paddw                   %%mm1, %%mm4       \n\t"
             "paddw                   %%mm3, %%mm2       \n\t"
             "paddw                   %%mm2, %%mm4       \n\t"
-            "movd        18(%0, %%"REG_d"), %%mm5       \n\t"
-            "movd        18(%1, %%"REG_d"), %%mm1       \n\t"
-            "movd        21(%0, %%"REG_d"), %%mm2       \n\t"
-            "movd        21(%1, %%"REG_d"), %%mm3       \n\t"
+            "movd     18(%0, %%"FF_REG_d"), %%mm5       \n\t"
+            "movd     18(%1, %%"FF_REG_d"), %%mm1       \n\t"
+            "movd     21(%0, %%"FF_REG_d"), %%mm2       \n\t"
+            "movd     21(%1, %%"FF_REG_d"), %%mm3       \n\t"
             "punpcklbw               %%mm7, %%mm5       \n\t"
             "punpcklbw               %%mm7, %%mm1       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
@@ -1851,7 +1851,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "packssdw                %%mm3, %%mm1       \n\t"
             "pmaddwd                 %%mm5, %%mm4       \n\t"
             "pmaddwd                 %%mm5, %%mm1       \n\t"
-            "add                       $24, %%"REG_d"   \n\t"
+            "add                       $24, %%"FF_REG_d"\n\t"
             "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
             "psraw                      $7, %%mm4       \n\t"
 
@@ -1860,14 +1860,14 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "punpckhdq               %%mm4, %%mm1           \n\t"
             "packsswb                %%mm1, %%mm0           \n\t"
             "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t"
-            "movd                    %%mm0, (%2, %%"REG_a") \n\t"
-            "punpckhdq               %%mm0, %%mm0           \n\t"
-            "movd                    %%mm0, (%3, %%"REG_a") \n\t"
-            "add                        $4, %%"REG_a"       \n\t"
-            " js                        1b                  \n\t"
+            "movd                    %%mm0, (%2, %%"FF_REG_a") \n\t"
+            "punpckhdq               %%mm0, %%mm0              \n\t"
+            "movd                    %%mm0, (%3, %%"FF_REG_a") \n\t"
+            "add                        $4, %%"FF_REG_a"       \n\t"
+            " js                        1b              \n\t"
             : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
               NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
-            : "%"REG_a, "%"REG_d
+            : "%"FF_REG_a, "%"FF_REG_d
         );
 
         udst += chromStride;
@@ -1898,49 +1898,49 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
 #if COMPILE_TEMPLATE_SSE2
             if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
         __asm__(
-            "xor              %%"REG_a", %%"REG_a"  \n\t"
+            "xor              %%"FF_REG_a", %%"FF_REG_a"  \n\t"
             "1:                                     \n\t"
-            PREFETCH" 64(%1, %%"REG_a")             \n\t"
-            PREFETCH" 64(%2, %%"REG_a")             \n\t"
-            "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
-            "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
-            "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
+            PREFETCH" 64(%1, %%"FF_REG_a")          \n\t"
+            PREFETCH" 64(%2, %%"FF_REG_a")          \n\t"
+            "movdqa  (%1, %%"FF_REG_a"), %%xmm0     \n\t"
+            "movdqa  (%1, %%"FF_REG_a"), %%xmm1     \n\t"
+            "movdqa  (%2, %%"FF_REG_a"), %%xmm2     \n\t"
             "punpcklbw           %%xmm2, %%xmm0     \n\t"
             "punpckhbw           %%xmm2, %%xmm1     \n\t"
-            "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
-            "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
-            "add                    $16, %%"REG_a"  \n\t"
-            "cmp                     %3, %%"REG_a"  \n\t"
+            "movntdq             %%xmm0,   (%0, %%"FF_REG_a", 2) \n\t"
+            "movntdq             %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t"
+            "add                    $16, %%"FF_REG_a"            \n\t"
+            "cmp                     %3, %%"FF_REG_a"            \n\t"
             " jb                     1b             \n\t"
             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"REG_a
+            : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a
         );
             } else
 #endif
         __asm__(
-            "xor %%"REG_a", %%"REG_a"               \n\t"
+            "xor %%"FF_REG_a", %%"FF_REG_a"         \n\t"
             "1:                                     \n\t"
-            PREFETCH" 64(%1, %%"REG_a")             \n\t"
-            PREFETCH" 64(%2, %%"REG_a")             \n\t"
-            "movq       (%1, %%"REG_a"), %%mm0      \n\t"
-            "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
+            PREFETCH" 64(%1, %%"FF_REG_a")          \n\t"
+            PREFETCH" 64(%2, %%"FF_REG_a")          \n\t"
+            "movq    (%1, %%"FF_REG_a"), %%mm0      \n\t"
+            "movq   8(%1, %%"FF_REG_a"), %%mm2      \n\t"
             "movq                 %%mm0, %%mm1      \n\t"
             "movq                 %%mm2, %%mm3      \n\t"
-            "movq       (%2, %%"REG_a"), %%mm4      \n\t"
-            "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
+            "movq    (%2, %%"FF_REG_a"), %%mm4      \n\t"
+            "movq   8(%2, %%"FF_REG_a"), %%mm5      \n\t"
             "punpcklbw            %%mm4, %%mm0      \n\t"
             "punpckhbw            %%mm4, %%mm1      \n\t"
             "punpcklbw            %%mm5, %%mm2      \n\t"
             "punpckhbw            %%mm5, %%mm3      \n\t"
-            MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
-            MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
-            "add                    $16, %%"REG_a"  \n\t"
-            "cmp                     %3, %%"REG_a"  \n\t"
-            " jb                     1b             \n\t"
+            MOVNTQ"               %%mm0,   (%0, %%"FF_REG_a", 2) \n\t"
+            MOVNTQ"               %%mm1,  8(%0, %%"FF_REG_a", 2) \n\t"
+            MOVNTQ"               %%mm2, 16(%0, %%"FF_REG_a", 2) \n\t"
+            MOVNTQ"               %%mm3, 24(%0, %%"FF_REG_a", 2) \n\t"
+            "add                    $16, %%"FF_REG_a"            \n\t"
+            "cmp                     %3, %%"FF_REG_a"            \n\t"
+            " jb                     1b                          \n\t"
             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
-            : "memory", "%"REG_a
+            : "memory", "%"FF_REG_a
         );
 
         }
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 7356692..d68e46b 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -220,16 +220,16 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
         "movdqa     %%xmm3, %%xmm4 \n\t" \
         "movdqa     %%xmm3, %%xmm7 \n\t" \
         "movl           %3, %%ecx  \n\t" \
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+        "mov                                 %0, %%"FF_REG_d"        \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\
         ".p2align                             4             \n\t" /* FIXME Unroll? */\
         "1:                                                 \n\t"\
-        "movddup                  8(%%"REG_d"), %%xmm0      \n\t" /* filterCoeff */\
-        "movdqa              (%%"REG_S", %%"REG_c", 2), %%xmm2      \n\t" /* srcData */\
-        "movdqa            16(%%"REG_S", %%"REG_c", 2), %%xmm5      \n\t" /* srcData */\
-        "add                                $16, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "test                         %%"REG_S", %%"REG_S"  \n\t"\
+        "movddup                  8(%%"FF_REG_d"), %%xmm0   \n\t" /* filterCoeff */\
+        "movdqa              (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
+        "movdqa            16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
+        "add                                $16, %%"FF_REG_d"        \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\
+        "test                         %%"FF_REG_S", %%"FF_REG_S"     \n\t"\
         "pmulhw                           %%xmm0, %%xmm2      \n\t"\
         "pmulhw                           %%xmm0, %%xmm5      \n\t"\
         "paddw                            %%xmm2, %%xmm3      \n\t"\
@@ -238,13 +238,13 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
         "psraw                               $3, %%xmm3      \n\t"\
         "psraw                               $3, %%xmm4      \n\t"\
         "packuswb                         %%xmm4, %%xmm3      \n\t"\
-        "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t"\
-        "add                         $16, %%"REG_c"         \n\t"\
-        "cmp                          %2, %%"REG_c"         \n\t"\
+        "movntdq                          %%xmm3, (%1, %%"FF_REG_c") \n\t"\
+        "add                         $16, %%"FF_REG_c"        \n\t"\
+        "cmp                          %2, %%"FF_REG_c"        \n\t"\
         "movdqa                   %%xmm7, %%xmm3            \n\t" \
         "movdqa                   %%xmm7, %%xmm4            \n\t" \
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
+        "mov                                 %0, %%"FF_REG_d"        \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\
         "jb                                  1b             \n\t"
 
     if (offset) {
@@ -259,7 +259,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
               "m"(filterSize), "m"(((uint64_t *) dither)[0])
               : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
-                "%"REG_d, "%"REG_S, "%"REG_c
+                "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
               );
     } else {
         __asm__ volatile(
@@ -269,7 +269,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
               "m"(filterSize), "m"(((uint64_t *) dither)[0])
               : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
-                "%"REG_d, "%"REG_S, "%"REG_c
+                "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
               );
     }
 }
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 3b38e98..1a10227 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -88,16 +88,16 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
         "movq    %%mm3, %%mm6\n\t"
         "movq    %%mm4, %%mm7\n\t"
         "movl %3, %%ecx\n\t"
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        ".p2align                             4             \n\t" /* FIXME Unroll? */\
-        "1:                                                 \n\t"\
-        "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
-        "movq                (%%"REG_S", %%"REG_c", 2), %%mm2      \n\t" /* srcData */\
-        "movq               8(%%"REG_S", %%"REG_c", 2), %%mm5      \n\t" /* srcData */\
-        "add                                $16, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "test                         %%"REG_S", %%"REG_S"  \n\t"\
+        "mov                                 %0, %%"FF_REG_d"       \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
+        ".p2align                             4                     \n\t" /* FIXME Unroll? */\
+        "1:                                                         \n\t"\
+        "movq                      8(%%"FF_REG_d"), %%mm0           \n\t" /* filterCoeff */\
+        "movq                (%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\
+        "movq               8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\
+        "add                                $16, %%"FF_REG_d"       \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
+        "test                         %%"FF_REG_S", %%"FF_REG_S"    \n\t"\
         "pmulhw                           %%mm0, %%mm2      \n\t"\
         "pmulhw                           %%mm0, %%mm5      \n\t"\
         "paddw                            %%mm2, %%mm3      \n\t"\
@@ -106,62 +106,62 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
         "psraw                               $3, %%mm3      \n\t"\
         "psraw                               $3, %%mm4      \n\t"\
         "packuswb                         %%mm4, %%mm3      \n\t"
-        MOVNTQ2 "                         %%mm3, (%1, %%"REG_c")\n\t"
-        "add                          $8, %%"REG_c"         \n\t"\
-        "cmp                          %2, %%"REG_c"         \n\t"\
+        MOVNTQ2 "                         %%mm3, (%1, %%"FF_REG_c")\n\t"
+        "add                          $8, %%"FF_REG_c"      \n\t"\
+        "cmp                          %2, %%"FF_REG_c"      \n\t"\
         "movq    %%mm6, %%mm3\n\t"
         "movq    %%mm7, %%mm4\n\t"
-        "mov                                 %0, %%"REG_d"  \n\t"\
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
-        "jb                                  1b             \n\t"\
+        "mov                                 %0, %%"FF_REG_d"     \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
+        "jb                                  1b                   \n\t"\
         :: "g" (filter),
            "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
-        : "%"REG_d, "%"REG_S, "%"REG_c
+        : "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
     );
 }
 
 #define YSCALEYUV2PACKEDX_UV \
     __asm__ volatile(\
-        "xor                   %%"REG_a", %%"REG_a"     \n\t"\
+        "xor                %%"FF_REG_a", %%"FF_REG_a"  \n\t"\
         ".p2align                      4                \n\t"\
         "nop                                            \n\t"\
         "1:                                             \n\t"\
-        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d"  \n\t"\
+        "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
         "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
         "movq                      %%mm3, %%mm4         \n\t"\
         ".p2align                      4                \n\t"\
         "2:                                             \n\t"\
-        "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
-        "add                          %6, %%"REG_S"     \n\t" \
-        "movq     (%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
-        "add                         $16, %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+        "movq            8(%%"FF_REG_d"), %%mm0         \n\t" /* filterCoeff */\
+        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm2      \n\t" /* UsrcData */\
+        "add                          %6, %%"FF_REG_S"  \n\t" \
+        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm5      \n\t" /* VsrcData */\
+        "add                         $16, %%"FF_REG_d"  \n\t"\
+        "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
         "pmulhw                    %%mm0, %%mm2         \n\t"\
         "pmulhw                    %%mm0, %%mm5         \n\t"\
         "paddw                     %%mm2, %%mm3         \n\t"\
         "paddw                     %%mm5, %%mm4         \n\t"\
-        "test                  %%"REG_S", %%"REG_S"     \n\t"\
+        "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
         " jnz                         2b                \n\t"\
 
 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "lea                "offset"(%0), %%"FF_REG_d"  \n\t"\
+    "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
     "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\
     "movq                    "#dst1", "#dst2"       \n\t"\
     ".p2align                      4                \n\t"\
     "2:                                             \n\t"\
-    "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\
-    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
-    "add                         $16, %%"REG_d"            \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "movq            8(%%"FF_REG_d"), "#coeff"      \n\t" /* filterCoeff */\
+    "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
+    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
+    "add                         $16, %%"FF_REG_d"  \n\t"\
+    "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
     "pmulhw                 "#coeff", "#src1"       \n\t"\
     "pmulhw                 "#coeff", "#src2"       \n\t"\
     "paddw                   "#src1", "#dst1"       \n\t"\
     "paddw                   "#src2", "#dst2"       \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
+    "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
     " jnz                         2b                \n\t"\
 
 #define YSCALEYUV2PACKEDX \
@@ -173,41 +173,41 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
             "m" (dummy), "m" (dummy), "m" (dummy),\
             "r" (dest), "m" (dstW_reg), "m"(uv_off) \
             NAMED_CONSTRAINTS_ADD(bF8,bFC) \
-        : "%"REG_a, "%"REG_d, "%"REG_S            \
+        : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S            \
     );
 
 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
     __asm__ volatile(\
-        "xor %%"REG_a", %%"REG_a"                       \n\t"\
+        "xor %%"FF_REG_a", %%"FF_REG_a"                 \n\t"\
         ".p2align                      4                \n\t"\
         "nop                                            \n\t"\
         "1:                                             \n\t"\
-        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
-        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d"  \n\t"\
+        "mov              (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
         "pxor                      %%mm4, %%mm4         \n\t"\
         "pxor                      %%mm5, %%mm5         \n\t"\
         "pxor                      %%mm6, %%mm6         \n\t"\
         "pxor                      %%mm7, %%mm7         \n\t"\
         ".p2align                      4                \n\t"\
         "2:                                             \n\t"\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
-        "add                          %6, %%"REG_S"      \n\t" \
-        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
-        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-        "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
+        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm0      \n\t" /* UsrcData */\
+        "add                          %6, %%"FF_REG_S"  \n\t" \
+        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm2      \n\t" /* VsrcData */\
+        "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
+        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm1      \n\t" /* UsrcData */\
         "movq                      %%mm0, %%mm3         \n\t"\
         "punpcklwd                 %%mm1, %%mm0         \n\t"\
         "punpckhwd                 %%mm1, %%mm3         \n\t"\
-        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
+        "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1      \n\t" /* filterCoeff */\
         "pmaddwd                   %%mm1, %%mm0         \n\t"\
         "pmaddwd                   %%mm1, %%mm3         \n\t"\
         "paddd                     %%mm0, %%mm4         \n\t"\
         "paddd                     %%mm3, %%mm5         \n\t"\
-        "add                          %6, %%"REG_S"      \n\t" \
-        "movq     (%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
-        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-        "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-        "test                  %%"REG_S", %%"REG_S"     \n\t"\
+        "add                          %6, %%"FF_REG_S"  \n\t" \
+        "movq  (%%"FF_REG_S", %%"FF_REG_a"), %%mm3      \n\t" /* VsrcData */\
+        "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
+        "add           $"STR(APCK_SIZE)", %%"FF_REG_d"  \n\t"\
+        "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
         "movq                      %%mm2, %%mm0         \n\t"\
         "punpcklwd                 %%mm3, %%mm2         \n\t"\
         "punpckhwd                 %%mm3, %%mm0         \n\t"\
@@ -229,30 +229,30 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
         "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
 
 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
-    "lea                "offset"(%0), %%"REG_d"     \n\t"\
-    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
+    "lea                "offset"(%0), %%"FF_REG_d"      \n\t"\
+    "mov                 (%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\
     "pxor                      %%mm1, %%mm1         \n\t"\
     "pxor                      %%mm5, %%mm5         \n\t"\
     "pxor                      %%mm7, %%mm7         \n\t"\
     "pxor                      %%mm6, %%mm6         \n\t"\
     ".p2align                      4                \n\t"\
     "2:                                             \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
+    "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0       \n\t" /* Y1srcData */\
+    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2       \n\t" /* Y2srcData */\
+    "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S"   \n\t"\
+    "movq  (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4       \n\t" /* Y1srcData */\
     "movq                      %%mm0, %%mm3         \n\t"\
     "punpcklwd                 %%mm4, %%mm0         \n\t"\
     "punpckhwd                 %%mm4, %%mm3         \n\t"\
-    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
+    "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4     \n\t" /* filterCoeff */\
     "pmaddwd                   %%mm4, %%mm0         \n\t"\
     "pmaddwd                   %%mm4, %%mm3         \n\t"\
     "paddd                     %%mm0, %%mm1         \n\t"\
     "paddd                     %%mm3, %%mm5         \n\t"\
-    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
-    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
-    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
-    "test                  %%"REG_S", %%"REG_S"     \n\t"\
+    "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3   \n\t" /* Y2srcData */\
+    "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
+    "add           $"STR(APCK_SIZE)", %%"FF_REG_d"  \n\t"\
+    "test               %%"FF_REG_S", %%"FF_REG_S"  \n\t"\
     "movq                      %%mm2, %%mm0         \n\t"\
     "punpcklwd                 %%mm3, %%mm2         \n\t"\
     "punpckhwd                 %%mm3, %%mm0         \n\t"\
@@ -359,13 +359,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
         "psraw                        $3, %%mm1         \n\t"
         "psraw                        $3, %%mm7         \n\t"
         "packuswb                  %%mm7, %%mm1         \n\t"
-        WRITEBGR32(%4, "%5", %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
+        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
         YSCALEYUV2PACKEDX_END
     } else {
         YSCALEYUV2PACKEDX_ACCURATE
         YSCALEYUV2RGBX
         "pcmpeqd %%mm7, %%mm7 \n\t"
-        WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
         YSCALEYUV2PACKEDX_END
     }
 }
@@ -388,13 +388,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
         "psraw                        $3, %%mm1         \n\t"
         "psraw                        $3, %%mm7         \n\t"
         "packuswb                  %%mm7, %%mm1         \n\t"
-        WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
         YSCALEYUV2PACKEDX_END
     } else {
         YSCALEYUV2PACKEDX
         YSCALEYUV2RGBX
         "pcmpeqd %%mm7, %%mm7 \n\t"
-        WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
         YSCALEYUV2PACKEDX_END
     }
 }
@@ -417,13 +417,13 @@ static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
         "psraw                        $3, %%mm1         \n\t"
         "psraw                        $3, %%mm7         \n\t"
         "packuswb                  %%mm7, %%mm1         \n\t"
-        WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
         YSCALEYUV2PACKEDX_END
     } else {
         YSCALEYUV2PACKEDX
         YSCALEYUV2RGBX
         "pcmpeqd %%mm7, %%mm7 \n\t"
-        WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+        WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
         YSCALEYUV2PACKEDX_END
     }
 }
@@ -476,7 +476,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
     "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
     "paddusb "RED_DITHER"(%0), %%mm5\n\t"
 #endif
-    WRITERGB16(%4, "%5", %%REGa)
+    WRITERGB16(%4, "%5", %%FF_REGa)
     YSCALEYUV2PACKEDX_END
 }
 
@@ -500,7 +500,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
     "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
     "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
 #endif
-    WRITERGB16(%4, "%5", %%REGa)
+    WRITERGB16(%4, "%5", %%FF_REGa)
     YSCALEYUV2PACKEDX_END
 }
 
@@ -553,7 +553,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
     "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
     "paddusb "RED_DITHER"(%0), %%mm5\n\t"
 #endif
-    WRITERGB15(%4, "%5", %%REGa)
+    WRITERGB15(%4, "%5", %%FF_REGa)
     YSCALEYUV2PACKEDX_END
 }
 
@@ -577,7 +577,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
     "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
     "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
 #endif
-    WRITERGB15(%4, "%5", %%REGa)
+    WRITERGB15(%4, "%5", %%FF_REGa)
     YSCALEYUV2PACKEDX_END
 }
 
@@ -705,14 +705,14 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
     "pxor %%mm7, %%mm7 \n\t"
-    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
-    "add %4, %%"REG_c"                        \n\t"
-    WRITEBGR24(%%REGc, "%5", %%REGa)
+    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c"\n\t" //FIXME optimize
+    "add %4, %%"FF_REG_c"                        \n\t"
+    WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
        "r" (dest), "m" (dstW_reg), "m"(uv_off)
        NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
-    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+    : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
     );
 }
 
@@ -729,15 +729,15 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
-    "pxor                    %%mm7, %%mm7       \n\t"
-    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
-    "add                        %4, %%"REG_c"   \n\t"
-    WRITEBGR24(%%REGc, "%5", %%REGa)
+    "pxor                    %%mm7, %%mm7              \n\t"
+    "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize
+    "add                        %4, %%"FF_REG_c"       \n\t"
+    WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
     :: "r" (&c->redDither),
        "m" (dummy), "m" (dummy), "m" (dummy),
        "r" (dest),  "m" (dstW_reg), "m"(uv_off)
        NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
-    : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
+    : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
     );
 }
 #endif /* HAVE_6REGS */
@@ -776,7 +776,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
     "psraw $3, %%mm4    \n\t"
     "psraw $3, %%mm1    \n\t"
     "psraw $3, %%mm7    \n\t"
-    WRITEYUY2(%4, "%5", %%REGa)
+    WRITEYUY2(%4, "%5", %%FF_REGa)
     YSCALEYUV2PACKEDX_END
 }
 
@@ -797,7 +797,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
     "psraw $3, %%mm4    \n\t"
     "psraw $3, %%mm1    \n\t"
     "psraw $3, %%mm7    \n\t"
-    WRITEYUY2(%4, "%5", %%REGa)
+    WRITEYUY2(%4, "%5", %%FF_REGa)
     YSCALEYUV2PACKEDX_END
 }
 
@@ -908,37 +908,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
         c->u_temp=(intptr_t)abuf0;
         c->v_temp=(intptr_t)abuf1;
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov        %4, %%"FF_REG_b"            \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB(%%FF_REGBP, %5)
             "push                   %0              \n\t"
             "push                   %1              \n\t"
             "mov          "U_TEMP"(%5), %0          \n\t"
             "mov          "V_TEMP"(%5), %1          \n\t"
-            YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
+            YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
             "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
             "packuswb            %%mm7, %%mm1       \n\t"
             "pop                    %1              \n\t"
             "pop                    %0              \n\t"
-            WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
 #endif
     } else {
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov        %4, %%"FF_REG_b"            \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB(%%FF_REGBP, %5)
             "pcmpeqd %%mm7, %%mm7                   \n\t"
-            WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
@@ -954,14 +954,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
 
     __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-        "mov        %4, %%"REG_b"               \n\t"
-        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+        "mov           %4, %%"FF_REG_b"         \n\t"
+        "push %%"FF_REG_BP"                     \n\t"
+        YSCALEYUV2RGB(%%FF_REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
-        WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+        "pop %%"FF_REG_BP"                      \n\t"
+        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
            "a" (&c->redDither)
            NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
@@ -977,20 +977,20 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
 
     __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-        "mov        %4, %%"REG_b"               \n\t"
-        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+        "mov        %4, %%"FF_REG_b"            \n\t"
+        "push %%"FF_REG_BP"                     \n\t"
+        YSCALEYUV2RGB(%%FF_REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+        "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
         "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+        "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
 #endif
-        WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+        "pop %%"FF_REG_BP"                      \n\t"
+        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
            "a" (&c->redDither)
            NAMED_CONSTRAINTS_ADD(bF8)
@@ -1006,20 +1006,20 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
 
     __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-        "mov        %4, %%"REG_b"               \n\t"
-        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5)
+        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+        "mov           %4, %%"FF_REG_b"         \n\t"
+        "push %%"FF_REG_BP"                     \n\t"
+        YSCALEYUV2RGB(%%FF_REGBP, %5)
         "pxor    %%mm7, %%mm7                   \n\t"
         /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-        "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+        "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
         "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-        "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+        "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
 #endif
-        WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+        "pop %%"FF_REG_BP"                      \n\t"
+        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
            "a" (&c->redDither)
            NAMED_CONSTRAINTS_ADD(bF8,bFC)
@@ -1075,13 +1075,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
 
     __asm__ volatile(
-        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-        "mov %4, %%"REG_b"                        \n\t"
-        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2PACKED(%%REGBP, %5)
-        WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-        "pop %%"REG_BP"                         \n\t"
-        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+        "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+        "mov           %4, %%"FF_REG_b"         \n\t"
+        "push %%"FF_REG_BP"                     \n\t"
+        YSCALEYUV2PACKED(%%FF_REGBP, %5)
+        WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+        "pop %%"FF_REG_BP"                      \n\t"
+        "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
         :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
            "a" (&c->redDither)
     );
@@ -1217,27 +1217,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
         const int16_t *ubuf1 = ubuf[0];
         if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
             __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
-                YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+                "mov           %4, %%"FF_REG_b"         \n\t"
+                "push %%"FF_REG_BP"                     \n\t"
+                YSCALEYUV2RGB1(%%FF_REGBP, %5)
+                YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
+                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"FF_REG_BP"                      \n\t"
+                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                    "a" (&c->redDither)
             );
         } else {
             __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5)
+                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+                "mov           %4, %%"FF_REG_b"         \n\t"
+                "push %%"FF_REG_BP"                     \n\t"
+                YSCALEYUV2RGB1(%%FF_REGBP, %5)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
-                WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"FF_REG_BP"                      \n\t"
+                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                    "a" (&c->redDither)
             );
@@ -1246,27 +1246,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
         const int16_t *ubuf1 = ubuf[1];
         if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
             __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
-                YSCALEYUV2RGB1_ALPHA(%%REGBP)
-                WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+                "mov           %4, %%"FF_REG_b"         \n\t"
+                "push %%"FF_REG_BP"                     \n\t"
+                YSCALEYUV2RGB1b(%%FF_REGBP, %5)
+                YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
+                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"FF_REG_BP"                      \n\t"
+                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
                 :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                    "a" (&c->redDither)
             );
         } else {
             __asm__ volatile(
-                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-                "mov        %4, %%"REG_b"               \n\t"
-                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5)
+                "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+                "mov           %4, %%"FF_REG_b"         \n\t"
+                "push %%"FF_REG_BP"                     \n\t"
+                YSCALEYUV2RGB1b(%%FF_REGBP, %5)
                 "pcmpeqd %%mm7, %%mm7                   \n\t"
-                WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
-                "pop %%"REG_BP"                         \n\t"
-                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+                WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+                "pop %%"FF_REG_BP"                      \n\t"
+                "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
                 :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                    "a" (&c->redDither)
             );
@@ -1285,14 +1285,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB1(%%FF_REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
-            WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
                NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
@@ -1300,14 +1300,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
     } else {
         const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
-            WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
                NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
@@ -1326,20 +1326,20 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB1(%%FF_REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
 #endif
-            WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
                NAMED_CONSTRAINTS_ADD(bF8)
@@ -1347,20 +1347,20 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
     } else {
         const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
 #endif
-            WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
                NAMED_CONSTRAINTS_ADD(bF8)
@@ -1379,20 +1379,20 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB1(%%FF_REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
 #endif
-            WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
                NAMED_CONSTRAINTS_ADD(bF8,bFC)
@@ -1400,20 +1400,20 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
     } else {
         const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5)
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2RGB1b(%%FF_REGBP, %5)
             "pxor    %%mm7, %%mm7                   \n\t"
             /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-            "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
+            "paddusb "BLUE_DITHER"(%5), %%mm2       \n\t"
             "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
-            "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
+            "paddusb "RED_DITHER"(%5), %%mm5        \n\t"
 #endif
-            WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
                NAMED_CONSTRAINTS_ADD(bF8,bFC)
@@ -1469,26 +1469,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
     if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
         const int16_t *ubuf1 = ubuf[0];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1(%%REGBP, %5)
-            WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2PACKED1(%%FF_REGBP, %5)
+            WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );
     } else {
         const int16_t *ubuf1 = ubuf[1];
         __asm__ volatile(
-            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
-            "mov        %4, %%"REG_b"               \n\t"
-            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1b(%%REGBP, %5)
-            WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
-            "pop %%"REG_BP"                         \n\t"
-            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
+            "mov %%"FF_REG_b", "ESP_OFFSET"(%5)     \n\t"
+            "mov           %4, %%"FF_REG_b"         \n\t"
+            "push %%"FF_REG_BP"                     \n\t"
+            YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
+            WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
+            "pop %%"FF_REG_BP"                      \n\t"
+            "mov "ESP_OFFSET"(%5), %%"FF_REG_b"     \n\t"
             :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
                "a" (&c->redDither)
         );



More information about the ffmpeg-cvslog mailing list