[FFmpeg-devel] [PATCH] x86/hpeldsp: fix loop in {avg, avg_no_rnd}_pixels16_x2_mmx

James Almer jamrial at gmail.com
Thu Oct 23 05:50:33 CEST 2014


Handle it inside the __asm__() block.
Fixes fate-vc1_ilaced_twomv when using the gcc-usan toolchain.

Signed-off-by: James Almer <jamrial at gmail.com>
---
The entire hpeldsp_rnd_template.c file should be ported to yasm, and/or this 
function optimized a bit in the long term.
But for now this makes FATE a bit greener.

 libavcodec/x86/hpeldsp_rnd_template.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/libavcodec/x86/hpeldsp_rnd_template.c b/libavcodec/x86/hpeldsp_rnd_template.c
index c8a68fd..8cbc412 100644
--- a/libavcodec/x86/hpeldsp_rnd_template.c
+++ b/libavcodec/x86/hpeldsp_rnd_template.c
@@ -138,27 +138,28 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
-    JUMPALIGN();
-    do {
         __asm__ volatile(
-            "movq  %1, %%mm0            \n\t"
-            "movq  1%1, %%mm1           \n\t"
-            "movq  %0, %%mm3            \n\t"
+            ".p2align 3                 \n\t"
+            "1:                         \n\t"
+            "movq  (%1), %%mm0          \n\t"
+            "movq  1(%1), %%mm1         \n\t"
+            "movq  (%2), %%mm3          \n\t"
             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
             PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
-            "movq  %%mm0, %0            \n\t"
-            "movq  8%1, %%mm0           \n\t"
-            "movq  9%1, %%mm1           \n\t"
-            "movq  8%0, %%mm3           \n\t"
+            "movq  %%mm0, (%2)          \n\t"
+            "movq  8(%1), %%mm0         \n\t"
+            "movq  9(%1), %%mm1         \n\t"
+            "movq  8(%2), %%mm3         \n\t"
             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
             PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
-            "movq  %%mm0, 8%0           \n\t"
-            :"+m"(*block)
-            :"m"(*pixels)
+            "movq  %%mm0, 8(%2)         \n\t"
+            "add    %3, %1              \n\t"
+            "add    %3, %2              \n\t"
+            "subl   $1, %0              \n\t"
+            "jnz    1b                  \n\t"
+            :"+g"(h), "+S"(pixels), "+D"(block)
+            :"r"((x86_reg)line_size)
             :"memory");
-        pixels += line_size;
-        block += line_size;
-    } while (--h);
 }
 
 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-- 
2.0.4



More information about the ffmpeg-devel mailing list