[FFmpeg-cvslog] x86: hpeldsp: kill hpeldsp_mmx.c

Christophe Gisquet git at videolan.org
Thu May 22 21:39:25 CEST 2014


ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Thu May 22 17:48:16 2014 +0000| [f8de35ebc4f2cf5802e990ce74b0a564b962687f] | committer: Michael Niedermayer

x86: hpeldsp: kill hpeldsp_mmx.c

before:
1987 decicycles in 8_x2, 262121 runs, 23 skips

after:
1902 decicycles in 8_x2, 262112 runs, 32 skips

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f8de35ebc4f2cf5802e990ce74b0a564b962687f
---

 libavcodec/x86/Makefile       |    1 -
 libavcodec/x86/hpeldsp.asm    |   33 +++++++++++++++++++++++++
 libavcodec/x86/hpeldsp_init.c |    9 ++++++-
 libavcodec/x86/hpeldsp_mmx.c  |   53 -----------------------------------------
 4 files changed, 41 insertions(+), 55 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index adf4843..9c39265 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -54,7 +54,6 @@ MMX-OBJS-$(CONFIG_DSPUTIL)             += x86/dsputil_mmx.o             \
                                           x86/idct_sse2_xvid.o          \
                                           x86/simple_idct.o
 MMX-OBJS-$(CONFIG_DIRAC_DECODER)       += x86/dirac_dwt.o
-MMX-OBJS-$(CONFIG_HPELDSP)             += x86/hpeldsp_mmx.o
 
 MMX-OBJS-$(CONFIG_SNOW_DECODER)        += x86/snowdsp.o
 MMX-OBJS-$(CONFIG_SNOW_ENCODER)        += x86/snowdsp.o
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 171c77b..fce434c 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -340,27 +340,58 @@ AVG_PIXELS8
 
 
 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+%macro PAVGB_MMX 4
+    movu   %3, %1
+    por    %3, %2
+    pxor   %2, %1
+    pand   %2, %4
+    psrlq  %2, 1
+    psubb  %3, %2
+    SWAP   %2, %3
+%endmacro
+
 %macro AVG_PIXELS8_X2 0
 cglobal avg_pixels8_x2, 4,5
     lea          r4, [r2*2]
+%if notcpuflag(mmxext)
+    pcmpeqd      m5, m5
+    paddb        m5, m5
+%endif
 .loop:
     mova         m0, [r1]
     mova         m2, [r1+r2]
+%if notcpuflag(mmxext)
+    PAVGB_MMX    [r1+1], m0, m3, m5
+    PAVGB_MMX    [r1+r2+1], m2, m4, m5
+    PAVGB_MMX    [r0], m0, m3, m5
+    PAVGB_MMX    [r0+r2], m2, m4, m5
+%else
     PAVGB        m0, [r1+1]
     PAVGB        m2, [r1+r2+1]
     PAVGB        m0, [r0]
     PAVGB        m2, [r0+r2]
+%endif
     add          r1, r4
     mova       [r0], m0
     mova    [r0+r2], m2
     mova         m0, [r1]
     mova         m2, [r1+r2]
+%if notcpuflag(mmxext)
+    PAVGB_MMX    [r1+1], m0, m3, m5
+    PAVGB_MMX    [r1+r2+1], m2, m4, m5
+%else
     PAVGB        m0, [r1+1]
     PAVGB        m2, [r1+r2+1]
+%endif
     add          r0, r4
     add          r1, r4
+%if notcpuflag(mmxext)
+    PAVGB_MMX    [r0], m0, m3, m5
+    PAVGB_MMX    [r0+r2], m2, m4, m5
+%else
     PAVGB        m0, [r0]
     PAVGB        m2, [r0+r2]
+%endif
     mova       [r0], m0
     mova    [r0+r2], m2
     add          r0, r4
@@ -369,6 +400,8 @@ cglobal avg_pixels8_x2, 4,5
     REP_RET
 %endmacro
 
+INIT_MMX mmx
+AVG_PIXELS8_X2
 INIT_MMX mmxext
 AVG_PIXELS8_X2
 INIT_MMX 3dnow
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 4a1b3cb..95de9fe 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -190,7 +190,14 @@ static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
     SET_HPEL_FUNCS(avg_no_rnd,    , 16, mmx);
     SET_HPEL_FUNCS(put,        [1],  8, mmx);
     SET_HPEL_FUNCS(put_no_rnd, [1],  8, mmx);
-    SET_HPEL_FUNCS(avg,        [1],  8, mmx);
+    if (HAVE_MMX_EXTERNAL) {
+        c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx;
+        c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx;
+    }
+#if HAVE_MMX_INLINE
+    c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
+    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx;
+#endif
 }
 
 static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
diff --git a/libavcodec/x86/hpeldsp_mmx.c b/libavcodec/x86/hpeldsp_mmx.c
deleted file mode 100644
index 039ba77..0000000
--- a/libavcodec/x86/hpeldsp_mmx.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * MMX-optimized avg/put pixel routines
- *
- * Copyright (c) 2001 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include "config.h"
-#include "hpeldsp.h"
-#include "inline_asm.h"
-
-#if HAVE_MMX_INLINE
-
-void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h)
-{
-    MOVQ_BFE(mm6);
-    JUMPALIGN();
-    do {
-        __asm__ volatile(
-            "movq  %1, %%mm0            \n\t"
-            "movq  1%1, %%mm1           \n\t"
-            "movq  %0, %%mm3            \n\t"
-            PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
-            PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
-            "movq  %%mm0, %0            \n\t"
-            :"+m"(*block)
-            :"m"(*pixels)
-            :"memory");
-        pixels += line_size;
-        block += line_size;
-    } while (--h);
-}
-
-#endif /* HAVE_MMX_INLINE */



More information about the ffmpeg-cvslog mailing list