[FFmpeg-devel] [PATCH] x86/dsputil: implement 3DNow version of vector_clipf

James Almer jamrial at gmail.com
Tue May 27 20:16:03 CEST 2014


Signed-off-by: James Almer <jamrial at gmail.com>
---
Those old K6-2 and K7 CPUs need some love too.

 libavcodec/x86/dsputil.asm    | 47 +++++++++++++++++++++++++++++++++----------
 libavcodec/x86/dsputil_init.c | 11 ++++++++++
 libavcodec/x86/dsputil_x86.h  |  2 ++
 3 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 4804682..36c9258 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -630,19 +630,35 @@ PUT_SIGNED_PIXELS_CLAMPED 3
 ;void ff_vector_clipf(float *dst, const float *src,
 ;                     float min, float max, int len)
 ;-----------------------------------------------------
-INIT_XMM sse
+%macro CLIPF_3DNOW 3
+    pfmin   %1, %3
+    pfmax   %1, %2
+%endmacro
+
+%macro CLIPF_SSE 3
+    minps   %1, %3
+    maxps   %1, %2
+%endmacro
+
+; %1 = number of xmm registers used (0 for the 3DNow! version, which uses mm registers)
+%macro VECTOR_CLIPF 1
 %if UNIX64
-cglobal vector_clipf, 3,3,6, dst, src, len
+cglobal vector_clipf, 3,3,%1, dst, src, len
 %else
-cglobal vector_clipf, 5,5,6, dst, src, min, max, len
+cglobal vector_clipf, 5,5,%1, dst, src, min, max, len
 %endif
 %if WIN64
     SWAP 0, 2
     SWAP 1, 3
 %elif ARCH_X86_32
+%if mmsize == 8
+    movd    m0, mind
+    movd    m1, maxd
+%else
     movss   m0, minm
     movss   m1, maxm
 %endif
+%endif
     SPLATD  m0
     SPLATD  m1
         shl lend, 2
@@ -654,18 +670,27 @@ cglobal vector_clipf, 5,5,6, dst, src, min, max, len
     mova    m3,  [srcq+lenq+mmsize*1]
     mova    m4,  [srcq+lenq+mmsize*2]
     mova    m5,  [srcq+lenq+mmsize*3]
-    maxps   m2, m0
-    maxps   m3, m0
-    maxps   m4, m0
-    maxps   m5, m0
-    minps   m2, m1
-    minps   m3, m1
-    minps   m4, m1
-    minps   m5, m1
+    CLIPF   m2, m0, m1
+    CLIPF   m3, m0, m1
+    CLIPF   m4, m0, m1
+    CLIPF   m5, m0, m1
     mova    [dstq+lenq+mmsize*0], m2
     mova    [dstq+lenq+mmsize*1], m3
     mova    [dstq+lenq+mmsize*2], m4
     mova    [dstq+lenq+mmsize*3], m5
     add     lenq, mmsize*4
     jl .loop
+%if mmsize == 8
+    femms
+%endif
     REP_RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX 3dnow
+%define CLIPF CLIPF_3DNOW
+VECTOR_CLIPF 0
+%endif
+INIT_XMM sse
+%define CLIPF CLIPF_SSE
+VECTOR_CLIPF 6
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index 30829ae..e6fe456 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -550,6 +550,14 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
 #endif /* HAVE_MMX_EXTERNAL */
 }
 
+static av_cold void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
+                                       int cpu_flags, unsigned high_bit_depth)
+{
+#if ARCH_X86_32
+    c->vector_clipf = ff_vector_clipf_3dnow;
+#endif
+}
+
 static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
                                         int cpu_flags, unsigned high_bit_depth)
 {
@@ -679,6 +687,9 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
         dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth);
     }
 
+    if (EXTERNAL_AMD3DNOW(cpu_flags))
+        dsputil_init_3dnow(c, avctx, cpu_flags, high_bit_depth);
+
     if (X86_MMXEXT(cpu_flags))
         dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);
 
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h
index 1f4711d..f6247c6 100644
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -64,6 +64,8 @@ void ff_gmc_sse(uint8_t *dst, uint8_t *src,
                 int dxx, int dxy, int dyx, int dyy,
                 int shift, int r, int width, int height);
 
+void ff_vector_clipf_3dnow(float *dst, const float *src,
+                           float min, float max, int len);
 void ff_vector_clipf_sse(float *dst, const float *src,
                          float min, float max, int len);
 
-- 
1.8.5.5



More information about the ffmpeg-devel mailing list