[FFmpeg-devel] [PATCH] x86/dsputilenc: implement XOP version of pix_sum16

James Almer jamrial at gmail.com
Thu May 29 04:25:59 CEST 2014


SSE2: 137 cycles
XOP:   87 cycles
Signed-off-by: James Almer <jamrial at gmail.com>
---
I'm not putting it inside the PIX_SUM16 macro because there are too
many differences; folding it in would turn the macro into an ifdeffery mess.

 libavcodec/x86/dsputilenc.asm   | 23 +++++++++++++++++++++++
 libavcodec/x86/dsputilenc_mmx.c |  5 +++++
 2 files changed, 28 insertions(+)

diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 263516a..47b2b81 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -484,6 +484,29 @@ PIX_SUM16 0, 16
 INIT_XMM sse2
 PIX_SUM16 6, 8
 
+INIT_XMM xop                      ; x86inc: 128-bit XMM registers, function gets the _xop suffix
+cglobal pix_sum16, 2, 4, 5        ; int ff_pix_sum16_xop(uint8_t *pix, int line_size): 2 args, 4 GPRs, 5 XMM regs
+    movsxdifnidn r1, r1d          ; widen the int line_size (r1d) to a full-width register where needed
+    mov          r2, 4            ; r2 = loop counter: 4 iterations of 4 rows = 16 rows
+    lea          r3, [r1*3]       ; r3 = 3 * line_size, offset of the 4th row in each group
+    pxor         m4, m4           ; m4 = running accumulator, starts at zero
+.loop:
+    vphaddubq    m0, [r0]         ; row 0: each qword = sum of its 8 unsigned bytes
+    vphaddubq    m1, [r0+r1]      ; row 1
+    vphaddubq    m2, [r0+r1*2]    ; row 2
+    vphaddubq    m3, [r0+r3]      ; row 3
+    paddd        m1, m0           ; rows 0+1; dword adds are enough: a lane never exceeds 16*8*255 = 32640
+    paddd        m3, m2           ; rows 2+3
+    paddd        m3, m1           ; rows 0..3
+    paddd        m4, m3           ; fold this group of 4 rows into the accumulator
+    lea          r0, [r0+r1*4]    ; advance pix by 4 rows (lea leaves flags untouched)
+    dec r2                        ; one group of 4 rows done
+    jne .loop                     ; repeat until the counter reaches zero
+    pshufd       m0, m4, q0032    ; bring dword 2 (sum of the high 8 columns) down to dword 0
+    paddd        m4, m0           ; dword 0 = low-half sum + high-half sum = total
+    movd        eax, m4           ; return the total in eax
+    RET
+
 ; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
 ; %1 = number of xmm registers used
 ; %2 = number of loops
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index efe835f..4280f4b 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -39,6 +39,7 @@ void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                          int stride);
 int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
 int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
+int ff_pix_sum16_xop(uint8_t *pix, int line_size);
 int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
 int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
 int ff_sum_abs_dctelem_mmx(int16_t *block);
@@ -925,5 +926,9 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
 #endif
     }
 
+    if (EXTERNAL_XOP(cpu_flags)) {
+        c->pix_sum           = ff_pix_sum16_xop;
+    }
+
     ff_dsputil_init_pix_mmx(c, avctx);
 }
-- 
1.8.5.5



More information about the ffmpeg-devel mailing list