[FFmpeg-cvslog] x86: huffyuvdsp: add_hfyu_left_pred_bgr32

Christophe Gisquet git at videolan.org
Fri May 30 15:25:07 CEST 2014


ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Wed May 28 21:57:38 2014 +0200| [f743fa9c7f872a23672e634c39d61c7b0cd45fcf] | committer: Michael Niedermayer

x86: huffyuvdsp: add_hfyu_left_pred_bgr32

          C   MMX   SSE2
Cycles: 3092  1053  578

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f743fa9c7f872a23672e634c39d61c7b0cd45fcf
---

 libavcodec/x86/huffyuvdsp.asm    |   39 ++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/huffyuvdsp_init.c |   10 +++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 7ebb07c..73c4764 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -1,6 +1,7 @@
 ;******************************************************************************
 ;* SIMD-optimized HuffYUV functions
 ;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2014 Christophe Gisquet
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -222,3 +223,41 @@ INIT_MMX mmx
 ADD_BYTES
 INIT_XMM sse2
 ADD_BYTES
+
+; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
+;                               intptr_t w, uint8_t *left)
+%macro LEFT_BGR32 0 ; emits add_hfyu_left_pred_bgr32: byte-wise running sum over 4-byte pixels, seeded from *left
+cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left ; only m0-m2 are used as vector registers below
+    shl           wq, 2           ; w *= 4: pixel count -> byte count (4 bytes per pixel)
+    movd          m0, [leftq]     ; low dword of m0 = the 4 bytes stored at *left
+    lea         dstq, [dstq + wq] ; dstq -> one past the row end; addressed with negative wq below
+    lea         srcq, [srcq + wq] ; srcq -> one past the row end, same scheme
+    LSHIFT        m0, mmsize-4    ; LSHIFT is defined outside this hunk; presumably a whole-register byte shift moving the seed into the top dword -- TODO confirm
+    neg           wq              ; wq = -(byte count); the loop counts it up towards 0
+.loop:
+    movu          m1, [srcq+wq]   ; unaligned load of mmsize bytes of input
+    mova          m2, m1          ; keep an unshifted copy for the add below
+%if mmsize == 8
+    punpckhdq     m0, m0          ; copy the high dword of m0 into both dwords (carry-in pixel)
+%endif
+    LSHIFT        m1, 4           ; assuming a byte shift: move every pixel up one position
+    paddb         m1, m2          ; wrapping byte add: pixel i += pixel i-1
+%if mmsize == 16
+    pshufd        m0, m0, q3333   ; copy the top dword of m0 into all four dwords (carry-in pixel)
+    mova          m2, m1          ; second doubling step of the in-register running sum
+    LSHIFT        m1, 8           ; assuming a byte shift: move the partial sums up two pixels
+    paddb         m1, m2          ; pixel i now holds the sum of input pixels 0..i of this vector
+%endif
+    paddb         m0, m1          ; add the carry-in pixel to every pixel of the vector
+    movu   [dstq+wq], m0          ; unaligned store of mmsize output bytes; m0 carries over to the next pass
+    add           wq, mmsize      ; advance by one full vector
+    jl         .loop              ; loop while wq < 0. NOTE(review): runs at least once and only in whole mmsize steps, so a byte count that is not a multiple of mmsize reads/writes past the row end -- confirm callers pad
+    movd          m0, [dstq-4]    ; reload the 4 bytes just before the row end from dst memory
+    movd     [leftq], m0          ; write them back to *left
+    REP_RET                       ; macro defined outside this hunk; presumably the function return -- confirm
+%endmacro
+
+INIT_MMX mmx   ; presumably selects the macro's mmsize == 8 path -- confirm against x86inc
+LEFT_BGR32     ; the C side declares ff_add_hfyu_left_pred_bgr32_mmx (name decoration by cglobal is not shown here)
+INIT_XMM sse2  ; presumably selects the macro's mmsize == 16 path -- confirm against x86inc
+LEFT_BGR32     ; the C side declares ff_add_hfyu_left_pred_bgr32_sse2 (name decoration by cglobal is not shown here)
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 9628724..7ea36c7 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -41,6 +41,11 @@ int  ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
 int  ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src,
                                 intptr_t w, int left);
 
+void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
+                                     intptr_t w, uint8_t *left);
+void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
+                                      intptr_t w, uint8_t *left);
+
 av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -50,8 +55,10 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
         c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
 #endif
 
-    if (EXTERNAL_MMX(cpu_flags))
+    if (EXTERNAL_MMX(cpu_flags)) {
         c->add_bytes = ff_add_bytes_mmx;
+        c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx;
+    }
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         /* slower than cmov version on AMD */
@@ -62,6 +69,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->add_bytes            = ff_add_bytes_sse2;
         c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
+        c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {



More information about the ffmpeg-cvslog mailing list