[FFmpeg-cvslog] libavcodec/exr : add x86 SIMD for predictor

Martin Vignali git at videolan.org
Sun Oct 1 23:35:42 EEST 2017


ffmpeg | branch: master | Martin Vignali <martin.vignali at gmail.com> | Sun Oct  1 21:37:15 2017 +0200| [ac5908b13f16cbda396730c35f5f3125ca24577a] | committer: James Almer

libavcodec/exr : add x86 SIMD for predictor

Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ac5908b13f16cbda396730c35f5f3125ca24577a
---

 libavcodec/exr.c             | 16 ++----------
 libavcodec/exrdsp.c          |  9 +++++++
 libavcodec/exrdsp.h          |  1 +
 libavcodec/x86/exrdsp.asm    | 62 +++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/x86/exrdsp_init.c | 13 ++++++++++
 tests/checkasm/exrdsp.c      | 23 ++++++++++++++++
 6 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 230d5bbca8..0b755db3cb 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v)
     return (v + (1 << 16)) >> (exp + 1);
 }
 
-static void predictor(uint8_t *src, int size)
-{
-    uint8_t *t    = src + 1;
-    uint8_t *stop = src + size;
-
-    while (t < stop) {
-        int d = (int) t[-1] + (int) t[0] - 128;
-        t[0] = d;
-        ++t;
-    }
-}
-
 static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
                           int uncompressed_size, EXRThreadData *td)
 {
@@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size
 
     av_assert1(uncompressed_size % 2 == 0);
 
-    predictor(td->tmp, uncompressed_size);
+    s->dsp.predictor(td->tmp, uncompressed_size);
     s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
 
     return 0;
@@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si
 
     av_assert1(uncompressed_size % 2 == 0);
 
-    predictor(td->tmp, uncompressed_size);
+    ctx->dsp.predictor(td->tmp, uncompressed_size);
     ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
 
     return 0;
diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c
index 871b6f1276..42dbf1f54a 100644
--- a/libavcodec/exrdsp.c
+++ b/libavcodec/exrdsp.c
@@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
     }
 }
 
+static void predictor_scalar(uint8_t *src, ptrdiff_t size) /* in-place running sum over src[0..size-1]; C fallback installed by ff_exrdsp_init() */
+{
+    ptrdiff_t i;
+
+    for (i = 1; i < size; i++) /* starts at 1: src[0] is never modified; nothing happens when size <= 1 */
+        src[i] += src[i-1] - 128; /* adds the already-updated previous byte minus 128; the stored result wraps modulo 256 */
+}
+
 av_cold void ff_exrdsp_init(ExrDSPContext *c)
 {
     c->reorder_pixels   = reorder_pixels_scalar;
+    c->predictor        = predictor_scalar;
 
     if (ARCH_X86)
         ff_exrdsp_init_x86(c);
diff --git a/libavcodec/exrdsp.h b/libavcodec/exrdsp.h
index d8cb002efc..2c4dc3af88 100644
--- a/libavcodec/exrdsp.h
+++ b/libavcodec/exrdsp.h
@@ -24,6 +24,7 @@
 
 typedef struct ExrDSPContext {
     void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
+    void (*predictor)(uint8_t *src, ptrdiff_t size); /* in-place predictor over size bytes of src; set to the scalar version by ff_exrdsp_init(), may be replaced by ff_exrdsp_init_x86() */
 } ExrDSPContext;
 
 void ff_exrdsp_init(ExrDSPContext *c);
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index 06c629e59e..23c9397ef8 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm
@@ -2,9 +2,11 @@
 ;* X86 Optimized functions for Open Exr Decoder
 ;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
 ;*
-;* reorder_pixels based on patch by John Loy
+;* reorder_pixels, predictor based on patch by John Loy
 ;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
 ;*
+;* predictor AVX/AVX2 by Henrik Gramner
+;*
 ;* This file is part of FFmpeg.
 ;*
 ;* FFmpeg is free software; you can redistribute it and/or
@@ -24,6 +26,9 @@
 
 %include "libavutil/x86/x86util.asm"
 
+cextern pb_15
+cextern pb_80
+
 SECTION .text
 
 ;------------------------------------------------------------------------------
@@ -60,3 +65,58 @@ REORDER_PIXELS
 INIT_YMM avx2
 REORDER_PIXELS
 %endif
+
+
+;------------------------------------------------------------------------------
+; void ff_predictor(uint8_t *src, ptrdiff_t size);
+;------------------------------------------------------------------------------
+
+%macro PREDICTOR 0
+cglobal predictor, 2,2,5, src, size ; in-place byte predictor; uses 2 GPRs (srcq, sizeq) and vector regs m0-m4
+%if mmsize == 32
+    vbroadcasti128   m0, [pb_80] ; copy the 16-byte pb_80 constant into both 128-bit lanes
+%else
+    mova            xm0, [pb_80] ; pb_80 is external (cextern); presumably 0x80 in every byte - confirm
+%endif
+    mova            xm1, [pb_15] ; pshufb control; presumably every byte = 15 (selects byte 15) - confirm
+    mova            xm2, xm0 ; xm2 = running carry between vectors, seeded with pb_80
+    add            srcq, sizeq ; srcq now points at the end of the buffer
+    neg           sizeq ; sizeq = negative offset that counts up towards 0
+.loop:
+    pxor             m3, m0, [srcq + sizeq] ; load mmsize bytes and XOR with pb_80 (same as -128 mod 256 if pb_80 is 0x80 bytes)
+    pslldq           m4, m3, 1 ; prefix sum inside each 128-bit lane: shift by 1 byte ...
+    paddb            m3, m4
+    pslldq           m4, m3, 2 ; ... then by 2 ...
+    paddb            m3, m4
+    pslldq           m4, m3, 4 ; ... then by 4 ...
+    paddb            m3, m4
+    pslldq           m4, m3, 8 ; ... then by 8 (this last term is added below)
+%if mmsize == 32
+    paddb            m3, m4 ; m3 = complete prefix sums, independently per 128-bit lane
+    paddb           xm2, xm3 ; low lane result = carry + low-lane prefix sums
+    vextracti128    xm4, m3, 1 ; xm4 = high-lane prefix sums
+    mova [srcq + sizeq], xm2 ; store the first 16 output bytes
+    pshufb          xm2, xm1 ; new carry taken from the low-lane result via the pb_15 shuffle
+    paddb           xm2, xm4 ; high lane result = carry + high-lane prefix sums
+    mova [srcq + sizeq + 16], xm2 ; store the second 16 output bytes
+%else
+    paddb            m2, m3 ; carry + partial prefix sums
+    paddb            m2, m4 ; + the 8-byte-shifted term = final output for this vector
+    mova [srcq + sizeq], m2 ; store 16 output bytes (mova: presumably requires an aligned buffer - confirm)
+%endif
+    pshufb          xm2, xm1 ; carry for the next iteration, taken from the vector just stored
+    add           sizeq, mmsize ; advance by one full vector
+    jl .loop ; test is after the body: at least one iteration, and always whole mmsize-byte vectors
+    RET
+%endmacro
+
+INIT_XMM ssse3
+PREDICTOR
+
+INIT_XMM avx
+PREDICTOR
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+PREDICTOR
+%endif
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
index 5669be3d97..63b3480d8f 100644
--- a/libavcodec/x86/exrdsp_init.c
+++ b/libavcodec/x86/exrdsp_init.c
@@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
 void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
 
+void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
+
+void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
+
 av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_sse2;
     }
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        dsp->predictor = ff_predictor_ssse3;
+    }
+    if (EXTERNAL_AVX(cpu_flags)) {
+        dsp->predictor = ff_predictor_avx;
+    }
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         dsp->reorder_pixels = ff_reorder_pixels_avx2;
+        dsp->predictor      = ff_predictor_avx2;
     }
 }
diff --git a/tests/checkasm/exrdsp.c b/tests/checkasm/exrdsp.c
index 6637f6fdd2..754a079f83 100644
--- a/tests/checkasm/exrdsp.c
+++ b/tests/checkasm/exrdsp.c
@@ -55,6 +55,24 @@ static void check_reorder_pixels(void) {
     bench_new(dst_new, src, BUF_SIZE);
 }
 
+static void check_predictor(void) { /* runs the reference and the candidate predictor on identical input and compares the results */
+    LOCAL_ALIGNED_32(uint8_t, src,     [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
+    LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
+
+    declare_func(void, uint8_t *src, ptrdiff_t size); /* same signature as ExrDSPContext.predictor */
+
+    memset(src,     0, PADDED_BUF_SIZE);
+    randomize_buffers(); /* macro defined outside this hunk; presumably fills src with random bytes - confirm */
+    memcpy(dst_ref, src, PADDED_BUF_SIZE); /* the predictor works in place, so each implementation gets its own copy */
+    memcpy(dst_new, src, PADDED_BUF_SIZE);
+    call_ref(dst_ref, BUF_SIZE);
+    call_new(dst_new, BUF_SIZE);
+    if (memcmp(dst_ref, dst_new, BUF_SIZE)) /* only the first BUF_SIZE bytes are compared; the padding is not checked */
+        fail();
+    bench_new(dst_new, BUF_SIZE);
+}
+
 void checkasm_check_exrdsp(void)
 {
     ExrDSPContext h;
@@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void)
         check_reorder_pixels();
 
     report("reorder_pixels");
+
+    if (check_func(h.predictor, "predictor"))
+        check_predictor();
+
+    report("predictor");
 }



More information about the ffmpeg-cvslog mailing list