[FFmpeg-cvslog] libavcodec/exr : add X86 SIMD for reorder_pixels
Martin Vignali
git at videolan.org
Sun Sep 17 23:54:57 EEST 2017
ffmpeg | branch: master | Martin Vignali <martin.vignali at gmail.com> | Sun Sep 17 21:59:41 2017 +0200| [9b8c1224d7e1804b0b750de11e6a8c4648f1e115] | committer: James Almer
libavcodec/exr : add X86 SIMD for reorder_pixels
Signed-off-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9b8c1224d7e1804b0b750de11e6a8c4648f1e115
---
libavcodec/Makefile | 2 +-
libavcodec/exr.c | 38 +++++++++++---------------
libavcodec/exrdsp.c | 47 +++++++++++++++++++++++++++++++++
libavcodec/exrdsp.h | 32 ++++++++++++++++++++++
libavcodec/x86/Makefile | 2 ++
libavcodec/x86/exrdsp.asm | 63 ++++++++++++++++++++++++++++++++++++++++++++
libavcodec/x86/exrdsp_init.c | 39 +++++++++++++++++++++++++++
7 files changed, 199 insertions(+), 24 deletions(-)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 943e5db511..fad56129a3 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -286,7 +286,7 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o
OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o
OBJS-$(CONFIG_ESCAPE130_DECODER) += escape130.o
OBJS-$(CONFIG_EVRC_DECODER) += evrcdec.o acelp_vectors.o lsp.o
-OBJS-$(CONFIG_EXR_DECODER) += exr.o
+OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o
OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o
OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o
OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 759880756d..de2f05d3a9 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -51,6 +51,7 @@
#include "bswapdsp.h"
#endif
+#include "exrdsp.h"
#include "get_bits.h"
#include "internal.h"
#include "mathops.h"
@@ -121,6 +122,7 @@ typedef struct EXRContext {
AVClass *class;
AVFrame *picture;
AVCodecContext *avctx;
+ ExrDSPContext dsp;
#if HAVE_BIGENDIAN
BswapDSPContext bbdsp;
@@ -275,23 +277,7 @@ static void predictor(uint8_t *src, int size)
}
}
-static void reorder_pixels(uint8_t *src, uint8_t *dst, int size)
-{
- const uint8_t *t1 = src;
- int half_size = size / 2;
- const uint8_t *t2 = src + half_size;
- uint8_t *s = dst;
- int i;
-
- av_assert1(size % 2 == 0);
-
- for (i = 0; i < half_size; i++) {
- *(s++) = *(t1++);
- *(s++) = *(t2++);
- }
-}
-
-static int zip_uncompress(const uint8_t *src, int compressed_size,
+static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
int uncompressed_size, EXRThreadData *td)
{
unsigned long dest_len = uncompressed_size;
@@ -300,13 +286,15 @@ static int zip_uncompress(const uint8_t *src, int compressed_size,
dest_len != uncompressed_size)
return AVERROR_INVALIDDATA;
+ av_assert1(uncompressed_size % 2 == 0);
+
predictor(td->tmp, uncompressed_size);
- reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
+ s->dsp.reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
return 0;
}
-static int rle_uncompress(const uint8_t *src, int compressed_size,
+static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_size,
int uncompressed_size, EXRThreadData *td)
{
uint8_t *d = td->tmp;
@@ -345,8 +333,10 @@ static int rle_uncompress(const uint8_t *src, int compressed_size,
if (dend != d)
return AVERROR_INVALIDDATA;
+ av_assert1(uncompressed_size % 2 == 0);
+
predictor(td->tmp, uncompressed_size);
- reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
+ ctx->dsp.reorder_pixels(td->tmp, td->uncompressed_data, uncompressed_size);
return 0;
}
@@ -1152,7 +1142,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
if (data_size < uncompressed_size) {
av_fast_padded_malloc(&td->uncompressed_data,
- &td->uncompressed_size, uncompressed_size);
+ &td->uncompressed_size, uncompressed_size + 64);/* Force 64 padding for AVX2 reorder_pixels dst */
if (!td->uncompressed_data)
return AVERROR(ENOMEM);
@@ -1161,7 +1151,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
switch (s->compression) {
case EXR_ZIP1:
case EXR_ZIP16:
- ret = zip_uncompress(src, data_size, uncompressed_size, td);
+ ret = zip_uncompress(s, src, data_size, uncompressed_size, td);
break;
case EXR_PIZ:
ret = piz_uncompress(s, src, data_size, uncompressed_size, td);
@@ -1170,7 +1160,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
ret = pxr24_uncompress(s, src, data_size, uncompressed_size, td);
break;
case EXR_RLE:
- ret = rle_uncompress(src, data_size, uncompressed_size, td);
+ ret = rle_uncompress(s, src, data_size, uncompressed_size, td);
break;
case EXR_B44:
case EXR_B44A:
@@ -1804,6 +1794,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
s->avctx = avctx;
+ ff_exrdsp_init(&s->dsp);
+
#if HAVE_BIGENDIAN
ff_bswapdsp_init(&s->bbdsp);
#endif
diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c
new file mode 100644
index 0000000000..e59dac3dc4
--- /dev/null
+++ b/libavcodec/exrdsp.c
@@ -0,0 +1,47 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "exrdsp.h"
+#include "config.h"
+
+static void reorder_pixels_scalar(uint8_t *src, uint8_t *dst, ptrdiff_t size)
+{
+ const uint8_t *t1 = src;
+ int half_size = size / 2;
+ const uint8_t *t2 = src + half_size;
+ uint8_t *s = dst;
+ int i;
+
+ for (i = 0; i < half_size; i++) {
+ *(s++) = *(t1++);
+ *(s++) = *(t2++);
+ }
+}
+
+av_cold void ff_exrdsp_init(ExrDSPContext *c)
+{
+ c->reorder_pixels = reorder_pixels_scalar;
+
+ if (ARCH_X86)
+ ff_exrdsp_init_x86(c);
+}
diff --git a/libavcodec/exrdsp.h b/libavcodec/exrdsp.h
new file mode 100644
index 0000000000..09a76a518e
--- /dev/null
+++ b/libavcodec/exrdsp.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_EXRDSP_H
+#define AVCODEC_EXRDSP_H
+
+#include <stdint.h>
+#include "libavutil/common.h"
+
+typedef struct ExrDSPContext {
+ void (*reorder_pixels)(uint8_t *src, uint8_t *dst, ptrdiff_t size);
+} ExrDSPContext;
+
+void ff_exrdsp_init(ExrDSPContext *c);
+void ff_exrdsp_init_x86(ExrDSPContext *c);
+
+#endif /* AVCODEC_EXRDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index e36644c72a..a805cd37b4 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -52,6 +52,7 @@ OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp_init.o
OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o
OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o x86/synth_filter_init.o
OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc_init.o
+OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp_init.o
OBJS-$(CONFIG_OPUS_DECODER) += x86/opus_dsp_init.o
OBJS-$(CONFIG_OPUS_ENCODER) += x86/opus_dsp_init.o
OBJS-$(CONFIG_HEVC_DECODER) += x86/hevcdsp_init.o
@@ -153,6 +154,7 @@ X86ASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o
X86ASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \
x86/dirac_dwt.o
X86ASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
+X86ASM-OBJS-$(CONFIG_EXR_DECODER) += x86/exrdsp.o
X86ASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
ifdef CONFIG_GPL
X86ASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
new file mode 100644
index 0000000000..91d9c0b0a7
--- /dev/null
+++ b/libavcodec/x86/exrdsp.asm
@@ -0,0 +1,63 @@
+;******************************************************************************
+;* X86 Optimized functions for Open Exr Decoder
+;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
+;*
+;* reorder_pixels based on patch by John Loy
+;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+;------------------------------------------------------------------------------
+; void ff_reorder_pixels(uint8_t *src, uint8_t *dst, ptrdiff_t size)
+;------------------------------------------------------------------------------
+
+%macro REORDER_PIXELS 0
+cglobal reorder_pixels, 3,4,3, src1, dst, size, src2
+ lea src2q, [src1q+sizeq] ; src2 = src + 2 * half_size
+ add dstq, sizeq ; dst offset by size
+ shr sizeq, 1 ; half_size
+ add src1q, sizeq ; offset src by half_size
+ neg sizeq ; size = offset for dst, src1, src2
+.loop:
+
+%if cpuflag(avx2)
+ vpermq m0, [src1q + sizeq], 0xd8; load first part
+ vpermq m1, [src2q + sizeq], 0xd8; load second part
+%else
+ mova m0, [src1q+sizeq] ; load first part
+ movu m1, [src2q+sizeq] ; load second part
+%endif
+ SBUTTERFLY bw, 0, 1, 2 ; interleaved
+ mova [dstq+2*sizeq ], m0 ; copy to dst
+ mova [dstq+2*sizeq+mmsize], m1
+ add sizeq, mmsize
+ jl .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+REORDER_PIXELS
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+REORDER_PIXELS
+%endif
diff --git a/libavcodec/x86/exrdsp_init.c b/libavcodec/x86/exrdsp_init.c
new file mode 100644
index 0000000000..c0f508b2c4
--- /dev/null
+++ b/libavcodec/x86/exrdsp_init.c
@@ -0,0 +1,39 @@
+/*
+ * OpenEXR (.exr) image decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/exrdsp.h"
+
+void ff_reorder_pixels_sse2(uint8_t *src, uint8_t *dst, ptrdiff_t size);
+
+void ff_reorder_pixels_avx2(uint8_t *src, uint8_t *dst, ptrdiff_t size);
+
+av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ dsp->reorder_pixels = ff_reorder_pixels_sse2;
+ }
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ dsp->reorder_pixels = ff_reorder_pixels_avx2;
+ }
+}
More information about the ffmpeg-cvslog
mailing list