[FFmpeg-cvslog] avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction
Martin Vignali
git at videolan.org
Sun Jan 28 21:24:56 EET 2018
ffmpeg | branch: master | Martin Vignali <martin.vignali at gmail.com> | Sun Jan 14 14:23:05 2018 +0100| [8f9c38b19629838066def1207703cfcdc19fcbc9] | committer: Martin Vignali
avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction
asm code by Henrik Gramner
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8f9c38b19629838066def1207703cfcdc19fcbc9
---
libavcodec/lossless_videoencdsp.c | 15 +++++++++++
libavcodec/lossless_videoencdsp.h | 5 ++++
libavcodec/utvideoenc.c | 20 +-------------
libavcodec/x86/lossless_videoencdsp.asm | 43 ++++++++++++++++++++++++++++++
libavcodec/x86/lossless_videoencdsp_init.c | 7 +++++
5 files changed, 71 insertions(+), 19 deletions(-)
diff --git a/libavcodec/lossless_videoencdsp.c b/libavcodec/lossless_videoencdsp.c
index 5cc4934c0e..ed70329628 100644
--- a/libavcodec/lossless_videoencdsp.c
+++ b/libavcodec/lossless_videoencdsp.c
@@ -74,10 +74,25 @@ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1,
*left_top = lt;
}
+static void sub_left_predict_c(uint8_t *dst, uint8_t *src, /* left-prediction residuals: dst = packed output, src = input plane */
+ ptrdiff_t stride, ptrdiff_t width, int height) /* stride = offset between src rows; width x height = samples processed */
+{
+ int i, j;
+ uint8_t prev = 0x80; /* predictor for the very first sample; afterwards holds the previously read sample */
+ for (j = 0; j < height; j++) {
+ for (i = 0; i < width; i++) {
+ *dst++ = src[i] - prev; /* difference to the left neighbour, truncated to 8 bits; dst is written contiguously (no row stride) */
+ prev = src[i]; /* not reset per row: a row's first sample is predicted from the previous row's last sample */
+ }
+ src += stride; /* advance to the next source row */
+ }
+}
+
av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
{
c->diff_bytes = diff_bytes_c;
c->sub_median_pred = sub_median_pred_c;
+ c->sub_left_predict = sub_left_predict_c;
if (ARCH_X86)
ff_llvidencdsp_init_x86(c);
diff --git a/libavcodec/lossless_videoencdsp.h b/libavcodec/lossless_videoencdsp.h
index 3d645b159a..faa6c32551 100644
--- a/libavcodec/lossless_videoencdsp.h
+++ b/libavcodec/lossless_videoencdsp.h
@@ -21,6 +21,8 @@
#include <stdint.h>
+#include "avcodec.h"
+
typedef struct LLVidEncDSPContext {
void (*diff_bytes)(uint8_t *dst /* align 16 */,
const uint8_t *src1 /* align 16 */,
@@ -33,6 +35,9 @@ typedef struct LLVidEncDSPContext {
void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, intptr_t w,
int *left, int *left_top);
+
+ void (*sub_left_predict)(uint8_t *dst, uint8_t *src,
+ ptrdiff_t stride, ptrdiff_t width, int height);
} LLVidEncDSPContext;
void ff_llvidencdsp_init(LLVidEncDSPContext *c);
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index a829b7aaac..db00e1eff5 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c
@@ -283,23 +283,6 @@ static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride,
}
}
-/* Write data to a plane with left prediction */
-static void left_predict(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
- int width, int height)
-{
- int i, j;
- uint8_t prev;
-
- prev = 0x80; /* Set the initial value */
- for (j = 0; j < height; j++) {
- for (i = 0; i < width; i++) {
- *dst++ = src[i] - prev;
- prev = src[i];
- }
- src += stride;
- }
-}
-
#undef A
#undef B
@@ -436,8 +419,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
for (i = 0; i < c->slices; i++) {
sstart = send;
send = height * (i + 1) / c->slices & cmask;
- left_predict(src + sstart * stride, dst + sstart * width,
- stride, width, send - sstart);
+ c->llvidencdsp.sub_left_predict(dst + sstart * width, src + sstart * stride, stride, width, send - sstart);
}
break;
case PRED_MEDIAN:
diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 4d79eee36b..fb1204f0f1 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -25,6 +25,8 @@
%include "libavutil/x86/x86util.asm"
+cextern pb_80
+
SECTION .text
; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE
DIFF_BYTES_BODY u, u
%undef i
%endif
+
+
+;--------------------------------------------------------------------------------------------------
+;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
+;--------------------------------------------------------------------------------------------------
+
+INIT_XMM avx
+cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x ; x is a scratch register, not an argument
+ mova m1, [pb_80] ; prev initial: m1 carries the "previous 16 bytes", seeded from the external pb_80 constant (presumably 0x80 in every byte, matching the C version's initial prev)
+ add dstq, widthq ; point dst at the end of the first output row
+ add srcq, widthq ; point src at the end of the first input row
+ lea xd, [widthq-1] ; x = width - 1 (index of the last sample in a row)
+ neg widthq ; width = -width: negative offset that counts up towards 0
+ and xd, 15 ; x = lane of the row's last sample inside its 16-byte vector
+ pinsrb m4, m1, xd, 15 ; m4 = pb_80 with byte 15 replaced by that lane: pshufb mask used at row end
+ mov xq, widthq ; x = -width, kept to restart the offset for every row
+
+ .loop: ; NOTE(review): entered unconditionally, so width > 0 and height > 0 are assumed - confirm with callers
+ movu m0, [srcq + widthq] ; m0 = source bytes 0..15 of this 32-byte step
+ palignr m2, m0, m1, 15 ; m2 = { m1[15], m0[0..14] }: left neighbour of each byte in m0
+ movu m1, [srcq + widthq + 16] ; m1 = source bytes 16..31 of this step
+ palignr m3, m1, m0, 15 ; m3 = { m0[15], m1[0..14] }: left neighbour of each byte in m1
+ psubb m2, m0, m2 ; residual = sample - left neighbour (wraps modulo 256)
+ psubb m3, m1, m3
+ movu [dstq + widthq], m2
+ movu [dstq + widthq + 16], m3
+ add widthq, 2 * 16 ; always steps 32 bytes: when width is not a multiple of 32 the last step loads and stores past the row end
+ jl .loop ; continue while the offset is still negative (row not finished)
+
+ add srcq, strideq ; move src to the end of the next row
+ sub dstq, xq ; dst + width
+ test xd, 16 ; bit 4 of -width is set when width mod 32 is in 1..16, i.e. the row's last sample was loaded into m0
+ jz .mod32 ; otherwise the last sample is already in m1
+ mova m1, m0
+
+.mod32:
+ pshufb m1, m4 ; byte 15 = last sample of the finished row, all other bytes zeroed: "prev" for the next row
+ mov widthq, xq ; reset the offset to -width
+ dec heightd
+ jg .loop ; repeat while rows remain
+ RET
diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c
index fc728c9fd1..40407add52 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);
+void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
+ ptrdiff_t stride, ptrdiff_t width, int height);
+
#if HAVE_INLINE_ASM
static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
@@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
c->diff_bytes = ff_diff_bytes_sse2;
}
+ if (EXTERNAL_AVX(cpu_flags)) {
+ c->sub_left_predict = ff_sub_left_predict_avx;
+ }
+
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_avx2;
}
More information about the ffmpeg-cvslog
mailing list