[FFmpeg-devel] [RFC PATCH] avfilter/fastdeint: import simple cpu-optimized deinterlacing algorithms from VLC
Aman Gupta
ffmpeg at tmm1.net
Mon Sep 9 23:12:20 EEST 2019
From: Aman Gupta <aman at tmm1.net>
These are simple algorithms which can be run efficiently
on low-powered devices to produce deinterlaced images.
Signed-off-by: Aman Gupta <aman at tmm1.net>
---
doc/filters.texi | 27 ++
libavfilter/Makefile | 1 +
libavfilter/aarch64/Makefile | 1 +
libavfilter/aarch64/merge_neon.S | 98 ++++++
libavfilter/allfilters.c | 1 +
libavfilter/arm/Makefile | 3 +
libavfilter/arm/merge_armv6.S | 70 ++++
libavfilter/arm/merge_neon.S | 109 ++++++
libavfilter/vf_fastdeint.c | 588 +++++++++++++++++++++++++++++++
9 files changed, 898 insertions(+)
create mode 100644 libavfilter/aarch64/merge_neon.S
create mode 100644 libavfilter/arm/Makefile
create mode 100644 libavfilter/arm/merge_armv6.S
create mode 100644 libavfilter/arm/merge_neon.S
create mode 100644 libavfilter/vf_fastdeint.c
diff --git a/doc/filters.texi b/doc/filters.texi
index 6c81e1da40..55d9adeb81 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -9796,6 +9796,33 @@ fade=t=in:st=5.5:d=0.5
@end itemize
+ at section fastdeint
+Fast deinterlacing algorithms.
+
+ at table @option
+ at item mode
+Deinterlacing algorithm to use.
+
+It accepts the following values:
+ at table @samp
+ at item discard
+Discard the bottom field, keeping only the lines of the top field.
+
+ at item mean
+Half resolution blender.
+
+ at item blend
+Full resolution blender.
+
+ at item bob
+Bob doubler.
+
+ at item linear
+Bob doubler with linear interpolation.
+ at end table
+
+ at end table
+
@section fftdnoiz
Denoise frames using 3D FFT (frequency domain filtering).
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 3ef4191d9a..a2b3566ec0 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -234,6 +234,7 @@ OBJS-$(CONFIG_EROSION_OPENCL_FILTER) += vf_neighbor_opencl.o opencl.o \
opencl/neighbor.o
OBJS-$(CONFIG_EXTRACTPLANES_FILTER) += vf_extractplanes.o
OBJS-$(CONFIG_FADE_FILTER) += vf_fade.o
+OBJS-$(CONFIG_FASTDEINT_FILTER) += vf_fastdeint.o
OBJS-$(CONFIG_FFTDNOIZ_FILTER) += vf_fftdnoiz.o
OBJS-$(CONFIG_FFTFILT_FILTER) += vf_fftfilt.o
OBJS-$(CONFIG_FIELD_FILTER) += vf_field.o
diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
index b58daa3a3f..2b0ad92893 100644
--- a/libavfilter/aarch64/Makefile
+++ b/libavfilter/aarch64/Makefile
@@ -1,3 +1,4 @@
OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o
+NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += aarch64/merge_neon.o
NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o
diff --git a/libavfilter/aarch64/merge_neon.S b/libavfilter/aarch64/merge_neon.S
new file mode 100644
index 0000000000..62377331a4
--- /dev/null
+++ b/libavfilter/aarch64/merge_neon.S
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2009-2016 Rémi Denis-Courmont, Janne Grunau, VLC authors
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+#define dest x0
+#define src1 x1
+#define src2 x2
+#define size x3
+
+ .align 2
+ // NOTE: Offset and pitch must be multiple of 16-bytes.
+// ff_merge8_neon(dest = x0, src1 = x1, src2 = x2, size = x3)
+// Stores the per-byte unsigned halving add (uhadd) of src1 and src2 to dest.
+// Data is handled as 64-byte blocks, then one optional 32-byte block and one
+// optional 16-byte block selected by bits 5 and 4 of size. The low four bits
+// of size are never tested, so a tail shorter than 16 bytes is not written.
+function ff_merge8_neon, export=1
+ ands x5, size, #~63 // x5 = size rounded down to a multiple of 64
+ b.eq 2f // no full 64-byte block, go to the tails
+ mov x10, #64 // post-increment step shared by all four load pointers
+ add x11, src1, #32 // x11/x12 read the upper 32 bytes of each 64-byte block
+ add x12, src2, #32
+1:
+ ld1 {v0.16b,v1.16b}, [src1], x10
+ ld1 {v4.16b,v5.16b}, [src2], x10
+ ld1 {v2.16b,v3.16b}, [x11], x10
+ uhadd v0.16b, v0.16b, v4.16b
+ ld1 {v6.16b,v7.16b}, [x12], x10
+ subs x5, x5, #64 // flags from here drive the b.gt at the end of the loop
+ uhadd v1.16b, v1.16b, v5.16b
+ uhadd v2.16b, v2.16b, v6.16b
+ uhadd v3.16b, v3.16b, v7.16b
+ st1 {v0.16b,v1.16b}, [dest], #32
+ st1 {v2.16b,v3.16b}, [dest], #32
+ b.gt 1b
+2:
+ tbz size, #5, 3f // skip the 32-byte tail when bit 5 of size is clear
+ ld1 {v0.16b,v1.16b}, [src1], #32
+ ld1 {v4.16b,v5.16b}, [src2], #32
+ uhadd v0.16b, v0.16b, v4.16b
+ uhadd v1.16b, v1.16b, v5.16b
+ st1 {v0.16b,v1.16b}, [dest], #32
+3:
+ tbz size, #4, 4f // skip the 16-byte tail when bit 4 of size is clear
+ ld1 {v0.16b}, [src1]
+ ld1 {v4.16b}, [src2]
+ uhadd v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [dest]
+4:
+ ret
+endfunc
+
+ .align 2
+// ff_merge16_neon(dest = x0, src1 = x1, src2 = x2, size = x3)
+// Same layout as the 8-bit routine but the halving add works on 16-bit lanes
+// (.8h). size is still consumed in bytes: 64 per loop iteration, then optional
+// 32-byte and 16-byte blocks selected by bits 5 and 4 of size. The low four
+// bits of size are never tested.
+function ff_merge16_neon, export=1
+ ands x5, size, #~63 // x5 = size rounded down to a multiple of 64
+ b.eq 2f
+1:
+ ld1 {v0.8h,v1.8h}, [src1], #32
+ ld1 {v4.8h,v5.8h}, [src2], #32
+ ld1 {v2.8h,v3.8h}, [src1], #32
+ uhadd v0.8h, v0.8h, v4.8h
+ ld1 {v6.8h,v7.8h}, [src2], #32
+ uhadd v1.8h, v1.8h, v5.8h
+ uhadd v2.8h, v2.8h, v6.8h
+ uhadd v3.8h, v3.8h, v7.8h
+ st1 {v0.8h,v1.8h}, [dest], #32
+ st1 {v2.8h,v3.8h}, [dest], #32
+ subs x5, x5, #64
+ b.gt 1b
+2:
+ tbz size, #5, 3f // skip the 32-byte tail when bit 5 of size is clear
+ ld1 {v0.8h,v1.8h}, [src1], #32
+ ld1 {v4.8h,v5.8h}, [src2], #32
+ uhadd v0.8h, v0.8h, v4.8h
+ uhadd v1.8h, v1.8h, v5.8h
+ st1 {v0.8h,v1.8h}, [dest], #32
+3:
+ tbz size, #4, 4f // skip the 16-byte tail when bit 4 of size is clear
+ ld1 {v0.8h}, [src1]
+ ld1 {v4.8h}, [src2]
+ uhadd v0.8h, v0.8h,v4.8h
+ st1 {v0.8h}, [dest]
+4:
+ ret
+endfunc
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index b675c688ee..6631af2ffe 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -219,6 +219,7 @@ extern AVFilter ff_vf_erosion;
extern AVFilter ff_vf_erosion_opencl;
extern AVFilter ff_vf_extractplanes;
extern AVFilter ff_vf_fade;
+extern AVFilter ff_vf_fastdeint;
extern AVFilter ff_vf_fftdnoiz;
extern AVFilter ff_vf_fftfilt;
extern AVFilter ff_vf_field;
diff --git a/libavfilter/arm/Makefile b/libavfilter/arm/Makefile
new file mode 100644
index 0000000000..c92d62fac9
--- /dev/null
+++ b/libavfilter/arm/Makefile
@@ -0,0 +1,3 @@
+ARMV6-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_armv6.o
+
+NEON-OBJS-$(CONFIG_FASTDEINT_FILTER) += arm/merge_neon.o
diff --git a/libavfilter/arm/merge_armv6.S b/libavfilter/arm/merge_armv6.S
new file mode 100644
index 0000000000..9b551c2c6c
--- /dev/null
+++ b/libavfilter/arm/merge_armv6.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+#define dest r0
+#define src1 r1
+#define src2 r2
+#define size r3
+
+ .align 2
+ @ ff_merge8_armv6(dest = r0, src1 = r1, src2 = r2, size = r3)
+ @ Stores the per-byte unsigned halving add (uhadd8) of src1 and src2 to dest,
+ @ 16 bytes (four 32-bit words) per loop iteration.
+ @ The loop has no entry check and returns only when the running count hits
+ @ exactly zero, so size has to be a non-zero multiple of 16.
+function ff_merge8_armv6, export=1
+ push {r4-r9,lr}
+1:
+ pld [src1, #64]
+ ldm src1!, {r4-r5}
+ pld [src2, #64]
+ ldm src2!, {r8-r9}
+ subs size, size, #16 @ sets Z when this is the last 16-byte group
+ uhadd8 r4, r4, r8
+ ldm src1!, {r6-r7}
+ uhadd8 r5, r5, r9
+ ldm src2!, {ip,lr}
+ uhadd8 r6, r6, ip
+ stm dest!, {r4-r5}
+ uhadd8 r7, r7, lr
+ stm dest!, {r6-r7}
+ it eq
+ popeq {r4-r9,pc} @ restore registers and return once size reached zero
+ b 1b
+endfunc
+
+ .align 2
+ @ ff_merge16_armv6(dest = r0, src1 = r1, src2 = r2, size = r3)
+ @ 16-bit variant: uhadd16 performs the unsigned halving add on each halfword.
+ @ size is counted in bytes, 16 per loop iteration.
+ @ The loop has no entry check and returns only when the running count hits
+ @ exactly zero, so size has to be a non-zero multiple of 16.
+function ff_merge16_armv6, export=1
+ push {r4-r9,lr}
+1:
+ pld [src1, #64]
+ ldm src1!, {r4-r5}
+ pld [src2, #64]
+ ldm src2!, {r8-r9}
+ subs size, size, #16 @ sets Z when this is the last 16-byte group
+ uhadd16 r4, r4, r8
+ ldm src1!, {r6-r7}
+ uhadd16 r5, r5, r9
+ ldm src2!, {ip,lr}
+ uhadd16 r6, r6, ip
+ stm dest!, {r4-r5}
+ uhadd16 r7, r7, lr
+ stm dest!, {r6-r7}
+ it eq
+ popeq {r4-r9,pc} @ restore registers and return once size reached zero
+ b 1b
+endfunc
\ No newline at end of file
diff --git a/libavfilter/arm/merge_neon.S b/libavfilter/arm/merge_neon.S
new file mode 100644
index 0000000000..ae36cf3ca9
--- /dev/null
+++ b/libavfilter/arm/merge_neon.S
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2009-2012 Rémi Denis-Courmont, VLC authors
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+#define dest r0
+#define src1 r1
+#define src2 r2
+#define size r3
+
+ .align 2
+ @ NOTE: Offset and pitch must be multiple of 16-bytes.
+ @ ff_merge8_neon(dest = r0, src1 = r1, src2 = r2, size = r3)
+ @ Stores the per-byte unsigned halving add (vhadd.u8) of src1 and src2 to dest.
+ @ Every load and store carries a :128 alignment qualifier, so all three
+ @ pointers are expected to be 16-byte aligned.
+ @ Handles 64-byte blocks, then at most one 32-byte and one 16-byte block;
+ @ whatever is left below 16 bytes is not processed.
+function ff_merge8_neon, export=1
+ cmp size, #64
+ blo 2f
+1:
+ pld [src1, #64]
+ vld1.u8 {q0-q1}, [src1,:128]!
+ pld [src2, #64]
+ vld1.u8 {q8-q9}, [src2,:128]!
+ vhadd.u8 q0, q0, q8
+ sub size, size, #64
+ vld1.u8 {q2-q3}, [src1,:128]!
+ vhadd.u8 q1, q1, q9
+ vld1.u8 {q10-q11}, [src2,:128]!
+ vhadd.u8 q2, q2, q10
+ cmp size, #64 @ flags consumed by the bhs that closes the loop
+ vhadd.u8 q3, q3, q11
+ vst1.u8 {q0-q1}, [dest,:128]!
+ vst1.u8 {q2-q3}, [dest,:128]!
+ bhs 1b
+2:
+ cmp size, #32
+ blo 3f
+ vld1.u8 {q0-q1}, [src1,:128]!
+ sub size, size, #32
+ vld1.u8 {q8-q9}, [src2,:128]!
+ vhadd.u8 q0, q0, q8
+ vhadd.u8 q1, q1, q9
+ vst1.u8 {q0-q1}, [dest,:128]!
+3:
+ cmp size, #16
+ it lo
+ bxlo lr @ fewer than 16 bytes left: return without touching them
+ vld1.u8 {q0}, [src1,:128]!
+ sub size, size, #16
+ vld1.u8 {q8}, [src2,:128]!
+ vhadd.u8 q0, q0, q8
+ vst1.u8 {q0}, [dest,:128]!
+ bx lr
+endfunc
+
+ .align 2
+ @ ff_merge16_neon(dest = r0, src1 = r1, src2 = r2, size = r3)
+ @ 16-bit variant: vhadd.u16 performs the unsigned halving add per halfword.
+ @ size is counted in bytes. Every load and store carries a :128 alignment
+ @ qualifier, so all three pointers are expected to be 16-byte aligned.
+ @ Handles 64-byte blocks, then at most one 32-byte and one 16-byte block;
+ @ whatever is left below 16 bytes is not processed.
+function ff_merge16_neon, export=1
+ cmp size, #64
+ blo 2f
+1:
+ pld [src1, #64]
+ vld1.u16 {q0-q1}, [src1,:128]!
+ pld [src2, #64]
+ vld1.u16 {q8-q9}, [src2,:128]!
+ vhadd.u16 q0, q0, q8
+ sub size, size, #64
+ vld1.u16 {q2-q3}, [src1,:128]!
+ vhadd.u16 q1, q1, q9
+ vld1.u16 {q10-q11}, [src2,:128]!
+ vhadd.u16 q2, q2, q10
+ cmp size, #64 @ flags consumed by the bhs that closes the loop
+ vhadd.u16 q3, q3, q11
+ vst1.u16 {q0-q1}, [dest,:128]!
+ vst1.u16 {q2-q3}, [dest,:128]!
+ bhs 1b
+2:
+ cmp size, #32
+ blo 3f
+ vld1.u16 {q0-q1}, [src1,:128]!
+ sub size, size, #32
+ vld1.u16 {q8-q9}, [src2,:128]!
+ vhadd.u16 q0, q0, q8
+ vhadd.u16 q1, q1, q9
+ vst1.u16 {q0-q1}, [dest,:128]!
+3:
+ cmp size, #16
+ it lo
+ bxlo lr @ fewer than 16 bytes left: return without touching them
+ vld1.u16 {q0}, [src1,:128]!
+ sub size, size, #16
+ vld1.u16 {q8}, [src2,:128]!
+ vhadd.u16 q0, q0, q8
+ vst1.u16 {q0}, [dest,:128]!
+ bx lr
+endfunc
\ No newline at end of file
diff --git a/libavfilter/vf_fastdeint.c b/libavfilter/vf_fastdeint.c
new file mode 100644
index 0000000000..5ddd8be392
--- /dev/null
+++ b/libavfilter/vf_fastdeint.c
@@ -0,0 +1,588 @@
+/*
+ * Copyright (C) 2015 Aman Gupta <aman at tmm1.net>
+ * 2000-2011 VLC authors and VideoLAN
+ *
+ * Author: Sam Hocevar <sam at zoy.org>
+ * Damien Lucas <nitrox at videolan.org>
+ * Laurent Aimar <fenrir at videolan.org>
+ * Sigmund Augdal Helberg <sigmunau at videolan.org>
+ *
+ * These algorithms are derived from the VLC project's
+ * modules/video_filter/deinterlace/algo_basic.c
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/cpu.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/timestamp.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+/* Deinterlacing algorithm selected through the "mode" option. */
+enum Mode {
+ MODE_DISCARD, /* half-height output: every other input line is copied */
+ MODE_MEAN, /* half-height output: each output line averages a pair of input lines */
+ MODE_BLEND, /* full-height output: each line is averaged with the following one */
+ MODE_BOB, /* two output frames per input frame, field lines repeated */
+ MODE_LINEAR, /* two output frames per input frame, missing lines averaged */
+ MODE_MAX, /* number of modes; used as the option's upper bound + 1 */
+};
+
+/* Row-averaging kernel: writes the average of src1 and src2 to dst; len is passed in bytes by the callers in this file. */
+typedef void (*merge_fn)(void *dst, const void *src1, const void *src2, size_t len);
+
+/* Private state of the fastdeint filter. */
+typedef struct FastDeintContext {
+ const AVClass *class;
+ merge_fn merge; /* row-averaging kernel chosen in config_props() */
+ int merge_size; /* 8 or 16, derived in config_props() from the depth of component 0 */
+ int merge_aligned; /* set to 1 when an ARM assembly kernel is selected */
+ AVFrame *cur, *next; /* one-frame delay line used by the bob/linear path */
+ enum Mode mode; /* value of the "mode" option */
+ int eof; /* set by request_frame() once the end-of-stream flush ran */
+} FastDeintContext;
+
+/**
+ * Portable 8-bit row merge.
+ *
+ * Writes the truncating average (a + b) >> 1 of each pair of bytes from
+ * src1 and src2 to dst.
+ *
+ * @param dst   destination row
+ * @param src1  first source row
+ * @param src2  second source row
+ * @param bytes number of bytes to process
+ */
+static void merge8_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
+{
+    for (size_t i = 0; i < bytes; i++) {
+        const int sum = src1[i] + src2[i];
+        dst[i] = sum >> 1;
+    }
+}
+
+/**
+ * Portable 16-bit row merge.
+ *
+ * Writes the truncating average (a + b) >> 1 of each pair of 16-bit samples
+ * from src1 and src2 to dst.
+ *
+ * @param dst   destination row
+ * @param src1  first source row
+ * @param src2  second source row
+ * @param bytes row length in bytes; bytes / 2 samples are processed
+ */
+static void merge16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
+{
+    const size_t samples = bytes / 2;
+    size_t i = 0;
+
+    while (i < samples) {
+        dst[i] = (src1[i] + src2[i]) >> 1;
+        i++;
+    }
+}
+
+/**
+ * Merge one row of 8-bit samples of arbitrary length.
+ *
+ * When an assembly kernel is in use (s->merge_aligned), only the leading
+ * multiple of 16 bytes is given to it and the remaining tail is averaged by
+ * merge8_c(). Splitting at the tail rather than the head keeps the pointers
+ * passed to the kernel at the start of the row, which matters because the
+ * ARM NEON kernels use 128-bit alignment qualifiers on every access.
+ * The kernel is not called at all for an empty bulk: the ARMv6 loop has no
+ * zero-length check.
+ *
+ * @param s     filter context providing the selected kernel
+ * @param dst   destination row
+ * @param src1  first source row
+ * @param src2  second source row
+ * @param bytes row length in bytes
+ */
+static void merge8_unaligned(FastDeintContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
+{
+    size_t bulk = bytes;
+
+    if (s->merge_aligned)
+        bulk -= bytes % 16;
+
+    if (bulk > 0)
+        s->merge(dst, src1, src2, bulk);
+
+    if (bulk < bytes)
+        merge8_c(dst + bulk, src1 + bulk, src2 + bulk, bytes - bulk);
+}
+
+/**
+ * Merge one row of 16-bit samples of arbitrary length.
+ *
+ * When an assembly kernel is in use (s->merge_aligned), only the leading
+ * multiple of 16 bytes is given to it and the remaining tail is averaged by
+ * merge16_c(). All lengths are kept in bytes, which is the unit both
+ * s->merge and merge16_c() expect; pointer offsets are converted to samples.
+ * Splitting at the tail keeps the pointers passed to the kernel at the start
+ * of the row (the ARM NEON kernels use 128-bit alignment qualifiers), and
+ * the kernel is skipped for an empty bulk because the ARMv6 loop has no
+ * zero-length check.
+ *
+ * @param s     filter context providing the selected kernel
+ * @param dst   destination row
+ * @param src1  first source row
+ * @param src2  second source row
+ * @param bytes row length in bytes
+ */
+static void merge16_unaligned(FastDeintContext *s, uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
+{
+    size_t bulk = bytes;
+
+    if (s->merge_aligned)
+        bulk -= bytes % 16;
+
+    if (bulk > 0)
+        s->merge(dst, src1, src2, bulk);
+
+    if (bulk < bytes) {
+        const size_t done = bulk / 2; /* samples already merged */
+        merge16_c(dst + done, src1 + done, src2 + done, bytes - bulk);
+    }
+}
+
+/**
+ * Merge one row, dispatching on the sample size stored in the context.
+ *
+ * @param s     filter context; s->merge_size selects the 16-bit or 8-bit path
+ * @param dst   destination row
+ * @param src1  first source row
+ * @param src2  second source row
+ * @param bytes row length in bytes
+ */
+static void merge_unaligned(FastDeintContext *s, void *dst, const void *src1, const void *src2, size_t bytes)
+{
+    if (s->merge_size != 16) {
+        merge8_unaligned(s, dst, src1, src2, bytes);
+        return;
+    }
+
+    merge16_unaligned(s, dst, src1, src2, bytes);
+}
+
+#if HAVE_SSE2_INLINE && defined(__x86_64__)
+/*
+ * SSE2 8-bit row merge (inline assembly, x86-64 only).
+ *
+ * Three phases: a scalar head until src1 is 16-byte aligned, a vector loop
+ * handling 16 bytes per iteration, and a scalar tail via merge8_c().
+ * src2 is loaded and dst is stored with movdqu, so only src1 needs to be
+ * aligned (it is used directly as the memory operand of pavgb).
+ * Note that pavgb rounds up, (a + b + 1) >> 1, while the scalar head and
+ * tail truncate.
+ * NOTE(review): the "=m"(*dst) / "m"(*src) operands describe a single byte
+ * although 16 bytes are accessed - confirm this is sufficient for the
+ * compilers in use.
+ */
+static void merge8_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes)
+{
+ /* scalar head: advance until src1 reaches a 16-byte boundary */
+ for(; bytes > 0 && ((uintptr_t)src1 & 15); bytes--)
+ *dst++ = ( *src1++ + *src2++ ) >> 1;
+
+ for (; bytes >= 16; bytes -= 16) {
+ __asm__ __volatile__( "movdqu %2,%%xmm1;"
+ "pavgb %1, %%xmm1;"
+ "movdqu %%xmm1, %0" :"=m" (*dst):
+ "m" (*src1),
+ "m" (*src2) : "xmm1" );
+ dst += 16;
+ src1 += 16;
+ src2 += 16;
+ }
+
+ /* scalar tail: fewer than 16 bytes remain */
+ if (bytes > 0) {
+ merge8_c(dst, src1, src2, bytes);
+ }
+}
+/*
+ * SSE2 16-bit row merge (inline assembly, x86-64 only).
+ *
+ * bytes is converted to a sample count up front. Three phases: a scalar head
+ * until src1 is 16-byte aligned, a vector loop handling 8 samples per
+ * iteration, and a scalar tail via merge16_c() (which takes its length in
+ * bytes, hence words * 2).
+ * Note that pavgw rounds up, (a + b + 1) >> 1, while the scalar head and
+ * tail truncate.
+ * NOTE(review): the "=m"(*dst) / "m"(*src) operands describe a single
+ * sample although 16 bytes are accessed - confirm this is sufficient for
+ * the compilers in use.
+ */
+static void merge16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes)
+{
+ size_t words = bytes / 2;
+
+ /* scalar head: advance until src1 reaches a 16-byte boundary */
+ for(; words > 0 && ((uintptr_t)src1 & 15); words--)
+ *dst++ = ( *src1++ + *src2++ ) >> 1;
+
+ for (; words >= 8; words -= 8) {
+ __asm__ __volatile__( "movdqu %2,%%xmm1;"
+ "pavgw %1, %%xmm1;"
+ "movdqu %%xmm1, %0" :"=m" (*dst):
+ "m" (*src1),
+ "m" (*src2) : "xmm1" );
+ dst += 8;
+ src1 += 8;
+ src2 += 8;
+ }
+
+ /* scalar tail: fewer than 8 samples remain */
+ if (words > 0) {
+ merge16_c(dst, src1, src2, words * 2);
+ }
+}
+#define merge8 merge8_sse2
+#define merge16 merge16_sse2
+#else
+#define merge8 merge8_c
+#define merge16 merge16_c
+#endif
+
+/*
+ * Render one output frame from one input frame for the discard, mean and
+ * blend modes (filter_frame() routes bob/linear elsewhere).
+ *
+ * For every plane:
+ *  - discard: copies input lines 0, 2, 4, ...
+ *  - mean:    averages input lines (0,1), (2,3), ...
+ *  - blend:   copies line 0, then writes the average of lines (n, n+1)
+ *
+ * The row counter starts from out->height and, in discard/mean, is
+ * decremented twice per written row, so out is expected to carry the input
+ * frame height even though only half as many rows are produced.
+ * On av_image_get_linesize() failure the function logs and returns, leaving
+ * the remaining planes unwritten.
+ */
+static void render_image_single(FastDeintContext *s, AVFrame *out, AVFrame *frame)
+{
+ int i, planes_nb = 0;
+ enum Mode mode = s->mode;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
+
+ /* number of planes = highest plane index used by any component, plus one */
+ for (i = 0; i < desc->nb_components; i++)
+ planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+
+ for (i = 0; i < planes_nb; i++) {
+ int height, bwidth;
+ int dst_linesize, src_linesize;
+ const uint8_t *src;
+ uint8_t *dst;
+
+ /* bwidth: number of bytes per row for this plane */
+ bwidth = av_image_get_linesize(out->format, out->width, i);
+ if (bwidth < 0) {
+ av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n");
+ return;
+ }
+
+ height = out->height;
+ /* planes 1 and 2 use the vertically subsampled height */
+ if (i == 1 || i == 2) {
+ height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h);
+ }
+
+ src = frame->data[i];
+ dst = out->data[i];
+ dst_linesize = out->linesize[i];
+ src_linesize = frame->linesize[i];
+
+ if (mode == MODE_BLEND) {
+ // Copy first line
+ memcpy(dst, src, bwidth);
+ dst += dst_linesize;
+ height--;
+ }
+
+ // Merge remaining lines
+ for (; height > 0; height--) {
+ if (mode == MODE_DISCARD)
+ memcpy(dst, src, bwidth);
+ else
+ merge_unaligned(s, dst, src, src + src_linesize, bwidth);
+ dst += dst_linesize;
+ src += src_linesize;
+ /* half-height modes consume two input lines per output line */
+ if (mode == MODE_MEAN || mode == MODE_DISCARD) {
+ src += src_linesize;
+ height--;
+ }
+ }
+ }
+ /* discard only uses memcpy; every other mode went through the merge kernels */
+ if (mode != MODE_DISCARD)
+ emms_c();
+}
+
+/*
+ * Render one full-height output frame from a single field of the input
+ * frame (bob and linear modes).
+ *
+ * field == 0 keeps the even input lines (starting at line 0), field == 1
+ * keeps the odd ones (starting at line 1). Each kept line is copied and the
+ * line below it is filled either with a duplicate (bob) or with the average
+ * of the kept line and the next line of the same field (linear).
+ *
+ * NOTE(review): for field == 0 the final step copies one more input line
+ * after the loop; with an odd plane height this appears to address one row
+ * past the plane - confirm that callers only pass even heights.
+ * On av_image_get_linesize() failure the function logs and returns, leaving
+ * the remaining planes unwritten.
+ */
+static void render_image_doubler(FastDeintContext *s, AVFrame *out, AVFrame *frame, int field)
+{
+ int i, planes_nb = 0;
+ enum Mode mode = s->mode;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
+
+ /* number of planes = highest plane index used by any component, plus one */
+ for (i = 0; i < desc->nb_components; i++)
+ planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+
+ for (i = 0; i < planes_nb; i++) {
+ int height, bwidth;
+ int dst_linesize, src_linesize;
+ const uint8_t *src;
+ uint8_t *dst;
+
+ /* bwidth: number of bytes per row for this plane */
+ bwidth = av_image_get_linesize(out->format, out->width, i);
+ if (bwidth < 0) {
+ av_log(s, AV_LOG_ERROR, "av_image_get_linesize failed\n");
+ return;
+ }
+ height = out->height;
+ /* planes 1 and 2 use the vertically subsampled height */
+ if (i == 1 || i == 2) {
+ height = FF_CEIL_RSHIFT(out->height, desc->log2_chroma_h);
+ }
+
+ src = frame->data[i];
+ dst = out->data[i];
+ src_linesize = frame->linesize[i];
+ dst_linesize = out->linesize[i];
+
+ // For BOTTOM field we need to add the first line
+ if (field == 1) {
+ memcpy(dst, src, bwidth);
+ dst += dst_linesize;
+ src += src_linesize;
+ height--;
+ }
+
+ /* reserve the rows written after the loop */
+ height -= 2;
+
+ /* each iteration writes two output rows from one field line */
+ for (; height > 0; height-=2) {
+ memcpy(dst, src, bwidth);
+ dst += dst_linesize;
+
+ if (mode == MODE_LINEAR)
+ merge_unaligned(s, dst, src, src + 2 * src_linesize, bwidth);
+ else
+ memcpy(dst, src, bwidth);
+ dst += dst_linesize;
+
+ src += src_linesize * 2;
+ }
+
+ memcpy(dst, src, bwidth);
+
+ // For TOP field we need to add the last line
+ if (field == 0)
+ {
+ dst += dst_linesize;
+ src += src_linesize;
+ memcpy(dst, src, bwidth);
+ }
+ }
+ /* only linear mode goes through the merge kernels */
+ if (mode == MODE_LINEAR)
+ emms_c();
+}
+
+/**
+ * Process one input frame in the discard, mean and blend modes.
+ *
+ * Frames not flagged as interlaced are forwarded untouched. Otherwise a new
+ * buffer is allocated, the frame properties are copied, the picture is
+ * rendered and the result is sent downstream.
+ *
+ * The buffer is allocated with the input dimensions because
+ * render_image_single() derives its row count from the input height; once
+ * rendering is done the frame is stamped with the output link dimensions so
+ * that it matches what config_props() advertised (half height in the
+ * discard/mean modes).
+ *
+ * @param link  input link the frame arrived on
+ * @param frame input frame; ownership is taken
+ * @return 0 or a negative AVERROR code
+ */
+static int filter_frame_single(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *ctx = link->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    FastDeintContext *s = ctx->priv;
+    AVFrame *out;
+    int ret;
+
+    if (!frame->interlaced_frame) {
+        /* NOTE(review): in discard/mean mode this forwards a full-height
+         * frame on a half-height link - confirm the intended handling. */
+        return ff_filter_frame(outlink, frame);
+    }
+
+    out = ff_get_video_buffer(outlink, link->w, link->h);
+    if (!out) {
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    ret = av_frame_copy_props(out, frame);
+    if (ret < 0) {
+        av_frame_free(&out);
+        av_frame_free(&frame);
+        return ret;
+    }
+    out->interlaced_frame = 0;
+    render_image_single(s, out, frame);
+
+    /* The rendered rows occupy the top of the buffer; report the size the
+     * output link was configured with. */
+    out->width  = outlink->w;
+    out->height = outlink->h;
+
+    av_frame_free(&frame);
+    return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Allocate an output frame and copy the properties of frame into it.
+ *
+ * VideoToolbox frames get an empty AVFrame; every other format gets a video
+ * buffer from the output link sized with the dimensions of link.
+ *
+ * @param link  input link of the filter
+ * @param frame frame whose properties are copied
+ * @return the new frame, or NULL on allocation failure
+ */
+static AVFrame *copy_frame(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *ctx = link->dst;
+    AVFrame *dup = frame->format == AV_PIX_FMT_VIDEOTOOLBOX
+                 ? av_frame_alloc()
+                 : ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
+
+    if (dup)
+        av_frame_copy_props(dup, frame);
+
+    return dup;
+}
+
+/**
+ * Process one input frame in the bob and linear modes.
+ *
+ * Input is delayed by one frame: the frame received now is parked in
+ * s->next and the previously parked frame is the one that gets rendered, so
+ * the timestamp of the following frame is known when the second field is
+ * emitted. Timestamps are doubled because config_props() halves the output
+ * time base.
+ *
+ * For an interlaced frame two output frames are sent: the first field at
+ * 2 * pts and the second field at pts + next_pts (the midpoint between this
+ * frame and the next, expressed in the halved time base). If either
+ * timestamp is unknown, the second field is sent without a timestamp instead
+ * of doing arithmetic on AV_NOPTS_VALUE.
+ *
+ * @param link input link the frame arrived on
+ * @param in   newly received frame; ownership is taken
+ * @return 0 or a negative AVERROR code
+ */
+static int filter_frame_double(AVFilterLink *link, AVFrame *in)
+{
+    AVFilterContext *ctx = link->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    FastDeintContext *s = ctx->priv;
+    AVFrame *frame, *out, *out2;
+    int tff, ret;
+
+    /* Shift the delay line; the very first call only primes it. */
+    frame   = s->next;
+    s->next = in;
+    s->cur  = NULL; /* the frame being rendered is owned locally from here on */
+
+    if (!frame)
+        return 0;
+
+    if (!frame->interlaced_frame) {
+        if (frame->pts != AV_NOPTS_VALUE)
+            frame->pts *= 2;
+        return ff_filter_frame(outlink, frame);
+    }
+
+    tff = frame->top_field_first;
+
+    /* First field in display order. */
+    out = copy_frame(link, frame);
+    if (!out) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    out->interlaced_frame = 0;
+    if (out->pts != AV_NOPTS_VALUE)
+        out->pts = out->pts * 2;
+    render_image_doubler(s, out, frame, !tff);
+
+    ret = ff_filter_frame(outlink, out);
+    if (ret < 0)
+        goto end;
+
+    /* Second field. */
+    out2 = copy_frame(link, frame);
+    if (!out2) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    out2->interlaced_frame = 0;
+    /* Closed captions were already delivered with the first field. */
+    av_frame_remove_side_data(out2, AV_FRAME_DATA_A53_CC);
+    if (out2->pts != AV_NOPTS_VALUE) {
+        if (s->next->pts != AV_NOPTS_VALUE)
+            out2->pts = frame->pts + s->next->pts;
+        else
+            out2->pts = AV_NOPTS_VALUE;
+    }
+    render_image_doubler(s, out2, frame, tff);
+
+    ret = ff_filter_frame(outlink, out2);
+
+end:
+    av_frame_free(&frame);
+    return ret;
+}
+
+/**
+ * Input pad callback: route the frame to the frame-doubling path for the
+ * bob/linear modes and to the single-frame path for every other mode.
+ *
+ * @param link  input link
+ * @param frame received frame, must not be NULL
+ * @return result of the selected path
+ */
+static int filter_frame(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *ctx = link->dst;
+    FastDeintContext *s = ctx->priv;
+
+    av_assert0(frame);
+
+    switch (s->mode) {
+    case MODE_BOB:
+    case MODE_LINEAR:
+        return filter_frame_double(link, frame);
+    default:
+        return filter_frame_single(link, frame);
+    }
+}
+
+/**
+ * Release the frames still held in the delay line.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    FastDeintContext *priv = ctx->priv;
+
+    av_frame_free(&priv->next);
+    av_frame_free(&priv->cur);
+}
+
+/**
+ * Declare the pixel formats accepted by the filter: planar YUV, gray and
+ * planar GBR layouts.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the list cannot be allocated,
+ *         otherwise the result of ff_set_common_formats()
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUV422P,   AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV410P,   AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUVJ420P,  AV_PIX_FMT_YUVJ422P,  AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUV420P9,  AV_PIX_FMT_YUV422P9,  AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P,  AV_PIX_FMT_YUVA422P,  AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_GBRP,      AV_PIX_FMT_GBRP9,     AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12,    AV_PIX_FMT_GBRP14,    AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(supported);
+
+    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
+}
+
+#if ARCH_ARM
+#include "libavutil/arm/cpu.h"
+#endif
+#if ARCH_AARCH64
+#include "libavutil/aarch64/cpu.h"
+#endif
+#if ARCH_AARCH64 || ARCH_ARM
+/* Assembly row-merge kernels (see arm/merge_*.S and aarch64/merge_neon.S); bytes is the row length in bytes. */
+void ff_merge8_neon(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes);
+void ff_merge16_neon(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes);
+/* The ARMv6 objects are only listed in arm/Makefile; config_props() references them under ARCH_ARM only. */
+void ff_merge8_armv6(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t bytes);
+void ff_merge16_armv6(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, size_t bytes);
+#endif
+
+/**
+ * Configure the output link and pick the row-merge kernel.
+ *
+ * Geometry and timing start as a copy of the input link. The half-height
+ * modes (mean, discard) halve the height and the sample aspect ratio; the
+ * frame-doubling modes (linear, bob) halve the time base and double the
+ * frame rate. The kernel is chosen from the depth of the first component
+ * and, on ARM/AArch64, replaced by an assembly version when the CPU
+ * supports it.
+ *
+ * @param link output link
+ * @return 0
+ */
+static int config_props(AVFilterLink *link)
+{
+    AVFilterContext *ctx = link->src;
+    FastDeintContext *s = ctx->priv;
+    const AVFilterLink *inlink = ctx->inputs[0];
+    const AVPixFmtDescriptor *pix;
+    const int half_height = s->mode == MODE_MEAN   || s->mode == MODE_DISCARD;
+    const int double_rate = s->mode == MODE_LINEAR || s->mode == MODE_BOB;
+#if ARCH_AARCH64 || ARCH_ARM
+    int cpu_flags = av_get_cpu_flags();
+#endif
+
+    link->w                   = inlink->w;
+    link->h                   = inlink->h;
+    link->time_base           = inlink->time_base;
+    link->frame_rate          = inlink->frame_rate;
+    link->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    if (half_height) {
+        link->h /= 2;
+        link->sample_aspect_ratio = av_mul_q(link->sample_aspect_ratio, av_make_q(1, 2));
+    }
+    if (double_rate) {
+        link->time_base  = av_mul_q(link->time_base,  av_make_q(1, 2));
+        link->frame_rate = av_mul_q(link->frame_rate, av_make_q(2, 1));
+    }
+
+    pix = av_pix_fmt_desc_get(link->format);
+    if (pix->comp[0].depth > 8) {
+        s->merge_size = 16;
+        s->merge      = (merge_fn)merge16;
+    } else {
+        s->merge_size = 8;
+        s->merge      = (merge_fn)merge8;
+    }
+
+#if ARCH_ARM
+    if (have_armv6(cpu_flags)) {
+        s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_armv6 : (merge_fn)ff_merge8_armv6;
+        s->merge_aligned = 1;
+    }
+#endif
+#if ARCH_AARCH64 || ARCH_ARM
+    /* NEON is checked last so it takes precedence over ARMv6. */
+    if (have_neon(cpu_flags)) {
+        s->merge = s->merge_size == 16 ? (merge_fn)ff_merge16_neon : (merge_fn)ff_merge8_neon;
+        s->merge_aligned = 1;
+    }
+#endif
+
+    return 0;
+}
+
+/**
+ * Output pad callback: pull a frame from the input and, at end of stream,
+ * flush the frame still parked in the delay line.
+ *
+ * filter_frame_double() always leaves s->cur empty and keeps the most recent
+ * input in s->next, so the pending frame is detected through s->next. To
+ * push it out, a clone is fed back into filter_frame() as a stand-in for the
+ * "following" frame; its timestamp is advanced by one frame duration taken
+ * from the input link frame rate (one time-base tick if the rate is
+ * unknown) so the second field of the last frame gets a sensible timestamp.
+ * The clone itself stays in s->next and is released by uninit().
+ *
+ * @param link output link
+ * @return 0 on success, AVERROR_EOF once everything was flushed, or another
+ *         negative AVERROR code
+ */
+static int request_frame(AVFilterLink *link)
+{
+    AVFilterContext *ctx = link->src;
+    FastDeintContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFrame *last;
+    int ret;
+
+    if (s->eof)
+        return AVERROR_EOF;
+
+    ret = ff_request_frame(inlink);
+
+    /* Nothing to flush: report errors (including EOF) as they are. */
+    if (ret != AVERROR_EOF || !s->next)
+        return ret < 0 ? ret : 0;
+
+    last = av_frame_clone(s->next);
+    if (!last)
+        return AVERROR(ENOMEM);
+
+    if (last->pts != AV_NOPTS_VALUE) {
+        int64_t duration = 1;
+
+        if (inlink->frame_rate.num > 0 && inlink->frame_rate.den > 0)
+            duration = FFMAX(av_rescale_q(1, av_inv_q(inlink->frame_rate),
+                                          inlink->time_base), 1);
+        last->pts += duration;
+    }
+
+    s->eof = 1;
+    return filter_frame(inlink, last);
+}
+
+#define OFFSET(x) offsetof(FastDeintContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
+
+/* Filter options: a single integer "mode" (default MODE_BLEND, range 0..MODE_MAX-1) plus one named constant per enum Mode value. */
+static const AVOption fastdeint_options[] = {
+ { "mode", "specify the deinterlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_BLEND}, 0, MODE_MAX-1, FLAGS, "mode" },
+ CONST("discard", "discard bottom frame", MODE_DISCARD, "mode"),
+ CONST("mean", "half resolution blender", MODE_MEAN, "mode"),
+ CONST("blend", "full resolution blender", MODE_BLEND, "mode"),
+ CONST("bob", "bob doubler", MODE_BOB, "mode"),
+ CONST("linear", "bob doubler with linear interpolation", MODE_LINEAR, "mode"),
+
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(fastdeint);
+
+/* Single video input pad; every incoming frame is handed to filter_frame(). */
+static const AVFilterPad fastdeint_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = filter_frame,
+ },
+ { NULL }
+};
+
+/* Single video output pad; config_props() sets its geometry/timing and request_frame() handles the end-of-stream flush. */
+static const AVFilterPad fastdeint_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = config_props,
+ .request_frame = request_frame
+ },
+ { NULL }
+};
+
+/* Filter definition referenced by the extern declaration added to allfilters.c. */
+AVFilter ff_vf_fastdeint = {
+ .name = "fastdeint",
+ .description = NULL_IF_CONFIG_SMALL("fast deinterlacing algorithms"),
+ .priv_size = sizeof(FastDeintContext),
+ .priv_class = &fastdeint_class,
+ .uninit = uninit,
+ .query_formats = query_formats,
+ .inputs = fastdeint_inputs,
+ .outputs = fastdeint_outputs,
+};
--
2.20.1
More information about the ffmpeg-devel
mailing list