[FFmpeg-devel] [PATCH] avfilter/vf_interlace: add complex vertcal lowpassfilter
James Almer
jamrial at gmail.com
Fri Mar 10 21:59:21 EET 2017
On 3/8/2017 1:58 PM, Thomas Mundt wrote:
> Hi,
>
> attached patch adds a complex (-1 2 6 2 -1) vertcal lowpassfilter to vf_interlace. This will better retain detail and reduce blurring compared to the existing (1 2 1) filter.
>
> Please comment.
>
>
> 0001-avfilter-vf_interlace-add-complex-vertcal-lowpassfil.patch
>
>
> From f157bc9b898d1215f8ec10b301720a9d9ff03c63 Mon Sep 17 00:00:00 2001
> From: Thomas Mundt <loudmax at yahoo.de>
> Date: Wed, 8 Mar 2017 17:33:18 +0100
> Subject: [PATCH] avfilter/vf_interlace: add complex vertcal lowpassfilter Add
> a complex (-1 2 6 2 -1) filter to reduce blurring compared to the existing (1
> 2 1) filter.
>
> Signed-off-by: Thomas Mundt <loudmax at yahoo.de>
> ---
> doc/filters.texi | 13 ++++++--
> libavfilter/interlace.h | 2 ++
> libavfilter/vf_interlace.c | 42 ++++++++++++++++++++++--
> libavfilter/x86/vf_interlace.asm | 65 +++++++++++++++++++++++++++++++++++++
> libavfilter/x86/vf_interlace_init.c | 17 +++++++---
> 5 files changed, 130 insertions(+), 9 deletions(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index b5265d9..0041d39 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -9109,8 +9109,17 @@ This determines whether the interlaced frame is taken from the even
> (tff - default) or odd (bff) lines of the progressive frame.
>
> @item lowpass
> -Enable (default) or disable the vertical lowpass filter to avoid twitter
> -interlacing and reduce moire patterns.
> +Vertical lowpass filter to avoid twitter interlacing and
> +reduce moire patterns.
> +
> + at table @samp
> + at item 0
> +Disable vertical lowpass filter
> + at item 1
> +Enable linear filter (default)
> + at item 2
> +Enable complex filter
> + at end table
> @end table
>
> @section kerndeint
> diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
> index da073ae..7ad457e 100644
> --- a/libavfilter/interlace.h
> +++ b/libavfilter/interlace.h
> @@ -51,6 +51,8 @@ typedef struct InterlaceContext {
> AVFrame *cur, *next; // the two frames from which the new one is obtained
> void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
> const uint8_t *srcp_above, const uint8_t *srcp_below);
> + void (*lowpass_line_complex)(uint8_t *dstp, ptrdiff_t linesize,
> + const uint8_t *srcp, int mref, int pref);
Why not keep a single prototype, passing mref and pref for both linear
and complex? You can calculate srcp_above and srcp_below for linear like
you're doing it for complex in both the c and asm versions.
In any case, mref and pref should be ptrdiff_t and not int.
> } InterlaceContext;
>
> void ff_interlace_init_x86(InterlaceContext *interlace);
> diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
> index efa3128..e8d5de4 100644
> --- a/libavfilter/vf_interlace.c
> +++ b/libavfilter/vf_interlace.c
> @@ -47,7 +47,7 @@ static const AVOption interlace_options[] = {
> { "bff", "bottom field first", 0,
> AV_OPT_TYPE_CONST, {.i64 = MODE_BFF }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "scan" },
> { "lowpass", "set vertical low-pass filter", OFFSET(lowpass),
> - AV_OPT_TYPE_BOOL, {.i64 = 1 }, 0, 1, .flags = FLAGS },
> + AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 2, .flags = FLAGS },
Maybe add AV_OPT_TYPE_CONST values "off", "linear" and "complex".
> { NULL }
> };
>
> @@ -67,6 +67,24 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
> }
> }
>
> +static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
> + const uint8_t *srcp, int mref, int pref)
> +{
> + const uint8_t *srcp_above = srcp + mref;
> + const uint8_t *srcp_below = srcp + pref;
> + const uint8_t *srcp_above2 = srcp + mref * 2;
> + const uint8_t *srcp_below2 = srcp + pref * 2;
> + int i;
> + for (i = 0; i < linesize; i++) {
> + // this calculation is an integer representation of
> + // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
> + // '4 +' is for rounding.
> + dstp[i] = av_clip_uint8((4 + (srcp[i] << 2)
> + + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1)
> + - srcp_above2[i] - srcp_below2[i]) >> 3);
> + }
> +}
> +
> static const enum AVPixelFormat formats_supported[] = {
> AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
> AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P,
> @@ -116,7 +134,11 @@ static int config_out_props(AVFilterLink *outlink)
>
>
> if (s->lowpass) {
> - s->lowpass_line = lowpass_line_c;
> + if (s->lowpass == 1)
> + s->lowpass_line = lowpass_line_c;
> + else if (s->lowpass == 2)
> + s->lowpass_line_complex = lowpass_line_complex_c;
> +
> if (ARCH_X86)
> ff_interlace_init_x86(s);
> }
> @@ -150,7 +172,7 @@ static void copy_picture_field(InterlaceContext *s,
> srcp += src_frame->linesize[plane];
> dstp += dst_frame->linesize[plane];
> }
> - if (lowpass) {
> + if (lowpass == 1) {
> int srcp_linesize = src_frame->linesize[plane] * 2;
> int dstp_linesize = dst_frame->linesize[plane] * 2;
> for (j = lines; j > 0; j--) {
> @@ -164,6 +186,20 @@ static void copy_picture_field(InterlaceContext *s,
> dstp += dstp_linesize;
> srcp += srcp_linesize;
> }
> + } else if (lowpass == 2) {
> + int srcp_linesize = src_frame->linesize[plane] * 2;
> + int dstp_linesize = dst_frame->linesize[plane] * 2;
> + for (j = lines; j > 0; j--) {
> + int pref = src_frame->linesize[plane];
> + int mref = -pref;
> + if (j >= (lines - 1))
> + mref = 0;
> + else if (j <= 2)
> + pref = 0;
> + s->lowpass_line_complex(dstp, cols, srcp, mref, pref);
> + dstp += dstp_linesize;
> + srcp += srcp_linesize;
> + }
> } else {
> av_image_copy_plane(dstp, dst_frame->linesize[plane] * 2,
> srcp, src_frame->linesize[plane] * 2,
> diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
> index f70c700..777a95f 100644
> --- a/libavfilter/x86/vf_interlace.asm
> +++ b/libavfilter/x86/vf_interlace.asm
> @@ -25,6 +25,8 @@
>
> SECTION_RODATA
>
> +pw_4: times 8 dw 4
> +
> SECTION .text
>
> %macro LOWPASS_LINE 0
> @@ -56,6 +58,63 @@ cglobal lowpass_line, 5, 5, 7
> add r1, 2*mmsize
> jl .loop
> REP_RET
> +
> +%endmacro
> +
> +%macro LOWPASS_LINE_COMPLEX 0
> +cglobal lowpass_line_complex, 3, 5, 7, dst, h, src, mref, pref
Make this 5, 5, 7 and remove the two movs below.
> + mov r3, mrefmp
> + mov r4, prefmp
> +
> + pxor m6, m6
> +.loop:
> + mova m0, [srcq+r3]
[srcq+mrefq]
> + mova m2, [srcq+r4]
[srcq+prefq]
> + mova m1, m0
> + mova m3, m2
> + punpcklbw m0, m6
> + punpcklbw m2, m6
> + punpckhbw m1, m6
> + punpckhbw m3, m6
> + paddw m0, m2
> + paddw m1, m3
> + mova m2, [srcq+r3*2]
[srcq+mrefq*2]
> + mova m4, [srcq+r4*2]
[srcq+prefq*2]
> + mova m3, m2
> + mova m5, m4
> + punpcklbw m2, m6
> + punpcklbw m4, m6
> + punpckhbw m3, m6
> + punpckhbw m5, m6
> + paddw m2, m4
> + paddw m3, m5
> + mova m4, [srcq]
> + mova m5, m4
> + punpcklbw m4, m6
> + punpckhbw m5, m6
> + paddw m0, m4
> + paddw m1, m5
> + psllw m0, 1
> + psllw m1, 1
> + psllw m4, 2
> + psllw m5, 2
> + paddw m0, m4
> + paddw m1, m5
> + paddw m0, [pw_4]
> + paddw m1, [pw_4]
> + psubusw m0, m2
> + psubusw m1, m3
> + psrlw m0, 3
> + psrlw m1, 3
> + packuswb m0, m1
> + mova [dstq], m0
> +
> + add dstq, mmsize
> + add srcq, mmsize
> + sub DWORD hm, mmsize
sub hd, mmsize
> + jg .loop
> +REP_RET
> +
> %endmacro
>
> INIT_XMM sse2
> @@ -63,3 +122,9 @@ LOWPASS_LINE
>
> INIT_XMM avx
> LOWPASS_LINE
> +
> +INIT_XMM sse2
> +LOWPASS_LINE_COMPLEX
> +
> +INIT_XMM avx
> +LOWPASS_LINE_COMPLEX
There's no reason to add an AVX version. You're not taking advantage of
the 3 operand instruction format, or any of the new instructions, or the
32 byte regs.
If you merge some of the movas with punpck* instructions and can notice
a difference when benching, then AVX would make sense.
> diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
> index 52a22f8..947ff9a 100644
> --- a/libavfilter/x86/vf_interlace_init.c
> +++ b/libavfilter/x86/vf_interlace_init.c
> @@ -35,12 +35,21 @@ void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
> const uint8_t *srcp_above,
> const uint8_t *srcp_below);
>
> +void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
> + const uint8_t *srcp, int mref, int pref);
> +void ff_lowpass_line_complex_avx (uint8_t *dstp, ptrdiff_t linesize,
> + const uint8_t *srcp, int mref, int pref);
> +
> av_cold void ff_interlace_init_x86(InterlaceContext *s)
> {
> int cpu_flags = av_get_cpu_flags();
>
> - if (EXTERNAL_SSE2(cpu_flags))
> - s->lowpass_line = ff_lowpass_line_sse2;
> - if (EXTERNAL_AVX(cpu_flags))
> - s->lowpass_line = ff_lowpass_line_avx;
> + if (EXTERNAL_SSE2(cpu_flags)) {
> + s->lowpass_line = ff_lowpass_line_sse2;
> + s->lowpass_line_complex = ff_lowpass_line_complex_sse2;
> + }
> + if (EXTERNAL_AVX(cpu_flags)) {
> + s->lowpass_line = ff_lowpass_line_avx;
> + s->lowpass_line_complex = ff_lowpass_line_complex_avx;
> + }
> }
> -- 2.7.4.windows.1
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel
mailing list