[FFmpeg-devel] [PATCH v2] avfilter: add nonlinearstretch filter.
Nicolas George
george at nsup.org
Tue Jul 7 17:23:12 EEST 2020
Mathias Rasmussen (12020-06-25):
> ---
> Changelog | 1 +
> doc/filters.texi | 34 ++++
> libavfilter/Makefile | 1 +
> libavfilter/allfilters.c | 1 +
> libavfilter/version.h | 2 +-
> libavfilter/vf_nonlinearstretch.c | 268 +++++++++++++++++++++++++
> tests/fate/filter-video.mak | 3 +
> tests/ref/fate/filter-nonlinearstretch | 0
> 8 files changed, 309 insertions(+), 1 deletion(-)
> create mode 100644 libavfilter/vf_nonlinearstretch.c
> create mode 100644 tests/ref/fate/filter-nonlinearstretch
Just a quick preliminary review.
>
> diff --git a/Changelog b/Changelog
> index a60e7d2eb8..d6f0b0ac74 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
> releases are sorted from youngest to oldest.
>
> version <next>:
> +- Nonlinear stretch filter
> - AudioToolbox output device
> - MacCaption demuxer
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 551604a143..5657814c13 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -13989,6 +13989,40 @@ Add temporal and uniform noise to input video:
> noise=alls=20:allf=t+u
> @end example
>
> +@section nonlinearstretch
> +
> +Nonlinear stretch video input frame.
> +
> +The filter stretches the input horizontally to a given target width by gradually increasing
> +the stretching amount from the middle towards the sides.
> +
> +Commonly used to stretch videos from 4:3 to 16:9 aspect ratio.
> +
> +The filter accepts the following options:
> +
> +@table @option
> +@item width
> +Set output width.
I am ok with that for a first version, but it is not very convenient to
be unable to select the width as a function of the existing
properties.
Also, please document the default value.
> +@item a
> +Set stretch factor exponent. Defaults to @code{2.0}.
> +A larger value retains more of the original proportions around center, while increasing stretching along the sides.
> +A value of 1.0 relaxes the filter to a linear stretch.
> +@item interpolate
> +Enable/disable linear interpolation. Enabled by default.
> +@end table
> +
> +@subsection Examples
> +
> +Stretch input width to 2560 with less stretching around the middle of the frame.
> +@example
> +nonlinearstretch=width=2560:a=2.5
> +@end example
> +
> +Stretch input width to 1920 with default stretch factor and no interpolation.
> +@example
> +nonlinearstretch=width=1920:interpolate=0
> +@end example
> +
> @section normalize
>
> Normalize RGB video (aka histogram stretching, contrast stretching).
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 5123540653..469c4ca66f 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -321,6 +321,7 @@ OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o ope
> OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o
> OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o
> OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o
> +OBJS-$(CONFIG_NONLINEARSTRETCH_FILTER) += vf_nonlinearstretch.o
> OBJS-$(CONFIG_NORMALIZE_FILTER) += vf_normalize.o
> OBJS-$(CONFIG_NULL_FILTER) += vf_null.o
> OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 1183e40267..d7be2a2ca2 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -306,6 +306,7 @@ extern AVFilter ff_vf_nlmeans_opencl;
> extern AVFilter ff_vf_nnedi;
> extern AVFilter ff_vf_noformat;
> extern AVFilter ff_vf_noise;
> +extern AVFilter ff_vf_nonlinearstretch;
> extern AVFilter ff_vf_normalize;
> extern AVFilter ff_vf_null;
> extern AVFilter ff_vf_ocr;
> diff --git a/libavfilter/version.h b/libavfilter/version.h
> index 37015085fa..308fbe07c3 100644
> --- a/libavfilter/version.h
> +++ b/libavfilter/version.h
> @@ -30,7 +30,7 @@
> #include "libavutil/version.h"
>
> #define LIBAVFILTER_VERSION_MAJOR 7
> -#define LIBAVFILTER_VERSION_MINOR 86
> +#define LIBAVFILTER_VERSION_MINOR 87
> #define LIBAVFILTER_VERSION_MICRO 100
>
>
> diff --git a/libavfilter/vf_nonlinearstretch.c b/libavfilter/vf_nonlinearstretch.c
> new file mode 100644
> index 0000000000..f6c7f46bc6
> --- /dev/null
> +++ b/libavfilter/vf_nonlinearstretch.c
> @@ -0,0 +1,268 @@
> +/*
> + * Copyright (c) 2020 Mathias V. Rasmussen
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Nonlinear stretch filter
> + */
> +
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/opt.h"
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "video.h"
> +#include "math.h"
> +
> +typedef struct NonlinearStretchContext {
> + const AVClass *class;
> + int target_width;
> + double exp;
> + int interpolate;
> +
> + int nb_planes;
> + int plane_width[2];
> + int plane_height[2];
> + double *plane_xmap[2];
> +} NonlinearStretchContext;
> +
> +typedef struct ThreadData {
> + AVFrame *in, *out;
> +} ThreadData;
> +
> +#define OFFSET(x) offsetof(NonlinearStretchContext, x)
> +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
> +static const AVOption nonlinearstretch_options[] = {
> + { "width", "set target width", OFFSET(target_width), AV_OPT_TYPE_INT, {.i64=0}, 0, UINT16_MAX, FLAGS },
> + { "a", "set exponential stretch factor", OFFSET(exp), AV_OPT_TYPE_DOUBLE, {.dbl=2.}, 1, 50, FLAGS },
> + { "interpolate", "perform linear interpolation", OFFSET(interpolate), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
> + { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(nonlinearstretch);
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> + // TODO: which formats are supported when using `av_pix_fmt_count_planes()` and vsub/hsub?
> + // Is there a way to know the number of planes used? E.g. is it always at most 3?
> + // Lastly, is it necessary to support RGB style formats? I assume ffmpeg is doing some form of conversion if not?
> + static const enum AVPixelFormat pix_fmts[] = {
> + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
> + // AV_PIX_FMT_ARGB, AV_PIX_FMT_RGBA,
> + // AV_PIX_FMT_ABGR, AV_PIX_FMT_BGRA,
> + // AV_PIX_FMT_0RGB, AV_PIX_FMT_RGB0,
> + // AV_PIX_FMT_0BGR, AV_PIX_FMT_BGR0,
> + // AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
> + AV_PIX_FMT_GRAY8,
> + AV_PIX_FMT_YUV410P,
> + AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
> + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P,
> + AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA420P,
> + AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUVA420P10LE,
> + AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUVA444P10LE,
> + AV_PIX_FMT_YUV420P12LE,
> + AV_PIX_FMT_YUV444P12LE,
> + AV_PIX_FMT_YUV444P16LE, AV_PIX_FMT_YUVA444P16LE,
> + AV_PIX_FMT_YUV420P16LE, AV_PIX_FMT_YUVA420P16LE,
> + AV_PIX_FMT_YUV444P9LE, AV_PIX_FMT_YUVA444P9LE,
> + AV_PIX_FMT_YUV420P9LE, AV_PIX_FMT_YUVA420P9LE,
> + AV_PIX_FMT_NONE
> + };
> +
> + AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
> + if (!fmts_list)
> + return AVERROR(ENOMEM);
> + return ff_set_common_formats(ctx, fmts_list);
> +}
> +
> +// non-linear mapping of target frame pixel
> +// x-coordinate to position in the source frame.
> +#define SIGN(x) (x < 0 ? -1 : 1)
> +static double stretch_x(int target_x, int target_width, int src_width, double exp)
> +{
> + double x = 2.0 * target_x / target_width - 1;
> +
> + double step = 0.5 + 0.5 * pow(fabs(x), exp) * SIGN(x);
> +
> + double src_x = target_x - step * (target_width - src_width);
> +
> + // large exponent and high stretch ratio
> + // can cause over- and underflow of the frame width
> + return av_clipd(src_x, 0, src_width - 1);
> +}
Could this be done with only integer arithmetic? Possibly using some
kind of limited power series to approximate the exponent?
Integer arithmetic makes the code bit-exact across architectures, which
makes testing much easier.
> +
> +static int config_props(AVFilterLink *outlink)
> +{
> + AVFilterContext *ctx = outlink->src;
> + NonlinearStretchContext *s = ctx->priv;
> + AVFilterLink *inlink = ctx->inputs[0];
> + int i, pt;
> + int plane_width_in[2];
> +
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
> + const uint8_t hsub = desc->log2_chroma_w;
> + const uint8_t vsub = desc->log2_chroma_h;
> +
> + // TODO: is it better to make the width option required? Instead of computing default.
> + outlink->w = s->target_width ? s->target_width : inlink->h * 16 / 9;
> + outlink->h = inlink->h;
> +
> + s->nb_planes = av_pix_fmt_count_planes(inlink->format);
> +
> + // initialize values for different plane types
> + s->plane_height[0] = outlink->h;
> + s->plane_width[0] = outlink->w;
> + plane_width_in[0] = inlink->w;
> + s->plane_height[1] = AV_CEIL_RSHIFT(outlink->h, vsub);
> + s->plane_width[1] = AV_CEIL_RSHIFT(outlink->w, hsub);
> + plane_width_in[1] = AV_CEIL_RSHIFT(inlink->w, hsub);
> +
> + for (pt = 0; pt < 2; pt++) {
> + s->plane_xmap[pt] = av_malloc_array(s->plane_width[pt], sizeof(*s->plane_xmap[pt]));
> +
> + if (!s->plane_xmap[pt])
> + return AVERROR(ENOMEM);
> +
> + // precompute mapping between input and output pixels on the x-axis
> + for (i = 0; i < s->plane_width[pt]; i++) {
> + double src_x = stretch_x(i, s->plane_width[pt], plane_width_in[pt], s->exp);
> + s->plane_xmap[pt][i] = src_x;
> + }
> + }
> +
> + return 0;
> +}
> +
> +// TODO: Should this be as is, inlined or defined
> +static double lerp(double v0, double v1, double t)
> +{
> + return v0 + t * (v1 - v0);
> +}
> +
> +// TODO: Should this inlined?
> +static inline void stretch_frame(int width, int height,
> + uint8_t *dst, int dst_linesize,
> + const uint8_t *src, int src_linesize,
> + double *xmap, int interpolate)
> +{
> + int i, x;
> + for (i = 0; i < height; i++) {
> + for (x = 0; x < width; x++) {
> + int px1 = floor(xmap[x]);
> + int px2 = ceil(xmap[x]);
> + dst[x] = interpolate
> + ? lerp(src[px1], src[px2], xmap[x] - px1)
> + : src[px1];
I remember people insisting on the possibility for bilinear
interpolation in similar cases.
> + }
> + dst += dst_linesize;
> + src += src_linesize;
> + }
> +}
> +
> +static int filter_slice(AVFilterContext *ctx, void *arg, int job, int nb_jobs)
> +{
> + NonlinearStretchContext *s = ctx->priv;
> + ThreadData *td = arg;
> + AVFrame *in = td->in;
> + AVFrame *out = td->out;
> + int plane;
> +
> + for (plane = 0; plane < s->nb_planes; plane++) {
> + const int plane_type = plane == 1 || plane == 2 ? 1 : 0;
> +
> + const int width = s->plane_width[plane_type];
> + const int height = s->plane_height[plane_type];
> +
> + const int slice_start = height * job / nb_jobs;
> + const int slice_end = height * (job+1) / nb_jobs;
> +
> + uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane];
> + const uint8_t *src = in->data[plane] + slice_start * in->linesize[plane];
> +
> + stretch_frame(width, (slice_end - slice_start),
> + dst, out->linesize[plane],
> + src, in->linesize[plane],
> + s->plane_xmap[plane_type], s->interpolate);
> + }
> +
> + return 0;
> +}
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> +{
> + AVFilterContext *ctx = inlink->dst;
> + AVFilterLink *outlink = ctx->outputs[0];
> + ThreadData td;
> +
> + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> + if (!out) {
> + av_frame_free(&in);
> + return AVERROR(ENOMEM);
> + }
> +
> + av_frame_copy_props(out, in);
> + out->width = outlink->w;
> + out->height = outlink->h;
> +
> + td.in = in;
> + td.out = out;
> + ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
> +
> + av_frame_free(&in);
> +
> + return ff_filter_frame(outlink, out);
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> + NonlinearStretchContext *s = ctx->priv;
> + av_freep(&s->plane_xmap[0]);
> + av_freep(&s->plane_xmap[1]);
> +}
> +
> +static const AVFilterPad nonlinearstretch_inputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .filter_frame = filter_frame,
> + },
> + { NULL }
> +};
> +
> +static const AVFilterPad nonlinearstretch_outputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .config_props = config_props,
> + },
> + { NULL }
> +};
> +
> +AVFilter ff_vf_nonlinearstretch = {
> + .name = "nonlinearstretch",
> + .description = NULL_IF_CONFIG_SMALL("Stretch input video nonlinearly."),
> + .priv_size = sizeof(NonlinearStretchContext),
> + .uninit = uninit,
> + .query_formats = query_formats,
> + .inputs = nonlinearstretch_inputs,
> + .outputs = nonlinearstretch_outputs,
> + .priv_class = &nonlinearstretch_class,
> + .flags = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
> +};
> diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
> index 18fe4f7016..adbb31b33f 100644
> --- a/tests/fate/filter-video.mak
> +++ b/tests/fate/filter-video.mak
> @@ -555,6 +555,9 @@ fate-filter-hue4: CMD = video_filter "format=yuv422p10,perms=random,hue=h=18*n:s
> FATE_FILTER_VSYNTH-$(CONFIG_IDET_FILTER) += fate-filter-idet
> fate-filter-idet: CMD = framecrc -flags bitexact -idct simple -i $(SRC) -vf idet -frames:v 25 -flags +bitexact
>
> +FATE_FILTER_VSYNTH-$(call ALLYES, FORMAT_FILTER PERMS_FILTER NONLINEARSTRETCH_FILTER) += fate-filter-nonlinearstretch
> +fate-filter-nonlinearstretcH: CMD = video_filter "format=gbrp,perms=random,nonlinearstretch=width=1920" -frames:v 20
> +
> FATE_FILTER_VSYNTH-$(CONFIG_PAD_FILTER) += fate-filter-pad
> fate-filter-pad: CMD = video_filter "pad=iw*1.5:ih*1.5:iw*0.3:ih*0.2"
>
> diff --git a/tests/ref/fate/filter-nonlinearstretch b/tests/ref/fate/filter-nonlinearstretch
> new file mode 100644
> index 0000000000..e69de29bb2
I am confused: is the file empty? Or am I reading something wrong?
Regards,
--
Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20200707/c5d679a5/attachment.sig>
More information about the ffmpeg-devel
mailing list