[FFmpeg-devel] [PATCH v06 5/5] fbdetile videofilter cpu based framebuffer detiling
hanishkvc
hanishkvc at gmail.com
Sat Jul 4 16:17:17 EEST 2020
This adds a video filter called fbdetile, which allows the user
to detile a tiled framebuffer layout into a linear layout, if required.
It uses the fbtile helper routines to achieve the detiling.
This is useful if
a) the user doesn't want to apply detiling while capturing some
content/framebuffer which is tiled, OR
b) the user already has tiled content with them, OR
c) a developer wants to experiment with tiled data.
---
Changelog | 1 +
doc/filters.texi | 78 +++++++++++++
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_fbdetile.c | 238 ++++++++++++++++++++++++++++++++++++++
5 files changed, 319 insertions(+)
create mode 100644 libavfilter/vf_fbdetile.c
diff --git a/Changelog b/Changelog
index 6174770ce1..a4e098f94f 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
releases are sorted from youngest to oldest.
version <next>:
+- fbdetile cpu based framebuffer layout detiling video filter
- hwdownload framebuffer layout detiling (Intel tile-x|y|yf layouts)
- hwcontext_drm detiles non linear layouts, if possible
- kmsgrab GetFB2 format_modifier, if user doesnt specify
diff --git a/doc/filters.texi b/doc/filters.texi
index c783e059c2..4ff8b7edc4 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -12229,6 +12229,84 @@ It accepts the following optional parameters:
The number of the CUDA device to use
@end table
+@anchor{fbdetile}
+@section fbdetile
+
+Detiles the Framebuffer tile layout into a linear layout using CPU.
+
+It currently supports conversion from Intel legacy tile-x and tile-y as well
+as the newer Intel tile-yf layouts into a linear layout. This is useful if
+one is using kmsgrab and hwdownload to capture a screen which is using one
+of these non-linear layouts.
+
+NOTE: It also provides a generic detiling logic, which can be easily configured
+to detile many different tiling schemes in the future, if required. The same is
+used for detiling the Intel tile-yf layout. Sample configurations to handle
+Intel tile-x and tile-y using the generic detile logic are also shown in the
+code, for reference.
+
+Currently it expects the data to be in a 32bit RGB based pixel format. However,
+the logic doesn't do any pixel format conversion. Support for 16bit RGB data
+will be enabled later, as the logic is transparent to it at one level.
+
+One could either insert this into the filter chain while capturing itself,
+or else, if it is slowing things down or so, then one could instead insert
+it into the filter chain during playback or transcoding or so.
+
+It supports the following optional parameters:
+
+@table @option
+@item type
+Specify which detiling conversion to apply. The supported values are:
+@table @var
+@item 0
+Don't do detiling.
+@item 1
+Auto detect detile logic to apply (supported in vf_hwdownload, not in vf_fbdetile).
+@item 2
+Intel tile-x to linear conversion (the default).
+@item 3
+Intel tile-y to linear conversion.
+@item 4
+Intel tile-yf to linear conversion.
+@end table
+@end table
+
+If one wants to convert during capture itself, one could do
+@example
+ffmpeg -f kmsgrab -i - -vf "hwdownload,format=bgr0,fbdetile" OUTPUT
+@end example
+
+However, if one wants to convert after the tiled data has already been captured
+@example
+ffmpeg -i INPUT -vf "fbdetile" OUTPUT
+@end example
+@example
+ffplay -i INPUT -vf "fbdetile"
+@end example
+
+NOTE: While transcoding a test 1080p h264 stream with 276 frames, below were
+the average times taken by the different detile logics.
+@example
+rm out.mp4; time ./ffmpeg -i input.mp4 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbdetile=2 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbdetile=3 out.mp4
+rm out.mp4; time ./ffmpeg -i input.mp4 -vf fbdetile=4 out.mp4
+@end example
+@table @option
+@item with no fbdetile filter
+it took ~7.28 secs, i5-8th Gen
+it took ~10.1 secs, i7-7th Gen
+@item with fbdetile=2 filter, Intel Tile-X
+it took ~8.69 secs, i5-8th Gen
+it took ~13.3 secs, i7-7th Gen
+@item with fbdetile=3 filter, Intel Tile-Y
+it took ~9.20 secs, i5-8th Gen
+it took ~13.5 secs, i7-7th Gen
+@item with fbdetile=4 filter, Intel Tile-Yf
+it took ~13.8 secs, i7-7th Gen
+@end table
+
@section hqx
Apply a high-quality magnification filter designed for pixel art. This filter
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5123540653..bdb0c379ae 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -280,6 +280,7 @@ OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o
OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o
OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o
OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o
+OBJS-$(CONFIG_FBDETILE_FILTER) += vf_fbdetile.o
OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o
OBJS-$(CONFIG_IDET_FILTER) += vf_idet.o
OBJS-$(CONFIG_IL_FILTER) += vf_il.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 1183e40267..f8dceb2a88 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -265,6 +265,7 @@ extern AVFilter ff_vf_hwdownload;
extern AVFilter ff_vf_hwmap;
extern AVFilter ff_vf_hwupload;
extern AVFilter ff_vf_hwupload_cuda;
+extern AVFilter ff_vf_fbdetile;
extern AVFilter ff_vf_hysteresis;
extern AVFilter ff_vf_idet;
extern AVFilter ff_vf_il;
diff --git a/libavfilter/vf_fbdetile.c b/libavfilter/vf_fbdetile.c
new file mode 100644
index 0000000000..bfc28da465
--- /dev/null
+++ b/libavfilter/vf_fbdetile.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2020 HanishKVC
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Detile the Frame buffer's tile layout using the cpu
+ * Currently it supports detiling of following layouts
+ * legacy Intel Tile-X
+ * legacy Intel Tile-Y
+ * newer Intel Tile-Yf
+ * More tiling layouts can be easily supported by adding configuration data
+ * for the generic detile logic, wrt the required tiling schemes.
+ *
+ */
+
+/*
+ * ToThink|Check: Optimisations
+ *
+ * Do the gcc settings used by ffmpeg allow memcpy | stringops inlining,
+ * loop unrolling, better native matching instructions, additional
+ * optimisations, ...
+ *
+ * Does gcc map to optimal memcpy logic, based on the situation it is
+ * used in i.e like
+ * based on size of transfer, alignment, architecture, etc
+ * a suitable combination of inlining and or rep movsb and or
+ * simd load/store and or unrolling and or ...
+ *
+ * If not, may be look at vector_size or intrinsics or appropriate arch
+ * and cpu specific inline asm or ...
+ *
+ */
+
+/*
+ * Performance check results on i7-7500u
+ * TileYf, TileGX, TileGY using detile_generic_opti
+ * This mainly impacts TileYf, due to its deeper subtiling
+ * Without opti, its TSCCnt rises to around 11.XYM
+ * Run Type : Type : Seconds Max, Min : TSCCnt Min, Max
+ * Non filter run: : 10.11s, 09.96s :
+ * fbdetile=2 run: TileX : 13.45s, 13.20s : 05.95M, 06.10M
+ * fbdetile=3 run: TileY : 13.50s, 13.39s : 06.22M, 06.39M
+ * fbdetile=4 run: TileYf : 13.75s, 13.63s : 09.82M, 09.90M
+ * fbdetile=5 run: TileGX : 13.70s, 13.32s : 06.15M, 06.24M
+ * fbdetile=6 run: TileGY : 14.12s, 13.57s : 08.75M, 09.10M
+ */
+
+#include <inttypes.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/fbtile.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+// Use Optimised detile_generic or the Simpler but more fine grained one
+#define DETILE_GENERIC_OPTI 1
+// Enable printing of the tile walk
+#undef DEBUG_FBTILE
+// Print time taken by detile using performance counter
+#if ARCH_X86
+#define DEBUG_PERF 1
+#else
+#undef DEBUG_PERF
+#endif
+
+#ifdef DEBUG_PERF
+#include <x86intrin.h>
+/*
+ * Perf bookkeeping, only built when DEBUG_PERF is defined.
+ * Both counters are only touched from this file (filter_frame() updates
+ * them, uninit() reports them), so they are kept file-local instead of
+ * being exported as unprefixed global symbols.
+ */
+// Sum of the TSC deltas measured around each detile call
+static uint64_t perfTime = 0;
+// Number of detile calls that have been measured
+static int perfCnt = 0;
+#endif
+
+/**
+ * Private context of the fbdetile filter.
+ */
+typedef struct FBDetileContext {
+    const AVClass *class;   ///< class handle of this context
+    int width;              ///< frame width; preset in init(), overwritten in config_props()
+    int height;             ///< frame height; preset in init(), overwritten in config_props()
+    int type;               ///< value of the "type" option (one of the TILE_* constants)
+} FBDetileContext;
+
+// Byte offset of an option's backing field inside FBDetileContext
+#define OFFSET(x) offsetof(FBDetileContext, x)
+// Parenthesised so that the expansion is always a single operand
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+/*
+ * Options of the fbdetile filter.
+ * "type" selects the detile logic; it defaults to TILE_INTELX and accepts
+ * values from 0 up to TILE_NONE_END-1. The remaining entries are named
+ * constants belonging to the "type" unit.
+ */
+static const AVOption fbdetile_options[] = {
+    { "type", "set framebuffer tile|format_modifier conversion type", OFFSET(type), AV_OPT_TYPE_INT, {.i64=TILE_INTELX}, 0, TILE_NONE_END-1, FLAGS, "type" },
+        { "None", "Dont detile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_NONE}, INT_MIN, INT_MAX, FLAGS, "type" },
+        { "Auto", "Auto detect tile conversion type, NotImplemented", 0, AV_OPT_TYPE_CONST, {.i64=TILE_AUTO}, INT_MIN, INT_MAX, FLAGS, "type" },
+        { "intelx", "Intel Tile-X layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELX}, INT_MIN, INT_MAX, FLAGS, "type" },
+        { "intely", "Intel Tile-Y layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELY}, INT_MIN, INT_MAX, FLAGS, "type" },
+        { "intelyf", "Intel Tile-Yf layout", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELYF}, INT_MIN, INT_MAX, FLAGS, "type" },
+        { "intelgx", "Intel Tile-X layout, GenericDetile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELGX}, INT_MIN, INT_MAX, FLAGS, "type" },
+        { "intelgy", "Intel Tile-Y layout, GenericDetile", 0, AV_OPT_TYPE_CONST, {.i64=TILE_INTELGY}, INT_MIN, INT_MAX, FLAGS, "type" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(fbdetile);
+
+/**
+ * Filter init callback.
+ *
+ * Logs which detile conversion was selected through the "type" option.
+ * TILE_AUTO is not supported by this filter, so it is downgraded to
+ * TILE_NONE (pass through) after a warning. An unrecognised type is only
+ * logged as an error; the function still returns 0.
+ *
+ * @param ctx filter context, ctx->priv is the FBDetileContext
+ * @return always 0
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    FBDetileContext *s = ctx->priv;
+
+    switch (s->type) {
+    case TILE_NONE:
+        av_log(ctx, AV_LOG_INFO, "init: Wont detile, pass through\n");
+        break;
+    case TILE_AUTO:
+        av_log(ctx, AV_LOG_WARNING, "init: Auto detile mode detect, not supported, pass through\n");
+        s->type = TILE_NONE;
+        break;
+    case TILE_INTELX:
+        av_log(ctx, AV_LOG_INFO, "init: Intel tile-x to linear\n");
+        break;
+    case TILE_INTELY:
+        av_log(ctx, AV_LOG_INFO, "init: Intel tile-y to linear\n");
+        break;
+    case TILE_INTELYF:
+        av_log(ctx, AV_LOG_INFO, "init: Intel tile-yf to linear\n");
+        break;
+    case TILE_INTELGX:
+        av_log(ctx, AV_LOG_INFO, "init: Intel tile-x to linear, using generic detile\n");
+        break;
+    case TILE_INTELGY:
+        av_log(ctx, AV_LOG_INFO, "init: Intel tile-y to linear, using generic detile\n");
+        break;
+    default:
+        av_log(ctx, AV_LOG_ERROR, "init: Unknown Tile format specified, shouldnt reach here\n");
+        break;
+    }
+
+    // Placeholder dimensions; config_props() replaces them with the link's size
+    s->width  = 1920;
+    s->height = 1080;
+    return 0;
+}
+
+/**
+ * Advertise the supported pixel formats.
+ *
+ * Builds a format list from fbtilePixFormats and sets it as the common
+ * format list of the filter.
+ *
+ * @return AVERROR(ENOMEM) if the list could not be created, otherwise the
+ *         result of ff_set_common_formats()
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *supported = ff_make_format_list(fbtilePixFormats);
+
+    return supported ? ff_set_common_formats(ctx, supported)
+                     : AVERROR(ENOMEM);
+}
+
+/**
+ * Input pad configuration callback.
+ *
+ * Stores the dimensions of the input link in the filter context and logs
+ * them.
+ *
+ * @param inlink input link being configured
+ * @return always 0
+ */
+static int config_props(AVFilterLink *inlink)
+{
+    AVFilterContext *avctx = inlink->dst;
+    FBDetileContext *s     = avctx->priv;
+    const int w = inlink->w;
+    const int h = inlink->h;
+
+    s->width  = w;
+    s->height = h;
+    av_log(avctx, AV_LOG_INFO, "config_props: %d x %d\n", w, h);
+
+    return 0;
+}
+
+
+/**
+ * Detile one input frame into a newly allocated output frame.
+ *
+ * When the configured type is TILE_NONE the input frame is forwarded
+ * untouched. Otherwise an output buffer is requested from the output link,
+ * the frame properties are copied over and detile_this() converts plane 0
+ * of the input into plane 0 of the output.
+ *
+ * The input frame is consumed on every path: it is either handed on to
+ * ff_filter_frame() or freed here.
+ *
+ * @param inlink link the frame arrived on
+ * @param in     input frame
+ * @return result of ff_filter_frame() on success, AVERROR(ENOMEM) if no
+ *         output buffer could be obtained, or the negative error code
+ *         returned by av_frame_copy_props()
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FBDetileContext *fbdetile = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    int ret;
+#ifdef DEBUG_PERF
+    // Declared up front, so that no declaration follows a statement
+    uint64_t perfStart, perfEnd;
+#endif
+
+    if (fbdetile->type == TILE_NONE)
+        return ff_filter_frame(outlink, in);
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+
+    // Copying the properties can fail; dont pass on a half set up frame
+    ret = av_frame_copy_props(out, in);
+    if (ret < 0) {
+        av_frame_free(&out);
+        av_frame_free(&in);
+        return ret;
+    }
+
+#ifdef DEBUG_PERF
+    perfStart = __rdtsc();
+#endif
+
+    // NOTE(review): the trailing 4 is presumably bytes per pixel (32bit RGB);
+    // any return value of detile_this() is not looked at here - confirm
+    // against fbtile.h
+    detile_this(fbdetile->type, 0, fbdetile->width, fbdetile->height,
+                out->data[0], out->linesize[0],
+                in->data[0], in->linesize[0], 4);
+
+#ifdef DEBUG_PERF
+    perfEnd = __rdtsc();
+    perfTime += (perfEnd - perfStart);
+    perfCnt += 1;
+#endif
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Filter uninit callback.
+ *
+ * When DEBUG_PERF is defined, logs the average TSC count spent per detile
+ * call (0 if no call was measured). Does nothing otherwise.
+ *
+ * @param ctx filter context, used for logging only
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+#ifdef DEBUG_PERF
+    // Guard the division locally instead of writing 1 into the shared counter
+    uint64_t avgTSCCnt = perfCnt ? perfTime / perfCnt : 0;
+
+    // perfTime is a uint64_t, "%ld" is the wrong conversion where long is 32 bit
+    av_log(ctx, AV_LOG_INFO, "uninit:perf: AvgTSCCnt %"PRIu64"\n", avgTSCCnt);
+#endif
+}
+
+/*
+ * Input pads: a single video pad. config_props() records the link
+ * dimensions and filter_frame() processes each incoming frame.
+ */
+static const AVFilterPad fbdetile_inputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .filter_frame = filter_frame,
+        .config_props = config_props,
+    },
+    { NULL }
+};
+
+/*
+ * Output pads: a single video pad without any callbacks.
+ */
+static const AVFilterPad fbdetile_outputs[] = {
+    {
+        .type = AVMEDIA_TYPE_VIDEO,
+        .name = "default",
+    },
+    { NULL }
+};
+
+/*
+ * Filter definition; declared as ff_vf_fbdetile in allfilters.c.
+ * Ties together the private context, the option class, the callbacks and
+ * the pad lists defined in this file.
+ */
+AVFilter ff_vf_fbdetile = {
+    .name          = "fbdetile",
+    .description   = NULL_IF_CONFIG_SMALL("Detile Framebuffer using CPU"),
+    .priv_class    = &fbdetile_class,
+    .priv_size     = sizeof(FBDetileContext),
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = fbdetile_inputs,
+    .outputs       = fbdetile_outputs,
+};
+
+// vim: set expandtab sts=4: //
--
2.25.1
More information about the ffmpeg-devel
mailing list