[FFmpeg-devel] [PATCH v06 2/5] fbtile helperRoutines cpu based framebuffer detiling

hanishkvc hanishkvc at gmail.com
Sat Jul 4 16:17:14 EEST 2020


Add helper routines which can be used to detile tiled framebuffer
layouts into a linear layout, using the cpu.

Currently it supports Legacy Intel Tile-X, Legacy Intel Tile-Y and
Newer Intel Tile-Yf tiled layouts.

Currently supported pixel format is 32bit RGB.

It also contains a detile_generic logic, which can be easily configured
to support different kinds of tiling layouts, at the expense of some
processing speed compared to a detiling logic targeted at a single layout.
---
 libavutil/Makefile |   2 +
 libavutil/fbtile.c | 441 +++++++++++++++++++++++++++++++++++++++++++++
 libavutil/fbtile.h | 228 +++++++++++++++++++++++
 3 files changed, 671 insertions(+)
 create mode 100644 libavutil/fbtile.c
 create mode 100644 libavutil/fbtile.h

diff --git a/libavutil/Makefile b/libavutil/Makefile
index 9b08372eb2..9b58ac5980 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -84,6 +84,7 @@ HEADERS = adler32.h                                                     \
           xtea.h                                                        \
           tea.h                                                         \
           tx.h                                                          \
+          fbtile.h                                                      \
 
 HEADERS-$(CONFIG_LZO)                   += lzo.h
 
@@ -169,6 +170,7 @@ OBJS = adler32.o                                                        \
        tx_float.o                                                       \
        tx_double.o                                                      \
        tx_int32.o                                                       \
+       fbtile.o                                                         \
        video_enc_params.o                                               \
 
 
diff --git a/libavutil/fbtile.c b/libavutil/fbtile.c
new file mode 100644
index 0000000000..ca04f0a7d2
--- /dev/null
+++ b/libavutil/fbtile.c
@@ -0,0 +1,441 @@
+/*
+ * CPU based Framebuffer Tile DeTile logic
+ * Copyright (c) 2020 C Hanish Menon <HanishKVC>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "avutil.h"
+#include "common.h"
+#include "fbtile.h"
+#if CONFIG_LIBDRM
+#include <drm_fourcc.h>
+#endif
+
+
+/**
+ * Map a DRM format modifier to fbtile's internal tile mode.
+ *
+ * Modifiers are only recognised when built with libdrm (CONFIG_LIBDRM);
+ * otherwise, and for any modifier not listed below, TILE_NONE_END is returned.
+ *
+ * @param formatModifier the format_modifier to map
+ * @return the matching FBTileMode value, or TILE_NONE_END if not recognised
+ */
+int fbtilemode_from_formatmodifier(uint64_t formatModifier)
+{
+    int mode = TILE_NONE_END;
+
+#if CONFIG_LIBDRM
+    switch(formatModifier) {
+        case DRM_FORMAT_MOD_LINEAR:
+            mode = TILE_NONE;
+            break;
+        case I915_FORMAT_MOD_X_TILED:
+            mode = TILE_INTELX;
+            break;
+        case I915_FORMAT_MOD_Y_TILED:
+            mode = TILE_INTELY;
+            break;
+        case I915_FORMAT_MOD_Yf_TILED:
+            mode = TILE_INTELYF;
+            break;
+        default:
+            mode = TILE_NONE_END;
+            break;
+    }
+#endif
+#ifdef DEBUG_FBTILE_FORMATMODIFIER_MAPPING
+    // formatModifier is a uint64_t, so it has to be printed with PRIx64;
+    // "%lx" mismatches the argument wherever long is 32 bits wide.
+    av_log(NULL, AV_LOG_DEBUG, "fbtile:formatmodifier[%"PRIx64"] mapped to mode[%d]\n", formatModifier, mode);
+#endif
+    return mode;
+}
+
+
+/**
+ * Pixel formats understood by the fbtile logic.
+ * Only the 32bit RGB family is listed at present; the list is terminated
+ * by AV_PIX_FMT_NONE.
+ * TODO: Technically the logic is, to a great extent, transparent to 16bit RGB formats as well
+ */
+const enum AVPixelFormat fbtilePixFormats[] = {
+    AV_PIX_FMT_RGB0, AV_PIX_FMT_0RGB, AV_PIX_FMT_BGR0, AV_PIX_FMT_0BGR,
+    AV_PIX_FMT_RGBA, AV_PIX_FMT_ARGB, AV_PIX_FMT_BGRA, AV_PIX_FMT_ABGR,
+    AV_PIX_FMT_NONE
+};
+
+/**
+ * Report whether both given pixel formats are listed in fbtilePixFormats.
+ *
+ * @param srcPixFormat pixel format of the source image
+ * @param dstPixFormat pixel format of the destination image
+ * @return non-zero if both formats are listed, 0 otherwise
+ */
+int fbtile_checkpixformats(const enum AVPixelFormat srcPixFormat, const enum AVPixelFormat dstPixFormat)
+{
+    int srcFound = 0;
+    int dstFound = 0;
+    const enum AVPixelFormat *fmt;
+
+    // Walk the list up to, but not including, the AV_PIX_FMT_NONE terminator
+    for (fmt = fbtilePixFormats; *fmt != AV_PIX_FMT_NONE; fmt++) {
+        srcFound |= (*fmt == srcPixFormat);
+        dstFound |= (*fmt == dstPixFormat);
+    }
+    return (srcFound && dstFound);
+}
+
+
+/*
+ * Detile a legacy Intel Tile-X framebuffer (32 bits per pixel) into dst.
+ *
+ * The source is walked as consecutive tiles of 128 pixels x 8 lines, each
+ * stored as 8 contiguous rows of 512 bytes; tiles fill the image from left
+ * to right, then from top to bottom.
+ *
+ * Unsupported geometry (srcLineSize != w*4, or a width which is not a whole
+ * number of tiles) is reported and dst is left untouched, instead of copying
+ * beyond the destination rows. If h is not a multiple of the tile height,
+ * only the lines that lie inside the image are written; src is still
+ * expected to hold the whole of the last row of tiles.
+ */
+void detile_intelx(int w, int h,
+                          uint8_t *dst, int dstLineSize,
+                          const uint8_t *src, int srcLineSize)
+{
+    // Offsets and LineSize are in bytes
+    const int pixBytes = 4;                     // bytes per pixel
+    const int tileW = 128;                      // tileWidth inPixels, 512/4, For a 32Bits/Pixel framebuffer
+    const int tileH = 8;                        // tileHeight inPixelLines
+    const int tileWBytes = tileW*pixBytes;      // tileWidth inBytes
+    const int tileBytes = tileWBytes*tileH;     // size of one tile inBytes
+    int64_t sO = 0;                             // srcOffset inBytes, 64bit so that large frames cant overflow
+
+    if (w <= 0 || h <= 0)
+        return;
+    if ((int64_t)w*pixBytes != srcLineSize) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intelx: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intelx: dont support LineSize | Pitch going beyond width\n");
+        return;
+    }
+    if (w%tileW != 0) {
+        // A partial tile at the end of a row would be copied past the end of the dst row
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intelx: width %d is not a multiple of the tile width %d\n", w, tileW);
+        return;
+    }
+
+    for (int dY = 0; dY < h; dY += tileH) {
+        // The last row of tiles may stick out below the image, copy only the lines inside it
+        const int rows = (h - dY < tileH) ? h - dY : tileH;
+        for (int dX = 0; dX < w; dX += tileW) {
+            const int64_t dO = (int64_t)dY*dstLineSize + (int64_t)dX*pixBytes;
+#ifdef DEBUG_FBTILE
+            av_log(NULL, AV_LOG_DEBUG, "fbdetile:intelx: dX%d dY%d, sO%"PRId64", dO%"PRId64"\n", dX, dY, sO, dO);
+#endif
+            for (int r = 0; r < rows; r++)
+                memcpy(dst + dO + (int64_t)r*dstLineSize, src + sO + r*tileWBytes, tileWBytes);
+            // The source always advances by a full tile, even if fewer lines were copied
+            sO += tileBytes;
+        }
+    }
+}
+
+
+/*
+ * Intel Legacy Tile-Y layout conversion support
+ *
+ * The source is walked as consecutive subtile columns of 4 pixels (16 bytes)
+ * x 32 lines, each stored as 32 contiguous rows of 16 bytes; the columns fill
+ * the image from left to right, then from top to bottom in bands of 32 lines.
+ *
+ * Unsupported geometry (srcLineSize != w*4, or a width which is not a whole
+ * number of subtile columns) is reported and dst is left untouched, instead
+ * of copying beyond the destination rows. If h is not a multiple of 32, only
+ * the lines that lie inside the image are written; src is still expected to
+ * hold the whole of the last band of tiles.
+ *
+ * Currently done in a simple way. Possible optimisations are
+ *
+ * a) unrolling the row copy loop
+ *    --- the trip count is small and each copy is 16 bytes, so this is
+ *    left to the compiler.
+ *
+ * b) using simd based 128bit loading and storing along with prefetch
+ *    hinting.
+ *
+ *    TOTHINK|CHECK: Does memcpy already do this, and more, if the situation
+ *    is right?!
+ *
+ *    As code (or even intrinsics) would be specific to each architecture,
+ *    avoiding for now. Later have to check if the vector_size attribute and
+ *    the corresponding implementation by gcc can handle different
+ *    architectures properly, such that it wont become worse than the memcpy
+ *    provided for that architecture.
+ *
+ * Or maybe the two intel detiling logics could even be merged into one, as
+ * the semantic and flow is almost the same for both.
+ *
+ */
+void detile_intely(int w, int h,
+                          uint8_t *dst, int dstLineSize,
+                          const uint8_t *src, int srcLineSize)
+{
+    // Offsets and LineSize are in bytes
+    const int pixBytes = 4;                     // bytesPerPixel
+    // tileW represents subTileWidth here, as it can be repeated to fill a tile
+    const int tileW = 4;                        // tileWidth inPixels, 16/4, For a 32Bits/Pixel framebuffer
+    const int tileH = 32;                       // tileHeight inPixelLines
+    const int tileWBytes = tileW*pixBytes;      // tileWidth inBytes
+    const int subTileBytes = tileWBytes*tileH;  // size of one subtile column inBytes
+    int64_t sO = 0;                             // srcOffset inBytes, 64bit so that large frames cant overflow
+
+    if (w <= 0 || h <= 0)
+        return;
+    if ((int64_t)w*pixBytes != srcLineSize) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intely: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intely: dont support LineSize | Pitch going beyond width\n");
+        return;
+    }
+    if (w%tileW != 0) {
+        // A partial subtile at the end of a row would be copied past the end of the dst row
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:intely: width %d is not a multiple of the subtile width %d\n", w, tileW);
+        return;
+    }
+
+    for (int dY = 0; dY < h; dY += tileH) {
+        // The last band may stick out below the image (e.g. h = 1080), copy only the lines inside it
+        const int rows = (h - dY < tileH) ? h - dY : tileH;
+        for (int dX = 0; dX < w; dX += tileW) {
+            const int64_t dO = (int64_t)dY*dstLineSize + (int64_t)dX*pixBytes;
+#ifdef DEBUG_FBTILE
+            av_log(NULL, AV_LOG_DEBUG, "fbdetile:intely: dX%d dY%d, sO%"PRId64", dO%"PRId64"\n", dX, dY, sO, dO);
+#endif
+            for (int r = 0; r < rows; r++)
+                memcpy(dst + dO + (int64_t)r*dstLineSize, src + sO + r*tileWBytes, tileWBytes);
+            // The source always advances by a full subtile column, even if fewer lines were copied
+            sO += subTileBytes;
+        }
+    }
+}
+
+
+/*
+ * Generic detile logic
+ *
+ * Each supported layout is described by its subtile and tile dimensions plus
+ * a table of direction change entries (struct dirChange, see fbtile.h), which
+ * specify the walking of subtiles within a tile. After every subtile, the
+ * generic detile routines check the table from its last entry to its first
+ * and apply the first entry whose posOffset divides the number of subtile
+ * lines processed so far.
+ *
+ * The xxxSubTileWidthBytes values are written out as literals, because an
+ * expression built from other const objects is not a constant expression in
+ * ISO C and so is not a portable initializer at file scope.
+ */
+
+/**
+ * Settings for Intel Tile-Yf framebuffer layout.
+ * May need to swap the 4 pixel wide subtile, have to check doc bit more
+ */
+const int tyfBytesPerPixel = 4;
+const int tyfSubTileWidth = 4;
+const int tyfSubTileHeight = 8;
+const int tyfSubTileWidthBytes = 16;    // tyfSubTileWidth*tyfBytesPerPixel
+const int tyfTileWidth = 32;
+const int tyfTileHeight = 32;
+const int tyfNumDirChanges = 6;
+struct dirChange tyfDirChanges[] = { {8, 4, 0}, {16, -4, 8}, {32, 4, -8}, {64, -12, 8 }, {128, 4, -24}, {256, 4, -24} };
+
+/**
+ * Setting for Intel Tile-X framebuffer layout
+ */
+const int txBytesPerPixel = 4;
+const int txSubTileWidth = 128;
+const int txSubTileHeight = 8;
+const int txSubTileWidthBytes = 512;    // txSubTileWidth*txBytesPerPixel
+const int txTileWidth = 128;
+const int txTileHeight = 8;
+const int txNumDirChanges = 1;
+struct dirChange txDirChanges[] = { {8, 128, 0} };
+
+/**
+ * Setting for Intel Tile-Y framebuffer layout
+ * Even though a simple generic detiling logic doesnt require the
+ * dummy 256 posOffset entry, the pseudo parallel detiling based
+ * opti logic requires it to know about the Tile boundary.
+ */
+const int tyBytesPerPixel = 4;
+const int tySubTileWidth = 4;
+const int tySubTileHeight = 32;
+const int tySubTileWidthBytes = 16;     // tySubTileWidth*tyBytesPerPixel
+const int tyTileWidth = 32;
+const int tyTileHeight = 32;
+const int tyNumDirChanges = 2;
+struct dirChange tyDirChanges[] = { {32, 4, 0}, {256, 4, 0} };
+
+
+/*
+ * Generic detile, simple version.
+ *
+ * Copies one subtile (subTileHeight rows of subTileWidthBytes) at a time
+ * from the sequentially read source to the current destination position,
+ * then moves that position as told by the dirChanges table: the entries are
+ * checked from last to first, and the first one whose posOffset divides the
+ * number of subtile lines processed so far has its deltas applied. Once the
+ * position runs past the right edge it restarts at the next row of tiles.
+ *
+ * Unsupported geometry (srcLineSize != w*bytesPerPixel, or a width which is
+ * not a whole number of tiles) and unusable layout descriptions are reported
+ * and dst is left untouched. Subtiles or lines that would fall outside the
+ * w x h image are skipped rather than written, so a height which is not a
+ * multiple of tileHeight is fine; src is still expected to hold the whole of
+ * the last row of tiles.
+ */
+void detile_generic_simple(int w, int h,
+                                  uint8_t *dst, int dstLineSize,
+                                  const uint8_t *src, int srcLineSize,
+                                  int bytesPerPixel,
+                                  int subTileWidth, int subTileHeight, int subTileWidthBytes,
+                                  int tileWidth, int tileHeight,
+                                  int numDirChanges, struct dirChange *dirChanges)
+{
+    int64_t sO = 0;                     // srcOffset inBytes
+    int dX = 0;                         // destX inPixels
+    int dY = 0;                         // destY inPixelLines
+    int64_t nSTLines;                   // numSubTileLines
+    int64_t cSTL = 0;                   // curSubTileLine
+
+    if (w <= 0 || h <= 0)
+        return;
+    if (bytesPerPixel <= 0 || subTileWidth <= 0 || subTileHeight <= 0 || subTileWidthBytes <= 0 ||
+        tileWidth <= 0 || tileHeight <= 0 || (numDirChanges > 0 && !dirChanges)) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: invalid tile layout description\n");
+        return;
+    }
+    if ((int64_t)w*bytesPerPixel != srcLineSize) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: dont support LineSize | Pitch going beyond width\n");
+        return;
+    }
+    if (w%tileWidth != 0) {
+        // The walk within a tile would otherwise get cut short by the right edge check below
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic:NotSupported:NonMultWidth: width%d, tileWidth%d\n", w, tileWidth);
+        return;
+    }
+
+    // Count the subtile lines of whole rows of tiles, so that a partial last row of tiles gets walked fully
+    nSTLines = ((int64_t)h + tileHeight - 1)/tileHeight;
+    nSTLines = nSTLines*tileHeight*w/subTileWidth;
+
+    while (cSTL < nSTLines) {
+        const int64_t dO = (int64_t)dY*dstLineSize + (int64_t)dX*bytesPerPixel;
+        int rows = 0;                   // lines of this subtile which lie inside the image
+#ifdef DEBUG_FBTILE
+        av_log(NULL, AV_LOG_DEBUG, "fbdetile:generic: dX%d dY%d, sO%"PRId64", dO%"PRId64"\n", dX, dY, sO, dO);
+#endif
+
+        if (dX >= 0 && dX <= w - subTileWidth && dY >= 0 && dY < h)
+            rows = (h - dY < subTileHeight) ? h - dY : subTileHeight;
+        for (int k = 0; k < rows; k++) {
+            memcpy(dst+dO+(int64_t)k*dstLineSize, src+sO+(int64_t)k*subTileWidthBytes, subTileWidthBytes);
+        }
+        // The source always advances by a full subtile, even if fewer lines were copied
+        sO = sO + (int64_t)subTileHeight*subTileWidthBytes;
+
+        cSTL += subTileHeight;
+        for (int i=numDirChanges-1; i>=0; i--) {
+            // A posOffset of 0 can never match and must not reach the modulo
+            if (dirChanges[i].posOffset != 0 && (cSTL%dirChanges[i].posOffset) == 0) {
+                dX += dirChanges[i].xDelta;
+                dY += dirChanges[i].yDelta;
+                break;
+            }
+        }
+        if (dX >= w) {
+            dX = 0;
+            dY += tileHeight;
+        }
+    }
+}
+
+
+/*
+ * Generic detile, optimised version.
+ *
+ * Walks the subtiles of a tile like detile_generic_simple, but detiles the
+ * same subtile of up to 8 horizontally adjacent tiles in one go (parallel).
+ * The last dirChanges entry is taken to mark the tile boundary: when it
+ * matches, the walk jumps over the tiles which were handled in parallel.
+ *
+ * Unsupported geometry (srcLineSize != w*bytesPerPixel, or a width which is
+ * not a whole number of tiles) and unusable layout descriptions are reported
+ * and dst is left untouched. Subtiles or lines that would fall outside the
+ * w x h image are skipped rather than written, so a height which is not a
+ * multiple of tileHeight is fine; src is still expected to hold the whole of
+ * the last row of tiles.
+ */
+void detile_generic_opti(int w, int h,
+                                uint8_t *dst, int dstLineSize,
+                                const uint8_t *src, int srcLineSize,
+                                int bytesPerPixel,
+                                int subTileWidth, int subTileHeight, int subTileWidthBytes,
+                                int tileWidth, int tileHeight,
+                                int numDirChanges, struct dirChange *dirChanges)
+{
+    int parallel = 1;                   // number of tiles detiled side by side
+    int64_t sO = 0;                     // srcOffset inBytes
+    int64_t sOPrev = 0;                 // srcOffset at the start of the current group of tiles
+    int dX = 0;                         // destX inPixels
+    int dY = 0;                         // destY inPixelLines
+    int64_t nSTLines;                   // numSubTileLines
+    int64_t cSTL = 0;                   // curSubTileLine
+    int64_t tileBytes;                  // size of one tile inBytes
+    int nTilesInARow;
+    int curTileInRow = 0;
+
+    if (w <= 0 || h <= 0)
+        return;
+    if (bytesPerPixel <= 0 || subTileWidth <= 0 || subTileHeight <= 0 || subTileWidthBytes <= 0 ||
+        tileWidth <= 0 || tileHeight <= 0 || (numDirChanges > 0 && !dirChanges)) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: invalid tile layout description\n");
+        return;
+    }
+    if ((int64_t)w*bytesPerPixel != srcLineSize) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: w%dxh%d, dL%d, sL%d\n", w, h, dstLineSize, srcLineSize);
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic: dont support LineSize | Pitch going beyond width\n");
+        return;
+    }
+    if (w%tileWidth != 0) {
+        av_log(NULL, AV_LOG_ERROR, "fbdetile:generic:NotSupported:NonMultWidth: width%d, tileWidth%d\n", w, tileWidth);
+        return;
+    }
+
+    tileBytes = (int64_t)tileWidth*tileHeight*bytesPerPixel;
+    // Count the subtile lines of whole rows of tiles, so that a partial last row of tiles gets walked fully
+    nSTLines = ((int64_t)h + tileHeight - 1)/tileHeight;
+    nSTLines = nSTLines*tileHeight*w/subTileWidth;
+    //int nSTLinesInATile = (tileWidth*tileHeight)/subTileWidth;
+    // Pick the largest group size, upto 8, which divides the tiles in a row evenly
+    nTilesInARow = w/tileWidth;
+    for (parallel=8; parallel>0; parallel--) {
+        if (nTilesInARow%parallel == 0)
+            break;
+    }
+
+    while (cSTL < nSTLines) {
+        const int64_t dO = (int64_t)dY*dstLineSize + (int64_t)dX*bytesPerPixel;
+        int rows = 0;                   // lines of this subtile which lie inside the image
+#ifdef DEBUG_FBTILE
+        av_log(NULL, AV_LOG_DEBUG, "fbdetile:generic: dX%d dY%d, sO%"PRId64", dO%"PRId64"\n", dX, dY, sO, dO);
+#endif
+
+        // All the tiles of the group have to fit within the row
+        if (dX >= 0 && dY >= 0 && dY < h &&
+            (int64_t)dX + (int64_t)(parallel-1)*tileWidth + subTileWidth <= w)
+            rows = (h - dY < subTileHeight) ? h - dY : subTileHeight;
+
+        // Detile in parallel to a limited extent. The lines are handled in groups of upto 4
+        // across all the tiles of the group, to avoid any cache set-associativity and or
+        // limited cache based thrashing, by keeping it spatially, and in turn temporally,
+        // small at one level.
+        for (int k = 0; k < rows; k+=4) {
+            const int kEnd = (rows - k < 4) ? rows : k+4;
+            for (int p = 0; p < parallel; p++) {
+                const int64_t pSrcOffset = p*tileBytes;
+                const int64_t pDstOffset = (int64_t)p*tileWidth*bytesPerPixel;
+                for (int r = k; r < kEnd; r++) {
+                    memcpy(dst+dO+(int64_t)r*dstLineSize+pDstOffset, src+sO+(int64_t)r*subTileWidthBytes+pSrcOffset, subTileWidthBytes);
+                }
+            }
+        }
+        // The source always advances by a full subtile, even if fewer lines were copied
+        sO = sO + (int64_t)subTileHeight*subTileWidthBytes;
+
+        cSTL += subTileHeight;
+        for (int i=numDirChanges-1; i>=0; i--) {
+            // A posOffset of 0 can never match and must not reach the modulo
+            if (dirChanges[i].posOffset != 0 && (cSTL%dirChanges[i].posOffset) == 0) {
+                if (i == numDirChanges-1) {
+                    // Tile boundary: skip over the tiles which were detiled along with this one
+                    curTileInRow += parallel;
+                    dX = curTileInRow*tileWidth;
+                    sO = sOPrev + tileBytes*parallel;
+                    sOPrev = sO;
+                } else {
+                    dX += dirChanges[i].xDelta;
+                }
+                dY += dirChanges[i].yDelta;
+                break;
+            }
+        }
+        if (dX >= w) {
+            dX = 0;
+            curTileInRow = 0;
+            dY += tileHeight;
+            if (dY >= h) {
+                break;
+            }
+        }
+    }
+}
+
+
+/*
+ * Dispatch to the detile routine matching the given mode.
+ * TILE_AUTO is first resolved from the format modifier passed in arg1.
+ * Returns 0 if a detile routine was called, 1 otherwise.
+ */
+int detile_this(int mode, uint64_t arg1,
+                        int w, int h,
+                        uint8_t *dst, int dstLineSize,
+                        uint8_t *src, int srcLineSize,
+                        int bytesPerPixel)
+{
+    static int logState=0;
+
+    if (mode == TILE_AUTO)
+        mode = fbtilemode_from_formatmodifier(arg1);
+
+    switch (mode) {
+        case TILE_NONE:
+            // Linear layout, nothing to detile
+            return 1;
+        case TILE_INTELX:
+            detile_intelx(w, h, dst, dstLineSize, src, srcLineSize);
+            return 0;
+        case TILE_INTELY:
+            detile_intely(w, h, dst, dstLineSize, src, srcLineSize);
+            return 0;
+        case TILE_INTELYF:
+            detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                           tyfBytesPerPixel, tyfSubTileWidth, tyfSubTileHeight, tyfSubTileWidthBytes,
+                           tyfTileWidth, tyfTileHeight,
+                           tyfNumDirChanges, tyfDirChanges);
+            return 0;
+        case TILE_INTELGX:
+            detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                           txBytesPerPixel, txSubTileWidth, txSubTileHeight, txSubTileWidthBytes,
+                           txTileWidth, txTileHeight,
+                           txNumDirChanges, txDirChanges);
+            return 0;
+        case TILE_INTELGY:
+            detile_generic(w, h, dst, dstLineSize, src, srcLineSize,
+                           tyBytesPerPixel, tySubTileWidth, tySubTileHeight, tySubTileWidthBytes,
+                           tyTileWidth, tyTileHeight,
+                           tyNumDirChanges, tyDirChanges);
+            return 0;
+        case TILE_NONE_END:
+            // Warned about at the higher level only once, see av_log_once
+            av_log_once(NULL, AV_LOG_WARNING, AV_LOG_VERBOSE, &logState, "fbtile:detile_this:TILE_AUTOOr???: invalid or unsupported format_modifier:%"PRIx64"\n",arg1);
+            return 1;
+        default:
+            av_log(NULL, AV_LOG_ERROR, "fbtile:detile_this:????: unknown mode specified, check caller\n");
+            return 1;
+    }
+}
+
+
+// vim: set expandtab sts=4: //
diff --git a/libavutil/fbtile.h b/libavutil/fbtile.h
new file mode 100644
index 0000000000..51556db93a
--- /dev/null
+++ b/libavutil/fbtile.h
@@ -0,0 +1,228 @@
+/*
+ * CPU based Framebuffer Tile DeTile logic
+ * Copyright (c) 2020 C Hanish Menon <HanishKVC>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_FBTILE_H
+#define AVUTIL_FBTILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "pixfmt.h"
+
+/**
+ * @file
+ * @brief CPU based Framebuffer tiler detiler
+ * @author C Hanish Menon <HanishKVC>
+ * @{
+ */
+
+
+/**
+ * Tile layouts (modes) known to the fbtile logic.
+ */
+enum FBTileMode {
+    TILE_NONE,      ///< linear layout; detile_this() does not detile and returns 1
+    TILE_AUTO,      ///< detile_this() derives the mode from the format modifier passed as arg1
+    TILE_INTELX,    ///< legacy Intel Tile-X, handled by detile_intelx()
+    TILE_INTELY,    ///< legacy Intel Tile-Y, handled by detile_intely()
+    TILE_INTELYF,   ///< Intel Tile-Yf, handled by detile_generic with the tyf settings
+    TILE_INTELGX,   ///< Intel Tile-X, handled by detile_generic with the tx settings
+    TILE_INTELGY,   ///< Intel Tile-Y, handled by detile_generic with the ty settings
+    TILE_NONE_END,  ///< last entry; also what an unrecognised format modifier maps to
+};
+
+
+/**
+ * Map from formatmodifier to fbtile's internal mode.
+ *
+ * @param formatModifier the format_modifier to map
+ * @return the fbtile's equivalent internal mode, TILE_NONE_END if the
+ *         format modifier is not recognised
+ */
+// The debug logging of the mapping is compiled only if
+// DEBUG_FBTILE_FORMATMODIFIER_MAPPING is defined; it is kept undefined here.
+#undef DEBUG_FBTILE_FORMATMODIFIER_MAPPING
+int fbtilemode_from_formatmodifier(uint64_t formatModifier);
+
+
+/**
+ * Supported pixel formats by the fbtile logics.
+ * The list is terminated by AV_PIX_FMT_NONE.
+ */
+extern const enum AVPixelFormat fbtilePixFormats[];
+/**
+ * Check if the given pixel formats are supported by fbtile logic.
+ *
+ * @param srcPixFormat pixel format of source image
+ * @param dstPixFormat pixel format of destination image
+ *
+ * @return non-zero if both pixel formats are in fbtilePixFormats, 0 otherwise
+ */
+int fbtile_checkpixformats(const enum AVPixelFormat srcPixFormat, const enum AVPixelFormat dstPixFormat);
+
+
+/**
+ * Detile legacy intel tile-x layout into linear layout.
+ *
+ * The image is treated as 32 bits per pixel, made up of tiles of
+ * 128 pixels x 8 lines. srcLineSize is expected to equal w*4; any other
+ * pitch is not supported and is reported through av_log.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intelx(int w, int h,
+                          uint8_t *dst, int dstLineSize,
+                          const uint8_t *src, int srcLineSize);
+
+
+/**
+ * Detile legacy intel tile-y layout into linear layout.
+ *
+ * The image is treated as 32 bits per pixel, made up of subtile columns of
+ * 4 pixels x 32 lines. srcLineSize is expected to equal w*4; any other
+ * pitch is not supported and is reported through av_log.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ */
+void detile_intely(int w, int h,
+                          uint8_t *dst, int dstLineSize,
+                          const uint8_t *src, int srcLineSize);
+
+
+/**
+ * Generic Logic.
+ */
+
+/*
+ * Direction Change Entry
+ * Used to specify the tile walking of subtiles within a tile.
+ *
+ * After each subtile, the generic detile routines check the table of entries
+ * from its last entry to its first, and apply the first entry whose posOffset
+ * divides the number of subtile lines processed so far.
+ */
+struct dirChange {
+    int posOffset;  // entry applies when the subtile lines processed so far is a multiple of this
+    int xDelta;     // change to the destination x position, in pixels
+    int yDelta;     // change to the destination y position, in lines
+};
+/**
+ * Settings for Intel Tile-Yf framebuffer layout.
+ * May need to swap the 4 pixel wide subtile, have to check doc bit more
+ */
+extern const int tyfBytesPerPixel;
+extern const int tyfSubTileWidth;
+extern const int tyfSubTileHeight;
+extern const int tyfSubTileWidthBytes;
+extern const int tyfTileWidth;
+extern const int tyfTileHeight;
+extern const int tyfNumDirChanges;
+extern struct dirChange tyfDirChanges[];
+/**
+ * Setting for Intel Tile-X framebuffer layout
+ */
+extern const int txBytesPerPixel;
+extern const int txSubTileWidth;
+extern const int txSubTileHeight;
+extern const int txSubTileWidthBytes;
+extern const int txTileWidth;
+extern const int txTileHeight;
+extern const int txNumDirChanges;
+extern struct dirChange txDirChanges[];
+/**
+ * Setting for Intel Tile-Y framebuffer layout
+ * Even though a simple generic detiling logic doesnt require the
+ * dummy 256 posOffset entry, the pseudo parallel detiling based
+ * opti logic requires it to know about the Tile boundary.
+ */
+extern const int tyBytesPerPixel;
+extern const int tySubTileWidth;
+extern const int tySubTileHeight;
+extern const int tySubTileWidthBytes;
+extern const int tyTileWidth;
+extern const int tyTileHeight;
+extern const int tyNumDirChanges;
+extern struct dirChange tyDirChanges[];
+
+/**
+ * Generic Logic to Detile into linear layout.
+ *
+ * The parameters below are shared by detile_generic_simple and
+ * detile_generic_opti. srcLineSize is expected to equal w*bytesPerPixel;
+ * any other pitch is not supported and is reported through av_log.
+ *
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ * @param bytesPerPixel the bytes per pixel for the image
+ * @param subTileWidth the width of subtile within the tile, in pixels
+ * @param subTileHeight the height of subtile within the tile, in pixels
+ * @param subTileWidthBytes the width of subtile within the tile, in bytes
+ * @param tileWidth the width of the tile, in pixels
+ * @param tileHeight the height of the tile, in pixels
+ * @param numDirChanges the number of entries in dirChanges
+ * @param dirChanges the direction change entries describing the walk of
+ *                   subtiles within a tile
+ */
+
+
+/**
+ * Generic detile simple version, which is fine-grained.
+ */
+void detile_generic_simple(int w, int h,
+                                  uint8_t *dst, int dstLineSize,
+                                  const uint8_t *src, int srcLineSize,
+                                  int bytesPerPixel,
+                                  int subTileWidth, int subTileHeight, int subTileWidthBytes,
+                                  int tileWidth, int tileHeight,
+                                  int numDirChanges, struct dirChange *dirChanges);
+
+
+/**
+ * Generic detile optimised version, minimum subtile supported 4x4.
+ *
+ * Detiles the same subtile of up to 8 horizontally adjacent tiles in one go,
+ * and takes the last entry of dirChanges as marking the tile boundary.
+ */
+void detile_generic_opti(int w, int h,
+                                uint8_t *dst, int dstLineSize,
+                                const uint8_t *src, int srcLineSize,
+                                int bytesPerPixel,
+                                int subTileWidth, int subTileHeight, int subTileWidthBytes,
+                                int tileWidth, int tileHeight,
+                                int numDirChanges, struct dirChange *dirChanges);
+
+
+// detile_generic resolves to the optimised version only if DETILE_GENERIC_OPTI
+// is defined before this header is included, else to the simple version.
+#ifdef DETILE_GENERIC_OPTI
+#define detile_generic detile_generic_opti
+#else
+#define detile_generic detile_generic_simple
+#endif
+
+
+/**
+ * detile demuxer.
+ *
+ * Calls the detile routine matching the given mode. Nothing is detiled for
+ * TILE_NONE, for a format modifier which cant be mapped to a mode, or for an
+ * unknown mode; the latter two cases are reported through av_log.
+ *
+ * @param mode the fbtile mode based detiling to call
+ * @param arg1 the format_modifier, in case mode is TILE_AUTO
+ * @param w width of the image
+ * @param h height of the image
+ * @param dst the destination image buffer
+ * @param dstLineSize the size of each row in dst image, in bytes
+ * @param src the source image buffer
+ * @param srcLineSize the size of each row in src image, in bytes
+ * @param bytesPerPixel the bytes per pixel for the image; not used by the
+ *                      current implementation
+ *
+ * @return 0 if detiled, 1 if not
+ */
+int detile_this(int mode, uint64_t arg1,
+                        int w, int h,
+                        uint8_t *dst, int dstLineSize,
+                        uint8_t *src, int srcLineSize,
+                        int bytesPerPixel);
+
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_FBTILE_H */
+// vim: set expandtab sts=4: //
-- 
2.25.1



More information about the ffmpeg-devel mailing list