[FFmpeg-cvslog] lavc/dvx: use texdsp funcs for texture block decompression
Connor Worley
git at videolan.org
Sun Feb 11 01:42:28 EET 2024
ffmpeg | branch: master | Connor Worley <connorbworley at gmail.com> | Sat Feb 10 14:58:46 2024 -0800| [2f9936e446d56f6e8da9f179143d3ac5a4546afc] | committer: Lynne
lavc/dvx: use texdsp funcs for texture block decompression
Signed-off-by: Connor Worley <connorbworley at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f9936e446d56f6e8da9f179143d3ac5a4546afc
---
libavcodec/dxv.c | 289 +++++++++++++++----------------------------------------
1 file changed, 75 insertions(+), 214 deletions(-)
diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
index 16c34fff3b..cd78de3e0d 100644
--- a/libavcodec/dxv.c
+++ b/libavcodec/dxv.c
@@ -38,15 +38,12 @@ typedef struct DXVContext {
GetByteContext gbc;
uint8_t *tex_data; // Compressed texture
- uint8_t *ctex_data; // Compressed texture
+ uint8_t *ctex_data; // Compressed chroma texture
int tex_rat; // Compression ratio
int tex_step; // Distance between blocks
int ctex_step; // Distance between blocks
int64_t tex_size; // Texture size
- int64_t ctex_size; // Texture size
-
- /* Optimal number of slices for parallel decoding */
- int slice_count;
+ int64_t ctex_size; // Chroma texture size
uint8_t *op_data[4]; // Opcodes
int64_t op_size[4]; // Opcodes size
@@ -56,198 +53,8 @@ typedef struct DXVContext {
int ctexture_block_w;
int ctexture_block_h;
-
- /* Pointer to the selected decompression function */
- int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
- int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0,
- uint8_t *plane1, ptrdiff_t stride1,
- const uint8_t *block);
} DXVContext;
-static void decompress_indices(uint8_t *dst, const uint8_t *src)
-{
- int block, i;
-
- for (block = 0; block < 2; block++) {
- int tmp = AV_RL24(src);
-
- /* Unpack 8x3 bit from last 3 byte block */
- for (i = 0; i < 8; i++)
- dst[i] = (tmp >> (i * 3)) & 0x7;
-
- src += 3;
- dst += 8;
- }
-}
-
-static int extract_component(int yo0, int yo1, int code)
-{
- int yo;
-
- if (yo0 == yo1) {
- yo = yo0;
- } else if (code == 0) {
- yo = yo0;
- } else if (code == 1) {
- yo = yo1;
- } else {
- if (yo0 > yo1) {
- yo = (uint8_t) (((8 - code) * yo0 +
- (code - 1) * yo1) / 7);
- } else {
- if (code == 6) {
- yo = 0;
- } else if (code == 7) {
- yo = 255;
- } else {
- yo = (uint8_t) (((6 - code) * yo0 +
- (code - 1) * yo1) / 5);
- }
- }
- }
-
- return yo;
-}
-
-static int cocg_block(uint8_t *plane0, ptrdiff_t stride0,
- uint8_t *plane1, ptrdiff_t stride1,
- const uint8_t *block)
-{
- uint8_t co_indices[16];
- uint8_t cg_indices[16];
- uint8_t co0 = *(block);
- uint8_t co1 = *(block + 1);
- uint8_t cg0 = *(block + 8);
- uint8_t cg1 = *(block + 9);
- int x, y;
-
- decompress_indices(co_indices, block + 2);
- decompress_indices(cg_indices, block + 10);
-
- for (y = 0; y < 4; y++) {
- for (x = 0; x < 4; x++) {
- int co_code = co_indices[x + y * 4];
- int cg_code = cg_indices[x + y * 4];
-
- plane0[x] = extract_component(cg0, cg1, cg_code);
- plane1[x] = extract_component(co0, co1, co_code);
- }
- plane0 += stride0;
- plane1 += stride1;
- }
-
- return 16;
-}
-
-static void yao_subblock(uint8_t *dst, uint8_t *yo_indices,
- ptrdiff_t stride, const uint8_t *block)
-{
- uint8_t yo0 = *(block);
- uint8_t yo1 = *(block + 1);
- int x, y;
-
- decompress_indices(yo_indices, block + 2);
-
- for (y = 0; y < 4; y++) {
- for (x = 0; x < 4; x++) {
- int yo_code = yo_indices[x + y * 4];
-
- dst[x] = extract_component(yo0, yo1, yo_code);
- }
- dst += stride;
- }
-}
-
-static int yo_block(uint8_t *dst, ptrdiff_t stride,
- uint8_t *unused0, ptrdiff_t unused1,
- const uint8_t *block)
-{
- uint8_t yo_indices[16];
-
- yao_subblock(dst, yo_indices, stride, block);
- yao_subblock(dst + 4, yo_indices, stride, block + 8);
- yao_subblock(dst + 8, yo_indices, stride, block + 16);
- yao_subblock(dst + 12, yo_indices, stride, block + 24);
-
- return 32;
-}
-
-static int yao_block(uint8_t *plane0, ptrdiff_t stride0,
- uint8_t *plane3, ptrdiff_t stride1,
- const uint8_t *block)
-{
- uint8_t yo_indices[16];
- uint8_t a_indices[16];
-
- yao_subblock(plane0, yo_indices, stride0, block);
- yao_subblock(plane3, a_indices, stride1, block + 8);
- yao_subblock(plane0 + 4, yo_indices, stride0, block + 16);
- yao_subblock(plane3 + 4, a_indices, stride1, block + 24);
- yao_subblock(plane0 + 8, yo_indices, stride0, block + 32);
- yao_subblock(plane3 + 8, a_indices, stride1, block + 40);
- yao_subblock(plane0 + 12, yo_indices, stride0, block + 48);
- yao_subblock(plane3 + 12, a_indices, stride1, block + 56);
-
- return 64;
-}
-
-static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
- int slice, int thread_nb)
-{
- const DXVContext *ctx = avctx->priv_data;
- AVFrame *frame = arg;
- const uint8_t *d = ctx->tex_data;
- int w_block = avctx->coded_width / ctx->texture_block_w;
- int h_block = avctx->coded_height / ctx->texture_block_h;
- int x, y;
- int start_slice, end_slice;
-
- start_slice = h_block * slice / ctx->slice_count;
- end_slice = h_block * (slice + 1) / ctx->slice_count;
-
- if (ctx->tex_funct) {
- for (y = start_slice; y < end_slice; y++) {
- uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
- int off = y * w_block;
- for (x = 0; x < w_block; x++) {
- ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
- d + (off + x) * ctx->tex_step);
- }
- }
- } else {
- const uint8_t *c = ctx->ctex_data;
-
- for (y = start_slice; y < end_slice; y++) {
- uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
- uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h;
- int off = y * w_block;
- for (x = 0; x < w_block; x++) {
- ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0],
- p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3],
- d + (off + x) * ctx->tex_step);
- }
- }
-
- w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w;
- h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h;
- start_slice = h_block * slice / ctx->slice_count;
- end_slice = h_block * (slice + 1) / ctx->slice_count;
-
- for (y = start_slice; y < end_slice; y++) {
- uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h;
- uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h;
- int off = y * w_block;
- for (x = 0; x < w_block; x++) {
- ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1],
- p1 + x * ctx->ctexture_block_w, frame->linesize[2],
- c + (off + x) * ctx->ctex_step);
- }
- }
- }
-
- return 0;
-}
-
/* This scheme addresses already decoded elements depending on 2-bit status:
* 0 -> copy new element
* 1 -> copy one element from position -x
@@ -1044,6 +851,8 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
{
DXVContext *ctx = avctx->priv_data;
GetByteContext *gbc = &ctx->gbc;
+ AVCodecContext cavctx = *avctx;
+ TextureDSPThreadContext texdsp_ctx, ctexdsp_ctx;
int (*decompress_tex)(AVCodecContext *avctx);
const char *msgcomp, *msgtext;
uint32_t tag;
@@ -1053,21 +862,22 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
bytestream2_init(gbc, avpkt->data, avpkt->size);
+ cavctx.coded_height = avctx->coded_height / 2;
+ cavctx.coded_width = avctx->coded_width / 2;
+
ctx->texture_block_h = 4;
ctx->texture_block_w = 4;
avctx->pix_fmt = AV_PIX_FMT_RGBA;
avctx->colorspace = AVCOL_SPC_RGB;
- ctx->tex_funct = NULL;
- ctx->tex_funct_planar[0] = NULL;
- ctx->tex_funct_planar[1] = NULL;
-
tag = bytestream2_get_le32(gbc);
switch (tag) {
case DXV_FMT_DXT1:
decompress_tex = dxv_decompress_dxt1;
- ctx->tex_funct = ctx->texdsp.dxt1_block;
+ texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
+ texdsp_ctx.tex_ratio = 8;
+ texdsp_ctx.raw_ratio = 16;
ctx->tex_rat = 8;
ctx->tex_step = 8;
msgcomp = "DXTR1";
@@ -1076,7 +886,9 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
case DXV_FMT_DXT5:
decompress_tex = dxv_decompress_dxt5;
/* DXV misnomers DXT5, alpha is premultiplied so use DXT4 instead */
- ctx->tex_funct = ctx->texdsp.dxt4_block;
+ texdsp_ctx.tex_funct = ctx->texdsp.dxt4_block;
+ texdsp_ctx.tex_ratio = 16;
+ texdsp_ctx.raw_ratio = 16;
ctx->tex_rat = 4;
ctx->tex_step = 16;
msgcomp = "DXTR5";
@@ -1084,8 +896,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
break;
case DXV_FMT_YCG6:
decompress_tex = dxv_decompress_ycg6;
- ctx->tex_funct_planar[0] = yo_block;
- ctx->tex_funct_planar[1] = cocg_block;
+ texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
+ texdsp_ctx.tex_ratio = 8;
+ texdsp_ctx.raw_ratio = 4;
+ ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
+ ctexdsp_ctx.tex_ratio = 16;
+ ctexdsp_ctx.raw_ratio = 4;
ctx->tex_rat = 8;
ctx->tex_step = 32;
ctx->ctex_step = 16;
@@ -1101,8 +917,12 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
break;
case DXV_FMT_YG10:
decompress_tex = dxv_decompress_yg10;
- ctx->tex_funct_planar[0] = yao_block;
- ctx->tex_funct_planar[1] = cocg_block;
+ texdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
+ texdsp_ctx.tex_ratio = 16;
+ texdsp_ctx.raw_ratio = 4;
+ ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
+ ctexdsp_ctx.tex_ratio = 16;
+ ctexdsp_ctx.raw_ratio = 4;
ctx->tex_rat = 4;
ctx->tex_step = 64;
ctx->ctex_step = 16;
@@ -1131,14 +951,20 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
}
if (old_type & 0x40) {
+ tag = DXV_FMT_DXT5;
msgtext = "DXT5";
- ctx->tex_funct = ctx->texdsp.dxt5_block;
+ texdsp_ctx.tex_funct = ctx->texdsp.dxt5_block;
+ texdsp_ctx.tex_ratio = 16;
+ texdsp_ctx.raw_ratio = 16;
ctx->tex_step = 16;
} else if (old_type & 0x20 || version_major == 1) {
+ tag = DXV_FMT_DXT1;
msgtext = "DXT1";
- ctx->tex_funct = ctx->texdsp.dxt1_block;
+ texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
+ texdsp_ctx.tex_ratio = 8;
+ texdsp_ctx.raw_ratio = 16;
ctx->tex_step = 8;
} else {
av_log(avctx, AV_LOG_ERROR, "Unsupported header (0x%08"PRIX32")\n.", tag);
@@ -1148,10 +974,10 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
break;
}
- ctx->slice_count = av_clip(avctx->thread_count, 1,
- avctx->coded_height / FFMAX(ctx->texture_block_h,
- ctx->ctexture_block_h));
-
+ texdsp_ctx.slice_count = av_clip(avctx->thread_count, 1,
+ avctx->coded_height / TEXTURE_BLOCK_H);
+ ctexdsp_ctx.slice_count = av_clip(avctx->thread_count, 1,
+ cavctx.coded_height / TEXTURE_BLOCK_H);
/* New header is 12 bytes long. */
if (!old_type) {
version_major = bytestream2_get_byte(gbc) - 1;
@@ -1216,9 +1042,44 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
if (ret < 0)
return ret;
- /* Now decompress the texture with the standard functions. */
- avctx->execute2(avctx, decompress_texture_thread,
- frame, NULL, ctx->slice_count);
+ switch (tag) {
+ case DXV_FMT_YG10:
+ /* BC5 texture with alpha in the second half of each block */
+ texdsp_ctx.tex_data.in = ctx->tex_data + texdsp_ctx.tex_ratio / 2;
+ texdsp_ctx.frame_data.out = frame->data[3];
+ texdsp_ctx.stride = frame->linesize[3];
+ ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
+ if (ret < 0)
+ return ret;
+ /* fallthrough */
+ case DXV_FMT_YCG6:
+ /* BC5 texture with Co in the first half of each block and Cg in the second */
+ ctexdsp_ctx.tex_data.in = ctx->ctex_data;
+ ctexdsp_ctx.frame_data.out = frame->data[2];
+ ctexdsp_ctx.stride = frame->linesize[2];
+ ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
+ if (ret < 0)
+ return ret;
+ ctexdsp_ctx.tex_data.in = ctx->ctex_data + ctexdsp_ctx.tex_ratio / 2;
+ ctexdsp_ctx.frame_data.out = frame->data[1];
+ ctexdsp_ctx.stride = frame->linesize[1];
+ ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
+ if (ret < 0)
+ return ret;
+ /* fallthrough */
+ case DXV_FMT_DXT1:
+ case DXV_FMT_DXT5:
+ /* For DXT1 and DXT5, self explanatory
+ * For YCG6, BC4 texture for Y
+ * For YG10, BC5 texture with Y in the first half of each block */
+ texdsp_ctx.tex_data.in = ctx->tex_data;
+ texdsp_ctx.frame_data.out = frame->data[0];
+ texdsp_ctx.stride = frame->linesize[0];
+ ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
+ if (ret < 0)
+ return ret;
+ break;
+ }
/* Frame is ready to be output. */
frame->pict_type = AV_PICTURE_TYPE_I;
More information about the ffmpeg-cvslog
mailing list