[FFmpeg-cvslog] avcodec/magicyuvenc: add slice encoding support
Paul B Mahol
git at videolan.org
Sat Jun 3 01:12:59 EEST 2023
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Fri Jun 2 21:51:54 2023 +0200| [6b8d53f72801dd17eb5804aa3ba1134884475309] | committer: Paul B Mahol
avcodec/magicyuvenc: add slice encoding support
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6b8d53f72801dd17eb5804aa3ba1134884475309
---
libavcodec/magicyuvenc.c | 212 ++++++++++++++++++++++++++---------------------
1 file changed, 119 insertions(+), 93 deletions(-)
diff --git a/libavcodec/magicyuvenc.c b/libavcodec/magicyuvenc.c
index 9e41c1b0fe..a12ef5a33d 100644
--- a/libavcodec/magicyuvenc.c
+++ b/libavcodec/magicyuvenc.c
@@ -22,6 +22,7 @@
#include <stdlib.h>
#include <string.h>
+#include "libavutil/cpu.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/qsort.h"
@@ -63,8 +64,8 @@ typedef struct MagicYUVContext {
int correlate;
int hshift[4];
int vshift[4];
- uint8_t *slices[4];
- unsigned slice_pos[4];
+ uint8_t **slices;
+ unsigned *slice_pos;
unsigned tables_size;
uint8_t *decorrelate_buf[2];
HuffEntry he[4][256];
@@ -150,7 +151,6 @@ static av_cold int magy_encode_init(AVCodecContext *avctx)
{
MagicYUVContext *s = avctx->priv_data;
PutByteContext pb;
- int i;
switch (avctx->pix_fmt) {
case AV_PIX_FMT_GBRP:
@@ -201,14 +201,23 @@ static av_cold int magy_encode_init(AVCodecContext *avctx)
s->planes = av_pix_fmt_count_planes(avctx->pix_fmt);
- s->nb_slices = 1;
+ s->nb_slices = (avctx->slices <= 0) ? av_cpu_count() : avctx->slices;
+ s->nb_slices = FFMIN(s->nb_slices, avctx->height >> s->vshift[1]);
+ s->nb_slices = FFMAX(1, s->nb_slices);
+ s->slice_height = FFALIGN((avctx->height + s->nb_slices - 1) / s->nb_slices, 1 << s->vshift[1]);
+ s->slice_pos = av_calloc(s->nb_slices * s->planes, sizeof(*s->slice_pos));
+ s->slices = av_calloc(s->nb_slices * s->planes, sizeof(*s->slices));
+ if (!s->slices || !s->slice_pos)
+ return AVERROR(ENOMEM);
- for (i = 0; i < s->planes; i++) {
- s->slices[i] = av_malloc(avctx->width * (avctx->height + 2) +
- AV_INPUT_BUFFER_PADDING_SIZE);
- if (!s->slices[i]) {
- av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer.\n");
- return AVERROR(ENOMEM);
+ for (int n = 0; n < s->nb_slices; n++) {
+ for (int i = 0; i < s->planes; i++) {
+ s->slices[n * s->planes + i] = av_malloc(avctx->width * (s->slice_height + 2) +
+ AV_INPUT_BUFFER_PADDING_SIZE);
+ if (!s->slices[n * s->planes + i]) {
+ av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer.\n");
+ return AVERROR(ENOMEM);
+ }
}
}
@@ -263,15 +272,12 @@ static void calculate_codes(HuffEntry *he, uint16_t codes_count[33])
}
}
-static void count_usage(uint8_t *src, int width,
+static void count_usage(const uint8_t *src, int width,
int height, PTable *counts)
{
- int i, j;
-
- for (j = 0; j < height; j++) {
- for (i = 0; i < width; i++) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++)
counts[src[i]].prob++;
- }
src += width;
}
}
@@ -352,26 +358,30 @@ static void magy_huffman_compute_bits(PTable *prob_table, HuffEntry *distincts,
}
}
-static int encode_table(AVCodecContext *avctx, uint8_t *dst,
- int width, int height,
- PutBitContext *pb, HuffEntry *he)
+static int encode_table(AVCodecContext *avctx,
+ PutBitContext *pb, HuffEntry *he, int plane)
{
+ MagicYUVContext *s = avctx->priv_data;
PTable counts[256] = { {0} };
uint16_t codes_counts[33] = { 0 };
- int i;
- count_usage(dst, width, height, counts);
+ for (int n = 0; n < s->nb_slices; n++) {
+ const uint8_t *dst = s->slices[n * s->planes + plane];
- for (i = 0; i < 256; i++) {
- counts[i].prob++;
- counts[i].value = i;
+ count_usage(dst, AV_CEIL_RSHIFT(avctx->width, s->hshift[plane]),
+ AV_CEIL_RSHIFT(s->slice_height, s->vshift[plane]), counts);
+
+ for (int i = 0; i < 256; i++) {
+ counts[i].prob++;
+ counts[i].value = i;
+ }
}
magy_huffman_compute_bits(counts, he, codes_counts, 256, 12);
calculate_codes(he, codes_counts);
- for (i = 0; i < 256; i++) {
+ for (int i = 0; i < 256; i++) {
put_bits(pb, 1, 0);
put_bits(pb, 7, he[i].len);
}
@@ -410,13 +420,66 @@ static int encode_slice(uint8_t *src, uint8_t *dst, int dst_size,
return put_bytes_output(&pb);
}
+static int predict_slice(AVCodecContext *avctx, void *tdata,
+ int n, int threadnr)
+{
+ const int aligned_width = FFALIGN(avctx->width, 16);
+ MagicYUVContext *s = avctx->priv_data;
+ const int slice_height = s->slice_height;
+ const int last_height = FFMIN(slice_height, avctx->height - n * slice_height);
+ const int height = (n < (s->nb_slices - 1)) ? slice_height : last_height;
+ const int width = avctx->width;
+ AVFrame *frame = tdata;
+
+ if (s->correlate) {
+ uint8_t *decorrelated[2] = { s->decorrelate_buf[0] + n * slice_height * aligned_width,
+ s->decorrelate_buf[1] + n * slice_height * aligned_width };
+ const int decorrelate_linesize = aligned_width;
+ const uint8_t *const data[4] = { decorrelated[0], frame->data[0] + n * slice_height * frame->linesize[0],
+ decorrelated[1], frame->data[3] + n * slice_height * frame->linesize[3] };
+ const uint8_t *r, *g, *b;
+ const int linesize[4] = { decorrelate_linesize, frame->linesize[0],
+ decorrelate_linesize, frame->linesize[3] };
+
+ g = frame->data[0] + n * slice_height * frame->linesize[0];
+ b = frame->data[1] + n * slice_height * frame->linesize[1];
+ r = frame->data[2] + n * slice_height * frame->linesize[2];
+
+ for (int i = 0; i < slice_height; i++) {
+ s->llvidencdsp.diff_bytes(decorrelated[0], b, g, width);
+ s->llvidencdsp.diff_bytes(decorrelated[1], r, g, width);
+ g += frame->linesize[0];
+ b += frame->linesize[1];
+ r += frame->linesize[2];
+ decorrelated[0] += decorrelate_linesize;
+ decorrelated[1] += decorrelate_linesize;
+ }
+
+ for (int i = 0; i < s->planes; i++) {
+ s->predict(s, data[i], s->slices[n * s->planes + i], linesize[i],
+ frame->width, height);
+ }
+ } else {
+ for (int i = 0; i < s->planes; i++) {
+ s->predict(s, frame->data[i] + n * (slice_height >> s->vshift[i]) * frame->linesize[i],
+ s->slices[n * s->planes + i],
+ frame->linesize[i],
+ AV_CEIL_RSHIFT(frame->width, s->hshift[i]),
+ AV_CEIL_RSHIFT(height, s->vshift[i]));
+ }
+ }
+
+ return 0;
+}
+
static int magy_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
const AVFrame *frame, int *got_packet)
{
MagicYUVContext *s = avctx->priv_data;
PutByteContext pb;
const int width = avctx->width, height = avctx->height;
- int pos, slice, i, j, ret = 0;
+ const int slice_height = s->slice_height;
+ int pos, ret = 0;
ret = ff_alloc_packet(avctx, pkt, (256 + 4 * s->nb_slices + width * height) *
s->planes + 256);
@@ -439,92 +502,53 @@ static int magy_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
bytestream2_put_le32(&pb, avctx->width);
bytestream2_put_le32(&pb, avctx->height);
bytestream2_put_le32(&pb, avctx->width);
- bytestream2_put_le32(&pb, avctx->height);
+ bytestream2_put_le32(&pb, slice_height);
bytestream2_put_le32(&pb, 0);
- for (i = 0; i < s->planes; i++) {
+ for (int i = 0; i < s->planes; i++) {
bytestream2_put_le32(&pb, 0);
- for (j = 1; j < s->nb_slices; j++) {
+ for (int j = 1; j < s->nb_slices; j++)
bytestream2_put_le32(&pb, 0);
- }
}
bytestream2_put_byte(&pb, s->planes);
- for (i = 0; i < s->planes; i++) {
- for (slice = 0; slice < s->nb_slices; slice++) {
- bytestream2_put_byte(&pb, i);
- }
+ for (int i = 0; i < s->planes; i++) {
+ for (int n = 0; n < s->nb_slices; n++)
+ bytestream2_put_byte(&pb, n * s->planes + i);
}
- if (s->correlate) {
- uint8_t *decorrelated[2] = { s->decorrelate_buf[0],
- s->decorrelate_buf[1] };
- const int decorrelate_linesize = FFALIGN(width, 16);
- const uint8_t *const data[4] = { decorrelated[0], frame->data[0],
- decorrelated[1], frame->data[3] };
- const uint8_t *r, *g, *b;
- const int linesize[4] = { decorrelate_linesize, frame->linesize[0],
- decorrelate_linesize, frame->linesize[3] };
-
- g = frame->data[0];
- b = frame->data[1];
- r = frame->data[2];
-
- for (i = 0; i < height; i++) {
- s->llvidencdsp.diff_bytes(decorrelated[0], b, g, width);
- s->llvidencdsp.diff_bytes(decorrelated[1], r, g, width);
- g += frame->linesize[0];
- b += frame->linesize[1];
- r += frame->linesize[2];
- decorrelated[0] += decorrelate_linesize;
- decorrelated[1] += decorrelate_linesize;
- }
-
- for (i = 0; i < s->planes; i++) {
- for (slice = 0; slice < s->nb_slices; slice++) {
- s->predict(s, data[i], s->slices[i], linesize[i],
- frame->width, frame->height);
- }
- }
- } else {
- for (i = 0; i < s->planes; i++) {
- for (slice = 0; slice < s->nb_slices; slice++) {
- s->predict(s, frame->data[i], s->slices[i], frame->linesize[i],
- AV_CEIL_RSHIFT(frame->width, s->hshift[i]),
- AV_CEIL_RSHIFT(frame->height, s->vshift[i]));
- }
- }
- }
+ avctx->execute2(avctx, predict_slice, (void *)frame, NULL, s->nb_slices);
init_put_bits(&s->pb, pkt->data + bytestream2_tell_p(&pb), bytestream2_get_bytes_left_p(&pb));
- for (i = 0; i < s->planes; i++) {
- encode_table(avctx, s->slices[i],
- AV_CEIL_RSHIFT(frame->width, s->hshift[i]),
- AV_CEIL_RSHIFT(frame->height, s->vshift[i]),
- &s->pb, s->he[i]);
- }
+ for (int i = 0; i < s->planes; i++)
+ encode_table(avctx, &s->pb, s->he[i], i);
+
s->tables_size = put_bytes_count(&s->pb, 1);
bytestream2_skip_p(&pb, s->tables_size);
- for (i = 0; i < s->planes; i++) {
- unsigned slice_size;
-
- s->slice_pos[i] = bytestream2_tell_p(&pb);
- slice_size = encode_slice(s->slices[i], pkt->data + bytestream2_tell_p(&pb),
- bytestream2_get_bytes_left_p(&pb),
- AV_CEIL_RSHIFT(frame->width, s->hshift[i]),
- AV_CEIL_RSHIFT(frame->height, s->vshift[i]),
- s->he[i], s->frame_pred);
- bytestream2_skip_p(&pb, slice_size);
+ for (int n = 0; n < s->nb_slices; n++) {
+ for (int i = 0; i < s->planes; i++) {
+ unsigned slice_size;
+
+ s->slice_pos[n * s->planes + i] = bytestream2_tell_p(&pb);
+ slice_size = encode_slice(s->slices[n * s->planes + i],
+ pkt->data + bytestream2_tell_p(&pb),
+ bytestream2_get_bytes_left_p(&pb),
+ AV_CEIL_RSHIFT(frame->width, s->hshift[i]),
+ AV_CEIL_RSHIFT(slice_height, s->vshift[i]),
+ s->he[i], s->frame_pred);
+ bytestream2_skip_p(&pb, slice_size);
+ }
}
pos = bytestream2_tell_p(&pb);
bytestream2_seek_p(&pb, 32, SEEK_SET);
bytestream2_put_le32(&pb, s->slice_pos[0] - 32);
- for (i = 0; i < s->planes; i++) {
- bytestream2_put_le32(&pb, s->slice_pos[i] - 32);
+ for (int i = 0; i < s->planes; i++) {
+ for (int n = 0; n < s->nb_slices; n++)
+ bytestream2_put_le32(&pb, s->slice_pos[n * s->planes + i] - 32);
}
bytestream2_seek_p(&pb, pos, SEEK_SET);
@@ -538,10 +562,11 @@ static int magy_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
static av_cold int magy_encode_close(AVCodecContext *avctx)
{
MagicYUVContext *s = avctx->priv_data;
- int i;
- for (i = 0; i < s->planes; i++)
+ av_freep(&s->slice_pos);
+ for (int i = 0; i < s->planes && s->slices; i++)
av_freep(&s->slices[i]);
+ av_freep(&s->slices);
av_freep(&s->decorrelate_buf);
return 0;
@@ -570,6 +595,7 @@ const FFCodec ff_magicyuv_encoder = {
.p.type = AVMEDIA_TYPE_VIDEO,
.p.id = AV_CODEC_ID_MAGICYUV,
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
+ AV_CODEC_CAP_SLICE_THREADS |
AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
.priv_data_size = sizeof(MagicYUVContext),
.p.priv_class = &magicyuv_class,
More information about the ffmpeg-cvslog
mailing list