[FFmpeg-devel] [PATCH v1 2/2] avcodec/v210dec: add slice threading support
lance.lmwang at gmail.com
lance.lmwang at gmail.com
Fri Aug 30 06:37:52 EEST 2019
From: Limin Wang <lance.lmwang at gmail.com>
Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
---
libavcodec/v210dec.c | 136 +++++++++++++++++++++++++++----------------
libavcodec/v210dec.h | 1 +
2 files changed, 87 insertions(+), 50 deletions(-)
diff --git a/libavcodec/v210dec.c b/libavcodec/v210dec.c
index 6ce18aab5a..d3add8b45b 100644
--- a/libavcodec/v210dec.c
+++ b/libavcodec/v210dec.c
@@ -28,6 +28,7 @@
#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavutil/intreadwrite.h"
+#include "thread.h"
#define READ_PIXELS(a, b, c) \
do { \
@@ -37,6 +38,13 @@
*c++ = (val >> 20) & 0x3FF; \
} while (0)
+#define MAX_SLICES 32
+typedef struct ThreadData { /* arguments shared by every v210_decode_slice() job of one frame */
+ AVFrame *frame; /* frame whose data[0..2] planes each job indexes, as 16-bit samples, from its first row */
+ uint8_t *buf; /* first byte of the packed input rows, after decode_frame's optional 64-byte C210 header skip */
+ int stride; /* byte distance between consecutive input rows; NOTE(review): decode_frame stores this before its "too small padding" fallback recomputes stride - confirm the jobs receive the adjusted value */
+} ThreadData;
+
static void v210_planar_unpack_c(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
{
uint32_t val;
@@ -67,58 +75,32 @@ static av_cold int decode_init(AVCodecContext *avctx)
s->aligned_input = 0;
ff_v210dec_init(s);
+ s->slice_count = av_clip(avctx->thread_count, 1, MAX_SLICES);
return 0;
}
-static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
- AVPacket *avpkt)
+static int v210_decode_slice(AVCodecContext *avctx, void *arg, int jobnr, int nb_jobs)
{
V210DecContext *s = avctx->priv_data;
-
- int h, w, ret, stride, aligned_input;
- AVFrame *pic = data;
- const uint8_t *psrc = avpkt->data;
+ int h, w;
+ ThreadData *td = arg;
+ AVFrame *frame = td->frame;
+ int stride = td->stride;
+ int slice_h = avctx->height / s->slice_count;
+ int slice_m = avctx->height % s->slice_count;
+ int slice_start = jobnr * slice_h;
+ int slice_end = slice_start + slice_h;
+ const uint8_t *psrc = td->buf + stride * slice_start;
uint16_t *y, *u, *v;
- if (s->custom_stride )
- stride = s->custom_stride;
- else {
- int aligned_width = ((avctx->width + 47) / 48) * 48;
- stride = aligned_width * 8 / 3;
- }
+ /* the last job also decodes the leftover rows (height % slice_count) */
+ if (jobnr == s->slice_count - 1)
+ slice_end += slice_m;
- if (avpkt->size < stride * avctx->height) {
- if ((((avctx->width + 23) / 24) * 24 * 8) / 3 * avctx->height == avpkt->size) {
- stride = avpkt->size / avctx->height;
- if (!s->stride_warning_shown)
- av_log(avctx, AV_LOG_WARNING, "Broken v210 with too small padding (64 byte) detected\n");
- s->stride_warning_shown = 1;
- } else {
- av_log(avctx, AV_LOG_ERROR, "packet too small\n");
- return AVERROR_INVALIDDATA;
- }
- }
- if (avctx->codec_tag == MKTAG('C', '2', '1', '0')
- && AV_RN32(psrc) == AV_RN32("INFO")
- && avpkt->size - 64 >= stride * avctx->height)
- psrc += 64;
-
- aligned_input = !((uintptr_t)psrc & 0x1f) && !(stride & 0x1f);
- if (aligned_input != s->aligned_input) {
- s->aligned_input = aligned_input;
- ff_v210dec_init(s);
- }
-
- if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
- return ret;
-
- y = (uint16_t*)pic->data[0];
- u = (uint16_t*)pic->data[1];
- v = (uint16_t*)pic->data[2];
- pic->pict_type = AV_PICTURE_TYPE_I;
- pic->key_frame = 1;
-
- for (h = 0; h < avctx->height; h++) {
+ y = (uint16_t*)frame->data[0] + slice_start * frame->linesize[0] / 2;
+ u = (uint16_t*)frame->data[1] + slice_start * frame->linesize[1] / 2;
+ v = (uint16_t*)frame->data[2] + slice_start * frame->linesize[2] / 2;
+ for (h = slice_start; h < slice_end; h++) {
const uint32_t *src = (const uint32_t*)psrc;
uint32_t val;
@@ -154,16 +136,68 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
}
psrc += stride;
- y += pic->linesize[0] / 2 - avctx->width + (avctx->width & 1);
- u += pic->linesize[1] / 2 - avctx->width / 2;
- v += pic->linesize[2] / 2 - avctx->width / 2;
+ y += frame->linesize[0] / 2 - avctx->width + (avctx->width & 1);
+ u += frame->linesize[1] / 2 - avctx->width / 2;
+ v += frame->linesize[2] / 2 - avctx->width / 2;
+ }
+
+ return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+ AVPacket *avpkt)
+{
+ V210DecContext *s = avctx->priv_data;
+ ThreadData td;
+ int ret, stride, aligned_input;
+ AVFrame *frame = data;
+ const uint8_t *psrc = avpkt->data;
+
+ if (s->custom_stride )
+ stride = s->custom_stride;
+ else {
+ int aligned_width = ((avctx->width + 47) / 48) * 48;
+ stride = aligned_width * 8 / 3;
}
+ td.stride = stride;
+
+ if (avpkt->size < stride * avctx->height) {
+ if ((((avctx->width + 23) / 24) * 24 * 8) / 3 * avctx->height == avpkt->size) {
+ stride = avpkt->size / avctx->height;
+ if (!s->stride_warning_shown)
+ av_log(avctx, AV_LOG_WARNING, "Broken v210 with too small padding (64 byte) detected\n");
+ s->stride_warning_shown = 1;
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "packet too small\n");
+ return AVERROR_INVALIDDATA;
+ }
+ }
+ if (avctx->codec_tag == MKTAG('C', '2', '1', '0')
+ && AV_RN32(psrc) == AV_RN32("INFO")
+ && avpkt->size - 64 >= stride * avctx->height)
+ psrc += 64;
+
+ aligned_input = !((uintptr_t)psrc & 0x1f) && !(stride & 0x1f);
+ if (aligned_input != s->aligned_input) {
+ s->aligned_input = aligned_input;
+ ff_v210dec_init(s);
+ }
+
+ if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+ return ret;
+
+ frame->pict_type = AV_PICTURE_TYPE_I;
+ frame->key_frame = 1;
+
+ td.buf = (uint8_t*)psrc;
+ td.frame = frame;
+ avctx->execute2(avctx, v210_decode_slice, &td, NULL, s->slice_count);
if (avctx->field_order > AV_FIELD_PROGRESSIVE) {
/* we have interlaced material flagged in container */
- pic->interlaced_frame = 1;
+ frame->interlaced_frame = 1;
if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
- pic->top_field_first = 1;
+ frame->top_field_first = 1;
}
*got_frame = 1;
@@ -193,6 +227,8 @@ AVCodec ff_v210_decoder = {
.priv_data_size = sizeof(V210DecContext),
.init = decode_init,
.decode = decode_frame,
- .capabilities = AV_CODEC_CAP_DR1,
+ .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
.priv_class = &v210dec_class,
+ .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE |
+ FF_CODEC_CAP_INIT_CLEANUP,
};
diff --git a/libavcodec/v210dec.h b/libavcodec/v210dec.h
index cfdb29da09..35819437de 100644
--- a/libavcodec/v210dec.h
+++ b/libavcodec/v210dec.h
@@ -26,6 +26,7 @@
typedef struct {
AVClass *av_class;
int custom_stride;
+ int slice_count; // Number of slices for threaded operations
int aligned_input;
int stride_warning_shown;
void (*unpack_frame)(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
--
2.21.0
More information about the ffmpeg-devel
mailing list