[FFmpeg-devel] [PATCH 24/24] lavfi/vf_scale: implement slice threading
Anton Khirnov
anton at khirnov.net
Mon May 31 10:55:15 EEST 2021
---
libavfilter/vf_scale.c | 182 +++++++++++++++++++++++++++++++----------
1 file changed, 141 insertions(+), 41 deletions(-)
diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index cdd7c4da0d..87317393bd 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -106,8 +106,16 @@ enum EvalMode {
typedef struct ScaleContext {
const AVClass *class;
- struct SwsContext *sws; ///< software scaler context
- struct SwsContext *isws[2]; ///< software scaler context for interlaced material
+
+ /**
+ * Scaler contexts.
+ * [0] - progressive
+ * [1/2] - top/bottom fields
+ */
+ struct SwsContext *(*scalers)[3];
+ unsigned int nb_scalers;
+ int *scaler_res;
+
AVDictionary *opts;
/**
@@ -122,6 +130,7 @@ typedef struct ScaleContext {
double param[2]; // sws params
int hsub, vsub; ///< chroma subsampling
+ int ohsub, ovsub; ///< output chroma subsampling
int slice_y; ///< top of current output slice
int input_is_pal; ///< set to 1 if the input format is paletted
int output_is_pal; ///< set to 1 if the output format is paletted
@@ -153,6 +162,7 @@ typedef struct ScaleContext {
int eval_mode; ///< expression evaluation mode
+ int passthrough;
} ScaleContext;
const AVFilter ff_vf_scale2ref;
@@ -330,13 +340,11 @@ static av_cold int init_dict(AVFilterContext *ctx, AVDictionary **opts)
+/**
+ * Free all per-thread scaler contexts and the array holding them.
+ *
+ * Walks s->nb_scalers rows of three contexts each (progressive, top field,
+ * bottom field), frees the array itself and resets the count, so that the
+ * context can be reconfigured or checked for "no scalers" afterwards.
+ */
static void scaler_free(ScaleContext *s)
{
- sws_freeContext(s->sws);
- sws_freeContext(s->isws[0]);
- sws_freeContext(s->isws[1]);
+    /* the array may be NULL if no scalers were ever allocated */
+    if (s->scalers) {
+        for (unsigned int i = 0; i < s->nb_scalers; i++)
+            for (int j = 0; j < 3; j++)
+                sws_freeContext(s->scalers[i][j]);
+    }
- s->sws = NULL;
- s->isws[0] = NULL;
- s->isws[1] = NULL;
+    av_freep(&s->scalers);
+    /* Keep the count in sync with the (now NULL) array: scale_frame() tests
+     * nb_scalers to detect passthrough, and a stale non-zero value would make
+     * it and a later scaler_free() index through a NULL pointer. */
+    s->nb_scalers = 0;
}
static av_cold void uninit(AVFilterContext *ctx)
@@ -346,6 +354,7 @@ static av_cold void uninit(AVFilterContext *ctx)
av_expr_free(scale->h_pexpr);
scale->w_pexpr = scale->h_pexpr = NULL;
scaler_free(scale);
+ av_freep(&scale->scaler_res);
av_dict_free(&scale->opts);
}
@@ -522,19 +531,28 @@ static int config_props(AVFilterLink *outlink)
scaler_free(scale);
- if (inlink0->w == outlink->w &&
- inlink0->h == outlink->h &&
- !scale->out_color_matrix &&
- scale->in_range == scale->out_range &&
- inlink0->format == outlink->format)
- ;
- else {
- struct SwsContext **swscs[3] = {&scale->sws, &scale->isws[0], &scale->isws[1]};
- int i;
-
- for (i = 0; i < 3; i++) {
+ scale->passthrough = inlink0->w == outlink->w &&
+ inlink0->h == outlink->h &&
+ !scale->out_color_matrix &&
+ scale->in_range == scale->out_range &&
+ inlink0->format == outlink->format;
+
+ if (!scale->passthrough) {
+ int nb_scalers = ff_filter_get_nb_threads(ctx);
+
+ scale->scalers = av_mallocz_array(nb_scalers, 3 * sizeof(struct SwsContext*));
+ if (!scale->scalers)
+ return AVERROR(ENOMEM);
+
+ ret = av_reallocp_array(&scale->scaler_res, nb_scalers, sizeof(*scale->scaler_res));
+ if (ret < 0)
+ return ret;
+
+ for (int i = 0; i < 3; i++) {
+ for (int t = 0; t < nb_scalers; t++) {
int in_v_chr_pos = scale->in_v_chr_pos, out_v_chr_pos = scale->out_v_chr_pos;
- struct SwsContext **s = swscs[i];
+ struct SwsContext **s = &scale->scalers[t][i];
+
*s = sws_alloc_context();
if (!*s)
return AVERROR(ENOMEM);
@@ -580,9 +598,29 @@ static int config_props(AVFilterLink *outlink)
if ((ret = sws_init_context(*s, NULL, NULL)) < 0)
return ret;
+
+ /* do not multithread error-diffusion dithering */
+ if (i == 0 && t == 0) {
+ const AVOption *opt;
+ int64_t dither;
+
+ av_opt_get_int(*s, "sws_dither", 0, &dither);
+ opt = av_opt_find2(*s, "ed", "sws_dither", 0, 0, NULL);
+ if (!opt)
+ return AVERROR_BUG;
+
+ if (dither == opt->default_val.i64) {
+ av_log(ctx, AV_LOG_WARNING, "Error-diffusion dithering is "
+ "used, conversion will be single-threaded.\n");
+ nb_scalers = 1;
+ }
+ }
+ }
+
if (!scale->interlaced)
break;
}
+ scale->nb_scalers = nb_scalers;
}
if (inlink0->sample_aspect_ratio.num){
@@ -625,7 +663,8 @@ static int request_frame_ref(AVFilterLink *outlink)
return ff_request_frame(outlink->src->inputs[1]);
}
-static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic, struct SwsContext *sws, int y, int h, int mul, int field)
+static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic, struct SwsContext *sws,
+ int y, int h, int mul, int field, int dst)
{
const uint8_t *in[4];
uint8_t *out[4];
@@ -633,9 +672,10 @@ static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic,
int i;
for (i=0; i<4; i++) {
- int vsub= ((i+1)&2) ? scale->vsub : 0;
- ptrdiff_t in_offset = ((y>>vsub)+field) * cur_pic->linesize[i];
- ptrdiff_t out_offset = field * out_buf->linesize[i];
+ int vsub = ((i+1)&2) ? scale->vsub : 0;
+ int ovsub = ((i+1)&2) ? scale->ovsub : 0;
+ ptrdiff_t in_offset = (((y * !dst) >> vsub) + field) * cur_pic->linesize[i];
+ ptrdiff_t out_offset = (((y * dst) >> ovsub) + field) * out_buf->linesize[i];
in_stride[i] = cur_pic->linesize[i] * mul;
out_stride[i] = out_buf->linesize[i] * mul;
in[i] = FF_PTR_ADD(cur_pic->data[i], in_offset);
@@ -646,17 +686,57 @@ static int scale_slice(ScaleContext *scale, AVFrame *out_buf, AVFrame *cur_pic,
if (scale->output_is_pal)
out[1] = out_buf->data[1];
+ if (dst)
+ return sws_scale_dst_slice(sws, in, in_stride,
+ out, out_stride, y / mul, h);
+
return sws_scale(sws, in, in_stride, y/mul, h,
out,out_stride);
}
+typedef struct ScaleThreadData { /* argument handed to scale_job() through execute() */
+ AVFrame *frame_in; ///< source picture, passed to scale_slice() as the input
+ AVFrame *frame_out; ///< destination picture; its height is split into per-job slices
+ int scaler_idx; ///< scaler to use in each thread's set: 0 progressive, 1 top field, 2 bottom field
+} ScaleThreadData;
+
+/**
+ * Summarise the per-job results stored in scale->scaler_res.
+ *
+ * @return the first negative entry among the first scale->nb_scalers
+ *         results, or 0 when none of them is negative
+ */
+static int scaler_res(ScaleContext *scale)
+{
+    const int *results = scale->scaler_res;
+    unsigned int idx = 0;
+
+    while (idx < scale->nb_scalers) {
+        if (results[idx] < 0)
+            return results[idx];
+        idx++;
+    }
+
+    return 0;
+}
+
+/**
+ * Slice-threading worker: scale one horizontal band of the output picture.
+ *
+ * The output height (or the height of one field when td->scaler_idx is 1
+ * or 2) is divided into nb_jobs bands whose height is aligned to the output
+ * chroma subsampling; job jobnr handles the jobnr-th band using its own
+ * scaler context scale->scalers[jobnr][td->scaler_idx].
+ *
+ * @param ctx     filter context; ctx->priv is the ScaleContext
+ * @param arg     pointer to a ScaleThreadData describing the frames
+ * @param jobnr   index of this job
+ * @param nb_jobs total number of jobs
+ * @return 0 on success or when the band is empty, a negative value if
+ *         scale_slice() failed, so the caller can see it in the per-job
+ *         result array
+ */
+static int scale_job(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ScaleContext *scale = ctx->priv;
+    ScaleThreadData *td = arg;
+    /* fields are scaled with doubled line stride */
+    int stride_mul = 1 << (td->scaler_idx > 0);
+    /* the first field gets the extra line of an odd-height picture */
+    int first_field = td->scaler_idx == 1;
+    int picture_height = (td->frame_out->height + first_field) / stride_mul;
+    int slice_height = FFALIGN(FFMAX((picture_height + nb_jobs - 1) / nb_jobs, 1),
+                               1 << scale->ovsub);
+    int slice_start = jobnr * slice_height;
+    int slice_end = FFMIN((jobnr + 1) * slice_height, picture_height);
+    int ret;
+
+    /* alignment may leave the last jobs without any lines to process */
+    if (slice_start >= slice_end)
+        return 0;
+
+    ret = scale_slice(scale, td->frame_out, td->frame_in,
+                      scale->scalers[jobnr][td->scaler_idx], slice_start,
+                      slice_end - slice_start, stride_mul, td->scaler_idx == 2, 1);
+
+    /* report failures instead of discarding them; a non-negative result
+     * is normalised to 0 */
+    return ret < 0 ? ret : 0;
+}
+
static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out)
{
AVFilterContext *ctx = link->dst;
ScaleContext *scale = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
AVFrame *out;
- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
+ const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
char buf[32];
int ret;
int in_range;
@@ -723,13 +803,15 @@ static int scale_frame(AVFilterLink *link, AVFrame *in, AVFrame **frame_out)
}
scale:
- if (!scale->sws) {
+ if (!scale->nb_scalers) {
*frame_out = in;
return 0;
}
scale->hsub = desc->log2_chroma_w;
scale->vsub = desc->log2_chroma_h;
+ scale->ohsub = odesc->log2_chroma_w;
+ scale->ovsub = odesc->log2_chroma_h;
out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
if (!out) {
@@ -755,7 +837,7 @@ scale:
int in_full, out_full, brightness, contrast, saturation;
const int *inv_table, *table;
- sws_getColorspaceDetails(scale->sws, (int **)&inv_table, &in_full,
+ sws_getColorspaceDetails(scale->scalers[0][0], (int **)&inv_table, &in_full,
(int **)&table, &out_full,
&brightness, &contrast, &saturation);
@@ -773,17 +855,14 @@ scale:
if (scale->out_range != AVCOL_RANGE_UNSPECIFIED)
out_full = (scale->out_range == AVCOL_RANGE_JPEG);
- sws_setColorspaceDetails(scale->sws, inv_table, in_full,
+ for (int i = 0; i < 3; i++)
+ for (int j = 0; j < scale->nb_scalers; j++) {
+ if (!scale->scalers[j][i])
+ continue;
+ sws_setColorspaceDetails(scale->scalers[j][i], inv_table, in_full,
table, out_full,
brightness, contrast, saturation);
- if (scale->isws[0])
- sws_setColorspaceDetails(scale->isws[0], inv_table, in_full,
- table, out_full,
- brightness, contrast, saturation);
- if (scale->isws[1])
- sws_setColorspaceDetails(scale->isws[1], inv_table, in_full,
- table, out_full,
- brightness, contrast, saturation);
+ }
out->color_range = out_full ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
}
@@ -793,10 +872,22 @@ scale:
(int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
INT_MAX);
+ memset(scale->scaler_res, 0, scale->nb_scalers * sizeof(*scale->scaler_res));
+
if (scale->interlaced>0 || (scale->interlaced<0 && in->interlaced_frame)) {
- ret = scale_slice(scale, out, in, scale->isws[0], 0, (link->h+1)/2, 2, 0);
- if (ret >= 0)
- ret = scale_slice(scale, out, in, scale->isws[1], 0, link->h /2, 2, 1);
+ ScaleThreadData td = {
+ .scaler_idx = 1,
+ .frame_in = in,
+ .frame_out = out,
+ };
+
+ ctx->internal->execute(ctx, scale_job, &td, scale->scaler_res, scale->nb_scalers);
+
+ if (scaler_res(scale) >= 0) {
+ td.scaler_idx = 2;
+ memset(scale->scaler_res, 0, scale->nb_scalers * sizeof(*scale->scaler_res));
+ ctx->internal->execute(ctx, scale_job, &td, scale->scaler_res, scale->nb_scalers);
+ }
} else if (scale->nb_slices) {
int i, slice_h, slice_start, slice_end = 0;
const int nb_slices = FFMIN(scale->nb_slices, link->h);
@@ -804,14 +895,22 @@ scale:
slice_start = slice_end;
slice_end = (link->h * (i+1)) / nb_slices;
slice_h = slice_end - slice_start;
- ret = scale_slice(scale, out, in, scale->sws, slice_start, slice_h, 1, 0);
+ ret = scale_slice(scale, out, in, scale->scalers[0][0], slice_start, slice_h, 1, 0, 0);
if (ret < 0)
break;
}
} else {
- ret = scale_slice(scale, out, in, scale->sws, 0, link->h, 1, 0);
+ ScaleThreadData td = {
+ .scaler_idx = 0,
+ .frame_in = in,
+ .frame_out = out,
+ };
+
+ ctx->internal->execute(ctx, scale_job, &td, scale->scaler_res, scale->nb_scalers);
}
+ ret = scaler_res(scale);
+
av_frame_free(&in);
if (ret < 0)
av_frame_free(frame_out);
@@ -984,6 +1083,7 @@ const AVFilter ff_vf_scale = {
.inputs = avfilter_vf_scale_inputs,
.outputs = avfilter_vf_scale_outputs,
.process_command = process_command,
+ .flags = AVFILTER_FLAG_SLICE_THREADS,
};
static const AVClass scale2ref_class = {
--
2.30.2
More information about the ffmpeg-devel
mailing list