[FFmpeg-cvslog] avfilter/vf_morpho: add slice threading support
Paul B Mahol
git at videolan.org
Mon May 8 18:52:45 EEST 2023
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Mon May 8 17:17:19 2023 +0200| [1eed7f65624cd590a4acd944f9bbeb9f03634a3f] | committer: Paul B Mahol
avfilter/vf_morpho: add slice threading support
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1eed7f65624cd590a4acd944f9bbeb9f03634a3f
---
libavfilter/vf_morpho.c | 257 +++++++++++++++++++++++++++++-------------------
1 file changed, 158 insertions(+), 99 deletions(-)
diff --git a/libavfilter/vf_morpho.c b/libavfilter/vf_morpho.c
index 55886d2b8a..a218499d61 100644
--- a/libavfilter/vf_morpho.c
+++ b/libavfilter/vf_morpho.c
@@ -95,6 +95,8 @@ typedef struct chord_set {
unsigned nb_elements;
} chord_set;
+#define MAX_THREADS 64
+
typedef struct MorphoContext {
const AVClass *class;
FFFrameSync fs;
@@ -102,7 +104,7 @@ typedef struct MorphoContext {
chord_set SE[4];
IPlane SEimg[4];
IPlane g[4], f[4], h[4];
- LUT Ty[2][4];
+ LUT Ty[MAX_THREADS][2][4];
int mode;
int planes;
@@ -460,14 +462,14 @@ static void line_erode(IPlane *g, LUT *Ty, chord_set *SE, int y, int tid)
}
}
-static int dilate(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
+static int dilate(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty, int y0, int y1)
{
- int ret = compute_max_lut(Ty, f, SE, 0, 1);
+ int ret = compute_max_lut(Ty, f, SE, y0, 1);
if (ret < 0)
return ret;
- line_dilate(g, Ty, SE, 0, 0);
- for (int y = 1; y < f->h; y++) {
+ line_dilate(g, Ty, SE, y0, 0);
+ for (int y = y0 + 1; y < y1; y++) {
update_max_lut(f, Ty, SE, y, 0, 1);
line_dilate(g, Ty, SE, y, 0);
}
@@ -475,14 +477,14 @@ static int dilate(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
return 0;
}
-static int erode(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
+static int erode(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty, int y0, int y1)
{
- int ret = compute_min_lut(Ty, f, SE, 0, 1);
+ int ret = compute_min_lut(Ty, f, SE, y0, 1);
if (ret < 0)
return ret;
- line_erode(g, Ty, SE, 0, 0);
- for (int y = 1; y < f->h; y++) {
+ line_erode(g, Ty, SE, y0, 0);
+ for (int y = y0 + 1; y < y1; y++) {
update_min_lut(f, Ty, SE, y, 0, 1);
line_erode(g, Ty, SE, y, 0);
}
@@ -490,15 +492,15 @@ static int erode(IPlane *g, IPlane *f, chord_set *SE, LUT *Ty)
return 0;
}
-static void difference(IPlane *g, IPlane *f)
+static void difference(IPlane *g, IPlane *f, int y0, int y1)
{
- for (int y = 0; y < f->h; y++)
+ for (int y = y0; y < y1; y++)
f->diff_in_place(g->img[y], f->img[y], f->w);
}
-static void difference2(IPlane *g, IPlane *f)
+static void difference2(IPlane *g, IPlane *f, int y0, int y1)
{
- for (int y = 0; y < f->h; y++)
+ for (int y = y0; y < y1; y++)
f->diff_rin_place(g->img[y], f->img[y], f->w);
}
@@ -785,12 +787,133 @@ static int activate(AVFilterContext *ctx)
return ff_framesync_activate(&s->fs);
}
+typedef struct ThreadData { /* Argument bundle that do_morpho() passes to morpho_slice() through ff_filter_execute(). */
+ AVFrame *in, *out; /* Source frame and destination frame; morpho_slice() copies pass-through planes from in to out. */
+} ThreadData;
+
+static int morpho_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) /* First-pass slice job: per plane, either copy this job's rows from in to out or run the first erode()/dilate() of s->mode on them. Returns 0, or the negative value returned by erode()/dilate(). */
+{ /* jobnr also indexes s->Ty[], whose first dimension is MAX_THREADS; the visible caller caps nb_jobs at MAX_THREADS. */
+ MorphoContext *s = ctx->priv;
+ ThreadData *td = arg; /* arg is the ThreadData filled in by do_morpho() */
+ AVFrame *out = td->out;
+ AVFrame *in = td->in;
+ int ret;
+
+ for (int p = 0; p < s->nb_planes; p++) {
+ const int width = s->planewidth[p];
+ const int height = s->planeheight[p];
+ const int y0 = (height * jobnr ) / nb_jobs; /* first row of this job's band in plane p */
+ const int y1 = (height * (jobnr+1)) / nb_jobs; /* one past the last row of the band */
+ const int depth = s->depth;
+
+ if (ctx->is_disabled || !(s->planes & (1 << p))) { /* filter disabled, or bit p not set in the planes mask */
+copy: /* also entered via the goto below */
+ av_image_copy_plane(out->data[p] + y0 * out->linesize[p], /* pass-through: copy only rows y0..y1-1 */
+ out->linesize[p],
+ in->data[p] + y0 * in->linesize[p],
+ in->linesize[p],
+ width * ((depth + 7) / 8), /* bytes per row; presumably depth is a bit depth rounded up to whole bytes - TODO confirm */
+ y1 - y0);
+ continue;
+ }
+
+ if (s->SE[p].minX == INT16_MAX || /* SE extents still at these sentinel values: presumably an empty structuring element - TODO confirm */
+ s->SE[p].minY == INT16_MAX ||
+ s->SE[p].maxX == INT16_MIN ||
+ s->SE[p].maxY == INT16_MIN)
+ goto copy; /* handled like a skipped plane: rows are copied unchanged */
+
+ switch (s->mode) { /* Ty[jobnr][0][p] is the LUT used by this job for the first pass */
+ case ERODE: /* do_morpho() runs no second pass for ERODE: erode f directly into g */
+ ret = erode(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+ break;
+ case DILATE:
+ case GRADIENT: /* both dilate f into g; GRADIENT is finished later in morpho_sliceX() */
+ ret = dilate(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+ break;
+ case OPEN:
+ case TOPHAT: /* erode f into the intermediate plane h; morpho_sliceX() dilates h afterwards */
+ ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+ break;
+ case CLOSE:
+ case BLACKHAT: /* dilate f into the intermediate plane h; morpho_sliceX() erodes h afterwards */
+ ret = dilate(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][0][p], y0, y1);
+ break;
+ default:
+ av_assert0(0); /* no other mode value is expected here */
+ }
+
+ if (ret < 0) /* stop at the first failing plane and report its error code */
+ return ret;
+ }
+
+ return 0;
+}
+
+static int morpho_sliceX(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) /* Second-pass slice job for the two-step modes (OPEN, CLOSE, GRADIENT, TOPHAT, BLACKHAT); arg is unused. Returns 0, or the negative value returned by erode()/dilate(). */
+{ /* NOTE(review): the visible call in do_morpho() does not store ff_filter_execute()'s result for this pass, so an error returned here appears to go unreported - confirm. */
+ MorphoContext *s = ctx->priv;
+ int ret;
+
+ for (int p = 0; p < s->nb_planes; p++) {
+ const int height = s->planeheight[p];
+ const int y0 = (height * jobnr ) / nb_jobs; /* first row of this job's band in plane p */
+ const int y1 = (height * (jobnr+1)) / nb_jobs; /* one past the last row of the band */
+
+ if (ctx->is_disabled || !(s->planes & (1 << p))) { /* same skip test as morpho_slice(), which already copied these rows */
+copy: /* also entered via the goto below */
+ continue; /* nothing left to do for this plane in the second pass */
+ }
+
+ if (s->SE[p].minX == INT16_MAX || /* SE extents still at these sentinel values: presumably an empty structuring element - TODO confirm */
+ s->SE[p].minY == INT16_MAX ||
+ s->SE[p].maxX == INT16_MIN ||
+ s->SE[p].maxY == INT16_MIN)
+ goto copy;
+
+ switch (s->mode) { /* Ty[jobnr][1][p] is the LUT used by this job for the second pass */
+ case OPEN: /* h holds the first-pass erosion; dilate it into g */
+ ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+ break;
+ case CLOSE: /* h holds the first-pass dilation; erode it into g */
+ ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+ break;
+ case GRADIENT: /* g holds the first-pass dilation of f; erode f into h, then combine g and h */
+ ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+ if (ret < 0)
+ break;
+ difference(&s->g[p], &s->h[p], y0, y1); /* applies diff_in_place to rows y0..y1-1 of g and h */
+ break;
+ case TOPHAT: /* dilate h (first-pass erosion) into g, then combine g with the source plane f */
+ ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+ if (ret < 0)
+ break;
+ difference2(&s->g[p], &s->f[p], y0, y1); /* applies diff_rin_place to rows y0..y1-1 of g and f */
+ break;
+ case BLACKHAT: /* erode h (first-pass dilation) into g, then combine g with the source plane f */
+ ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[jobnr][1][p], y0, y1);
+ if (ret < 0)
+ break;
+ difference(&s->g[p], &s->f[p], y0, y1); /* applies diff_in_place to rows y0..y1-1 of g and f */
+ break;
+ default:
+ av_assert0(0); /* ERODE and DILATE are excluded by the caller's mode check; no other value is expected */
+ }
+
+ if (ret < 0) /* stop at the first failing plane and report its error code */
+ return ret;
+ }
+
+ return 0;
+}
+
static int do_morpho(FFFrameSync *fs)
{
AVFilterContext *ctx = fs->parent;
AVFilterLink *outlink = ctx->outputs[0];
MorphoContext *s = ctx->priv;
AVFrame *in = NULL, *structurepic = NULL;
+ ThreadData td;
AVFrame *out;
int ret;
@@ -812,6 +935,12 @@ static int do_morpho(FFFrameSync *fs)
const int ssrc_linesize = structurepic->linesize[p];
const int swidth = s->splanewidth[p];
const int sheight = s->splaneheight[p];
+ const uint8_t *src = in->data[p];
+ int src_linesize = in->linesize[p];
+ uint8_t *dst = out->data[p];
+ int dst_linesize = out->linesize[p];
+ const int width = s->planewidth[p];
+ const int height = s->planeheight[p];
const int depth = s->depth;
int type_size = s->type_size;
@@ -826,34 +955,6 @@ static int do_morpho(FFFrameSync *fs)
goto fail;
s->got_structure[p] = 1;
}
- }
-
- for (int p = 0; p < s->nb_planes; p++) {
- const uint8_t *src = in->data[p];
- int src_linesize = in->linesize[p];
- uint8_t *dst = out->data[p];
- int dst_linesize = out->linesize[p];
- const int width = s->planewidth[p];
- const int height = s->planeheight[p];
- const int depth = s->depth;
- int type_size = s->type_size;
-
- if (ctx->is_disabled || !(s->planes & (1 << p))) {
-copy:
- av_image_copy_plane(out->data[p] + 0 * out->linesize[p],
- out->linesize[p],
- in->data[p] + 0 * in->linesize[p],
- in->linesize[p],
- width * ((depth + 7) / 8),
- height);
- continue;
- }
-
- if (s->SE[p].minX == INT16_MAX ||
- s->SE[p].minY == INT16_MAX ||
- s->SE[p].maxX == INT16_MIN ||
- s->SE[p].maxY == INT16_MIN)
- goto copy;
ret = read_iplane(&s->f[p], src, src_linesize, width, height, 1, type_size, depth);
if (ret < 0)
@@ -864,74 +965,29 @@ copy:
goto fail;
switch (s->mode) {
- case ERODE:
- ret = erode(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
- break;
- case DILATE:
- ret = dilate(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
- break;
case OPEN:
- ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
- if (ret < 0)
- break;
- ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
- if (ret < 0)
- break;
- ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
- break;
case CLOSE:
- ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
- if (ret < 0)
- break;
- ret = dilate(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
- if (ret < 0)
- break;
- ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
- break;
case GRADIENT:
- ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
- if (ret < 0)
- break;
- ret = dilate(&s->g[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
- if (ret < 0)
- break;
- ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[1][p]);
- if (ret < 0)
- break;
- difference(&s->g[p], &s->h[p]);
- break;
case TOPHAT:
- ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
- if (ret < 0)
- break;
- ret = erode(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
- if (ret < 0)
- break;
- ret = dilate(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
- if (ret < 0)
- break;
- difference2(&s->g[p], &s->f[p]);
- break;
case BLACKHAT:
ret = read_iplane(&s->h[p], s->temp->data[p], s->temp->linesize[p], width, height, 1, type_size, depth);
- if (ret < 0)
- break;
- ret = dilate(&s->h[p], &s->f[p], &s->SE[p], &s->Ty[0][p]);
- if (ret < 0)
- break;
- ret = erode(&s->g[p], &s->h[p], &s->SE[p], &s->Ty[1][p]);
- if (ret < 0)
- break;
- difference(&s->g[p], &s->f[p]);
break;
- default:
- av_assert0(0);
}
if (ret < 0)
goto fail;
}
+ td.in = in; td.out = out;
+ ret = ff_filter_execute(ctx, morpho_slice, &td, NULL,
+ FFMIN3(s->planeheight[1], s->planeheight[2],
+ FFMIN(MAX_THREADS, ff_filter_get_nb_threads(ctx))));
+ if (ret == 0 && (s->mode != ERODE && s->mode != DILATE)) {
+ ff_filter_execute(ctx, morpho_sliceX, NULL, NULL,
+ FFMIN3(s->planeheight[1], s->planeheight[2],
+ FFMIN(MAX_THREADS, ff_filter_get_nb_threads(ctx))));
+ }
+
av_frame_free(&in);
out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base);
return ff_filter_frame(outlink, out);
@@ -984,8 +1040,10 @@ static av_cold void uninit(AVFilterContext *ctx)
free_iplane(&s->g[p]);
free_iplane(&s->h[p]);
free_chord_set(&s->SE[p]);
- free_lut(&s->Ty[0][p]);
- free_lut(&s->Ty[1][p]);
+ for (int n = 0; n < MAX_THREADS; n++) {
+ free_lut(&s->Ty[n][0][p]);
+ free_lut(&s->Ty[n][1][p]);
+ }
}
ff_framesync_uninit(&s->fs);
@@ -1027,6 +1085,7 @@ const AVFilter ff_vf_morpho = {
FILTER_INPUTS(morpho_inputs),
FILTER_OUTPUTS(morpho_outputs),
FILTER_PIXFMTS_ARRAY(pix_fmts),
- .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+ AVFILTER_FLAG_SLICE_THREADS,
.process_command = ff_filter_process_command,
};
More information about the ffmpeg-cvslog mailing list