[FFmpeg-devel] [PATCH v3 2/5] avfilter/vf_libopencv: add opencv HaarCascade classifier simple face detection filter

Mon May 18 17:09:00 EEST 2020

On Mon, May 18, 2020 at 01:11:12PM +0200, Paul B Mahol wrote:
> This opencv module is obsolete for latest opencv.
> 
> Instead there should be C++ wrapper.

Sorry, where is the C++ wrapper? Now Opencv 2.x and 3.x is good
to use for my current function. By my initial testing, the performance is
very good, we can easily to get realtime for HD, the drawbacks are: 
1. The face is not directly opposite and cannot be recognized.
2. there are some false detect for video.

If have any old discussion for that, please give the discussion link for 
reference. I'm not keep track of the ML before.

> 
> On 5/18/20, lance.lmwang at gmail.com <lance.lmwang at gmail.com> wrote:
> > From: Limin Wang <lance.lmwang at gmail.com>
> >
> > Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
> > ---
> > change the update_metadata() to postprocess() only, I'll add opencv drawbox
> > filter and it need preprocess() to get the meta, so I prefer to change the
> > function name for better readablity, in future, it may have other processing
> > than metadata only.
> >
> >  configure                  |   1 +
> >  doc/filters.texi           |  29 +++++++
> >  libavfilter/vf_libopencv.c | 164 ++++++++++++++++++++++++++++++++++++-
> >  3 files changed, 191 insertions(+), 3 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 34afdaad28..281b67efc4 100755
> > --- a/configure
> > +++ b/configure
> > @@ -2123,6 +2123,7 @@ HEADERS_LIST="
> >      machine_ioctl_meteor_h
> >      malloc_h
> >      opencv2_core_core_c_h
> > +    opencv2_objdetect_objdetect_c_h
> >      OpenGL_gl3_h
> >      poll_h
> >      sys_param_h
> > diff --git a/doc/filters.texi b/doc/filters.texi
> > index d9ba0fffa1..f938dd04de 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -14177,6 +14177,35 @@ other parameters is 0.
> >  These parameters correspond to the parameters assigned to the
> >  libopencv function @code{cvSmooth}.
> >
> > + at subsection facedetect
> > +Face detection using Haar Feature-based Cascade Classifiers.
> > +
> > +The filter takes the following parameters:
> > + at var{xml_model}|@var{qoffset}.
> > +
> > + at var{xml_model} is the path of pre-trained classifiers, The C API still
> > +does not support the newer cascade format, please use the old format
> > +haarcascade_frontalface_alt.xml which type_id is opencv-haar-classifier.
> > +
> > + at var{qoffset}
> > +If you want export the detected faces by ROI side data in frame, please set
> > the
> > +parameters, See also the @ref{addroi} filter. The range of qoffset is from
> > [-1.0, 1.0]
> > +
> > +By default the filter will report these metadata values if face are
> > +detected:
> > + at table @option
> > + at item lavfi.facedetect.nb_faces
> > +Display the detected face number
> > +
> > + at item lavfi.facedetect.face_id.x, lavfi.facedetect.face_id.y
> > +Display x and y of every faces, face_id is the face index which is range
> > +from [0, nb_faces-1]
> > +
> > + at item lavfi.facedetect.face_id.w, lavfi.facedetect.face_id.h
> > +Display width and height of every faces, face_id is the face index
> > +which is range from [0, nb_faces-1]
> > + at end table
> > +
> >  @section oscilloscope
> >
> >  2D Video Oscilloscope.
> > diff --git a/libavfilter/vf_libopencv.c b/libavfilter/vf_libopencv.c
> > index 8128030b8c..b2d19bb241 100644
> > --- a/libavfilter/vf_libopencv.c
> > +++ b/libavfilter/vf_libopencv.c
> > @@ -1,5 +1,6 @@
> >  /*
> >   * Copyright (c) 2010 Stefano Sabatini
> > + * Copyright (c) 2020 Limin Wang
> >   *
> >   * This file is part of FFmpeg.
> >   *
> > @@ -27,10 +28,16 @@
> >  #if HAVE_OPENCV2_CORE_CORE_C_H
> >  #include <opencv2/core/core_c.h>
> >  #include <opencv2/imgproc/imgproc_c.h>
> > +#if HAVE_OPENCV2_OBJECTDETECT_OBJECTDETECT_C_H
> > +#include <opencv2/objdetect/objdetect_c.h>
> > +#else
> > +#include <opencv/cv.h>
> > +#endif
> >  #else
> >  #include <opencv/cv.h>
> >  #include <opencv/cxcore.h>
> >  #endif
> > +
> >  #include "libavutil/avstring.h"
> >  #include "libavutil/common.h"
> >  #include "libavutil/file.h"
> > @@ -82,6 +89,7 @@ typedef struct OCVContext {
> >      int (*init)(AVFilterContext *ctx, const char *args);
> >      void (*uninit)(AVFilterContext *ctx);
> >      void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg,
> > IplImage *outimg);
> > +    void (*postprocess)(AVFilterContext *ctx, AVFrame *out);
> >      void *priv;
> >  } OCVContext;
> >
> > @@ -326,18 +334,152 @@ static void erode_end_frame_filter(AVFilterContext
> > *ctx, IplImage *inimg, IplIma
> >      cvErode(inimg, outimg, dilate->kernel, dilate->nb_iterations);
> >  }
> >
> > +typedef struct FaceDetectContext {
> > +    char *xml_model;
> > +    CvHaarClassifierCascade* cascade;
> > +    CvMemStorage* storage;
> > +    int nb_faces;
> > +    CvSeq *faces;
> > +    int add_roi;
> > +    AVRational qoffset;
> > +} FaceDetectContext;
> > +
> > +static av_cold int facedetect_init(AVFilterContext *ctx, const char *args)
> > +{
> > +    OCVContext *s = ctx->priv;
> > +    FaceDetectContext *facedetect = s->priv;
> > +    const char *buf = args;
> > +    double qoffset;
> > +
> > +    if (args) {
> > +        facedetect->xml_model = av_get_token(&buf, "|");
> > +        if (!facedetect->xml_model) {
> > +            av_log(ctx, AV_LOG_ERROR, "failed to get %s, %s\n", args,
> > facedetect->xml_model);
> > +            return AVERROR(EINVAL);
> > +        }
> > +
> > +        if (buf && sscanf(buf, "|%lf", &qoffset) == 1) {
> > +            if (qoffset < -1.0 || qoffset > 1.0) {
> > +                av_log(ctx, AV_LOG_ERROR, "failed to get valid
> > qoffset(%f))\n", qoffset);
> > +                return AVERROR(EINVAL);
> > +            }
> > +            facedetect->add_roi = 1;
> > +            facedetect->qoffset = av_d2q(qoffset, 255);
> > +        }
> > +    } else {
> > +        av_log(ctx, AV_LOG_ERROR, "failed to get
> > haarcascade_frontalface_alt.xml model file\n");
> > +        return AVERROR(EINVAL);
> > +    }
> > +
> > +    av_log(ctx, AV_LOG_VERBOSE, "xml_model: %s add_roi: %d qoffset:
> > %d/%d\n",
> > +           facedetect->xml_model, facedetect->add_roi,
> > facedetect->qoffset.num, facedetect->qoffset.den);
> > +
> > +    facedetect->storage = cvCreateMemStorage(0);
> > +    if (!facedetect->storage) {
> > +        av_log(ctx, AV_LOG_ERROR, "cvCreateMemStorage() failed\n");
> > +        return AVERROR(EINVAL);
> > +    }
> > +    cvClearMemStorage(facedetect->storage);
> > +
> > +    facedetect->cascade = (CvHaarClassifierCascade*)cvLoad(
> > facedetect->xml_model, NULL, NULL, NULL );
> > +    if (!facedetect->cascade) {
> > +        av_log(ctx, AV_LOG_ERROR, "failed to load classifier cascade: %s
> > \n", facedetect->xml_model);
> > +        return AVERROR(EINVAL);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static av_cold void facedetect_uninit(AVFilterContext *ctx)
> > +{
> > +    OCVContext *s = ctx->priv;
> > +    FaceDetectContext *facedetect = s->priv;
> > +
> > +    if (facedetect->cascade)
> > +        cvReleaseHaarClassifierCascade(&facedetect->cascade);
> > +    if (facedetect->storage)
> > +        cvReleaseMemStorage(&facedetect->storage);
> > +}
> > +
> > +static void set_meta_int(AVDictionary **metadata, const char *key, int idx,
> > int d)
> > +{
> > +    char value[128];
> > +    char key2[128];
> > +
> > +    snprintf(value, sizeof(value), "%d", d);
> > +    snprintf(key2, sizeof(key2), "lavfi.facedetect.%d.%s", idx, key);
> > +    av_dict_set(metadata, key2, value, 0);
> > +}
> > +
> > +static void facedetect_end_frame_filter(AVFilterContext *ctx, IplImage
> > *inimg, IplImage *outimg)
> > +{
> > +    OCVContext *s = ctx->priv;
> > +    FaceDetectContext *facedetect = s->priv;
> > +
> > +    facedetect->faces = cvHaarDetectObjects(inimg, facedetect->cascade,
> > facedetect->storage,
> > +            1.25, 3, CV_HAAR_DO_CANNY_PRUNING,
> > +            cvSize(inimg->width/16,inimg->height/16), cvSize(0,0));
> > +
> > +    facedetect->nb_faces = facedetect->faces ? facedetect->faces->total :
> > 0;
> > +}
> > +
> > +static void facedetect_postprocess(AVFilterContext *ctx, AVFrame *out)
> > +{
> > +    OCVContext *s = ctx->priv;
> > +    FaceDetectContext *facedetect = s->priv;
> > +    AVRegionOfInterest *roi;
> > +    AVFrameSideData *sd;
> > +    AVBufferRef *roi_buf;
> > +    int i;
> > +
> > +    if (facedetect->add_roi && facedetect->nb_faces > 0) {
> > +        sd = av_frame_new_side_data(out, AV_FRAME_DATA_REGIONS_OF_INTEREST,
> > +                facedetect->nb_faces * sizeof(AVRegionOfInterest));
> > +        if (!sd) {
> > +            return AVERROR(ENOMEM);
> > +        }
> > +        roi = (AVRegionOfInterest*)sd->data;
> > +        for(i = 0; i < facedetect->nb_faces; i++ ) {
> > +            CvRect *r = (CvRect*) cvGetSeqElem(facedetect->faces, i);
> > +
> > +            roi[i] = (AVRegionOfInterest) {
> > +                .self_size = sizeof(*roi),
> > +                .top       = r->y,
> > +                .bottom    = r->y + r->height,
> > +                .left      = r->x,
> > +                .right     = r->x + r->width,
> > +                .qoffset   = facedetect->qoffset,
> > +            };
> > +        }
> > +    }
> > +
> > +    if (facedetect->nb_faces > 0)
> > +        av_dict_set_int(&out->metadata, "lavfi.facedetect.nb_faces",
> > facedetect->nb_faces, 0);
> > +
> > +    for(i = 0; i < facedetect->nb_faces; i++ ) {
> > +        CvRect *r = (CvRect*) cvGetSeqElem(facedetect->faces, i);
> > +
> > +        set_meta_int(&out->metadata, "x", i, r->x);
> > +        set_meta_int(&out->metadata, "y", i, r->y);
> > +        set_meta_int(&out->metadata, "w", i, r->width);
> > +        set_meta_int(&out->metadata, "h", i, r->height);
> > +    }
> > +}
> > +
> >  typedef struct OCVFilterEntry {
> >      const char *name;
> >      size_t priv_size;
> >      int  (*init)(AVFilterContext *ctx, const char *args);
> >      void (*uninit)(AVFilterContext *ctx);
> >      void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg,
> > IplImage *outimg);
> > +    void (*postprocess)(AVFilterContext *ctx, AVFrame *out);
> >  } OCVFilterEntry;
> >
> >  static const OCVFilterEntry ocv_filter_entries[] = {
> > -    { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit,
> > dilate_end_frame_filter },
> > -    { "erode",  sizeof(DilateContext), dilate_init, dilate_uninit,
> > erode_end_frame_filter  },
> > -    { "smooth", sizeof(SmoothContext), smooth_init, NULL,
> > smooth_end_frame_filter },
> > +    { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit,
> > dilate_end_frame_filter, NULL },
> > +    { "erode",  sizeof(DilateContext), dilate_init, dilate_uninit,
> > erode_end_frame_filter, NULL },
> > +    { "smooth", sizeof(SmoothContext), smooth_init, NULL,
> > smooth_end_frame_filter, NULL },
> > +    { "facedetect", sizeof(FaceDetectContext), facedetect_init,
> > facedetect_uninit, facedetect_end_frame_filter, facedetect_postprocess },
> >  };
> >
> >  static av_cold int init(AVFilterContext *ctx)
> > @@ -355,6 +497,7 @@ static av_cold int init(AVFilterContext *ctx)
> >              s->init             = entry->init;
> >              s->uninit           = entry->uninit;
> >              s->end_frame_filter = entry->end_frame_filter;
> > +            s->postprocess      = entry->postprocess;
> >
> >              if (!(s->priv = av_mallocz(entry->priv_size)))
> >                  return AVERROR(ENOMEM);
> > @@ -383,18 +526,33 @@ static int filter_frame(AVFilterLink *inlink, AVFrame
> > *in)
> >      AVFrame *out;
> >      IplImage inimg, outimg;
> >
> > +    /* facedetect filter will passthrought the input frame */
> > +    if (strcmp(s->name, "facedetect")) {
> >      out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> >      if (!out) {
> >          av_frame_free(&in);
> >          return AVERROR(ENOMEM);
> >      }
> >      av_frame_copy_props(out, in);
> > +    } else {
> > +        out = in;
> > +    }
> >
> >      fill_iplimage_from_frame(&inimg , in , inlink->format);
> > +
> > +    if (strcmp(s->name, "facedetect")) {
> >      fill_iplimage_from_frame(&outimg, out, inlink->format);
> >      s->end_frame_filter(ctx, &inimg, &outimg);
> >      fill_frame_from_iplimage(out, &outimg, inlink->format);
> > +    } else {
> > +        s->end_frame_filter(ctx, &inimg, NULL);
> > +    }
> > +
> > +    if (s->postprocess) {
> > +        s->postprocess(ctx, out);
> > +    }
> >
> > +    if (out != in)
> >      av_frame_free(&in);
> >
> >      return ff_filter_frame(outlink, out);
> > --
> > 2.21.0
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel at ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".

-- 
Thanks,
Limin Wang