Adds Haar-like feature detection to mplayer. Needs OpenCV >= 0.9.7, applies cleanly to mplayer SVN trunk r29324 (2009-05-25). -- Sylvain Fourmanoit diff -ruN a/DOCS/man/en/mplayer.1 b/DOCS/man/en/mplayer.1 --- a/DOCS/man/en/mplayer.1 2009-05-23 13:29:47.975606248 -0400 +++ b/DOCS/man/en/mplayer.1 2009-05-26 02:25:20.156730427 -0400 @@ -7335,6 +7335,77 @@ .IPs Threshold below which a pixel value is considered black (default: 32). .RE +.PD 1 +. +.TP +.B haardetect=/path/to/cascade_file.xml[:options] +Detect Haar-like features in frames, using the OpenCV library. +Can be used, for instance, to detect faces with real-time or near real-time +performance from most sources on modern hardware. +Have a look at the OpenCV documentation to learn more. +.RE +.PD 1 +.sp 1 +.RS +The required parameter is: +.RE +.PD 0 +.RSs +.IPs /path/to/cascade_file.xml +Full path to the cascade classifier file to use, as supported by OpenCV. +This library provides a number of ready-made cascade files +for detecting a number of features, including frontal and profile +faces. +.RE +.PD 1 +.sp 1 +.RS +The options are: +.RE +.PD 0 +.RSs +.IPs quality=<value> +Determine how many times the detection box will be scaled when +analyzing a frame by computing its initial size according to scale +(box = frame_height / (scale ^ quality)). +Default is 7 (reasonably fast, detects features +about a fourth of the image height at default scale). +Higher values will shrink the initial detection box, making it +more likely to detect smaller features, but using +more cycles (8x more cycles for 2x quality). +At default scale, it does not make sense to make it greater than about 11, +unless you are looking for very small details in the frames. +.IPs scale=<value> +Set by how much to increase the box size between each search iteration. +Default is 1.2, meaning the box will grow 20% at each step. +Useful range is typically between 1.1 and 1.3. 
+.IPs box=<value> +Set the initial <value>x<value> detection box size: it is basically +the estimated minimal bounding box of the searched features. +Default box is set so that it will take 7 iterations to cover the image +(see quality above). +If you set quality and box at the same time, box will have precedence. +.IPs neighbors=<value> +Set the minimal number of neighboring zones to consider for a match. +Default is 2, but you can safely set it to 3 most of the time +to eliminate false positives at the expense of more cycles. +Upper bound for this value depends on the cascade classifier. +.IPs delta=<value> +Set minimal time interval (in seconds) separating two frames you want +to perform detection on. +Default is zero, meaning that all frames are analyzed. +It's usually significantly faster to use -sstep if you have +a seekable stream and there is no other reason to play it in full. +.IPs logfile=<filename> +Set filename to output detection results to. +Default is haar.log. +.RE +.PD 1 +.sp 1 +.RS +Options can be listed colon-separated in any order. +Using only their first letter (e.g. q=8 instead of quality=8) works. +.RE . . . 
diff -ruN a/Makefile b/Makefile --- a/Makefile 2009-05-23 13:29:47.975606248 -0400 +++ b/Makefile 2009-05-25 14:44:08.387237509 -0400 @@ -331,6 +331,7 @@ SRCS_COMMON-$(XVID4) += libmpcodecs/vd_xvid4.c SRCS_COMMON-$(ZR) += libmpcodecs/vd_zrmjpeg.c \ libmpcodecs/vf_zrmjpeg.c +SRCS_COMMON-$(OPENCV) += libmpcodecs/vf_haardetect.c SRCS_COMMON = asxparser.c \ codec-cfg.c \ cpudetect.c \ diff -ruN a/configure b/configure --- a/configure 2009-05-23 13:29:47.979605304 -0400 +++ b/configure 2009-05-25 14:44:08.391236248 -0400 @@ -261,6 +261,7 @@ --disable-dvdread disable libdvdread [autodetect] --disable-dvdread-internal disable internal libdvdread [autodetect] --disable-libdvdcss-internal disable internal libdvdcss [autodetect] + --disable-opencv disable OpenCV lib [autodetect] --disable-cdparanoia disable cdparanoia [autodetect] --disable-cddb disable cddb [autodetect] --disable-bitmap-font disable bitmap font support [enable] @@ -630,6 +631,7 @@ _dvdread=auto _dvdread_internal=auto _libdvdcss_internal=auto +_opencv=auto _xanim=auto _real=auto _live=auto @@ -1036,6 +1038,8 @@ --disable-libdvdcss-internal) _libdvdcss_internal=no ;; --enable-dvdnav) _dvdnav=yes ;; --disable-dvdnav) _dvdnav=no ;; + --enable-opencv) _opencv=yes ;; + --disable-opencv) _opencv=no ;; --enable-xanim) _xanim=yes ;; --disable-xanim) _xanim=no ;; --enable-real) _real=yes ;; @@ -5876,6 +5880,24 @@ fi echores "$_libdvdcss_internal" +echocheck "OpenCV" +if test "$_opencv" = auto ; then + _opencv=no + cat > $TMPC < +#include + +int main(void) { cvLoadImage(NULL, 0); } +EOF + cc_check '-lcv -lhighgui' && _opencv=yes +fi +if test "$_opencv" = yes ; then + def_opencv='#define CONFIG_OPENCV 1' + extra_ldflags="$extra_ldflags -lcv -lhighgui" +else + def_opencv='#undef CONFIG_OPENCV' +fi +echores "$_opencv" echocheck "cdparanoia" if test "$_cdparanoia" = auto ; then @@ -8207,6 +8229,7 @@ DVDNAV_INTERNAL = $dvdnav_internal DVDREAD = $_dvdread DVDREAD_INTERNAL = $_dvdread_internal +OPENCV = $_opencv DXR2 = 
$_dxr2 DXR3 = $_dxr3 ESD = $_esd @@ -8556,6 +8579,7 @@ $def_sortsub $def_stream_cache $def_pthread_cache +$def_opencv /* CPU stuff */ diff -ruN a/libmpcodecs/vf.c b/libmpcodecs/vf.c --- a/libmpcodecs/vf.c 2009-05-23 13:29:47.983605328 -0400 +++ b/libmpcodecs/vf.c 2009-05-25 14:44:08.391236248 -0400 @@ -97,6 +97,7 @@ extern const vf_info_t vf_info_mcdeint; extern const vf_info_t vf_info_yadif; extern const vf_info_t vf_info_blackframe; +extern const vf_info_t vf_info_haardetect; extern const vf_info_t vf_info_geq; extern const vf_info_t vf_info_ow; @@ -190,6 +191,9 @@ #endif &vf_info_yadif, &vf_info_blackframe, +#ifdef CONFIG_OPENCV + &vf_info_haardetect, +#endif &vf_info_ow, NULL }; diff -ruN a/libmpcodecs/vf_haardetect.c b/libmpcodecs/vf_haardetect.c --- a/libmpcodecs/vf_haardetect.c 1969-12-31 19:00:00.000000000 -0500 +++ b/libmpcodecs/vf_haardetect.c 2009-05-26 01:44:27.848922969 -0400 @@ -0,0 +1,356 @@ +/* + * Detect Haar-Like Features from Frames + * (c) 2009 Sylvain Fourmanoit + * + * Based on OpenCV sample code: facedetect.c + * (c) 2000, Intel Corporation + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include +#include +#include + +#include + +#include "config.h" +#include "mp_msg.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" + +/* Per-instance filter state: OpenCV handles, log file and detection settings. */ +struct vf_priv_s { + CvHaarClassifierCascade * cascade; /* classifier loaded by open() from the xml file */ + IplImage * image; /* 3-channel work image filled from the luma plane */ + CvMemStorage * storage; /* OpenCV storage, cleared before each detection */ + FILE * log; /* detection log, NULL when it could not be opened */ + unsigned int frame; /* incremented once per put_image() call */ + double threshold; /* pts from which the next detection may run */ + int quality; /* scaling steps used to derive the default box size */ + double scale_factor; /* box growth factor between search iterations */ + unsigned int box_size; /* minimal detection box size; 0 = derive it in config() */ + unsigned int min_neighbors; /* passed as-is to cvHaarDetectObjects() */ + double time_delta; /* minimal pts interval between two detections */ +}; + +/* OpenCV error callback: report the error through mp_msg(); vf is the user data registered in open(). Always returns 0. */ +static int +cvErrorLog(int status, const char * func_name, const char * err_msg, + const char * file_name, int line, struct vf_instance_s * vf) +{ + mp_msg(MSGT_VFILTER, MSGL_WARN, "%s: OpenCV error - %s " + "(file: %s func: %s line: %d)\n", + vf->info->name, err_msg, file_name, func_name, line); + return 0; +} + +/* Derive the default box size from the frame height if none was given, (re)create the OpenCV work image, then configure the next filter. Returns 0 on failure. */ +static int +config(struct vf_instance_s* vf, int width, int height, int d_width, + int d_height, unsigned int flags, unsigned int outfmt) +{ + double ratio; + + if (!vf->priv->box_size) + vf->priv->box_size = (int)(((double)height) / + exp(vf->priv->quality * + log(vf->priv->scale_factor))); + + if (!vf->priv->box_size) { + mp_msg(MSGT_VFILTER, MSGL_ERR, + "%s: computed box size is zero.\n", vf->info->name); + return 0; + } + + if ((ratio = (((double)vf->priv->box_size) / height)) < .1) + mp_msg(MSGT_VFILTER, MSGL_WARN, + "%s: box size / height ratio is awfully small (%.1f%%)\n", + vf->info->name, ratio * 100); + + if (vf->priv->image) cvReleaseImage(&vf->priv->image); /* do not leak the old image if config() is called more than once */ + if (!((vf->priv->image = cvCreateImage(cvSize(width, height), 8, 3)))) { + mp_msg(MSGT_VFILTER, MSGL_ERR, + "%s: could not create OpenCV image\n", vf->info->name); + return 0; + } + + return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); +} + +/* Accept only the formats listed below (put_image() reads plane 0 as luma) and defer the final answer to the next filter; anything else is refused with 0. */ +static int +query_format(struct vf_instance_s *vf, unsigned fmt) +{ + switch (fmt) { + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_YVU9: + case IMGFMT_Y800: + case IMGFMT_Y8: + /* preview would use vo_draw_alpha_yv12 */ + case IMGFMT_IF09: + case IMGFMT_CLPL: + case IMGFMT_NV12: + case IMGFMT_NV21: + case IMGFMT_444P: + case 
IMGFMT_422P: + case IMGFMT_411P: + case IMGFMT_HM12: + return vf_next_query_format(vf, fmt); + default: + return 0; + } +} + +/* Run the detection when pts has reached the threshold, report each hit through mp_msg() and the log file, then export the untouched frame to the next filter and return its result. */ +static int +put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts) +{ + mp_image_t *dmpi; + int i, x, y; + char * yplane, * dest, * pos; + CvSeq * objects; + CvRect * r; + + if (pts >= vf->priv->threshold) { + vf->priv->threshold = ((pts - vf->priv->threshold <= vf->priv->time_delta)? + vf->priv->threshold:pts) + vf->priv->time_delta; /* next slot: previous threshold + delta, or pts + delta when we fell behind by more than delta */ + + /* Just keep luma since we only care about grayscale */ + for (yplane = mpi->planes[0], dest = vf->priv->image->imageData, y = 0; + y < mpi->h; + ++y, yplane += mpi->stride[0], dest += vf->priv->image->widthStep) + for (pos = dest, x=0; x < mpi->w; ++x) { /* visible width, matching mpi->h: the stored mpi->width may exceed the work image sized in config() */ + *(pos++) = yplane[x]; + *(pos++) = yplane[x]; + *(pos++) = yplane[x]; + } + + cvClearMemStorage(vf->priv->storage); + objects = cvHaarDetectObjects(vf->priv->image, + vf->priv->cascade, + vf->priv->storage, + vf->priv->scale_factor, + vf->priv->min_neighbors, + CV_HAAR_DO_CANNY_PRUNING, + cvSize(vf->priv->box_size, + vf->priv->box_size)); + + for (i = 0; i < ((objects)?objects->total:0); ++i) { + r = (CvRect*)cvGetSeqElem(objects, i); + mp_msg(MSGT_VFILTER, MSGL_INFO, + "%s: (%.3f s., frame %u) object %u at %ux%u+%u+%u\n", + vf->info->name, pts, vf->priv->frame, + i, r->width, r->height, r->x, r->y); + if (vf->priv->log) + fprintf(vf->priv->log, "%10.3f%10u%10u%10u%10u\n", + pts, r->x, r->y, r->width, r->height); + } + } + + dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT, 0, + mpi->width, mpi->height); + + dmpi->planes[0] = mpi->planes[0]; + dmpi->stride[0] = mpi->stride[0]; + dmpi->planes[1] = mpi->planes[1]; + dmpi->stride[1] = mpi->stride[1]; + dmpi->planes[2] = mpi->planes[2]; + dmpi->stride[2] = mpi->stride[2]; + + vf_clone_mpi_attributes(dmpi, mpi); + + ++vf->priv->frame; + return vf_next_put_image(vf, dmpi, pts); +} + +/* No request is handled here: forward everything to the next filter. */ +static int +control(struct vf_instance_s* vf, int request, void* data) +{ + return 
vf_next_control(vf,request,data); +} + +/* Release the cascade, work image, storage and log file, then the private state itself; safe to call with a NULL or partially initialized vf->priv. */ +static void +uninit(struct vf_instance_s *vf) { + if (vf->priv) { + if (vf->priv->cascade) cvRelease((void**)&vf->priv->cascade); + if (vf->priv->image) cvReleaseImage(&vf->priv->image); + if (vf->priv->storage) cvReleaseMemStorage(&vf->priv->storage); + if (vf->priv->log) fclose(vf->priv->log); + free(vf->priv); + } +} + +/* Filter entry point: parse "cascade.xml[:opt=value...]" from args, validate the settings, open the log file and load the classifier. Returns 1 on success, 0 on failure (after releasing everything acquired here). */ +static int +open(vf_instance_t *vf, char* args) +{ + int box_flag = 0; + char * ap, * q, * a, * argcopy = NULL; /* argcopy: private, writable copy of args; the caller's string is never modified or freed */ + char * profilename = NULL, * logfilename = "haar.log"; + + if (!(vf->priv = calloc(1, sizeof(struct vf_priv_s)))) + goto nomem; + + cvRedirectError((CvErrorCallback)cvErrorLog, (void*)vf, NULL); + + vf->config = config; + vf->put_image = put_image; + vf->control = control; + vf->uninit = uninit; + vf->query_format = query_format; + + vf->priv->quality = 7; + vf->priv->scale_factor = 1.2; + vf->priv->min_neighbors = 2; + + if (args && !(argcopy = strdup(args))) + goto nomem; + + if ((ap = argcopy)) + while(*ap) { + q = ap; + if ((ap = strchr(q, ':'))) *ap ++= 0; else ap = q + strlen(q); + + if (q == argcopy) { /* first token is the cascade path: taken whole, so a '=' inside it is preserved */ + profilename = q; + continue; + } + + if ((a = strchr(q, '='))) *a ++= 0; else a = q + strlen(q); + switch(*q) { + case 0: break; + case 'q': vf->priv->quality = atoi(a); break; + case 's': vf->priv->scale_factor = atof(a); break; + case 'b': box_flag = 1; vf->priv->box_size = atoi(a); break; + case 'n': vf->priv->min_neighbors = atoi(a); break; + case 'd': vf->priv->time_delta = atof(a); break; + case 'l': logfilename = a; break; + case 'h': + mp_msg(MSGT_VFILTER, MSGL_INFO, + "\n%s options:\n\n" + "quality=value " + "- Set quality (default: %u)\n" + "scale=value " + "- Set search scale factor (default: %.2f)\n" + "box=value " + "- Set minimal feature size\n" + " " + " (default is set so detection takes %u scaling steps)\n" + "neighbors=value " + "- Set minimal number of neighbors (default: %u)\n" + "delta=value " + "- Set delay in seconds between consecutive detections\n" + " " + " (default is zero, meaning every 
frame is analyzed)\n" + "logfile=filename " + "- Set output log file (default: %s)\n\n", + vf->info->name, + vf->priv->quality, + vf->priv->scale_factor, + vf->priv->quality, + vf->priv->min_neighbors, + logfilename); + break; + default: + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: Unknown argument %s.\n", vf->info->name, q); + goto fail; + } + } + + if (!profilename || strlen(profilename) == 0) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: No Haar cascade classifier xml file specified\n", + vf->info->name); + goto fail; + } + + if (vf->priv->quality <= 1) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: Quality should be greater than 1\n", + vf->info->name); + goto fail; + } + + if (vf->priv->scale_factor <= 1) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: Scale factor should be greater than 1\n", + vf->info->name); + goto fail; + } + + if (box_flag) { + if (vf->priv->box_size <= 1) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: Box size should be greater than 1\n", + vf->info->name); + goto fail; + } + } else vf->priv->box_size = 0; + + if (vf->priv->min_neighbors < 1) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: Minimal number of neighbors box size should be positive\n", + vf->info->name); + goto fail; + } + + if ((vf->priv->log = fopen(logfilename, "w"))) + fprintf(vf->priv->log, + "# pts x y width height\n"); + else + mp_msg(MSGT_VFILTER, MSGL_INFO, + "%s: Could not open log file '%s' for writing\n", + vf->info->name, logfilename); + + if (!(vf->priv->cascade = (CvHaarClassifierCascade*) + cvLoad(profilename, 0, 0, 0))) { + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: Could not load Haar cascade classifier xml file '%s'\n", + vf->info->name, profilename); + goto fail; + } + + if (!(vf->priv->storage = cvCreateMemStorage(0))) + goto nomem; + + free(argcopy); /* profilename and logfilename may point into argcopy; neither is used past this point */ + return 1; +nomem: + mp_msg(MSGT_VFILTER, MSGL_FATAL, + "%s: Not enough memory.\n", vf->info->name); +fail: + uninit(vf); + free(argcopy); /* still NULL when calloc() or strdup() failed */ + return 0; +} + +const vf_info_t vf_info_haardetect = { + "detects Haar like features", + 
"haardetect", + "Sylvain Fourmanoit", + "Useful for detecting many kinds of features, including faces", + open, + NULL +};