[FFmpeg-devel] [PATCH] add phqm filter and img_hash

Fri Oct 25 11:49:53 EEST 2019

On Thu, Oct 24, 2019 at 19:23:48 -0500, ckennedy at ellation.com wrote:
> ffmpeg -i encode.mp4 -i reference.mp4 \
>            -filter_complex "[0:v][1:v]phqm=stats_file=out.log" \
>            -an -codec:v rawvideo -y -f null /dev/null

-f null doesn't require -c:v rawvideo.

>  OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
> +OBJS-$(CONFIG_PHQM_FILTER)               += vf_phqm.o img_hash.o
>  OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)           += vf_datascope.o
>  OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o framesync.o
>  OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \

Incorrect indentation, and should be in alphabetical order.

>  extern AVFilter ff_vf_ocv;
> +extern AVFilter ff_vf_phqm;
>  extern AVFilter ff_vf_oscilloscope;
>  extern AVFilter ff_vf_overlay;

This should be in alphabetical order.

> +using namespace cv;
> +using namespace cv::img_hash;
> +using namespace std;

I dislike this style, especially the "std", but there are probably no
existing C++ recommendations for ffmpeg.

> +    if      (pixfmt == AV_PIX_FMT_GRAY8) { depth = IPL_DEPTH_8U;  channels_nb = 1; }
> +    else if (pixfmt == AV_PIX_FMT_BGRA)  { depth = IPL_DEPTH_8U;  channels_nb = 4; }
> +    else if (pixfmt == AV_PIX_FMT_BGR24) { depth = IPL_DEPTH_8U;  channels_nb = 3; }
> +    else if (pixfmt == AV_PIX_FMT_YUV420P) { depth = IPL_DEPTH_8U;  channels_nb = 3; }
> +    else return;

This looks artificially overcomplicated.

int depth = IPL_DEPTH_8U; // or even const int - or just use the actual value in place

and then
    switch (pixfmt) {
    case AV_PIX_FMT_GRAY8:   channels_nb = 1; break;
    case AV_PIX_FMT_BGRA :   channels_nb = 4; break;
    case AV_PIX_FMT_BGR24: // fallthrough
    case AV_PIX_FMT_YUV420P: channels_nb = 3; break;
    default: return;
    }

> +// Get the score of two Video Frames by comparing the perceptual hashes and deriving a hamming distance
> +// showing how similar they are or different. lower the score is better for most algorithms

"lower score"

> +    cv::Mat h1;
> +    cv::Mat h2;
> +    cv::Mat m1;
> +    cv::Mat m2;

It seems you're declaring use namespace cv, but not makeing use of it.
Drop the former. (Also for cv::img_hash.)

> +    // Convert an IplImage to an Mat Image for OpenCV (newer format)
> +    m1 = cv::cvarrToMat(&ipl1);
> +    m2 = cv::cvarrToMat(&ipl2);

How do you assure you're no passing uninitialized pointers to
cv::cvarrToMat()? I the pixfmt is rejected by
fill_iplimage_from_frame(), ipl1/2 aren't set. And you accept many more
pixfmts in the filter than fill_iplimage_from_frame() handles, right?

> +    // substantiate the hash type algorithm
> +    if (hash_type == COLORMOMENT) {
> +        algo = cv::img_hash::ColorMomentHash::create();
> +    } else if (hash_type == AVERAGE) {
> +        algo = cv::img_hash::AverageHash::create();
> +    } else if (hash_type == BLOCKMEAN1) {
> +        //BlockMeanHash support mode 0 and mode 1, they associate to
> +        //    //mode 1 and mode 2 of PHash library
> +        algo = cv::img_hash::BlockMeanHash::create(0);
> +    } else if (hash_type == BLOCKMEAN2) {
> +        algo = cv::img_hash::BlockMeanHash::create(1);
> +    } else if (hash_type == MARRHILDRETH) {
> +        algo = cv::img_hash::MarrHildrethHash::create();
> +    } else if (hash_type == RADIALVARIANCE) {
> +        algo = cv::img_hash::RadialVarianceHash::create();
> +    } else { // Default to PHash
> +        algo = cv::img_hash::PHash::create();
> +    }

Again, a switch/case would be easier to read.

> + * PHQM Perceptual Hash Quality Metric
> + * PHQM: Caculate the Image Hash Hamming Difference between two input videos.

"Calculate"

> +    {"scd_thresh", "Scene Change Detection Threshold. 0.5 default, 0.0-1.0",    OFFSET(scd_thresh),  AV_OPT_TYPE_DOUBLE,    {.dbl=0.5},    0, 1, FLAGS },

Defaults and ranges are self-documenting, no need to mention.

> +    { "hash_type", "options: phash, colormoment, average", OFFSET(hash_type), AV_OPT_TYPE_STRING, {.str = "phash"}, .flags = FLAGS },

This calls for proper options flags.

> +    double ret = 0;

Usually "0.", but shouldn't matter.

> +    double comp_mse[4], mse = 0, hd = 0;

Ditto.

> +    double hd_limit = 1000000;

Ditto.

> +        av_log(s, AV_LOG_WARNING, "ImgHashScene: n:%"PRId64"-%"PRId64" hd_avg:%0.3lf hd_min:%0.3lf hd_max:%0.3lf scd:%0.2lf\n",
> +               (s->nb_frames - s->nb_shd), s->nb_frames - 1, (s->shd / s->nb_shd), s->smin_hd, s->smax_hd, s->scene_score);

Is WARNING the correct level for such an info? I don't believe so.

> +    /* limit the highest value so we cut off at perceptual difference match */
> +    if (s->hash_type_i == PHASH || s->hash_type_i == AVERAGE)
> +        hd_limit = 5;
> +    else if (s->hash_type_i == MARRHILDRETH)
> +        hd_limit = 30;
> +    else if (s->hash_type_i == RADIALVARIANCE)
> +        hd_limit = 0.9;
> +    else if (s->hash_type_i == BLOCKMEAN1)
> +        hd_limit = 12;
> +    else if (s->hash_type_i == BLOCKMEAN2)
> +        hd_limit = 48;
> +    else if (s->hash_type_i == COLORMOMENT)
> +        hd_limit = 8;

switch/case?

> +        fprintf(s->stats_file,
> +                "n:%"PRId64" phqm:%0.3f phqm_min:%0.3f phqm_max:%0.3f sad:%0.2f ",
> +                s->nb_frames, hd, s->min_hd, s->max_hd, s->scene_score);
> +        fprintf(s->stats_file, "\n");

Trailing whitespace in log. Intended?

> +    if (s->hash_type) {
> +        if (!strcmp(s->hash_type, "phash"))
> +            s->hash_type_i = PHASH;
> +        else if (!strcmp(s->hash_type, "colormoment")) {
> +            s->hash_type_i = COLORMOMENT;
> +        } else if (!strcmp(s->hash_type, "average"))
> +            s->hash_type_i = AVERAGE;
> +        else if (!strcmp(s->hash_type, "marrhildreth"))
> +            s->hash_type_i = MARRHILDRETH;
> +        else if (!strcmp(s->hash_type, "radialvariance"))
> +            s->hash_type_i = RADIALVARIANCE;
> +        else if (!strcmp(s->hash_type, "blockmean1"))
> +            s->hash_type_i = BLOCKMEAN1;
> +        else if (!strcmp(s->hash_type, "blockmean2"))
> +            s->hash_type_i = BLOCKMEAN2;
> +        else {
> +            av_log(s, AV_LOG_ERROR, "Bad hash_type given %s\n", s->hash_type);
> +            return AVERROR(EINVAL);
> +        }

As mentioned above, ffmpeg's option system is prepared for this, no
need to parse yourself. See how other filters do it.

> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum AVPixelFormat pix_fmts[] = {
> +        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
> +#define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf,  AV_PIX_FMT_YUV422##suf,  AV_PIX_FMT_YUV444##suf
> +#define PF_ALPHA(suf)   AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, AV_PIX_FMT_YUVA444##suf
> +#define PF(suf)         PF_NOALPHA(suf), PF_ALPHA(suf)
> +        PF(P), PF(P9), PF(P10), PF_NOALPHA(P12), PF_NOALPHA(P14), PF(P16),
> +        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
> +        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
> +        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
> +        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
> +        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
> +        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
> +        AV_PIX_FMT_NONE
> +    };

> +    PHQMContext *s = ctx->priv;
> +    double average_max;
> +    unsigned sum;
> +    int j;

All unused?

Cheers,
Moritz