[FFmpeg-devel] [PATCH] avfilter: Added siti filter

Lynne dev at lynne.ee
Fri Jan 15 07:31:17 EET 2021


Jan 15, 2021, 06:06 by borbarak at fb.com:

>
> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
> in ITU-T P.910: Subjective video quality assessment methods for multimedia
> applications.
> ---
>  Changelog                |   1 +
>  doc/filters.texi         |  25 ++++
>  libavfilter/Makefile     |   1 +
>  libavfilter/allfilters.c |   1 +
>  libavfilter/version.h    |   2 +-
>  libavfilter/vf_siti.c    | 359 +++++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 388 insertions(+), 1 deletion(-)
>  create mode 100644 libavfilter/vf_siti.c
>
> +// Determine whether the video is in full or limited range. If not defined, assume limited.
> +static int is_full_range(AVFrame* frame)
> +{
> +    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
> +    {
> +        // If color range not specified, fallback to pixel format
> +        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
> +    }
> +    return frame->color_range == AVCOL_RANGE_JPEG;
> +}
> +
> +// Check frame's color range and convert to full range if needed
> +static uint16_t convert_full_range(uint16_t y, SiTiContext *s)
> +{
> +    if (s->full_range == 1)
> +    {
> +        return y;
> +    }
> +
> +    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
> +    double factor = s->pixel_depth == 1? 1 : 4;
> +    double shift = 16 * factor;
> +    double limit_upper = 235 * factor - shift;
> +    double full_upper = 256 * factor - 1;
> +    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
> +    return (uint16_t) (full_upper * limit_y / limit_upper);
> +}
> +
> +// Applies sobel convolution
> +static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s)
> +{
> +    int filter_width = 3;
> +    int filter_size = filter_width * filter_width;
> +    for (int j=1; j<s->height-1; j++)
> +    {
> +        for (int i=1; i<s->width-1; i++)
> +        {
> +            double x_conv_sum = 0, y_conv_sum = 0;
> +            for (int k=0; k<filter_size; k++)
> +            {
> +                int ki = k % filter_width - 1;
> +                int kj = floor(k / filter_width) - 1;
> +                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
> +                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
> +                x_conv_sum += data * X_FILTER[k];
> +                y_conv_sum += data * Y_FILTER[k];
> +            }
> +            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
> +            // Dst matrix is smaller than src since we ignore edges that can't be convolved
> +            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
> +        }
> +    }
> +}
> +
> +// Calculate pixel difference between current and previous frame, and update previous
> +static void calculate_motion(const unsigned char* curr, double* motion_matrix,
> +                             int linesize, SiTiContext *s)
> +{
> +    for (int j=0; j<s->height; j++)
> +    {
> +        for (int i=0; i<s->width; i++)
> +        {
> +            double motion = 0;
> +            int curr_index = j * (linesize / s->pixel_depth) + i;
> +            int prev_index = j * s->width + i;
> +            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
> +
> +            if (s->nb_frames > 1)
> +            {
> +                // Previous frame is already converted to full range
> +                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
> +            }
> +            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
> +            motion_matrix[j * s->width + i] = motion;
> +        }
> +    }
> +}
> +
> +static double std_deviation(double* img_metrics, int width, int height)
> +{
> +    double size = height * width;
> +
> +    double mean_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            mean_sum += img_metrics[j * width + i];
> +        }
> +    }
> +    double mean = mean_sum / size;
> +
> +    double sqr_diff_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            double mean_diff = img_metrics[j * width + i] - mean;
> +            sqr_diff_sum += (mean_diff * mean_diff);
> +        }
> +    }
>

The coding style doesn't match the project's style.
We don't put opening braces on a new line, and in the
case of single-line blocks we leave the braces off entirely.


> +
> +#define OFFSET(x) offsetof(SiTiContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +static const AVOption siti_options[] = {
> +    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
> +    { NULL }
> +};
>

Make it output the data to the frame metadata instead. That's how
we usually deal with data like this.
The 'metadata' filter can then be used to save the metadata to a file
or alter it.

Just an initial review.


More information about the ffmpeg-devel mailing list