[FFmpeg-devel] [PATCH] avfilter: Added siti filter

Tue Jan 19 02:07:19 EET 2021

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.

Update: Fixed bracket style. I'm already adding the data to the frame's metadata; is the suggestion to remove the file option altogether?

---
 Changelog                |   1 +
 doc/filters.texi         |  25 ++++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 321 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 350 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 0b27c15122..5e1f107204 100644
--- a/Changelog
+++ b/Changelog
@@ -56,6 +56,7 @@ version <next>:
 - shufflepixels filter
 - tmidequalizer filter
 - estdif filter
+- siti filter


 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 3ce6699d7c..910558e162 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -18239,6 +18239,31 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu

 @end itemize

+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en}.
+Per frame metrics can be written into a file in csv format.
+
+It accepts the following option:
+
+@table @option
+@item stats_file
+Set the path to the file where per frame SI and TI metrics will be written. If no file
+is specified, only summary statistics will be printed to the console.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and store per-frame data to siti.csv:
+@example
+ffmpeg -i input.mp4 -vf siti=stats_file='siti.csv' -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 44afa79963..7f96c22b12 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -414,6 +414,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 471844a603..0138c22cac 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -394,6 +394,7 @@ extern AVFilter ff_vf_signature;
 extern AVFilter ff_vf_smartblur;
 extern AVFilter ff_vf_sobel;
 extern AVFilter ff_vf_sobel_opencl;
+extern AVFilter ff_vf_siti;
 extern AVFilter ff_vf_split;
 extern AVFilter ff_vf_spp;
 extern AVFilter ff_vf_sr;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 2136235e54..e949e9bfb8 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"

 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR  96
+#define LIBAVFILTER_VERSION_MINOR  97
 #define LIBAVFILTER_VERSION_MICRO 100


diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..de2868fd93
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2002 A'rpi
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+// Horizontal 3x3 convolution kernel, stored row by row. convolve_sobel()
+// applies it together with Y_FILTER to every interior pixel.
+static const int X_FILTER[9] = {
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+// Vertical 3x3 convolution kernel (the transpose of X_FILTER), stored row by row.
+static const int Y_FILTER[9] = {
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+// Private state of the siti filter (one instance per filter context).
+typedef struct SiTiContext {
+    const AVClass *class;       // class pointer; presumably required as first member for AVOptions - confirm
+    int pixel_depth;            // bytes per luma sample as set in config_input(): 1 (8-bit) or 2 (16-bit storage)
+    int width, height;          // input frame dimensions, copied from the input link
+    int nb_frames;              // number of frames processed so far
+    unsigned char *prev_frame;  // previous frame's luma, already converted to full range, tightly packed (width samples per row)
+    double max_si;              // largest per-frame SI seen
+    double max_ti;              // largest per-frame TI seen
+    double min_si;              // smallest per-frame SI seen
+    double min_ti;              // smallest per-frame TI seen
+    double sum_si;              // running sum of SI, used for the average in uninit()
+    double sum_ti;              // running sum of TI, used for the average in uninit()
+    FILE *stats_file;           // open CSV stats file, or NULL when none was requested
+    char *stats_file_str;       // "stats_file" option value (path), or NULL
+    int full_range;             // whether the current frame is full range; refreshed for every frame
+} SiTiContext;
+
+// Advertise the pixel formats this filter accepts: planar YUV 4:2:0 / 4:2:2
+// in 8-bit, 8-bit "J" and 10-bit variants.
+static int query_formats(AVFilterContext *ctx) {
+    static const enum AVPixelFormat supported_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_YUV422P10,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *list = ff_make_format_list(supported_fmts);
+
+    return list ? ff_set_common_formats(ctx, list) : AVERROR(ENOMEM);
+}
+
+// Filter initialization: user options are available but no input data yet.
+// Opens the optional per-frame stats file and writes its CSV header.
+// Returns 0 on success or the AVERROR code derived from errno if the file
+// cannot be opened.
+static av_cold int init(AVFilterContext *ctx) {
+    SiTiContext *siti = ctx->priv;
+    char errbuf[128];
+    int ret;
+
+    siti->max_si = 0;
+
+    // Nothing more to do when no stats file was requested
+    if (!siti->stats_file_str)
+        return 0;
+
+    siti->stats_file = fopen(siti->stats_file_str, "w");
+    if (siti->stats_file) {
+        fprintf(siti->stats_file, "Frame,SI,TI\n");
+        return 0;
+    }
+
+    ret = AVERROR(errno);
+    av_strerror(ret, errbuf, sizeof(errbuf));
+    av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+           siti->stats_file_str, errbuf);
+    return ret;
+}
+
+// Filter teardown: logs the summary statistics when at least one frame was
+// processed, closes the stats file and releases the previous-frame buffer.
+static av_cold void uninit(AVFilterContext *ctx) {
+    SiTiContext *s = ctx->priv;
+
+    // With no processed frame the averages would be 0 / 0 (NaN), so the
+    // summary is only printed when there is something to report.
+    if (s->nb_frames > 0) {
+        double avg_si = s->sum_si / s->nb_frames;
+        double avg_ti = s->sum_ti / s->nb_frames;
+        av_log(ctx, AV_LOG_INFO,
+               "Summary:\nTotal frames: %d\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+
+    if (s->stats_file && s->stats_file != stdout)
+        fclose(s->stats_file);
+    s->stats_file = NULL;
+
+    // prev_frame is allocated in config_input(); av_freep() also resets the
+    // pointer and accepts a NULL buffer.
+    av_freep(&s->prev_frame);
+}
+
+// Input link configuration: records the frame geometry and the luma sample
+// size, and allocates the buffer that holds the previous frame's luma plane.
+// Returns 0 on success or AVERROR(ENOMEM) if the buffer cannot be allocated.
+static int config_input(AVFilterLink *inlink) {
+    // Video input data available
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    // Step of the first plane in bytes; the rest of the file treats 1 as
+    // 8-bit samples and 2 as 16-bit storage
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = (size_t) s->width * pixel_sz * s->height;
+
+    // Drop any buffer left over from an earlier configuration so it is not leaked
+    av_freep(&s->prev_frame);
+    s->prev_frame = av_malloc(data_sz);
+    if (!s->prev_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+// Read the sample at position index from src, which holds 16-bit samples
+// when pixel_depth == 2 and 8-bit samples otherwise
+static uint16_t get_frame_data(const unsigned char* src, int pixel_depth, int index) {
+    if (pixel_depth != 2)
+        return (uint16_t) src[index];
+    return ((const uint16_t *) src)[index];
+}
+
+// Store data at position index in dst: as a 16-bit sample when
+// pixel_depth == 2, truncated to 8 bits otherwise
+static void set_frame_data(unsigned char* dst, int pixel_depth, int index, uint16_t data) {
+    if (pixel_depth != 2) {
+        dst[index] = (uint8_t) data;
+        return;
+    }
+    ((uint16_t *) dst)[index] = data;
+}
+
+// Tell whether the frame carries full-range samples. An explicit color range
+// decides; without one, only the YUVJ pixel formats count as full range, so
+// anything undefined is treated as limited.
+static int is_full_range(AVFrame* frame) {
+    switch (frame->color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_NB:
+        // No usable color range: fall back to the pixel format
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    default:
+        return frame->color_range == AVCOL_RANGE_JPEG;
+    }
+}
+
+// Map a luma sample to full range. Samples of a full-range frame are returned
+// untouched; limited-range samples are clipped to the limited interval and
+// stretched over the full one.
+static uint16_t convert_full_range(uint16_t y, SiTiContext *s) {
+    double scale, offset, limited_span, full_max, clipped;
+
+    if (s->full_range == 1)
+        return y;
+
+    // 8-bit limited range spans 16..235; for 10 bits the bounds are multiplied by 4
+    scale = s->pixel_depth == 1 ? 1 : 4;
+    offset = 16 * scale;
+    limited_span = 235 * scale - offset;
+    full_max = 256 * scale - 1;
+    clipped = fmin(fmax(y - offset, 0), limited_span);
+
+    return (uint16_t) (full_max * clipped / limited_span);
+}
+
+// Compute the Sobel gradient magnitude of every interior pixel of src and
+// store it in dst, a (width - 2) x (height - 2) matrix: border pixels cannot
+// be convolved and are skipped. linesize is the source line length in bytes.
+static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s) {
+    const int stride = linesize / s->pixel_depth;   // source line length in samples
+    const int out_width = s->width - 2;
+
+    for (int row = 1; row < s->height - 1; row++) {
+        for (int col = 1; col < s->width - 1; col++) {
+            double gx = 0, gy = 0;
+            int tap = 0;
+
+            // Walk the 3x3 neighbourhood in the same row-major order as the kernels
+            for (int dy = -1; dy <= 1; dy++) {
+                for (int dx = -1; dx <= 1; dx++, tap++) {
+                    int pos = (row + dy) * stride + (col + dx);
+                    uint16_t sample = convert_full_range(get_frame_data(src, s->pixel_depth, pos), s);
+                    gx += sample * X_FILTER[tap];
+                    gy += sample * Y_FILTER[tap];
+                }
+            }
+            dst[(row - 1) * out_width + (col - 1)] = sqrt(gx * gx + gy * gy);
+        }
+    }
+}
+
+// Fill motion_matrix with the per-pixel difference between the current frame
+// and the stored previous one, then store the current frame (converted to
+// full range) as the new previous frame. The first frame yields all zeros.
+static void calculate_motion(const unsigned char* curr, double* motion_matrix,
+                             int linesize, SiTiContext *s) {
+    const int has_prev = s->nb_frames > 1;
+
+    for (int row = 0; row < s->height; row++) {
+        for (int col = 0; col < s->width; col++) {
+            int src_pos = row * (linesize / s->pixel_depth) + col;
+            int packed_pos = row * s->width + col;
+            uint16_t sample = convert_full_range(get_frame_data(curr, s->pixel_depth, src_pos), s);
+            double diff = 0;
+
+            // The stored frame is already in full range
+            if (has_prev)
+                diff = sample - get_frame_data(s->prev_frame, s->pixel_depth, packed_pos);
+            set_frame_data(s->prev_frame, s->pixel_depth, packed_pos, sample);
+            motion_matrix[packed_pos] = diff;
+        }
+    }
+}
+
+// Population standard deviation of a width x height matrix stored row by row
+static double std_deviation(double* img_metrics, int width, int height) {
+    const double count = height * width;
+    double total = 0, sq_total = 0, mean;
+    int row, col;
+
+    // First pass: mean of all entries
+    for (row = 0; row < height; row++) {
+        for (col = 0; col < width; col++)
+            total += img_metrics[row * width + col];
+    }
+    mean = total / count;
+
+    // Second pass: accumulated squared distance to the mean
+    for (row = 0; row < height; row++) {
+        for (col = 0; col < width; col++) {
+            double delta = img_metrics[row * width + col] - mean;
+            sq_total += delta * delta;
+        }
+    }
+
+    return sqrt(sq_total / count);
+}
+
+// Format d with two decimals and store it under key in the metadata dictionary
+static void set_meta(AVDictionary **metadata, const char *key, float d) {
+    char text[128];
+
+    snprintf(text, sizeof text, "%0.2f", d);
+    av_dict_set(metadata, key, text, 0);
+}
+
+// Per-frame entry point: computes SI and TI for the frame's first plane,
+// updates the running statistics, attaches both scores to the frame metadata
+// (and to the stats file, if one is open), then passes the frame on.
+// Returns the result of ff_filter_frame(), or AVERROR(ENOMEM) when the scratch
+// matrices cannot be allocated (the frame is freed in that case).
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame) {
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    double si, ti;
+
+    // Gradient matrix will not include the input frame's edges
+    size_t gradient_data_sz = (size_t) (s->width - 2) * sizeof(double) * (s->height - 2);
+    double *gradient_matrix = av_malloc(gradient_data_sz);
+    size_t motion_data_sz = (size_t) s->width * sizeof(double) * s->height;
+    double *motion_matrix = av_malloc(motion_data_sz);
+    if (!gradient_matrix || !motion_matrix) {
+        // One of the two allocations may have succeeded; av_free() accepts NULL
+        av_free(gradient_matrix);
+        av_free(motion_matrix);
+        av_frame_free(&frame);
+        return AVERROR(ENOMEM);
+    }
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++;
+
+    // Calculate si and ti
+    convolve_sobel(frame->data[0], gradient_matrix, frame->linesize[0], s);
+    calculate_motion(frame->data[0], motion_matrix, frame->linesize[0], s);
+    si = std_deviation(gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(motion_matrix, s->width, s->height);
+
+    // Both scratch matrices are only needed for the two scores above
+    av_free(gradient_matrix);
+    av_free(motion_matrix);
+
+    // Calculate statistics
+    s->max_si = fmax(si, s->max_si);
+    s->max_ti = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    // The first frame seeds the minima; later frames can only lower them
+    s->min_si = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    // Print per frame csv data to file
+    if (s->stats_file)
+        fprintf(s->stats_file, "%d,%f,%f\n", s->nb_frames, si, ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame);
+}
+
+// Byte offset of field x inside the filter's private context
+#define OFFSET(x) offsetof(SiTiContext, x)
+// Option flags shared by every entry of the table below
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+// User-settable options; the { NULL } entry terminates the table
+static const AVOption siti_options[] = {
+    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+// Presumably defines the siti_class referenced by ff_vf_siti.priv_class - confirm against avfilter internals
+AVFILTER_DEFINE_CLASS(siti);
+
+// Single video input pad: config_input() is its config_props callback and
+// filter_frame() its per-frame callback. The { NULL } entry ends the list.
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+// Single video output pad with no callbacks of its own; filter_frame()
+// forwards each input frame to it. The { NULL } entry ends the list.
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO
+    },
+    { NULL }
+};
+
+// Filter definition for "siti" (declared extern in allfilters.c). The
+// description names both metrics the filter computes, SI and TI.
+AVFilter ff_vf_siti = {
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = avfilter_vf_siti_inputs,
+    .outputs       = avfilter_vf_siti_outputs,
+};
--
2.13.5
________________________________
From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> on behalf of Lynne <dev at lynne.ee>
Sent: Thursday, January 14, 2021 9:31 PM
To: FFmpeg development discussions and patches <ffmpeg-devel at ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH] avfilter: Added siti filter

Jan 15, 2021, 06:06 by borbarak at fb.com:

>
> Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
> in ITU-T P.910: Subjective video quality assessment methods for multimedia
> applications.
> ---
>  Changelog                |   1 +
>  doc/filters.texi         |  25 ++++
>  libavfilter/Makefile     |   1 +
>  libavfilter/allfilters.c |   1 +
>  libavfilter/version.h    |   2 +-
>  libavfilter/vf_siti.c    | 359 +++++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 388 insertions(+), 1 deletion(-)
>  create mode 100644 libavfilter/vf_siti.c
>
> +// Determine whether the video is in full or limited range. If not defined, assume limited.
> +static int is_full_range(AVFrame* frame)
> +{
> +    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
> +    {
> +        // If color range not specified, fallback to pixel format
> +        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
> +    }
> +    return frame->color_range == AVCOL_RANGE_JPEG;
> +}
> +
> +// Check frame's color range and convert to full range if needed
> +static uint16_t convert_full_range(uint16_t y, SiTiContext *s)
> +{
> +    if (s->full_range == 1)
> +    {
> +        return y;
> +    }
> +
> +    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
> +    double factor = s->pixel_depth == 1? 1 : 4;
> +    double shift = 16 * factor;
> +    double limit_upper = 235 * factor - shift;
> +    double full_upper = 256 * factor - 1;
> +    double limit_y = fmin(fmax(y - shift, 0), limit_upper);
> +    return (uint16_t) (full_upper * limit_y / limit_upper);
> +}
> +
> +// Applies sobel convolution
> +static void convolve_sobel(const unsigned char* src, double* dst, int linesize, SiTiContext *s)
> +{
> +    int filter_width = 3;
> +    int filter_size = filter_width * filter_width;
> +    for (int j=1; j<s->height-1; j++)
> +    {
> +        for (int i=1; i<s->width-1; i++)
> +        {
> +            double x_conv_sum = 0, y_conv_sum = 0;
> +            for (int k=0; k<filter_size; k++)
> +            {
> +                int ki = k % filter_width - 1;
> +                int kj = floor(k / filter_width) - 1;
> +                int index = (j + kj) * (linesize / s->pixel_depth) + (i + ki);
> +                uint16_t data = convert_full_range(get_frame_data(src, s->pixel_depth, index), s);
> +                x_conv_sum += data * X_FILTER[k];
> +                y_conv_sum += data * Y_FILTER[k];
> +            }
> +            double gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum);
> +            // Dst matrix is smaller than src since we ignore edges that can't be convolved
> +            dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient;
> +        }
> +    }
> +}
> +
> +// Calculate pixel difference between current and previous frame, and update previous
> +static void calculate_motion(const unsigned char* curr, double* motion_matrix,
> +                             int linesize, SiTiContext *s)
> +{
> +    for (int j=0; j<s->height; j++)
> +    {
> +        for (int i=0; i<s->width; i++)
> +        {
> +            double motion = 0;
> +            int curr_index = j * (linesize / s->pixel_depth) + i;
> +            int prev_index = j * s->width + i;
> +            uint16_t curr_data = convert_full_range(get_frame_data(curr, s->pixel_depth, curr_index), s);
> +
> +            if (s->nb_frames > 1)
> +            {
> +                // Previous frame is already converted to full range
> +                motion = curr_data - get_frame_data(s->prev_frame, s->pixel_depth, prev_index);
> +            }
> +            set_frame_data(s->prev_frame, s->pixel_depth, prev_index, curr_data);
> +            motion_matrix[j * s->width + i] = motion;
> +        }
> +    }
> +}
> +
> +static double std_deviation(double* img_metrics, int width, int height)
> +{
> +    double size = height * width;
> +
> +    double mean_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            mean_sum += img_metrics[j * width + i];
> +        }
> +    }
> +    double mean = mean_sum / size;
> +
> +    double sqr_diff_sum = 0;
> +    for (int j=0; j<height; j++)
> +    {
> +        for (int i=0; i<width; i++)
> +        {
> +            double mean_diff = img_metrics[j * width + i] - mean;
> +            sqr_diff_sum += (mean_diff * mean_diff);
> +        }
> +    }
>

The coding style mismatches the project's style.
We don't put opening brackets on a new line and in
case of single-line blocks we leave the brackets off entirely.


> +
> +#define OFFSET(x) offsetof(SiTiContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +static const AVOption siti_options[] = {
> +    {"stats_file", "Set file where to store per-frame si-ti scores", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
> +    { NULL }
> +};
>

Make it output the data to the frame metadata instead. That's how
we usually deal with data like this.
The 'metadata' filter can then be used to save the metadata to a file
or alter it.

Just an initial review.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel at ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".