[FFmpeg-devel] [PATCH 5/5] ffmpeg: Add {h, v}scale argument to display_matrix option to allow for scaling via the display matrix

Tue Aug 16 20:26:49 EEST 2022

Thilo Borgmann:
> ---
>  doc/ffmpeg.texi         |  4 ++++
>  fftools/ffmpeg_filter.c | 15 +++++++++++++++
>  fftools/ffmpeg_opt.c    | 10 ++++++++++
>  libavutil/display.c     | 21 +++++++++++++++++++++
>  libavutil/display.h     | 28 ++++++++++++++++++++++++++++
>  5 files changed, 78 insertions(+)
> 
> diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
> index 5d3e3b3052..52cca7a407 100644
> --- a/doc/ffmpeg.texi
> +++ b/doc/ffmpeg.texi
> @@ -923,6 +923,10 @@ The @code{-autorotate} logic will be affected.
>  @item hflip=@var{[0,1]}
>  @item vflip=@var{[0,1]}
>  Set a horizontal or vertical flip.
> +@item hscale=@var{[0,2]}
> +Set a horizontal scaling by factor of the given floating-point value.
> +@item vscale=@var{[0,2]}
> +Set a vertical scaling by factor of the given floating-point value.
>  @end table
>  
>  @item -vn (@emph{input/output})
> diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
> index f9ae76f76d..0759c08687 100644
> --- a/fftools/ffmpeg_filter.c
> +++ b/fftools/ffmpeg_filter.c
> @@ -778,9 +778,24 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
>      if (ist->autorotate && !(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) {
>          int32_t *displaymatrix = ifilter->displaymatrix;
>          double theta;
> +        double hscale = 1.0f;
> +        double vscale = 1.0f;
>  
>          if (!displaymatrix)
>              displaymatrix = (int32_t *)av_stream_get_side_data(ist->st, AV_PKT_DATA_DISPLAYMATRIX, NULL);
> +
> +        if (displaymatrix) {
> +            hscale = av_display_hscale_get(displaymatrix);
> +            vscale = av_display_vscale_get(displaymatrix);
> +
> +            if (hscale != 1.0f || vscale != 1.0f) {
> +                char scale_buf[128];
> +                snprintf(scale_buf, sizeof(scale_buf), "%f*iw:%f*ih", hscale, vscale);
> +                ret = insert_filter(&last_filter, &pad_idx, "scale", scale_buf);
> +            }
> +        }
> +
> +
>          theta = get_rotation(displaymatrix);
>  
>          if (fabs(theta - 90) < 1.0) {
> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
> index f6551621c3..4fae6cbfbf 100644
> --- a/fftools/ffmpeg_opt.c
> +++ b/fftools/ffmpeg_opt.c
> @@ -121,6 +121,8 @@ static const char *const opt_name_bits_per_raw_sample[]       = {"bits_per_raw_s
>          double  rotation;
>          int     hflip;
>          int     vflip;
> +        double  hscale;
> +        double  vscale;
>      };
>  #define OFFSET(x) offsetof(struct display_matrix_s, x)
>      static const AVOption display_matrix_args[] = {
> @@ -130,6 +132,10 @@ static const char *const opt_name_bits_per_raw_sample[]       = {"bits_per_raw_s
>              { .i64 = -1 }, 0, 1, AV_OPT_FLAG_ARGUMENT},
>          { "vflip",    "set vflip", OFFSET(vflip),    AV_OPT_TYPE_BOOL,
>              { .i64 = -1 }, 0, 1, AV_OPT_FLAG_ARGUMENT},
> +        { "hscale", "set scale factor", OFFSET(hscale), AV_OPT_TYPE_DOUBLE,
> +            { .dbl = 1.0f }, 0.0f, 2.0f, AV_OPT_FLAG_ARGUMENT},
> +        { "vscale", "set scale factor", OFFSET(vscale), AV_OPT_TYPE_DOUBLE,
> +            { .dbl = 1.0f }, 0.0f, 2.0f, AV_OPT_FLAG_ARGUMENT},
>          { NULL },
>      };
>      static const AVClass class_display_matrix_args = {
> @@ -848,6 +854,8 @@ static void add_display_matrix_to_stream(OptionsContext *o,
>          .rotation = DBL_MAX,
>          .hflip    = -1,
>          .vflip    = -1,
> +        .hscale    = 1.0f,
> +        .vscale    = 1.0f,
>      };
>  
>      AVDictionary *global_args = NULL;
> @@ -903,6 +911,8 @@ static void add_display_matrix_to_stream(OptionsContext *o,
>      av_display_matrix_flip((int32_t *)buf,
>                             hflip_set ? test_args.hflip : 0,
>                             vflip_set ? test_args.vflip : 0);
> +
> +    av_display_matrix_scale((int32_t *)buf, test_args.hscale, test_args.vscale);
>  }
>  
>  
> diff --git a/libavutil/display.c b/libavutil/display.c
> index d31061283c..b89763ff48 100644
> --- a/libavutil/display.c
> +++ b/libavutil/display.c
> @@ -28,9 +28,11 @@
>  
>  // fixed point to double
>  #define CONV_FP(x) ((double) (x)) / (1 << 16)
> +#define CONV_FP2(x) ((double) (x)) / (1 << 30)
>  
>  // double to fixed point
>  #define CONV_DB(x) (int32_t) ((x) * (1 << 16))
> +#define CONV_DB2(x) (int32_t) ((x) * (1 << 30))
>  
>  double av_display_rotation_get(const int32_t matrix[9])
>  {
> @@ -48,6 +50,17 @@ double av_display_rotation_get(const int32_t matrix[9])
>      return -rotation;
>  }
>  
> +double av_display_hscale_get(const int32_t matrix[9])
> +{
> +    return fabs(CONV_FP2(matrix[2]));
> +}
> +
> +double av_display_vscale_get(const int32_t matrix[9])
> +{
> +    return fabs(CONV_FP2(matrix[5]));
> +}
> +
> +#include <stdio.h>
>  void av_display_rotation_set(int32_t matrix[9], double angle)
>  {
>      double radians = -angle * M_PI / 180.0f;
> @@ -60,6 +73,8 @@ void av_display_rotation_set(int32_t matrix[9], double angle)
>      matrix[1] = CONV_DB(-s);
>      matrix[3] = CONV_DB(s);
>      matrix[4] = CONV_DB(c);
> +    matrix[2] = 1 << 30;
> +    matrix[5] = 1 << 30;

Why are matrix[2] and matrix[5] being set here?

>      matrix[8] = 1 << 30;
>  }
>  
> @@ -72,3 +87,9 @@ void av_display_matrix_flip(int32_t matrix[9], int hflip, int vflip)
>          for (i = 0; i < 9; i++)
>              matrix[i] *= flip[i % 3];
>  }
> +
> +void av_display_matrix_scale(int32_t matrix[9], double hscale, double vscale)
> +{
> +    matrix[2] = CONV_DB2(CONV_FP2(matrix[2]) * hscale);
> +    matrix[5] = CONV_DB2(CONV_FP2(matrix[5]) * vscale);

matrix[2] and matrix[5] correspond to u and v in the matrix documented in
display.h. These values need to be zero; otherwise the transformation is
projective rather than affine.

> +}
> diff --git a/libavutil/display.h b/libavutil/display.h
> index 31d8bef361..b875e1cfdd 100644
> --- a/libavutil/display.h
> +++ b/libavutil/display.h
> @@ -86,6 +86,26 @@
>   */
>  double av_display_rotation_get(const int32_t matrix[9]);
>  
> +/**
> + * Extract the horizontal scaling component of the transformation matrix.
> + *
> + * @param matrix the transformation matrix
> + * @return the horizontal scaling by which the transformation matrix scales the frame
> + *         in the horizontal direction. The scaling factor will be in the range
> + *         [0.0, 2.0].
> + */
> +double av_display_hscale_get(const int32_t matrix[9]);
> +
> +/**
> + * Extract the vertical scaling component of the transformation matrix.
> + *
> + * @param matrix the transformation matrix
> + * @return the vertical scaling by which the transformation matrix scales the frame
> + *         in the vertical direction. The scaling factor will be in the range
> + *         [0.0, 2.0].
> + */
> +double av_display_vscale_get(const int32_t matrix[9]);
> +
>  /**
>   * Initialize a transformation matrix describing a pure clockwise
>   * rotation by the specified angle (in degrees).
> @@ -105,6 +125,14 @@ void av_display_rotation_set(int32_t matrix[9], double angle);
>   */
>  void av_display_matrix_flip(int32_t matrix[9], int hflip, int vflip);
>  
> +/**
> + * Scale the input matrix horizontally and/or vertically.
> + *
> + * @param matrix an allocated transformation matrix
> + * @param hscale whether the matrix should be scaled horizontally
> + * @param vscale whether the matrix should be scaled vertically
> + */
> +void av_display_matrix_scale(int32_t matrix[9], double hscale, double vscale);
>  /**
>   * @}
>   * @}

1. Once again: Separate lavu patches from fftools patches.
2. What makes you believe that every matrix has something like a
horizontal scaling factor? What about rotations by 90°?
3. What makes you believe that 2 and 5 are the right elements? They are
not. These elements must be zero; otherwise you have a projective
transformation. If you made an off-by-one error and meant 1 and 4
(i.e. b and d in the notation of display.h), it is still wrong, as
scaling in one direction needs to scale two matrix elements.
4. Even if one restricts oneself to simple diagonal matrices (i.e. b = c
= u = v = 0), your formulae for av_display_[hv]scale_get are wrong,
because they use the wrong matrix elements and because you completely
ignore w, i.e. matrix[8]. And of course the scaling range can be much
greater than just [0, 2].
5. Finally, the APIchanges entry and the version bump are missing.

- Andreas