[FFmpeg-devel] [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit

Tue Sep 19 11:02:56 EEST 2017

2017-09-19 4:09 GMT+02:00 James Almer <jamrial at gmail.com>:

> On 9/18/2017 10:41 PM, Thomas Mundt wrote:
> > I tried to set up MIPS compiler for two days on windows and linux without
> > success.
> > Now I try it blind. This solution is based on the first suggestion James
> > gave me at IRC.
> > There might be room for improvement and an alternative solution with
> > AV_RL16() / AV_WL16().
> > I used av_le2ne16() because it will be ignored for little endian.
> >
> > Regards,
> > Thomas
>
> > From a2be5859266b1a2f7048b81ced6770ab4b90a5a4 Mon Sep 17 00:00:00 2001
> > From: Thomas Mundt <tmundt75 at gmail.com>
> > Date: Tue, 19 Sep 2017 00:25:25 +0200
> > Subject: [PATCH 3/3 v2] avfilter/interlace: add support for 10 and 12 bit
> >
> > Signed-off-by: Thomas Mundt <tmundt75 at gmail.com>
> > ---
> >  libavfilter/interlace.h                        |  5 +-
> >  libavfilter/tinterlace.h                       |  5 +-
> >  libavfilter/vf_interlace.c                     | 92
> ++++++++++++++++++++++----
> >  libavfilter/vf_tinterlace.c                    | 73 ++++++++++++++++++--
> >  libavfilter/x86/vf_interlace.asm               | 80
> ++++++++++++++++++++--
> >  libavfilter/x86/vf_interlace_init.c            | 51 ++++++++++----
> >  libavfilter/x86/vf_tinterlace_init.c           | 51 ++++++++++----
> >  tests/ref/fate/filter-pixfmts-tinterlace_cvlpf | 11 +++
> >  tests/ref/fate/filter-pixfmts-tinterlace_merge | 11 +++
> >  tests/ref/fate/filter-pixfmts-tinterlace_pad   | 11 +++
> >  tests/ref/fate/filter-pixfmts-tinterlace_vlpf  | 11 +++
> >  11 files changed, 345 insertions(+), 56 deletions(-)
> >
> > diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
> > index 2101b79..90a0198 100644
> > --- a/libavfilter/interlace.h
> > +++ b/libavfilter/interlace.h
> > @@ -25,9 +25,11 @@
> >  #ifndef AVFILTER_INTERLACE_H
> >  #define AVFILTER_INTERLACE_H
> >
> > +#include "libavutil/bswap.h"
> >  #include "libavutil/common.h"
> >  #include "libavutil/imgutils.h"
> >  #include "libavutil/opt.h"
> > +#include "libavutil/pixdesc.h"
> >
> >  #include "avfilter.h"
> >  #include "formats.h"
> > @@ -55,8 +57,9 @@ typedef struct InterlaceContext {
> >      enum ScanMode scan;    // top or bottom field first scanning
> >      int lowpass;           // enable or disable low pass filtering
> >      AVFrame *cur, *next;   // the two frames from which the new one is
> obtained
> > +    const AVPixFmtDescriptor *csp;
> >      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const
> uint8_t *srcp,
> > -                         ptrdiff_t mref, ptrdiff_t pref);
> > +                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
> >  } InterlaceContext;
> >
> >  void ff_interlace_init_x86(InterlaceContext *interlace);
> > diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
> > index cc13a6c..b5c39aa 100644
> > --- a/libavfilter/tinterlace.h
> > +++ b/libavfilter/tinterlace.h
> > @@ -27,7 +27,9 @@
> >  #ifndef AVFILTER_TINTERLACE_H
> >  #define AVFILTER_TINTERLACE_H
> >
> > +#include "libavutil/bswap.h"
> >  #include "libavutil/opt.h"
> > +#include "libavutil/pixdesc.h"
> >  #include "drawutils.h"
> >  #include "avfilter.h"
> >
> > @@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
> >      int black_linesize[4];
> >      FFDrawContext draw;
> >      FFDrawColor color;
> > +    const AVPixFmtDescriptor *csp;
> >      void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t
> *srcp,
> > -                         ptrdiff_t mref, ptrdiff_t pref);
> > +                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
> >  } TInterlaceContext;
> >
> >  void ff_tinterlace_init_x86(TInterlaceContext *interlace);
> > diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
> > index 55bf782..bfba054 100644
> > --- a/libavfilter/vf_interlace.c
> > +++ b/libavfilter/vf_interlace.c
> > @@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
> >  AVFILTER_DEFINE_CLASS(interlace);
> >
> >  static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
> > -                           const uint8_t *srcp,
> > -                           ptrdiff_t mref, ptrdiff_t pref)
> > +                           const uint8_t *srcp, ptrdiff_t mref,
> > +                           ptrdiff_t pref, int clip_max)
> >  {
> >      const uint8_t *srcp_above = srcp + mref;
> >      const uint8_t *srcp_below = srcp + pref;
> > @@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t
> linesize,
> >      }
> >  }
> >
> > +static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
> > +                              const uint8_t *src8, ptrdiff_t mref,
> > +                              ptrdiff_t pref, int clip_max)
> > +{
> > +    uint16_t *dstp = (uint16_t *)dst8;
> > +    const uint16_t *srcp = (const uint16_t *)src8;
> > +    const uint16_t *srcp_above = srcp + mref / 2;
> > +    const uint16_t *srcp_below = srcp + pref / 2;
> > +    int i;
> > +    for (i = 0; i < linesize; i++) {
> > +        // this calculation is an integer representation of
> > +        // '0.5 * current + 0.25 * above + 0.25 * below'
> > +        // '1 +' is for rounding.
> > +        dstp[i] = av_le2ne16((1 + av_le2ne16(srcp[i]) +
> av_le2ne16(srcp[i])
> > +                                + av_le2ne16(srcp_above[i])
> > +                                + av_le2ne16(srcp_below[i])) >> 2);
>
> This might work (And Michael will be able to confirm that if
> filter-pixfmts-tinterlace_vlpf passes)...
>
> > +    }
> > +}
> > +
> >  static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
> > -                                   const uint8_t *srcp,
> > -                                   ptrdiff_t mref, ptrdiff_t pref)
> > +                                   const uint8_t *srcp, ptrdiff_t mref,
> > +                                   ptrdiff_t pref, int clip_max)
> >  {
> >      const uint8_t *srcp_above = srcp + mref;
> >      const uint8_t *srcp_below = srcp + pref;
> > @@ -103,11 +122,46 @@ static void lowpass_line_complex_c(uint8_t *dstp,
> ptrdiff_t linesize,
> >      }
> >  }
> >
> > +static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t
> linesize,
> > +                                      const uint8_t *src8, ptrdiff_t
> mref,
> > +                                      ptrdiff_t pref, int clip_max)
> > +{
> > +    uint16_t *dstp = (uint16_t *)dst8;
> > +    const uint16_t *srcp = (const uint16_t *)src8;
> > +    const uint16_t *srcp_above = srcp + mref / 2;
> > +    const uint16_t *srcp_below = srcp + pref / 2;
> > +    const uint16_t *srcp_above2 = srcp + mref;
> > +    const uint16_t *srcp_below2 = srcp + pref;
> > +    int i, srcp_x, srcp_ab;
> > +    for (i = 0; i < linesize; i++) {
> > +        // this calculation is an integer representation of
> > +        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 *
> above2 - 0.125 * below2'
> > +        // '4 +' is for rounding.
> > +        srcp_x = av_le2ne16(srcp[i]) << 1;
> > +        srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
> > +        dstp[i] = av_le2ne16(av_clip((4 + ((av_le2ne16(srcp[i]) +
> srcp_x + srcp_ab) << 1)
> > +                                     - av_le2ne16(srcp_above2[i])
> > +                                     - av_le2ne16(srcp_below2[i])) >>
> 3, 0, clip_max));
> > +        // Prevent over-sharpening:
> > +        // dst must not exceed src when the average of above and below
> > +        // is less than src. And the other way around.
> > +        if (srcp_ab > srcp_x) {
> > +            if (av_le2ne16(dstp[i]) < av_le2ne16(srcp[i]))
> > +                dstp[i] = srcp[i];
> > +        } else if (av_le2ne16(dstp[i]) > av_le2ne16(srcp[i]))
> > +            dstp[i] = srcp[i];
>
> ...but chances are this over-sharpening prevention part will not. You're
> loading in native endianness here before storing. You only byteswapped
> for the comparison.
>
> Also, consider using local variables inside the for loop. You're loading
> scrp[i] and dstp[i] several times per iteration.
>

Okay, then I would do:
    int i, dstp_le, srcp_le, srcp_x, srcp_ab;
    for (i = 0; i < linesize; i++) {
        srcp_le = av_le2ne16(srcp[i]);
        srcp_x = srcp_le << 1;
        srcp_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
        dstp_le = av_clip((4 + (srcp_le + srcp_x + srcp_ab) << 1)
                                 - av_le2ne16(srcp_above2[i])
                                 - av_le2ne16(srcp_below2[i])) >> 3, 0,
clip_max);
        if (srcp_ab > srcp_x) {
            if (dstp_le < srcp_le)
                dstp[i] = srcp[i];
            else
                dstp[i] = av_le2ne16(dstp_le);
        } else if (dstp_le > srcp_le) {
            dstp[i] = srcp[i];
        } else
            dstp[i] = av_le2ne16(dstp_le);
    }
Shall I do dstp[i] = av_le2ne16(srcp_le); instead of dstp[i] = srcp[i]; ?
Don´t two consecutive byte swaps cancel each other out?