[FFmpeg-devel] [PATCH] avcodec/dvenc: support encoding dvcprohd

Thu Sep 19 22:33:51 EEST 2019

On Wed, Sep 11, 2019 at 12:29:57PM -0700, Baptiste Coudurier wrote:
> ---
>  libavcodec/dv.h    |   1 +
>  libavcodec/dvenc.c | 576 ++++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 522 insertions(+), 55 deletions(-)

A FATE test should be added for this if it's not already planned or done.


> 
> diff --git a/libavcodec/dv.h b/libavcodec/dv.h
> index 7ef5b7c552..0205d72347 100644
> --- a/libavcodec/dv.h
> +++ b/libavcodec/dv.h
> @@ -83,6 +83,7 @@ enum dv_pack_type {
>  
>  #define DV_PROFILE_IS_HD(p) ((p)->video_stype & 0x10)
>  #define DV_PROFILE_IS_1080i50(p) (((p)->video_stype == 0x14) && ((p)->dsf == 1))
> +#define DV_PROFILE_IS_1080i60(p) (((p)->video_stype == 0x14) && ((p)->dsf == 0))
>  #define DV_PROFILE_IS_720p50(p)  (((p)->video_stype == 0x18) && ((p)->dsf == 1))
>  
>  /**
> diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
> index ce2fc75daa..b7a771fa18 100644
> --- a/libavcodec/dvenc.c
> +++ b/libavcodec/dvenc.c
> @@ -60,10 +60,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
>          ff_dv_print_profiles(avctx, AV_LOG_ERROR);
>          return AVERROR(EINVAL);
>      }
> -    if (avctx->height > 576) {
> -        av_log(avctx, AV_LOG_ERROR, "DVCPRO HD encoding is not supported.\n");
> -        return AVERROR_PATCHWELCOME;
> -    }
> +
>      ret = ff_dv_init_dynamic_tables(s, s->sys);
>      if (ret < 0) {
>          av_log(avctx, AV_LOG_ERROR, "Error initializing work tables.\n");
> @@ -90,6 +87,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
>  }
>  
>  /* bit budget for AC only in 5 MBs */
> +static const int vs_total_ac_bits_hd = (68 * 6 + 52*2) * 5;
>  static const int vs_total_ac_bits = (100 * 4 + 68 * 2) * 5;
>  static const int mb_area_start[5] = { 1, 6, 21, 43, 64 };
>  
> @@ -158,6 +156,11 @@ typedef struct EncBlockInfo {
>      uint8_t  sign[64];
>      uint8_t  partial_bit_count;
>      uint32_t partial_bit_buffer; /* we can't use uint16_t here */
> +    /* used by DV100 only: a copy of the weighted and classified but
> +       not-yet-quantized AC coefficients. This is necessary for
> +       re-quantizing at different steps. */
> +    int16_t  save[64];
> +    int      min_qlevel; /* DV100 only: minimum qlevel (for AC coefficients >255) */
>  } EncBlockInfo;
>  
>  static av_always_inline PutBitContext *dv_encode_ac(EncBlockInfo *bi,
> @@ -243,13 +246,135 @@ static const int dv_weight_248[64] = {
>      170627, 170627, 153560, 153560, 165371, 165371, 144651, 144651,
>  };
>  
> -static av_always_inline int dv_init_enc_block(EncBlockInfo *bi, uint8_t *data,
> -                                              ptrdiff_t linesize,
> -                                              DVVideoContext *s, int bias)

> +/* setting this to 1 results in a faster codec but
> + * somewhat lower image quality */
> +#define DV100_SACRIFICE_QUALITY_FOR_SPEED 1
> +#define DV100_ENABLE_FINER 1

These could be changed to runtime user options
[...]

> +/* how much to increase qlevel when we need to compress more coarsely */
> +/* this is a tradeoff between encoding speed and space efficiency */
> +/* the highest-quality, lowest-speed option it to use 1 for all qlevels. */
> +static const uint8_t dv100_qstep_delta[16] = {
> +#if DV100_SACRIFICE_QUALITY_FOR_SPEED
> +    0, 2, 0, 5, 0, 0, 0, 0, 1, 6, 0, 0, 0, 0, 0, 0,
> +#else
> +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> +#endif
> +};

This is unused


> +
> +static const int dv100_min_bias = 0;
> +static const int dv100_chroma_bias = 0;
> +static const int dv100_starting_qno = 1;

> +static const int dv100_min_qno = 1;

unused
[...]
>  
> +/* this function just copies the DCT coefficients and performs
> +   the initial (non-)quantization. */
> +static inline void dv_set_class_number_hd(DVVideoContext *s,
> +                                          int16_t *blk, EncBlockInfo *bi,
> +                                          const uint8_t *zigzag_scan,
> +                                          const int *weight, int bias)
> +{
> +    int i, max = 0;
> +
> +    /* the first quantization (none at all) */
> +    bi->area_q[0] = 1;
> +

> +    /* LOOP1: weigh AC components and store to save[] */
> +    /* (i=0 is the DC component; we only include it to make the
> +       number of loop iterations even, for future possible SIMD optimization) */
> +    for (i = 0; i < 64; i += 2) {
> +        int level0, level1;
> +
> +        /* get the AC component (in zig-zag order) */
> +        level0 = blk[zigzag_scan[i+0]];
> +        level1 = blk[zigzag_scan[i+1]];
> +
> +        /* extract sign and make it the lowest bit */
> +        bi->sign[i+0] = (level0>>31)&1;
> +        bi->sign[i+1] = (level1>>31)&1;
> +
> +        /* take absolute value of the level */
> +        level0 = FFABS(level0);
> +        level1 = FFABS(level1);
> +
> +        /* weigh it */
> +        level0 = (level0*weight[i+0] + 4096 + (1<<17)) >> 18;
> +        level1 = (level1*weight[i+1] + 4096 + (1<<17)) >> 18;
> +
> +        /* save unquantized value */
> +        bi->save[i+0] = level0;
> +        bi->save[i+1] = level1;
> +    }
> +
> +    /* find max component */
> +    for (i = 0; i < 64; i++) {
> +        int ac = bi->save[i];
> +        if (ac > max)
> +            max = ac;
> +    }

These two loops can be merged, avoiding a second pass.

[...]
> +static inline void dv_guess_qnos_hd(EncBlockInfo *blks, int *qnos)
> +{
> +    EncBlockInfo *b;
> +    int min_qlevel[5];
> +    int qlevels[5];
> +    int size[5];
> +    int i, j;
> +    /* cache block sizes at hypothetical qlevels */
> +    uint16_t size_cache[5*8][DV100_NUM_QLEVELS] = {{0}};
> +
> +    /* get minimum qlevels */
> +    for (i = 0; i < 5; i++) {
> +        min_qlevel[i] = 1;
> +        for (j = 0; j < 8; j++) {
> +            if (blks[8*i+j].min_qlevel > min_qlevel[i])
> +                min_qlevel[i] = blks[8*i+j].min_qlevel;
> +        }
> +    }
> +
> +    /* initialize sizes */
> +    for (i = 0; i < 5; i++) {
> +        qlevels[i] = dv100_starting_qno;
> +        if (qlevels[i] < min_qlevel[i])
> +            qlevels[i] = min_qlevel[i];
> +
> +        qnos[i] = DV100_QLEVEL_QNO(dv100_qlevels[qlevels[i]]);
> +        size[i] = 0;
> +        for (j = 0; j < 8; j++) {
> +            size_cache[8*i+j][qlevels[i]] = dv100_actual_quantize(&blks[8*i+j], qlevels[i]);
> +            size[i] += size_cache[8*i+j][qlevels[i]];
> +        }
> +    }
> +
> +    /* must we go coarser? */
> +    if (size[0]+size[1]+size[2]+size[3]+size[4] > vs_total_ac_bits_hd) {
> +        int largest = size[0] % 5; /* 'random' number */
> +

> +        do {
> +            /* find the macroblock with the lowest qlevel */
> +            for (i = 0; i < 5; i++) {
> +                if (qlevels[i] < DV100_NUM_QLEVELS-1 &&
> +                    qlevels[i] < qlevels[largest])
> +                    largest = i;
> +            }
> +
> +            i = largest;
> +            /* ensure that we don't enter infinite loop */
> +            largest = (largest+1) % 5;
> +
> +            if (qlevels[i] >= DV100_NUM_QLEVELS-1) {
> +                /* can't quantize any more */
> +                continue;
> +            }
> +
> +            /* quantize a little bit more */
> +            qlevels[i] += dv100_qlevel_inc;
> +            if (qlevels[i] > DV100_NUM_QLEVELS-1)
> +                qlevels[i] = DV100_NUM_QLEVELS-1;
> +
> +            qnos[i] = DV100_QLEVEL_QNO(dv100_qlevels[qlevels[i]]);
> +            size[i] = 0;
> +
> +            /* for each block */
> +            b = &blks[8*i];
> +            for (j = 0; j < 8; j++, b++) {
> +                /* accumulate block size into macroblock */
> +                if(size_cache[8*i+j][qlevels[i]] == 0) {
> +                    /* it is safe to use actual_quantize() here because we only go from finer to coarser,
> +                       and it saves the final actual_quantize() down below */
> +                    size_cache[8*i+j][qlevels[i]] = dv100_actual_quantize(b, qlevels[i]);
> +                }
> +                size[i] += size_cache[8*i+j][qlevels[i]];
> +            } /* for each block */
> +
> +        } while (vs_total_ac_bits_hd < size[0] + size[1] + size[2] + size[3] + size[4] &&
> +                 (qlevels[0] < DV100_NUM_QLEVELS-1 ||
> +                  qlevels[1] < DV100_NUM_QLEVELS-1 ||
> +                  qlevels[2] < DV100_NUM_QLEVELS-1 ||
> +                  qlevels[3] < DV100_NUM_QLEVELS-1 ||
> +                  qlevels[4] < DV100_NUM_QLEVELS-1));

I think the DV100_NUM_QLEVELS checks can be simplified.

If we keep track of how many qlevels are < DV100_NUM_QLEVELS-1, then the
check in the first loop is not needed: if there is one that is smaller
than that, it will be found, so there is no need to check each one
against DV100_NUM_QLEVELS-1.

Checking the smallest one again against DV100_NUM_QLEVELS-1 then also
becomes unneeded.

And at the end, the 5 checks in the while() can be changed to a
single check on the new variable.

This should make the code both faster and simpler.


> +
> +        // can we go finer?
> +    } else if (DV100_ENABLE_FINER &&
> +               size[0]+size[1]+size[2]+size[3]+size[4] < vs_total_ac_bits_hd) {
> +        int save_qlevel;
> +        int largest = size[0] % 5; /* 'random' number */
> +
> +        while (qlevels[0] > min_qlevel[0] ||
> +               qlevels[1] > min_qlevel[1] ||
> +               qlevels[2] > min_qlevel[2] ||
> +               qlevels[3] > min_qlevel[3] ||
> +               qlevels[4] > min_qlevel[4]) {
> +
> +            /* find the macroblock with the highest qlevel */
> +            for (i = 0; i < 5; i++) {
> +                if (qlevels[i] > min_qlevel[i] && qlevels[i] > qlevels[largest])
> +                    largest = i;
> +            }
> +
> +            i = largest;
> +
> +            /* ensure that we don't enter infinite loop */
> +            largest = (largest+1) % 5;
> +
> +            if (qlevels[i] <= min_qlevel[i]) {
> +                /* can't unquantize any more */
> +                continue;
> +            }
> +            /* quantize a little bit less */
> +            save_qlevel = qlevels[i];
> +            qlevels[i] -= dv100_qlevel_inc;
> +            if (qlevels[i] < min_qlevel[i])
> +                qlevels[i] = min_qlevel[i];
> +
> +            qnos[i] = DV100_QLEVEL_QNO(dv100_qlevels[qlevels[i]]);
> +
> +            size[i] = 0;
> +
> +            /* for each block */
> +            b = &blks[8*i];
> +            for (j = 0; j < 8; j++, b++) {
> +                /* accumulate block size into macroblock */
> +                if(size_cache[8*i+j][qlevels[i]] == 0) {
> +                    size_cache[8*i+j][qlevels[i]] = dv100_actual_quantize(b, qlevels[i]);
> +                }
> +                size[i] += size_cache[8*i+j][qlevels[i]];
> +            } /* for each block */
> +
> +            /* did we bust the limit? */
> +            if (vs_total_ac_bits_hd < size[0] + size[1] + size[2] + size[3] + size[4]) {
> +                /* go back down and exit */
> +                qlevels[i] = save_qlevel;
> +                qnos[i] = DV100_QLEVEL_QNO(dv100_qlevels[qlevels[i]]);
> +                break;
> +            }
> +        }
> +    }
> +
> +    /* now do the actual quantization */
> +    for (i = 0; i < 5; i++) {
> +        /* for each block */
> +        b = &blks[8*i];
> +        size[i] = 0;
> +        for (j = 0; j < 8; j++, b++) {
> +            /* accumulate block size into macroblock */
> +            size[i] += dv100_actual_quantize(b, qlevels[i]);
> +        } /* for each block */
> +    }
> +}
> +
>  static inline void dv_guess_qnos(EncBlockInfo *blks, int *qnos)
>  {
>      int size[5];
[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Dictatorship naturally arises out of democracy, and the most aggravated
form of tyranny and slavery out of the most extreme liberty. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20190919/4787ec8b/attachment.sig>