[FFmpeg-devel] [PATCH 11/25] avcodec/magicyuv: Avoid AV_QSORT when creating Huffman table

Paul B Mahol onemda at gmail.com
Sat Sep 26 13:57:35 EEST 2020


On Sat, Sep 26, 2020 at 12:27:50PM +0200, Andreas Rheinhardt wrote:
> The MagicYUV format stores Huffman tables in its bitstream by coding
> the length of a given symbol; it does not code the actual code directly,
> instead this is to be inferred by the rule that a symbol is to the left
> of every shorter symbol in the Huffman tree and that for symbols of the
> same length the symbol is ascending from left to right.
> 
> Our decoder implemented this by first sorting the array containing
> length and symbol of each element according to descending length and
> for equal length, according to ascending symbol. Afterwards, the current
> state in the tree got encoded in a variable code; if the next array entry
> had length len, then the len most significant bits of code contained
> the code of this entry. Whenever an entry of the array of length
> len was processed, code was incremented by 1U << (32 - len). So two
> entries of length len have the same effect as incrementing code by
> 1U << (32 - (len - 1)), which corresponds to the parent node of length
> len - 1 of the two nodes of length len etc.
> 
> This commit modifies this to avoid sorting the entries before
> calculating the codes. This is done by calculating how many non-leaf
> nodes there are on each level of the tree before calculating the codes.
> Afterwards every leaf node on this level gets assigned the number of
> nodes already on this level as code. This of course works only because
> the entries are already sorted by their symbol initially, so that this
> algorithm indeed gives ascending symbols from left to right on every
> level.
> 
> This offers both speed- as well as (obvious) codesize advantages. With
> Clang 10 the number of decicycles for build_huffman decreased from
> 1561987 to 1228405; for GCC 9 it went from 1825096 decicyles to 1429921.
> These tests were carried out with a sample with 150 frames that was
> looped 13 times; and this was iterated 10 times. The earlier reference
> point here is from the point when the loop generating the codes was
> traversed in reverse order (as the patch reversing the order led to
> performance penalties).

ok

> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
> ---
>  libavcodec/magicyuv.c | 36 ++++++++++++++++++------------------
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c
> index 17dea69d76..7dded9b457 100644
> --- a/libavcodec/magicyuv.c
> +++ b/libavcodec/magicyuv.c
> @@ -25,7 +25,6 @@
>  #define CACHED_BITSTREAM_READER !ARCH_X86_32
>  
>  #include "libavutil/pixdesc.h"
> -#include "libavutil/qsort.h"
>  
>  #include "avcodec.h"
>  #include "bytestream.h"
> @@ -74,26 +73,24 @@ typedef struct MagicYUVContext {
>      LLVidDSPContext   llviddsp;
>  } MagicYUVContext;
>  
> -static int huff_cmp_len(const void *a, const void *b)
> +static int huff_build(HuffEntry he[], uint16_t codes_count[33],
> +                      VLC *vlc, int nb_elems)
>  {
> -    const HuffEntry *aa = a, *bb = b;
> -    return (bb->len - aa->len) * 4096 + aa->sym - bb->sym;
> -}
> -
> -static int huff_build(HuffEntry he[], VLC *vlc, int nb_elems)
> -{
> -    uint32_t code;
> -
> -    AV_QSORT(he, nb_elems, HuffEntry, huff_cmp_len);
> +    unsigned nb_codes = 0, max = 0;
> +
> +    for (int i = 32; i > 0; i--) {
> +        uint16_t curr = codes_count[i];   // # of leafs of length i
> +        codes_count[i] = nb_codes / 2;    // # of non-leaf nodes on level i
> +        nb_codes = codes_count[i] + curr; // # of nodes on level i
> +        if (curr && !max)
> +            max = i;
> +    }
>  
> -    code = 0;
>      for (unsigned i = 0; i < nb_elems; i++) {
> -        he[i].code = code >> (32 - he[i].len);
> -        code += 0x80000000u >> (he[i].len - 1);
> +        he[i].code = codes_count[he[i].len];
> +        codes_count[he[i].len]++;
>      }
> -
> -    ff_free_vlc(vlc);
> -    return ff_init_vlc_sparse(vlc, FFMIN(he[0].len, 12), nb_elems,
> +    return ff_init_vlc_sparse(vlc, FFMIN(max, 12), nb_elems,
>                                &he[0].len,  sizeof(he[0]), sizeof(he[0].len),
>                                &he[0].code, sizeof(he[0]), sizeof(he[0].code),
>                                &he[0].sym,  sizeof(he[0]), sizeof(he[0].sym),  0);
> @@ -389,6 +386,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
>      MagicYUVContext *s = avctx->priv_data;
>      GetByteContext gb;
>      HuffEntry he[4096];
> +    uint16_t length_count[33] = { 0 };
>      int i = 0, j = 0, k;
>  
>      bytestream2_init(&gb, table, table_size);
> @@ -409,6 +407,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
>              return AVERROR_INVALIDDATA;
>          }
>  
> +        length_count[x] += l;
>          for (; j < k; j++) {
>              he[j].sym = j;
>              he[j].len = x;
> @@ -416,7 +415,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
>  
>          if (j == max) {
>              j = 0;
> -            if (huff_build(he, &s->vlc[i], max)) {
> +            if (huff_build(he, length_count, &s->vlc[i], max)) {
>                  av_log(avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
>                  return AVERROR_INVALIDDATA;
>              }
> @@ -424,6 +423,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
>              if (i == s->planes) {
>                  break;
>              }
> +            memset(length_count, 0, sizeof(length_count));
>          }
>      }
>  
> -- 
> 2.25.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".


More information about the ffmpeg-devel mailing list