[FFmpeg-devel] [RFC] cineform CFHD encoder, and decoder speedup

Paul B Mahol onemda at gmail.com
Fri Jul 31 10:51:59 EEST 2020


On 7/30/20, Nicolas George <george at nsup.org> wrote:
> Paul B Mahol (12020-07-30):
>> Hi,
>>
>> patches attached.
>>
>> Decoder speedup is approx. 20% overall.
>
>> From 9692daafc0c4a69ce3cf7dca5b5aa90b5737e15f Mon Sep 17 00:00:00 2001
>> From: Paul B Mahol <onemda at gmail.com>
>> Date: Sat, 25 Jul 2020 18:18:18 +0200
>> Subject: [PATCH 1/2] avcodec: add CFHD encoder
>>
>> ---
>>  libavcodec/Makefile    |   1 +
>>  libavcodec/allcodecs.c |   1 +
>>  libavcodec/cfhd.c      |  28 --
>>  libavcodec/cfhd.h      |  52 +++
>>  libavcodec/cfhdenc.c   | 750 +++++++++++++++++++++++++++++++++++++++++
>>  5 files changed, 804 insertions(+), 28 deletions(-)
>>  create mode 100644 libavcodec/cfhdenc.c
>>
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index c48138d0ad..1e7f899855 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -255,6 +255,7 @@ OBJS-$(CONFIG_CDGRAPHICS_DECODER)      +=
>> cdgraphics.o
>>  OBJS-$(CONFIG_CDTOONS_DECODER)         += cdtoons.o
>>  OBJS-$(CONFIG_CDXL_DECODER)            += cdxl.o
>>  OBJS-$(CONFIG_CFHD_DECODER)            += cfhd.o cfhddata.o
>> +OBJS-$(CONFIG_CFHD_ENCODER)            += cfhdenc.o cfhddata.o
>>  OBJS-$(CONFIG_CINEPAK_DECODER)         += cinepak.o
>>  OBJS-$(CONFIG_CINEPAK_ENCODER)         += cinepakenc.o elbg.o
>>  OBJS-$(CONFIG_CLEARVIDEO_DECODER)      += clearvideo.o
>> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
>> index 7a941b257e..c3a1f38db2 100644
>> --- a/libavcodec/allcodecs.c
>> +++ b/libavcodec/allcodecs.c
>> @@ -70,6 +70,7 @@ extern AVCodec ff_cavs_decoder;
>>  extern AVCodec ff_cdgraphics_decoder;
>>  extern AVCodec ff_cdtoons_decoder;
>>  extern AVCodec ff_cdxl_decoder;
>> +extern AVCodec ff_cfhd_encoder;
>>  extern AVCodec ff_cfhd_decoder;
>>  extern AVCodec ff_cinepak_encoder;
>>  extern AVCodec ff_cinepak_decoder;
>> diff --git a/libavcodec/cfhd.c b/libavcodec/cfhd.c
>> index 299d98780c..ae7dd8ddbe 100644
>> --- a/libavcodec/cfhd.c
>> +++ b/libavcodec/cfhd.c
>> @@ -40,34 +40,6 @@
>>  #define ALPHA_COMPAND_DC_OFFSET 256
>>  #define ALPHA_COMPAND_GAIN 9400
>>
>> -enum CFHDParam {
>> -    SampleType       =   1,
>> -    SampleIndexTable =   2,
>> -    BitstreamMarker  =   4,
>> -    TransformType    =  10,
>> -    ChannelCount     =  12,
>> -    SubbandCount     =  14,
>> -    ImageWidth       =  20,
>> -    ImageHeight      =  21,
>> -    LowpassWidth     =  27,
>> -    LowpassHeight    =  28,
>> -    LowpassPrecision =  35,
>> -    HighpassWidth    =  41,
>> -    HighpassHeight   =  42,
>> -    SubbandNumber    =  48,
>> -    Quantization     =  53,
>> -    BandHeader       =  55,
>> -    ChannelNumber    =  62,
>> -    SampleFlags      =  68,
>> -    EncodedFormat    =  84,
>> -    BitsPerComponent = 101,
>> -    ChannelWidth     = 104,
>> -    ChannelHeight    = 105,
>> -    PrescaleShift    = 109,
>> -};
>> -
>> -
>> -
>>  static av_cold int cfhd_init(AVCodecContext *avctx)
>>  {
>>      CFHDContext *s = avctx->priv_data;
>> diff --git a/libavcodec/cfhd.h b/libavcodec/cfhd.h
>> index 4f2c82d8bc..0e3155be23 100644
>> --- a/libavcodec/cfhd.h
>> +++ b/libavcodec/cfhd.h
>> @@ -30,6 +30,58 @@
>>  #include "get_bits.h"
>>  #include "vlc.h"
>>
>> +enum CFHDParam {
>> +    SampleType       =   1,
>> +    SampleIndexTable =   2,
>> +    BitstreamMarker  =   4,
>> +    TransformType    =  10,
>> +    NumFrames        =  11,
>> +    ChannelCount     =  12,
>> +    WaveletCount     =  13,
>> +    SubbandCount     =  14,
>> +    NumSpatial       =  15,
>> +    FirstWavelet     =  16,
>> +    GroupTrailer     =  18,
>> +    ImageWidth       =  20,
>> +    ImageHeight      =  21,
>> +    LowpassSubband   =  25,
>> +    NumLevels        =  26,
>> +    LowpassWidth     =  27,
>> +    LowpassHeight    =  28,
>> +    PixelOffset      =  33,
>> +    LowpassQuantization=34,
>> +    LowpassPrecision =  35,
>> +    WaveletType      =  37,
>> +    WaveletNumber    =  38,
>> +    WaveletLevel     =  39,
>> +    NumBands         =  40,
>> +    HighpassWidth    =  41,
>> +    HighpassHeight   =  42,
>> +    LowpassBorder    =  43,
>> +    HighpassBorder   =  44,
>> +    LowpassScale     =  45,
>> +    LowpassDivisor   =  46,
>> +    SubbandNumber    =  48,
>> +    BandWidth        =  49,
>> +    BandHeight       =  50,
>> +    SubbandBand      =  51,
>> +    BandEncoding     =  52,
>> +    Quantization     =  53,
>> +    BandScale        =  54,
>> +    BandHeader       =  55,
>> +    BandTrailer      =  56,
>> +    ChannelNumber    =  62,
>> +    SampleFlags      =  68,
>> +    Precision        =  70,
>> +    BandCodingFlags  =  72,
>> +    PrescaleTable    =  83,
>> +    EncodedFormat    =  84,
>> +    BitsPerComponent = 101,
>> +    ChannelWidth     = 104,
>> +    ChannelHeight    = 105,
>> +    PrescaleShift    = 109,
>> +};
>> +
>>  #define VLC_BITS       9
>>  #define SUBBAND_COUNT 10
>>
>> diff --git a/libavcodec/cfhdenc.c b/libavcodec/cfhdenc.c
>> new file mode 100644
>> index 0000000000..b2f4b31795
>> --- /dev/null
>> +++ b/libavcodec/cfhdenc.c
>> @@ -0,0 +1,750 @@
>> +/*
>> + * Copyright (c) 2020 Paul B Mahol
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> 02110-1301 USA
>> + */
>> +
>> +/**
>> + * @file
>> + * Cineform HD video encoder
>> + */
>> +
>> +#include <stdlib.h>
>> +#include <string.h>
>> +
>> +#include "libavutil/avassert.h"
>> +#include "libavutil/imgutils.h"
>> +#include "libavutil/opt.h"
>> +
>> +#include "avcodec.h"
>> +#include "bytestream.h"
>> +#include "cfhd.h"
>> +#include "put_bits.h"
>> +#include "internal.h"
>> +#include "thread.h"
>> +
>
>> +static const unsigned codebook[256][2] = {
>> +    { 1, 0x00000000 }, { 2, 0x00000002 }, { 3, 0x00000007 }, { 5,
>> 0x00000019 }, { 6, 0x00000030 },
>> +    { 6, 0x00000036 }, { 7, 0x00000063 }, { 7, 0x0000006B }, { 7,
>> 0x0000006F }, { 8, 0x000000D4 },
>> +    { 8, 0x000000DC }, { 9, 0x00000189 }, { 9, 0x000001A0 }, { 9,
>> 0x000001AB }, {10, 0x00000310 },
>> +    {10, 0x00000316 }, {10, 0x00000354 }, {10, 0x00000375 }, {10,
>> 0x00000377 }, {11, 0x00000623 },
>> +    {11, 0x00000684 }, {11, 0x000006AB }, {11, 0x000006EC }, {12,
>> 0x00000C44 }, {12, 0x00000C5C },
>> +    {12, 0x00000C5E }, {12, 0x00000D55 }, {12, 0x00000DD1 }, {12,
>> 0x00000DD3 }, {12, 0x00000DDB },
>> +    {13, 0x0000188B }, {13, 0x000018BB }, {13, 0x00001AA8 }, {13,
>> 0x00001BA0 }, {13, 0x00001BA4 },
>> +    {13, 0x00001BB5 }, {14, 0x00003115 }, {14, 0x00003175 }, {14,
>> 0x0000317D }, {14, 0x00003553 },
>> +    {14, 0x00003768 }, {15, 0x00006228 }, {15, 0x000062E8 }, {15,
>> 0x000062F8 }, {15, 0x00006AA4 },
>> +    {15, 0x00006E85 }, {15, 0x00006E87 }, {15, 0x00006ED3 }, {16,
>> 0x0000C453 }, {16, 0x0000C5D3 },
>> +    {16, 0x0000C5F3 }, {16, 0x0000DD08 }, {16, 0x0000DD0C }, {16,
>> 0x0000DDA4 }, {17, 0x000188A4 },
>> +    {17, 0x00018BA5 }, {17, 0x00018BE5 }, {17, 0x0001AA95 }, {17,
>> 0x0001AA97 }, {17, 0x0001BA13 },
>> +    {17, 0x0001BB4A }, {17, 0x0001BB4B }, {18, 0x00031748 }, {18,
>> 0x000317C8 }, {18, 0x00035528 },
>> +    {18, 0x0003552C }, {18, 0x00037424 }, {18, 0x00037434 }, {18,
>> 0x00037436 }, {19, 0x00062294 },
>> +    {19, 0x00062E92 }, {19, 0x00062F92 }, {19, 0x0006AA52 }, {19,
>> 0x0006AA5A }, {19, 0x0006E84A },
>> +    {19, 0x0006E86A }, {19, 0x0006E86E }, {20, 0x000C452A }, {20,
>> 0x000C5D27 }, {20, 0x000C5F26 },
>> +    {20, 0x000D54A6 }, {20, 0x000D54B6 }, {20, 0x000DD096 }, {20,
>> 0x000DD0D6 }, {20, 0x000DD0DE },
>> +    {21, 0x00188A56 }, {21, 0x0018BA4D }, {21, 0x0018BE4E }, {21,
>> 0x0018BE4F }, {21, 0x001AA96E },
>> +    {21, 0x001BA12E }, {21, 0x001BA12F }, {21, 0x001BA1AF }, {21,
>> 0x001BA1BF }, {22, 0x00317498 },
>> +    {22, 0x0035529C }, {22, 0x0035529D }, {22, 0x003552DE }, {22,
>> 0x003552DF }, {22, 0x0037435D },
>> +    {22, 0x0037437D }, {23, 0x0062295D }, {23, 0x0062E933 }, {23,
>> 0x006AA53D }, {23, 0x006AA53E },
>> +    {23, 0x006AA53F }, {23, 0x006E86B9 }, {23, 0x006E86F8 }, {24,
>> 0x00C452B8 }, {24, 0x00C5D265 },
>> +    {24, 0x00D54A78 }, {24, 0x00D54A79 }, {24, 0x00DD0D70 }, {24,
>> 0x00DD0D71 }, {24, 0x00DD0DF2 },
>> +    {24, 0x00DD0DF3 }, {26, 0x03114BA2 }, {25, 0x0188A5B1 }, {25,
>> 0x0188A58B }, {25, 0x0188A595 },
>> +    {25, 0x0188A5D6 }, {25, 0x0188A5D7 }, {25, 0x0188A5A8 }, {25,
>> 0x0188A5AE }, {25, 0x0188A5AF },
>> +    {25, 0x0188A5C4 }, {25, 0x0188A5C5 }, {25, 0x0188A587 }, {25,
>> 0x0188A584 }, {25, 0x0188A585 },
>> +    {25, 0x0188A5C6 }, {25, 0x0188A5C7 }, {25, 0x0188A5CC }, {25,
>> 0x0188A5CD }, {25, 0x0188A581 },
>> +    {25, 0x0188A582 }, {25, 0x0188A583 }, {25, 0x0188A5CE }, {25,
>> 0x0188A5CF }, {25, 0x0188A5C2 },
>> +    {25, 0x0188A5C3 }, {25, 0x0188A5C1 }, {25, 0x0188A5B4 }, {25,
>> 0x0188A5B5 }, {25, 0x0188A5E6 },
>> +    {25, 0x0188A5E7 }, {25, 0x0188A5E4 }, {25, 0x0188A5E5 }, {25,
>> 0x0188A5AB }, {25, 0x0188A5E0 },
>> +    {25, 0x0188A5E1 }, {25, 0x0188A5E2 }, {25, 0x0188A5E3 }, {25,
>> 0x0188A5B6 }, {25, 0x0188A5B7 },
>> +    {25, 0x0188A5FD }, {25, 0x0188A57E }, {25, 0x0188A57F }, {25,
>> 0x0188A5EC }, {25, 0x0188A5ED },
>> +    {25, 0x0188A5FE }, {25, 0x0188A5FF }, {25, 0x0188A57D }, {25,
>> 0x0188A59C }, {25, 0x0188A59D },
>> +    {25, 0x0188A5E8 }, {25, 0x0188A5E9 }, {25, 0x0188A5EA }, {25,
>> 0x0188A5EB }, {25, 0x0188A5EF },
>> +    {25, 0x0188A57A }, {25, 0x0188A57B }, {25, 0x0188A578 }, {25,
>> 0x0188A579 }, {25, 0x0188A5BA },
>> +    {25, 0x0188A5BB }, {25, 0x0188A5B8 }, {25, 0x0188A5B9 }, {25,
>> 0x0188A588 }, {25, 0x0188A589 },
>> +    {25, 0x018BA4C8 }, {25, 0x018BA4C9 }, {25, 0x0188A5FA }, {25,
>> 0x0188A5FB }, {25, 0x0188A5BC },
>> +    {25, 0x0188A5BD }, {25, 0x0188A598 }, {25, 0x0188A599 }, {25,
>> 0x0188A5F4 }, {25, 0x0188A5F5 },
>> +    {25, 0x0188A59B }, {25, 0x0188A5DE }, {25, 0x0188A5DF }, {25,
>> 0x0188A596 }, {25, 0x0188A597 },
>> +    {25, 0x0188A5F8 }, {25, 0x0188A5F9 }, {25, 0x0188A5F1 }, {25,
>> 0x0188A58E }, {25, 0x0188A58F },
>> +    {25, 0x0188A5DC }, {25, 0x0188A5DD }, {25, 0x0188A5F2 }, {25,
>> 0x0188A5F3 }, {25, 0x0188A58C },
>> +    {25, 0x0188A58D }, {25, 0x0188A5A4 }, {25, 0x0188A5F0 }, {25,
>> 0x0188A5A5 }, {25, 0x0188A5A6 },
>> +    {25, 0x0188A5A7 }, {25, 0x0188A59A }, {25, 0x0188A5A2 }, {25,
>> 0x0188A5A3 }, {25, 0x0188A58A },
>> +    {25, 0x0188A5B0 }, {25, 0x0188A5A0 }, {25, 0x0188A5A1 }, {25,
>> 0x0188A5DA }, {25, 0x0188A5DB },
>> +    {25, 0x0188A59E }, {25, 0x0188A59F }, {25, 0x0188A5D8 }, {25,
>> 0x0188A5EE }, {25, 0x0188A5D9 },
>> +    {25, 0x0188A5F6 }, {25, 0x0188A5F7 }, {25, 0x0188A57C }, {25,
>> 0x0188A5C8 }, {25, 0x0188A5C9 },
>> +    {25, 0x0188A594 }, {25, 0x0188A5FC }, {25, 0x0188A5CA }, {25,
>> 0x0188A5CB }, {25, 0x0188A5B2 },
>> +    {25, 0x0188A5AA }, {25, 0x0188A5B3 }, {25, 0x0188A572 }, {25,
>> 0x0188A573 }, {25, 0x0188A5C0 },
>> +    {25, 0x0188A5BE }, {25, 0x0188A5BF }, {25, 0x0188A592 }, {25,
>> 0x0188A580 }, {25, 0x0188A593 },
>> +    {25, 0x0188A590 }, {25, 0x0188A591 }, {25, 0x0188A586 }, {25,
>> 0x0188A5A9 }, {25, 0x0188A5D2 },
>> +    {25, 0x0188A5D3 }, {25, 0x0188A5D4 }, {25, 0x0188A5D5 }, {25,
>> 0x0188A5AC }, {25, 0x0188A5AD },
>> +    {25, 0x0188A5D0 },
>> +};
>> +
>> +static const unsigned runbook[8][3] = {
>> +    {1,  0x00000000,   1},
>> +    {7,  0x00000069,  12},
>> +    {8,  0x000000D1,  20},
>> +    {9,  0x0000018A,  32},
>> +    {10, 0x00000343,  60},
>> +    {11, 0x00000685, 100},
>> +    {13, 0x000018BF, 180},
>> +    {13, 0x00001BA5, 320},
>> +};
>> +
>> +static const uint16_t quantization_per_subband[10][9] = {
>> +    { 12, 12,  6, 12, 12,  6,  18,  18,  24, },
>> +    { 16, 16,  8, 16, 16,  8,  24,  24,  36, },
>> +    { 16, 16,  8, 16, 16,  8,  48,  48,  96, },
>> +    { 24, 24, 16, 24, 24, 16,  64,  64, 128, },
>> +    { 24, 24, 16, 24, 24, 16, 128, 128, 256, },
>> +    { 64, 64, 48, 64, 64, 48, 256, 256, 512, },
>> +    { 64, 64, 48, 64, 64, 48, 512, 512, 768, },
>> +    { 64, 64, 48, 64, 64, 48,1024,1024,1536, },
>> +    {160,160,168,172,172,180,2048,2048,3072, },
>> +    {168,180,180,196,196,200,2048,2048,3072, },
>> +};
>
> Please document where all these numbers come from.

These were either worked out by inspecting the numbers in already encoded
files, or are new ones added from scratch.

>
>> +
>> +typedef struct Codebook {
>> +    unsigned bits;
>> +    unsigned size;
>> +} Codebook;
>> +
>> +typedef struct Runbook {
>> +    unsigned size;
>> +    unsigned bits;
>> +    unsigned run;
>> +} Runbook;
>> +
>> +typedef struct PlaneEnc {
>> +    unsigned width;
>> +    unsigned height;
>> +    unsigned size;
>> +    ptrdiff_t stride;
>> +
>> +    int16_t *dwt_buf;
>> +    int16_t *dwt_tmp;
>> +
>> +    unsigned quantization[SUBBAND_COUNT];
>> +    int16_t *subband[SUBBAND_COUNT];
>> +    int16_t *l_h[8];
>> +
>> +    SubBand band[DWT_LEVELS][4];
>> +} PlaneEnc;
>> +
>> +typedef struct CFHDEncContext {
>> +    PutBitContext       pb;
>> +    PutByteContext      pby;
>> +
>> +    int compression;
>> +    int planes;
>> +    PlaneEnc plane[4];
>> +
>> +    Runbook  rb[321];
>> +    Codebook cb[513];
>> +} CFHDEncContext;
>> +
>> +static av_cold int cfhd_encode_init(AVCodecContext *avctx)
>> +{
>> +    CFHDEncContext *s = avctx->priv_data;
>> +    const int sign_mask = 256;
>> +    const int twos_complement = -sign_mask;
>> +    const int mag_mask = sign_mask - 1;
>> +
>> +    s->planes = av_pix_fmt_count_planes(avctx->pix_fmt);
>> +    if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
>> +        s->compression = 2;
>> +    s->compression = av_clip(avctx->compression_level, 0, 9);
>> +
>> +    for (int i = 0; i < s->planes; i++) {
>> +        int w8, h8, w4, h4, w2, h2;
>> +        int width  = avctx->width;
>> +        int height = avctx->height;
>> +        ptrdiff_t stride = FFALIGN(width / 8, 8) * 8;
>> +
>> +        s->plane[i].width  = width;
>> +        s->plane[i].height = height;
>> +        s->plane[i].stride = stride;
>> +
>> +        w8 = FFALIGN(width / 8, 8);
>> +        h8 = height / 8;
>> +        w4 = w8 * 2;
>> +        h4 = h8 * 2;
>> +        w2 = w4 * 2;
>> +        h2 = h4 * 2;
>> +
>> +        s->plane[i].dwt_buf =
>> +            av_mallocz_array(height * stride,
>> sizeof(*s->plane[i].dwt_buf));
>> +        s->plane[i].dwt_tmp =
>> +            av_malloc_array(height * stride,
>> sizeof(*s->plane[i].dwt_tmp));
>> +        if (!s->plane[i].dwt_buf || !s->plane[i].dwt_tmp)
>> +            return AVERROR(ENOMEM);
>> +
>> +        s->plane[i].subband[0] = s->plane[i].dwt_buf;
>> +        s->plane[i].subband[1] = s->plane[i].dwt_buf + 2 * w8 * h8;
>> +        s->plane[i].subband[2] = s->plane[i].dwt_buf + 1 * w8 * h8;
>> +        s->plane[i].subband[3] = s->plane[i].dwt_buf + 3 * w8 * h8;
>> +        s->plane[i].subband[4] = s->plane[i].dwt_buf + 2 * w4 * h4;
>> +        s->plane[i].subband[5] = s->plane[i].dwt_buf + 1 * w4 * h4;
>> +        s->plane[i].subband[6] = s->plane[i].dwt_buf + 3 * w4 * h4;
>> +        s->plane[i].subband[7] = s->plane[i].dwt_buf + 2 * w2 * h2;
>> +        s->plane[i].subband[8] = s->plane[i].dwt_buf + 1 * w2 * h2;
>> +        s->plane[i].subband[9] = s->plane[i].dwt_buf + 3 * w2 * h2;
>> +
>> +        for (int j = 0; j < DWT_LEVELS; j++) {
>> +            for (int k = 0; k < FF_ARRAY_ELEMS(s->plane[i].band[j]); k++)
>> {
>> +                s->plane[i].band[j][k].width  = (width / 8) << j;
>> +                s->plane[i].band[j][k].height = (height / 8) << j;
>> +                s->plane[i].band[j][k].a_width  = w8 << j;
>> +                s->plane[i].band[j][k].a_height = h8 << j;
>> +            }
>> +        }
>> +
>> +        /* ll2 and ll1 commented out because they are done in-place */
>> +        s->plane[i].l_h[0] = s->plane[i].dwt_tmp;
>> +        s->plane[i].l_h[1] = s->plane[i].dwt_tmp + 2 * w8 * h8;
>> +        // s->plane[i].l_h[2] = ll2;
>> +        s->plane[i].l_h[3] = s->plane[i].dwt_tmp;
>> +        s->plane[i].l_h[4] = s->plane[i].dwt_tmp + 2 * w4 * h4;
>> +        // s->plane[i].l_h[5] = ll1;
>> +        s->plane[i].l_h[6] = s->plane[i].dwt_tmp;
>> +        s->plane[i].l_h[7] = s->plane[i].dwt_tmp + 2 * w2 * h2;
>> +    }
>> +
>> +    for (int i = 0; i < 512; i++) {
>> +        int value = (i & sign_mask) ? twos_complement + (i & mag_mask):
>> i;
>> +        int mag = FFMIN(FFABS(value), 255);
>> +
>> +        if (mag) {
>> +            s->cb[i].bits = (codebook[mag][1] << 1) | (value > 0 ? 0 :
>> 1);
>> +            s->cb[i].size = codebook[mag][0] + 1;
>> +        } else {
>> +            s->cb[i].bits = codebook[mag][1];
>> +            s->cb[i].size = codebook[mag][0];
>> +        }
>> +    }
>> +
>> +    s->cb[512].bits = 0x3114ba3;
>> +    s->cb[512].size = 26;
>> +
>> +    s->rb[0].run = 0;
>> +
>> +    for (int i = 1, j = 0; i < 320 && j < 7;) {
>> +        int run = runbook[j][2];
>> +        int end = runbook[j+1][2];
>> +
>> +        while (i < end) {
>> +            s->rb[i].run = run;
>> +            s->rb[i].bits = runbook[j][1];
>> +            s->rb[i++].size = runbook[j][0];
>> +        }
>> +
>> +        j++;
>> +    }
>> +
>> +    s->rb[320].bits = runbook[7][1];
>> +    s->rb[320].size = runbook[7][0];
>> +    s->rb[320].run = 320;
>> +
>> +    return 0;
>> +}
>> +
>> +static inline void filter(int16_t *input, ptrdiff_t in_stride,
>> +                          int16_t *low, ptrdiff_t low_stride,
>> +                          int16_t *high, ptrdiff_t high_stride,
>> +                          int len)
>> +{
>> +    low[(0>>1) * low_stride]   = av_clip_int16(input[0*in_stride] +
>> input[1*in_stride]);
>> +    high[(0>>1) * high_stride] = av_clip_int16((5 * input[0*in_stride] -
>> 11 * input[1*in_stride] +
>> +                                                4 * input[2*in_stride] +
>> 4 * input[3*in_stride] -
>> +                                                1 * input[4*in_stride] -
>> 1 * input[5*in_stride] + 4) >> 3);
>> +
>> +    for (int i = 2; i < len - 2; i += 2) {
>> +        low[(i>>1) * low_stride]   = av_clip_int16(input[i*in_stride] +
>> input[(i+1)*in_stride]);
>> +        high[(i>>1) * high_stride] =
>> av_clip_int16(((-input[(i-2)*in_stride] - input[(i-1)*in_stride] +
>> +
>> input[(i+2)*in_stride] + input[(i+3)*in_stride] + 4) >> 3) +
>> +
>> input[(i+0)*in_stride] - input[(i+1)*in_stride]);
>> +    }
>> +
>> +    low[((len-2)>>1) * low_stride]   =
>> av_clip_int16(input[((len-2)+0)*in_stride] +
>> input[((len-2)+1)*in_stride]);
>> +    high[((len-2)>>1) * high_stride] = av_clip_int16((11*
>> input[((len-2)+0)*in_stride] - 5 * input[((len-2)+1)*in_stride] -
>> +                                                      4 *
>> input[((len-2)-1)*in_stride] - 4 * input[((len-2)-2)*in_stride] +
>> +                                                      1 *
>> input[((len-2)-3)*in_stride] + 1 * input[((len-2)-4)*in_stride] + 4) >>
>> 3);
>> +}
>> +
>> +static void horiz_filter(int16_t *input, int16_t *low, int16_t *high,
>> +                         int width)
>> +{
>> +    filter(input, 1, low, 1, high, 1, width);
>> +}
>> +
>> +static void vert_filter(int16_t *input, ptrdiff_t in_stride,
>> +                        int16_t *low, ptrdiff_t low_stride,
>> +                        int16_t *high, ptrdiff_t high_stride, int len)
>> +{
>> +    filter(input, in_stride, low, low_stride, high, high_stride, len);
>> +}
>> +
>> +static void quantize_band(int16_t *input, int width, int a_width,
>> +                          int height, int quantization)
>> +{
>> +    const int factor = (1 << 16) / quantization;
>> +
>> +    for (int i = 0; i < height; i++) {
>> +        for (int j = 0; j < width; j++)
>> +            input[j] = av_clip_intp2((input[j] * factor) / 65536, 8);
>> +        input += a_width;
>> +    }
>> +}
>> +
>> +static int put_runcode(PutBitContext *pb, int count, const Runbook *const
>> rb)
>> +{
>> +    while (count > 0) {
>> +        const int index = FFMIN(320, count);
>> +
>> +        put_bits(pb, rb[index].size, rb[index].bits);
>> +        count -= rb[index].run;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
>> +                             const AVFrame *frame, int *got_packet)
>> +{
>> +    CFHDEncContext *s = avctx->priv_data;
>> +    PutByteContext *pby = &s->pby;
>> +    PutBitContext *pb = &s->pb;
>> +    const Codebook *const cb = s->cb;
>> +    const Runbook *const rb = s->rb;
>> +    unsigned pos;
>> +    int ret = 0;
>> +
>> +    for (int plane = 0; plane < s->planes && !ret; plane++) {
>> +        int width = s->plane[plane].band[2][0].width;
>> +        int a_width = s->plane[plane].band[2][0].a_width;
>> +        int height = s->plane[plane].band[2][0].height;
>> +        int act_plane = plane == 1 ? 2 : plane == 2 ? 1 : plane;
>> +        int16_t *input = (int16_t *)frame->data[act_plane];
>> +        int16_t *low = s->plane[plane].l_h[6];
>> +        int16_t *high = s->plane[plane].l_h[7];
>> +        const ptrdiff_t in_stride = frame->linesize[act_plane] / 2;
>> +        int low_stride, high_stride;
>> +
>> +        for (int i = 0; i < height * 2; i++) {
>> +            horiz_filter(input, low, high, width * 2);
>> +            input += in_stride;
>> +            low += a_width;
>> +            high += a_width;
>> +        }
>> +
>> +        input = s->plane[plane].l_h[7];
>> +        low = s->plane[plane].subband[7];
>> +        low_stride = s->plane[plane].band[2][0].a_width;
>> +        high = s->plane[plane].subband[9];
>> +        high_stride = s->plane[plane].band[2][0].a_width;
>> +
>> +        for (int i = 0; i < width; i++) {
>> +            vert_filter(input, a_width, low, low_stride, high,
>> high_stride, height * 2);
>> +            input++;
>> +            low++;
>> +            high++;
>> +        }
>> +
>> +        input = s->plane[plane].l_h[6];
>> +        low = s->plane[plane].l_h[7];
>> +        high = s->plane[plane].subband[8];
>> +
>> +        for (int i = 0; i < width; i++) {
>> +            vert_filter(input, a_width, low, low_stride, high,
>> high_stride, height * 2);
>> +            input++;
>> +            low++;
>> +            high++;
>> +        }
>> +
>> +        a_width = s->plane[plane].band[1][0].a_width;
>> +        width = s->plane[plane].band[1][0].width;
>> +        height = s->plane[plane].band[1][0].height;
>> +        input = s->plane[plane].l_h[7];
>> +        low = s->plane[plane].l_h[3];
>> +        low_stride = s->plane[plane].band[1][0].a_width;
>> +        high = s->plane[plane].l_h[4];
>> +        high_stride = s->plane[plane].band[1][0].a_width;
>> +
>> +        for (int i = 0; i < height * 2; i++) {
>> +            for (int j = 0; j < width * 2; j++) {
>> +                input[j] /= 4;
>> +            }
>> +            input += a_width * 2;
>> +        }
>> +
>> +        input = s->plane[plane].l_h[7];
>> +        for (int i = 0; i < height * 2; i++) {
>> +            horiz_filter(input, low, high, width * 2);
>> +            input += a_width * 2;
>> +            low += low_stride;
>> +            high += high_stride;
>> +        }
>> +
>> +        input = s->plane[plane].l_h[4];
>> +        low = s->plane[plane].subband[4];
>> +        high = s->plane[plane].subband[6];
>> +
>> +        for (int i = 0; i < width; i++) {
>> +            vert_filter(input, a_width, low, low_stride, high,
>> high_stride, height * 2);
>> +            input++;
>> +            low++;
>> +            high++;
>> +        }
>> +
>> +        input = s->plane[plane].l_h[3];
>> +        low = s->plane[plane].l_h[4];
>> +        high = s->plane[plane].subband[5];
>> +
>> +        for (int i = 0; i < width; i++) {
>> +            vert_filter(input, a_width, low, low_stride, high,
>> high_stride, height * 2);
>> +            input++;
>> +            low++;
>> +            high++;
>> +        }
>> +
>> +        a_width = s->plane[plane].band[0][0].a_width;
>> +        width = s->plane[plane].band[0][0].width;
>> +        height = s->plane[plane].band[0][0].height;
>> +        input = s->plane[plane].l_h[4];
>> +        low = s->plane[plane].l_h[0];
>> +        low_stride = s->plane[plane].band[0][0].a_width;
>> +        high = s->plane[plane].l_h[1];
>> +        high_stride = s->plane[plane].band[0][0].a_width;
>> +
>> +        for (int i = 0; i < height * 2; i++) {
>> +            for (int j = 0; j < width * 2; j++) {
>> +                input[j] /= 4;
>> +            }
>> +            input += a_width * 2;
>> +        }
>> +
>> +        input = s->plane[plane].l_h[4];
>> +        for (int i = 0; i < height * 2; i++) {
>> +            horiz_filter(input, low, high, width * 2);
>> +            input += a_width * 2;
>> +            low += low_stride;
>> +            high += high_stride;
>> +        }
>> +
>> +        low = s->plane[plane].subband[1];
>> +        high = s->plane[plane].subband[3];
>> +        input = s->plane[plane].l_h[1];
>> +
>> +        for (int i = 0; i < width; i++) {
>> +            vert_filter(input, a_width, low, low_stride, high,
>> high_stride, height * 2);
>> +            input++;
>> +            low++;
>> +            high++;
>> +        }
>> +
>> +        low = s->plane[plane].subband[0];
>> +        high = s->plane[plane].subband[2];
>> +        input = s->plane[plane].l_h[0];
>> +
>> +        for (int i = 0; i < width; i++) {
>> +            vert_filter(input, a_width, low, low_stride, high,
>> high_stride, height * 2);
>> +            input++;
>> +            low++;
>> +            high++;
>> +        }
>> +    }
>> +
>> +    ret = ff_alloc_packet2(avctx, pkt, 6 * avctx->width * avctx->height,
>> 0);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    bytestream2_init_writer(pby, pkt->data, pkt->size);
>> +
>> +    bytestream2_put_be16(pby, SampleType);
>> +    bytestream2_put_be16(pby, 9);
>> +
>> +    bytestream2_put_be16(pby, SampleIndexTable);
>> +    bytestream2_put_be16(pby, s->planes);
>> +
>> +    for (int i = 0; i < s->planes; i++)
>> +        bytestream2_put_be32(pby, 0);
>> +
>> +    bytestream2_put_be16(pby, TransformType);
>> +    bytestream2_put_be16(pby, 0);
>> +
>> +    bytestream2_put_be16(pby, NumFrames);
>> +    bytestream2_put_be16(pby, 1);
>> +
>> +    bytestream2_put_be16(pby, ChannelCount);
>> +    bytestream2_put_be16(pby, s->planes);
>> +
>> +    bytestream2_put_be16(pby, EncodedFormat);
>> +    bytestream2_put_be16(pby, 3);
>> +
>> +    bytestream2_put_be16(pby, WaveletCount);
>> +    bytestream2_put_be16(pby, 3);
>> +
>> +    bytestream2_put_be16(pby, SubbandCount);
>> +    bytestream2_put_be16(pby, SUBBAND_COUNT);
>> +
>> +    bytestream2_put_be16(pby, NumSpatial);
>> +    bytestream2_put_be16(pby, 2);
>> +
>> +    bytestream2_put_be16(pby, FirstWavelet);
>> +    bytestream2_put_be16(pby, 3);
>> +
>> +    bytestream2_put_be16(pby, ImageWidth);
>> +    bytestream2_put_be16(pby, avctx->width);
>> +
>> +    bytestream2_put_be16(pby, ImageHeight);
>> +    bytestream2_put_be16(pby, avctx->height);
>> +
>> +    bytestream2_put_be16(pby, Precision);
>> +    bytestream2_put_be16(pby, 12);
>> +
>> +    bytestream2_put_be16(pby, PrescaleTable);
>> +    bytestream2_put_be16(pby, 0x2800);
>> +
>> +    bytestream2_put_be16(pby, SampleFlags);
>> +    bytestream2_put_be16(pby, 1);
>> +
>> +    for (int p = 0; p < s->planes; p++) {
>> +        int width = s->plane[p].band[0][0].width;
>> +        int a_width = s->plane[p].band[0][0].a_width;
>> +        int height = s->plane[p].band[0][0].height;
>> +        int16_t *data = s->plane[p].subband[0];
>> +
>> +        if (p) {
>> +            bytestream2_put_be16(pby, SampleType);
>> +            bytestream2_put_be16(pby, 3);
>> +
>> +            bytestream2_put_be16(pby, ChannelNumber);
>> +            bytestream2_put_be16(pby, p);
>> +        }
>> +
>> +        bytestream2_put_be16(pby, BitstreamMarker);
>> +        bytestream2_put_be16(pby, 0x1a4a);
>> +
>> +        pos = bytestream2_tell_p(pby);
>> +
>> +        bytestream2_put_be16(pby, LowpassSubband);
>> +        bytestream2_put_be16(pby, 0);
>> +
>> +        bytestream2_put_be16(pby, NumLevels);
>> +        bytestream2_put_be16(pby, 3);
>> +
>> +        bytestream2_put_be16(pby, LowpassWidth);
>> +        bytestream2_put_be16(pby, width);
>> +
>> +        bytestream2_put_be16(pby, LowpassHeight);
>> +        bytestream2_put_be16(pby, height);
>> +
>> +        bytestream2_put_be16(pby, PixelOffset);
>> +        bytestream2_put_be16(pby, 0);
>> +
>> +        bytestream2_put_be16(pby, LowpassQuantization);
>> +        bytestream2_put_be16(pby, 1);
>> +
>> +        bytestream2_put_be16(pby, LowpassPrecision);
>> +        bytestream2_put_be16(pby, 16);
>> +
>> +        bytestream2_put_be16(pby, BitstreamMarker);
>> +        bytestream2_put_be16(pby, 0x0f0f);
>> +
>> +        for (int i = 0; i < height; i++) {
>> +            for (int j = 0; j < width; j++)
>> +                bytestream2_put_be16(pby, data[j]);
>> +            data += a_width;
>> +        }
>> +
>> +        bytestream2_put_be16(pby, BitstreamMarker);
>> +        bytestream2_put_be16(pby, 0x1b4b);
>> +
>> +        for (int l = 0; l < 3; l++) {
>> +            for (int i = 0; i < 3; i++) {
>> +                s->plane[p].quantization[1 + l * 3 + i] =
>> quantization_per_subband[s->compression][l * 3 + i];
>> +            }
>> +        }
>> +
>> +        for (int l = 0; l < 3; l++) {
>> +            int a_width = s->plane[p].band[l][0].a_width;
>> +            int width = s->plane[p].band[l][0].width;
>> +            int stride = FFALIGN(width, 8);
>> +            int height = s->plane[p].band[l][0].height;
>> +
>> +            bytestream2_put_be16(pby, BitstreamMarker);
>> +            bytestream2_put_be16(pby, 0x0d0d);
>> +
>> +            bytestream2_put_be16(pby, WaveletType);
>> +            bytestream2_put_be16(pby, 3 + 2 * (l == 2));
>> +
>> +            bytestream2_put_be16(pby, WaveletNumber);
>> +            bytestream2_put_be16(pby, 3 - l);
>> +
>> +            bytestream2_put_be16(pby, WaveletLevel);
>> +            bytestream2_put_be16(pby, 3 - l);
>> +
>> +            bytestream2_put_be16(pby, NumBands);
>> +            bytestream2_put_be16(pby, 4);
>> +
>> +            bytestream2_put_be16(pby, HighpassWidth);
>> +            bytestream2_put_be16(pby, width);
>> +
>> +            bytestream2_put_be16(pby, HighpassHeight);
>> +            bytestream2_put_be16(pby, height);
>> +
>> +            bytestream2_put_be16(pby, LowpassBorder);
>> +            bytestream2_put_be16(pby, 0);
>> +
>> +            bytestream2_put_be16(pby, HighpassBorder);
>> +            bytestream2_put_be16(pby, 0);
>> +
>> +            bytestream2_put_be16(pby, LowpassScale);
>> +            bytestream2_put_be16(pby, 1);
>> +
>> +            bytestream2_put_be16(pby, LowpassDivisor);
>> +            bytestream2_put_be16(pby, 1);
>> +
>> +            for (int i = 0; i < 3; i++) {
>> +                int16_t *data = s->plane[p].subband[1 + l * 3 + i];
>> +                int count = 0;
>> +
>> +                bytestream2_put_be16(pby, BitstreamMarker);
>> +                bytestream2_put_be16(pby, 0x0e0e);
>> +
>> +                bytestream2_put_be16(pby, SubbandNumber);
>> +                bytestream2_put_be16(pby, i + 1);
>> +
>> +                bytestream2_put_be16(pby, BandCodingFlags);
>> +                bytestream2_put_be16(pby, 2);
>> +
>> +                bytestream2_put_be16(pby, BandWidth);
>> +                bytestream2_put_be16(pby, width);
>> +
>> +                bytestream2_put_be16(pby, BandHeight);
>> +                bytestream2_put_be16(pby, height);
>> +
>> +                bytestream2_put_be16(pby, SubbandBand);
>> +                bytestream2_put_be16(pby, 1 + l * 3 + i);
>> +
>> +                bytestream2_put_be16(pby, BandEncoding);
>> +                bytestream2_put_be16(pby, 3);
>> +
>> +                bytestream2_put_be16(pby, Quantization);
>> +                bytestream2_put_be16(pby, s->plane[p].quantization[1 + l * 3 + i]);
>> +
>> +                bytestream2_put_be16(pby, BandScale);
>> +                bytestream2_put_be16(pby, 1);
>> +
>> +                bytestream2_put_be16(pby, BandHeader);
>> +                bytestream2_put_be16(pby, 0);
>> +
>> +                quantize_band(data, width, a_width, height,
>> +                              s->plane[p].quantization[1 + l * 3 + i]);
>> +
>> +                init_put_bits(pb, pkt->data + bytestream2_tell_p(pby), bytestream2_get_bytes_left_p(pby));
>> +
>> +                for (int m = 0; m < height; m++) {
>> +                    for (int j = 0; j < stride; j++) {
>> +                        int16_t index = data[j];
>> +
>> +                        if (index < 0)
>> +                            index += 512;
>> +                        if (index == 0) {
>> +                            count++;
>> +                            continue;
>> +                        } else if (count > 0) {
>> +                            count = put_runcode(pb, count, rb);
>> +                        }
>> +
>> +                        put_bits(pb, cb[index].size, cb[index].bits);
>> +                    }
>> +
>> +                    data += a_width;
>> +                }
>> +
>> +                if (count > 0) {
>> +                    count = put_runcode(pb, count, rb);
>> +                }
>> +
>> +                put_bits(pb, cb[512].size, cb[512].bits);
>> +
>> +                avpriv_align_put_bits(pb);
>> +                flush_put_bits(pb);
>> +                bytestream2_skip_p(pby, put_bits_count(pb) >> 3);
>> +                while (bytestream2_tell_p(pby) & 3)
>> +                    bytestream2_put_byte(pby, 0);
>> +
>> +                bytestream2_put_be16(pby, BandTrailer);
>> +                bytestream2_put_be16(pby, 0);
>> +            }
>> +
>> +            bytestream2_put_be16(pby, BitstreamMarker);
>> +            bytestream2_put_be16(pby, 0x0c0c);
>> +        }
>> +
>> +        s->plane[p].size = bytestream2_tell_p(pby) - pos;
>> +    }
>> +
>> +    bytestream2_put_be16(pby, GroupTrailer);
>> +    bytestream2_put_be16(pby, 0);
>> +
>> +    pkt->size   = bytestream2_tell_p(pby);
>> +    pkt->flags |= AV_PKT_FLAG_KEY;
>> +
>> +    bytestream2_seek_p(pby, 8, SEEK_SET);
>> +    for (int i = 0; i < s->planes; i++)
>> +        bytestream2_put_be32(pby, s->plane[i].size);
>> +
>> +    av_assert0((pkt->size & 3) == 0);
>> +
>> +    *got_packet = 1;
>> +
>> +    return 0;
>> +}
>> +
>> +static av_cold int cfhd_encode_close(AVCodecContext *avctx)
>> +{
>> +    CFHDEncContext *s = avctx->priv_data;
>> +    /*
>> +     * Close callback (installed as .close in ff_cfhd_encoder): for each of
>> +     * the s->planes planes, release dwt_buf and dwt_tmp and clear the
>> +     * subband[] and l_h[] pointer tables. Always returns 0.
>> +     */
>> +    for (int i = 0; i < s->planes; i++) {
>> +        av_freep(&s->plane[i].dwt_buf);
>> +        av_freep(&s->plane[i].dwt_tmp);
>> +        /*
>> +         * The table entries below are only reset, never freed individually.
>> +         * NOTE(review): presumably they point into dwt_buf/dwt_tmp, which
>> +         * were just released -- confirm against cfhd_encode_init.
>> +         */
>> +        for (int j = 0; j < SUBBAND_COUNT; j++)
>> +            s->plane[i].subband[j] = NULL;
>> +
>> +        for (int j = 0; j < 8; j++) /* NOTE(review): 8 is presumably the l_h[] array length -- confirm in cfhd.h */
>> +            s->plane[i].l_h[j] = NULL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +AVCodec ff_cfhd_encoder = { /* Encoder descriptor; this patch also declares
>> +                             * it extern in libavcodec/allcodecs.c. */
>> +    .name             = "cfhd",
>> +    .long_name        = NULL_IF_CONFIG_SMALL("Cineform HD"),
>> +    .type             = AVMEDIA_TYPE_VIDEO,
>> +    .id               = AV_CODEC_ID_CFHD,
>> +    .priv_data_size   = sizeof(CFHDEncContext), /* size of the context the callbacks read via avctx->priv_data */
>> +    .init             = cfhd_encode_init,
>> +    .close            = cfhd_encode_close,
>> +    .encode2          = cfhd_encode_frame,
>> +    .capabilities     = AV_CODEC_CAP_FRAME_THREADS,
>> +    .pix_fmts         = (const enum AVPixelFormat[]) { /* GBRP12 is the only format listed; AV_PIX_FMT_NONE ends the list */
>> +                          AV_PIX_FMT_GBRP12,
>> +                          AV_PIX_FMT_NONE
>> +                        },
>> +};
>
>
> --
>   Nicolas George
>


More information about the ffmpeg-devel mailing list