[FFmpeg-cvslog] avcodec: add HVQM4 Video decoder
Paul B Mahol
git at videolan.org
Wed Feb 23 18:00:45 EET 2022
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sat Feb 12 09:10:00 2022 +0100| [57f4da0973c665862ac48c244db64a9294f71c81] | committer: Paul B Mahol
avcodec: add HVQM4 Video decoder
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=57f4da0973c665862ac48c244db64a9294f71c81
---
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/codec_desc.c | 7 +
libavcodec/codec_id.h | 1 +
libavcodec/hvqm4.c | 1719 +++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 1729 insertions(+)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 276df8ee5b..55ef3185f9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -418,6 +418,7 @@ OBJS-$(CONFIG_HQ_HQA_DECODER) += hq_hqa.o hq_hqadata.o hq_hqadsp.o \
OBJS-$(CONFIG_HQX_DECODER) += hqx.o hqxvlc.o hqxdsp.o canopus.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o huffyuvdec.o
OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o huffyuvenc.o
+OBJS-$(CONFIG_HVQM4_DECODER) += hvqm4.o
OBJS-$(CONFIG_HYMT_DECODER) += huffyuv.o huffyuvdec.o
OBJS-$(CONFIG_IDCIN_DECODER) += idcinvideo.o
OBJS-$(CONFIG_IDF_DECODER) += bintext.o cga_data.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 89ba205a2f..9fa5007c0f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -164,6 +164,7 @@ extern const AVCodec ff_hq_hqa_decoder;
extern const AVCodec ff_hqx_decoder;
extern const AVCodec ff_huffyuv_encoder;
extern const AVCodec ff_huffyuv_decoder;
+extern const AVCodec ff_hvqm4_decoder;
extern const AVCodec ff_hymt_decoder;
extern const AVCodec ff_idcin_decoder;
extern const AVCodec ff_iff_ilbm_decoder;
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 6deba785dc..cdcb164336 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -1862,6 +1862,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
.long_name = NULL_IF_CONFIG_SMALL("GEM Raster image"),
.props = AV_CODEC_PROP_LOSSY,
},
+ {
+ .id = AV_CODEC_ID_HVQM4,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "hvqm4",
+ .long_name = NULL_IF_CONFIG_SMALL("HVQM4 Video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
/* various PCM "codecs" */
{
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index f3f262ec75..f1109c27d9 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -308,6 +308,7 @@ enum AVCodecID {
AV_CODEC_ID_SIMBIOSIS_IMX,
AV_CODEC_ID_SGA_VIDEO,
AV_CODEC_ID_GEM,
+ AV_CODEC_ID_HVQM4,
/* various PCM "codecs" */
AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs
diff --git a/libavcodec/hvqm4.c b/libavcodec/hvqm4.c
new file mode 100644
index 0000000000..9b1238c5bc
--- /dev/null
+++ b/libavcodec/hvqm4.c
@@ -0,0 +1,1719 @@
+/*
+ * HVQM4 Video decoder
+ * Copyright (c) 2018-2020 Tillmann Karras
+ * Copyright (c) 2022 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/thread.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "get_bits.h"
+#include "golomb.h"
+#include "internal.h"
+
+#define PLANE_COUNT 3
+#define LUMA_CHROMA 2
+#define LUMA_IDX 0
+#define CHROMA_IDX 1
+
+enum FrameType {
+ I_FRAME = 0x10,
+ P_FRAME = 0x20,
+ B_FRAME = 0x30,
+};
+
+typedef struct BlockData {
+ uint8_t value;
+ uint8_t type;
+} BlockData;
+
+typedef struct StackState {
+ uint32_t plane_idx;
+ BlockData const *line_prev;
+ BlockData const *line_curr;
+ BlockData const *line_next;
+ BlockData next;
+ BlockData curr;
+ uint8_t value_prev;
+} StackState;
+
+typedef struct GBCWithVLC {
+ GetBitContext gb;
+ VLC *vlc;
+} GBCWithVLC;
+
+typedef struct HVQPlaneDesc {
+ BlockData *border; // 0-3 beginning of the plane including the border
+ BlockData *payload; // 4-7 beginning of the non-border plane data
+ uint16_t h_blocks;
+ uint16_t v_blocks;
+ uint16_t h_blocks_safe;
+ uint16_t v_blocks_safe;
+ // offsets of PBs within one MCB
+ // +---+---+
+ // | 0 | 3 |
+ // +---+---+
+ // | 1 | 2 |
+ // +---+---+
+ uint16_t mcb_offset[4];
+ uint32_t px_offset[4];
+ uint32_t py_offset[4];
+ uint8_t width_shift;
+ uint8_t height_shift;
+ uint8_t pb_per_mcb_x;
+ uint8_t pb_per_mcb_y;
+ uint8_t blocks_per_mcb;
+ uint8_t padding[3];
+} HVQPlaneDesc;
+
+typedef struct VideoState {
+ HVQPlaneDesc planes[PLANE_COUNT];
+ VLC vlc[6];
+ GBCWithVLC dc_values[PLANE_COUNT]; // DC values
+ GBCWithVLC dc_rle[PLANE_COUNT]; // DC run lengths
+ GBCWithVLC bufTree0[PLANE_COUNT];
+ GBCWithVLC basis_num[LUMA_CHROMA];
+ GBCWithVLC basis_num_run[LUMA_CHROMA];
+ GetBitContext fixvl[PLANE_COUNT]; // uncompressed high-entropy data
+ GBCWithVLC mv_h; // horizontal motion vectors
+ GBCWithVLC mv_v; // vertical motion vectors
+ GBCWithVLC mcb_proc; // macroblock proc
+ GBCWithVLC mcb_type; // macroblock type
+ uint16_t h_nest_size;
+ uint16_t v_nest_size;
+ uint8_t is_landscape; // FIXME: check what happens for square video
+ uint8_t nest_data[70 * 38];
+ uint8_t padding[3];
+ uint32_t dc_max;
+ uint32_t dc_min;
+ uint8_t unk_shift;
+ uint8_t dc_shift;
+ // number of residual bits to read from mv_h/mv_v,
+ // one setting for each of past and future
+ uint8_t mc_residual_bits_h[2];
+ uint8_t mc_residual_bits_v[2];
+ uint8_t maybe_padding[2];
+} VideoState;
+
+typedef struct SeqObj {
+ VideoState *state;
+ uint16_t width;
+ uint16_t height;
+ uint8_t h_samp;
+ uint8_t v_samp;
+} SeqObj;
+
+typedef struct MCPlane {
+ uint32_t rle;
+ uint32_t pb_dc;
+ BlockData *payload_cur_blk;
+ BlockData *payload_cur_row;
+ uint8_t *present;
+ ptrdiff_t present_stride;
+ uint8_t *top;
+ ptrdiff_t top_stride;
+ uint8_t *target;
+ ptrdiff_t target_stride;
+ uint8_t *past;
+ ptrdiff_t past_stride;
+ uint8_t *future;
+ ptrdiff_t future_stride;
+ uint16_t h_mcb_stride;
+ uint16_t padding;
+ uint32_t v_mcb_stride;
+ uint32_t pb_per_mcb_x;
+ ptrdiff_t stride;
+} MCPlane;
+
+struct RLDecoder {
+ uint32_t value;
+ uint32_t count;
+};
+
+typedef struct HVQM4Context {
+ AVFrame *frame[3];
+
+ SeqObj seqobj;
+ VideoState state;
+ uint8_t *buffer;
+
+ int current_pic;
+
+ GetBitContext gb;
+} HVQM4Context;
+
+static int32_t div_tab[16];
+static int32_t mcdiv_tab[512];
+
+static av_cold void hvqm4_init_static(void)
+{
+ div_tab[0] = 0;
+ mcdiv_tab[0] = 0;
+
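+ // reciprocal tables scaled by 0x1000: div_tab[] (computed with the coarser
+ // 0x1000 / (i * 16) * 16) rescales AOT basis vectors in get_aot_basis() and
+ // get_mc_aot_basis(), mcdiv_tab[] rescales the prediction residual in
+ // PrediAotBlock(); both avoid per-block divisions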
+ for (int i = 1; i < 0x10; i++)
+ div_tab[i] = 0x1000 / (i * 16) * 16;
+ for (int i = 1; i < 0x200; i++)
+ mcdiv_tab[i] = 0x1000 / i;
+}
+
+static void set_plane_desc(SeqObj *seqobj, uint8_t plane_idx, uint8_t h_samp, uint8_t v_samp)
+{
+ HVQPlaneDesc *plane = &seqobj->state->planes[plane_idx];
+
+ plane->width_shift = h_samp == 2 ? 1 : 0;
+ plane->height_shift = v_samp == 2 ? 1 : 0;
+ // 4x4-pixel blocks per macroblock in each direction
+ plane->pb_per_mcb_x = 2 >> plane->width_shift; // 1..2
+ plane->pb_per_mcb_y = 2 >> plane->height_shift; // 1..2
+ plane->blocks_per_mcb = plane->pb_per_mcb_x * plane->pb_per_mcb_y; // 1..4
+ // number of 4x4 blocks
+ plane->h_blocks = seqobj->width / (h_samp * 4);
+ plane->v_blocks = seqobj->height / (v_samp * 4);
+ // number of 4x4 blocks + border
+ plane->h_blocks_safe = plane->h_blocks + 2;
+ plane->v_blocks_safe = plane->v_blocks + 2;
+ // offset of blocks in MCB
+ plane->mcb_offset[0] = 0;
+ plane->mcb_offset[1] = plane->h_blocks_safe;
+ plane->mcb_offset[2] = plane->h_blocks_safe + 1;
+ plane->mcb_offset[3] = 1;
+ plane->px_offset[0] = 0;
+ plane->py_offset[0] = 0;
+ plane->px_offset[1] = 0;
+ plane->py_offset[1] = 4;
+ plane->px_offset[2] = 4;
+ plane->py_offset[2] = 4;
+ plane->px_offset[3] = 4;
+ plane->py_offset[3] = 0;
+}
+
+static void set_border(BlockData *dst)
+{
+ dst->value = 0x7F;
+ dst->type = 0xFF;
+}
+
+static void set_buffer(SeqObj *seqobj, void *workbuff, uint8_t *buffer)
+{
+ VideoState *state = workbuff;
+ BlockData *plane_data;
+
+ seqobj->state = state;
+ set_plane_desc(seqobj, 0, 1, 1);
+ set_plane_desc(seqobj, 1, 2, 2);
+ set_plane_desc(seqobj, 2, 2, 2);
+
+ state->is_landscape = seqobj->width >= seqobj->height;
+ if (state->is_landscape) {
+ state->h_nest_size = 70;
+ state->v_nest_size = 38;
+ } else {
+ state->h_nest_size = 38;
+ state->v_nest_size = 70;
+ }
+
+ state->basis_num[0].vlc = &state->vlc[3];
+ state->basis_num[1].vlc = &state->vlc[3];
+
+ state->basis_num_run[0].vlc = &state->vlc[1];
+ state->basis_num_run[1].vlc = &state->vlc[1];
+
+ state->dc_values[0].vlc = &state->vlc[0];
+ state->dc_values[1].vlc = &state->vlc[0];
+ state->dc_values[2].vlc = &state->vlc[0];
+
+ state->dc_rle[0].vlc = &state->vlc[1]; // reuse!
+ state->dc_rle[1].vlc = &state->vlc[1]; //
+ state->dc_rle[2].vlc = &state->vlc[1]; //
+
+ state->bufTree0[0].vlc = &state->vlc[2];
+ state->bufTree0[1].vlc = &state->vlc[2];
+ state->bufTree0[2].vlc = &state->vlc[2];
+
+ state->mv_h.vlc = &state->vlc[4];
+ state->mv_v.vlc = &state->vlc[4];
+
+ state->mcb_proc.vlc = &state->vlc[5];
+ state->mcb_type.vlc = &state->vlc[5];
+
+ plane_data = (BlockData *)buffer;
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ HVQPlaneDesc *plane = &state->planes[i];
+ ptrdiff_t stride = plane->h_blocks_safe;
+ BlockData *ptr;
+
+ plane->border = plane_data;
+ // skip top border (stride) and left border (1)
+ plane->payload = plane_data + stride + 1;
+ plane_data += plane->h_blocks_safe * plane->v_blocks_safe;
+
+ // set horizontal borders
+ ptr = plane->border;
+ for (int i = plane->h_blocks_safe; i > 0; --i) {
+ set_border(ptr);
+ ++ptr;
+ }
+
+ ptr = plane_data;
+ for (int i = plane->h_blocks_safe; i > 0; --i) {
+ --ptr;
+ set_border(ptr);
+ }
+
+ // set vertical borders
+ ptr = plane->border + stride;
+ for (int i = plane->v_blocks_safe - 2; i > 0; --i) {
+ set_border(ptr);
+ ptr += stride;
+ }
+
+ ptr = plane->border + stride * 2 - 1;
+ for (int i = plane->v_blocks_safe - 2; i > 0; --i) {
+ set_border(ptr);
+ ptr += stride;
+ }
+ }
+}
+
+static uint32_t hvqm4_buffsize(SeqObj *seqobj)
+{
+ uint32_t h_blocks = seqobj->width / 4;
+ uint32_t v_blocks = seqobj->height / 4;
+ uint32_t y_blocks = (h_blocks + 2) * (v_blocks + 2);
+
+ uint32_t uv_h_blocks = seqobj->h_samp == 2 ? h_blocks / 2 : h_blocks;
+ uint32_t uv_v_blocks = seqobj->v_samp == 2 ? v_blocks / 2 : v_blocks;
+ uint32_t uv_blocks = (uv_h_blocks + 2) * (uv_v_blocks + 2);
+
+ uint32_t total = (y_blocks + uv_blocks * 2) * sizeof(uint16_t);
+
+ return total;
+}
+
+static av_cold int hvqm4_init(AVCodecContext *avctx)
+{
+ int width = avctx->width, height = avctx->height;
+ static AVOnce init_static_once = AV_ONCE_INIT;
+ HVQM4Context *s = avctx->priv_data;
+
+ avctx->pix_fmt = AV_PIX_FMT_YUV420P;
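+ // grow the coded dimensions by 16 pixels so the allocated frame buffers are
+ // slightly larger than the visible area (presumably as a safety margin for
+ // motion compensation); the display width/height are restored right after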
+ ff_set_dimensions(avctx, avctx->width + 16, avctx->height + 16);
+ avctx->width = width;
+ avctx->height = height;
+
+ for (int i = 0; i < 3; i++) {
+ s->frame[i] = av_frame_alloc();
+ if (!s->frame[i])
+ return AVERROR(ENOMEM);
+ }
+
+ ff_thread_once(&init_static_once, hvqm4_init_static);
+
+ s->seqobj.width = avctx->width;
+ s->seqobj.height = avctx->height;
+ s->seqobj.h_samp = 2;
+ s->seqobj.v_samp = 2;
+
+ s->buffer = av_calloc(hvqm4_buffsize(&s->seqobj), 1);
+ if (!s->buffer)
+ return AVERROR(ENOMEM);
+
+ set_buffer(&s->seqobj, &s->state, s->buffer);
+
+ return 0;
+}
+
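+/*
+ * The Huffman trees are serialized depth-first: a 1 bit denotes an internal
+ * node (recurse into both children), a 0 bit a leaf followed by its 8-bit
+ * symbol, which is optionally sign-extended and scaled by 1 << tree_scale.
+ */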
+static int read_trees(int index,
+ int length,
+ uint16_t code,
+ uint8_t *bits,
+ uint16_t *codes,
+ uint16_t *symbols,
+ GetBitContext *gb,
+ const uint32_t tree_signed,
+ const uint32_t tree_scale)
+{
+ if (get_bits1(gb) == 0) { // leaf node
+ uint8_t byte = get_bits(gb, 8);
+ int16_t symbol = byte;
+
+ if (tree_signed && byte > 0x7F)
+ symbol = (int8_t)byte;
+
+ symbol *= 1 << tree_scale;
+ bits[index] = length;
+ codes[index] = code;
+ symbols[index] = symbol;
+ index++;
+ return index;
+ } else { // recurse
+ index = read_trees(index, length + 1, code << 1, bits, codes, symbols, gb, tree_signed, tree_scale);
+ index = read_trees(index, length + 1, (code << 1) + 1, bits, codes, symbols, gb, tree_signed, tree_scale);
+ return index;
+ }
+}
+
+static int build_huff(GBCWithVLC *buf, uint32_t is_signed, uint32_t scale)
+{
+ const uint32_t tree_signed = is_signed;
+ const uint32_t tree_scale = scale;
+ uint8_t bits[256] = { 0 };
+ uint16_t codes[256] = { 0 };
+ uint16_t symbols[256] = { 0 };
+ VLC *vlc = buf->vlc;
+ int nb_codes;
+
+ ff_free_vlc(vlc);
+ nb_codes = read_trees(0, 0, 0, bits, codes, symbols, &buf->gb, tree_signed, tree_scale);
+
+ return ff_init_vlc_sparse(vlc, ff_log2(nb_codes) + 3, nb_codes, bits, 1, 1,
+ codes, 2, 2, symbols, 2, 2, 0);
+}
+
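+/*
+ * Each coded substream is referenced by a 32-bit offset stored in the frame
+ * header. get_code() reads that offset, seeks to offset + skip bytes into the
+ * packet, reads a 32-bit byte count there and sets up a bit reader over the
+ * payload that follows.
+ */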
+static int get_code(GetBitContext *new_gb, GetBitContext *gb, int skip)
+{
+ GetBitContext tmp_gb = *gb;
+ uint32_t new_size, offset = get_bits_long(gb, 32);
+ int ret;
+
+ if (offset >= INT32_MAX - skip - 4)
+ return AVERROR_INVALIDDATA;
+
+ offset += skip;
+
+ if ((gb->size_in_bits >> 3) <= offset + 4)
+ return AVERROR_INVALIDDATA;
+
+ ret = init_get_bits8(&tmp_gb, gb->buffer + offset, (gb->size_in_bits >> 3) - offset);
+ if (ret < 0)
+ return ret;
+
+ new_size = get_bits_long(&tmp_gb, 32);
+
+ if (new_size >= INT32_MAX - 4)
+ return AVERROR_INVALIDDATA;
+
+ if ((tmp_gb.size_in_bits >> 3) < new_size + 4)
+ return AVERROR_INVALIDDATA;
+
+ ret = init_get_bits8(new_gb, tmp_gb.buffer + 4, new_size);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static uint32_t decode_huff(GBCWithVLC *buf)
+{
+ return get_vlc2(&buf->gb, buf->vlc->table, buf->vlc->bits, 3);
+}
+
+static void iframe_basis_numdec(VideoState *state)
+{
+ BlockData *luma_dst = state->planes[LUMA_IDX].payload;
+ const uint32_t luma_h_blocks = state->planes[LUMA_IDX].h_blocks;
+ const uint32_t luma_v_blocks = state->planes[LUMA_IDX].v_blocks;
+ const uint32_t chroma_h_blocks = state->planes[CHROMA_IDX].h_blocks;
+ const uint32_t chroma_v_blocks = state->planes[CHROMA_IDX].v_blocks;
+ BlockData *u_dst = state->planes[1].payload;
+ BlockData *v_dst = state->planes[2].payload;
+ uint32_t rle = 0;
+
+ for (int y = 0; y < luma_v_blocks; y++) {
+ for (int x = 0; x < luma_h_blocks; x++) {
+ if (rle) {
+ luma_dst->type = 0;
+ rle--;
+ } else {
+ int16_t num = decode_huff(&state->basis_num[LUMA_IDX]) & 0xFFFF;
+ if (num == 0)
+ rle = decode_huff(&state->basis_num_run[LUMA_IDX]);
+ luma_dst->type = num & 0xFF;
+ }
+ luma_dst++;
+ }
+ // skip borders
+ luma_dst += 2;
+ }
+
+ rle = 0;
+ for (int y = 0; y < chroma_v_blocks; y++) {
+ for (int x = 0; x < chroma_h_blocks; x++) {
+ if (rle) {
+ u_dst->type = 0;
+ v_dst->type = 0;
+ --rle;
+ } else {
+ int16_t num = decode_huff(&state->basis_num[CHROMA_IDX]) & 0xFFFF;
+ if (num == 0)
+ rle = decode_huff(&state->basis_num_run[CHROMA_IDX]);
+ u_dst->type = (num >> 0) & 0xF;
+ v_dst->type = (num >> 4) & 0xF;
+ }
+ ++u_dst;
+ ++v_dst;
+ }
+ u_dst += 2;
+ v_dst += 2;
+ }
+}
+
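+ // overflow-coded symbols: values at or beyond the allowed range are treated
+ // as escapes and accumulated until a value strictly inside (min, max) ends
+ // the sequence; the final value is included in the returned sum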
+static int32_t decode_sovf_sym(GBCWithVLC *buf, int32_t min, int32_t max)
+{
+ int32_t sum = 0, value;
+
+ do {
+ value = decode_huff(buf);
+ sum += value;
+ } while (value <= min || value >= max);
+
+ return sum;
+}
+
+static int32_t decode_uovf_sym(GBCWithVLC *buf, int32_t max)
+{
+ int32_t sum = 0, value;
+
+ do {
+ value = decode_huff(buf);
+ sum += value;
+ } while (value >= max);
+
+ return sum;
+}
+
+static uint32_t get_delta_dc(VideoState *state, uint32_t plane_idx, uint32_t *rle)
+{
+ if (*rle == 0) {
+ uint32_t delta = decode_sovf_sym(&state->dc_values[plane_idx], state->dc_min, state->dc_max);
+
+ if (delta == 0) // successive zeroes are run-length encoded
+ *rle = decode_huff(&state->dc_rle[plane_idx]);
+ return delta;
+ } else {
+ --*rle;
+ return 0;
+ }
+}
+
+static void iframe_dc_decode(VideoState *state)
+{
+ for (int plane_idx = 0; plane_idx < PLANE_COUNT; ++plane_idx) {
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ uint32_t rle = 0;
+ const uint32_t v_blocks = plane->v_blocks;
+ BlockData *curr = plane->payload;
+ for (uint32_t y = 0; y < v_blocks; y++) {
+ // pointer to previous line
+ BlockData const *prev = curr - plane->h_blocks_safe;
+ // first prediction on a line is only the previous line's value
+ uint8_t value = prev->value;
+ for (uint32_t x = 0; x < plane->h_blocks; x++) {
+ value += get_delta_dc(state, plane_idx, &rle);
+ curr->value = value;
+ ++curr;
+ ++prev;
+ // next prediction on this line is the mean of left (current) and top values
+ // +---+---+
+ // | | T |
+ // +---+---+
+ // | L | P |
+ // +---+---+
+ value = (value + prev->value + 1) / 2;
+ }
+ // skip right border of this line and left border of next line
+ curr += 2;
+ }
+ }
+}
+
+static void make_nest(VideoState *state, uint16_t nest_x, uint16_t nest_y)
+{
+ int32_t v_empty, h_empty, v_nest_blocks, h_nest_blocks, v_mirror, h_mirror;
+ HVQPlaneDesc *y_plane = &state->planes[0];
+ BlockData const *ptr = y_plane->payload + y_plane->h_blocks_safe * nest_y + nest_x;
+ uint8_t const *nest2;
+ uint8_t *nest;
+
+ if (y_plane->h_blocks < state->h_nest_size) {
+ // special case if the video is less than 280 pixels wide (assuming landscape mode)
+ h_nest_blocks = y_plane->h_blocks;
+ h_mirror = state->h_nest_size - y_plane->h_blocks;
+ if (h_mirror > y_plane->h_blocks)
+ h_mirror = y_plane->h_blocks;
+ h_empty = state->h_nest_size - (h_nest_blocks + h_mirror);
+ } else {
+ h_nest_blocks = state->h_nest_size;
+ h_empty = 0;
+ h_mirror = 0;
+ }
+
+ if (y_plane->v_blocks < state->v_nest_size) {
+ // special case if the video is less than 152 pixels high
+ v_nest_blocks = y_plane->v_blocks;
+ v_mirror = state->v_nest_size - y_plane->v_blocks;
+ if (v_mirror > y_plane->v_blocks)
+ v_mirror = y_plane->v_blocks;
+ v_empty = state->v_nest_size - (v_nest_blocks + v_mirror);
+ } else {
+ v_nest_blocks = state->v_nest_size;
+ v_empty = 0;
+ v_mirror = 0;
+ }
+
+ nest = state->nest_data;
+ for (int i = 0; i < v_nest_blocks; i++) {
+ BlockData const *p = ptr;
+ for (int j = 0; j < h_nest_blocks; j++) {
+ *nest++ = (p->value >> 4) & 0xF;
+ ++p;
+ }
+ // if the video is too small, mirror it
+ for (int j = 0; j < h_mirror; j++) {
+ --p;
+ *nest++ = (p->value >> 4) & 0xF;
+ }
+ // if it is still too small, null out the rest
+ for (int j = 0; j < h_empty; j++)
+ *nest++ = 0;
+ ptr += y_plane->h_blocks_safe;
+ }
+
+ // handle vertical mirroring
+ nest2 = nest - state->h_nest_size;
+ for (int i = 0; i < v_mirror; i++) {
+ for (int j = 0; j < state->h_nest_size; j++)
+ *nest++ = nest2[j];
+ nest2 -= state->h_nest_size;
+ }
+
+ // and vertical nulling
+ for (int i = 0; i < v_empty; i++)
+ for (int j = 0; j < state->h_nest_size; j++)
+ *nest++ = 0;
+}
+
+static uint8_t sat_mean8(uint32_t u)
+{
+ return av_clip_uint8((u + 4) / 8);
+}
+
+static void weight_im_block(uint8_t *dst, ptrdiff_t stride, uint8_t value,
+ uint8_t top, uint8_t bottom, uint8_t left, uint8_t right)
+{
+ /*
+ +---+---+---+
+ | | T | |
+ +---+---+---+
+ | L | D | R |
+ +---+---+---+
+ | | B | |
+ +---+---+---+
+ */
+ int32_t tmb = top - bottom;
+ int32_t lmr = left - right;
+ int32_t vph = tmb + lmr;
+ int32_t vmh = tmb - lmr;
+
+ int32_t v2 = value * 2;
+ int32_t v8 = value * 8;
+
+ int32_t tpl = (top + left) - v2;
+ int32_t tpr = (top + right) - v2;
+ int32_t bpr = (bottom + right) - v2;
+ int32_t bpl = (bottom + left) - v2;
+
+ int32_t tml = top - left;
+ int32_t tmr = top - right;
+ int32_t bmr = bottom - right;
+ int32_t bml = bottom - left;
+
+ // V:
+ // 6 8 8 6
+ // 8 10 10 8
+ // 8 10 10 8
+ // 6 8 8 6
+ //
+ // T:
+ // 2 2 2 2
+ // 0 0 0 0
+ // -1 -1 -1 -1
+ // -1 -1 -1 -1
+ //
+ // B/L/R: like T but rotated accordingly
+
+ // (6*V + 2*T - B + 2*L - R + 4) / 8
+ // (8*V + 2*T - B - R + 4) / 8
+ // (8*V + 2*T - B - L + 4) / 8
+ // (6*V + 2*T - B - L + 2*R + 4) / 8
+
+ dst[0] = sat_mean8(v8 + vph + tpl);
+ dst[1] = sat_mean8(v8 + vph + tml);
+ dst[2] = sat_mean8(v8 + vmh + tmr);
+ dst[3] = sat_mean8(v8 + vmh + tpr);
+
+ dst += stride;
+
+ // ( 8*V - B + 2*L - R + 4) / 8
+ // (10*V - B - R + 4) / 8
+ // (10*V - B - L + 4) / 8
+ // ( 8*V - B - L + 2*R + 4) / 8
+
+ dst[0] = sat_mean8(v8 + vph - tml);
+ dst[1] = sat_mean8(v8 - bpr);
+ dst[2] = sat_mean8(v8 - bpl);
+ dst[3] = sat_mean8(v8 + vmh - tmr);
+
+ dst += stride;
+
+ // ( 8*V - T + 2*L - R + 4) / 8
+ // (10*V - T - R + 4) / 8
+ // (10*V - T - L + 4) / 8
+ // ( 8*V - T - L + 2*R + 4) / 8
+
+ dst[0] = sat_mean8(v8 - vmh - bml);
+ dst[1] = sat_mean8(v8 - tpr);
+ dst[2] = sat_mean8(v8 - tpl);
+ dst[3] = sat_mean8(v8 - vph - bmr);
+
+ dst += stride;
+
+ dst[0] = sat_mean8(v8 - vmh + bpl);
+ dst[1] = sat_mean8(v8 - vmh + bml);
+ dst[2] = sat_mean8(v8 - vph + bmr);
+ dst[3] = sat_mean8(v8 - vph + bpr);
+}
+
+static void dc_block(uint8_t *dst, ptrdiff_t stride, uint8_t value)
+{
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ dst[y * stride + x] = value;
+}
+
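+/*
+ * Each basis vector is described by a 16-bit word from the fixvl stream:
+ * bits 0-5 and 6-10 select a position in the 70x38 (or 38x70) nest,
+ * bits 11-12 select single or double stepping in x and y, bits 13-14 are a
+ * small offset added to the decoded sum before scaling, and bit 15 negates
+ * the scale factor taken from div_tab[].
+ */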
+static uint32_t get_aot_basis(VideoState *state, uint8_t basis_out[4][4],
+ int32_t *sum, uint8_t const *nest_data,
+ uint32_t nest_stride, uint32_t plane_idx)
+{
+ GetBitContext *gb = &state->fixvl[plane_idx];
+ uint16_t bits = get_bits(gb, 16);
+ uint32_t x_stride, y_stride;
+ uint32_t offset70 = bits & 0x3F;
+ uint32_t offset38 = (bits >> 6) & 0x1F;
+ uint32_t stride70 = (bits >> 11) & 1;
+ uint32_t stride38 = (bits >> 12) & 1;
+ int32_t inverse, offset;
+ uint8_t min, max;
+
+ if (state->is_landscape) {
+ nest_data += nest_stride * offset38 + offset70;
+ x_stride = 1 << stride70;
+ y_stride = nest_stride << stride38;
+ } else {
+ nest_data += nest_stride * offset70 + offset38;
+ x_stride = 1 << stride38;
+ y_stride = nest_stride << stride70;
+ }
+
+ // copy basis vector from the nest
+ min = nest_data[0];
+ max = nest_data[0];
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ uint8_t nest_value = nest_data[y * y_stride + x * x_stride];
+ basis_out[y][x] = nest_value;
+ min = nest_value < min ? nest_value : min;
+ max = nest_value > max ? nest_value : max;
+ }
+ }
+ *sum += decode_huff(&state->bufTree0[plane_idx]);
+ inverse = div_tab[max - min];
+ if (bits & 0x8000)
+ inverse = -inverse;
+ offset = (bits >> 13) & 3;
+ return (*sum + offset) * inverse;
+}
+
+static int32_t get_aot_sum(VideoState *state, int32_t result[4][4],
+ uint8_t num_bases, uint8_t const *nest_data,
+ uint32_t nest_stride, uint32_t plane_idx)
+{
+ int32_t temp, sum, mean;
+ uint8_t basis[4][4];
+
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ result[y][x] = 0;
+ temp = 0;
+
+ for (int k = 0; k < num_bases; k++) {
+ uint32_t factor = get_aot_basis(state, basis, &temp, nest_data, nest_stride, plane_idx);
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ result[y][x] += factor * basis[y][x];
+ }
+
+ sum = 0;
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ sum += result[y][x];
+ mean = sum >> 4;
+ return mean;
+}
+
+static void read_block(VideoState *state, uint8_t *dst, ptrdiff_t dst_stride, uint32_t plane_idx)
+{
+ GetBitContext *gb = &state->fixvl[plane_idx];
+
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ dst[y * dst_stride + x] = get_bits(gb, 8);
+}
+
+static void intra_aot_block(VideoState *state, uint8_t *dst, ptrdiff_t stride,
+ uint8_t target_average, uint8_t block_type, uint32_t plane_idx)
+{
+ int32_t result[4][4], aot_average, delta;
+
+ if (block_type == 6) {
+ read_block(state, dst, stride, plane_idx);
+ return;
+ }
+
+ // block types 1..5 serve as number of bases to use, 9..15 are unused
+ aot_average = get_aot_sum(state, result, block_type, state->nest_data, state->h_nest_size, plane_idx);
+ delta = (target_average << state->unk_shift) - aot_average;
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ int32_t value = ((result[y][x] + delta) >> state->unk_shift);
+ dst[y * stride + x] = av_clip_uint8(value);
+ }
+ }
+}
+
+static void decode_iframe_block(VideoState *state, uint8_t *dst, ptrdiff_t stride, StackState *stack_state)
+{
+ if (stack_state->curr.type == 0) {
+ uint8_t top = stack_state->line_prev->type & 0x77 ? stack_state->curr.value : stack_state->line_prev->value;
+ uint8_t bottom = stack_state->line_next->type & 0x77 ? stack_state->curr.value : stack_state->line_next->value;
+ uint8_t right = stack_state->next.type & 0x77 ? stack_state->curr.value : stack_state->next.value;
+ // the left value is tracked manually; the logic is equivalent to that used for the other surrounding values
+ uint8_t left = stack_state->value_prev;
+
+ weight_im_block(dst, stride, stack_state->curr.value, top, bottom, left, right);
+ stack_state->value_prev = stack_state->curr.value;
+ } else if (stack_state->curr.type == 8) {
+ dc_block(dst, stride, stack_state->curr.value);
+ stack_state->value_prev = stack_state->curr.value;
+ } else {
+ intra_aot_block(state, dst, stride, stack_state->curr.value, stack_state->curr.type, stack_state->plane_idx);
+ // don't use the current DC value to predict the next one
+ stack_state->value_prev = stack_state->next.value;
+ }
+ // next block
+ stack_state->line_prev++;
+ stack_state->line_next++;
+}
+
+static void iframe_line(VideoState *state, uint8_t *dst, ptrdiff_t stride, StackState *stack_state, uint16_t h_blocks)
+{
+ stack_state->next = stack_state->line_curr[0];
+ stack_state->value_prev = stack_state->line_curr[0].value;
+
+ while (--h_blocks > 0) {
+ stack_state->curr = stack_state->next;
+ ++stack_state->line_curr;
+ stack_state->next = stack_state->line_curr[0];
+ decode_iframe_block(state, dst, stride, stack_state);
+ // next block on same line
+ dst += 4;
+ }
+
+ stack_state->curr = stack_state->next;
+ decode_iframe_block(state, dst, stride, stack_state);
+
+ // skip current, right border on same line, and left border on next line
+ stack_state->line_curr += 3;
+
+ // these have already been advanced to the right border in decode_iframe_block
+ stack_state->line_prev += 2;
+ stack_state->line_next += 2;
+}
+
+static void decode_iframe_plane(VideoState *state, int plane_idx, uint8_t *dst, ptrdiff_t linesize)
+{
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ StackState stack_state;
+ int16_t v_blocks;
+
+ stack_state.plane_idx = plane_idx;
+ stack_state.line_prev = plane->payload;
+ stack_state.line_curr = plane->payload;
+ stack_state.line_next = plane->payload + plane->h_blocks_safe;
+ v_blocks = plane->v_blocks;
+
+ // first line
+ if (v_blocks > 0) {
+ iframe_line(state, dst, linesize, &stack_state, plane->h_blocks);
+ // blocks are 4x4 so advance dst by 4 lines
+ dst += linesize * 4;
+ v_blocks--;
+ }
+ // middle lines
+ stack_state.line_prev = plane->payload;
+ while (v_blocks > 1) {
+ iframe_line(state, dst, linesize, &stack_state, plane->h_blocks);
+ dst += linesize * 4;
+ v_blocks--;
+ }
+ // last line
+ if (v_blocks > 0) {
+ stack_state.line_next = stack_state.line_curr;
+ iframe_line(state, dst, linesize, &stack_state, plane->h_blocks);
+ }
+}
+
+static int decode_iframe(SeqObj *seqobj, GetBitContext *gb, AVFrame *frame)
+{
+ VideoState *state = seqobj->state;
+ uint8_t dc_shift = get_bits(gb, 8);
+ uint16_t nest_x, nest_y;
+ int ret;
+
+ state->unk_shift = get_bits(gb, 8);
+ skip_bits(gb, 16);
+ nest_x = get_bits(gb, 16);
+ nest_y = get_bits(gb, 16);
+
+ for (int i = 0; i < LUMA_CHROMA; i++) {
+ ret = get_code(&state->basis_num[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->basis_num_run[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ ret = get_code(&state->dc_values[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->bufTree0[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->fixvl[i], gb, 78);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ ret = get_code(&state->dc_rle[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = build_huff(&state->basis_num[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->basis_num_run[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->dc_values[0], 1, dc_shift);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->bufTree0[0], 0, 2);
+ if (ret < 0)
+ return ret;
+
+ state->dc_max = 0x7F << dc_shift;
+ state->dc_min = -(0x80 << dc_shift);
+
+ // 4x4 block types
+ iframe_basis_numdec(state);
+ // 4x4 block DC values
+ iframe_dc_decode(state);
+ // 70x38 nest copied from upper 4 bits of DC values somewhere in the luma plane
+ make_nest(state, nest_x, nest_y);
+
+ for (int i = 0; i < PLANE_COUNT; i++)
+ decode_iframe_plane(state, i, frame->data[i], frame->linesize[i]);
+
+ return 0;
+}
+
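+ // MCPlane holds the per-plane cursors (payload blocks, destination, past and
+ // future reference pointers) used while walking the 8x8-pixel macroblocks of
+ // a P/B frame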
+static void init_mc_handler(VideoState *state,
+ MCPlane mcplanes[PLANE_COUNT],
+ AVFrame *present, AVFrame *past, AVFrame *future)
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ MCPlane *mcplane = &mcplanes[i];
+ HVQPlaneDesc *plane = &state->planes[i];
+
+ mcplane->rle = 0;
+ mcplane->pb_dc = 0x7F;
+ mcplane->present = present->data[i];
+ mcplane->present_stride = present->linesize[i];
+ mcplane->past = past->data[i];
+ mcplane->past_stride = past->linesize[i];
+ mcplane->future = future->data[i];
+ mcplane->future_stride = future->linesize[i];
+ mcplane->payload_cur_blk = plane->payload;
+ mcplane->payload_cur_row = plane->payload;
+ mcplane->h_mcb_stride = 8 >> plane->width_shift;
+ mcplane->v_mcb_stride = present->linesize[i] * (8 >> plane->height_shift);
+ mcplane->pb_per_mcb_x = plane->pb_per_mcb_x;
+ mcplane->stride = plane->h_blocks_safe * plane->pb_per_mcb_y;
+ }
+}
+
+static void initMCBproc(GBCWithVLC *buf, struct RLDecoder *proc)
+{
+ if (buf->gb.buffer) {
+ proc->value = get_bits1(&buf->gb);
+ proc->count = decode_uovf_sym(buf, 0xFF);
+ }
+}
+
+static void initMCBtype(GBCWithVLC *buf, struct RLDecoder *type)
+{
+ if (buf->gb.buffer) {
+ uint32_t value = get_bits1(&buf->gb) << 1;
+
+ type->value = value | get_bits1(&buf->gb);
+ type->count = decode_uovf_sym(buf, 0xFF);
+ }
+}
+
+static void setMCTop(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].top = mcplanes[i].present;
+ mcplanes[i].top_stride = mcplanes[i].present_stride;
+ }
+}
+
+static const uint8_t mcbtypetrans[2][3] = {
+ { 1, 2, 0 },
+ { 2, 0, 1 },
+};
+
+static uint32_t getMCBtype(GBCWithVLC *buftree, struct RLDecoder *type)
+{
+ if (type->count == 0) {
+ // only three possible values, so when the value changes,
+ // a single bit decides which other value to select
+ // bit == 0 -> increment
+ // bit == 1 -> decrement
+ // then wrap to range 0..2
+ uint32_t bit = get_bits1(&buftree->gb);
+
+ type->value = mcbtypetrans[bit][type->value];
+ type->count = decode_uovf_sym(buftree, 0xFF);
+ }
+
+ type->count--;
+
+ return type->value;
+}
+
+static void decode_PB_dc(VideoState *state, MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ HVQPlaneDesc *plane = &state->planes[i];
+ MCPlane *mcplane = &mcplanes[i];
+
+ for (int j = 0; j < plane->blocks_per_mcb; j++) {
+ BlockData *payload;
+
+ mcplane->pb_dc += decode_sovf_sym(&state->dc_values[i], state->dc_min, state->dc_max);
+ payload = mcplane->payload_cur_blk;
+ payload[plane->mcb_offset[j]].value = mcplane->pb_dc;
+ }
+ }
+}
+
+static void decode_PB_cc(VideoState *state, MCPlane mcplanes[PLANE_COUNT], uint32_t proc, uint32_t type)
+{
+ uint32_t block_type = (type << 5) | (proc << 4);
+ if (proc == 1) {
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ BlockData *payload = mcplanes[i].payload_cur_blk;
+ HVQPlaneDesc *plane = &state->planes[i];
+ for (int j = 0; j < plane->blocks_per_mcb; j++)
+ payload[plane->mcb_offset[j]].type = block_type;
+ }
+ return;
+ } else {
+ HVQPlaneDesc *planeY = &state->planes[0];
+ HVQPlaneDesc *planeU = &state->planes[1];
+ MCPlane *mcplaneY = &mcplanes[0];
+ MCPlane *mcplaneU = &mcplanes[1];
+ MCPlane *mcplaneV = &mcplanes[2];
+ for (int i = 0; i < planeY->blocks_per_mcb; i++) {
+ BlockData *ptr = mcplaneY->payload_cur_blk;
+ if (mcplaneY->rle) {
+ ptr[planeY->mcb_offset[i]].type = block_type;
+ --mcplaneY->rle;
+ } else {
+ int16_t huff = decode_huff(&state->basis_num[LUMA_IDX]);
+
+ if (huff) {
+ ptr[planeY->mcb_offset[i]].type = block_type | huff;
+ } else {
+ ptr[planeY->mcb_offset[i]].type = block_type;
+ mcplaneY->rle = decode_huff(&state->basis_num_run[0]);
+ }
+ }
+ }
+ // chroma
+ for (int i = 0; i < planeU->blocks_per_mcb; i++) {
+ BlockData *ptrU = mcplaneU->payload_cur_blk;
+ BlockData *ptrV = mcplaneV->payload_cur_blk;
+ if (mcplaneU->rle) {
+ ptrU[planeU->mcb_offset[i]].type = block_type;
+ ptrV[planeU->mcb_offset[i]].type = block_type;
+ --mcplaneU->rle;
+ } else {
+ int16_t huff = decode_huff(&state->basis_num[CHROMA_IDX]);
+
+ if (huff) {
+ ptrU[planeU->mcb_offset[i]].type = block_type | ((huff >> 0) & 0xF);
+ ptrV[planeU->mcb_offset[i]].type = block_type | ((huff >> 4) & 0xF);
+ } else {
+ ptrU[planeU->mcb_offset[i]].type = block_type;
+ ptrV[planeU->mcb_offset[i]].type = block_type;
+ mcplaneU->rle = decode_huff(&state->basis_num_run[1]);
+ }
+ }
+ }
+ }
+}
+
+static void reset_PB_dc(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++)
+ mcplanes[i].pb_dc = 0x7F;
+}
+
+static uint32_t getMCBproc(GBCWithVLC *buf, struct RLDecoder *proc)
+{
+ if (proc->count == 0) {
+ proc->value ^= 1;
+ proc->count = decode_uovf_sym(buf, 0xFF);
+ }
+
+ proc->count--;
+
+ return proc->value;
+}
+
+static void setMCNextBlk(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].top += mcplanes[i].h_mcb_stride;
+ mcplanes[i].payload_cur_blk += mcplanes[i].pb_per_mcb_x;
+ }
+}
+
+static void setMCDownBlk(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ MCPlane *mcplane = &mcplanes[i];
+ BlockData *first_block_on_next_row = mcplane->payload_cur_row + mcplane->stride;
+
+ mcplane->present += mcplane->v_mcb_stride;
+ mcplane->payload_cur_blk = first_block_on_next_row;
+ mcplane->payload_cur_row = first_block_on_next_row;
+ }
+}
+
+static void spread_PB_descMap(SeqObj *seqobj, MCPlane mcplanes[PLANE_COUNT])
+{
+ struct RLDecoder proc, type;
+ VideoState *state = seqobj->state;
+ initMCBproc(&state->mcb_proc, &proc);
+ initMCBtype(&state->mcb_type, &type);
+
+ for (int i = 0; i < seqobj->height; i += 8) {
+ setMCTop(mcplanes);
+ for (int j = 0; j < seqobj->width; j += 8) {
+ getMCBtype(&state->mcb_type, &type);
+ if (type.value == 0) {
+ decode_PB_dc(state, mcplanes);
+ decode_PB_cc(state, mcplanes, 0, type.value);
+ } else {
+ reset_PB_dc(mcplanes);
+ decode_PB_cc(state, mcplanes, getMCBproc(&state->mcb_proc, &proc), type.value);
+ }
+ setMCNextBlk(mcplanes);
+ // for all planes
+ // top += h_mcb_stride
+ // payload_cur_blk += pb_per_mcb_x
+ }
+ setMCDownBlk(mcplanes);
+ // for all planes
+ // present += v_mcb_stride
+ // payload_cur_row += stride;
+ // payload_cur_blk = payload_cur_row
+ }
+}
+
+static void resetMCHandler(VideoState *state, MCPlane mcplanes[PLANE_COUNT], AVFrame *present)
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].present = present->data[i];
+ mcplanes[i].payload_cur_blk = state->planes[i].payload;
+ mcplanes[i].payload_cur_row = state->planes[i].payload;
+ }
+}
+
+static void MCBlockDecDCNest(VideoState *state, MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int plane_idx = 0; plane_idx < PLANE_COUNT; plane_idx++) {
+ BlockData const *ptr = mcplanes[plane_idx].payload_cur_blk;
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ ptrdiff_t stride = mcplanes[plane_idx].top_stride;
+ int32_t line = plane->h_blocks_safe;
+
+ for (int j = 0; j < plane->blocks_per_mcb; j++) {
+ // dst is a 4x4 region
+ uint8_t *dst = mcplanes[plane_idx].top + plane->px_offset[j] + plane->py_offset[j] * mcplanes[plane_idx].top_stride;
+ int32_t block_idx = plane->mcb_offset[j];
+ uint32_t value = ptr[block_idx].value;
+ // block type:
+ // 0: weighted
+ // 6: literal block
+ // 8: single value
+ uint32_t type = ptr[block_idx].type & 0xF;
+ // see also IpicBlockDec
+ if (type == 0) {
+ uint8_t top = ptr[block_idx - line].type & 0x77 ? value : ptr[block_idx - line].value;
+ uint8_t left = ptr[block_idx - 1].type & 0x77 ? value : ptr[block_idx - 1].value;
+ uint8_t right = ptr[block_idx + 1].type & 0x77 ? value : ptr[block_idx + 1].value;
+ uint8_t bottom = ptr[block_idx + line].type & 0x77 ? value : ptr[block_idx + line].value;
+ weight_im_block(dst, stride, value, top, bottom, left, right);
+ } else if (type == 8) {
+ dc_block(dst, stride, value);
+ } else {
+ intra_aot_block(state, dst, stride, value, type, plane_idx);
+ }
+ }
+ }
+}
+
+static void setMCTarget(MCPlane mcplanes[PLANE_COUNT], uint32_t reference_frame)
+{
+ if (reference_frame == 0) {
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].target = mcplanes[i].past;
+ mcplanes[i].target_stride = mcplanes[i].past_stride;
+ }
+ } else {
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].target = mcplanes[i].future;
+ mcplanes[i].target_stride = mcplanes[i].future_stride;
+ }
+ }
+}
+
+static void getMVector(int32_t *result, GBCWithVLC *buf, int32_t residual_bits)
+{
+ int32_t max_val_plus_1 = 1 << (residual_bits + 5);
+ // quantized value
+ int32_t value = decode_huff(buf) << residual_bits;
+ // residual bits
+ for (int i = residual_bits - 1; i >= 0; --i)
+ value += get_bits1(&buf->gb) << i;
+ *result += value;
+ // signed wrap to -max_val_plus_1 .. max_val_plus_1-1
+ if (*result >= max_val_plus_1)
+ *result -= max_val_plus_1 << 1;
+ else if (*result < -max_val_plus_1)
+ *result += max_val_plus_1 << 1;
+}
+
+static void _MotionComp_00(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = src[i * src_stride + j];
+}
+
+// offset vertically by half a sample
+static void _MotionComp_01(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = (
+ src[(i + 0) * src_stride + j] +
+ src[(i + 1) * src_stride + j] + 1) / 2;
+}
+
+// offset horizontally by half a sample
+static void _MotionComp_10(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = (
+ src[i * src_stride + j + 0] +
+ src[i * src_stride + j + 1] + 1) / 2;
+}
+
+// offset by half a sample in both directions
+static void _MotionComp_11(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = (
+ src[(i + 0) * src_stride + j + 0] +
+ src[(i + 0) * src_stride + j + 1] +
+ src[(i + 1) * src_stride + j + 0] +
+ src[(i + 1) * src_stride + j + 1] + 2) >> 2;
+}
+
+static void do_motion_comp(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride, uint32_t hpel_dx, uint32_t hpel_dy)
+{
+ if (hpel_dy == 0)
+ if (hpel_dx == 0)
+ _MotionComp_00(dst, dst_stride, src, src_stride);
+ else
+ _MotionComp_10(dst, dst_stride, src, src_stride);
+ else
+ if (hpel_dx == 0)
+ _MotionComp_01(dst, dst_stride, src, src_stride);
+ else
+ _MotionComp_11(dst, dst_stride, src, src_stride);
+}
+
+static uint32_t get_mc_aot_basis(VideoState *state, uint8_t basis_out[4][4],
+ int32_t *sum, uint8_t const *nest_data,
+ ptrdiff_t nest_stride, uint32_t plane_idx)
+{
+ // the only difference from get_aot_basis() seems to be the ">> 4 & 0xF"
+ GetBitContext *gb = &state->fixvl[plane_idx];
+ uint16_t bits = get_bits(gb, 16);
+ uint32_t step, stride;
+ uint32_t big = bits & 0x3F;
+ uint32_t small = (bits >> 6) & 0x1F;
+ int32_t inverse, foo;
+ uint8_t min, max;
+
+ if (state->is_landscape) {
+ nest_data += nest_stride * small + big;
+ step = 1 << ((bits >> 11) & 1);
+ stride = nest_stride << ((bits >> 12) & 1);
+ } else {
+ nest_data += nest_stride * big + small;
+ step = 1 << ((bits >> 12) & 1);
+ stride = nest_stride << ((bits >> 11) & 1);
+ }
+ min = max = (nest_data[0] >> 4) & 0xF; // !
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ uint8_t nest_value = (nest_data[i * stride + j * step] >> 4) & 0xF; // !
+
+ basis_out[i][j] = nest_value;
+ min = nest_value < min ? nest_value : min;
+ max = nest_value > max ? nest_value : max;
+ }
+ }
+ *sum += decode_huff(&state->bufTree0[plane_idx]);
+ inverse = div_tab[max - min];
+ if (bits & 0x8000)
+ inverse = -inverse;
+ foo = (bits >> 13) & 3;
+ return (*sum + foo) * inverse;
+}
+
+static int32_t get_mc_aot_sum(VideoState *state, int32_t result[4][4], uint8_t num_bases,
+ uint8_t const *nest_data, ptrdiff_t nest_stride, uint32_t plane_idx)
+{
+ uint8_t byte_result[4][4];
+ int32_t sum, mean, temp = 0;
+
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ result[i][j] = 0;
+
+ for (int k = 0; k < num_bases; k++) {
+ uint32_t factor = get_mc_aot_basis(state, byte_result, &temp, nest_data, nest_stride, plane_idx);
+
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ result[i][j] += factor * byte_result[i][j];
+ }
+
+ sum = 0;
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ sum += result[i][j];
+ mean = sum >> 4;
+
+ return mean;
+}
+
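+ // refine the motion-compensated prediction with an AOT residual: the decoded
+ // basis sum plus a scaled version of the prediction's deviation from its mean
+ // is added on top of the MC block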
+static void PrediAotBlock(VideoState *state, uint8_t *dst, uint8_t const *src, ptrdiff_t stride, uint8_t block_type,
+ uint8_t *nest_data, uint32_t h_nest_size, uint32_t plane_idx, uint32_t hpel_dx, uint32_t hpel_dy)
+{
+ int32_t result[4][4], mean, diff[4][4], min, max;
+ uint32_t addend, factor;
+ uint32_t aot_sum = get_mc_aot_sum(state, result, block_type - 1, nest_data, h_nest_size, plane_idx);
+ uint8_t mdst[4][4];
+ uint32_t const dst_stride = 4;
+
+ do_motion_comp((uint8_t *)mdst, dst_stride, src, stride, hpel_dx, hpel_dy);
+ mean = 8;
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ mean += mdst[y][x];
+ mean /= 16;
+ min = max = mdst[0][0] - mean;
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ int32_t value = diff[i][j] = mdst[i][j] - mean;
+
+ min = value < min ? value : min;
+ max = value > max ? value : max;
+ }
+ }
+ addend = (decode_sovf_sym(&state->dc_values[plane_idx], state->dc_min, state->dc_max) >> state->dc_shift << state->unk_shift) - aot_sum;
+ factor = (decode_sovf_sym(&state->dc_values[plane_idx], state->dc_min, state->dc_max) >> state->dc_shift);
+ factor *= mcdiv_tab[max - min];
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ result[i][j] += addend + diff[i][j] * factor;
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ uint32_t value = (result[i][j] >> state->unk_shift) + mdst[i][j];
+
+ dst[i * stride + j] = av_clip_uint8(value);
+ }
+ }
+}
+
+static void MCBlockDecMCNest(VideoState *state, MCPlane mcplanes[PLANE_COUNT], int32_t x, int32_t y)
+{
+ uint32_t hpel_dx = x & 1;
+ uint32_t hpel_dy = y & 1;
+ void *nest_data;
+
+ if (state->is_landscape)
+ nest_data = mcplanes[0].target + x/2 + (y/2 - 16)*mcplanes[0].target_stride - 32;
+ else
+ nest_data = mcplanes[0].target + x/2 + (y/2 - 32)*mcplanes[0].target_stride - 16;
+ for (int plane_idx = 0; plane_idx < PLANE_COUNT; plane_idx++) {
+ MCPlane *mcplane = &mcplanes[plane_idx];
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ for (int i = 0; i < plane->blocks_per_mcb; i++) {
+ BlockData const *ptr = mcplane->payload_cur_blk;
+ uint8_t block_type = ptr[plane->mcb_offset[i]].type & 0xF;
+ uint8_t *dst = mcplane->top + plane->px_offset[i] + plane->py_offset[i] * mcplane->top_stride;
+ ptrdiff_t stride = mcplane->top_stride;
+
+ if (block_type == 6) {
+ read_block(state, dst, stride, plane_idx);
+ } else {
+ int32_t plane_dx = x >> plane->width_shift;
+ int32_t plane_dy = y >> plane->height_shift;
+ uint8_t const *src = mcplane->target + (plane_dy >> 1) * mcplane->target_stride + (plane_dx >> 1) +
+ plane->px_offset[i] + plane->py_offset[i] * mcplane->target_stride;
+
+ if (state->padding[0]) {
+ hpel_dx = plane_dx & 1;
+ hpel_dy = plane_dy & 1;
+ }
+
+ if (block_type == 0) {
+ do_motion_comp(dst, stride, src, stride, hpel_dx, hpel_dy);
+ } else {
+ uint32_t strideY = mcplanes[0].target_stride;
+ PrediAotBlock(state, dst, src, stride, block_type, nest_data, strideY, plane_idx, hpel_dx, hpel_dy);
+ }
+ }
+ }
+ }
+}
+
+static void motion_comp(VideoState *state, MCPlane mcplanes[PLANE_COUNT], int32_t dx, int32_t dy)
+{
+ uint32_t hpel_dx = dx & 1;
+ uint32_t hpel_dy = dy & 1;
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ MCPlane *mcplane = &mcplanes[i];
+ HVQPlaneDesc *plane = &state->planes[i];
+ int32_t plane_dx = dx >> plane->width_shift;
+ int32_t plane_dy = dy >> plane->height_shift;
+ uint8_t *ptr = mcplane->target + (plane_dy >> 1) * mcplane->target_stride + (plane_dx >> 1);
+
+ if (state->padding[0]) {
+ hpel_dx = plane_dx & 1;
+ hpel_dy = plane_dy & 1;
+ }
+
+ for (int j = 0; j < plane->blocks_per_mcb; j++) {
+ do_motion_comp(mcplane->top + plane->px_offset[j] + plane->py_offset[j] * mcplane->top_stride,
+ mcplane->top_stride,
+ ptr + plane->px_offset[j] + plane->py_offset[j] * mcplane->target_stride,
+ mcplane->target_stride,
+ hpel_dx, hpel_dy);
+ }
+ }
+}
+
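+ // decode_bframe_plane() works in two passes: spread_PB_descMap() first fills
+ // in the per-block DC values and block types for the whole frame, then the
+ // main loop below reconstructs the pixels one 8x8 macroblock at a time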
+static void decode_bframe_plane(SeqObj *seqobj, AVFrame *present, AVFrame *past, AVFrame *future)
+{
+ MCPlane mcplanes[PLANE_COUNT];
+ VideoState *state = seqobj->state;
+ int32_t reference_frame = -1;
+ int32_t mv_h, mv_v;
+
+ init_mc_handler(state, mcplanes, present, past, future);
+ spread_PB_descMap(seqobj, mcplanes);
+ resetMCHandler(state, mcplanes, present);
+ for (int y = 0; y < seqobj->height; y += 8) { // MC blocks are 8x8 pixels
+ setMCTop(mcplanes);
+ for (int x = 0; x < seqobj->width; x += 8) {
+ uint8_t bits = mcplanes[0].payload_cur_blk->type;
+ // 0: intra
+ // 1: inter - past
+ // 2: inter - future
+ // see getMCBtype()
+ int8_t new_reference_frame = (bits >> 5) & 3;
+ if (new_reference_frame == 0)
+ {
+ // intra
+ MCBlockDecDCNest(state, mcplanes);
+ } else {
+ int mcb_proc;
+ uint32_t ref_x;
+ uint32_t ref_y;
+
+ new_reference_frame--;
+ // check if we need to update the reference frame pointers
+ if (new_reference_frame != reference_frame) {
+ reference_frame = new_reference_frame;
+ setMCTarget(mcplanes, reference_frame);
+ mv_h = 0;
+ mv_v = 0;
+ }
+
+ getMVector(&mv_h, &state->mv_h, state->mc_residual_bits_h[reference_frame]);
+ getMVector(&mv_v, &state->mv_v, state->mc_residual_bits_v[reference_frame]);
+
+ // compute half-pixel position of reference macroblock
+ ref_x = x * 2 + mv_h;
+ ref_y = y * 2 + mv_v;
+
+ // see getMCBproc()
+ mcb_proc = (bits >> 4) & 1;
+ if (mcb_proc == 0)
+ MCBlockDecMCNest(state, mcplanes, ref_x, ref_y);
+ else
+ motion_comp(state, mcplanes, ref_x, ref_y);
+ }
+ setMCNextBlk(mcplanes);
+ }
+ setMCDownBlk(mcplanes);
+ }
+}
+
+static int decode_bframe(SeqObj *seqobj, GetBitContext *gb, AVFrame *present, AVFrame *past, AVFrame *future)
+{
+ VideoState *state = seqobj->state;
+ int ret;
+
+ state->dc_shift = get_bits(gb, 8);
+ state->unk_shift = get_bits(gb, 8);
+ state->mc_residual_bits_h[0] = get_bits(gb, 8);
+ state->mc_residual_bits_v[0] = get_bits(gb, 8);
+ state->mc_residual_bits_h[1] = get_bits(gb, 8);
+ state->mc_residual_bits_v[1] = get_bits(gb, 8);
+ skip_bits_long(gb, 16);
+
+ for (int i = 0; i < 2; i++) {
+ ret = get_code(&state->basis_num[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->basis_num_run[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ ret = get_code(&state->dc_values[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->bufTree0[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->fixvl[i], gb, 82);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = get_code(&state->mv_h.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->mv_v.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->mcb_type.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->mcb_proc.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+
+ ret = build_huff(&state->basis_num[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->basis_num_run[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->dc_values[0], 1, state->dc_shift);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->bufTree0[0], 0, 2);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->mv_h, 1, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->mcb_type, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ state->dc_max = 0x7F << state->dc_shift;
+ state->dc_min = -(0x80 << state->dc_shift);
+
+ decode_bframe_plane(seqobj, present, past, future);
+
+ return 0;
+}
+
+static int hvqm4_decode(AVCodecContext *avctx, void *data,
+ int *got_frame, AVPacket *pkt)
+{
+ HVQM4Context *s = avctx->priv_data;
+ GetBitContext *gb = &s->gb;
+ AVFrame *frame = s->frame[0];
+ int frame_type;
+ int ret;
+
+ s->state.padding[0] = 1;
+
+ if ((ret = init_get_bits8(gb, pkt->data, pkt->size)) < 0)
+ return ret;
+
+ frame_type = get_bits(gb, 16);
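+ // frame[0] is the current decode target, frame[1] the past reference and
+ // frame[2] the most recently decoded reference; I and P frames are rotated
+ // into the reference slots via the swaps below, B frames are never referenced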
+ if (frame_type != B_FRAME)
+ FFSWAP(AVFrame *, s->frame[1], s->frame[2]);
+
+ if ((ret = ff_reget_buffer(avctx, frame, 0)) < 0)
+ return ret;
+
+ skip_bits_long(gb, 32);
+ switch (frame_type) {
+ case I_FRAME:
+ frame->pict_type = AV_PICTURE_TYPE_I;
+ frame->key_frame = 1;
+ ret = decode_iframe(&s->seqobj, gb, frame);
+ break;
+ case P_FRAME:
+ frame->pict_type = AV_PICTURE_TYPE_P;
+ frame->key_frame = 0;
+ if (!s->frame[1]->data[0])
+ return AVERROR_INVALIDDATA;
+ ret = decode_bframe(&s->seqobj, gb, frame, s->frame[1], frame);
+ break;
+ case B_FRAME:
+ frame->pict_type = AV_PICTURE_TYPE_B;
+ frame->key_frame = 0;
+ if (!s->frame[1]->data[0] ||
+ !s->frame[2]->data[0])
+ return AVERROR_INVALIDDATA;
+ ret = decode_bframe(&s->seqobj, gb, frame, s->frame[1], s->frame[2]);
+ break;
+ default:
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ ret = av_frame_ref(data, frame);
+ if (ret < 0)
+ return ret;
+
+ if (frame_type != B_FRAME)
+ FFSWAP(AVFrame *, s->frame[0], s->frame[2]);
+
+ *got_frame = 1;
+
+ return 0;
+}
+
+static void hvqm4_flush(AVCodecContext *avctx)
+{
+ HVQM4Context *s = avctx->priv_data;
+
+ for (int i = 0; i < 3; i++)
+ av_frame_unref(s->frame[i]);
+}
+
+static av_cold int hvqm4_close(AVCodecContext *avctx)
+{
+ HVQM4Context *s = avctx->priv_data;
+
+ av_freep(&s->buffer);
+ for (int i = 0; i < 6; i++)
+ ff_free_vlc(&s->state.vlc[i]);
+ for (int i = 0; i < 3; i++)
+ av_frame_free(&s->frame[i]);
+
+ return 0;
+}
+
+const AVCodec ff_hvqm4_decoder = {
+ .name = "hvqm4",
+ .long_name = NULL_IF_CONFIG_SMALL("HVQM4 Video"),
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_HVQM4,
+ .priv_data_size = sizeof(HVQM4Context),
+ .init = hvqm4_init,
+ .decode = hvqm4_decode,
+ .flush = hvqm4_flush,
+ .close = hvqm4_close,
+ .capabilities = AV_CODEC_CAP_DR1,
+ .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
+};