[FFmpeg-cvslog] avcodec: add HVQM4 Video decoder
Paul B Mahol
git at videolan.org
Wed Feb 23 18:00:45 EET 2022
ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sat Feb 12 09:10:00 2022 +0100| [57f4da0973c665862ac48c244db64a9294f71c81] | committer: Paul B Mahol
avcodec: add HVQM4 Video decoder
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=57f4da0973c665862ac48c244db64a9294f71c81
---
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/codec_desc.c | 7 +
libavcodec/codec_id.h | 1 +
libavcodec/hvqm4.c | 1719 +++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 1729 insertions(+)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 276df8ee5b..55ef3185f9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -418,6 +418,7 @@ OBJS-$(CONFIG_HQ_HQA_DECODER) += hq_hqa.o hq_hqadata.o hq_hqadsp.o \
OBJS-$(CONFIG_HQX_DECODER) += hqx.o hqxvlc.o hqxdsp.o canopus.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o huffyuvdec.o
OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o huffyuvenc.o
+OBJS-$(CONFIG_HVQM4_DECODER) += hvqm4.o
OBJS-$(CONFIG_HYMT_DECODER) += huffyuv.o huffyuvdec.o
OBJS-$(CONFIG_IDCIN_DECODER) += idcinvideo.o
OBJS-$(CONFIG_IDF_DECODER) += bintext.o cga_data.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 89ba205a2f..9fa5007c0f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -164,6 +164,7 @@ extern const AVCodec ff_hq_hqa_decoder;
extern const AVCodec ff_hqx_decoder;
extern const AVCodec ff_huffyuv_encoder;
extern const AVCodec ff_huffyuv_decoder;
+extern const AVCodec ff_hvqm4_decoder;
extern const AVCodec ff_hymt_decoder;
extern const AVCodec ff_idcin_decoder;
extern const AVCodec ff_iff_ilbm_decoder;
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 6deba785dc..cdcb164336 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -1862,6 +1862,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
.long_name = NULL_IF_CONFIG_SMALL("GEM Raster image"),
.props = AV_CODEC_PROP_LOSSY,
},
+ {
+ .id = AV_CODEC_ID_HVQM4,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "hvqm4",
+ .long_name = NULL_IF_CONFIG_SMALL("HVQM4 Video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
/* various PCM "codecs" */
{
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index f3f262ec75..f1109c27d9 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -308,6 +308,7 @@ enum AVCodecID {
AV_CODEC_ID_SIMBIOSIS_IMX,
AV_CODEC_ID_SGA_VIDEO,
AV_CODEC_ID_GEM,
+ AV_CODEC_ID_HVQM4,
/* various PCM "codecs" */
AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs
diff --git a/libavcodec/hvqm4.c b/libavcodec/hvqm4.c
new file mode 100644
index 0000000000..9b1238c5bc
--- /dev/null
+++ b/libavcodec/hvqm4.c
@@ -0,0 +1,1719 @@
+/*
+ * HVQM4 Video decoder
+ * Copyright (c) 2018-2020 Tillmann Karras
+ * Copyright (c) 2022 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/thread.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "get_bits.h"
+#include "golomb.h"
+#include "internal.h"
+
+#define PLANE_COUNT 3
+#define LUMA_CHROMA 2
+#define LUMA_IDX 0
+#define CHROMA_IDX 1
+
+enum FrameType {
+ I_FRAME = 0x10,
+ P_FRAME = 0x20,
+ B_FRAME = 0x30,
+};
+
+typedef struct BlockData {
+ uint8_t value;
+ uint8_t type;
+} BlockData;
+
+typedef struct StackState {
+ uint32_t plane_idx;
+ BlockData const *line_prev;
+ BlockData const *line_curr;
+ BlockData const *line_next;
+ BlockData next;
+ BlockData curr;
+ uint8_t value_prev;
+} StackState;
+
+typedef struct GBCWithVLC {
+ GetBitContext gb;
+ VLC *vlc;
+} GBCWithVLC;
+
+typedef struct HVQPlaneDesc {
+ BlockData *border; // 0-3 beginning of the plane including the border
+ BlockData *payload; // 4-7 beginning of the non-border plane data
+ uint16_t h_blocks;
+ uint16_t v_blocks;
+ uint16_t h_blocks_safe;
+ uint16_t v_blocks_safe;
+ // offsets of PBs within one MCB
+ // +---+---+
+ // | 0 | 3 |
+ // +---+---+
+ // | 1 | 2 |
+ // +---+---+
+ uint16_t mcb_offset[4];
+ uint32_t px_offset[4];
+ uint32_t py_offset[4];
+ uint8_t width_shift;
+ uint8_t height_shift;
+ uint8_t pb_per_mcb_x;
+ uint8_t pb_per_mcb_y;
+ uint8_t blocks_per_mcb;
+ uint8_t padding[3];
+} HVQPlaneDesc;
+
+typedef struct VideoState {
+ HVQPlaneDesc planes[PLANE_COUNT];
+ VLC vlc[6];
+ GBCWithVLC dc_values[PLANE_COUNT]; // DC values
+ GBCWithVLC dc_rle[PLANE_COUNT]; // DC run lengths
+ GBCWithVLC bufTree0[PLANE_COUNT];
+ GBCWithVLC basis_num[LUMA_CHROMA];
+ GBCWithVLC basis_num_run[LUMA_CHROMA];
+ GetBitContext fixvl[PLANE_COUNT]; // uncompressed high-entropy data
+ GBCWithVLC mv_h; // horizontal motion vectors
+ GBCWithVLC mv_v; // vertical motion vectors
+ GBCWithVLC mcb_proc; // macroblock proc
+ GBCWithVLC mcb_type; // macroblock type
+ uint16_t h_nest_size;
+ uint16_t v_nest_size;
+ uint8_t is_landscape; // FIXME: check what happens for square video
+ uint8_t nest_data[70 * 38];
+ uint8_t padding[3];
+ uint32_t dc_max;
+ uint32_t dc_min;
+ uint8_t unk_shift;
+ uint8_t dc_shift;
+ // number of residual bits to read from mv_h/mv_v,
+ // one setting for each of past and future
+ uint8_t mc_residual_bits_h[2];
+ uint8_t mc_residual_bits_v[2];
+ uint8_t maybe_padding[2];
+} VideoState;
+
+typedef struct SeqObj {
+ VideoState *state;
+ uint16_t width;
+ uint16_t height;
+ uint8_t h_samp;
+ uint8_t v_samp;
+} SeqObj;
+
+typedef struct MCPlane {
+ uint32_t rle;
+ uint32_t pb_dc;
+ BlockData *payload_cur_blk;
+ BlockData *payload_cur_row;
+ uint8_t *present;
+ ptrdiff_t present_stride;
+ uint8_t *top;
+ ptrdiff_t top_stride;
+ uint8_t *target;
+ ptrdiff_t target_stride;
+ uint8_t *past;
+ ptrdiff_t past_stride;
+ uint8_t *future;
+ ptrdiff_t future_stride;
+ uint16_t h_mcb_stride;
+ uint16_t padding;
+ uint32_t v_mcb_stride;
+ uint32_t pb_per_mcb_x;
+ ptrdiff_t stride;
+} MCPlane;
+
+struct RLDecoder {
+ uint32_t value;
+ uint32_t count;
+};
+
+typedef struct HVQM4Context {
+ AVFrame *frame[3];
+
+ SeqObj seqobj;
+ VideoState state;
+ uint8_t *buffer;
+
+ int current_pic;
+
+ GetBitContext gb;
+} HVQM4Context;
+
+static int32_t div_tab[16];
+static int32_t mcdiv_tab[512];
+
+static av_cold void hvqm4_init_static(void)
+{
+ div_tab[0] = 0;
+ mcdiv_tab[0] = 0;
+
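+ // reciprocal tables scaled by 0x1000: div_tab[] (computed with the coarser
+ // 0x1000 / (i * 16) * 16) rescales AOT basis vectors in get_aot_basis() and
+ // get_mc_aot_basis(), mcdiv_tab[] rescales the prediction residual in
+ // PrediAotBlock(); both avoid per-block divisions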
+ for (int i = 1; i < 0x10; i++)
+ div_tab[i] = 0x1000 / (i * 16) * 16;
+ for (int i = 1; i < 0x200; i++)
+ mcdiv_tab[i] = 0x1000 / i;
+}
+
+static void set_plane_desc(SeqObj *seqobj, uint8_t plane_idx, uint8_t h_samp, uint8_t v_samp)
+{
+ HVQPlaneDesc *plane = &seqobj->state->planes[plane_idx];
+
+ plane->width_shift = h_samp == 2 ? 1 : 0;
+ plane->height_shift = v_samp == 2 ? 1 : 0;
+ // 4x4-pixel blocks per macroblock in each direction
+ plane->pb_per_mcb_x = 2 >> plane->width_shift; // 1..2
+ plane->pb_per_mcb_y = 2 >> plane->height_shift; // 1..2
+ plane->blocks_per_mcb = plane->pb_per_mcb_x * plane->pb_per_mcb_y; // 1..4
+ // number of 4x4 blocks
+ plane->h_blocks = seqobj->width / (h_samp * 4);
+ plane->v_blocks = seqobj->height / (v_samp * 4);
+ // number of 4x4 blocks + border
+ plane->h_blocks_safe = plane->h_blocks + 2;
+ plane->v_blocks_safe = plane->v_blocks + 2;
+ // offset of blocks in MCB
+ plane->mcb_offset[0] = 0;
+ plane->mcb_offset[1] = plane->h_blocks_safe;
+ plane->mcb_offset[2] = plane->h_blocks_safe + 1;
+ plane->mcb_offset[3] = 1;
+ plane->px_offset[0] = 0;
+ plane->py_offset[0] = 0;
+ plane->px_offset[1] = 0;
+ plane->py_offset[1] = 4;
+ plane->px_offset[2] = 4;
+ plane->py_offset[2] = 4;
+ plane->px_offset[3] = 4;
+ plane->py_offset[3] = 0;
+}
+
+static void set_border(BlockData *dst)
+{
+ dst->value = 0x7F;
+ dst->type = 0xFF;
+}
+
+static void set_buffer(SeqObj *seqobj, void *workbuff, uint8_t *buffer)
+{
+ VideoState *state = workbuff;
+ BlockData *plane_data;
+
+ seqobj->state = state;
+ set_plane_desc(seqobj, 0, 1, 1);
+ set_plane_desc(seqobj, 1, 2, 2);
+ set_plane_desc(seqobj, 2, 2, 2);
+
+ state->is_landscape = seqobj->width >= seqobj->height;
+ if (state->is_landscape) {
+ state->h_nest_size = 70;
+ state->v_nest_size = 38;
+ } else {
+ state->h_nest_size = 38;
+ state->v_nest_size = 70;
+ }
+
+ state->basis_num[0].vlc = &state->vlc[3];
+ state->basis_num[1].vlc = &state->vlc[3];
+
+ state->basis_num_run[0].vlc = &state->vlc[1];
+ state->basis_num_run[1].vlc = &state->vlc[1];
+
+ state->dc_values[0].vlc = &state->vlc[0];
+ state->dc_values[1].vlc = &state->vlc[0];
+ state->dc_values[2].vlc = &state->vlc[0];
+
+ state->dc_rle[0].vlc = &state->vlc[1]; // reuse!
+ state->dc_rle[1].vlc = &state->vlc[1]; //
+ state->dc_rle[2].vlc = &state->vlc[1]; //
+
+ state->bufTree0[0].vlc = &state->vlc[2];
+ state->bufTree0[1].vlc = &state->vlc[2];
+ state->bufTree0[2].vlc = &state->vlc[2];
+
+ state->mv_h.vlc = &state->vlc[4];
+ state->mv_v.vlc = &state->vlc[4];
+
+ state->mcb_proc.vlc = &state->vlc[5];
+ state->mcb_type.vlc = &state->vlc[5];
+
+ plane_data = (BlockData *)buffer;
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ HVQPlaneDesc *plane = &state->planes[i];
+ ptrdiff_t stride = plane->h_blocks_safe;
+ BlockData *ptr;
+
+ plane->border = plane_data;
+ // skip top border (stride) and left border (1)
+ plane->payload = plane_data + stride + 1;
+ plane_data += plane->h_blocks_safe * plane->v_blocks_safe;
+
+ // set horizontal borders
+ ptr = plane->border;
+ for (int i = plane->h_blocks_safe; i > 0; --i) {
+ set_border(ptr);
+ ++ptr;
+ }
+
+ ptr = plane_data;
+ for (int i = plane->h_blocks_safe; i > 0; --i) {
+ --ptr;
+ set_border(ptr);
+ }
+
+ // set vertical borders
+ ptr = plane->border + stride;
+ for (int i = plane->v_blocks_safe - 2; i > 0; --i) {
+ set_border(ptr);
+ ptr += stride;
+ }
+
+ ptr = plane->border + stride * 2 - 1;
+ for (int i = plane->v_blocks_safe - 2; i > 0; --i) {
+ set_border(ptr);
+ ptr += stride;
+ }
+ }
+}
+
+static uint32_t hvqm4_buffsize(SeqObj *seqobj)
+{
+ uint32_t h_blocks = seqobj->width / 4;
+ uint32_t v_blocks = seqobj->height / 4;
+ uint32_t y_blocks = (h_blocks + 2) * (v_blocks + 2);
+
+ uint32_t uv_h_blocks = seqobj->h_samp == 2 ? h_blocks / 2 : h_blocks;
+ uint32_t uv_v_blocks = seqobj->v_samp == 2 ? v_blocks / 2 : v_blocks;
+ uint32_t uv_blocks = (uv_h_blocks + 2) * (uv_v_blocks + 2);
+
+ uint32_t total = (y_blocks + uv_blocks * 2) * sizeof(uint16_t);
+
+ return total;
+}
+
+static av_cold int hvqm4_init(AVCodecContext *avctx)
+{
+ int width = avctx->width, height = avctx->height;
+ static AVOnce init_static_once = AV_ONCE_INIT;
+ HVQM4Context *s = avctx->priv_data;
+
+ avctx->pix_fmt = AV_PIX_FMT_YUV420P;
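+ // grow the coded dimensions by 16 pixels so the allocated frame buffers are
+ // slightly larger than the visible area (presumably as a safety margin for
+ // motion compensation); the display width/height are restored right after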
+ ff_set_dimensions(avctx, avctx->width + 16, avctx->height + 16);
+ avctx->width = width;
+ avctx->height = height;
+
+ for (int i = 0; i < 3; i++) {
+ s->frame[i] = av_frame_alloc();
+ if (!s->frame[i])
+ return AVERROR(ENOMEM);
+ }
+
+ ff_thread_once(&init_static_once, hvqm4_init_static);
+
+ s->seqobj.width = avctx->width;
+ s->seqobj.height = avctx->height;
+ s->seqobj.h_samp = 2;
+ s->seqobj.v_samp = 2;
+
+ s->buffer = av_calloc(hvqm4_buffsize(&s->seqobj), 1);
+ if (!s->buffer)
+ return AVERROR(ENOMEM);
+
+ set_buffer(&s->seqobj, &s->state, s->buffer);
+
+ return 0;
+}
+
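+/*
+ * The Huffman trees are serialized depth-first: a 1 bit denotes an internal
+ * node (recurse into both children), a 0 bit a leaf followed by its 8-bit
+ * symbol, which is optionally sign-extended and scaled by 1 << tree_scale.
+ */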
+static int read_trees(int index,
+ int length,
+ uint16_t code,
+ uint8_t *bits,
+ uint16_t *codes,
+ uint16_t *symbols,
+ GetBitContext *gb,
+ const uint32_t tree_signed,
+ const uint32_t tree_scale)
+{
+ if (get_bits1(gb) == 0) { // leaf node
+ uint8_t byte = get_bits(gb, 8);
+ int16_t symbol = byte;
+
+ if (tree_signed && byte > 0x7F)
+ symbol = (int8_t)byte;
+
+ symbol *= 1 << tree_scale;
+ bits[index] = length;
+ codes[index] = code;
+ symbols[index] = symbol;
+ index++;
+ return index;
+ } else { // recurse
+ index = read_trees(index, length + 1, code << 1, bits, codes, symbols, gb, tree_signed, tree_scale);
+ index = read_trees(index, length + 1, (code << 1) + 1, bits, codes, symbols, gb, tree_signed, tree_scale);
+ return index;
+ }
+}
+
+static int build_huff(GBCWithVLC *buf, uint32_t is_signed, uint32_t scale)
+{
+ const uint32_t tree_signed = is_signed;
+ const uint32_t tree_scale = scale;
+ uint8_t bits[256] = { 0 };
+ uint16_t codes[256] = { 0 };
+ uint16_t symbols[256] = { 0 };
+ VLC *vlc = buf->vlc;
+ int nb_codes;
+
+ ff_free_vlc(vlc);
+ nb_codes = read_trees(0, 0, 0, bits, codes, symbols, &buf->gb, tree_signed, tree_scale);
+
+ return ff_init_vlc_sparse(vlc, ff_log2(nb_codes) + 3, nb_codes, bits, 1, 1,
+ codes, 2, 2, symbols, 2, 2, 0);
+}
+
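+/*
+ * Each coded substream is referenced by a 32-bit offset stored in the frame
+ * header. get_code() reads that offset, seeks to offset + skip bytes into the
+ * packet, reads a 32-bit byte count there and sets up a bit reader over the
+ * payload that follows.
+ */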
+static int get_code(GetBitContext *new_gb, GetBitContext *gb, int skip)
+{
+ GetBitContext tmp_gb = *gb;
+ uint32_t new_size, offset = get_bits_long(gb, 32);
+ int ret;
+
+ if (offset >= INT32_MAX - skip - 4)
+ return AVERROR_INVALIDDATA;
+
+ offset += skip;
+
+ if ((gb->size_in_bits >> 3) <= offset + 4)
+ return AVERROR_INVALIDDATA;
+
+ ret = init_get_bits8(&tmp_gb, gb->buffer + offset, (gb->size_in_bits >> 3) - offset);
+ if (ret < 0)
+ return ret;
+
+ new_size = get_bits_long(&tmp_gb, 32);
+
+ if (new_size >= INT32_MAX - 4)
+ return AVERROR_INVALIDDATA;
+
+ if ((tmp_gb.size_in_bits >> 3) < new_size + 4)
+ return AVERROR_INVALIDDATA;
+
+ ret = init_get_bits8(new_gb, tmp_gb.buffer + 4, new_size);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static uint32_t decode_huff(GBCWithVLC *buf)
+{
+ return get_vlc2(&buf->gb, buf->vlc->table, buf->vlc->bits, 3);
+}
+
+static void iframe_basis_numdec(VideoState *state)
+{
+ BlockData *luma_dst = state->planes[LUMA_IDX].payload;
+ const uint32_t luma_h_blocks = state->planes[LUMA_IDX].h_blocks;
+ const uint32_t luma_v_blocks = state->planes[LUMA_IDX].v_blocks;
+ const uint32_t chroma_h_blocks = state->planes[CHROMA_IDX].h_blocks;
+ const uint32_t chroma_v_blocks = state->planes[CHROMA_IDX].v_blocks;
+ BlockData *u_dst = state->planes[1].payload;
+ BlockData *v_dst = state->planes[2].payload;
+ uint32_t rle = 0;
+
+ for (int y = 0; y < luma_v_blocks; y++) {
+ for (int x = 0; x < luma_h_blocks; x++) {
+ if (rle) {
+ luma_dst->type = 0;
+ rle--;
+ } else {
+ int16_t num = decode_huff(&state->basis_num[LUMA_IDX]) & 0xFFFF;
+ if (num == 0)
+ rle = decode_huff(&state->basis_num_run[LUMA_IDX]);
+ luma_dst->type = num & 0xFF;
+ }
+ luma_dst++;
+ }
+ // skip borders
+ luma_dst += 2;
+ }
+
+ rle = 0;
+ for (int y = 0; y < chroma_v_blocks; y++) {
+ for (int x = 0; x < chroma_h_blocks; x++) {
+ if (rle) {
+ u_dst->type = 0;
+ v_dst->type = 0;
+ --rle;
+ } else {
+ int16_t num = decode_huff(&state->basis_num[CHROMA_IDX]) & 0xFFFF;
+ if (num == 0)
+ rle = decode_huff(&state->basis_num_run[CHROMA_IDX]);
+ u_dst->type = (num >> 0) & 0xF;
+ v_dst->type = (num >> 4) & 0xF;
+ }
+ ++u_dst;
+ ++v_dst;
+ }
+ u_dst += 2;
+ v_dst += 2;
+ }
+}
+
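+ // overflow-coded symbols: values at or beyond the allowed range are treated
+ // as escapes and accumulated until a value strictly inside (min, max) ends
+ // the sequence; the final value is included in the returned sum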
+static int32_t decode_sovf_sym(GBCWithVLC *buf, int32_t min, int32_t max)
+{
+ int32_t sum = 0, value;
+
+ do {
+ value = decode_huff(buf);
+ sum += value;
+ } while (value <= min || value >= max);
+
+ return sum;
+}
+
+static int32_t decode_uovf_sym(GBCWithVLC *buf, int32_t max)
+{
+ int32_t sum = 0, value;
+
+ do {
+ value = decode_huff(buf);
+ sum += value;
+ } while (value >= max);
+
+ return sum;
+}
+
+static uint32_t get_delta_dc(VideoState *state, uint32_t plane_idx, uint32_t *rle)
+{
+ if (*rle == 0) {
+ uint32_t delta = decode_sovf_sym(&state->dc_values[plane_idx], state->dc_min, state->dc_max);
+
+ if (delta == 0) // successive zeroes are run-length encoded
+ *rle = decode_huff(&state->dc_rle[plane_idx]);
+ return delta;
+ } else {
+ --*rle;
+ return 0;
+ }
+}
+
+static void iframe_dc_decode(VideoState *state)
+{
+ for (int plane_idx = 0; plane_idx < PLANE_COUNT; ++plane_idx) {
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ uint32_t rle = 0;
+ const uint32_t v_blocks = plane->v_blocks;
+ BlockData *curr = plane->payload;
+ for (uint32_t y = 0; y < v_blocks; y++) {
+ // pointer to previous line
+ BlockData const *prev = curr - plane->h_blocks_safe;
+ // first prediction on a line is only the previous line's value
+ uint8_t value = prev->value;
+ for (uint32_t x = 0; x < plane->h_blocks; x++) {
+ value += get_delta_dc(state, plane_idx, &rle);
+ curr->value = value;
+ ++curr;
+ ++prev;
+ // next prediction on this line is the mean of left (current) and top values
+ // +---+---+
+ // | | T |
+ // +---+---+
+ // | L | P |
+ // +---+---+
+ value = (value + prev->value + 1) / 2;
+ }
+ // skip right border of this line and left border of next line
+ curr += 2;
+ }
+ }
+}
+
+static void make_nest(VideoState *state, uint16_t nest_x, uint16_t nest_y)
+{
+ int32_t v_empty, h_empty, v_nest_blocks, h_nest_blocks, v_mirror, h_mirror;
+ HVQPlaneDesc *y_plane = &state->planes[0];
+ BlockData const *ptr = y_plane->payload + y_plane->h_blocks_safe * nest_y + nest_x;
+ uint8_t const *nest2;
+ uint8_t *nest;
+
+ if (y_plane->h_blocks < state->h_nest_size) {
+ // special case if the video is less than 280 pixels wide (assuming landscape mode)
+ h_nest_blocks = y_plane->h_blocks;
+ h_mirror = state->h_nest_size - y_plane->h_blocks;
+ if (h_mirror > y_plane->h_blocks)
+ h_mirror = y_plane->h_blocks;
+ h_empty = state->h_nest_size - (h_nest_blocks + h_mirror);
+ } else {
+ h_nest_blocks = state->h_nest_size;
+ h_empty = 0;
+ h_mirror = 0;
+ }
+
+ if (y_plane->v_blocks < state->v_nest_size) {
+ // special case if the video is less than 152 pixels high
+ v_nest_blocks = y_plane->v_blocks;
+ v_mirror = state->v_nest_size - y_plane->v_blocks;
+ if (v_mirror > y_plane->v_blocks)
+ v_mirror = y_plane->v_blocks;
+ v_empty = state->v_nest_size - (v_nest_blocks + v_mirror);
+ } else {
+ v_nest_blocks = state->v_nest_size;
+ v_empty = 0;
+ v_mirror = 0;
+ }
+
+ nest = state->nest_data;
+ for (int i = 0; i < v_nest_blocks; i++) {
+ BlockData const *p = ptr;
+ for (int j = 0; j < h_nest_blocks; j++) {
+ *nest++ = (p->value >> 4) & 0xF;
+ ++p;
+ }
+ // if the video is too small, mirror it
+ for (int j = 0; j < h_mirror; j++) {
+ --p;
+ *nest++ = (p->value >> 4) & 0xF;
+ }
+ // if it is still too small, null out the rest
+ for (int j = 0; j < h_empty; j++)
+ *nest++ = 0;
+ ptr += y_plane->h_blocks_safe;
+ }
+
+ // handle vertical mirroring
+ nest2 = nest - state->h_nest_size;
+ for (int i = 0; i < v_mirror; i++) {
+ for (int j = 0; j < state->h_nest_size; j++)
+ *nest++ = nest2[j];
+ nest2 -= state->h_nest_size;
+ }
+
+ // and vertical nulling
+ for (int i = 0; i < v_empty; i++)
+ for (int j = 0; j < state->h_nest_size; j++)
+ *nest++ = 0;
+}
+
+static uint8_t sat_mean8(uint32_t u)
+{
+ return av_clip_uint8((u + 4) / 8);
+}
+
+static void weight_im_block(uint8_t *dst, ptrdiff_t stride, uint8_t value,
+ uint8_t top, uint8_t bottom, uint8_t left, uint8_t right)
+{
+ /*
+ +---+---+---+
+ | | T | |
+ +---+---+---+
+ | L | D | R |
+ +---+---+---+
+ | | B | |
+ +---+---+---+
+ */
+ int32_t tmb = top - bottom;
+ int32_t lmr = left - right;
+ int32_t vph = tmb + lmr;
+ int32_t vmh = tmb - lmr;
+
+ int32_t v2 = value * 2;
+ int32_t v8 = value * 8;
+
+ int32_t tpl = (top + left) - v2;
+ int32_t tpr = (top + right) - v2;
+ int32_t bpr = (bottom + right) - v2;
+ int32_t bpl = (bottom + left) - v2;
+
+ int32_t tml = top - left;
+ int32_t tmr = top - right;
+ int32_t bmr = bottom - right;
+ int32_t bml = bottom - left;
+
+ // V:
+ // 6 8 8 6
+ // 8 10 10 8
+ // 8 10 10 8
+ // 6 8 8 6
+ //
+ // T:
+ // 2 2 2 2
+ // 0 0 0 0
+ // -1 -1 -1 -1
+ // -1 -1 -1 -1
+ //
+ // B/L/R: like T but rotated accordingly
+
+ // (6*V + 2*T - B + 2*L - R + 4) / 8
+ // (8*V + 2*T - B - R + 4) / 8
+ // (8*V + 2*T - B - L + 4) / 8
+ // (6*V + 2*T - B - L + 2*R + 4) / 8
+
+ dst[0] = sat_mean8(v8 + vph + tpl);
+ dst[1] = sat_mean8(v8 + vph + tml);
+ dst[2] = sat_mean8(v8 + vmh + tmr);
+ dst[3] = sat_mean8(v8 + vmh + tpr);
+
+ dst += stride;
+
+ // ( 8*V - B + 2*L - R + 4) / 8
+ // (10*V - B - R + 4) / 8
+ // (10*V - B - L + 4) / 8
+ // ( 8*V - B - L + 2*R + 4) / 8
+
+ dst[0] = sat_mean8(v8 + vph - tml);
+ dst[1] = sat_mean8(v8 - bpr);
+ dst[2] = sat_mean8(v8 - bpl);
+ dst[3] = sat_mean8(v8 + vmh - tmr);
+
+ dst += stride;
+
+ // ( 8*V - T + 2*L - R + 4) / 8
+ // (10*V - T - R + 4) / 8
+ // (10*V - T - L + 4) / 8
+ // ( 8*V - T - L + 2*R + 4) / 8
+
+ dst[0] = sat_mean8(v8 - vmh - bml);
+ dst[1] = sat_mean8(v8 - tpr);
+ dst[2] = sat_mean8(v8 - tpl);
+ dst[3] = sat_mean8(v8 - vph - bmr);
+
+ dst += stride;
+
+ dst[0] = sat_mean8(v8 - vmh + bpl);
+ dst[1] = sat_mean8(v8 - vmh + bml);
+ dst[2] = sat_mean8(v8 - vph + bmr);
+ dst[3] = sat_mean8(v8 - vph + bpr);
+}
+
+static void dc_block(uint8_t *dst, ptrdiff_t stride, uint8_t value)
+{
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ dst[y * stride + x] = value;
+}
+
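+/*
+ * Each basis vector is described by a 16-bit word from the fixvl stream:
+ * bits 0-5 and 6-10 select a position in the 70x38 (or 38x70) nest,
+ * bits 11-12 select single or double stepping in x and y, bits 13-14 are a
+ * small offset added to the decoded sum before scaling, and bit 15 negates
+ * the scale factor taken from div_tab[].
+ */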
+static uint32_t get_aot_basis(VideoState *state, uint8_t basis_out[4][4],
+ int32_t *sum, uint8_t const *nest_data,
+ uint32_t nest_stride, uint32_t plane_idx)
+{
+ GetBitContext *gb = &state->fixvl[plane_idx];
+ uint16_t bits = get_bits(gb, 16);
+ uint32_t x_stride, y_stride;
+ uint32_t offset70 = bits & 0x3F;
+ uint32_t offset38 = (bits >> 6) & 0x1F;
+ uint32_t stride70 = (bits >> 11) & 1;
+ uint32_t stride38 = (bits >> 12) & 1;
+ int32_t inverse, offset;
+ uint8_t min, max;
+
+ if (state->is_landscape) {
+ nest_data += nest_stride * offset38 + offset70;
+ x_stride = 1 << stride70;
+ y_stride = nest_stride << stride38;
+ } else {
+ nest_data += nest_stride * offset70 + offset38;
+ x_stride = 1 << stride38;
+ y_stride = nest_stride << stride70;
+ }
+
+ // copy basis vector from the nest
+ min = nest_data[0];
+ max = nest_data[0];
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ uint8_t nest_value = nest_data[y * y_stride + x * x_stride];
+ basis_out[y][x] = nest_value;
+ min = nest_value < min ? nest_value : min;
+ max = nest_value > max ? nest_value : max;
+ }
+ }
+ *sum += decode_huff(&state->bufTree0[plane_idx]);
+ inverse = div_tab[max - min];
+ if (bits & 0x8000)
+ inverse = -inverse;
+ offset = (bits >> 13) & 3;
+ return (*sum + offset) * inverse;
+}
+
+static int32_t get_aot_sum(VideoState *state, int32_t result[4][4],
+ uint8_t num_bases, uint8_t const *nest_data,
+ uint32_t nest_stride, uint32_t plane_idx)
+{
+ int32_t temp, sum, mean;
+ uint8_t basis[4][4];
+
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ result[y][x] = 0;
+ temp = 0;
+
+ for (int k = 0; k < num_bases; k++) {
+ uint32_t factor = get_aot_basis(state, basis, &temp, nest_data, nest_stride, plane_idx);
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ result[y][x] += factor * basis[y][x];
+ }
+
+ sum = 0;
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ sum += result[y][x];
+ mean = sum >> 4;
+ return mean;
+}
+
+static void read_block(VideoState *state, uint8_t *dst, ptrdiff_t dst_stride, uint32_t plane_idx)
+{
+ GetBitContext *gb = &state->fixvl[plane_idx];
+
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ dst[y * dst_stride + x] = get_bits(gb, 8);
+}
+
+static void intra_aot_block(VideoState *state, uint8_t *dst, ptrdiff_t stride,
+ uint8_t target_average, uint8_t block_type, uint32_t plane_idx)
+{
+ int32_t result[4][4], aot_average, delta;
+
+ if (block_type == 6) {
+ read_block(state, dst, stride, plane_idx);
+ return;
+ }
+
+ // block types 1..5 serve as number of bases to use, 9..15 are unused
+ aot_average = get_aot_sum(state, result, block_type, state->nest_data, state->h_nest_size, plane_idx);
+ delta = (target_average << state->unk_shift) - aot_average;
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 4; x++) {
+ int32_t value = ((result[y][x] + delta) >> state->unk_shift);
+ dst[y * stride + x] = av_clip_uint8(value);
+ }
+ }
+}
+
+static void decode_iframe_block(VideoState *state, uint8_t *dst, ptrdiff_t stride, StackState *stack_state)
+{
+ if (stack_state->curr.type == 0) {
+ uint8_t top = stack_state->line_prev->type & 0x77 ? stack_state->curr.value : stack_state->line_prev->value;
+ uint8_t bottom = stack_state->line_next->type & 0x77 ? stack_state->curr.value : stack_state->line_next->value;
+ uint8_t right = stack_state->next.type & 0x77 ? stack_state->curr.value : stack_state->next.value;
+ // the left value is tracked manually; the logic is equivalent to that used for the other surrounding values
+ uint8_t left = stack_state->value_prev;
+
+ weight_im_block(dst, stride, stack_state->curr.value, top, bottom, left, right);
+ stack_state->value_prev = stack_state->curr.value;
+ } else if (stack_state->curr.type == 8) {
+ dc_block(dst, stride, stack_state->curr.value);
+ stack_state->value_prev = stack_state->curr.value;
+ } else {
+ intra_aot_block(state, dst, stride, stack_state->curr.value, stack_state->curr.type, stack_state->plane_idx);
+ // don't use the current DC value to predict the next one
+ stack_state->value_prev = stack_state->next.value;
+ }
+ // next block
+ stack_state->line_prev++;
+ stack_state->line_next++;
+}
+
+static void iframe_line(VideoState *state, uint8_t *dst, ptrdiff_t stride, StackState *stack_state, uint16_t h_blocks)
+{
+ stack_state->next = stack_state->line_curr[0];
+ stack_state->value_prev = stack_state->line_curr[0].value;
+
+ while (--h_blocks > 0) {
+ stack_state->curr = stack_state->next;
+ ++stack_state->line_curr;
+ stack_state->next = stack_state->line_curr[0];
+ decode_iframe_block(state, dst, stride, stack_state);
+ // next block on same line
+ dst += 4;
+ }
+
+ stack_state->curr = stack_state->next;
+ decode_iframe_block(state, dst, stride, stack_state);
+
+ // skip current, right border on same line, and left border on next line
+ stack_state->line_curr += 3;
+
+ // these have already been advanced to the right border in decode_iframe_block
+ stack_state->line_prev += 2;
+ stack_state->line_next += 2;
+}
+
+static void decode_iframe_plane(VideoState *state, int plane_idx, uint8_t *dst, ptrdiff_t linesize)
+{
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ StackState stack_state;
+ int16_t v_blocks;
+
+ stack_state.plane_idx = plane_idx;
+ stack_state.line_prev = plane->payload;
+ stack_state.line_curr = plane->payload;
+ stack_state.line_next = plane->payload + plane->h_blocks_safe;
+ v_blocks = plane->v_blocks;
+
+ // first line
+ if (v_blocks > 0) {
+ iframe_line(state, dst, linesize, &stack_state, plane->h_blocks);
+ // blocks are 4x4 so advance dst by 4 lines
+ dst += linesize * 4;
+ v_blocks--;
+ }
+ // middle lines
+ stack_state.line_prev = plane->payload;
+ while (v_blocks > 1) {
+ iframe_line(state, dst, linesize, &stack_state, plane->h_blocks);
+ dst += linesize * 4;
+ v_blocks--;
+ }
+ // last line
+ if (v_blocks > 0) {
+ stack_state.line_next = stack_state.line_curr;
+ iframe_line(state, dst, linesize, &stack_state, plane->h_blocks);
+ }
+}
+
+static int decode_iframe(SeqObj *seqobj, GetBitContext *gb, AVFrame *frame)
+{
+ VideoState *state = seqobj->state;
+ uint8_t dc_shift = get_bits(gb, 8);
+ uint16_t nest_x, nest_y;
+ int ret;
+
+ state->unk_shift = get_bits(gb, 8);
+ skip_bits(gb, 16);
+ nest_x = get_bits(gb, 16);
+ nest_y = get_bits(gb, 16);
+
+ for (int i = 0; i < LUMA_CHROMA; i++) {
+ ret = get_code(&state->basis_num[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->basis_num_run[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ ret = get_code(&state->dc_values[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->bufTree0[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->fixvl[i], gb, 78);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ ret = get_code(&state->dc_rle[i].gb, gb, 78);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = build_huff(&state->basis_num[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->basis_num_run[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->dc_values[0], 1, dc_shift);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->bufTree0[0], 0, 2);
+ if (ret < 0)
+ return ret;
+
+ state->dc_max = 0x7F << dc_shift;
+ state->dc_min = -(0x80 << dc_shift);
+
+ // 4x4 block types
+ iframe_basis_numdec(state);
+ // 4x4 block DC values
+ iframe_dc_decode(state);
+ // 70x38 nest copied from upper 4 bits of DC values somewhere in the luma plane
+ make_nest(state, nest_x, nest_y);
+
+ for (int i = 0; i < PLANE_COUNT; i++)
+ decode_iframe_plane(state, i, frame->data[i], frame->linesize[i]);
+
+ return 0;
+}
+
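+ // MCPlane holds the per-plane cursors (payload blocks, destination, past and
+ // future reference pointers) used while walking the 8x8-pixel macroblocks of
+ // a P/B frame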
+static void init_mc_handler(VideoState *state,
+ MCPlane mcplanes[PLANE_COUNT],
+ AVFrame *present, AVFrame *past, AVFrame *future)
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ MCPlane *mcplane = &mcplanes[i];
+ HVQPlaneDesc *plane = &state->planes[i];
+
+ mcplane->rle = 0;
+ mcplane->pb_dc = 0x7F;
+ mcplane->present = present->data[i];
+ mcplane->present_stride = present->linesize[i];
+ mcplane->past = past->data[i];
+ mcplane->past_stride = past->linesize[i];
+ mcplane->future = future->data[i];
+ mcplane->future_stride = future->linesize[i];
+ mcplane->payload_cur_blk = plane->payload;
+ mcplane->payload_cur_row = plane->payload;
+ mcplane->h_mcb_stride = 8 >> plane->width_shift;
+ mcplane->v_mcb_stride = present->linesize[i] * (8 >> plane->height_shift);
+ mcplane->pb_per_mcb_x = plane->pb_per_mcb_x;
+ mcplane->stride = plane->h_blocks_safe * plane->pb_per_mcb_y;
+ }
+}
+
+static void initMCBproc(GBCWithVLC *buf, struct RLDecoder *proc)
+{
+ if (buf->gb.buffer) {
+ proc->value = get_bits1(&buf->gb);
+ proc->count = decode_uovf_sym(buf, 0xFF);
+ }
+}
+
+static void initMCBtype(GBCWithVLC *buf, struct RLDecoder *type)
+{
+ if (buf->gb.buffer) {
+ uint32_t value = get_bits1(&buf->gb) << 1;
+
+ type->value = value | get_bits1(&buf->gb);
+ type->count = decode_uovf_sym(buf, 0xFF);
+ }
+}
+
+static void setMCTop(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].top = mcplanes[i].present;
+ mcplanes[i].top_stride = mcplanes[i].present_stride;
+ }
+}
+
+static const uint8_t mcbtypetrans[2][3] = {
+ { 1, 2, 0 },
+ { 2, 0, 1 },
+};
+
+static uint32_t getMCBtype(GBCWithVLC *buftree, struct RLDecoder *type)
+{
+ if (type->count == 0) {
+ // only three possible values, so when the value changes,
+ // a single bit decides which other value to select
+ // bit == 0 -> increment
+ // bit == 1 -> decrement
+ // then wrap to range 0..2
+ uint32_t bit = get_bits1(&buftree->gb);
+
+ type->value = mcbtypetrans[bit][type->value];
+ type->count = decode_uovf_sym(buftree, 0xFF);
+ }
+
+ type->count--;
+
+ return type->value;
+}
+
+static void decode_PB_dc(VideoState *state, MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ HVQPlaneDesc *plane = &state->planes[i];
+ MCPlane *mcplane = &mcplanes[i];
+
+ for (int j = 0; j < plane->blocks_per_mcb; j++) {
+ BlockData *payload;
+
+ mcplane->pb_dc += decode_sovf_sym(&state->dc_values[i], state->dc_min, state->dc_max);
+ payload = mcplane->payload_cur_blk;
+ payload[plane->mcb_offset[j]].value = mcplane->pb_dc;
+ }
+ }
+}
+
+static void decode_PB_cc(VideoState *state, MCPlane mcplanes[PLANE_COUNT], uint32_t proc, uint32_t type)
+{
+ uint32_t block_type = (type << 5) | (proc << 4);
+ if (proc == 1) {
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ BlockData *payload = mcplanes[i].payload_cur_blk;
+ HVQPlaneDesc *plane = &state->planes[i];
+ for (int j = 0; j < plane->blocks_per_mcb; j++)
+ payload[plane->mcb_offset[j]].type = block_type;
+ }
+ return;
+ } else {
+ HVQPlaneDesc *planeY = &state->planes[0];
+ HVQPlaneDesc *planeU = &state->planes[1];
+ MCPlane *mcplaneY = &mcplanes[0];
+ MCPlane *mcplaneU = &mcplanes[1];
+ MCPlane *mcplaneV = &mcplanes[2];
+ for (int i = 0; i < planeY->blocks_per_mcb; i++) {
+ BlockData *ptr = mcplaneY->payload_cur_blk;
+ if (mcplaneY->rle) {
+ ptr[planeY->mcb_offset[i]].type = block_type;
+ --mcplaneY->rle;
+ } else {
+ int16_t huff = decode_huff(&state->basis_num[LUMA_IDX]);
+
+ if (huff) {
+ ptr[planeY->mcb_offset[i]].type = block_type | huff;
+ } else {
+ ptr[planeY->mcb_offset[i]].type = block_type;
+ mcplaneY->rle = decode_huff(&state->basis_num_run[0]);
+ }
+ }
+ }
+ // chroma
+ for (int i = 0; i < planeU->blocks_per_mcb; i++) {
+ BlockData *ptrU = mcplaneU->payload_cur_blk;
+ BlockData *ptrV = mcplaneV->payload_cur_blk;
+ if (mcplaneU->rle) {
+ ptrU[planeU->mcb_offset[i]].type = block_type;
+ ptrV[planeU->mcb_offset[i]].type = block_type;
+ --mcplaneU->rle;
+ } else {
+ int16_t huff = decode_huff(&state->basis_num[CHROMA_IDX]);
+
+ if (huff) {
+ ptrU[planeU->mcb_offset[i]].type = block_type | ((huff >> 0) & 0xF);
+ ptrV[planeU->mcb_offset[i]].type = block_type | ((huff >> 4) & 0xF);
+ } else {
+ ptrU[planeU->mcb_offset[i]].type = block_type;
+ ptrV[planeU->mcb_offset[i]].type = block_type;
+ mcplaneU->rle = decode_huff(&state->basis_num_run[1]);
+ }
+ }
+ }
+ }
+}
+
+static void reset_PB_dc(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++)
+ mcplanes[i].pb_dc = 0x7F;
+}
+
+static uint32_t getMCBproc(GBCWithVLC *buf, struct RLDecoder *proc)
+{
+ if (proc->count == 0) {
+ proc->value ^= 1;
+ proc->count = decode_uovf_sym(buf, 0xFF);
+ }
+
+ proc->count--;
+
+ return proc->value;
+}
+
+static void setMCNextBlk(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].top += mcplanes[i].h_mcb_stride;
+ mcplanes[i].payload_cur_blk += mcplanes[i].pb_per_mcb_x;
+ }
+}
+
+static void setMCDownBlk(MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ MCPlane *mcplane = &mcplanes[i];
+ BlockData *first_block_on_next_row = mcplane->payload_cur_row + mcplane->stride;
+
+ mcplane->present += mcplane->v_mcb_stride;
+ mcplane->payload_cur_blk = first_block_on_next_row;
+ mcplane->payload_cur_row = first_block_on_next_row;
+ }
+}
+
+static void spread_PB_descMap(SeqObj *seqobj, MCPlane mcplanes[PLANE_COUNT])
+{
+ struct RLDecoder proc, type;
+ VideoState *state = seqobj->state;
+ initMCBproc(&state->mcb_proc, &proc);
+ initMCBtype(&state->mcb_type, &type);
+
+ for (int i = 0; i < seqobj->height; i += 8) {
+ setMCTop(mcplanes);
+ for (int j = 0; j < seqobj->width; j += 8) {
+ getMCBtype(&state->mcb_type, &type);
+ if (type.value == 0) {
+ decode_PB_dc(state, mcplanes);
+ decode_PB_cc(state, mcplanes, 0, type.value);
+ } else {
+ reset_PB_dc(mcplanes);
+ decode_PB_cc(state, mcplanes, getMCBproc(&state->mcb_proc, &proc), type.value);
+ }
+ setMCNextBlk(mcplanes);
+ // for all planes
+ // top += h_mcb_stride
+ // payload_cur_blk += pb_per_mcb_x
+ }
+ setMCDownBlk(mcplanes);
+ // for all planes
+ // present += v_mcb_stride
+ // payload_cur_row += stride;
+ // payload_cur_blk = payload_cur_row
+ }
+}
+
+static void resetMCHandler(VideoState *state, MCPlane mcplanes[PLANE_COUNT], AVFrame *present)
+{
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].present = present->data[i];
+ mcplanes[i].payload_cur_blk = state->planes[i].payload;
+ mcplanes[i].payload_cur_row = state->planes[i].payload;
+ }
+}
+
+static void MCBlockDecDCNest(VideoState *state, MCPlane mcplanes[PLANE_COUNT])
+{
+ for (int plane_idx = 0; plane_idx < PLANE_COUNT; plane_idx++) {
+ BlockData const *ptr = mcplanes[plane_idx].payload_cur_blk;
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ ptrdiff_t stride = mcplanes[plane_idx].top_stride;
+ int32_t line = plane->h_blocks_safe;
+
+ for (int j = 0; j < plane->blocks_per_mcb; j++) {
+ // dst is a 4x4 region
+ uint8_t *dst = mcplanes[plane_idx].top + plane->px_offset[j] + plane->py_offset[j] * mcplanes[plane_idx].top_stride;
+ int32_t block_idx = plane->mcb_offset[j];
+ uint32_t value = ptr[block_idx].value;
+ // block type:
+ // 0: weighted
+ // 6: literal block
+ // 8: single value
+ uint32_t type = ptr[block_idx].type & 0xF;
+ // see also IpicBlockDec
+ if (type == 0) {
+ uint8_t top = ptr[block_idx - line].type & 0x77 ? value : ptr[block_idx - line].value;
+ uint8_t left = ptr[block_idx - 1].type & 0x77 ? value : ptr[block_idx - 1].value;
+ uint8_t right = ptr[block_idx + 1].type & 0x77 ? value : ptr[block_idx + 1].value;
+ uint8_t bottom = ptr[block_idx + line].type & 0x77 ? value : ptr[block_idx + line].value;
+ weight_im_block(dst, stride, value, top, bottom, left, right);
+ } else if (type == 8) {
+ dc_block(dst, stride, value);
+ } else {
+ intra_aot_block(state, dst, stride, value, type, plane_idx);
+ }
+ }
+ }
+}
+
+static void setMCTarget(MCPlane mcplanes[PLANE_COUNT], uint32_t reference_frame)
+{
+ if (reference_frame == 0) {
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].target = mcplanes[i].past;
+ mcplanes[i].target_stride = mcplanes[i].past_stride;
+ }
+ } else {
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ mcplanes[i].target = mcplanes[i].future;
+ mcplanes[i].target_stride = mcplanes[i].future_stride;
+ }
+ }
+}
+
+static void getMVector(int32_t *result, GBCWithVLC *buf, int32_t residual_bits)
+{
+ int32_t max_val_plus_1 = 1 << (residual_bits + 5);
+ // quantized value
+ int32_t value = decode_huff(buf) << residual_bits;
+ // residual bits
+ for (int i = residual_bits - 1; i >= 0; --i)
+ value += get_bits1(&buf->gb) << i;
+ *result += value;
+ // signed wrap to -max_val_plus_1 .. max_val_plus_1-1
+ if (*result >= max_val_plus_1)
+ *result -= max_val_plus_1 << 1;
+ else if (*result < -max_val_plus_1)
+ *result += max_val_plus_1 << 1;
+}
+
+static void _MotionComp_00(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = src[i * src_stride + j];
+}
+
+// offset vertically by half a sample
+static void _MotionComp_01(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = (
+ src[(i + 0) * src_stride + j] +
+ src[(i + 1) * src_stride + j] + 1) / 2;
+}
+
+// offset horizontally by half a sample
+static void _MotionComp_10(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = (
+ src[i * src_stride + j + 0] +
+ src[i * src_stride + j + 1] + 1) / 2;
+}
+
+// offset by half a sample in both directions
+static void _MotionComp_11(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride)
+{
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ dst[i * dst_stride + j] = (
+ src[(i + 0) * src_stride + j + 0] +
+ src[(i + 0) * src_stride + j + 1] +
+ src[(i + 1) * src_stride + j + 0] +
+ src[(i + 1) * src_stride + j + 1] + 2) >> 2;
+}
+
+static void do_motion_comp(uint8_t *dst, ptrdiff_t dst_stride, uint8_t const *src, ptrdiff_t src_stride, uint32_t hpel_dx, uint32_t hpel_dy)
+{
+ if (hpel_dy == 0)
+ if (hpel_dx == 0)
+ _MotionComp_00(dst, dst_stride, src, src_stride);
+ else
+ _MotionComp_10(dst, dst_stride, src, src_stride);
+ else
+ if (hpel_dx == 0)
+ _MotionComp_01(dst, dst_stride, src, src_stride);
+ else
+ _MotionComp_11(dst, dst_stride, src, src_stride);
+}
+
+static uint32_t get_mc_aot_basis(VideoState *state, uint8_t basis_out[4][4],
+ int32_t *sum, uint8_t const *nest_data,
+ ptrdiff_t nest_stride, uint32_t plane_idx)
+{
+ // the only difference from get_aot_basis() seems to be the ">> 4 & 0xF"
+ GetBitContext *gb = &state->fixvl[plane_idx];
+ uint16_t bits = get_bits(gb, 16);
+ uint32_t step, stride;
+ uint32_t big = bits & 0x3F;
+ uint32_t small = (bits >> 6) & 0x1F;
+ int32_t inverse, foo;
+ uint8_t min, max;
+
+ if (state->is_landscape) {
+ nest_data += nest_stride * small + big;
+ step = 1 << ((bits >> 11) & 1);
+ stride = nest_stride << ((bits >> 12) & 1);
+ } else {
+ nest_data += nest_stride * big + small;
+ step = 1 << ((bits >> 12) & 1);
+ stride = nest_stride << ((bits >> 11) & 1);
+ }
+ min = max = (nest_data[0] >> 4) & 0xF; // !
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ uint8_t nest_value = (nest_data[i * stride + j * step] >> 4) & 0xF; // !
+
+ basis_out[i][j] = nest_value;
+ min = nest_value < min ? nest_value : min;
+ max = nest_value > max ? nest_value : max;
+ }
+ }
+ *sum += decode_huff(&state->bufTree0[plane_idx]);
+ inverse = div_tab[max - min];
+ if (bits & 0x8000)
+ inverse = -inverse;
+ foo = (bits >> 13) & 3;
+ return (*sum + foo) * inverse;
+}
+
+static int32_t get_mc_aot_sum(VideoState *state, int32_t result[4][4], uint8_t num_bases,
+ uint8_t const *nest_data, ptrdiff_t nest_stride, uint32_t plane_idx)
+{
+ uint8_t byte_result[4][4];
+ int32_t sum, mean, temp = 0;
+
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ result[i][j] = 0;
+
+ for (int k = 0; k < num_bases; k++) {
+ uint32_t factor = get_mc_aot_basis(state, byte_result, &temp, nest_data, nest_stride, plane_idx);
+
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ result[i][j] += factor * byte_result[i][j];
+ }
+
+ sum = 0;
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ sum += result[i][j];
+ mean = sum >> 4;
+
+ return mean;
+}
+
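+ // refine the motion-compensated prediction with an AOT residual: the decoded
+ // basis sum plus a scaled version of the prediction's deviation from its mean
+ // is added on top of the MC block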
+static void PrediAotBlock(VideoState *state, uint8_t *dst, uint8_t const *src, ptrdiff_t stride, uint8_t block_type,
+ uint8_t *nest_data, uint32_t h_nest_size, uint32_t plane_idx, uint32_t hpel_dx, uint32_t hpel_dy)
+{
+ int32_t result[4][4], mean, diff[4][4], min, max;
+ uint32_t addend, factor;
+ uint32_t aot_sum = get_mc_aot_sum(state, result, block_type - 1, nest_data, h_nest_size, plane_idx);
+ uint8_t mdst[4][4];
+ uint32_t const dst_stride = 4;
+
+ do_motion_comp((uint8_t *)mdst, dst_stride, src, stride, hpel_dx, hpel_dy);
+ mean = 8;
+ for (int y = 0; y < 4; y++)
+ for (int x = 0; x < 4; x++)
+ mean += mdst[y][x];
+ mean /= 16;
+ min = max = mdst[0][0] - mean;
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ int32_t value = diff[i][j] = mdst[i][j] - mean;
+
+ min = value < min ? value : min;
+ max = value > max ? value : max;
+ }
+ }
+ addend = (decode_sovf_sym(&state->dc_values[plane_idx], state->dc_min, state->dc_max) >> state->dc_shift << state->unk_shift) - aot_sum;
+ factor = (decode_sovf_sym(&state->dc_values[plane_idx], state->dc_min, state->dc_max) >> state->dc_shift);
+ factor *= mcdiv_tab[max - min];
+ for (int i = 0; i < 4; i++)
+ for (int j = 0; j < 4; j++)
+ result[i][j] += addend + diff[i][j] * factor;
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ uint32_t value = (result[i][j] >> state->unk_shift) + mdst[i][j];
+
+ dst[i * stride + j] = av_clip_uint8(value);
+ }
+ }
+}
+
+static void MCBlockDecMCNest(VideoState *state, MCPlane mcplanes[PLANE_COUNT], int32_t x, int32_t y)
+{
+ uint32_t hpel_dx = x & 1;
+ uint32_t hpel_dy = y & 1;
+ void *nest_data;
+
+ if (state->is_landscape)
+ nest_data = mcplanes[0].target + x/2 + (y/2 - 16)*mcplanes[0].target_stride - 32;
+ else
+ nest_data = mcplanes[0].target + x/2 + (y/2 - 32)*mcplanes[0].target_stride - 16;
+ for (int plane_idx = 0; plane_idx < PLANE_COUNT; plane_idx++) {
+ MCPlane *mcplane = &mcplanes[plane_idx];
+ HVQPlaneDesc *plane = &state->planes[plane_idx];
+ for (int i = 0; i < plane->blocks_per_mcb; i++) {
+ BlockData const *ptr = mcplane->payload_cur_blk;
+ uint8_t block_type = ptr[plane->mcb_offset[i]].type & 0xF;
+ uint8_t *dst = mcplane->top + plane->px_offset[i] + plane->py_offset[i] * mcplane->top_stride;
+ ptrdiff_t stride = mcplane->top_stride;
+
+ if (block_type == 6) {
+ read_block(state, dst, stride, plane_idx);
+ } else {
+ int32_t plane_dx = x >> plane->width_shift;
+ int32_t plane_dy = y >> plane->height_shift;
+ uint8_t const *src = mcplane->target + (plane_dy >> 1) * mcplane->target_stride + (plane_dx >> 1) +
+ plane->px_offset[i] + plane->py_offset[i] * mcplane->target_stride;
+
+ if (state->padding[0]) {
+ hpel_dx = plane_dx & 1;
+ hpel_dy = plane_dy & 1;
+ }
+
+ if (block_type == 0) {
+ do_motion_comp(dst, stride, src, stride, hpel_dx, hpel_dy);
+ } else {
+ uint32_t strideY = mcplanes[0].target_stride;
+ PrediAotBlock(state, dst, src, stride, block_type, nest_data, strideY, plane_idx, hpel_dx, hpel_dy);
+ }
+ }
+ }
+ }
+}
+
+static void motion_comp(VideoState *state, MCPlane mcplanes[PLANE_COUNT], int32_t dx, int32_t dy)
+{
+ uint32_t hpel_dx = dx & 1;
+ uint32_t hpel_dy = dy & 1;
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ MCPlane *mcplane = &mcplanes[i];
+ HVQPlaneDesc *plane = &state->planes[i];
+ int32_t plane_dx = dx >> plane->width_shift;
+ int32_t plane_dy = dy >> plane->height_shift;
+ uint8_t *ptr = mcplane->target + (plane_dy >> 1) * mcplane->target_stride + (plane_dx >> 1);
+
+ if (state->padding[0]) {
+ hpel_dx = plane_dx & 1;
+ hpel_dy = plane_dy & 1;
+ }
+
+ for (int j = 0; j < plane->blocks_per_mcb; j++) {
+ do_motion_comp(mcplane->top + plane->px_offset[j] + plane->py_offset[j] * mcplane->top_stride,
+ mcplane->top_stride,
+ ptr + plane->px_offset[j] + plane->py_offset[j] * mcplane->target_stride,
+ mcplane->target_stride,
+ hpel_dx, hpel_dy);
+ }
+ }
+}
+
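+ // decode_bframe_plane() works in two passes: spread_PB_descMap() first fills
+ // in the per-block DC values and block types for the whole frame, then the
+ // main loop below reconstructs the pixels one 8x8 macroblock at a time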
+static void decode_bframe_plane(SeqObj *seqobj, AVFrame *present, AVFrame *past, AVFrame *future)
+{
+ MCPlane mcplanes[PLANE_COUNT];
+ VideoState *state = seqobj->state;
+ int32_t reference_frame = -1;
+ int32_t mv_h, mv_v;
+
+ init_mc_handler(state, mcplanes, present, past, future);
+ spread_PB_descMap(seqobj, mcplanes);
+ resetMCHandler(state, mcplanes, present);
+ for (int y = 0; y < seqobj->height; y += 8) { // MC blocks are 8x8 pixels
+ setMCTop(mcplanes);
+ for (int x = 0; x < seqobj->width; x += 8) {
+ uint8_t bits = mcplanes[0].payload_cur_blk->type;
+ // 0: intra
+ // 1: inter - past
+ // 2: inter - future
+ // see getMCBtype()
+ int8_t new_reference_frame = (bits >> 5) & 3;
+ if (new_reference_frame == 0)
+ {
+ // intra
+ MCBlockDecDCNest(state, mcplanes);
+ } else {
+ int mcb_proc;
+ uint32_t ref_x;
+ uint32_t ref_y;
+
+ new_reference_frame--;
+ // check if we need to update the reference frame pointers
+ if (new_reference_frame != reference_frame) {
+ reference_frame = new_reference_frame;
+ setMCTarget(mcplanes, reference_frame);
+ mv_h = 0;
+ mv_v = 0;
+ }
+
+ getMVector(&mv_h, &state->mv_h, state->mc_residual_bits_h[reference_frame]);
+ getMVector(&mv_v, &state->mv_v, state->mc_residual_bits_v[reference_frame]);
+
+ // compute half-pixel position of reference macroblock
+ ref_x = x * 2 + mv_h;
+ ref_y = y * 2 + mv_v;
+
+ // see getMCBproc()
+ mcb_proc = (bits >> 4) & 1;
+ if (mcb_proc == 0)
+ MCBlockDecMCNest(state, mcplanes, ref_x, ref_y);
+ else
+ motion_comp(state, mcplanes, ref_x, ref_y);
+ }
+ setMCNextBlk(mcplanes);
+ }
+ setMCDownBlk(mcplanes);
+ }
+}
+
+static int decode_bframe(SeqObj *seqobj, GetBitContext *gb, AVFrame *present, AVFrame *past, AVFrame *future)
+{
+ VideoState *state = seqobj->state;
+ int ret;
+
+ state->dc_shift = get_bits(gb, 8);
+ state->unk_shift = get_bits(gb, 8);
+ state->mc_residual_bits_h[0] = get_bits(gb, 8);
+ state->mc_residual_bits_v[0] = get_bits(gb, 8);
+ state->mc_residual_bits_h[1] = get_bits(gb, 8);
+ state->mc_residual_bits_v[1] = get_bits(gb, 8);
+ skip_bits_long(gb, 16);
+
+ for (int i = 0; i < 2; i++) {
+ ret = get_code(&state->basis_num[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->basis_num_run[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < PLANE_COUNT; i++) {
+ ret = get_code(&state->dc_values[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->bufTree0[i].gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->fixvl[i], gb, 82);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = get_code(&state->mv_h.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->mv_v.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->mcb_type.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+ ret = get_code(&state->mcb_proc.gb, gb, 82);
+ if (ret < 0)
+ return ret;
+
+ ret = build_huff(&state->basis_num[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->basis_num_run[0], 0, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->dc_values[0], 1, state->dc_shift);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->bufTree0[0], 0, 2);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->mv_h, 1, 0);
+ if (ret < 0)
+ return ret;
+ ret = build_huff(&state->mcb_type, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ state->dc_max = 0x7F << state->dc_shift;
+ state->dc_min = -(0x80 << state->dc_shift);
+
+ decode_bframe_plane(seqobj, present, past, future);
+
+ return 0;
+}
+
+static int hvqm4_decode(AVCodecContext *avctx, void *data,
+ int *got_frame, AVPacket *pkt)
+{
+ HVQM4Context *s = avctx->priv_data;
+ GetBitContext *gb = &s->gb;
+ AVFrame *frame = s->frame[0];
+ int frame_type;
+ int ret;
+
+ s->state.padding[0] = 1;
+
+ if ((ret = init_get_bits8(gb, pkt->data, pkt->size)) < 0)
+ return ret;
+
+ frame_type = get_bits(gb, 16);
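+ // frame[0] is the current decode target, frame[1] the past reference and
+ // frame[2] the most recently decoded reference; I and P frames are rotated
+ // into the reference slots via the swaps below, B frames are never referenced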
+ if (frame_type != B_FRAME)
+ FFSWAP(AVFrame *, s->frame[1], s->frame[2]);
+
+ if ((ret = ff_reget_buffer(avctx, frame, 0)) < 0)
+ return ret;
+
+ skip_bits_long(gb, 32);
+ switch (frame_type) {
+ case I_FRAME:
+ frame->pict_type = AV_PICTURE_TYPE_I;
+ frame->key_frame = 1;
+ ret = decode_iframe(&s->seqobj, gb, frame);
+ break;
+ case P_FRAME:
+ frame->pict_type = AV_PICTURE_TYPE_P;
+ frame->key_frame = 0;
+ if (!s->frame[1]->data[0])
+ return AVERROR_INVALIDDATA;
+ ret = decode_bframe(&s->seqobj, gb, frame, s->frame[1], frame);
+ break;
+ case B_FRAME:
+ frame->pict_type = AV_PICTURE_TYPE_B;
+ frame->key_frame = 0;
+ if (!s->frame[1]->data[0] ||
+ !s->frame[2]->data[0])
+ return AVERROR_INVALIDDATA;
+ ret = decode_bframe(&s->seqobj, gb, frame, s->frame[1], s->frame[2]);
+ break;
+ default:
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ ret = av_frame_ref(data, frame);
+ if (ret < 0)
+ return ret;
+
+ if (frame_type != B_FRAME)
+ FFSWAP(AVFrame *, s->frame[0], s->frame[2]);
+
+ *got_frame = 1;
+
+ return 0;
+}
+
+static void hvqm4_flush(AVCodecContext *avctx)
+{
+ HVQM4Context *s = avctx->priv_data;
+
+ for (int i = 0; i < 3; i++)
+ av_frame_unref(s->frame[i]);
+}
+
+static av_cold int hvqm4_close(AVCodecContext *avctx)
+{
+ HVQM4Context *s = avctx->priv_data;
+
+ av_freep(&s->buffer);
+ for (int i = 0; i < 6; i++)
+ ff_free_vlc(&s->state.vlc[i]);
+ for (int i = 0; i < 3; i++)
+ av_frame_free(&s->frame[i]);
+
+ return 0;
+}
+
+const AVCodec ff_hvqm4_decoder = {
+ .name = "hvqm4",
+ .long_name = NULL_IF_CONFIG_SMALL("HVQM4 Video"),
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_HVQM4,
+ .priv_data_size = sizeof(HVQM4Context),
+ .init = hvqm4_init,
+ .decode = hvqm4_decode,
+ .flush = hvqm4_flush,
+ .close = hvqm4_close,
+ .capabilities = AV_CODEC_CAP_DR1,
+ .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
+};