[FFmpeg-devel] [PATCH] avformat/westwood_vqa: Store VQFL codebook chunks

Tue Sep 21 19:32:42 EEST 2021

Pekka Väänänen:
> High color 15-bit VQA3 video streams contain high level chunks with
> only codebook updates that shouldn't be considered new frames. Now
> the demuxer stores a reference to such VQFL chunks and returns them
> later along with a VQFR chunk with full frame data.
> ---
>  libavformat/westwood_vqa.c | 49 ++++++++++++++++++++++++++++++++++----
>  1 file changed, 45 insertions(+), 4 deletions(-)
> 
> diff --git a/libavformat/westwood_vqa.c b/libavformat/westwood_vqa.c
> index 77df007c11..587626ea67 100644
> --- a/libavformat/westwood_vqa.c
> +++ b/libavformat/westwood_vqa.c
> @@ -1,6 +1,7 @@
>  /*
>   * Westwood Studios VQA Format Demuxer
> - * Copyright (c) 2003 The FFmpeg project
> + * Copyright (c) 2003 Mike Melanson <melanson at pcisys.net>
> + * Copyright (c) 2021 Pekka Väänänen <pekka.vaananen at iki.fi>
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -40,6 +41,7 @@
>  #define SND1_TAG MKBETAG('S', 'N', 'D', '1')
>  #define SND2_TAG MKBETAG('S', 'N', 'D', '2')
>  #define VQFR_TAG MKBETAG('V', 'Q', 'F', 'R')
> +#define VQFL_TAG MKBETAG('V', 'Q', 'F', 'L')
>  
>  /* don't know what these tags are for, but acknowledge their existence */
>  #define CINF_TAG MKBETAG('C', 'I', 'N', 'F')
> @@ -60,6 +62,8 @@ typedef struct WsVqaDemuxContext {
>      int sample_rate;
>      int audio_stream_index;
>      int video_stream_index;
> +    int64_t vqfl_chunk_pos;
> +    int vqfl_chunk_size;
>  } WsVqaDemuxContext;
>  
>  static int wsvqa_probe(const AVProbeData *p)
> @@ -120,6 +124,8 @@ static int wsvqa_read_header(AVFormatContext *s)
>      wsvqa->channels     = header[26];
>      wsvqa->bps          = header[27];
>      wsvqa->audio_stream_index = -1;
> +    wsvqa->vqfl_chunk_pos     = 0;
> +    wsvqa->vqfl_chunk_size    = 0;
>  
>      s->ctx_flags |= AVFMTCTX_NOHEADER;
>  
> @@ -172,11 +178,21 @@ static int wsvqa_read_packet(AVFormatContext *s,
>  
>          skip_byte = chunk_size & 0x01;
>  
> -        if ((chunk_type == SND0_TAG) || (chunk_type == SND1_TAG) ||
> +        if (chunk_type == VQFL_TAG) {
> +            /* Each VQFL chunk carries only a codebook update inside which must be applied
> +             * before the next VQFR is rendered. That's why we stash the VQFL offset here
> +             * so it can be combined with the next VQFR packet. This way each packet
> +             * includes a whole frame as expected. */
> +            wsvqa->vqfl_chunk_pos = avio_tell(pb);
> +            wsvqa->vqfl_chunk_size = (int)chunk_size;
> +            avio_skip(pb, chunk_size + skip_byte);
> +            continue;
> +        } else if ((chunk_type == SND0_TAG) || (chunk_type == SND1_TAG) ||
>              (chunk_type == SND2_TAG) || (chunk_type == VQFR_TAG)) {
>  
> -            ret= av_get_packet(pb, pkt, chunk_size);
> -            if (ret<0)
> +            ret = av_get_packet(pb, pkt, chunk_size);
> +
> +            if (ret < 0)

This is a cosmetic change. It should not be part of a commit for a
functional change (unless you already have to touch that line anyway (in
which case you should beautify the line)).

>                  return AVERROR(EIO);
>  
>              switch (chunk_type) {
> @@ -235,6 +251,31 @@ static int wsvqa_read_packet(AVFormatContext *s,
>                  }
>                  break;
>              case VQFR_TAG:
> +                /* if a new codebook is available inside an earlier a VQFL chunk then
> +                 * prepend it to 'pkt' */
> +                if (wsvqa->vqfl_chunk_size > 0) {
> +                    int64_t current_pos = pkt->pos;
> +                    int current_size = pkt->size;
> +
> +                    if (avio_seek(pb, wsvqa->vqfl_chunk_pos, SEEK_SET) < 0)
> +                        return AVERROR(EIO);
> +
> +                    /* the decoder expects chunks to be 16-bit aligned */
> +                    if (wsvqa->vqfl_chunk_size % 2 == 1)
> +                        wsvqa->vqfl_chunk_size++;
> +
> +                    if (av_get_packet(pb, pkt, wsvqa->vqfl_chunk_size) < 0)
> +                        return AVERROR(EIO);

This is executed after the earlier av_get_packet() and therefore the
contents of the chunk read earlier will leak.

Maybe you should read the earlier VQFL chunk into the packet when
encountering it and then append the current packet to it via
av_append_packet(), i.e. replace the ordinary av_get_packet() with
av_append_packet()? That way no seek would be necessary, but you need to
be careful not to leak anything.

> +
> +                    if (avio_seek(pb, current_pos, SEEK_SET) < 0)
> +                        return AVERROR(EIO);
> +
> +                    ret = av_append_packet(pb, pkt, current_size);
> +
> +                    wsvqa->vqfl_chunk_pos = 0;
> +                    wsvqa->vqfl_chunk_size = 0;
> +                }
> +
>                  pkt->stream_index = wsvqa->video_stream_index;
>                  pkt->duration = 1;
>                  break;
>