[FFmpeg-devel] [PATCH v9 1/5] avcodec/jpegxl: add Jpeg XL image codec and parser

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Wed Mar 23 16:45:15 EET 2022


Leo Izen:
> This commit adds support to libavcodec to read and parse
> encoded Jpeg XL images. Jpeg XL is intended to be an
> extended-life replacement to legacy mjpeg.
> ---
>  MAINTAINERS                |   2 +
>  libavcodec/Makefile        |   1 +
>  libavcodec/codec_desc.c    |   9 +
>  libavcodec/codec_id.h      |   1 +
>  libavcodec/jpegxl.h        |  43 ++
>  libavcodec/jpegxl_parser.c | 941 +++++++++++++++++++++++++++++++++++++
>  libavcodec/parsers.c       |   1 +
>  libavcodec/version.h       |   2 +-
>  8 files changed, 999 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/jpegxl.h
>  create mode 100644 libavcodec/jpegxl_parser.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 931cf4bd2c..2e0de9e224 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -188,6 +188,7 @@ Codecs:
>    interplayvideo.c                      Mike Melanson
>    jni*, ffjni*                          Matthieu Bouron
>    jpeg2000*                             Nicolas Bertrand
> +  jpegxl.h, jpegxl_parser.c             Leo Izen
>    jvdec.c                               Peter Ross
>    lcl*.c                                Roberto Togni, Reimar Doeffinger
>    libcelt_dec.c                         Nicolas George
> @@ -616,6 +617,7 @@ Haihao Xiang (haihao)         1F0C 31E8 B4FE F7A4 4DC1 DC99 E0F5 76D4 76FC 437F
>  Jaikrishnan Menon             61A1 F09F 01C9 2D45 78E1 C862 25DC 8831 AF70 D368
>  James Almer                   7751 2E8C FD94 A169 57E6 9A7A 1463 01AD 7376 59E0
>  Jean Delvare                  7CA6 9F44 60F1 BDC4 1FD2 C858 A552 6B9B B3CD 4E6A
> +Leo Izen (thebombzen)         B6FD 3CFC 7ACF 83FC 9137 6945 5A71 C331 FD2F A19A
>  Loren Merritt                 ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE
>  Lynne                         FE50 139C 6805 72CA FD52 1F8D A2FE A5F0 3F03 4464
>  Michael Niedermayer           9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index fb8b0e824b..3723601b3d 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -44,6 +44,7 @@ OBJS = ac3_parser.o                                                     \
>         dv_profile.o                                                     \
>         encode.o                                                         \
>         imgconvert.o                                                     \
> +       jpegxl_parser.o                                                  \
>         jni.o                                                            \
>         mathtables.o                                                     \
>         mediacodec.o                                                     \
> diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
> index 81f3b3c640..1b82870aaa 100644
> --- a/libavcodec/codec_desc.c
> +++ b/libavcodec/codec_desc.c
> @@ -1863,6 +1863,15 @@ static const AVCodecDescriptor codec_descriptors[] = {
>          .long_name = NULL_IF_CONFIG_SMALL("GEM Raster image"),
>          .props     = AV_CODEC_PROP_LOSSY,
>      },
> +    {
> +        .id        = AV_CODEC_ID_JPEGXL,
> +        .type      = AVMEDIA_TYPE_VIDEO,
> +        .name      = "jpegxl",
> +        .long_name = NULL_IF_CONFIG_SMALL("JPEG XL"),
> +        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
> +                     AV_CODEC_PROP_LOSSLESS,
> +        .mime_types= MT("image/jxl"),
> +    },
>  
>      /* various PCM "codecs" */
>      {
> diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
> index 3ffb9bd22e..dbc4f3a208 100644
> --- a/libavcodec/codec_id.h
> +++ b/libavcodec/codec_id.h
> @@ -308,6 +308,7 @@ enum AVCodecID {
>      AV_CODEC_ID_SIMBIOSIS_IMX,
>      AV_CODEC_ID_SGA_VIDEO,
>      AV_CODEC_ID_GEM,
> +    AV_CODEC_ID_JPEGXL,
>  
>      /* various PCM "codecs" */
>      AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
> diff --git a/libavcodec/jpegxl.h b/libavcodec/jpegxl.h
> new file mode 100644
> index 0000000000..4f93c99687
> --- /dev/null
> +++ b/libavcodec/jpegxl.h
> @@ -0,0 +1,43 @@
> +/*
> + * JPEG XL header
> + * Copyright (c) 2021 Leo Izen <leo.izen at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * JPEG XL header
> + */
> +
> +#ifndef AVCODEC_JPEGXL_H
> +#define AVCODEC_JPEGXL_H
> +
> +#include <stdint.h>
> +
> +/* these are also used in avformat/img2dec.c */
> +#define FF_JPEGXL_CODESTREAM_SIGNATURE_LE 0x0aff
> +#define FF_JPEGXL_CODESTREAM_SIGNATURE_BE 0xff0a
> +#define FF_JPEGXL_CONTAINER_SIGNATURE_LE 0x204c584a0c000000
> +#define FF_JPEGXL_CONTAINER_SIGNATURE_BE 0x0000000c4a584c20
> +
> +/**
> + * @return 0 upon valid, nonzero upon some parse error

Better: return a value >= 0 if valid, < 0 upon error to future-proof this.

> + */
> +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, int buflen);

Should be const uint8_t *buf (your probe function added later will be
const-incorrect otherwise).

> +
> +#endif /* AVCODEC_JPEGXL_H */
> diff --git a/libavcodec/jpegxl_parser.c b/libavcodec/jpegxl_parser.c
> new file mode 100644
> index 0000000000..13d1b176cd
> --- /dev/null
> +++ b/libavcodec/jpegxl_parser.c
> @@ -0,0 +1,941 @@
> +/*
> + * JPEG XL parser
> + * Copyright (c) 2021 Leo Izen <leo.izen at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * JPEG XL parser
> + */
> +
> +#include <inttypes.h>
> +#include <stdlib.h>
> +
> +#include "libavutil/error.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/macros.h"
> +#include "libavutil/mem.h"

This header is unnecessary now.

> +
> +#define BITSTREAM_READER_LE
> +
> +#include "codec_id.h"
> +#include "config.h"
> +#include "get_bits.h"
> +#include "jpegxl.h"
> +#include "parser.h"
> +
> +enum JpegXLExtraChannelType {
> +    FF_JPEGXL_CT_ALPHA = 0,
> +    FF_JPEGXL_CT_DEPTH,
> +    FF_JPEGXL_CT_SPOT_COLOR,
> +    FF_JPEGXL_CT_SELECTION_MASK,
> +    FF_JPEGXL_CT_BLACK,
> +    FF_JPEGXL_CT_CFA,
> +    FF_JPEGXL_CT_THERMAL,
> +    FF_JPEGXL_CT_NON_OPTIONAL = 15,
> +    FF_JPEGXL_CT_OPTIONAL
> +};
> +
> +enum JpegXLColorSpace {
> +    FF_JPEGXL_CS_RGB = 0,
> +    FF_JPEGXL_CS_GRAY,
> +    FF_JPEGXL_CS_XYB,
> +    FF_JPEGXL_CS_UNKNOWN
> +};
> +
> +enum JpegXLWhitePoint {
> +    FF_JPEGXL_WP_D65 = 1,
> +    FF_JPEGXL_WP_CUSTOM,
> +    FF_JPEGXL_WP_E = 10,
> +    FF_JPEGXL_WP_DCI = 11
> +};
> +
> +enum JpegXLPrimaries {
> +    FF_JPEGXL_PR_SRGB = 1,
> +    FF_JPEGXL_PR_CUSTOM,
> +    FF_JPEGXL_PR_2100 = 9,
> +    FF_JPEGXL_PR_P3 = 11,
> +};
> +
> +enum JpegXLTransferFunction {
> +    FF_JPEGXL_TF_709 = 1,
> +    FF_JPEGXL_TF_UNKNOWN,
> +    FF_JPEGXL_TF_LINEAR = 8,
> +    FF_JPEGXL_TF_SRGB = 13,
> +    FF_JPEGXL_TF_PQ = 16,
> +    FF_JPEGXL_TF_DCI,
> +    FF_JPEGXL_TF_HLG
> +};
> +
> +enum JpegXLRenderingIntent {
> +    FF_JPEGXL_RI_PERCEPTUAL = 0,
> +    FF_JPEGXL_RI_RELATIVE,
> +    FF_JPEGXL_RI_SATURATION,
> +    FF_JPEGXL_RI_ABSOLUTE
> +};
> +
> +typedef struct JpegXLExtraChannelInfo {
> +    enum JpegXLExtraChannelType type;
> +    uint32_t bits_per_sample;
> +    uint32_t exp_bits_per_sample;
> +    uint32_t dim_shift;
> +    size_t name_len;
> +    int alpha_associated;
> +    float red;
> +    float green;
> +    float blue;
> +    float solidity;
> +    uint32_t cfa_channel;
> +} JpegXLExtraChannelInfo;
> +
> +typedef struct JpegXLHeader {
> +    uint32_t width;
> +    uint32_t height;
> +    int orientation;
> +    /* zero if not present */
> +    uint32_t intrinsic_width;
> +    uint32_t intrinsic_height;
> +    uint32_t preview_width;
> +    uint32_t preview_height;
> +    /* BEGIN animation header */
> +    uint32_t anim_tb_num;
> +    uint32_t anim_tb_denom;
> +    uint32_t anim_loop_count;
> +    int anim_have_pts;
> +    /* END animation header */
> +
> +    uint32_t bits_per_sample;
> +    uint32_t exp_bits_per_sample;
> +
> +    int modular_16bit_buffers;
> +
> +    uint32_t num_extra_channels;
> +
> +    /*
> +     * an array of extra channel info
> +     * with length num_extra_channels
> +     * this is not NULL-terminated
> +     */
> +    JpegXLExtraChannelInfo extra_channel_info[256];
> +
> +    int xyb_encoded;
> +
> +    /* BEGIN color encoding bundle */
> +    int have_icc_profile;
> +    enum JpegXLColorSpace color_space;
> +    enum JpegXLWhitePoint white_point;
> +    uint32_t white_ux;
> +    uint32_t white_uy;
> +    enum JpegXLPrimaries primaries;
> +    uint32_t red_ux;
> +    uint32_t red_uy;
> +    uint32_t green_ux;
> +    uint32_t green_uy;
> +    uint32_t blue_ux;
> +    uint32_t blue_uy;
> +    /*
> +     * if this is less than 1 << 24,
> +     * then interpret it as a gamma value
> +     * If this is greater than or equal to 1 << 24,
> +     * then subtract 1 << 24 and interpret it as a
> +     * an enum JpegXLTransferFunction
> +     */
> +    int have_gamma;
> +    uint32_t transfer_function;
> +    enum JpegXLRenderingIntent rendering_intent;
> +    /* END color encoding bundle */
> +
> +    /* BEGIN tone mapping bundle */
> +    float intensity_target;
> +    float min_nits;
> +    int relative_to_max_display;
> +    float linear_below;
> +    /* END tone mapping bundle */
> +
> +    /* each extension bit determines which extension matters */
> +    uint64_t extensions;
> +    uint64_t extension_bits[64];
> +
> +    int default_transform;
> +
> +    int have_opsin_inv;
> +    float opsin_inverse_matrix[16];
> +
> +    uint32_t cw_mask;
> +    float up2_weight[15];
> +    float up4_weight[55];
> +    float up8_weight[210];

These are not used atm.

> +
> +    /*
> +     * this is not provided by the header,
> +     * but rather, by the container
> +     * raw Jpeg XL Codestreams are level 5
> +     * the container can choose to up it to 10
> +     */
> +    int level;
> +
> +} JpegXLHeader;
> +
> +typedef struct JpegXLParseContext {
> +    ParseContext pc;
> +    GetBitContext gb;
> +    int box_size;
> +    uint32_t box_tag;
> +    int box_index;
> +    int level;
> +    int container;
> +    int found_codestream;
> +} JpegXLParseContext;
> +
> +#define jxl_bits(n) jpegxl_get_bits(NULL, jxlr, (n))
> +#define jxl_u32(c0, c1, c2, c3, u0, u1, u2, u3) jpegxl_u32(jxlr, \
> +    (uint32_t[]){c0, c1, c2, c3}, (uint32_t[]){u0, u1, u2, u3})

These compound literals should be const uint32_t[].

> +#define jxl_enum() jxl_u32(0, 1, 2, 18, 0, 0, 4, 6)
> +
> +#define jxl_parse_errv(type, ...) av_log(avctx, AV_LOG_DEBUG, \
> +    "At position: %d, invalid " type "\n", jxlr->gb.index, __VA_ARGS__)
> +
> +#define jxl_parse_err(msg) jxl_parse_errv("%s", (msg))
> +
> +static int jpegxl_skip_boxes(void *avctx, JpegXLParseContext *jxlr)
> +{
> +    uint64_t size = 0;
> +    uint32_t tag = 0;
> +    char tag_str[5];
> +    int remaining;
> +    while (1) {
> +        if (jxlr->box_index < 0) {
> +            if (jxlr->box_size > 1) {
> +                size = jxlr->box_size / 8 + 8;
> +                tag = jxlr->box_tag;
> +            } else if (jxlr->box_size == 1) {
> +                size = 1;
> +                tag = jxlr->box_tag;
> +            } else {
> +                size = 0;
> +            }
> +            jxlr->box_index = 0;
> +        } else {
> +            size = 0;
> +        }
> +
> +        remaining = jxlr->gb.size_in_bits - jxlr->gb.index;
> +        remaining -= remaining % 8;

If I am not mistaken, then the getbits reader should always be
byte-aligned at this point. So the second statement is redundant.
If you are not aligned, then I don't get this code: You are aligning
your remaining size as if you wanted to align the GetBitContext, but you
never align the GetBitContext.

> +        if (remaining < 64)
> +            goto box_eof;
> +
> +        if (size == 0) {
> +            tag = 0;
> +            for (int k = 0; k < 4; k++)
> +                size = (size << 8) | get_bits(&jxlr->gb, 8);

If I am right about the alignment, then you could just use AV_RB32 here
and below; and AV_RB64() for reading the 64-bit size, too.

> +            remaining -= 32;
> +        }
> +        if (tag == 0) {
> +            for (int k = 0; k < 4; k++)
> +                tag = (tag << 8) | get_bits(&jxlr->gb, 8);
> +            remaining -= 32;
> +        }
> +
> +        /* extra 64-bit size field */
> +        if (size == 1) {
> +            if (remaining < 64)
> +                goto box_eof;
> +            size = 0;
> +            for (int k = 0; k < 8; k++)
> +                size = (size << 8) | get_bits(&jxlr->gb, 8);
> +            size -= 8;

Potential for wraparound here: size is an unsigned 64-bit value, so a
stored size below 8 underflows on this subtraction.

> +            remaining -= 64;
> +        }
> +
> +        if (size > 0 && size < 9)
> +            return 1; /* invalid ISOBMFF box size */

Really? A box with no payload is legal IIRC. The check should be "size < 8".

> +
> +        /* box too big for GetBitContext */
> +        /* size - 40 => (size - 8) - 32 */
> +        if (size > 40 && size - 40 > INT_MAX / 8)

Where does the 32 come from? If you want to use
AV_INPUT_BUFFER_PADDING_SIZE, use it. (But probably get_bits.h should
define a maximum of the buffer sizes it can handle.)
Anyway, the above is equivalent to "size > INT_MAX / 8 + 40".

> +            return 1;
> +
> +        /* turn size into something the parser can use */
> +        if (size > 0)
> +            size = (size - 8) * 8;
> +
> +        /* partial jxl codestream box */
> +        if (tag == MKBETAG('j','x','l','p')) {
> +            if (remaining < 32)
> +                goto box_eof;
> +            /* 32-bit box index, we ignore it */
> +            skip_bits_long(&jxlr->gb, 32);
> +            remaining -= 32;
> +            break;
> +        }
> +        /* full jxl codestream box */
> +        if (tag == MKBETAG('j','x','l','c'))
> +            break;
> +        /* jxl level box */
> +        if (tag == MKBETAG('j','x','l','l')) {
> +            if (size != 8)
> +                return 1; /* illegal jxll box */
> +            if (remaining < 8)
> +                goto box_eof;
> +            jxlr->level = get_bits(&jxlr->gb, 8);
> +            remaining -= 8;
> +            continue;
> +        }
> +        /* any other box is skipped at this point */
> +        AV_WB32(tag_str, tag);
> +        av_log(avctx, AV_LOG_VERBOSE, "skipping jxl container box: %s\n", tag_str);

1. tag_str is potentially not zero-terminated.
2. If tag_str contains a \0, it might get truncated; it would be better
to just report it as hex with %X or so.
3. And actually I don't think that this should be reported at all.

> +       
> +        /* zero size means -> eof, nothing more to skip */
> +        if (size == 0)
> +            break;
> +
> +        if (size - 1 > remaining) {
> +            skip_bits_long(&jxlr->gb, remaining);
> +            size -= remaining;
> +            goto box_eof;
> +        }
> +
> +        skip_bits_long(&jxlr->gb, size);
> +    }
> +
> +    jxlr->box_size = size;
> +    jxlr->box_tag = tag;
> +    return 0;
> +
> +box_eof:
> +    jxlr->box_size = size;
> +    jxlr->box_tag = tag;
> +    return 2;
> +}
> +
> +/*
> + * get from 1-64 bits from a JpegXLParseContext
> + */
> +static uint64_t jpegxl_get_bits(void *avctx, JpegXLParseContext *jxlr, int bits)
> +{
> +    if (jxlr->box_size) {
> +        if (bits > jxlr->box_size) {
> +            int remaining = jxlr->box_size;
> +            uint64_t ret = jpegxl_get_bits(avctx, jxlr, remaining);
> +            /* go to the next box */
> +            int status = jpegxl_skip_boxes(avctx, jxlr);
> +            if (status)
> +                return 0;
> +            ret |= jpegxl_get_bits(avctx, jxlr, bits - remaining) << remaining;

What guarantees that there is not a sequence of boxes with a payload of
1 byte, so that a single read can span more than two boxes?

And does the file format really allow splitting the payload into
different boxes at arbitrary positions?

> +            return ret;
> +        }
> +        jxlr->box_size -= bits;
> +    }
> +    return get_bits64(&jxlr->gb, bits);

As far as I can see, only skips exceed 32 bits.

> +}
> +
> +static uint32_t jpegxl_u32(JpegXLParseContext *jxlr,
> +                           const uint32_t constants[4], const uint32_t ubits[4])
> +{
> +    uint32_t ret, choice = jxl_bits(2);
> +    ret = constants[choice];
> +    if (ubits[choice])
> +        ret += jxl_bits(ubits[choice]);
> +    return ret;
> +}
> +
> +static uint64_t jpegxl_u64(JpegXLParseContext *jxlr)
> +{
> +    uint64_t shift = 12, ret;
> +    switch (jxl_bits(2)) {
> +    case 0:
> +        ret = 0;
> +        break;
> +    case 1:
> +        ret = 1 + jxl_bits(4);
> +        break;
> +    case 2:
> +        ret = 17 + jxl_bits(8);
> +        break;
> +    case 3:
> +        ret = jxl_bits(12);
> +        while (jxl_bits(1)) {
> +            if (shift < 60) {
> +                ret |= jxl_bits(8) << shift;
> +                shift += 8;
> +            } else {
> +                ret |= jxl_bits(4) << shift;
> +                break;
> +            }
> +        }
> +        break;
> +    }
> +    return ret;
> +}
> +
> +static float jpegxl_f16(JpegXLParseContext *jxlr)
> +{
> +    uint32_t mantissa = jxl_bits(10) << 13;
> +    uint32_t biased_exponent = jxl_bits(5);
> +    if (biased_exponent == 31)
> +        mantissa |= 0xFF << 23;
> +    else
> +        mantissa |= ((biased_exponent - 15 + 127) & 0xFF) << 23;
> +    return av_int2float(mantissa);
> +}
> +
> +static uint32_t jpegxl_width_from_ratio(uint32_t height, int ratio)
> +{
> +    uint64_t height64 = height;
> +    switch (ratio) {
> +    case 1:
> +        return height;
> +    case 2:
> +        return (uint32_t)((height64 * 12) / 10);
> +    case 3:
> +        return (uint32_t)((height64 * 4) / 3);
> +    case 4:
> +        return (uint32_t)((height64 * 3) / 2);
> +    case 5:
> +        return (uint32_t)((height64 * 16) / 9);
> +    case 6:
> +        return (uint32_t)((height64 * 5) / 4);
> +    case 7:
> +        return (uint32_t)(height64 * 2);
> +    default:
> +        return 0; /* manual width */
> +    }
> +}
> +
> +
> +static int jpegxl_parse_size_header(JpegXLParseContext *jxlr,
> +        uint32_t *width, uint32_t *height)
> +{
> +    uint32_t w, h;
> +    if (jxl_bits(1)) {
> +        /* small size header */
> +        h = (jxl_bits(5) + 1) << 3;
> +        w = jpegxl_width_from_ratio(h, jxl_bits(3));
> +        if (!w)
> +            w = (jxl_bits(5) + 1) << 3;
> +    } else {
> +        /* large size header */
> +        h = 1 + jxl_u32(0, 0, 0, 0, 9, 13, 18, 30);
> +        w = jpegxl_width_from_ratio(h, jxl_bits(3));
> +        if (!w)
> +            w = 1 + jxl_u32(0, 0, 0, 0, 9, 13, 18, 30);
> +    }
> +    *width = w, *height = h;
> +    return 0;

Why does this pretend to be able to fail when it just can't?

> +}
> +
> +static int jpegxl_parse_preview_header(JpegXLParseContext *jxlr,
> +        uint32_t *width, uint32_t *height)
> +{
> +    uint32_t w, h;
> +    if (jxl_bits(1)) {
> +        /* coded height and width divided by eight */
> +        h = jxl_u32(16, 32, 1, 33, 0, 0, 5, 9) << 3;
> +        w = jpegxl_width_from_ratio(h, jxl_bits(3));
> +        if (!w)
> +            w = jxl_u32(16, 32, 1, 33, 0, 0, 5, 9) << 3;
> +    } else {
> +        /* full height and width coded */
> +        h = jxl_u32(1, 65, 321, 1345, 6, 8, 10, 12);
> +        w = jpegxl_width_from_ratio(h, jxl_bits(3));
> +        if (!w)
> +            w = jxl_u32(1, 65, 321, 1345, 6, 8, 10, 12);
> +    }
> +    *width = w, *height = h;
> +    return 0;
> +}
> +
> +static int jpegxl_parse_animation_header(JpegXLParseContext *jxlr,
> +    uint32_t *num, uint32_t *denom, uint32_t *count, int *have_pts)
> +{
> +    *num = jxl_u32(100, 1000, 1, 1, 0, 0, 10, 30);
> +    *denom = jxl_u32(1, 1001, 1, 1, 0, 0, 8, 10);
> +    *count = jxl_u32(0, 0, 0, 0, 0, 3, 16, 32);
> +    *have_pts = jxl_bits(1);
> +    return 0;
> +}
> +
> +static int jpegxl_parse_bit_depth(JpegXLParseContext *jxlr,
> +        uint32_t *depth, uint32_t *exp_depth)
> +{
> +    if (jxl_bits(1)) {
> +        /* float samples */
> +        *depth = jxl_u32(32, 16, 24, 1, 0, 0, 0, 6);
> +        *exp_depth = jxl_bits(4) + 1;
> +    } else {
> +        /* integer samples */
> +        *depth = jxl_u32(8, 10, 12, 1, 0, 0, 0, 6);
> +        *exp_depth = 0;
> +    }
> +    return 0;
> +}
> +
> +static int jpegxl_parse_extra_channel_info(JpegXLParseContext *jxlr,
> +        JpegXLExtraChannelInfo *info, int level)
> +{
> +    int status = 0;
> +    int all_default = jxl_bits(1);
> +
> +    if (!all_default) {
> +        info->type = jxl_enum();
> +        if (info->type > 63)
> +            return 1; /* enum types cannot be 64+ */
> +        status = jpegxl_parse_bit_depth(jxlr, &info->bits_per_sample, &info->exp_bits_per_sample);
> +        if (status)
> +            return status;
> +        info->dim_shift = jxl_u32(0, 3, 4, 1, 0, 0, 0, 3);
> +        /* max of name_len is 1071 = 48 + 2^10 - 1 */
> +        info->name_len = jxl_u32(0, 0, 16, 48, 0, 4, 5, 10);
> +    } else {
> +        info->type = FF_JPEGXL_CT_ALPHA;
> +        info->bits_per_sample = 8;
> +        info->exp_bits_per_sample = 0;
> +    }
> +
> +    /* skip over the name as it is not used */
> +    jxl_bits(8 * info->name_len);
> +
> +    info->alpha_associated = !all_default && info->type == FF_JPEGXL_CT_ALPHA && jxl_bits(1);
> +
> +    if (info->type == FF_JPEGXL_CT_SPOT_COLOR) {
> +        info->red = jpegxl_f16(jxlr);
> +        info->green = jpegxl_f16(jxlr);
> +        info->blue = jpegxl_f16(jxlr);
> +        info->solidity = jpegxl_f16(jxlr);
> +    }
> +
> +    if (info->type == FF_JPEGXL_CT_CFA)
> +        info->cfa_channel = jxl_u32(1, 0, 3, 19, 0, 2, 4, 8);
> +    else
> +        info->cfa_channel = 1;
> +
> +    if (info->type == FF_JPEGXL_CT_BLACK && level < 10)
> +        return 1;
> +
> +    return 0;
> +}
> +
> +/**
> + * Parse a JpegXL Codestream Header and read it into the argument Header
> + * @return 0 upon success, 1 upon error
> + */
> +static int jpegxl_parse_codestream_header(void *avctx,
> +                                          JpegXLParseContext *jxlr,
> +                                          JpegXLHeader *header)
> +{
> +    int all_default, extra_fields = 0, status;
> +
> +    /* signature check */
> +    if (jxl_bits(16) != FF_JPEGXL_CODESTREAM_SIGNATURE_LE) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed JPEG XL Signature Check\n");
> +        goto fail;
> +    }
> +
> +    status = jpegxl_parse_size_header(jxlr, &header->width, &header->height);
> +    if (status) {
> +        jxl_parse_err("size header");
> +        goto fail;
> +    }
> +
> +    if (jxlr->level < 10) {
> +        /* level 5 codestream */
> +        if (header->width > (1 << 18) || header->height > (1 << 18)
> +            || (header->width >> 4) * (header->height >> 4) > (1 << 20)) {
> +                jxl_parse_err("width or height or both");
> +                goto fail;
> +            }
> +        header->level = 5;
> +    } else {
> +        /* level 10 codestream */
> +        if (header->width > (1 << 30) || header->height > (1 << 30)
> +            || (header->width >> 14) * (header->height >> 14) > (1 << 12)) {
> +                jxl_parse_err("width or height or both");
> +                goto fail;
> +            }
> +        header->level = 10;
> +    }
> +
> +    all_default = jxl_bits(1);
> +
> +    if (!all_default)
> +        extra_fields = jxl_bits(1);
> +
> +    if (extra_fields) {
> +        header->orientation = jxl_bits(3);
> +        if (header->orientation > 3)
> +            FFSWAP(uint32_t, header->width, header->height);
> +
> +        /* intrinstic size */
> +        if (jxl_bits(1)) {
> +            status = jpegxl_parse_size_header(jxlr, &header->intrinsic_width, &header->intrinsic_height);
> +            if (status) {
> +                jxl_parse_err("intrinstic size header");
> +                goto fail;
> +            }
> +        }
> +
> +        /* preview header */
> +        if (jxl_bits(1)) {
> +            status = jpegxl_parse_preview_header(jxlr,
> +                &header->preview_width, &header->preview_height);
> +            if (status) {
> +                jxl_parse_err("preview header");
> +                goto fail;
> +            }
> +            if (header->preview_width > 4096 || header->preview_height > 4096) {
> +                jxl_parse_errv("preview header size %" PRIu32 ", %" PRIu32,
> +                    header->preview_width, header->preview_height);
> +                goto fail;
> +            }
> +        }
> +
> +        /* animation header */
> +        if (jxl_bits(1)) {
> +            status = jpegxl_parse_animation_header(jxlr,
> +                &header->anim_tb_num, &header->anim_tb_denom,
> +                &header->anim_loop_count, &header->anim_have_pts);
> +            if (status) {
> +                jxl_parse_err("animation header");
> +                goto fail;
> +            }
> +        }
> +
> +    }
> +
> +    if (!all_default) {
> +        status = jpegxl_parse_bit_depth(jxlr,
> +            &header->bits_per_sample, &header->exp_bits_per_sample);
> +        if (status) {
> +            jxl_parse_err("bit depth header");
> +            goto fail;
> +        }
> +
> +        header->modular_16bit_buffers = jxl_bits(1);
> +
> +        if (!header->modular_16bit_buffers && header->level < 10) {
> +            jxl_parse_err("modular 16bit buffers");
> +            goto fail;
> +        }
> +
> +        header->num_extra_channels = jxl_u32(0, 1, 2, 1, 0, 0, 4, 12);
> +        if (header->num_extra_channels > 256 ||
> +            header->level < 10 && header->num_extra_channels > 4) {
> +                jxl_parse_err("too many extra channels");
> +                goto fail;
> +        }
> +        for (uint32_t i = 0; i < header->num_extra_channels; i++) {
> +            status = jpegxl_parse_extra_channel_info(jxlr, &header->extra_channel_info[i], header->level);
> +            if (status) {
> +                jxl_parse_errv("extra channel number %" PRIu32, i);
> +                goto fail;
> +            }
> +        }
> +
> +        header->xyb_encoded = jxl_bits(1);
> +
> +        if (jxl_bits(1)) {
> +            /* all_default for color encoding */
> +            header->have_icc_profile = 0;
> +            header->color_space = FF_JPEGXL_CS_RGB;
> +            header->white_point = FF_JPEGXL_WP_D65;
> +            header->primaries = FF_JPEGXL_PR_SRGB;
> +            header->transfer_function = (1 << 24) + FF_JPEGXL_TF_SRGB;
> +            header->rendering_intent = FF_JPEGXL_RI_RELATIVE;
> +        } else {
> +            header->have_icc_profile = jxl_bits(1);
> +            header->color_space = jxl_enum();
> +            if (header->color_space > 63) {
> +                jxl_parse_errv("color space enum %" PRIu32, header->white_point);
> +                goto fail;
> +            }
> +            if (header->color_space != FF_JPEGXL_CS_XYB
> +                    && !header->have_icc_profile) {
> +                header->white_point = jxl_enum();
> +                if (header->white_point > 63) {
> +                    jxl_parse_errv("white point enum %" PRIu32, header->white_point);
> +                    goto fail;
> +                }
> +            } else {
> +                header->white_point = FF_JPEGXL_WP_D65;
> +            }
> +            if (header->white_point == FF_JPEGXL_WP_CUSTOM) {
> +                header->white_ux = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                header->white_uy = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +            }
> +            if (header->color_space != FF_JPEGXL_CS_XYB
> +                    && header->color_space != FF_JPEGXL_CS_GRAY
> +                    && !header->have_icc_profile) {
> +                header->primaries = jxl_enum();
> +                if (header->primaries > 63) {
> +                    jxl_parse_errv("primaries enum %" PRIu32, header->primaries);
> +                    goto fail;
> +                }
> +            } else {
> +                header->primaries = FF_JPEGXL_PR_SRGB;
> +            }
> +            if (header->primaries == FF_JPEGXL_PR_CUSTOM) {
> +                header->red_ux = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                header->red_uy = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                header->green_ux = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                header->green_uy = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                header->blue_ux = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +                header->blue_uy = jxl_u32(0, 524288, 1048576, 2097152, 19, 19, 20, 21);
> +            }
> +            if (!header->have_icc_profile) {
> +                if (jxl_bits(1)) {
> +                    /* this is gamma */
> +                    header->transfer_function = jxl_bits(24);
> +                } else {
> +                    header->transfer_function = jxl_enum();
> +                    if (header->transfer_function > 63) {
> +                        jxl_parse_errv("transfer function enum %" PRIu32, header->transfer_function);
> +                        goto fail;
> +                    }
> +                    /*
> +                     * higher than the highest possible gamma value
> +                     * marks it as an enum isntead of gamma
> +                     */
> +                    header->transfer_function += 1 << 24;
> +                }
> +                header->rendering_intent = jxl_enum();
> +                if (header->rendering_intent > 63) {
> +                    jxl_parse_errv("rendering intent enum %" PRIu32, header->rendering_intent);
> +                    goto fail;
> +                }
> +            } else {
> +                header->transfer_function = (1 << 24) + FF_JPEGXL_TF_SRGB;
> +                header->rendering_intent = FF_JPEGXL_RI_RELATIVE;
> +            }
> +        }
> +
> +        /* intensity_target should be set to 255 without it,
> +         * but it's unused
> +         * lazy && works with this macro */
> +        if (extra_fields && !jxl_bits(1)) {
> +            /*
> +             * these are 16-bit floats
> +             * since these fields are not used at the moment,
> +             * we skip 16 bits for each instead of calling
> +             * jpegxl_f16(jxlr) and assigning
> +             */
> +            /* intensity target */
> +            jxl_bits(16);
> +            /* min nits */
> +            jxl_bits(16);
> +            /* relative to max display */
> +            jxl_bits(1);
> +            /* linear below */
> +            jxl_bits(16);
> +        }
> +
> +        header->extensions = jpegxl_u64(jxlr);
> +        if (header->extensions) {
> +            for (int i = 0; i < 64; i++) {
> +                if (header->extensions & (UINT64_C(1) << i))
> +                    header->extension_bits[i] = jpegxl_u64(jxlr);
> +            }
> +        }
> +
> +    } else {
> +        header->modular_16bit_buffers = 1;
> +        header->xyb_encoded = 1;
> +    }
> +
> +    header->default_transform = jxl_bits(1);
> +
> +    /* opsin_inverse_matrix skipped over because it is not used atm */
> +    if (!header->default_transform && header->xyb_encoded && !jxl_bits(1)) {
> +        header->have_opsin_inv = 1;
> +        jxl_bits(16 * 16);
> +    }
> +
> +    if (!header->default_transform)
> +        header->cw_mask = jxl_bits(3);
> +
> +    /*
> +     * up2_weight skipped over because it is not used atm
> +     */
> +    if (header->cw_mask & 1)
> +        jxl_bits(16 * 15);
> +
> +    /*
> +     * up4_weight skipped over because it is not used atm
> +     */
> +    if (header->cw_mask & 2)
> +        jxl_bits(16 * 55);
> +
> +    /*
> +     * up8_weight skipped over because it is not used atm
> +     */
> +    if (header->cw_mask & 4)
> +        jxl_bits(16 * 210);
> +
> +    /* zero pad to byte */
> +    if (!header->have_icc_profile) {
> +        int byte_remaining = 7 - (jxlr->gb.index - 1) % 8;
> +        if (byte_remaining && jxl_bits(byte_remaining)) {
> +            jxl_parse_err("zero padding to byte");
> +            goto fail;
> +        }
> +    }
> +
> +    /* bits consumed > buflen */
> +    if (jxlr->gb.index > jxlr->gb.size_in_bits) {
> +        jxl_parse_err("unexpected end of file");
> +        goto fail;
> +    }
> +
> +    return 0;
> +
> +fail:
> +    return 1;
> +}
> +
> +static int jpegxl_parse_header(void *avctx, JpegXLParseContext *jxlr, JpegXLHeader *header)
> +{
> +    int status;
> +    if (jxlr->container) {
> +        status = jpegxl_skip_boxes(avctx, jxlr);
> +        if (status)
> +            return status;
> +    }
> +    return jpegxl_parse_codestream_header(avctx, jxlr, header);
> +}
> +
> +int avpriv_jpegxl_verify_codestream_header(void *avctx, uint8_t *buf, int buflen)
> +{
> +    JpegXLParseContext jxlri = { 0 };
> +    JpegXLHeader header = { 0 };
> +    int status;
> +    init_get_bits8(&jxlri.gb, buf, buflen);
> +    jxlri.level = 5;
> +    status = jpegxl_parse_codestream_header(avctx, &jxlri, &header);
> +    return status;
> +}
> +
> +static enum AVPixelFormat jpegxl_header_get_pixfmt(JpegXLHeader *header)
> +{
> +    int alpha = 0;
> +    for (int i = 0; i < header->num_extra_channels; i++) {
> +        if (header->extra_channel_info[i].type == FF_JPEGXL_CT_ALPHA) {
> +            alpha = 1;
> +            break;
> +        }
> +    }
> +    if (header->color_space == FF_JPEGXL_CS_GRAY) {
> +        if (header->bits_per_sample <= 8)
> +            return alpha ? AV_PIX_FMT_YA8 : AV_PIX_FMT_GRAY8;
> +        if (header->bits_per_sample > 16 || header->exp_bits_per_sample)
> +            return alpha ? AV_PIX_FMT_NONE : AV_PIX_FMT_GRAYF32;
> +        return alpha ? AV_PIX_FMT_YA16 : AV_PIX_FMT_GRAY16;
> +    } else if (header->color_space == FF_JPEGXL_CS_RGB
> +            || header->color_space == FF_JPEGXL_CS_XYB) {
> +        if (header->bits_per_sample <= 8)
> +            return alpha ? AV_PIX_FMT_RGBA : AV_PIX_FMT_RGB24;
> +        if (header->bits_per_sample > 16 || header->exp_bits_per_sample)
> +            return alpha ? AV_PIX_FMT_GBRAPF32 : AV_PIX_FMT_GBRPF32;
> +        return alpha ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48;
> +    }
> +    return AV_PIX_FMT_NONE;
> +}
> +
> +static av_cold int jpegxl_parse_init(AVCodecParserContext *s1)
> +{
> +    s1->pict_type = AV_PICTURE_TYPE_NONE;
> +    return 0;
> +}
> +
> +static int jpegxl_parse(AVCodecParserContext *s1,
> +                     AVCodecContext *avctx,
> +                     const uint8_t **poutbuf, int *poutbuf_size,
> +                     const uint8_t *buf, int buf_size)
> +{
> +    JpegXLParseContext *jxlr = s1->priv_data;
> +    JpegXLHeader header = { 0 };
> +    int next = END_NOT_FOUND, status = 0;
> +    int i = 0;
> +
> +    *poutbuf_size = 0;
> +    *poutbuf = NULL;
> +
> +    if (buf_size == 0 || s1->flags & PARSER_FLAG_COMPLETE_FRAMES) {
> +        /* eof is a frame boundary */
> +        next = buf_size;
> +    } else if (!jxlr->pc.frame_start_found) {
> +        /* look for stream signature */
> +        uint64_t state64 = jxlr->pc.state64;
> +        for (; i < buf_size; i++) {
> +            state64 = (state64 << 8) | buf[i];
> +            if ((state64 & 0xFFFF) == FF_JPEGXL_CODESTREAM_SIGNATURE_BE) {
> +                i -= 1;
> +                jxlr->pc.frame_start_found = 1;
> +                jxlr->container = 0;
> +                jxlr->level = 5;
> +                break;
> +            }
> +            if (state64 == FF_JPEGXL_CONTAINER_SIGNATURE_BE) {
> +                i -= 7;
> +                jxlr->pc.frame_start_found = 1;
> +                jxlr->container = 1;
> +                jxlr->level = 5;
> +                break;
> +            }
> +        }
> +        jxlr->pc.state64 = state64;
> +    }
> +
> +    if (jxlr->pc.frame_start_found && !jxlr->found_codestream) {
> +        init_get_bits8(&jxlr->gb, buf + i, buf_size - i);
> +        status = jpegxl_parse_header(NULL, jxlr, &header);
> +        if (status == 0) {
> +            /* parsed successfully */
> +            s1->pict_type = AV_PICTURE_TYPE_I;
> +            s1->key_frame = 1;
> +            s1->width = avctx->width = header.width;
> +            s1->height = avctx->height = header.height;
> +            s1->format = avctx->pix_fmt = jpegxl_header_get_pixfmt(&header);
> +        }
> +        if (status == 2)
> +            /* need higher probesize */
> +            jxlr->found_codestream = 0;
> +        else
> +            jxlr->found_codestream = 1;
> +    }
> +
> +    if (ff_combine_frame(&jxlr->pc, next, &buf, &buf_size) < 0) {
> +        *poutbuf = NULL;
> +        *poutbuf_size = 0;
> +        return buf_size;
> +    }
> +
> +    jxlr->pc.frame_start_found = 0;
> +
> +    *poutbuf = buf + i;
> +    *poutbuf_size = buf_size - i;

It seems the parser is discarding some data here (if i != 0): the output starts at buf + i, so the first i bytes of buf are never returned.

> +
> +    return next;
> +}
> +
> +const AVCodecParser ff_jpegxl_parser = {
> +    .codec_ids      = { AV_CODEC_ID_JPEGXL },
> +    .priv_data_size = sizeof(JpegXLParseContext),
> +    .parser_init    = jpegxl_parse_init,
> +    .parser_parse   = jpegxl_parse,
> +    .parser_close   = ff_parse_close,
> +};
> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
> index 6b40c18d80..18a40eceea 100644
> --- a/libavcodec/parsers.c
> +++ b/libavcodec/parsers.c
> @@ -52,6 +52,7 @@ extern const AVCodecParser ff_h264_parser;
>  extern const AVCodecParser ff_hevc_parser;
>  extern const AVCodecParser ff_ipu_parser;
>  extern const AVCodecParser ff_jpeg2000_parser;
> +extern const AVCodecParser ff_jpegxl_parser;
>  extern const AVCodecParser ff_mjpeg_parser;
>  extern const AVCodecParser ff_mlp_parser;
>  extern const AVCodecParser ff_mpeg4video_parser;
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index a744e7469f..26ee41eb1f 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -29,7 +29,7 @@
>  
>  #include "version_major.h"
>  
> -#define LIBAVCODEC_VERSION_MINOR  25
> +#define LIBAVCODEC_VERSION_MINOR  26
>  #define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \



More information about the ffmpeg-devel mailing list