[FFmpeg-devel] [PATCH] avformat: Add support for embedding cover art in Ogg files

Zsolt Vadász zsolt_vadasz at protonmail.com
Tue Jan 3 12:41:53 EET 2023


On Sunday, January 1st, 2023 at 10:41 AM, Jean-Baptiste Kempf <jb at videolan.org> wrote:


> On Thu, 29 Dec 2022, at 22:05, Zsolt Vadász wrote:
> 
> > It's done similarly to how the flac muxer does it, so I reused most of
> > the code and adapted it.
> 
> 
> Would a common function make sense here?

Yes. I named it ff_flac_write_picture and put it in libavformat/flac_picture.c. Here is the revised patch:

Signed-off-by: Zsolt Vadasz <zsolt_vadasz at protonmail.com>
---
 libavformat/flac_picture.c | 132 +++++++++++++++++++++++
 libavformat/flac_picture.h |   5 +
 libavformat/flacenc.c      |  90 +---------------
 libavformat/oggenc.c       | 207 ++++++++++++++++++++++++++++++-------
 4 files changed, 308 insertions(+), 126 deletions(-)

diff --git a/libavformat/flac_picture.c b/libavformat/flac_picture.c
index b33fee75b4..30152a2ba9 100644
--- a/libavformat/flac_picture.c
+++ b/libavformat/flac_picture.c
@@ -20,6 +20,9 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/pixdesc.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/png.h"
 #include "avformat.h"
@@ -188,3 +191,132 @@ fail:
 
     return ret;
 }
+
+/**
+ * Serialize an attached picture as a FLAC METADATA_BLOCK_PICTURE.
+ *
+ * With isogg == 0 the block, preceded by its FLAC metadata block header, is
+ * written to s->pb. Otherwise the block is base64-encoded and added to the
+ * metadata of s->streams[audio_stream_idx] as "METADATA_BLOCK_PICTURE".
+ * pkt is only read, never unreferenced: it stays owned by the caller.
+ *
+ * @param attached_types bitmask of picture types seen so far; updated here
+ * @return 0 on success or if pkt has no data, a negative AVERROR on failure
+ */
+int ff_flac_write_picture(struct AVFormatContext *s,
+                          int isogg,
+                          unsigned *attached_types,
+                          int audio_stream_idx, // unused if !isogg
+                          AVPacket *pkt)
+{
+    AVIOContext *pb = s->pb;
+    const AVPixFmtDescriptor *pixdesc;
+    const CodecMime *mime = ff_id3v2_mime_tags;
+    AVDictionaryEntry *e;
+    const char *mimetype = NULL, *desc = "";
+    const AVStream *st = s->streams[pkt->stream_index];
+    int i, mimelen, desclen, type = 0, blocklen, bpp, ret = 0;
+
+    if (!pkt->data)
+        return 0;
+
+    while (mime->id != AV_CODEC_ID_NONE) {
+        if (mime->id == st->codecpar->codec_id) {
+            mimetype = mime->str;
+            break;
+        }
+        mime++;
+    }
+    if (!mimetype) {
+        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
+               "write an attached picture.\n", st->index);
+        return AVERROR(EINVAL);
+    }
+    mimelen = strlen(mimetype);
+
+    /* get the picture type */
+    e = av_dict_get(st->metadata, "comment", NULL, 0);
+    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
+        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
+            type = i;
+            break;
+        }
+    }
+
+    /* 0x6 selects picture types 1 and 2, the only ones that must be unique */
+    if (((*attached_types) & (1 << type)) & 0x6) {
+        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
+        return AVERROR(EINVAL);
+    }
+
+    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
+                      st->codecpar->width != 32 ||
+                      st->codecpar->height != 32)) {
+        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG\n");
+        return AVERROR(EINVAL);
+    }
+
+    *attached_types |= (1 << type);
+
+    /* get the description */
+    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
+        desc = e->value;
+    desclen = strlen(desc);
+
+    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
+    if (blocklen >= 1<<24) {
+        av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24);
+        return AVERROR(EINVAL);
+    }
+
+    pixdesc = av_pix_fmt_desc_get(st->codecpar->format);
+    bpp     = pixdesc ? av_get_bits_per_pixel(pixdesc) : 0;
+
+    if (!isogg) {
+        avio_w8(pb, 0x06);
+        avio_wb24(pb, blocklen);
+        avio_wb32(pb, type);
+        avio_wb32(pb, mimelen);
+        avio_write(pb, mimetype, mimelen);
+        avio_wb32(pb, desclen);
+        avio_write(pb, desc, desclen);
+        avio_wb32(pb, st->codecpar->width);
+        avio_wb32(pb, st->codecpar->height);
+        avio_wb32(pb, bpp);
+        avio_wb32(pb, 0);
+        avio_wb32(pb, pkt->size);
+        avio_write(pb, pkt->data, pkt->size);
+    } else {
+        uint8_t *block, *ptr;
+        char *encoded;
+        int encoded_len = AV_BASE64_SIZE(blocklen);
+
+        block   = av_mallocz(blocklen);
+        encoded = av_mallocz(encoded_len);
+        if (!block || !encoded) {
+            av_free(block);
+            av_free(encoded);
+            return AVERROR(ENOMEM);
+        }
+        ptr = block;
+        bytestream_put_be32(&ptr, type);
+        bytestream_put_be32(&ptr, mimelen);
+        bytestream_put_buffer(&ptr, mimetype, mimelen);
+        bytestream_put_be32(&ptr, desclen);
+        bytestream_put_buffer(&ptr, desc, desclen);
+        bytestream_put_be32(&ptr, st->codecpar->width);
+        bytestream_put_be32(&ptr, st->codecpar->height);
+        bytestream_put_be32(&ptr, bpp);
+        bytestream_put_be32(&ptr, 0);
+        bytestream_put_be32(&ptr, pkt->size);
+        bytestream_put_buffer(&ptr, pkt->data, pkt->size);
+        av_base64_encode(encoded, encoded_len, block, blocklen);
+        av_free(block);
+
+        /* keep earlier pictures; encoded is now owned (and freed) by the dict */
+        ret = av_dict_set(&s->streams[audio_stream_idx]->metadata,
+                          "METADATA_BLOCK_PICTURE", encoded,
+                          AV_DICT_MULTIKEY | AV_DICT_DONT_STRDUP_VAL);
+    }
+    return ret < 0 ? ret : 0;
+}
diff --git a/libavformat/flac_picture.h b/libavformat/flac_picture.h
index db074e531d..efa11aee32 100644
--- a/libavformat/flac_picture.h
+++ b/libavformat/flac_picture.h
@@ -39,5 +39,10 @@
  */
 int ff_flac_parse_picture(AVFormatContext *s, uint8_t **buf, int buf_size,
                           int truncate_workaround);
+int ff_flac_write_picture(struct AVFormatContext *s,
+                          int isogg,
+                          unsigned *attached_types,
+                          int audio_stream_idx,
+                          AVPacket *pkt);
 
 #endif /* AVFORMAT_FLAC_PICTURE_H */
diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
index d7930f4a6e..ec26113bb2 100644
--- a/libavformat/flacenc.c
+++ b/libavformat/flacenc.c
@@ -32,6 +32,7 @@
 #include "internal.h"
 #include "version.h"
 #include "vorbiscomment.h"
+#include "flac_picture.h"
 
 
 typedef struct FlacMuxerContext {
@@ -78,94 +79,9 @@ static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m,
     return 0;
 }
 
-static int flac_write_picture(struct AVFormatContext *s, AVPacket *pkt)
-{
-    FlacMuxerContext *c = s->priv_data;
-    AVIOContext *pb = s->pb;
-    const AVPixFmtDescriptor *pixdesc;
-    const CodecMime *mime = ff_id3v2_mime_tags;
-    AVDictionaryEntry *e;
-    const char *mimetype = NULL, *desc = "";
-    const AVStream *st = s->streams[pkt->stream_index];
-    int i, mimelen, desclen, type = 0, blocklen;
-
-    if (!pkt->data)
-        return 0;
-
-    while (mime->id != AV_CODEC_ID_NONE) {
-        if (mime->id == st->codecpar->codec_id) {
-            mimetype = mime->str;
-            break;
-        }
-        mime++;
-    }
-    if (!mimetype) {
-        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
-               "write an attached picture.\n", st->index);
-        return AVERROR(EINVAL);
-    }
-    mimelen = strlen(mimetype);
-
-    /* get the picture type */
-    e = av_dict_get(st->metadata, "comment", NULL, 0);
-    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
-        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
-            type = i;
-            break;
-        }
-    }
-
-    if ((c->attached_types & (1 << type)) & 0x6) {
-        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
-        return AVERROR(EINVAL);
-    }
-
-    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
-                      st->codecpar->width != 32 ||
-                      st->codecpar->height != 32)) {
-        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG");
-        return AVERROR(EINVAL);
-    }
-
-    c->attached_types |= (1 << type);
-
-    /* get the description */
-    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
-        desc = e->value;
-    desclen = strlen(desc);
-
-    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
-    if (blocklen >= 1<<24) {
-        av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24);
-        return AVERROR(EINVAL);
-    }
-
-    avio_w8(pb, 0x06);
-    avio_wb24(pb, blocklen);
-
-    avio_wb32(pb, type);
-
-    avio_wb32(pb, mimelen);
-    avio_write(pb, mimetype, mimelen);
-
-    avio_wb32(pb, desclen);
-    avio_write(pb, desc, desclen);
-
-    avio_wb32(pb, st->codecpar->width);
-    avio_wb32(pb, st->codecpar->height);
-    if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
-        avio_wb32(pb, av_get_bits_per_pixel(pixdesc));
-    else
-        avio_wb32(pb, 0);
-    avio_wb32(pb, 0);
-
-    avio_wb32(pb, pkt->size);
-    avio_write(pb, pkt->data, pkt->size);
-    return 0;
-}
-
 static int flac_finish_header(struct AVFormatContext *s)
 {
+    FlacMuxerContext *c = s->priv_data;
     int i, ret, padding = s->metadata_header_padding;
     if (padding < 0)
         padding = 8192;
@@ -178,7 +94,7 @@ static int flac_finish_header(struct AVFormatContext *s)
         AVPacket *pkt = st->priv_data;
         if (!pkt)
             continue;
-        ret = flac_write_picture(s, pkt);
+        ret = ff_flac_write_picture(s, 0, &c->attached_types, -1, pkt);
         av_packet_unref(pkt);
         if (ret < 0 && (s->error_recognition & AV_EF_EXPLODE))
             return ret;
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 5003314adb..c604e493f0 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -23,18 +23,27 @@
 
 #include <stdint.h>
 
+#include "libavcodec/codec_id.h"
+#include "libavutil/avutil.h"
 #include "libavutil/crc.h"
+#include "libavutil/log.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/bswap.h"
 #include "libavcodec/xiph.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/flac.h"
 #include "avformat.h"
+#include "id3v2.h"
 #include "avio_internal.h"
 #include "internal.h"
 #include "version.h"
 #include "vorbiscomment.h"
+#include "flac_picture.h"
 
 #define MAX_PAGE_SIZE 65025
 
@@ -77,6 +86,11 @@ typedef struct OGGContext {
     int pref_size; ///< preferred page size (0 => fill all segments)
     int64_t pref_duration;      ///< preferred page duration (0 => fill all segments)
     int serial_offset;
+
+    PacketList queue;
+    int audio_stream_idx;
+    int waiting_pics;
+    unsigned attached_types;
 } OGGContext;
 
 #define OFFSET(x) offsetof(OGGContext, x)
@@ -468,12 +482,14 @@ static void ogg_write_pages(AVFormatContext *s, int flush)
     ogg->page_list = p;
 }
 
-static int ogg_init(AVFormatContext *s)
+static int ogg_finish_init(AVFormatContext *s)
 {
     OGGContext *ogg = s->priv_data;
     OGGStreamContext *oggstream = NULL;
     int i, j;
 
+    ogg->waiting_pics = 0;
+
     if (ogg->pref_size)
         av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
 
@@ -481,29 +497,6 @@ static int ogg_init(AVFormatContext *s)
         AVStream *st = s->streams[i];
         unsigned serial_num = i + ogg->serial_offset;
 
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
-                /* Opus requires a fixed 48kHz clock */
-                avpriv_set_pts_info(st, 64, 1, 48000);
-            else
-                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
-        }
-
-        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
-            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
-            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
-            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
-            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
-            return AVERROR(EINVAL);
-        }
-
-        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "No extradata present\n");
-            return AVERROR_INVALIDDATA;
-        }
         oggstream = av_mallocz(sizeof(*oggstream));
         if (!oggstream)
             return AVERROR(ENOMEM);
@@ -561,10 +554,11 @@ static int ogg_init(AVFormatContext *s)
             int header_type = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 3 : 0x81;
             int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0;
 
-            if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
-                                      st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
-                                      (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
-                av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
+            if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC) &&
+                avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
+                                          st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
+                                          (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
+                av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i);
                 oggstream->header[1] = NULL;
                 return AVERROR_INVALIDDATA;
             }
@@ -601,7 +595,59 @@ static int ogg_init(AVFormatContext *s)
     return 0;
 }
 
-static int ogg_write_header(AVFormatContext *s)
+/* Validate the streams and count the attached pictures still to be received.
+ * Per-stream setup is done by ogg_finish_init(), which also logs the pagesize
+ * deprecation warning, so that warning is not repeated here. */
+static int ogg_init(AVFormatContext *s)
+{
+    OGGContext *ogg = s->priv_data;
+    int i;
+
+    ogg->waiting_pics = 0;
+    ogg->attached_types = 0;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            ogg->audio_stream_idx = i;
+            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
+                /* Opus requires a fixed 48kHz clock */
+                avpriv_set_pts_info(st, 64, 1, 48000);
+            else
+                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        }
+
+        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
+            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
+            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
+            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
+            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
+            st->codecpar->codec_id != AV_CODEC_ID_VP8    &&
+            st->codecpar->codec_id != AV_CODEC_ID_PNG    &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
+            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
+            return AVERROR(EINVAL);
+        }
+
+        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
+            st->codecpar->codec_id != AV_CODEC_ID_VP8 &&
+            st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
+            av_log(s, AV_LOG_ERROR, "No extradata present\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+            (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+            ogg->waiting_pics++;
+    }
+
+    if (!ogg->waiting_pics)
+        return ogg_finish_init(s);
+    return 0;
+}
+
+static int ogg_finish_header(AVFormatContext *s)
 {
     OGGStreamContext *oggstream = NULL;
     int i, j;
@@ -631,6 +677,14 @@ static int ogg_write_header(AVFormatContext *s)
     return 0;
 }
 
+static int ogg_write_header(AVFormatContext *s)
+{
+    const OGGContext *ogg = s->priv_data;
+
+    /* The headers are held back while attached pictures are still awaited. */
+    return ogg->waiting_pics ? 0 : ogg_finish_header(s);
+}
+
 static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -683,20 +737,92 @@ static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
     return 0;
 }
 
+/* Finish the deferred init and header, then drain the queued packets.
+ * Every queued packet is released; packets are written only as long as no
+ * error has occurred. Returns the first error, or the last call's result. */
+static int ogg_queue_flush(AVFormatContext *s)
+{
+    OGGContext *c = s->priv_data;
+    AVPacket *const pkt = ffformatcontext(s)->pkt;
+    int ret = ogg_finish_init(s);
+
+    /* Never emit headers for streams whose setup failed. */
+    if (ret >= 0)
+        ret = ogg_finish_header(s);
+
+    while (c->queue.head && avpriv_packet_list_get(&c->queue, pkt) >= 0) {
+        if (ret >= 0)
+            ret = ogg_write_packet_internal(s, pkt);
+        av_packet_unref(pkt);
+    }
+    return ret;
+}
+
 static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    int i;
+    OGGContext *c = s->priv_data;
+    int i, ret;
 
-    if (pkt && pkt->size)
-        return ogg_write_packet_internal(s, pkt);
+    if (pkt && pkt->size) {
+        AVStream *st = s->streams[pkt->stream_index];
+
+        if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
+            /* Regular stream: queued only while pictures are still awaited. */
+            if (!c->waiting_pics)
+                return ogg_write_packet_internal(s, pkt);
+
+            /* buffer packets until we get all the pictures */
+            ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0);
+            if (ret < 0) {
+                av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
+                c->waiting_pics = 0;
+                ret = ogg_queue_flush(s);
+                if (ret < 0)
+                    return ret;
+                return ogg_write_packet_internal(s, pkt);
+            }
+            return 0;
+        }
 
-    for (i = 0; i < s->nb_streams; i++) {
-        OGGStreamContext *oggstream = s->streams[i]->priv_data;
-        if (oggstream->page.segments_count)
-            ogg_buffer_page(s, oggstream);
-    }
+        /* Attached picture: ignored once no more pictures are awaited. */
+        if (!c->waiting_pics)
+            return 0;
+
+        /* warn only once for each stream */
+        if (st->nb_frames == 1) {
+            av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                   " ignoring.\n", pkt->stream_index);
+        }
+        if (st->nb_frames >= 1)
+            return 0;
 
-    ogg_write_pages(s, 2);
+        ret = ff_flac_write_picture(s, 1, &c->attached_types,
+                                    c->audio_stream_idx, pkt);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Failed to process attached picture.\n");
+            return ret;
+        }
+        c->waiting_pics--;
+
+        /* flush the buffered packets */
+        if (!c->waiting_pics &&
+            (ret = ogg_queue_flush(s)) < 0)
+            return ret;
+        return 0;
+    } else {
+        /* Flush request. While pictures are pending, ogg_finish_init() (which
+         * allocates the per-stream contexts) has not run: nothing to flush. */
+        if (c->waiting_pics)
+            return 0;
+
+        for (i = 0; i < s->nb_streams; i++) {
+            OGGStreamContext *oggstream = s->streams[i]->priv_data;
+            if (oggstream->page.segments_count)
+                ogg_buffer_page(s, oggstream);
+        }
+
+        ogg_write_pages(s, 2);
+    }
     return 1;
 }
 
@@ -734,7 +860,9 @@ static void ogg_free(AVFormatContext *s)
             st->codecpar->codec_id == AV_CODEC_ID_VP8) {
             av_freep(&oggstream->header[0]);
         }
-        av_freep(&oggstream->header[1]);
+        if (st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG)
+            av_freep(&oggstream->header[1]);
     }
 
     while (p) {
@@ -840,6 +968,7 @@ const AVOutputFormat ff_opus_muxer = {
     .extensions        = "opus",
     .priv_data_size    = sizeof(OGGContext),
     .audio_codec       = AV_CODEC_ID_OPUS,
+    .video_codec       = AV_CODEC_ID_PNG,
     .init              = ogg_init,
     .write_header      = ogg_write_header,
     .write_packet      = ogg_write_packet,
-- 
2.39.0



More information about the ffmpeg-devel mailing list