[FFmpeg-devel] [PATCH] avformat: add option to parse/store ID3 PRIV tags in metadata.
Richard Shaffer
rshaffer at tunein.com
Tue Jan 23 01:48:07 EET 2018
On Mon, Jan 22, 2018 at 3:18 PM, wm4 <nfxjfg at googlemail.com> wrote:
> On Wed, 17 Jan 2018 16:34:39 -0800
> rshaffer at tunein.com wrote:
>
> > From: Richard Shaffer <rshaffer at tunein.com>
> >
> > Enables getting access to ID3 PRIV tags from the command-line or
> metadata API
> > when demuxing. The PRIV owner is stored as the metadata key prepended
> with
> > "id3v2_priv.", and the data is stored as the metadata value. As PRIV
> tags may
> > contain arbitrary data, non-printable characters, including NULL bytes,
> are
> > escaped as \xXX.
> >
> > Similarly, any metadata tags that begin with "id3v2_priv." are inserted
> as ID3
> > PRIV tags into the output (assuming the format supports ID3). \xXX
> sequences in
> > the value are un-escaped to their byte value.
> > ---
> >
> > Sorry. One more update. I realized there was a possibility of reading
> past the
> > end of input in id3v2_put_priv, so I added a fix.
> >
> > -Richard
> >
> > libavformat/id3v2.c | 48 +++++++++++++++++++++++++++++++++++++
> > libavformat/id3v2.h | 15 ++++++++++++
> > libavformat/id3v2enc.c | 64 ++++++++++++++++++++++++++++++
> ++++++++++++++++++++
> > libavformat/utils.c | 2 ++
> > 4 files changed, 129 insertions(+)
> >
> > diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> > index 6c216ba7a2..e46174d7c7 100644
> > --- a/libavformat/id3v2.c
> > +++ b/libavformat/id3v2.c
> > @@ -33,6 +33,7 @@
> > #endif
> >
> > #include "libavutil/avstring.h"
> > +#include "libavutil/bprint.h"
> > #include "libavutil/dict.h"
> > #include "libavutil/intreadwrite.h"
> > #include "avio_internal.h"
> > @@ -1224,3 +1225,50 @@ end:
> > av_freep(&chapters);
> > return ret;
> > }
> > +
> > +int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta
> **extra_meta)
> > +{
> > + ID3v2ExtraMeta *cur;
> > + int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY |
> AV_DICT_DONT_STRDUP_VAL;
> > +
> > + for (cur = *extra_meta; cur; cur = cur->next) {
> > + if (!strcmp(cur->tag, "PRIV")) {
> > + ID3v2ExtraMetaPRIV *priv = cur->data;
> > + AVBPrint bprint;
> > + char * escaped, * key;
> > + int i, ret;
> > +
> > + if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s",
> priv->owner)) == NULL) {
> > + return AVERROR(ENOMEM);
> > + }
> > +
> > + av_bprint_init(&bprint, priv->datasize + 1,
> AV_BPRINT_SIZE_UNLIMITED);
> > +
> > + for (i = 0; i < priv->datasize; i++) {
> > + if (priv->data[i] < 32 || priv->data[i] > 126 ||
> priv->data[i] == '\\') {
> > + av_bprintf(&bprint, "\\x%02x", priv->data[i]);
> > + } else {
> > + av_bprint_chars(&bprint, priv->data[i], 1);
> > + }
> > + }
> > +
> > + if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) {
> > + av_free(key);
> > + return ret;
> > + }
> > +
> > + if ((ret = av_dict_set(metadata, key, escaped, dict_flags))
> < 0) {
> > + av_free(key);
> > + av_free(escaped);
> > + return ret;
> > + }
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta
> **extra_meta)
> > +{
> > + return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta);
> > +}
> > \ No newline at end of file
> > diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h
> > index 5e64ead096..9de0bee374 100644
> > --- a/libavformat/id3v2.h
> > +++ b/libavformat/id3v2.h
> > @@ -39,6 +39,8 @@
> > #define ID3v2_FLAG_ENCRYPTION 0x0004
> > #define ID3v2_FLAG_COMPRESSION 0x0008
> >
> > +#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv."
> > +
> > enum ID3v2Encoding {
> > ID3v2_ENCODING_ISO8859 = 0,
> > ID3v2_ENCODING_UTF16BOM = 1,
> > @@ -167,6 +169,19 @@ int ff_id3v2_parse_apic(AVFormatContext *s,
> ID3v2ExtraMeta **extra_meta);
> > */
> > int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta
> **extra_meta);
> >
> > +/**
> > + * Parse PRIV tags into a dictionary. The PRIV owner is the metadata
> key. The
> > + * PRIV data is the value, with non-printable characters escaped.
> > + */
> > +int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta
> **extra_meta);
> > +
> > +/**
> > + * Add metadata for all PRIV tags in the ID3v2 header. The PRIV owner
> is the
> > + * metadata key. The PRIV data is the value, with non-printable
> characters
> > + * escaped.
> > + */
> > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta
> **extra_meta);
> > +
> > extern const AVMetadataConv ff_id3v2_34_metadata_conv[];
> > extern const AVMetadataConv ff_id3v2_4_metadata_conv[];
> >
> > diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c
> > index 14de76ac06..d5e358c8b6 100644
> > --- a/libavformat/id3v2enc.c
> > +++ b/libavformat/id3v2enc.c
> > @@ -96,6 +96,63 @@ static int id3v2_put_ttag(ID3v2EncContext *id3,
> AVIOContext *avioc, const char *
> > return len + ID3v2_HEADER_SIZE;
> > }
> >
> > +/**
> > + * Write a priv frame with owner and data. 'key' is the owner prepended
> with
> > + * ID3v2_PRIV_METADATA_PREFIX. 'data' is provided as a string. Any \xXX
> > + * (where 'X' is a valid hex digit) will be unescaped to the byte value.
> > + */
> > +static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc,
> const char *key, const char *data)
> > +{
> > + int len;
> > + uint8_t *pb;
> > + AVIOContext *dyn_buf;
> > +
> > + if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key)) {
> > + return 0;
> > + }
> > +
> > + if (avio_open_dyn_buf(&dyn_buf) < 0)
> > + return AVERROR(ENOMEM);
> > +
> > + // owner + null byte.
> > + avio_write(dyn_buf, key, strlen(key) + 1);
> > +
> > + while (*data) {
> > + if (av_strstart(data, "\\x", &data)) {
> > + if (data[0] && data[1] && av_isxdigit(data[0]) &&
> av_isxdigit(data[1])) {
> > + char digits[] = {data[0], data[1], 0};
> > + avio_w8(dyn_buf, strtol(digits, NULL, 16));
> > + } else {
> > + // '\x' wasn't followed by two hex digits. Just write
> out
> > + // whatever the 2-4 characters were.
> > + avio_write(dyn_buf, "\\x", 2);
> > + if (data[0]) {
> > + avio_w8(dyn_buf, data[0]);
> > + if (data[1])
> > + avio_w8(dyn_buf, data[1]);
> > + }
>
> Wouldn't this be slightly nicer if you only wrote the "\\x" here, and
> moved the "data += 2;" to the case when the digits were valid?
>
> Though personally I'd probably prefer if it just errored out on invalid
> input, but dunno.
>
Not a bad idea to just error out, I guess. Probably better than silently
writing data that could be a bug. Would this be better?
if (av_strstart(data, "\\x", &data)) {
if (data[0] && data[1] && av_isxdigit(data[0]) &&
av_isxdigit(data[1])) {
char digits[] = {data[0], data[1], 0};
avio_w8(dyn_buf, strtol(digits, NULL, 16));
data += 2;
} else {
ffio_free_dyn_buf(&dyn_buf);
av_log(avioc, AV_LOG_ERROR, "Invalid escape \\x%.2s in
metadata tag '%s'.\n", data, key);
return AVERROR(EINVAL);;
}
} else {
avio_write(dyn_buf, data++, 1);
}
>
> > + }
> > + data += 2;
> > + } else {
> > + avio_write(dyn_buf, data++, 1);
> > + }
> > + }
> > +
> > + len = avio_close_dyn_buf(dyn_buf, &pb);
> > +
> > + avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V'));
> > + if (id3->version == 3)
> > + avio_wb32(avioc, len);
> > + else
> > + id3v2_put_size(avioc, len);
> > + avio_wb16(avioc, 0);
> > + avio_write(avioc, pb, len);
> > +
> > + av_free(pb);
> > +
> > + return len + ID3v2_HEADER_SIZE;
> > +}
> > +
> > static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext
> *pb, AVDictionaryEntry *t,
> > const char table[][4], enum
> ID3v2Encoding enc)
> > {
> > @@ -186,6 +243,13 @@ static int write_metadata(AVIOContext *pb,
> AVDictionary **metadata,
> > continue;
> > }
> >
> > + if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) {
> > + id3->len += ret;
> > + continue;
> > + } else if (ret < 0) {
> > + return ret;
> > + }
> > +
> > /* unknown tag, write as TXXX frame */
> > if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value,
> MKBETAG('T', 'X', 'X', 'X'), enc)) < 0)
> > return ret;
> > diff --git a/libavformat/utils.c b/libavformat/utils.c
> > index 3d733417e1..c15b8cc818 100644
> > --- a/libavformat/utils.c
> > +++ b/libavformat/utils.c
> > @@ -637,6 +637,8 @@ int avformat_open_input(AVFormatContext **ps, const
> char *filename,
> > goto fail;
> > if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) <
> 0)
> > goto fail;
> > + if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0)
> > + goto fail;
> > } else
> > av_log(s, AV_LOG_DEBUG, "demuxer does not support
> additional id3 data, skipping\n");
> > }
>
> I still don't like using hex escaping, but given how there's no better
> mechanism for metadata that can contain arbitrary bytes, it's probably
> fine.
>
I feel the same way, but I don't see a better way without breaking the API
or inventing a new one.
>
> Send a reminder in a day, and I'll push the patch if there are no new
> comments.
>
> Bonus points if you send another patch adding a FATE test for this
> later.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel
mailing list