[FFmpeg-devel] [PATCH] asf - read/write metadata as UTF-16

Michael Niedermayer michaelni
Sun Feb 21 23:03:01 CET 2010


On Sun, Feb 21, 2010 at 09:45:53PM +0100, Anton Khirnov wrote:
> Hi,
> 
> asfenc/dec currently reads/writes metadata in a format that can be
> described as UTF-8 interleaved with 0s. for latin1 strings it happens to
> be identical with UTF-16, for anything else it's identical with random
> garbage.
> 
> attached series of patches attempts to fix it.
> 
> Anton Khirnov

>  common.h |   32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> fe7b6d0330b85006eeeb767aaa1bb4c764f57451  0001-Add-PUT_UTF16-macro.patch
> From 28107be2554a34c1cd110b06725bb0ca59d14005 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 15:20:23 +0100
> Subject: [PATCH 1/5] Add PUT_UTF16 macro.
> 
> ---
>  libavutil/common.h |   32 ++++++++++++++++++++++++++++++++
>  1 files changed, 32 insertions(+), 0 deletions(-)
> 
> diff --git a/libavutil/common.h b/libavutil/common.h
> index c91e658..ba048dd 100644
> --- a/libavutil/common.h
> +++ b/libavutil/common.h
> @@ -335,6 +335,38 @@ static inline av_const int av_ceil_log2(int x)
>          }\
>      }
>  
> +/*!
> + * \def PUT_UTF16(val, tmp, PUT_16BIT, ERROR)
> + * Converts a 32-bit Unicode character to its UTF-16 encoded form (2 or 4 bytes).
> + * \param val is an input-only argument and should be of type uint32_t. It holds

> + * a UCS-4 encoded Unicode character that is to be converted to UTF-16. If
> + * val is given as a function it is executed only once.

doesnt seems so


> + * \param tmp is a temporary variable and should be of type uint16_t. It
> + * represents an intermediate value during conversion that is to be
> + * output by PUT_16BIT.
> + * \param PUT_16BIT writes the converted UTF-16 data to any proper destination
> + * in desired endianness. It could be a function or a statement, and uses tmp
> + * as the input byte.  For example, PUT_BYTE could be "*output++ = tmp;"
> + * PUT_BYTE will be executed 1 or 2 times depending on input character.
> + * \param ERROR action that should be taken when val can't be represented in
> + * UTF-16 encoding. It should be a statement that jumps out of the macro,
> + * like exit(), goto, return, break, or continue.
> + */
> +#define PUT_UTF16(val, tmp, PUT_16BIT, ERROR)\
> +    if (val < 0x10000) {\
> +        tmp = val;\
> +        PUT_16BIT\
> +    } else if ( val <= 0x10FFFFU ) {\
> +        tmp = 0xD800 | ((val - 0x10000) >> 10);\
> +        PUT_16BIT\
> +        tmp = 0xDC00 | ((val - 0x10000) & 0x3FF);\
> +        PUT_16BIT\
> +    } else {\
> +        ERROR\
> +    }\

PUT_UTF8 doesnt handle errors, and you do it quite incompletly

> +
> +
> +
>  #include "mem.h"
>  
>  #ifdef HAVE_AV_CONFIG_H
> -- 
> 1.6.6.1
> 

>  libavformat/asfenc.c   |    2 +-
>  tests/ref/acodec/wmav1 |    2 +-
>  tests/ref/acodec/wmav2 |    2 +-
>  tests/ref/lavf/asf     |    2 +-
>  4 files changed, 4 insertions(+), 4 deletions(-)
> 4495eac8d7428cbddbb190fd4ac6218ab19cf752  0002-asfenc-fix-yet-another-wrong-string-length.patch
> From 5cfe028a96bce36a2292cd175d6f056174cf29ee Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 17:45:14 +0100
> Subject: [PATCH 2/5] asfenc: fix yet another wrong string length.
> 
> ---
>  libavformat/asfenc.c   |    2 +-
>  tests/ref/acodec/wmav1 |    2 +-
>  tests/ref/acodec/wmav2 |    2 +-
>  tests/ref/lavf/asf     |    2 +-
>  4 files changed, 4 insertions(+), 4 deletions(-)

ok if you checked all uses of this function against the spec and
wmp
also make sure you bump the version string we store in asf files


[...]

>  asf.c    |    6 +++---
>  asfenc.c |    9 ++-------
>  2 files changed, 5 insertions(+), 10 deletions(-)
> 82be7a855b218246f2ea0ce710ae2c4a8cc39033  0003-asf-don-t-add-WM-prefix-to-all-tags-it-s-only-requir.patch
> From 8d544fa0f591b72544fb44d6251982686b0c96e9 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 17:17:06 +0100
> Subject: [PATCH 3/5] asf: don't add WM/ prefix to all tags, it's only required for some.

ok

[...]

>  asfenc.c |   71 +++++++++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 47 insertions(+), 24 deletions(-)
> 40fdbbb259951470ed7e054d1d0c931f35b63e96  0004-asfenc-write-tags-in-proper-UTF-16.patch
> From efd7583fe9bba5e965463d657c693b7972745a31 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 17:57:17 +0100
> Subject: [PATCH 4/5] asfenc: write tags in proper UTF-16.
> 
> ---
>  libavformat/asfenc.c |   71 +++++++++++++++++++++++++++++++++-----------------
>  1 files changed, 47 insertions(+), 24 deletions(-)
> 
> diff --git a/libavformat/asfenc.c b/libavformat/asfenc.c
> index 8d6292d..f610a42 100644
> --- a/libavformat/asfenc.c
> +++ b/libavformat/asfenc.c
> @@ -203,21 +203,37 @@ static void put_guid(ByteIOContext *s, const ff_asf_guid *g)
>      put_buffer(s, *g, sizeof(*g));
>  }
>  
> -static void put_str16_nolen(ByteIOContext *s, const char *tag);
> +static int put_str16_nolen(ByteIOContext *s, const char *tag);
>  static void put_str16(ByteIOContext *s, const char *tag)
>  {
> -    put_le16(s, 2*(strlen(tag) + 1));
> -    put_str16_nolen(s, tag);
> +    int len;
> +    uint8_t *pb;
> +    ByteIOContext *dyn_buf;
> +    if (url_open_dyn_buf(&dyn_buf) < 0)
> +        return;
> +
> +    put_str16_nolen(dyn_buf, tag);
> +    len = url_close_dyn_buf(dyn_buf, &pb);
> +    put_le16(s, len);
> +    put_buffer(s, pb, len);
> +    av_freep(&pb);
>  }
>  
> -static void put_str16_nolen(ByteIOContext *s, const char *tag)
> +static int put_str16_nolen(ByteIOContext *s, const char *tag)
>  {
> -    int c;
> +    const uint8_t *q = tag;
> +    int ret = 0;
>  
> -    do{
> -        c = (uint8_t)*tag++;
> -        put_le16(s, c);
> -    }while(c);
> +    while (*q) {
> +        uint32_t ch;
> +        uint16_t tmp;
> +
> +        GET_UTF8(ch, *q++, break;)
> +        PUT_UTF16(ch, tmp, put_le16(s, tmp);ret += 2;, break;)
> +    }
> +    put_le16(s, 0);
> +    ret += 2;
> +    return ret;
>  }
>  
>  static int64_t put_header(ByteIOContext *pb, const ff_asf_guid *g)

> @@ -272,7 +288,7 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
>  {
>      ASFContext *asf = s->priv_data;
>      ByteIOContext *pb = s->pb;
> -    AVMetadataTag *title, *author, *copyright, *comment;
> +    AVMetadataTag *tags[5];

unrelated?


>      int header_size, n, extra_size, extra_size2, wav_extra_size, file_time;
>      int has_title;
>      int metadata_count;
> @@ -281,13 +297,14 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
>      int bit_rate;
>      int64_t duration;
>  
> -    title     = av_metadata_get(s->metadata, "title"    , NULL, 0);
> -    author    = av_metadata_get(s->metadata, "author"   , NULL, 0);
> -    copyright = av_metadata_get(s->metadata, "copyright", NULL, 0);
> -    comment   = av_metadata_get(s->metadata, "comment"  , NULL, 0);
> +    tags[0] = av_metadata_get(s->metadata, "title"    , NULL, 0);
> +    tags[1] = av_metadata_get(s->metadata, "author"   , NULL, 0);
> +    tags[2] = av_metadata_get(s->metadata, "copyright", NULL, 0);
> +    tags[3] = av_metadata_get(s->metadata, "comment"  , NULL, 0);
> +    tags[4] = av_metadata_get(s->metadata, "rating"   , NULL, 0);

adding rating is a unrelated change

[...]
>  asfdec.c |   14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 726ff0a32ad06db7fe8719e2e071b0763b1cc5ed  0005-asfdec-read-metadata-as-proper-UTF-16.patch
> From c8beff1c527f43fc4763c4a46db403df8f8da278 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 18:45:38 +0100
> Subject: [PATCH 5/5] asfdec: read metadata as proper UTF-16.
> 
> ---
>  libavformat/asfdec.c |   14 +++++++++-----
>  1 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
> index 3ccadeb..de48167 100644
> --- a/libavformat/asfdec.c
> +++ b/libavformat/asfdec.c
> @@ -122,9 +122,12 @@ static void get_str16(ByteIOContext *pb, char *buf, int buf_size)
>  static void get_str16_nolen(ByteIOContext *pb, int len, char *buf, int buf_size)
>  {
>      char* q = buf;
> -    for (; len > 1; len -= 2) {
> +    while (len > 1) {
>          uint8_t tmp;
> -        PUT_UTF8(get_le16(pb), tmp, if (q - buf < buf_size - 1) *q++ = tmp;)
> +        uint32_t ch;
> +
> +        GET_UTF16(ch, (len -= 2) >= 0 ? get_le16(pb) : 0, break;)
> +        PUT_UTF8(ch, tmp, if (q - buf < buf_size - 1) *q++ = tmp;)
>      }
>      if (len > 0)
>          url_fskip(pb, len);
> @@ -157,12 +160,12 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len)
>      if ((unsigned)len >= UINT_MAX)
>          return;
>  
> -    value = av_malloc(len+1);
> +    value = av_malloc(2*len+1);
>      if (!value)
>          return;
>  
>      if (type == 0) {         // UTF16-LE
> -        get_str16_nolen(s->pb, len, value, len);
> +        get_str16_nolen(s->pb, len, value, 2*len + 1);
>      } else if (type > 1 && type <= 5) {  // boolean or DWORD or QWORD or WORD
>          uint64_t num = get_value(s->pb, type);
>          snprintf(value, len, "%"PRIu64, num);
> @@ -174,7 +177,8 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len)
>      }
>      if (!strncmp(key, "WM/", 3))
>          key += 3;
> -    av_metadata_set2(&s->metadata, key, value, AV_METADATA_DONT_STRDUP_VAL);
> +    av_metadata_set2(&s->metadata, key, value, 0);
> +    av_freep(&value);

why the strdup change?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The greatest way to live with honor in this world is to be what we pretend
to be. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100221/320ffc7e/attachment.pgp>



More information about the ffmpeg-devel mailing list