[FFmpeg-devel] [PATCH] support for UTF-16 encoding in id3v2 tags
Michael Niedermayer
michaelni
Thu Sep 10 13:07:17 CEST 2009
On Sun, Sep 06, 2009 at 02:00:32PM +0200, Anton Khirnov wrote:
> Hi,
> attached is a patch for $subj. I've uploaded a sample file to incoming
> - utf16_tags.mp3.
>
> Anton Khirnov
> libavformat/id3v2.c | 34 ++++++++++++++++++++++++++++++++++
> libavutil/common.h | 26 ++++++++++++++++++++++++++
> 2 files changed, 60 insertions(+)
> 3cc4dd4ef107240c2e23a4bdbf0ea569ee0c8e6d 0001-id3v2-add-support-for-UTF-16-encoding.patch
> From a7c0a1fb2cc56531963feb8e4730f0844a462ecf Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 6 Sep 2009 13:34:48 +0200
> Subject: [PATCH] id3v2: add support for UTF-16 encoding.
>
> ---
> libavformat/id3v2.c | 34 ++++++++++++++++++++++++++++++++++
> libavutil/common.h | 26 ++++++++++++++++++++++++++
> 2 files changed, 60 insertions(+), 0 deletions(-)
>
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index 0cf2cb1..fe79c01 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -81,6 +81,7 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key)
> char *q, dst[512];
> int len, dstlen = sizeof(dst) - 1;
> unsigned genre;
> + unsigned int (*get)(ByteIOContext*) = NULL;
>
> dst[0] = 0;
> if (taglen < 1)
> @@ -99,11 +100,44 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key)
> *q = '\0';
> break;
>
> + case 1: /* UTF-16 with BOM */
> + taglen -= 2;
> + switch (get_be16(s->pb)) {
> + case 0xfeff:
> + get = get_be16;
> + break;
> + case 0xfffe:
> + get = get_le16;
> + break;
> + default:
> + av_log(s, AV_LOG_ERROR, "Incorrect BOM value.\n");
> + return;
> + }
> + // fall-through
> +
> + case 2: /* UTF-16BE without BOM */
> + if (!get)
> + get = get_be16;
`get` can be initialized to get_be16 before the switch() as the default, which makes this check unnecessary.
> +
> + q = dst;
> + while (taglen > 1) {
> + uint32_t ch;
> + uint8_t tmp;
> +
> + GET_UTF16(ch, get(s->pb), break;, len)
> + PUT_UTF8(ch, tmp, if (q - dst < dstlen -1) *q++ = tmp;)
> + taglen -= len;
> + }
> + *q = '\0';
> + break;
> +
> case 3: /* UTF-8 */
> len = FFMIN(taglen, dstlen - 1);
> get_buffer(s->pb, dst, len);
> dst[len] = 0;
> break;
> + default:
> + av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key);
> }
>
> if (!strcmp(key, "genre")
> diff --git a/libavutil/common.h b/libavutil/common.h
> index 0797a79..458e32f 100644
> --- a/libavutil/common.h
> +++ b/libavutil/common.h
> @@ -266,6 +266,32 @@ static inline av_const int av_ceil_log2(int x)
> }
>
> /*!
> + * \def GET_UTF16(val, GET_BYTE, ERROR)
The parameter should be named GET_16BIT, not GET_BYTE, since it returns a 16-bit unit.
> + * Converts a UTF-16 character (2 or 4 bytes) to its 32-bit UCS-4 encoded form
> + * \param val is the output and should be of type uint32_t. It holds the converted
> + * UCS-4 character and should be a left value.
> + * \param GET_BYTE gets UTF-16 encoded bytes from any proper source. It can be
> + * a function or a statement whose return value or evaluated value is of type
> + * uint16_t. It will be executed up to 2 times.
> + * \param ERROR action that should be taken when an invalid UTF-16 surrogate is
> + * returned from GET_BYTE. It should be a statement that jumps out of the macro,
> + * like exit(), goto, return, break, or continue.
> + * \param read should be an int and is set to the number of bytes read (2 or 4).
> + */
> +#define GET_UTF16(val, GET_BYTE, ERROR, read)\
> + {\
> + val = GET_BYTE;\
> + read = 2;\
> + if (val >= 0xD800 && val <= 0xDBFF) {\
> + val = ((val - 0xD800)<<10) + (get(s->pb) - 0xDC00) + 0x0010000UL;\
Missing error checking: the second 16-bit unit is not verified to be a low surrogate (0xDC00-0xDFFF).
> + read += 2;\
> + }\
> + else if (val > 0xDBFF && val <= 0xDFFF) {\
> + ERROR\
> + }\
unsigned int hi= val - 0xD800;
if(hi < 0x800) {
val= get(s->pb) - 0xDC00;
if(val > 0x3FFU || hi > 0x3FFU){
ERROR
}
val+= (hi<<10) + 0x10000;
}
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Those who are best at talking realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090910/2fd025a4/attachment.pgp>
More information about the ffmpeg-devel
mailing list