[MPlayer-dev-eng] [PATCH] Recode legacy metadata (was: Moving towards UTF-8)

Zuxy Meng zuxy.meng at gmail.com
Mon Jun 25 07:23:25 CEST 2007


Hello,

2006/10/23, Rich Felker <dalias at aerifal.cx>:
> On Mon, Oct 23, 2006 at 03:43:15PM +0800, Zuxy Meng wrote:
> > >> Then for GBK-encoded Chinese, in more than 80% of cases the string
> > >> won't be a legal UTF-8 sequence, and hence the user will see the
> > >> correct, unconverted string.
> > >
> > >Unacceptable. If the string is GBK but the user has a UTF-8 system, it
> > >will print nonsense to the terminal (possibly even corrupt terminal
> > >control sequences). Maybe now this is rare, but eventually everyone
> > >will be using UTF-8. Conversion must never be bypassed.
> >
> > Well, currently, if the string is in GBK but MSG_CHARSET != GBK, then
> > the user has no chance to get anything sane on the terminal,
> > regardless of his/her locale, because mp_msg() converts the string at
> > its best effort: it'll jump to next byte if the previous one has
> > failed, while GBK is a two-byte encoding....
>
> Right. I know it doesn't work currently, but replacing a
> broken-by-lack-of-sophistication system with a broken-by-design system
> is not acceptable. If you're going to try to fix it, fix it right.
> Don't add complexity in the form of broken hacks. If complexity is
> needed, then spend the complexity on a correct solution rather than
> something that will just need to be replaced again..

This is the simplest implementation of Rich's idea. I hope I haven't
misunderstood what he meant and that the structure is OK. Finer control
may be added later.

-- 
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: libmpdemux/demuxer.h
===================================================================
--- libmpdemux/demuxer.h	(revision 23661)
+++ libmpdemux/demuxer.h	(working copy)
@@ -385,6 +385,7 @@
 
 int demuxer_add_chapter(demuxer_t* demuxer, const char* name, uint64_t start, uint64_t end);
 int demuxer_seek_chapter(demuxer_t *demuxer, int chapter, int mode, float *seek_pts, int *num_chapters, char **chapter_name);
+char* demux_legacy_recode(char* legacy, char* internal, size_t n);
 
 
 #endif /* __DEMUXER_H */
Index: libmpdemux/demuxer.c
===================================================================
--- libmpdemux/demuxer.c	(revision 23661)
+++ libmpdemux/demuxer.c	(working copy)
@@ -21,6 +21,10 @@
 #include "mf.h"
 
 #include "libaf/af_format.h"
+#ifdef USE_ICONV
+extern char* mp_msg_charset;
+#include <iconv.h>
+#endif
 
 extern void resync_video_stream(sh_video_t *sh_video);
 extern void resync_audio_stream(sh_audio_t *sh_audio);
@@ -1215,3 +1219,50 @@
         return current;
     }
 }
+
+char* demux_legacy_recode(char* legacy, char* internal, const size_t n)
+{   /* Recode the string legacy into MSG_CHARSET using the buffer internal; returns internal on success, otherwise legacy itself. */
+#if !defined(USE_ICONV) || !defined(MSG_CHARSET)
+    return legacy;  /* built without iconv or without MSG_CHARSET: hand the input back untouched */
+#else
+    const char* fallbacks[] = {  /* candidate source charsets, tried in exactly this order */
+	"UTF-8",
+	mp_msg_charset,
+	"GB2312",
+	"KOREAN",
+	"SHIFT-JIS",
+	"LATIN1"
+    };
+    size_t legacylen;
+    size_t inlen, outlen, ret;
+    char *inbuf, *outbuf;
+    int i;
+    iconv_t cd;
+
+    if (!internal)  /* no output buffer supplied: nothing to convert into */
+	return legacy;
+
+    if (!strcasecmp(mp_msg_charset, "noconv"))  /* NOTE(review): mp_msg_charset is not NULL-checked here -- confirm it is always set */
+	return legacy;  /* charset setting "noconv": recoding is skipped */
+
+    legacylen = strlen(legacy);
+    for (i = 0; i < sizeof(fallbacks) / sizeof(const char*); i++) {
+	if ((cd = iconv_open(MSG_CHARSET, fallbacks[i])) == (iconv_t)(-1))  /* NOTE(review): target is the compile-time MSG_CHARSET, not mp_msg_charset -- confirm intended */
+	    continue;  /* this source/target pair cannot be opened; try the next candidate */
+	inlen = legacylen;  /* every attempt restarts from the whole input and the start of internal */
+	outlen = n;  /* at most n bytes of converted output are allowed */
+	inbuf = legacy;
+	outbuf = internal;
+	ret = iconv(cd, (const char**)&inbuf, &inlen, &outbuf, &outlen);
+	iconv_close(cd);
+	if (ret != (size_t)(-1)) {  /* (size_t)-1 is iconv()'s error return; anything else is taken as success */
+	    *outbuf = '\0';  /* outbuf now points past the converted bytes, so internal needs room for n bytes plus this terminator */
+	    mp_msg(MSGT_DEMUX, MSGL_DBG2, "Legacy string detected as: %s\n",
+		    fallbacks[i]);
+	    return internal;
+	}
+    }
+    return legacy;  /* no candidate converted without error: return the original bytes */
+#endif
+}
+
Index: libmpdemux/demux_real.c
===================================================================
--- libmpdemux/demux_real.c	(revision 23661)
+++ libmpdemux/demux_real.c	(working copy)
@@ -1175,16 +1175,19 @@
 	    case MKTAG('C', 'O', 'N', 'T'):
 	    {
 		/* Content description header */
-		char *buf;
+		char *buf, *tmp;
 		int len;
 
 		len = stream_read_word(demuxer->stream);
 		if (len > 0)
 		{
 		    buf = malloc(len+1);
+		    tmp = malloc(2*len+1);
 		    stream_read(demuxer->stream, buf, len);
 		    buf[len] = 0;
-		    demux_info_add(demuxer, "name", buf);
+		    demux_info_add(demuxer, "name",
+			   demux_legacy_recode(buf, tmp, 2*len));
+		    free(tmp);
 		    free(buf);
 		}
 
@@ -1192,9 +1195,12 @@
 		if (len > 0)
 		{
 		    buf = malloc(len+1);
+		    tmp = malloc(2*len+1);
 		    stream_read(demuxer->stream, buf, len);
 		    buf[len] = 0;
-		    demux_info_add(demuxer, "author", buf);
+		    demux_info_add(demuxer, "author",
+			   demux_legacy_recode(buf, tmp, 2*len));
+		    free(tmp);
 		    free(buf);
 		}
 
@@ -1202,9 +1208,12 @@
 		if (len > 0)
 		{
 		    buf = malloc(len+1);
+		    tmp = malloc(2*len+1);
 		    stream_read(demuxer->stream, buf, len);
 		    buf[len] = 0;
-		    demux_info_add(demuxer, "copyright", buf);
+		    demux_info_add(demuxer, "copyright",
+			   demux_legacy_recode(buf, tmp, 2*len));
+		    free(tmp);
 		    free(buf);
 		}
 
@@ -1212,9 +1221,12 @@
 		if (len > 0)
 		{
 		    buf = malloc(len+1);
+		    tmp = malloc(2*len+1);
 	    	    stream_read(demuxer->stream, buf, len);
 		    buf[len] = 0;
-		    demux_info_add(demuxer, "comment", buf);
+		    demux_info_add(demuxer, "comment",
+			   demux_legacy_recode(buf, tmp, 2*len));
+		    free(tmp);
 		    free(buf);
 		}
 		break;
@@ -1281,7 +1293,7 @@
 		    int coded_frame_size;
 		    int codecdata_length;
 		    int i;
-		    char *buft;
+		    char *buft, *tmp;
 		    int hdr_size;
 		    mp_msg(MSGT_DEMUX, MSGL_INFO, MSGTR_AudioID, "real", stream_id);
 		    mp_msg(MSGT_DEMUX,MSGL_V,"Found audio stream!\n");
@@ -1294,23 +1306,32 @@
                     // Name, author, (c) are also in CONT tag
                     if ((i = stream_read_char(demuxer->stream)) != 0) {
                       buft = malloc(i+1);
+                      tmp = malloc(2*i+1);
                       stream_read(demuxer->stream, buft, i);
                       buft[i] = 0;
-                      demux_info_add(demuxer, "Name", buft);
+                      demux_info_add(demuxer, "Name",
+                             demux_legacy_recode(buft, tmp, 2*i));
+                      free(tmp);
                       free(buft);
                     }
                     if ((i = stream_read_char(demuxer->stream)) != 0) {
                       buft = malloc(i+1);
+                      tmp = malloc(2*i+1);
                       stream_read(demuxer->stream, buft, i);
                       buft[i] = 0;
-                      demux_info_add(demuxer, "Author", buft);
+                      demux_info_add(demuxer, "Author",
+                             demux_legacy_recode(buft, tmp, 2*i));
+                      free(tmp);
                       free(buft);
                     }
                     if ((i = stream_read_char(demuxer->stream)) != 0) {
                       buft = malloc(i+1);
+                      tmp = malloc(2*i+1);
                       stream_read(demuxer->stream, buft, i);
                       buft[i] = 0;
-                      demux_info_add(demuxer, "Copyright", buft);
+                      demux_info_add(demuxer, "Copyright",
+                             demux_legacy_recode(buft, tmp, 2*i));
+                      free(tmp);
                       free(buft);
                     }
                     if ((i = stream_read_char(demuxer->stream)) != 0)
Index: libmpdemux/demux_audio.c
===================================================================
--- libmpdemux/demux_audio.c	(revision 23661)
+++ libmpdemux/demux_audio.c	(working copy)
@@ -374,23 +392,24 @@
 	demuxer->movi_end = s->end_pos;
       else {
 	char buf[31];
+	char tmp[62];
 	uint8_t g;
 	demuxer->movi_end = stream_tell(s)-3;
 	stream_read(s,buf,30);
 	buf[30] = '\0';
-	demux_info_add(demuxer,"Title",buf);
+	demux_info_add(demuxer,"Title",demux_legacy_recode(buf,tmp,61));
 	stream_read(s,buf,30);
 	buf[30] = '\0';
-	demux_info_add(demuxer,"Artist",buf);
+	demux_info_add(demuxer,"Artist",demux_legacy_recode(buf,tmp,61));
 	stream_read(s,buf,30);
 	buf[30] = '\0';
-	demux_info_add(demuxer,"Album",buf);
+	demux_info_add(demuxer,"Album",demux_legacy_recode(buf,tmp,61));
 	stream_read(s,buf,4);
 	buf[4] = '\0';
-	demux_info_add(demuxer,"Year",buf);
+	demux_info_add(demuxer,"Year",demux_legacy_recode(buf,tmp,61));
 	stream_read(s,buf,30);
 	buf[30] = '\0';
-	demux_info_add(demuxer,"Comment",buf);
+	demux_info_add(demuxer,"Comment",demux_legacy_recode(buf,tmp,61));
 	if(buf[28] == 0 && buf[29] != 0) {
 	  uint8_t trk = (uint8_t)buf[29];
 	  sprintf(buf,"%d",trk);


More information about the MPlayer-dev-eng mailing list