[MPlayer-dev-eng] [PATCH] Recode legacy metadata (was: Moving towards UTF-8)
Zuxy Meng
zuxy.meng at gmail.com
Mon Jun 25 07:23:25 CEST 2007
Hello,
2006/10/23, Rich Felker <dalias at aerifal.cx>:
> On Mon, Oct 23, 2006 at 03:43:15PM +0800, Zuxy Meng wrote:
> > >> Then for GBK encoded Chinese, more than 80% the case, the string won't
> > >> be a legal UTF-8 symbol and hence the user will see the correct,
> > >> unconverted string.
> > >
> > >Unacceptable. If the string is GBK but the user has a UTF-8 system, it
> > >will print nonsense to the terminal (possibly even corrupt terminal
> > >control sequences). Maybe now this is rare, but eventually everyone
> > >will be using UTF-8. Conversion must never be bypassed.
> >
> > Well, currently, if the string is in GBK but MSG_CHARSET != GBK, then
> > the user has no chance to get anything sane on the terminal,
> > regardless of his/her locale, because mp_msg() converts the string at
> > its best effort: it'll jump to next byte if the previous one has
> > failed, while GBK is a two-byte encoding....
>
> Right. I know it doesn't work currently, but replacing a
> broken-by-lack-of-sophistication system with a broken-by-design system
> is not acceptable. If you're going to try to fix it, fix it right.
> Don't add complexity in the form of broken hacks. If complexity is
> needed, then spend the complexity on a correct solution rather than
> something that will just need to be replaced again..
This is the simplest implementation of Rich's idea. I hope I haven't
misunderstood what he meant and that the structure is OK. Finer control
may be added later.
--
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: libmpdemux/demuxer.h
===================================================================
--- libmpdemux/demuxer.h (revision 23661)
+++ libmpdemux/demuxer.h (working copy)
@@ -385,6 +385,7 @@
int demuxer_add_chapter(demuxer_t* demuxer, const char* name, uint64_t start, uint64_t end);
int demuxer_seek_chapter(demuxer_t *demuxer, int chapter, int mode, float *seek_pts, int *num_chapters, char **chapter_name);
+char* demux_legacy_recode(char* legacy, char* internal, size_t n);
#endif /* __DEMUXER_H */
Index: libmpdemux/demuxer.c
===================================================================
--- libmpdemux/demuxer.c (revision 23661)
+++ libmpdemux/demuxer.c (working copy)
@@ -21,6 +21,10 @@
#include "mf.h"
#include "libaf/af_format.h"
+#ifdef USE_ICONV
+extern char* mp_msg_charset;
+#include <iconv.h>
+#endif
extern void resync_video_stream(sh_video_t *sh_video);
extern void resync_audio_stream(sh_audio_t *sh_audio);
@@ -1215,3 +1219,50 @@
return current;
}
}
+/* Convert the NUL-terminated string legacy into MSG_CHARSET, trying each candidate source charset in order; writes at most n bytes plus a NUL into internal and returns internal on the first error-free conversion, otherwise returns legacy unchanged. */
+char* demux_legacy_recode(char* legacy, char* internal, const size_t n)
+{
+#if !defined(USE_ICONV) || !defined(MSG_CHARSET)
+ return legacy; /* built without iconv or without a target charset: pass the input through */
+#else
+ const char* fallbacks[] = { /* candidate source charsets, tried in this order */
+ "UTF-8",
+ mp_msg_charset,
+ "GB2312",
+ "KOREAN",
+ "SHIFT-JIS",
+ "LATIN1"
+ };
+ size_t legacylen;
+ size_t inlen, outlen, ret;
+ char *inbuf, *outbuf;
+ int i;
+ iconv_t cd;
+
+ if (!internal) /* no output buffer supplied: nothing to convert into */
+ return legacy;
+
+ if (!strcasecmp(mp_msg_charset, "noconv")) /* mp_msg_charset is "noconv": return the input untouched */
+ return legacy;
+
+ legacylen = strlen(legacy);
+ for (i = 0; i < sizeof(fallbacks) / sizeof(const char*); i++) {
+ if ((cd = iconv_open(MSG_CHARSET, fallbacks[i])) == (iconv_t)(-1))
+ continue; /* this conversion pair cannot be opened: try the next candidate */
+ inlen = legacylen; /* every attempt restarts from the beginning of both buffers */
+ outlen = n;
+ inbuf = legacy;
+ outbuf = internal;
+ ret = iconv(cd, (const char**)&inbuf, &inlen, &outbuf, &outlen);
+ iconv_close(cd);
+ if (ret != (size_t)(-1)) { /* iconv reported no error: accept this candidate */
+ *outbuf = '\0'; /* outbuf can be as far as internal + n, so internal must hold n + 1 bytes */
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "Legacy string detected as: %s\n",
+ fallbacks[i]);
+ return internal;
+ }
+ }
+ return legacy; /* every candidate failed: fall back to the unconverted input */
+#endif
+}
+
Index: libmpdemux/demux_real.c
===================================================================
--- libmpdemux/demux_real.c (revision 23661)
+++ libmpdemux/demux_real.c (working copy)
@@ -1175,16 +1175,19 @@
case MKTAG('C', 'O', 'N', 'T'):
{
/* Content description header */
- char *buf;
+ char *buf, *tmp;
int len;
len = stream_read_word(demuxer->stream);
if (len > 0)
{
buf = malloc(len+1);
+ tmp = malloc(2*len+1);
stream_read(demuxer->stream, buf, len);
buf[len] = 0;
- demux_info_add(demuxer, "name", buf);
+ demux_info_add(demuxer, "name",
+ demux_legacy_recode(buf, tmp, 2*len));
+ free(tmp);
free(buf);
}
@@ -1192,9 +1195,12 @@
if (len > 0)
{
buf = malloc(len+1);
+ tmp = malloc(2*len+1);
stream_read(demuxer->stream, buf, len);
buf[len] = 0;
- demux_info_add(demuxer, "author", buf);
+ demux_info_add(demuxer, "author",
+ demux_legacy_recode(buf, tmp, 2*len));
+ free(tmp);
free(buf);
}
@@ -1202,9 +1208,12 @@
if (len > 0)
{
buf = malloc(len+1);
+ tmp = malloc(2*len+1);
stream_read(demuxer->stream, buf, len);
buf[len] = 0;
- demux_info_add(demuxer, "copyright", buf);
+ demux_info_add(demuxer, "copyright",
+ demux_legacy_recode(buf, tmp, 2*len));
+ free(tmp);
free(buf);
}
@@ -1212,9 +1221,12 @@
if (len > 0)
{
buf = malloc(len+1);
+ tmp = malloc(2*len+1);
stream_read(demuxer->stream, buf, len);
buf[len] = 0;
- demux_info_add(demuxer, "comment", buf);
+ demux_info_add(demuxer, "comment",
+ demux_legacy_recode(buf, tmp, 2*len));
+ free(tmp);
free(buf);
}
break;
@@ -1281,7 +1293,7 @@
int coded_frame_size;
int codecdata_length;
int i;
- char *buft;
+ char *buft, *tmp;
int hdr_size;
mp_msg(MSGT_DEMUX, MSGL_INFO, MSGTR_AudioID, "real", stream_id);
mp_msg(MSGT_DEMUX,MSGL_V,"Found audio stream!\n");
@@ -1294,23 +1306,32 @@
// Name, author, (c) are also in CONT tag
if ((i = stream_read_char(demuxer->stream)) != 0) {
buft = malloc(i+1);
+ tmp = malloc(2*i+1);
stream_read(demuxer->stream, buft, i);
buft[i] = 0;
- demux_info_add(demuxer, "Name", buft);
+ demux_info_add(demuxer, "Name",
+ demux_legacy_recode(buft, tmp, 2*i));
+ free(tmp);
free(buft);
}
if ((i = stream_read_char(demuxer->stream)) != 0) {
buft = malloc(i+1);
+ tmp = malloc(2*i+1);
stream_read(demuxer->stream, buft, i);
buft[i] = 0;
- demux_info_add(demuxer, "Author", buft);
+ demux_info_add(demuxer, "Author",
+ demux_legacy_recode(buft, tmp, 2*i));
+ free(tmp);
free(buft);
}
if ((i = stream_read_char(demuxer->stream)) != 0) {
buft = malloc(i+1);
+ tmp = malloc(2*i+1);
stream_read(demuxer->stream, buft, i);
buft[i] = 0;
- demux_info_add(demuxer, "Copyright", buft);
+ demux_info_add(demuxer, "Copyright",
+ demux_legacy_recode(buft, tmp, 2*i));
+ free(tmp);
free(buft);
}
if ((i = stream_read_char(demuxer->stream)) != 0)
Index: libmpdemux/demux_audio.c
===================================================================
--- libmpdemux/demux_audio.c (revision 23661)
+++ libmpdemux/demux_audio.c (working copy)
@@ -374,23 +392,24 @@
demuxer->movi_end = s->end_pos;
else {
char buf[31];
+ char tmp[62];
uint8_t g;
demuxer->movi_end = stream_tell(s)-3;
stream_read(s,buf,30);
buf[30] = '\0';
- demux_info_add(demuxer,"Title",buf);
+ demux_info_add(demuxer,"Title",demux_legacy_recode(buf,tmp,61));
stream_read(s,buf,30);
buf[30] = '\0';
- demux_info_add(demuxer,"Artist",buf);
+ demux_info_add(demuxer,"Artist",demux_legacy_recode(buf,tmp,61));
stream_read(s,buf,30);
buf[30] = '\0';
- demux_info_add(demuxer,"Album",buf);
+ demux_info_add(demuxer,"Album",demux_legacy_recode(buf,tmp,61));
stream_read(s,buf,4);
buf[4] = '\0';
- demux_info_add(demuxer,"Year",buf);
+ demux_info_add(demuxer,"Year",demux_legacy_recode(buf,tmp,61));
stream_read(s,buf,30);
buf[30] = '\0';
- demux_info_add(demuxer,"Comment",buf);
+ demux_info_add(demuxer,"Comment",demux_legacy_recode(buf,tmp,61));
if(buf[28] == 0 && buf[29] != 0) {
uint8_t trk = (uint8_t)buf[29];
sprintf(buf,"%d",trk);
More information about the MPlayer-dev-eng
mailing list