[MPlayer-dev-eng] Re: [PATCH] OSD localization third try

Tobias Diedrich ranma at tdiedrich.de
Wed Nov 16 21:12:57 CET 2005


Reimar Döffinger wrote:

> On Mon, Nov 07, 2005 at 07:40:41PM +0100, Tobias Diedrich wrote:
> > Well, the code enabled by this only deals with UTF8, other multibyte
> > encodings are entirely different beasts.  I'll rewrite it to use
> > iconv when I find the time (Maybe Wednesday).
> 
> Keep in mind that e.g. MinGW does not have iconv, so IMHO avoid it if at
> all possible,

Ok, so how about a simple iconv implementation for osdep?
See the attached patch. (Note: I know the configure check is incomplete;
e.g. IIRC -liconv is needed at least on Solaris.)
For MinGW maybe it could be extended to use native Windows charset
conversion functions.

-- 
Tobias						PGP: http://9ac7e0bc.uguu.de
-------------- next part --------------
Index: configure
===================================================================
RCS file: /cvsroot/mplayer/main/configure,v
retrieving revision 1.1102
diff -u -r1.1102 configure
--- configure	12 Nov 2005 08:25:04 -0000	1.1102
+++ configure	16 Nov 2005 20:05:10 -0000
@@ -3219,6 +3229,20 @@
 fi
 echores "$_fseeko"
 
+# Probe for a usable iconv(); config.h declares fallback prototypes otherwise.
+echocheck "iconv()"
+cat > $TMPC << EOF
+#include <iconv.h>
+int main (void) { iconv_open("UTF-8", "ISO-8859-1"); return 0; }
+EOF
+_iconv=no
+_def_iconv='#undef HAVE_ICONV'
+if cc_check ; then
+ _iconv=yes
+ _def_iconv='#define HAVE_ICONV 1'
+fi
+echores "$_iconv"
+
 echocheck "localtime_r()"
 cat > $TMPC << EOF
 #include <time.h>
@@ -7455,6 +7485,18 @@
 int fseeko(FILE *, off_t, int);
 #endif
 
+/* Define this if your system has iconv; otherwise declare the fallback from osdep/iconv.c */
+$_def_iconv
+#ifndef HAVE_ICONV
+struct iconv_priv;
+typedef struct iconv_priv* iconv_t;
+iconv_t iconv_open(const char *tocode, const char *fromcode);
+int iconv_close(iconv_t cd);
+size_t iconv(iconv_t cd,
+             char **inbuf, size_t *inbytesleft,
+             char **outbuf, size_t *outbytesleft);
+#endif
+
 $_def_localtime_r
 
 /* Define this if your system has vsscanf */
Index: osdep/Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/osdep/Makefile,v
retrieving revision 1.19
diff -u -r1.19 Makefile
--- osdep/Makefile	18 Aug 2005 11:26:04 -0000	1.19
+++ osdep/Makefile	16 Nov 2005 20:05:10 -0000
@@ -4,7 +4,7 @@
 LIBNAME = libosdep.a
 
 SRCS= shmem.c strsep.c strl.c vsscanf.c scandir.c gettimeofday.c fseeko.c \
-      swab.c
+      swab.c iconv.c
       # timer.c
 
 getch = getch2.c
--- osdep/iconv.c.foo	1970-01-01 01:00:00.000000000 +0100
+++ osdep/iconv.c	2005-11-16 21:00:12.000000000 +0100
@@ -0,0 +1,364 @@
+/* simple iconv implementation for systems that do not have it in libc */
+/* iconv.c - simple iconv implementation
+ * Author: Tobias Diedrich
+ * $Id$
+ *
+ * Supports US-ASCII, ISO-8859-1 and UTF-8 only.  Characters that cannot be
+ * represented in the target charset are replaced by '?'.
+ *
+ * vim:set cino=:0l1g0(0:
+ */
+
+#include "config.h"
+
+#ifdef TEST_ICONV
+#  ifdef HAVE_ICONV
+
+#undef HAVE_ICONV
+typedef struct iconv_priv* iconv_t;
+
+#  endif /* HAVE_ICONV */
+#endif /* TEST_ICONV */
+
+#ifndef HAVE_ICONV
+
+#include <inttypes.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#ifdef TEST_ICONV
+#include <stdio.h>
+#endif
+
+#include "mp_msg.h"
+
+/* decode the character at *src and advance *src past it */
+typedef uint32_t (any_to_uni_fn)(char **src);
+/* encode src at *dst and advance *dst past the bytes written */
+typedef void (uni_to_any_fn)(uint32_t src, char **dst);
+/* number of input bytes occupied by the character starting at *src */
+typedef int (in_n_fn)(char **src);
+/* number of output bytes needed to encode src */
+typedef int (out_n_fn)(uint32_t src);
+
+/* conversion descriptor: decoder for the source charset, encoder for the
+ * destination charset */
+struct iconv_priv {
+	any_to_uni_fn *any_to_uni;
+	uni_to_any_fn *uni_to_any;
+	in_n_fn       *in_n;
+	out_n_fn      *out_n;
+};
+
+/* entry of the supported_charsets table */
+struct iconv_info {
+	const char *name;
+	any_to_uni_fn *charset_to_uni;
+	in_n_fn       *in_n;
+	uni_to_any_fn *uni_to_charset;
+	out_n_fn      *out_n;
+};
+
+/*****************************************************************************
+ * CONVERSION FUNCTIONS
+ *****************************************************************************/
+
+/*
+ * generic singlebyte charset functions
+ */
+
+static int in_n_1(char **src)
+{
+	return 1;
+}
+
+static int out_n_1(uint32_t src)
+{
+	return 1;
+}
+
+/*
+ * ascii
+ */
+
+static uint32_t ascii_to_uni(char **src)
+{
+	uint32_t ret = (unsigned char)*(*src)++;
+
+	if (ret > 127) ret = '?';
+
+	return ret;
+}
+
+static void uni_to_ascii(uint32_t src, char **dst)
+{
+	if (src > 127) *(*dst)++ = '?';
+	else *(*dst)++ = src;
+}
+
+/*
+ * iso-8859-1
+ */
+
+static uint32_t iso8859_1_to_uni(char **src)
+{
+	return (unsigned char)*(*src)++;
+}
+
+static void uni_to_iso8859_1(uint32_t src, char **dst)
+{
+	if (src > 255) *(*dst)++ = '?';
+	else *(*dst)++ = src;
+}
+
+/*
+ * utf-8
+ */
+
+/* sequence length announced by the lead byte; anything that is not a valid
+ * multibyte lead byte counts as a single byte */
+static int utf8_in_n(char **src)
+{
+	unsigned char c1 = **src;
+
+	if ((c1 & 0xf8) == 0xf0) return 4;
+	if ((c1 & 0xf0) == 0xe0) return 3;
+	if ((c1 & 0xe0) == 0xc0) return 2;
+	return 1;
+}
+
+static int utf8_out_n(uint32_t src)
+{
+	if (src > 0x10ffff) src = '?';
+
+	if (src > 0xffff) return 4;
+	else if (src > 0x7ff) return 3;
+	else if (src > 0x7f) return 2;
+	return 1;
+}
+
+/* A multibyte sequence is only decoded if none of its trailing bytes is NUL;
+ * otherwise just the lead byte is consumed and stripped to 7 bits. */
+static uint32_t utf8_to_uni(char **src)
+{
+	uint32_t res = (unsigned char)*(*src)++;
+
+	if ((res & 0xe0) == 0xc0 && **src) {
+		res = (res & 0x1f) << 6 | (*(*src)++ & 0x3f);
+	} else if ((res & 0xf0) == 0xe0 && **src && *(*src+1)) {
+		/* a three byte lead carries only four payload bits */
+		res = (res & 0x0f) << 12 |
+		      (*(*src) & 0x3f) << 6 |
+		      (*(*src+1) & 0x3f);
+		*src += 2;
+	} else if ((res & 0xf8) == 0xf0 &&
+	           **src && *(*src+1) && *(*src+2)) {
+		res = (res & 0x07) << 18 |
+		      (*(*src) & 0x3f) << 12 |
+		      (*(*src+1) & 0x3f) << 6 |
+		      (*(*src+2) & 0x3f);
+		*src += 3;
+	} else res &= 0x7f;
+
+	if (res > 0x10ffff) res = '?';
+
+	return res;
+}
+
+static void uni_to_utf8(uint32_t src, char **dst)
+{
+	int left = 0, shift = 0;
+
+	if (src > 0x10ffff) src = '?';
+
+	/* lead byte; note that the pointee *dst is advanced, not dst itself */
+	if (src > 0xffff) {
+		*(*dst)++ = 0xf0 | (src >> 18);
+		left = 3;
+		shift = 12;
+	} else if (src > 0x7ff) {
+		*(*dst)++ = 0xe0 | (src >> 12);
+		left = 2;
+		shift = 6;
+	} else if (src > 0x7f) {
+		*(*dst)++ = 0xc0 | (src >> 6);
+		left = 1;
+		shift = 0;
+	} else {
+		*(*dst)++ = src; /* plain ASCII is a single byte */
+	}
+
+	/* continuation bytes, six payload bits each */
+	while (left--) {
+		*(*dst)++ = 0x80 | ((src >> shift) & 0x3f);
+		shift -= 6;
+	}
+}
+
+/*****************************************************************************
+ * MAIN FUNCTIONS
+ *****************************************************************************/
+
+static const struct iconv_info supported_charsets[] = {
+	{ "US-ASCII", ascii_to_uni, in_n_1, uni_to_ascii, out_n_1 },
+	{ "ISO-8859-1", iso8859_1_to_uni, in_n_1, uni_to_iso8859_1, out_n_1 },
+	{ "UTF-8", utf8_to_uni, utf8_in_n, uni_to_utf8, utf8_out_n },
+	{ NULL, NULL, NULL, NULL, NULL }
+};
+
+/* case-insensitive lookup in supported_charsets; NULL if name is unknown */
+static const struct iconv_info *find_charset(const char *name)
+{
+	int i;
+
+	for (i=0; supported_charsets[i].name; i++)
+		if (!strcasecmp(supported_charsets[i].name, name))
+			return &supported_charsets[i];
+
+	return NULL;
+}
+
+/*
+ * Allocate a descriptor for converting from fromcode to tocode.
+ * Returns (iconv_t)-1 with errno set to EINVAL if either charset is
+ * unsupported, or to ENOMEM if the allocation fails.
+ */
+iconv_t iconv_open(const char *tocode, const char *fromcode)
+{
+	const struct iconv_info *to = find_charset(tocode);
+	const struct iconv_info *from = find_charset(fromcode);
+	struct iconv_priv *cd;
+
+	if (!to) {
+		mp_msg(MSGT_GLOBAL, MSGL_ERR,
+		       "iconv_open: destination charset '%s' unsupported.\n", tocode);
+		errno = EINVAL; /* set last so that mp_msg cannot clobber it */
+		return (iconv_t)-1;
+	}
+
+	if (!from) {
+		mp_msg(MSGT_GLOBAL, MSGL_ERR,
+		       "iconv_open: source charset '%s' unsupported.\n", fromcode);
+		errno = EINVAL;
+		return (iconv_t)-1;
+	}
+
+	cd = malloc(sizeof(*cd));
+	if (!cd) {
+		errno = ENOMEM;
+		return (iconv_t)-1;
+	}
+
+	/* input is decoded with the source charset ... */
+	cd->any_to_uni = from->charset_to_uni;
+	cd->in_n       = from->in_n;
+	/* ... and encoded with the destination charset */
+	cd->uni_to_any = to->uni_to_charset;
+	cd->out_n      = to->out_n;
+
+	return cd;
+}
+
+/* Free a descriptor obtained from iconv_open(). Always returns 0. */
+int iconv_close(iconv_t cd)
+{
+	free(cd);
+	return 0;
+}
+
+/*
+ * Convert characters from *inbuf to *outbuf, updating both pointers and
+ * both byte counts for every character converted.
+ * Returns 0 on success and (size_t)-1 on failure, with errno set to E2BIG
+ * if *outbuf is too small for the next character, or to EINVAL if the input
+ * ends in an incomplete multibyte sequence.
+ * A NULL inbuf or *inbuf is a no-op, as the conversion keeps no state.
+ */
+size_t iconv(iconv_t cd,
+             char **inbuf, size_t *inbytesleft,
+             char **outbuf, size_t *outbytesleft)
+{
+	if (!inbuf || !*inbuf) return 0;
+
+	/* test for remaining input first so that in_n never looks at a byte
+	 * beyond the end of the input */
+	while (*inbytesleft > 0 &&
+	       (size_t)cd->in_n(inbuf) <= *inbytesleft) {
+		char *inptr = *inbuf;
+		char *outptr = *outbuf;
+		int n;
+		uint32_t uni = cd->any_to_uni(inbuf);
+
+		n = *inbuf - inptr;
+
+		if ((size_t)cd->out_n(uni) > *outbytesleft) {
+			errno = E2BIG; /* insufficient room at *outbuf */
+			*inbuf = inptr;
+			return (size_t)-1;
+		}
+
+		(*inbytesleft) -= n;
+
+		cd->uni_to_any(uni, outbuf);
+		(*outbytesleft) -= *outbuf - outptr;
+	}
+
+	if (*inbytesleft > 0) {
+		errno = EINVAL; /* incomplete multibyte sequence */
+		return (size_t)-1;
+	}
+
+	return 0;
+}
+
+#  ifdef TEST_ICONV
+
+int main(int argc, char **argv)
+{
+	/* a-umlaut, o-umlaut, u-umlaut and the terminating NUL in UTF-8 ... */
+	char src[] = { 0xc3, 0xa4, 0xc3, 0xb6, 0xc3, 0xbc, 0x00 };
+	/* ... and in ISO-8859-1, followed by one untouched fill byte */
+	char expect[] = { 0xe4, 0xf6, 0xfc, 0x00, 0xff };
+	char buf[200];
+	char *in, *out;
+	int i, passed;
+	size_t inbytes, outbytes;
+	size_t res;
+
+	iconv_t cd = iconv_open("ISO-8859-1", "UTF-8");
+
+	if (cd == (iconv_t)-1) {
+		perror("iconv_open failed");
+
+		return 1;
+	}
+
+	memset(buf, 0xff, sizeof(buf));
+	inbytes = sizeof(src);
+	outbytes = sizeof(buf);
+	in = src;
+	out = buf;
+	passed = 1;
+	errno = 0;
+	res = iconv(cd, &in, &inbytes, &out, &outbytes);
+	if (res == (size_t)-1) passed = 0;
+	for (i=0; i<(int)sizeof(expect); i++) {
+		printf("%2d: %02x %02x\n", i,
+		       (unsigned char)buf[i],
+		       (unsigned char)expect[i]);
+		if (buf[i] != expect[i]) passed = 0;
+	}
+	printf("outbytes=%lu inbytes=%lu res=%ld passed=%d error='%s'\n",
+	       (unsigned long)outbytes, (unsigned long)inbytes, (long)res,
+	       passed, strerror(errno));
+
+	iconv_close(cd);
+
+	return passed ? 0 : 1;
+}
+
+#  endif /* TEST_ICONV */
+
+#endif /* HAVE_ICONV */


More information about the MPlayer-dev-eng mailing list