[MPlayer-dev-eng] Re: [PATCH] OSD localization third try
Tobias Diedrich
ranma at tdiedrich.de
Wed Nov 16 21:12:57 CET 2005
Reimar Döffinger wrote:
> On Mon, Nov 07, 2005 at 07:40:41PM +0100, Tobias Diedrich wrote:
> > Well, the code enabled by this only deals with UTF8, other multibyte
> > encodings are entirely different beasts. I'll rewrite it to use
> > iconv when I find the time (Maybe Wednesday).
>
> Keep in mind that e.g. MinGW does not have iconv, so IMHO avoid it if at
> all possible,
Ok, so how about a simple iconv implementation for osdep?
See attached patch. (Note: I know the configure check is incomplete;
e.g., IIRC at least Solaris needs -liconv.)
For MinGW maybe it could be extended to use native Windows charset
conversion functions.
--
Tobias PGP: http://9ac7e0bc.uguu.de
-------------- next part --------------
Index: configure
===================================================================
RCS file: /cvsroot/mplayer/main/configure,v
retrieving revision 1.1102
diff -u -r1.1102 configure
--- configure 12 Nov 2005 08:25:04 -0000 1.1102
+++ configure 16 Nov 2005 20:05:10 -0000
@@ -3219,6 +3229,20 @@
fi
echores "$_fseeko"
+# Probe for iconv(): try to build a tiny program that calls iconv_open()
+# and record the outcome in $_iconv ("yes"/"no") and $_def_iconv.
+# NOTE(review): cc_check is invoked without any extra library here, so
+# systems that keep iconv in a separate library (the author mentions
+# Solaris needing -liconv) would presumably be reported as "no" - confirm.
+echocheck "iconv()"
+cat > $TMPC << EOF
+#include <iconv.h>
+int main (void) { iconv_open("UTF-8", "ISO-8859-1"); return 0; }
+EOF
+_iconv=no
+cc_check && _iconv=yes
+# $_def_iconv holds the preprocessor line that is expanded further down
+# in this script; HAVE_ICONV ends up undefined when the probe fails.
+if test "$_iconv" = yes ; then
+ _def_iconv='#define HAVE_ICONV 1'
+else
+ _def_iconv='#undef HAVE_ICONV'
+fi
+echores "$_iconv"
+
echocheck "localtime_r()"
cat > $TMPC << EOF
#include <time.h>
@@ -7455,6 +7485,18 @@
int fseeko(FILE *, off_t, int);
#endif
+/* Define this if your system has iconv */
+$_def_iconv
+#ifndef HAVE_ICONV
+/* No system iconv: declare the minimal replacement from osdep/iconv.c.
+ * osdep/iconv.c includes this header before any system header, so
+ * <stddef.h> is pulled in here to make size_t available. */
+#include <stddef.h>
+struct iconv_priv;
+typedef struct iconv_priv* iconv_t;
+iconv_t iconv_open(const char *tocode, const char *fromcode);
+int iconv_close(iconv_t cd);
+size_t iconv(iconv_t cd,
+ char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft);
+#endif
+
$_def_localtime_r
/* Define this if your system has vsscanf */
Index: osdep/Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/osdep/Makefile,v
retrieving revision 1.19
diff -u -r1.19 Makefile
--- osdep/Makefile 18 Aug 2005 11:26:04 -0000 1.19
+++ osdep/Makefile 16 Nov 2005 20:05:10 -0000
@@ -4,7 +4,7 @@
LIBNAME = libosdep.a
SRCS= shmem.c strsep.c strl.c vsscanf.c scandir.c gettimeofday.c fseeko.c \
- swab.c
+ swab.c iconv.c
# timer.c
getch = getch2.c
--- osdep/iconv.c.foo 1970-01-01 01:00:00.000000000 +0100
+++ osdep/iconv.c 2005-11-16 21:00:12.000000000 +0100
@@ -0,0 +1,306 @@
+/* simple iconv implementation for systems that do not have it in libc */
+/* iconv.c - simple iconv implementation
+ * Author: Tobias Diedrich
+ * $Id$
+ *
+ * vim:set cino=:0l1g0(0:
+ */
+
+#include "config.h"
+
+/*
+ * Self-test build (TEST_ICONV): force the replacement implementation below
+ * to be compiled even when config.h defines HAVE_ICONV, by undefining
+ * HAVE_ICONV and providing the iconv_t typedef this file relies on.
+ */
+#ifdef TEST_ICONV
+# ifdef HAVE_ICONV
+
+#undef HAVE_ICONV
+typedef struct iconv_priv* iconv_t;
+
+# endif /* HAVE_ICONV */
+#endif /* TEST_ICONV */
+
+#ifndef HAVE_ICONV
+
+#include <errno.h>
+#include <inttypes.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "mp_msg.h"
+
+/* Decoder: reads one character at *src, advances *src past it and returns
+ * the character as a Unicode code point. */
+typedef uint32_t (any_to_uni_fn)(char **src);
+/* Encoder: writes code point src at *dst and advances *dst past the
+ * bytes written. */
+typedef void (uni_to_any_fn)(uint32_t src, char **dst);
+/* Returns how many input bytes the character starting at *src occupies;
+ * iconv() compares this against the remaining input length. */
+typedef int (in_n_fn)(char **src);
+/* Returns how many output bytes encoding code point src requires;
+ * iconv() compares this against the remaining output space. */
+typedef int (out_n_fn)(uint32_t src);
+
+/* Conversion descriptor behind iconv_t: the decoder/encoder pair and the
+ * matching length helpers that iconv() calls for every character. */
+struct iconv_priv {
+ any_to_uni_fn *any_to_uni; /* decodes one input character */
+ uni_to_any_fn *uni_to_any; /* encodes one output character */
+ in_n_fn *in_n; /* input bytes needed by the next character */
+ out_n_fn *out_n; /* output bytes needed for a code point */
+};
+
+/* One entry of the supported-charset table: the charset name plus its
+ * decoder, encoder and length helpers.  The table initializers are
+ * positional, so the field order here must not change. */
+struct iconv_info {
+ char *name; /* charset name, matched with strcasecmp() */
+ any_to_uni_fn *charset_to_uni; /* charset -> code point */
+ in_n_fn *in_n; /* input length helper for this charset */
+ uni_to_any_fn *uni_to_charset; /* code point -> charset */
+ out_n_fn *out_n; /* output length helper for this charset */
+};
+
+/*****************************************************************************
+ * CONVERSION FUNCTIONS
+ *****************************************************************************/
+
+/*
+ * generic singlebyte charset functions
+ */
+
+/* Input length helper for single-byte charsets: a character always
+ * occupies exactly one byte, so the input itself is never inspected. */
+static int in_n_1(char **src)
+{
+    (void)src;
+
+    return 1;
+}
+
+/* Output length helper for single-byte charsets: every code point,
+ * representable or not, is emitted as exactly one byte. */
+static int out_n_1(uint32_t src)
+{
+    (void)src;
+
+    return 1;
+}
+
+/*
+ * ascii
+ */
+
+/* Decode one US-ASCII byte at *src and step *src past it.  Anything
+ * outside 0..127 is reported as '?'. */
+static uint32_t ascii_to_uni(char **src)
+{
+    char *cur = *src;
+    uint32_t code = *cur;
+
+    *src = cur + 1;
+
+    return code > 127 ? (uint32_t)'?' : code;
+}
+
+/* Store code point src at *dst as one US-ASCII byte ('?' when it is above
+ * 127) and step *dst past the byte. */
+static void uni_to_ascii(uint32_t src, char **dst)
+{
+    char byte = (src > 127) ? '?' : (char)src;
+
+    **dst = byte;
+    ++*dst;
+}
+
+/*
+ * iso-8859-1
+ */
+
+/* Decode one ISO-8859-1 byte at *src and step *src past it; the unsigned
+ * byte value is returned unchanged as the code point. */
+static uint32_t iso8859_1_to_uni(char **src)
+{
+    unsigned char byte = (unsigned char)**src;
+
+    *src += 1;
+
+    return byte;
+}
+
+/* Store code point src at *dst as one ISO-8859-1 byte ('?' when it is
+ * above 255) and step *dst past the byte. */
+static void uni_to_iso8859_1(uint32_t src, char **dst)
+{
+    char byte = (src > 255) ? '?' : (char)src;
+
+    **dst = byte;
+    ++*dst;
+}
+
+/*
+ * utf-8
+ */
+
+/*
+ * Return the length in bytes (1..4) of the UTF-8 sequence announced by the
+ * lead byte at *src.  Bytes that are not a 2-, 3- or 4-byte lead (ASCII,
+ * stray continuation bytes, invalid leads) count as a single byte.
+ * *src is not advanced.
+ */
+static int utf8_in_n(char **src)
+{
+    unsigned char lead = (unsigned char)**src;
+
+    /* The masks must be applied bitwise; a logical && would collapse the
+     * byte to 0/1 and no comparison below could ever match. */
+    if ((lead & 0xf8) == 0xf0) return 4;
+    if ((lead & 0xf0) == 0xe0) return 3;
+    if ((lead & 0xe0) == 0xc0) return 2;
+    return 1;
+}
+
+/* Number of bytes needed to encode code point src as UTF-8.  Values above
+ * 0x10ffff are sized like the '?' that replaces them, i.e. one byte. */
+static int utf8_out_n(uint32_t src)
+{
+    if (src > 0x10ffff || src <= 0x7f)
+        return 1;
+    if (src <= 0x7ff)
+        return 2;
+    if (src <= 0xffff)
+        return 3;
+
+    return 4;
+}
+
+/*
+ * Decode one UTF-8 sequence at *src and return its code point.
+ *
+ * On success *src is advanced past the whole sequence.  If the lead byte is
+ * invalid, or a continuation byte is missing or malformed (which includes
+ * hitting a NUL terminator early), only the lead byte is consumed and '?'
+ * is returned.  Decoded values above 0x10ffff are also returned as '?'.
+ */
+static uint32_t utf8_to_uni(char **src)
+{
+    const unsigned char *p = (const unsigned char *)*src;
+    uint32_t res;
+    int extra, i;
+
+    if (p[0] < 0x80) {
+        *src += 1;
+        return p[0];
+    }
+
+    /* Payload bits of the lead byte: 5 for 2-byte, 4 for 3-byte and
+     * 3 for 4-byte sequences. */
+    if ((p[0] & 0xe0) == 0xc0) {
+        extra = 1;
+        res = p[0] & 0x1f;
+    } else if ((p[0] & 0xf0) == 0xe0) {
+        extra = 2;
+        res = p[0] & 0x0f;
+    } else if ((p[0] & 0xf8) == 0xf0) {
+        extra = 3;
+        res = p[0] & 0x07;
+    } else {
+        /* stray continuation byte or invalid lead byte */
+        *src += 1;
+        return '?';
+    }
+
+    /* Bytes are examined in order and decoding stops at the first one
+     * that is not a continuation byte, so nothing past it is read. */
+    for (i = 1; i <= extra; i++) {
+        if ((p[i] & 0xc0) != 0x80) {
+            *src += 1;
+            return '?';
+        }
+        res = (res << 6) | (p[i] & 0x3f);
+    }
+
+    *src += 1 + extra;
+
+    if (res > 0x10ffff) res = '?';
+
+    return res;
+}
+
+/*
+ * Encode code point src as UTF-8 at *dst and advance *dst past the bytes
+ * written (1 to 4, matching utf8_out_n()).  Values above 0x10ffff are
+ * written as '?'.  The caller must provide enough room at *dst.
+ */
+static void uni_to_utf8(uint32_t src, char **dst)
+{
+    char *out = *dst;
+
+    if (src > 0x10ffff) src = '?';
+
+    if (src > 0xffff) {
+        *out++ = (char)(0xf0 | (src >> 18));
+        *out++ = (char)(0x80 | ((src >> 12) & 0x3f));
+        *out++ = (char)(0x80 | ((src >> 6) & 0x3f));
+        *out++ = (char)(0x80 | (src & 0x3f));
+    } else if (src > 0x7ff) {
+        *out++ = (char)(0xe0 | (src >> 12));
+        *out++ = (char)(0x80 | ((src >> 6) & 0x3f));
+        *out++ = (char)(0x80 | (src & 0x3f));
+    } else if (src > 0x7f) {
+        *out++ = (char)(0xc0 | (src >> 6));
+        *out++ = (char)(0x80 | (src & 0x3f));
+    } else {
+        /* plain ASCII is a single byte and must be emitted as well */
+        *out++ = (char)src;
+    }
+
+    /* Advance the caller's output cursor, not the pointer to it. */
+    *dst = out;
+}
+
+/*****************************************************************************
+ * MAIN FUNCTIONS
+ *****************************************************************************/
+
+/* Charsets known to iconv_open(), searched by name.  The list ends with
+ * an entry whose name is NULL. */
+static struct iconv_info supported_charsets[] = {
+    {
+        .name           = "US-ASCII",
+        .charset_to_uni = ascii_to_uni,
+        .in_n           = in_n_1,
+        .uni_to_charset = uni_to_ascii,
+        .out_n          = out_n_1,
+    },
+    {
+        .name           = "ISO-8859-1",
+        .charset_to_uni = iso8859_1_to_uni,
+        .in_n           = in_n_1,
+        .uni_to_charset = uni_to_iso8859_1,
+        .out_n          = out_n_1,
+    },
+    {
+        .name           = "UTF-8",
+        .charset_to_uni = utf8_to_uni,
+        .in_n           = utf8_in_n,
+        .uni_to_charset = uni_to_utf8,
+        .out_n          = utf8_out_n,
+    },
+    { .name = NULL } /* end marker; remaining members are zeroed */
+};
+
+/*
+ * Find a charset by name (compared case-insensitively) in
+ * supported_charsets.  Returns NULL if name is NULL or not listed.
+ */
+static const struct iconv_info *find_charset(const char *name)
+{
+    int i;
+
+    if (!name) return NULL;
+
+    for (i = 0; supported_charsets[i].name; i++) {
+        if (!strcasecmp(supported_charsets[i].name, name))
+            return &supported_charsets[i];
+    }
+
+    return NULL;
+}
+
+/*
+ * Create a descriptor for converting text from charset fromcode to charset
+ * tocode.
+ *
+ * Returns the descriptor, to be released with iconv_close(), or
+ * (iconv_t)-1 with errno set to EINVAL (a charset is not supported) or
+ * ENOMEM (allocation failed).
+ */
+iconv_t iconv_open(const char *tocode, const char *fromcode)
+{
+    const struct iconv_info *to = find_charset(tocode);
+    const struct iconv_info *from = find_charset(fromcode);
+    struct iconv_priv *cd;
+
+    if (!to) {
+        mp_msg(MSGT_GLOBAL, MSGL_ERR,
+               "iconv_open: destination charset '%s' unsupported.\n",
+               tocode ? tocode : "(null)");
+        errno = EINVAL; /* set last so the logging call cannot clobber it */
+        return (iconv_t)-1;
+    }
+
+    if (!from) {
+        mp_msg(MSGT_GLOBAL, MSGL_ERR,
+               "iconv_open: source charset '%s' unsupported.\n",
+               fromcode ? fromcode : "(null)");
+        errno = EINVAL;
+        return (iconv_t)-1;
+    }
+
+    cd = malloc(sizeof(*cd));
+    if (!cd) {
+        errno = ENOMEM;
+        return (iconv_t)-1;
+    }
+
+    /* iconv() decodes its input with any_to_uni/in_n and encodes its output
+     * with uni_to_any/out_n, so the decoder side must come from the source
+     * charset and the encoder side from the destination charset. */
+    cd->any_to_uni = from->charset_to_uni;
+    cd->in_n = from->in_n;
+    cd->uni_to_any = to->uni_to_charset;
+    cd->out_n = to->out_n;
+
+    return cd;
+}
+
+/*
+ * Release a descriptor obtained from iconv_open().
+ *
+ * Returns 0 on success.  Passing the (iconv_t)-1 failure value returns -1
+ * with errno set to EBADF instead of handing it to free().
+ */
+int iconv_close(iconv_t cd)
+{
+    if (cd == (iconv_t)-1) {
+        errno = EBADF;
+        return -1;
+    }
+
+    free(cd);
+
+    return 0;
+}
+
+/*
+ * Convert characters from *inbuf to *outbuf using descriptor cd.
+ *
+ * *inbuf/*inbytesleft and *outbuf/*outbytesleft are updated to reflect the
+ * bytes consumed and produced.  The supported charsets carry no shift
+ * state, so a call with a NULL inbuf or NULL *inbuf does nothing.
+ *
+ * Returns 0 when all input was converted, or (size_t)-1 with errno set to
+ * E2BIG (no room left at *outbuf) or EINVAL (the input ends in an
+ * incomplete multibyte sequence).  On error *inbuf points at the first
+ * character that was not converted.
+ */
+size_t iconv(iconv_t cd,
+             char **inbuf, size_t *inbytesleft,
+             char **outbuf, size_t *outbytesleft)
+{
+    if (!inbuf || !*inbuf) return 0;
+
+    /* Test for remaining input first: in_n() may look at the byte at
+     * *inbuf, which is outside the buffer once nothing is left. */
+    while (*inbytesleft > 0 &&
+           (size_t)cd->in_n(inbuf) <= *inbytesleft) {
+        char *inptr = *inbuf;
+        char *outptr = *outbuf;
+        uint32_t uni = cd->any_to_uni(inbuf);
+        size_t consumed = (size_t)(*inbuf - inptr);
+
+        if (consumed > *inbytesleft) {
+            /* decoder consumed more than is available: report the
+             * sequence as incomplete rather than letting the counter
+             * wrap around */
+            *inbuf = inptr;
+            errno = EINVAL;
+            return (size_t)-1;
+        }
+
+        if ((size_t)cd->out_n(uni) > *outbytesleft) {
+            *inbuf = inptr; /* leave this character unconsumed */
+            errno = E2BIG; /* insufficient room at *outbuf */
+            return (size_t)-1;
+        }
+
+        *inbytesleft -= consumed;
+
+        cd->uni_to_any(uni, outbuf);
+        *outbytesleft -= (size_t)(*outbuf - outptr);
+    }
+
+    if (*inbytesleft > 0) {
+        errno = EINVAL; /* incomplete multibyte sequence */
+        return (size_t)-1;
+    }
+
+    return 0;
+}
+
+# ifdef TEST_ICONV
+
+/*
+ * Self-test: converts the UTF-8 encoding of a-umlaut, o-umlaut, u-umlaut
+ * plus the terminating NUL to ISO-8859-1 and compares the result with the
+ * expected bytes.  The output buffer is pre-filled with 0xff, and the
+ * trailing 0xff in expect[] checks that nothing was written beyond the
+ * converted NUL.  Exit status is 0 on success, 1 on failure.
+ */
+int main(int argc, char **argv)
+{
+    char src[] = { (char)0xc3, (char)0xa4, (char)0xc3, (char)0xb6,
+                   (char)0xc3, (char)0xbc, 0x00 };
+    char expect[] = { (char)0xe4, (char)0xf6, (char)0xfc, 0x00, (char)0xff };
+    char buf[200];
+    char *in, *out;
+    size_t i;
+    int passed;
+    size_t inbytes, outbytes;
+    size_t res;
+    iconv_t cd;
+
+    (void)argc;
+    (void)argv;
+
+    /* destination charset first, source charset second */
+    cd = iconv_open("ISO-8859-1", "UTF-8");
+
+    if (cd == (iconv_t)-1) {
+        perror("iconv_open failed");
+
+        return 1;
+    }
+
+    memset(buf, 0xff, sizeof(buf));
+    inbytes = sizeof(src);
+    outbytes = sizeof(buf);
+    in = src;
+    out = buf;
+    errno = 0; /* so the report below shows only iconv()'s own error */
+    res = iconv(cd, &in, &inbytes, &out, &outbytes);
+    passed = (res == 0 && inbytes == 0);
+    for (i = 0; i < sizeof(expect); i++) {
+        printf("%2d: %02x %02x\n", (int)i,
+               (unsigned char)buf[i],
+               (unsigned char)expect[i]);
+        if (buf[i] != expect[i]) passed = 0;
+    }
+    /* size_t values are cast explicitly so the format matches everywhere */
+    printf("outbytes=%lu inbytes=%lu res=%ld passed=%d error='%s'\n",
+           (unsigned long)outbytes, (unsigned long)inbytes, (long)res,
+           passed, strerror(errno));
+
+    iconv_close(cd);
+
+    return passed ? 0 : 1;
+}
+
+# endif /* TEST_ICONV */
+
+#endif /* HAVE_ICONV */
More information about the MPlayer-dev-eng
mailing list