lib/util/charcnv Move iconv handle setup in common
author Andrew Bartlett <abartlet@samba.org>
Fri, 18 Feb 2011 02:47:28 +0000 (13:47 +1100)
committer Andrew Bartlett <abartlet@samba.org>
Fri, 18 Feb 2011 07:41:01 +0000 (18:41 +1100)
We now use the struct smb_iconv_convenience at the core of all our
iconv code, and use global_iconv_convenience for the callers that
don't specify one.

Andrew Bartlett

lib/util/charset/charcnv.c
lib/util/charset/charset.h
lib/util/charset/codepoints.c
lib/util/charset/util_unistr.c
lib/util/charset/wscript_build
source3/lib/charcnv.c
source4/param/loadparm.c
source4/param/util.c

index 59b36e30621dfa72fc03d5195e076ef3c6a397be..dd2c725125b887f0b6f3ef9bbd27057d1cbf193f 100644 (file)
  * @sa lib/iconv.c
  */
 
-struct smb_iconv_convenience {
-       TALLOC_CTX *child_ctx;
-       const char *unix_charset;
-       const char *dos_charset;
-       bool native_iconv;
-       smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
-};
-
-
-/**
- * Return the name of a charset to give to iconv().
- **/
-static const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch)
-{
-       switch (ch) {
-       case CH_UTF16: return "UTF-16LE";
-       case CH_UNIX: return ic->unix_charset;
-       case CH_DOS: return ic->dos_charset;
-       case CH_UTF8: return "UTF8";
-       case CH_UTF16BE: return "UTF-16BE";
-       case CH_UTF16MUNGED: return "UTF16_MUNGED";
-       default:
-       return "ASCII";
-       }
-}
-
-/**
- re-initialize iconv conversion descriptors
-**/
-static int close_iconv_convenience(struct smb_iconv_convenience *data)
-{
-       unsigned c1, c2;
-       for (c1=0;c1<NUM_CHARSETS;c1++) {
-               for (c2=0;c2<NUM_CHARSETS;c2++) {
-                       if (data->conv_handles[c1][c2] != NULL) {
-                               if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) {
-                                       smb_iconv_close(data->conv_handles[c1][c2]);
-                               }
-                               data->conv_handles[c1][c2] = NULL;
-                       }
-               }
-       }
-
-       return 0;
-}
-
-/*
-  the old_ic is passed in here as the smb_iconv_convenience structure
-  is used as a global pointer in some places (eg. python modules). We
-  don't want to invalidate those global pointers, but we do want to
-  update them with the right charset information when loadparm
-  runs. To do that we need to re-use the structure pointer, but
-  re-fill the elements in the structure with the updated values
- */
-_PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx,
-                                                                   const char *dos_charset,
-                                                                   const char *unix_charset,
-                                                                   bool native_iconv,
-                                                                   struct smb_iconv_convenience *old_ic)
-{
-       struct smb_iconv_convenience *ret;
-
-       if (old_ic != NULL) {
-               ret = old_ic;
-               close_iconv_convenience(ret);
-               talloc_free(ret->child_ctx);
-               ZERO_STRUCTP(ret);
-       } else {
-               ret = talloc_zero(mem_ctx, struct smb_iconv_convenience);
-       }
-       if (ret == NULL) {
-               return NULL;
-       }
-
-       /* we use a child context to allow us to free all ptrs without
-          freeing the structure itself */
-       ret->child_ctx = talloc_new(ret);
-       if (ret->child_ctx == NULL) {
-               return NULL;
-       }
-
-       talloc_set_destructor(ret, close_iconv_convenience);
-
-       ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset);
-       ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset);
-       ret->native_iconv = native_iconv;
-
-       return ret;
-}
-
-/*
-  on-demand initialisation of conversion handles
-*/
-static smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
-                                  charset_t from, charset_t to)
-{
-       const char *n1, *n2;
-       static bool initialised;
-
-       if (initialised == false) {
-               initialised = true;
-       }
-
-       if (ic->conv_handles[from][to]) {
-               return ic->conv_handles[from][to];
-       }
-
-       n1 = charset_name(ic, from);
-       n2 = charset_name(ic, to);
-
-       ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1, 
-                                                      ic->native_iconv);
-       
-       if (ic->conv_handles[from][to] == (smb_iconv_t)-1) {
-               if ((from == CH_DOS || to == CH_DOS) &&
-                   strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) {
-                       DEBUG(0,("dos charset '%s' unavailable - using ASCII\n",
-                                charset_name(ic, CH_DOS)));
-                       ic->dos_charset = "ASCII";
-
-                       n1 = charset_name(ic, from);
-                       n2 = charset_name(ic, to);
-                       
-                       ic->conv_handles[from][to] = 
-                               smb_iconv_open_ex(ic, n2, n1, ic->native_iconv);
-               }
-       }
-
-       return ic->conv_handles[from][to];
-}
-
 /**
  * Convert string from one encoding to another, making error checking etc
  *
@@ -363,161 +232,3 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx,
        return true;
 }
 
-
-/**
- * Return the unicode codepoint for the next character in the input
- * string in the given src_charset.
- * The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
- *
- * Also return the number of bytes consumed (which tells the caller
- * how many bytes to skip to get to the next src_charset-character).
- *
- * This is implemented (in the non-ascii-case) by first converting the
- * next character in the input string to UTF16_LE and then calculating
- * the unicode codepoint from that.
- *
- * Return INVALID_CODEPOINT if the next character cannot be converted.
- */
-_PUBLIC_ codepoint_t next_codepoint_convenience_ext(
-                       struct smb_iconv_convenience *ic,
-                       const char *str, charset_t src_charset,
-                       size_t *bytes_consumed)
-{
-       /* it cannot occupy more than 4 bytes in UTF16 format */
-       uint8_t buf[4];
-       smb_iconv_t descriptor;
-       size_t ilen_orig;
-       size_t ilen;
-       size_t olen;
-       char *outbuf;
-
-       if ((str[0] & 0x80) == 0) {
-               *bytes_consumed = 1;
-               return (codepoint_t)str[0];
-       }
-
-       /*
-        * we assume that no multi-byte character can take more than 5 bytes.
-        * This is OK as we only support codepoints up to 1M (U+100000)
-        */
-       ilen_orig = strnlen(str, 5);
-       ilen = ilen_orig;
-
-       descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
-       if (descriptor == (smb_iconv_t)-1) {
-               *bytes_consumed = 1;
-               return INVALID_CODEPOINT;
-       }
-
-       /*
-        * this looks a little strange, but it is needed to cope with
-        * codepoints above 64k (U+1000) which are encoded as per RFC2781.
-        */
-       olen = 2;
-       outbuf = (char *)buf;
-       smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
-       if (olen == 2) {
-               olen = 4;
-               outbuf = (char *)buf;
-               smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
-               if (olen == 4) {
-                       /* we didn't convert any bytes */
-                       *bytes_consumed = 1;
-                       return INVALID_CODEPOINT;
-               }
-               olen = 4 - olen;
-       } else {
-               olen = 2 - olen;
-       }
-
-       *bytes_consumed = ilen_orig - ilen;
-
-       if (olen == 2) {
-               return (codepoint_t)SVAL(buf, 0);
-       }
-       if (olen == 4) {
-               /* decode a 4 byte UTF16 character manually */
-               return (codepoint_t)0x10000 + 
-                       (buf[2] | ((buf[3] & 0x3)<<8) | 
-                        (buf[0]<<10) | ((buf[1] & 0x3)<<18));
-       }
-
-       /* no other length is valid */
-       return INVALID_CODEPOINT;
-}
-
-/*
-  return the unicode codepoint for the next multi-byte CH_UNIX character
-  in the string
-
-  also return the number of bytes consumed (which tells the caller
-  how many bytes to skip to get to the next CH_UNIX character)
-
-  return INVALID_CODEPOINT if the next character cannot be converted
-*/
-_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
-                                   const char *str, size_t *size)
-{
-       return next_codepoint_convenience_ext(ic, str, CH_UNIX, size);
-}
-
-/*
-  push a single codepoint into a CH_UNIX string the target string must
-  be able to hold the full character, which is guaranteed if it is at
-  least 5 bytes in size. The caller may pass less than 5 bytes if they
-  are sure the character will fit (for example, you can assume that
-  uppercase/lowercase of a character will not add more than 1 byte)
-
-  return the number of bytes occupied by the CH_UNIX character, or
-  -1 on failure
-*/
-_PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, 
-                               char *str, codepoint_t c)
-{
-       smb_iconv_t descriptor;
-       uint8_t buf[4];
-       size_t ilen, olen;
-       const char *inbuf;
-       
-       if (c < 128) {
-               *str = c;
-               return 1;
-       }
-
-       descriptor = get_conv_handle(ic, 
-                                    CH_UTF16, CH_UNIX);
-       if (descriptor == (smb_iconv_t)-1) {
-               return -1;
-       }
-
-       if (c < 0x10000) {
-               ilen = 2;
-               olen = 5;
-               inbuf = (char *)buf;
-               SSVAL(buf, 0, c);
-               smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
-               if (ilen != 0) {
-                       return -1;
-               }
-               return 5 - olen;
-       }
-
-       c -= 0x10000;
-
-       buf[0] = (c>>10) & 0xFF;
-       buf[1] = (c>>18) | 0xd8;
-       buf[2] = c & 0xFF;
-       buf[3] = ((c>>8) & 0x3) | 0xdc;
-
-       ilen = 4;
-       olen = 5;
-       inbuf = (char *)buf;
-
-       smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
-       if (ilen != 0) {
-               return -1;
-       }
-       return 5 - olen;
-}
-
-
index 28d762578b152c45f323a2f9800dd2d9ca923068..b4a5a5546106733bc7a74172a5327ac660a18da6 100644 (file)
@@ -170,6 +170,10 @@ ssize_t iconv_talloc(TALLOC_CTX *mem_ctx,
                                       void *dest);
 
 extern struct smb_iconv_convenience *global_iconv_convenience;
+struct smb_iconv_convenience *get_iconv_convenience(void);
+smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
+                           charset_t from, charset_t to);
+const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch);
 
 codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
                               size_t *size);
@@ -195,6 +199,7 @@ int codepoint_cmpi(codepoint_t c1, codepoint_t c2);
 struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx,
                                                           const char *dos_charset,
                                                           const char *unix_charset,
+                                                          const char *display_charset,
                                                           bool native_iconv,
                                                           struct smb_iconv_convenience *old_ic);
 
index 53febb8b5e5399b5905880cf8c747890c87eae8a..01183e4ad448df929a23c99ccba0ee015ba7f05b 100644 (file)
@@ -1,8 +1,10 @@
 /* 
    Unix SMB/CIFS implementation.
-   Samba utility functions
-   Copyright (C) Andrew Tridgell 1992-2001
+   Character set conversion Extensions
+   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
+   Copyright (C) Andrew Tridgell 2001
    Copyright (C) Simo Sorce 2001
+   Copyright (C) Jelmer Vernooij 2007
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
 
+*/
 #include "includes.h"
+#include "lib/util/charset/charset.h"
 #include "system/locale.h"
 #include "dynconfig.h"
 
+#ifdef strcasecmp
+#undef strcasecmp
+#endif
+
 /**
  * @file
  * @brief Unicode string manipulation
@@ -126,3 +133,352 @@ _PUBLIC_ int codepoint_cmpi(codepoint_t c1, codepoint_t c2)
 }
 
 
+struct smb_iconv_convenience {
+       TALLOC_CTX *child_ctx;
+       const char *unix_charset;
+       const char *dos_charset;
+       const char *display_charset;
+       bool native_iconv;
+       smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
+};
+
+struct smb_iconv_convenience *global_iconv_convenience = NULL;
+
+struct smb_iconv_convenience *get_iconv_convenience(void)
+{
+       if (global_iconv_convenience == NULL)
+               global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
+                                                                       "ASCII", "UTF-8", "ASCII", true, NULL);
+       return global_iconv_convenience;
+}
+
+/**
+ * Return the name of a charset to give to iconv().
+ **/
+const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch)
+{
+       switch (ch) {
+       case CH_UTF16: return "UTF-16LE";
+       case CH_UNIX: return ic->unix_charset;
+       case CH_DOS: return ic->dos_charset;
+       case CH_DISPLAY: return ic->display_charset;
+       case CH_UTF8: return "UTF8";
+       case CH_UTF16BE: return "UTF-16BE";
+       case CH_UTF16MUNGED: return "UTF16_MUNGED";
+       default:
+       return "ASCII";
+       }
+}
+
+/**
+ close every open iconv conversion descriptor and clear the handle table
+**/
+static int close_iconv_convenience(struct smb_iconv_convenience *data)
+{
+       unsigned c1, c2;
+       for (c1=0;c1<NUM_CHARSETS;c1++) {
+               for (c2=0;c2<NUM_CHARSETS;c2++) {
+                       if (data->conv_handles[c1][c2] != NULL) {
+                               if (data->conv_handles[c1][c2] != (smb_iconv_t)-1) {
+                                       smb_iconv_close(data->conv_handles[c1][c2]);
+                               }
+                               data->conv_handles[c1][c2] = NULL;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static const char *map_locale(const char *charset)
+{
+       if (strcmp(charset, "LOCALE") != 0) {
+               return charset;
+       }
+#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
+       {
+               const char *ln;
+               smb_iconv_t handle;
+
+               ln = nl_langinfo(CODESET);
+               if (ln == NULL) {
+                       DEBUG(1,("Unable to determine charset for LOCALE - using ASCII\n"));
+                       return "ASCII";
+               }
+               /* Check whether the charset name is supported
+                  by iconv */
+               handle = smb_iconv_open(ln, "UCS-2LE");
+               if (handle == (smb_iconv_t) -1) {
+                       DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
+                       return "ASCII";
+               } else {
+                       DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
+                       smb_iconv_close(handle);
+               }
+               return ln;
+       }
+#endif
+       return "ASCII";
+}
+
+/*
+  the old_ic is passed in here as the smb_iconv_convenience structure
+  is used as a global pointer in some places (eg. python modules). We
+  don't want to invalidate those global pointers, but we do want to
+  update them with the right charset information when loadparm
+  runs. To do that we need to re-use the structure pointer, but
+  re-fill the elements in the structure with the updated values
+ */
+_PUBLIC_ struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx,
+                                                                   const char *dos_charset,
+                                                                   const char *unix_charset,
+                                                                   const char *display_charset,
+                                                                   bool native_iconv,
+                                                                   struct smb_iconv_convenience *old_ic)
+{
+       struct smb_iconv_convenience *ret;
+
+       display_charset = map_locale(display_charset);
+
+       if (old_ic != NULL) {
+               ret = old_ic;
+               close_iconv_convenience(ret);
+               talloc_free(ret->child_ctx);
+               ZERO_STRUCTP(ret);
+       } else {
+               ret = talloc_zero(mem_ctx, struct smb_iconv_convenience);
+       }
+       if (ret == NULL) {
+               return NULL;
+       }
+
+       /* we use a child context to allow us to free all ptrs without
+          freeing the structure itself */
+       ret->child_ctx = talloc_new(ret);
+       if (ret->child_ctx == NULL) {
+               return NULL;
+       }
+
+       talloc_set_destructor(ret, close_iconv_convenience);
+
+       ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset);
+       ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset);
+       ret->display_charset = talloc_strdup(ret->child_ctx, display_charset);
+       ret->native_iconv = native_iconv;
+
+       return ret;
+}
+
+/*
+  on-demand initialisation of conversion handles
+*/
+smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
+                           charset_t from, charset_t to)
+{
+       const char *n1, *n2;
+       static bool initialised;
+
+       if (initialised == false) {
+               initialised = true;
+       }
+
+       if (ic->conv_handles[from][to]) {
+               return ic->conv_handles[from][to];
+       }
+
+       n1 = charset_name(ic, from);
+       n2 = charset_name(ic, to);
+
+       ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1,
+                                                      ic->native_iconv);
+
+       if (ic->conv_handles[from][to] == (smb_iconv_t)-1) {
+               if ((from == CH_DOS || to == CH_DOS) &&
+                   strcasecmp(charset_name(ic, CH_DOS), "ASCII") != 0) {
+                       DEBUG(0,("dos charset '%s' unavailable - using ASCII\n",
+                                charset_name(ic, CH_DOS)));
+                       ic->dos_charset = "ASCII";
+
+                       n1 = charset_name(ic, from);
+                       n2 = charset_name(ic, to);
+
+                       ic->conv_handles[from][to] =
+                               smb_iconv_open_ex(ic, n2, n1, ic->native_iconv);
+               }
+       }
+
+       return ic->conv_handles[from][to];
+}
+
+/**
+ * Return the unicode codepoint for the next character in the input
+ * string in the given src_charset.
+ * The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
+ *
+ * Also return the number of bytes consumed (which tells the caller
+ * how many bytes to skip to get to the next src_charset-character).
+ *
+ * This is implemented (in the non-ascii-case) by first converting the
+ * next character in the input string to UTF16_LE and then calculating
+ * the unicode codepoint from that.
+ *
+ * Return INVALID_CODEPOINT if the next character cannot be converted.
+ */
+_PUBLIC_ codepoint_t next_codepoint_convenience_ext(
+                       struct smb_iconv_convenience *ic,
+                       const char *str, charset_t src_charset,
+                       size_t *bytes_consumed)
+{
+       /* it cannot occupy more than 4 bytes in UTF16 format */
+       uint8_t buf[4];
+       smb_iconv_t descriptor;
+       size_t ilen_orig;
+       size_t ilen;
+       size_t olen;
+       char *outbuf;
+
+       if ((str[0] & 0x80) == 0) {
+               *bytes_consumed = 1;
+               return (codepoint_t)str[0];
+       }
+
+       /*
+        * we assume that no multi-byte character can take more than 5 bytes.
+        * This is OK as we only support codepoints up to 1M (U+100000)
+        */
+       ilen_orig = strnlen(str, 5);
+       ilen = ilen_orig;
+
+       descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
+       if (descriptor == (smb_iconv_t)-1) {
+               *bytes_consumed = 1;
+               return INVALID_CODEPOINT;
+       }
+
+       /*
+        * this looks a little strange, but it is needed to cope with
+        * codepoints above 64k (U+10000) which are encoded as per RFC2781.
+        */
+       olen = 2;
+       outbuf = (char *)buf;
+       smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
+       if (olen == 2) {
+               olen = 4;
+               outbuf = (char *)buf;
+               smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
+               if (olen == 4) {
+                       /* we didn't convert any bytes */
+                       *bytes_consumed = 1;
+                       return INVALID_CODEPOINT;
+               }
+               olen = 4 - olen;
+       } else {
+               olen = 2 - olen;
+       }
+
+       *bytes_consumed = ilen_orig - ilen;
+
+       if (olen == 2) {
+               return (codepoint_t)SVAL(buf, 0);
+       }
+       if (olen == 4) {
+               /* decode a 4 byte UTF16 character manually */
+               return (codepoint_t)0x10000 +
+                       (buf[2] | ((buf[3] & 0x3)<<8) |
+                        (buf[0]<<10) | ((buf[1] & 0x3)<<18));
+       }
+
+       /* no other length is valid */
+       return INVALID_CODEPOINT;
+}
+
+/*
+  return the unicode codepoint for the next multi-byte CH_UNIX character
+  in the string
+
+  also return the number of bytes consumed (which tells the caller
+  how many bytes to skip to get to the next CH_UNIX character)
+
+  return INVALID_CODEPOINT if the next character cannot be converted
+*/
+_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
+                                   const char *str, size_t *size)
+{
+       return next_codepoint_convenience_ext(ic, str, CH_UNIX, size);
+}
+
+/*
+  push a single codepoint into a CH_UNIX string. The target string must
+  be able to hold the full character, which is guaranteed if it is at
+  least 5 bytes in size. The caller may pass less than 5 bytes if they
+  are sure the character will fit (for example, you can assume that
+  uppercase/lowercase of a character will not add more than 1 byte)
+
+  return the number of bytes occupied by the CH_UNIX character, or
+  -1 on failure
+*/
+_PUBLIC_ ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,
+                               char *str, codepoint_t c)
+{
+       smb_iconv_t descriptor;
+       uint8_t buf[4];
+       size_t ilen, olen;
+       const char *inbuf;
+
+       if (c < 128) {
+               *str = c;
+               return 1;
+       }
+
+       descriptor = get_conv_handle(ic,
+                                    CH_UTF16, CH_UNIX);
+       if (descriptor == (smb_iconv_t)-1) {
+               return -1;
+       }
+
+       if (c < 0x10000) {
+               ilen = 2;
+               olen = 5;
+               inbuf = (char *)buf;
+               SSVAL(buf, 0, c);
+               smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
+               if (ilen != 0) {
+                       return -1;
+               }
+               return 5 - olen;
+       }
+
+       c -= 0x10000;
+
+       buf[0] = (c>>10) & 0xFF;
+       buf[1] = (c>>18) | 0xd8;
+       buf[2] = c & 0xFF;
+       buf[3] = ((c>>8) & 0x3) | 0xdc;
+
+       ilen = 4;
+       olen = 5;
+       inbuf = (char *)buf;
+
+       smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
+       if (ilen != 0) {
+               return -1;
+       }
+       return 5 - olen;
+}
+
+_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
+                                       size_t *size)
+{
+       return next_codepoint_convenience_ext(get_iconv_convenience(), str,
+                                             src_charset, size);
+}
+
+_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
+{
+       return next_codepoint_convenience(get_iconv_convenience(), str, size);
+}
+
+_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
+{
+       return push_codepoint_convenience(get_iconv_convenience(), str, c);
+}
index 410547400d75a64e3e65a31e80044a84b8e22fb5..b6bfb29e7dc4f5bc7af9673627e36944d2b545e1 100644 (file)
 #include "includes.h"
 #include "system/locale.h"
 
-struct smb_iconv_convenience *global_iconv_convenience = NULL;
-
-static inline struct smb_iconv_convenience *get_iconv_convenience(void)
-{
-       if (global_iconv_convenience == NULL)
-               global_iconv_convenience = smb_iconv_convenience_reinit(talloc_autofree_context(),
-                                                                       "ASCII", "UTF-8", true, NULL);
-       return global_iconv_convenience;
-}
-
 /**
  Case insensitive string compararison
 **/
@@ -1043,19 +1033,3 @@ _PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx,
                                                                                         allow_badcharcnv);
 }
 
-_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
-                                       size_t *size)
-{
-       return next_codepoint_convenience_ext(get_iconv_convenience(), str,
-                                             src_charset, size);
-}
-
-_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)
-{
-       return next_codepoint_convenience(get_iconv_convenience(), str, size);
-}
-
-_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
-{
-       return push_codepoint_convenience(get_iconv_convenience(), str, c);
-}
index 18479a9978c7342d1e1038cdf6ab61ddad270f33..7dcd18903637d8ed4ccf5054590ef3792a5229b1 100644 (file)
@@ -4,7 +4,7 @@
 if bld.env._SAMBA_BUILD_ == 4:
     bld.SAMBA_SUBSYSTEM('CHARSET',
                         source='charcnv.c util_unistr.c',
-                        public_deps='ICONV_WRAPPER CODEPOINTS',
+                        public_deps='CODEPOINTS',
                         public_headers='charset.h',
                         )
 
@@ -14,5 +14,5 @@ bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER',
 
 bld.SAMBA_SUBSYSTEM('CODEPOINTS',
        source='codepoints.c',
-       deps='DYNCONFIG'
+       deps='DYNCONFIG ICONV_WRAPPER'
        )
index 4c98f8f33938b1838e01537f4fbcaa9a508f8071..27235995993c4869da118b4cc29e00efee352c9d 100644 (file)
@@ -45,68 +45,9 @@ char lp_failed_convert_char(void)
  */
 
 
-static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
 static bool initialized;
 
-/**
- * Return the name of a charset to give to iconv().
- **/
-static const char *charset_name(charset_t ch)
-{
-       const char *ret;
-
-       switch (ch) {
-       case CH_UTF16LE:
-               ret = "UTF-16LE";
-               break;
-       case CH_UTF16BE:
-               ret = "UTF-16BE";
-               break;
-       case CH_UNIX:
-               ret = lp_unix_charset();
-               break;
-       case CH_DOS:
-               ret = lp_dos_charset();
-               break;
-       case CH_DISPLAY:
-               ret = lp_display_charset();
-               break;
-       case CH_UTF8:
-               ret = "UTF8";
-               break;
-       default:
-               ret = NULL;
-       }
-
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
-       if (ret && !strcmp(ret, "LOCALE")) {
-               const char *ln = NULL;
-
-#ifdef HAVE_SETLOCALE
-               setlocale(LC_ALL, "");
-#endif
-               ln = nl_langinfo(CODESET);
-               if (ln) {
-                       /* Check whether the charset name is supported
-                          by iconv */
-                       smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
-                       if (handle == (smb_iconv_t) -1) {
-                               DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
-                               ln = NULL;
-                       } else {
-                               DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
-                               smb_iconv_close(handle);
-                       }
-               }
-               ret = ln;
-       }
-#endif
-
-       if (!ret || !*ret) ret = "ASCII";
-       return ret;
-}
-
 void lazy_initialize_conv(void)
 {
        if (!initialized) {
@@ -121,16 +62,7 @@ void lazy_initialize_conv(void)
  **/
 void gfree_charcnv(void)
 {
-       int c1, c2;
-
-       for (c1=0;c1<NUM_CHARSETS;c1++) {
-               for (c2=0;c2<NUM_CHARSETS;c2++) {
-                       if ( conv_handles[c1][c2] ) {
-                               smb_iconv_close( conv_handles[c1][c2] );
-                               conv_handles[c1][c2] = 0;
-                       }
-               }
-       }
+       TALLOC_FREE(global_iconv_convenience);
        initialized = false;
 }
 
@@ -143,51 +75,9 @@ void gfree_charcnv(void)
  **/
 void init_iconv(void)
 {
-       int c1, c2;
-       bool did_reload = False;
-
-       /* so that charset_name() works we need to get the UNIX<->UCS2 going
-          first */
-       if (!conv_handles[CH_UNIX][CH_UTF16LE])
-               conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
-
-       if (!conv_handles[CH_UTF16LE][CH_UNIX])
-               conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
-
-       for (c1=0;c1<NUM_CHARSETS;c1++) {
-               for (c2=0;c2<NUM_CHARSETS;c2++) {
-                       const char *n1 = charset_name((charset_t)c1);
-                       const char *n2 = charset_name((charset_t)c2);
-                       if (conv_handles[c1][c2] &&
-                           strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
-                           strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
-                               continue;
-
-                       did_reload = True;
-
-                       if (conv_handles[c1][c2])
-                               smb_iconv_close(conv_handles[c1][c2]);
-
-                       conv_handles[c1][c2] = smb_iconv_open(n2,n1);
-                       if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
-                               DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
-                                        charset_name((charset_t)c1), charset_name((charset_t)c2)));
-                               if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
-                                       n1 = "ASCII";
-                               }
-                               if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
-                                       n2 = "ASCII";
-                               }
-                               DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
-                                       n1, n2 ));
-                               conv_handles[c1][c2] = smb_iconv_open(n2,n1);
-                               if (!conv_handles[c1][c2]) {
-                                       DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
-                                       smb_panic("init_iconv: conv_handle initialization failed");
-                               }
-                       }
-               }
-       }
+       global_iconv_convenience = smb_iconv_convenience_reinit(NULL, lp_dos_charset(),
+                                                               lp_unix_charset(), lp_display_charset(),
+                                                               true, global_iconv_convenience);
 }
 
 /**
@@ -214,10 +104,11 @@ static size_t convert_string_internal(charset_t from, charset_t to,
        const char* inbuf = (const char*)src;
        char* outbuf = (char*)dest;
        smb_iconv_t descriptor;
+       struct smb_iconv_convenience *ic;
 
        lazy_initialize_conv();
-
-       descriptor = conv_handles[from][to];
+       ic = get_iconv_convenience();
+       descriptor = get_conv_handle(ic, from, to);
 
        if (srclen == (size_t)-1) {
                if (from == CH_UTF16LE || from == CH_UTF16BE) {
@@ -255,11 +146,11 @@ static size_t convert_string_internal(charset_t from, charset_t to,
                                if (!conv_silent) {
                                        if (from == CH_UNIX) {
                                                DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
-                                                       charset_name(from), charset_name(to),
+                                                        charset_name(ic, from), charset_name(ic, to),
                                                        (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
                                        } else {
                                                DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
-                                                       charset_name(from), charset_name(to),
+                                                        charset_name(ic, from), charset_name(ic, to),
                                                        (unsigned int)srclen, (unsigned int)destlen));
                                        }
                                }
@@ -552,6 +443,7 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
        char *outbuf = NULL, *ob = NULL;
        smb_iconv_t descriptor;
        void **dest = (void **)dst;
+       struct smb_iconv_convenience *ic;
 
        *dest = NULL;
 
@@ -576,8 +468,8 @@ bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
        }
 
        lazy_initialize_conv();
-
-       descriptor = conv_handles[from][to];
+       ic = get_iconv_convenience();
+       descriptor = get_conv_handle(ic, from, to);
 
        if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
                if (!conv_silent)
@@ -1784,173 +1676,3 @@ size_t align_string(const void *base_ptr, const char *p, int flags)
        return 0;
 }
 
-/**
- * Return the unicode codepoint for the next character in the input
- * string in the given src_charset.
- * The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
- *
- * Also return the number of bytes consumed (which tells the caller
- * how many bytes to skip to get to the next src_charset-character).
- *
- * This is implemented (in the non-ascii-case) by first converting the
- * next character in the input string to UTF16_LE and then calculating
- * the unicode codepoint from that.
- *
- * Return INVALID_CODEPOINT if the next character cannot be converted.
- */
-
-codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
-                              size_t *bytes_consumed)
-{
-       /* It cannot occupy more than 4 bytes in UTF16 format */
-       uint8_t buf[4];
-       smb_iconv_t descriptor;
-       size_t ilen_orig;
-       size_t ilen;
-       size_t olen;
-       char *outbuf;
-
-       /* fastpath if the character is ASCII */
-       if ((str[0] & 0x80) == 0) {
-               *bytes_consumed = 1;
-               return (codepoint_t)str[0];
-       }
-
-       /*
-        * We assume that no multi-byte character can take more than
-        * 5 bytes. This is OK as we only support codepoints up to 1M (U+100000)
-        */
-
-       ilen_orig = strnlen(str, 5);
-       ilen = ilen_orig;
-
-       lazy_initialize_conv();
-
-       descriptor = conv_handles[src_charset][CH_UTF16LE];
-       if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
-               *bytes_consumed = 1;
-               return INVALID_CODEPOINT;
-       }
-
-       /*
-        * This looks a little strange, but it is needed to cope
-        * with codepoints above 64k (U+10000) which are encoded as per RFC2781.
-        */
-       olen = 2;
-       outbuf = (char *)buf;
-       smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
-       if (olen == 2) {
-               /*
-                * We failed to convert to a 2 byte character.
-                * See if we can convert to a 4 UTF16-LE byte char encoding.
-                */
-               olen = 4;
-               outbuf = (char *)buf;
-               smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
-               if (olen == 4) {
-                       /* We didn't convert any bytes */
-                       *bytes_consumed = 1;
-                       return INVALID_CODEPOINT;
-               }
-               olen = 4 - olen;
-       } else {
-               olen = 2 - olen;
-       }
-
-       *bytes_consumed = ilen_orig - ilen;
-
-       if (olen == 2) {
-               /* 2 byte, UTF16-LE encoded value. */
-               return (codepoint_t)SVAL(buf, 0);
-       }
-       if (olen == 4) {
-               /*
-                * Decode a 4 byte UTF16-LE character manually.
-                * See RFC2781 for the encoding mechanism.
-                */
-               codepoint_t w1 = SVAL(buf,0) & ~0xD800;
-               codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
-
-               return (codepoint_t)0x10000 +
-                               (w1 << 10) + w2;
-       }
-
-       /* no other length is valid */
-       return INVALID_CODEPOINT;
-}
-
-/*
-  Return the unicode codepoint for the next multi-byte CH_UNIX character
-  in the string. The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
-
-  Also return the number of bytes consumed (which tells the caller
-  how many bytes to skip to get to the next CH_UNIX character).
-
-  Return INVALID_CODEPOINT if the next character cannot be converted.
-*/
-
-codepoint_t next_codepoint(const char *str, size_t *size)
-{
-       return next_codepoint_ext(str, CH_UNIX, size);
-}
-
-/*
-  push a single codepoint into a CH_UNIX string the target string must
-  be able to hold the full character, which is guaranteed if it is at
-  least 5 bytes in size. The caller may pass less than 5 bytes if they
-  are sure the character will fit (for example, you can assume that
-  uppercase/lowercase of a character will not add more than 1 byte)
-
-  return the number of bytes occupied by the CH_UNIX character, or
-  -1 on failure
-*/
-_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
-{
-       smb_iconv_t descriptor;
-       uint8_t buf[4];
-       size_t ilen, olen;
-       const char *inbuf;
-       
-       if (c < 128) {
-               *str = c;
-               return 1;
-       }
-
-       lazy_initialize_conv();
-
-       descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
-       if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
-               return -1;
-       }
-
-       if (c < 0x10000) {
-               ilen = 2;
-               olen = 5;
-               inbuf = (char *)buf;
-               SSVAL(buf, 0, c);
-               smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
-               if (ilen != 0) {
-                       return -1;
-               }
-               return 5 - olen;
-       }
-
-       c -= 0x10000;
-
-       buf[0] = (c>>10) & 0xFF;
-       buf[1] = (c>>18) | 0xd8;
-       buf[2] = c & 0xFF;
-       buf[3] = ((c>>8) & 0x3) | 0xdc;
-
-       ilen = 4;
-       olen = 5;
-       inbuf = (char *)buf;
-
-       smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
-       if (ilen != 0) {
-               return -1;
-       }
-       return 5 - olen;
-}
-
-
index 3d87d6fb1296b7e0df9bdac186569405d88430ad..31157b2833349b24ad6431af967fb47d3f51c9e1 100644 (file)
@@ -2776,11 +2776,7 @@ int lpcfg_maxprintjobs(struct loadparm_service *service, struct loadparm_service
 struct smb_iconv_convenience *lpcfg_iconv_convenience(struct loadparm_context *lp_ctx)
 {
        if (lp_ctx == NULL) {
-               static struct smb_iconv_convenience *fallback_ic = NULL;
-               if (fallback_ic == NULL)
-                       fallback_ic = smb_iconv_convenience_reinit(talloc_autofree_context(),
-                                                                  "CP850", "UTF8", true, NULL);
-               return fallback_ic;
+               return get_iconv_convenience();
        }
        return lp_ctx->iconv_convenience;
 }
index fd12bb1ecafa3c853ca315dcad1015ccdb011373..c6dca6076e4c5eb1eaa91cb9ffcb917304cb7272 100644 (file)
@@ -304,6 +304,7 @@ struct smb_iconv_convenience *smb_iconv_convenience_reinit_lp(TALLOC_CTX *mem_ct
 {
        return smb_iconv_convenience_reinit(mem_ctx, lpcfg_dos_charset(lp_ctx),
                                            lpcfg_unix_charset(lp_ctx),
+                                           lpcfg_display_charset(lp_ctx),
                                            lpcfg_parm_bool(lp_ctx, NULL, "iconv", "native", true),
                                            old_ic);
 }