s3:lib/util_str: add strlen_m_ext() that takes the dest charset as a parameter.

author Michael Adam <obnox@samba.org>

Mon, 1 Nov 2010 15:28:43 +0000 (16:28 +0100)

committer Karolin Seeger <kseeger@samba.org>

Sat, 5 Mar 2011 13:34:31 +0000 (14:34 +0100)
author Michael Adam <obnox@samba.org>
Mon, 1 Nov 2010 15:28:43 +0000 (16:28 +0100)
committer Karolin Seeger <kseeger@samba.org>
Sat, 5 Mar 2011 13:34:31 +0000 (14:34 +0100)
diff --git a/source3/include/proto.h b/source3/include/proto.h

index 5064fdb8bfef0e9cfeb7260076a3d6220bb4c733..348b8b2fe1d7eb7eaf74b0fea5329a148a0ac2de 100644 (file)
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -1539,6 +1539,7 @@ char *strnrchr_m(const char *s, char c, unsigned int n);
  char *strstr_m(const char *src, const char *findstr);
  void strlower_m(char *s);
  void strupper_m(char *s);
+size_t strlen_m_ext(const char *s, const charset_t dst_charset);
  size_t strlen_m(const char *s);
  size_t strlen_m_term(const char *s);
  size_t strlen_m_term_null(const char *s);
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c

index 9a0b12adea0fc39de887bb20fb0e481424c921be..f0eb6e557156a2802426fbd7a92d0c4a3b69b966 100644 (file)
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -1454,12 +1454,12 @@ void strupper_m(char *s)
  }
  
  /**
- Count the number of UCS2 characters in a string. Normally this will
- be the same as the number of bytes in a string for single byte strings,
- but will be different for multibyte.
-**/
-
-size_t strlen_m(const char *s)
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in CH_UNIX encoding to the
+ * destination charset (which should be a unicode charset).
+ */
+size_t strlen_m_ext(const char *s, const charset_t dst_charset)
  {
         size_t count = 0;
  
@@ -1479,19 +1479,59 @@ size_t strlen_m(const char *s)
         while (*s) {
                 size_t c_size;
                 codepoint_t c = next_codepoint(s, &c_size);
-               if (c < 0x10000) {
-                       /* Unicode char fits into 16 bits. */
+               s += c_size;
+
+               switch(dst_charset) {
+               case CH_UTF16LE:
+               case CH_UTF16BE:
+               case CH_UTF16MUNGED:
+                       if (c < 0x10000) {
+                               /* Unicode char fits into 16 bits. */
+                               count += 1;
+                       } else {
+                               /* Double-width unicode char - 32 bits. */
+                               count += 2;
+                       }
+                       break;
+               case CH_UTF8:
+                       /*
+                        * this only checks ranges, and does not
+                        * check for invalid codepoints
+                        */
+                       if (c < 0x80) {
+                               count += 1;
+                       } else if (c < 0x800) {
+                               count += 2;
+               } else if (c < 0x10000) {
+                               count += 3;
+                       } else {
+                               count += 4;
+                       }
+                       break;
+               default:
+                       /*
+                        * non-unicode encoding:
+                        * assume that each codepoint fits into
+                        * one unit in the destination encoding.
+                        */
                         count += 1;
-               } else {
-                       /* Double-width unicode char - 32 bits. */
-                       count += 2;
                 }
-               s += c_size;
         }
  
         return count;
  }
  
+/**
+ Count the number of 16-bit units the string occupies once converted
+ to UTF-16LE: one per codepoint below 0x10000, two per codepoint above.
+ For single byte strings this is normally the same as the byte count.
+**/
+
+size_t strlen_m(const char *s)
+{
+       return strlen_m_ext(s, CH_UTF16LE);
+}
+
  /**
   Count the number of UCS2 characters in a string including the null
   terminator.
author	Michael Adam <obnox@samba.org>
	Mon, 1 Nov 2010 15:28:43 +0000 (16:28 +0100)
committer	Karolin Seeger <kseeger@samba.org>
	Sat, 5 Mar 2011 13:34:31 +0000 (14:34 +0100)
source3/include/proto.h		patch \| blob \| history
source3/lib/util_str.c		patch \| blob \| history