Revert "Fix bug #7781 (Samba transforms "ShareName" to lowercase when adding new...

[samba.git] / source3 / lib / util_str.c
diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c

index c197fd751598d955d1c5d96f18beddd660615d7c..d86963702e03ebc73365d327f08f5f090e098e16 100644 (file)
--- a/source3/lib/util_str.c
+++ b/source3/lib/util_str.c
@@ -586,7 +586,9 @@ char *safe_strcat_fn(const char *fn,
   Paranoid strcpy into a buffer of given length (includes terminating
   zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
   and replaces with '_'. Deliberately does *NOT* check for multibyte
- characters. Don't change it !
+ characters. Treats src as an array of bytes, not as a multibyte
+ string. Any byte >0x7f is automatically converted to '_'.
+ other_safe_chars must also contain an ascii string (bytes<=0x7f).
  **/
  
  char *alpha_strcpy_fn(const char *fn,
@@ -622,8 +624,12 @@ char *alpha_strcpy_fn(const char *fn,
  
         for(i = 0; i < len; i++) {
                 int val = (src[i] & 0xff);
-               if (isupper_ascii(val) || islower_ascii(val) ||
-                               isdigit(val) || strchr_m(other_safe_chars, val))
+               if (val > 0x7f) {
+                       dest[i] = '_';
+                       continue;
+               }
+               if (isupper(val) || islower(val) ||
+                               isdigit(val) || strchr(other_safe_chars, val))
                         dest[i] = src[i];
                 else
                         dest[i] = '_';
@@ -1454,12 +1460,12 @@ void strupper_m(char *s)
  }
  
  /**
- Count the number of UCS2 characters in a string. Normally this will
- be the same as the number of bytes in a string for single byte strings,
- but will be different for multibyte.
-**/
-
-size_t strlen_m(const char *s)
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in CH_UNIX encoding to the
+ * destination charset (which should be a unicode charset).
+ */
+size_t strlen_m_ext(const char *s, const charset_t dst_charset)
  {
         size_t count = 0;
  
@@ -1479,19 +1485,67 @@ size_t strlen_m(const char *s)
         while (*s) {
                 size_t c_size;
                 codepoint_t c = next_codepoint(s, &c_size);
-               if (c < 0x10000) {
-                       /* Unicode char fits into 16 bits. */
+               s += c_size;
+
+               switch(dst_charset) {
+               case CH_UTF16LE:
+               case CH_UTF16BE:
+               case CH_UTF16MUNGED:
+                       if (c < 0x10000) {
+                               /* Unicode char fits into 16 bits. */
+                               count += 1;
+                       } else {
+                               /* Double-width unicode char - 32 bits. */
+                               count += 2;
+                       }
+                       break;
+               case CH_UTF8:
+                       /*
+                        * Size by codepoint range only (see RFC 3629);
+                        * invalid codepoints are not detected.
+                        */
+                       if (c < 0x80) {
+                               count += 1; /* U+0000..U+007F */
+                       } else if (c < 0x800) {
+                               count += 2; /* U+0080..U+07FF */
+                       } else if (c < 0x10000) {
+                               count += 3; /* U+0800..U+FFFF */
+                       } else {
+                               count += 4; /* U+10000 and above */
+                       }
+                       break;
+               default:
+                       /*
+                        * non-unicode encoding:
+                        * assume that each codepoint fits into
+                        * one unit in the destination encoding.
+                        */
                         count += 1;
-               } else {
-                       /* Double-width unicode char - 32 bits. */
-                       count += 2;
                 }
-               s += c_size;
         }
  
         return count;
  }
  
+/**
+ * strlen_m_ext() + 1 (presumably for the terminator); 0 if s is NULL.
+ */
+size_t strlen_m_ext_term(const char *s, const charset_t dst_charset)
+{
+       return s ? strlen_m_ext(s, dst_charset) + 1 : 0;
+}
+
+/**
+ Count the number of UCS2 (16-bit) units in a string; this is simply
+ strlen_m_ext() with CH_UTF16LE as the destination charset. For single
+ byte strings this normally equals the byte count; multibyte differs.
+**/
+
+size_t strlen_m(const char *s)
+{
+       return strlen_m_ext(s, CH_UTF16LE);
+}
+
  /**
   Count the number of UCS2 characters in a string including the null
   terminator.
@@ -1616,7 +1670,7 @@ bool str_list_sub_basic( char **list, const char *smb_name,
  }
  
  /******************************************************************************
- substritute a specific pattern in a string list
+ substitute a specific pattern in a string list
   *****************************************************************************/
  
  bool str_list_substitute(char **list, const char *pattern, const char *insert)
@@ -2430,13 +2484,13 @@ char *escape_shell_string(const char *src)
  
  #define S_LIST_ABS 16 /* List Allocation Block Size */
  
-char **str_list_make_v3(TALLOC_CTX *mem_ctx, const char *string, const char *sep)
+char **str_list_make_v3(TALLOC_CTX *mem_ctx, const char *string,
+       const char *sep)
  {
         char **list;
         const char *str;
-       char *s;
+       char *s, *tok;
         int num, lsize;
-       char *tok;
  
         if (!string || !*string)
                 return NULL;