source4/lib/util_str.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Samba utility functions
   4
   5    Copyright (C) Andrew Tridgell 1992-2001
   6    Copyright (C) Simo Sorce      2001-2002
   7    Copyright (C) Martin Pool     2003
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 */
  23
  24 #include "includes.h"
  25 #include "system/iconv.h"
  26 #include "pstring.h"
  27
  28 /**
  29  * @file
  30  * @brief String utilities.
  31  **/
  32
  33 /**
  34  * Get the next token from a string, return False if none found.
  35  * Handles double-quotes.
  36  *
  37  * Based on a routine by GJC@VILLAGE.COM.
  38  * Extensively modified by Andrew.Tridgell@anu.edu.au
  39  **/
  40 BOOL next_token(const char **ptr,char *buff, const char *sep, size_t bufsize)
  41 {
  42         const char *s;
  43         BOOL quoted;
  44         size_t len=1;
  45
  46         if (!ptr)
  47                 return(False);
  48
  49         s = *ptr;
  50
  51         /* default to simple separators */
  52         if (!sep)
  53                 sep = " \t\n\r";
  54
  55         /* find the first non sep char */
  56         while (*s && strchr_m(sep,*s))
  57                 s++;
  58
  59         /* nothing left? */
  60         if (! *s)
  61                 return(False);
  62
  63         /* copy over the token */
  64         for (quoted = False; len < bufsize && *s && (quoted || !strchr_m(sep,*s)); s++) {
  65                 if (*s == '\"') {
  66                         quoted = !quoted;
  67                 } else {
  68                         len++;
  69                         *buff++ = *s;
  70                 }
  71         }
  72
  73         *ptr = (*s) ? s+1 : s;
  74         *buff = 0;
  75
  76         return(True);
  77 }
  78
  79 /**
  80  Case insensitive string compararison
  81 **/
  82 int StrCaseCmp(const char *s1, const char *s2)
  83 {
  84         codepoint_t c1=0, c2=0;
  85         size_t size1, size2;
  86
  87         while (*s1 && *s2) {
  88                 c1 = next_codepoint(s1, &size1);
  89                 c2 = next_codepoint(s2, &size2);
  90
  91                 s1 += size1;
  92                 s2 += size2;
  93
  94                 if (c1 == c2) {
  95                         continue;
  96                 }
  97
  98                 if (c1 == INVALID_CODEPOINT ||
  99                     c2 == INVALID_CODEPOINT) {
 100                         /* what else can we do?? */
 101                         return c1 - c2;
 102                 }
 103
 104                 if (toupper_w(c1) != toupper_w(c2)) {
 105                         return c1 - c2;
 106                 }
 107         }
 108
 109         return *s1 - *s2;
 110 }
 111
 112 /**
 113  * Compare 2 strings.
 114  *
 115  * @note The comparison is case-insensitive.
 116  **/
 117 BOOL strequal(const char *s1, const char *s2)
 118 {
 119         if (s1 == s2)
 120                 return(True);
 121         if (!s1 || !s2)
 122                 return(False);
 123
 124         return StrCaseCmp(s1,s2) == 0;
 125 }
 126
 127 /**
 128  Compare 2 strings (case sensitive).
 129 **/
 130 BOOL strcsequal(const char *s1,const char *s2)
 131 {
 132         if (s1 == s2)
 133                 return(True);
 134         if (!s1 || !s2)
 135                 return(False);
 136
 137         return strcmp(s1,s2) == 0;
 138 }
 139
 140
 141 /**
 142 Do a case-insensitive, whitespace-ignoring string compare.
 143 **/
 144 int strwicmp(const char *psz1, const char *psz2)
 145 {
 146         /* if BOTH strings are NULL, return TRUE, if ONE is NULL return */
 147         /* appropriate value. */
 148         if (psz1 == psz2)
 149                 return (0);
 150         else if (psz1 == NULL)
 151                 return (-1);
 152         else if (psz2 == NULL)
 153                 return (1);
 154
 155         /* sync the strings on first non-whitespace */
 156         while (1) {
 157                 while (isspace((int)*psz1))
 158                         psz1++;
 159                 while (isspace((int)*psz2))
 160                         psz2++;
 161                 if (toupper(*psz1) != toupper(*psz2) || *psz1 == '\0'
 162                     || *psz2 == '\0')
 163                         break;
 164                 psz1++;
 165                 psz2++;
 166         }
 167         return (*psz1 - *psz2);
 168 }
 169
 170 /**
 171  String replace.
 172  NOTE: oldc and newc must be 7 bit characters
 173 **/
 174 void string_replace(char *s, char oldc, char newc)
 175 {
 176         while (*s) {
 177                 size_t size;
 178                 codepoint_t c = next_codepoint(s, &size);
 179                 if (c == oldc) {
 180                         *s = newc;
 181                 }
 182                 s += size;
 183         }
 184 }
 185
 186 /**
 187  Trim the specified elements off the front and back of a string.
 188 **/
 189 BOOL trim_string(char *s,const char *front,const char *back)
 190 {
 191         BOOL ret = False;
 192         size_t front_len;
 193         size_t back_len;
 194         size_t len;
 195
 196         /* Ignore null or empty strings. */
 197         if (!s || (s[0] == '\0'))
 198                 return False;
 199
 200         front_len       = front? strlen(front) : 0;
 201         back_len        = back? strlen(back) : 0;
 202
 203         len = strlen(s);
 204
 205         if (front_len) {
 206                 while (len && strncmp(s, front, front_len)==0) {
 207                         /* Must use memmove here as src & dest can
 208                          * easily overlap. Found by valgrind. JRA. */
 209                         memmove(s, s+front_len, (len-front_len)+1);
 210                         len -= front_len;
 211                         ret=True;
 212                 }
 213         }
 214
 215         if (back_len) {
 216                 while ((len >= back_len) && strncmp(s+len-back_len,back,back_len)==0) {
 217                         s[len-back_len]='\0';
 218                         len -= back_len;
 219                         ret=True;
 220                 }
 221         }
 222         return ret;
 223 }
 224
 225 /**
 226  Find the number of 'c' chars in a string
 227 **/
 228 size_t count_chars(const char *s, char c)
 229 {
 230         size_t count = 0;
 231
 232         while (*s) {
 233                 size_t size;
 234                 codepoint_t c2 = next_codepoint(s, &size);
 235                 if (c2 == c) count++;
 236                 s += size;
 237         }
 238
 239         return count;
 240 }
 241
 242 /**
 243  Safe string copy into a known length string. maxlength does not
 244  include the terminating zero.
 245 **/
 246 char *safe_strcpy(char *dest,const char *src, size_t maxlength)
 247 {
 248         size_t len;
 249
 250         if (!dest) {
 251                 DEBUG(0,("ERROR: NULL dest in safe_strcpy\n"));
 252                 return NULL;
 253         }
 254
 255 #ifdef DEVELOPER
 256         /* We intentionally write out at the extremity of the destination
 257          * string.  If the destination is too short (e.g. pstrcpy into mallocd
 258          * or fstring) then this should cause an error under a memory
 259          * checker. */
 260         dest[maxlength] = '\0';
 261         if (PTR_DIFF(&len, dest) > 0) {  /* check if destination is on the stack, ok if so */
 262                 log_suspicious_usage("safe_strcpy", src);
 263         }
 264 #endif
 265
 266         if (!src) {
 267                 *dest = 0;
 268                 return dest;
 269         }
 270
 271         len = strlen(src);
 272
 273         if (len > maxlength) {
 274                 DEBUG(0,("ERROR: string overflow by %u (%u - %u) in safe_strcpy [%.50s]\n",
 275                          (uint_t)(len-maxlength), len, maxlength, src));
 276                 len = maxlength;
 277         }
 278
 279         memmove(dest, src, len);
 280         dest[len] = 0;
 281         return dest;
 282 }
 283
 284 /**
 285  Safe string cat into a string. maxlength does not
 286  include the terminating zero.
 287 **/
 288 char *safe_strcat(char *dest, const char *src, size_t maxlength)
 289 {
 290         size_t src_len, dest_len;
 291
 292         if (!dest) {
 293                 DEBUG(0,("ERROR: NULL dest in safe_strcat\n"));
 294                 return NULL;
 295         }
 296
 297         if (!src)
 298                 return dest;
 299
 300 #ifdef DEVELOPER
 301         if (PTR_DIFF(&src_len, dest) > 0) {  /* check if destination is on the stack, ok if so */
 302                 log_suspicious_usage("safe_strcat", src);
 303         }
 304 #endif
 305         src_len = strlen(src);
 306         dest_len = strlen(dest);
 307
 308         if (src_len + dest_len > maxlength) {
 309                 DEBUG(0,("ERROR: string overflow by %d in safe_strcat [%.50s]\n",
 310                          (int)(src_len + dest_len - maxlength), src));
 311                 if (maxlength > dest_len) {
 312                         memcpy(&dest[dest_len], src, maxlength - dest_len);
 313                 }
 314                 dest[maxlength] = 0;
 315                 return NULL;
 316         }
 317
 318         memcpy(&dest[dest_len], src, src_len);
 319         dest[dest_len + src_len] = 0;
 320         return dest;
 321 }
 322
 323 /**
 324  Paranoid strcpy into a buffer of given length (includes terminating
 325  zero. Strips out all but 'a-Z0-9' and the character in other_safe_chars
 326  and replaces with '_'. Deliberately does *NOT* check for multibyte
 327  characters. Don't change it !
 328 **/
 329
 330 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength)
 331 {
 332         size_t len, i;
 333
 334         if (maxlength == 0) {
 335                 /* can't fit any bytes at all! */
 336                 return NULL;
 337         }
 338
 339         if (!dest) {
 340                 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
 341                 return NULL;
 342         }
 343
 344         if (!src) {
 345                 *dest = 0;
 346                 return dest;
 347         }
 348
 349         len = strlen(src);
 350         if (len >= maxlength)
 351                 len = maxlength - 1;
 352
 353         if (!other_safe_chars)
 354                 other_safe_chars = "";
 355
 356         for(i = 0; i < len; i++) {
 357                 int val = (src[i] & 0xff);
 358                 if (isupper(val) || islower(val) || isdigit(val) || strchr_m(other_safe_chars, val))
 359                         dest[i] = src[i];
 360                 else
 361                         dest[i] = '_';
 362         }
 363
 364         dest[i] = '\0';
 365
 366         return dest;
 367 }
 368
 369 /**
 370  Like strncpy but always null terminates. Make sure there is room!
 371  The variable n should always be one less than the available size.
 372 **/
 373
 374 char *StrnCpy(char *dest,const char *src,size_t n)
 375 {
 376         char *d = dest;
 377         if (!dest)
 378                 return(NULL);
 379         if (!src) {
 380                 *dest = 0;
 381                 return(dest);
 382         }
 383         while (n-- && (*d++ = *src++))
 384                 ;
 385         *d = 0;
 386         return(dest);
 387 }
 388
 389
 390 /**
 391  Routine to get hex characters and turn them into a 16 byte array.
 392  the array can be variable length, and any non-hex-numeric
 393  characters are skipped.  "0xnn" or "0Xnn" is specially catered
 394  for.
 395
 396  valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
 397
 398 **/
 399 size_t strhex_to_str(char *p, size_t len, const char *strhex)
 400 {
 401         size_t i;
 402         size_t num_chars = 0;
 403         uint8_t   lonybble, hinybble;
 404         const char     *hexchars = "0123456789ABCDEF";
 405         char           *p1 = NULL, *p2 = NULL;
 406
 407         for (i = 0; i < len && strhex[i] != 0; i++) {
 408                 if (strncasecmp(hexchars, "0x", 2) == 0) {
 409                         i++; /* skip two chars */
 410                         continue;
 411                 }
 412
 413                 if (!(p1 = strchr_m(hexchars, toupper(strhex[i]))))
 414                         break;
 415
 416                 i++; /* next hex digit */
 417
 418                 if (!(p2 = strchr_m(hexchars, toupper(strhex[i]))))
 419                         break;
 420
 421                 /* get the two nybbles */
 422                 hinybble = PTR_DIFF(p1, hexchars);
 423                 lonybble = PTR_DIFF(p2, hexchars);
 424
 425                 p[num_chars] = (hinybble << 4) | lonybble;
 426                 num_chars++;
 427
 428                 p1 = NULL;
 429                 p2 = NULL;
 430         }
 431         return num_chars;
 432 }
 433
 434 DATA_BLOB strhex_to_data_blob(const char *strhex)
 435 {
 436         DATA_BLOB ret_blob = data_blob(NULL, strlen(strhex)/2+1);
 437
 438         ret_blob.length = strhex_to_str(ret_blob.data,
 439                                         strlen(strhex),
 440                                         strhex);
 441
 442         return ret_blob;
 443 }
 444
 445
 446 /**
 447  * Routine to print a buffer as HEX digits, into an allocated string.
 448  */
 449 void hex_encode(const unsigned char *buff_in, size_t len, char **out_hex_buffer)
 450 {
 451         int i;
 452         char *hex_buffer;
 453
 454         *out_hex_buffer = smb_xmalloc((len*2)+1);
 455         hex_buffer = *out_hex_buffer;
 456
 457         for (i = 0; i < len; i++)
 458                 slprintf(&hex_buffer[i*2], 3, "%02X", buff_in[i]);
 459 }
 460
 461 /**
 462  Check if a string is part of a list.
 463 **/
 464 BOOL in_list(const char *s, const char *list, BOOL casesensitive)
 465 {
 466         pstring tok;
 467         const char *p=list;
 468
 469         if (!list)
 470                 return(False);
 471
 472         while (next_token(&p,tok,LIST_SEP,sizeof(tok))) {
 473                 if (casesensitive) {
 474                         if (strcmp(tok,s) == 0)
 475                                 return(True);
 476                 } else {
 477                         if (StrCaseCmp(tok,s) == 0)
 478                                 return(True);
 479                 }
 480         }
 481         return(False);
 482 }
 483
 484 /**
 485  Set a string value, allocing the space for the string
 486 **/
 487 static BOOL string_init(char **dest,const char *src)
 488 {
 489         if (!src) src = "";
 490
 491         (*dest) = strdup(src);
 492         if ((*dest) == NULL) {
 493                 DEBUG(0,("Out of memory in string_init\n"));
 494                 return False;
 495         }
 496         return True;
 497 }
 498
 499 /**
 500  Free a string value.
 501 **/
 502 void string_free(char **s)
 503 {
 504         if (s) SAFE_FREE(*s);
 505 }
 506
 507 /**
 508  Set a string value, deallocating any existing space, and allocing the space
 509  for the string
 510 **/
 511 BOOL string_set(char **dest, const char *src)
 512 {
 513         string_free(dest);
 514         return string_init(dest,src);
 515 }
 516
 517 /**
 518  Substitute a string for a pattern in another string. Make sure there is
 519  enough room!
 520
 521  This routine looks for pattern in s and replaces it with
 522  insert. It may do multiple replacements.
 523
 524  Any of " ; ' $ or ` in the insert string are replaced with _
 525  if len==0 then the string cannot be extended. This is different from the old
 526  use of len==0 which was for no length checks to be done.
 527 **/
 528
 529 void string_sub(char *s,const char *pattern, const char *insert, size_t len)
 530 {
 531         char *p;
 532         ssize_t ls,lp,li, i;
 533
 534         if (!insert || !pattern || !*pattern || !s)
 535                 return;
 536
 537         ls = (ssize_t)strlen(s);
 538         lp = (ssize_t)strlen(pattern);
 539         li = (ssize_t)strlen(insert);
 540
 541         if (len == 0)
 542                 len = ls + 1; /* len is number of *bytes* */
 543
 544         while (lp <= ls && (p = strstr(s,pattern))) {
 545                 if (ls + (li-lp) >= len) {
 546                         DEBUG(0,("ERROR: string overflow by %d in string_sub(%.50s, %d)\n",
 547                                  (int)(ls + (li-lp) - len),
 548                                  pattern, (int)len));
 549                         break;
 550                 }
 551                 if (li != lp) {
 552                         memmove(p+li,p+lp,strlen(p+lp)+1);
 553                 }
 554                 for (i=0;i<li;i++) {
 555                         switch (insert[i]) {
 556                         case '`':
 557                         case '"':
 558                         case '\'':
 559                         case ';':
 560                         case '$':
 561                         case '%':
 562                         case '\r':
 563                         case '\n':
 564                                 p[i] = '_';
 565                                 break;
 566                         default:
 567                                 p[i] = insert[i];
 568                         }
 569                 }
 570                 s = p + li;
 571                 ls += (li-lp);
 572         }
 573 }
 574
 575
 576 /**
 577  Similar to string_sub() but allows for any character to be substituted.
 578  Use with caution!
 579  if len==0 then the string cannot be extended. This is different from the old
 580  use of len==0 which was for no length checks to be done.
 581 **/
 582
 583 void all_string_sub(char *s,const char *pattern,const char *insert, size_t len)
 584 {
 585         char *p;
 586         ssize_t ls,lp,li;
 587
 588         if (!insert || !pattern || !s)
 589                 return;
 590
 591         ls = (ssize_t)strlen(s);
 592         lp = (ssize_t)strlen(pattern);
 593         li = (ssize_t)strlen(insert);
 594
 595         if (!*pattern)
 596                 return;
 597
 598         if (len == 0)
 599                 len = ls + 1; /* len is number of *bytes* */
 600
 601         while (lp <= ls && (p = strstr(s,pattern))) {
 602                 if (ls + (li-lp) >= len) {
 603                         DEBUG(0,("ERROR: string overflow by %d in all_string_sub(%.50s, %d)\n",
 604                                  (int)(ls + (li-lp) - len),
 605                                  pattern, (int)len));
 606                         break;
 607                 }
 608                 if (li != lp) {
 609                         memmove(p+li,p+lp,strlen(p+lp)+1);
 610                 }
 611                 memcpy(p, insert, li);
 612                 s = p + li;
 613                 ls += (li-lp);
 614         }
 615 }
 616
 617
 618 /**
 619  Strchr and strrchr_m are a bit complex on general multi-byte strings.
 620 **/
 621 char *strchr_m(const char *s, char c)
 622 {
 623         /* characters below 0x3F are guaranteed to not appear in
 624            non-initial position in multi-byte charsets */
 625         if ((c & 0xC0) == 0) {
 626                 return strchr(s, c);
 627         }
 628
 629         while (*s) {
 630                 size_t size;
 631                 codepoint_t c2 = next_codepoint(s, &size);
 632                 if (c2 == c) {
 633                         return discard_const(s);
 634                 }
 635                 s += size;
 636         }
 637
 638         return NULL;
 639 }
 640
 641 char *strrchr_m(const char *s, char c)
 642 {
 643         char *ret = NULL;
 644
 645         /* characters below 0x3F are guaranteed to not appear in
 646            non-initial position in multi-byte charsets */
 647         if ((c & 0xC0) == 0) {
 648                 return strrchr(s, c);
 649         }
 650
 651         while (*s) {
 652                 size_t size;
 653                 codepoint_t c2 = next_codepoint(s, &size);
 654                 if (c2 == c) {
 655                         ret = discard_const(s);
 656                 }
 657                 s += size;
 658         }
 659
 660         return ret;
 661 }
 662
 663 /**
 664  Convert a string to lower case, allocated with talloc
 665 **/
 666 char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
 667 {
 668         size_t size=0;
 669         char *dest;
 670
 671         /* this takes advantage of the fact that upper/lower can't
 672            change the length of a character by more than 1 byte */
 673         dest = talloc_size(ctx, 2*(strlen(src))+1);
 674         if (dest == NULL) {
 675                 return NULL;
 676         }
 677
 678         while (*src) {
 679                 size_t c_size;
 680                 codepoint_t c = next_codepoint(src, &c_size);
 681                 src += c_size;
 682
 683                 c = tolower_w(c);
 684
 685                 c_size = push_codepoint(dest+size, c);
 686                 if (c_size == -1) {
 687                         talloc_free(dest);
 688                         return NULL;
 689                 }
 690                 size += c_size;
 691         }
 692
 693         dest[size] = 0;
 694
 695         return dest;
 696 }
 697
 698 /**
 699  Convert a string to UPPER case, allocated with talloc
 700 **/
 701 char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
 702 {
 703         size_t size=0;
 704         char *dest;
 705
 706         /* this takes advantage of the fact that upper/lower can't
 707            change the length of a character by more than 1 byte */
 708         dest = talloc_size(ctx, 2*(strlen(src))+1);
 709         if (dest == NULL) {
 710                 return NULL;
 711         }
 712
 713         while (*src) {
 714                 size_t c_size;
 715                 codepoint_t c = next_codepoint(src, &c_size);
 716                 src += c_size;
 717
 718                 c = toupper_w(c);
 719
 720                 c_size = push_codepoint(dest+size, c);
 721                 if (c_size == -1) {
 722                         talloc_free(dest);
 723                         return NULL;
 724                 }
 725                 size += c_size;
 726         }
 727
 728         dest[size] = 0;
 729
 730         return dest;
 731 }
 732
 733 /**
 734  Convert a string to lower case.
 735 **/
 736 void strlower_m(char *s)
 737 {
 738         char *d;
 739
 740         /* this is quite a common operation, so we want it to be
 741            fast. We optimise for the ascii case, knowing that all our
 742            supported multi-byte character sets are ascii-compatible
 743            (ie. they match for the first 128 chars) */
 744         while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 745                 *s = tolower((uint8_t)*s);
 746                 s++;
 747         }
 748
 749         if (!*s)
 750                 return;
 751
 752         d = s;
 753
 754         while (*s) {
 755                 size_t c_size, c_size2;
 756                 codepoint_t c = next_codepoint(s, &c_size);
 757                 c_size2 = push_codepoint(d, tolower_w(c));
 758                 if (c_size2 > c_size) {
 759                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
 760                                  c, tolower_w(c), c_size, c_size2));
 761                         smb_panic("codepoint expansion in strlower_m\n");
 762                 }
 763                 s += c_size;
 764                 d += c_size2;
 765         }
 766         *d = 0;
 767 }
 768
 769 /**
 770  Convert a string to UPPER case.
 771 **/
 772 void strupper_m(char *s)
 773 {
 774         char *d;
 775
 776         /* this is quite a common operation, so we want it to be
 777            fast. We optimise for the ascii case, knowing that all our
 778            supported multi-byte character sets are ascii-compatible
 779            (ie. they match for the first 128 chars) */
 780         while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 781                 *s = toupper((uint8_t)*s);
 782                 s++;
 783         }
 784
 785         if (!*s)
 786                 return;
 787
 788         d = s;
 789
 790         while (*s) {
 791                 size_t c_size, c_size2;
 792                 codepoint_t c = next_codepoint(s, &c_size);
 793                 c_size2 = push_codepoint(d, toupper_w(c));
 794                 if (c_size2 > c_size) {
 795                         DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
 796                                  c, toupper_w(c), c_size, c_size2));
 797                         smb_panic("codepoint expansion in strupper_m\n");
 798                 }
 799                 s += c_size;
 800                 d += c_size2;
 801         }
 802         *d = 0;
 803 }
 804
 805 /**
 806  Count the number of UCS2 characters in a string. Normally this will
 807  be the same as the number of bytes in a string for single byte strings,
 808  but will be different for multibyte.
 809 **/
 810 size_t strlen_m(const char *s)
 811 {
 812         size_t count = 0;
 813
 814         if (!s) {
 815                 return 0;
 816         }
 817
 818         while (*s && !(((uint8_t)s[0]) & 0x7F)) {
 819                 s++;
 820                 count++;
 821         }
 822
 823         if (!*s) {
 824                 return count;
 825         }
 826
 827         while (*s) {
 828                 size_t c_size;
 829                 codepoint_t c = next_codepoint(s, &c_size);
 830                 if (c < 0x10000) {
 831                         count += 1;
 832                 } else {
 833                         count += 2;
 834                 }
 835                 s += c_size;
 836         }
 837
 838         return count;
 839 }
 840
 841 /**
 842    Work out the number of multibyte chars in a string, including the NULL
 843    terminator.
 844 **/
 845 size_t strlen_m_term(const char *s)
 846 {
 847         if (!s) {
 848                 return 0;
 849         }
 850
 851         return strlen_m(s) + 1;
 852 }
 853
 854 /**
 855  Return a RFC2254 binary string representation of a buffer.
 856  Used in LDAP filters.
 857  Caller must free.
 858 **/
 859 char *binary_string(char *buf, int len)
 860 {
 861         char *s;
 862         int i, j;
 863         const char *hex = "0123456789ABCDEF";
 864         s = malloc(len * 3 + 1);
 865         if (!s)
 866                 return NULL;
 867         for (j=i=0;i<len;i++) {
 868                 s[j] = '\\';
 869                 s[j+1] = hex[((uint8_t)buf[i]) >> 4];
 870                 s[j+2] = hex[((uint8_t)buf[i]) & 0xF];
 871                 j += 3;
 872         }
 873         s[j] = 0;
 874         return s;
 875 }
 876
 877 /**
 878  Unescape a URL encoded string, in place.
 879 **/
 880
 881 void rfc1738_unescape(char *buf)
 882 {
 883         char *p=buf;
 884
 885         while ((p=strchr_m(p,'+')))
 886                 *p = ' ';
 887
 888         p = buf;
 889
 890         while (p && *p && (p=strchr_m(p,'%'))) {
 891                 int c1 = p[1];
 892                 int c2 = p[2];
 893
 894                 if (c1 >= '0' && c1 <= '9')
 895                         c1 = c1 - '0';
 896                 else if (c1 >= 'A' && c1 <= 'F')
 897                         c1 = 10 + c1 - 'A';
 898                 else if (c1 >= 'a' && c1 <= 'f')
 899                         c1 = 10 + c1 - 'a';
 900                 else {p++; continue;}
 901
 902                 if (c2 >= '0' && c2 <= '9')
 903                         c2 = c2 - '0';
 904                 else if (c2 >= 'A' && c2 <= 'F')
 905                         c2 = 10 + c2 - 'A';
 906                 else if (c2 >= 'a' && c2 <= 'f')
 907                         c2 = 10 + c2 - 'a';
 908                 else {p++; continue;}
 909
 910                 *p = (c1<<4) | c2;
 911
 912                 memmove(p+1, p+3, strlen(p+3)+1);
 913                 p++;
 914         }
 915 }
 916
 917 /**
 918  * Decode a base64 string into a DATA_BLOB - simple and slow algorithm
 919  **/
 920 DATA_BLOB base64_decode_data_blob(const char *s)
 921 {
 922         const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 923
 924         int bit_offset, byte_offset, idx, i, n;
 925         DATA_BLOB decoded = data_blob(s, strlen(s)+1);
 926         uint8_t *d = decoded.data;
 927         char *p;
 928
 929         n=i=0;
 930
 931         while (*s && (p=strchr_m(b64,*s))) {
 932                 idx = (int)(p - b64);
 933                 byte_offset = (i*6)/8;
 934                 bit_offset = (i*6)%8;
 935                 d[byte_offset] &= ~((1<<(8-bit_offset))-1);
 936                 if (bit_offset < 3) {
 937                         d[byte_offset] |= (idx << (2-bit_offset));
 938                         n = byte_offset+1;
 939                 } else {
 940                         d[byte_offset] |= (idx >> (bit_offset-2));
 941                         d[byte_offset+1] = 0;
 942                         d[byte_offset+1] |= (idx << (8-(bit_offset-2))) & 0xFF;
 943                         n = byte_offset+2;
 944                 }
 945                 s++; i++;
 946         }
 947
 948         /* fix up length */
 949         decoded.length = n;
 950         return decoded;
 951 }
 952
 953 /**
 954  * Decode a base64 string in-place - wrapper for the above
 955  **/
 956 void base64_decode_inplace(char *s)
 957 {
 958         DATA_BLOB decoded = base64_decode_data_blob(s);
 959         memcpy(s, decoded.data, decoded.length);
 960         data_blob_free(&decoded);
 961
 962         /* null terminate */
 963         s[decoded.length] = '\0';
 964 }
 965
 966 /**
 967  * Encode a base64 string into a malloc()ed string caller to free.
 968  *
 969  *From SQUID: adopted from http://ftp.sunet.se/pub2/gnu/vm/base64-encode.c with adjustments
 970  **/
 971 char * base64_encode_data_blob(DATA_BLOB data)
 972 {
 973         const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 974         int bits = 0;
 975         int char_count = 0;
 976         size_t out_cnt = 0;
 977         size_t len = data.length;
 978         size_t output_len = data.length * 2;
 979         char *result = malloc(output_len); /* get us plenty of space */
 980
 981         while (len-- && out_cnt < (data.length * 2) - 5) {
 982                 int c = (uint8_t) *(data.data++);
 983                 bits += c;
 984                 char_count++;
 985                 if (char_count == 3) {
 986                         result[out_cnt++] = b64[bits >> 18];
 987                         result[out_cnt++] = b64[(bits >> 12) & 0x3f];
 988                         result[out_cnt++] = b64[(bits >> 6) & 0x3f];
 989             result[out_cnt++] = b64[bits & 0x3f];
 990             bits = 0;
 991             char_count = 0;
 992         } else {
 993             bits <<= 8;
 994         }
 995     }
 996     if (char_count != 0) {
 997         bits <<= 16 - (8 * char_count);
 998         result[out_cnt++] = b64[bits >> 18];
 999         result[out_cnt++] = b64[(bits >> 12) & 0x3f];
1000         if (char_count == 1) {
1001             result[out_cnt++] = '=';
1002             result[out_cnt++] = '=';
1003         } else {
1004             result[out_cnt++] = b64[(bits >> 6) & 0x3f];
1005             result[out_cnt++] = '=';
1006         }
1007     }
1008     result[out_cnt] = '\0';     /* terminate */
1009     return result;
1010 }
1011
1012 #ifdef VALGRIND
/*
  Count the bytes in 's' before its terminating NUL, one byte at a time.

  NOTE(review): presumably a replacement for an optimised strlen() that
  upsets valgrind - confirm against wherever VALGRIND is defined.
*/
size_t valgrind_strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0') {
		end++;
	}

	return (size_t)(end - s);
}
1020 #endif
1021
1022
1023 /*
1024   format a string into length-prefixed dotted domain format, as used in NBT
1025   and in some ADS structures
1026 */
1027 const char *str_format_nbt_domain(TALLOC_CTX *mem_ctx, const char *s)
1028 {
1029         char *ret;
1030         int i;
1031         if (!s || !*s) {
1032                 return talloc_strdup(mem_ctx, "");
1033         }
1034         ret = talloc_size(mem_ctx, strlen(s)+2);
1035         if (!ret) {
1036                 return ret;
1037         }
1038
1039         memcpy(ret+1, s, strlen(s)+1);
1040         ret[0] = '.';
1041
1042         for (i=0;ret[i];i++) {
1043                 if (ret[i] == '.') {
1044                         char *p = strchr(ret+i+1, '.');
1045                         if (p) {
1046                                 ret[i] = p-(ret+i+1);
1047                         } else {
1048                                 ret[i] = strlen(ret+i+1);
1049                         }
1050                 }
1051         }
1052
1053         return ret;
1054 }
1055
1056 BOOL add_string_to_array(TALLOC_CTX *mem_ctx,
1057                          const char *str, const char ***strings, int *num)
1058 {
1059         char *dup_str = talloc_strdup(mem_ctx, str);
1060
1061         *strings = talloc_realloc(mem_ctx,
1062                                     *strings,
1063                                     const char *, ((*num)+1));
1064
1065         if ((*strings == NULL) || (dup_str == NULL))
1066                 return False;
1067
1068         (*strings)[*num] = dup_str;
1069         *num += 1;
1070
1071         return True;
1072 }
1073
1074
1075
/*
  Variant of strcmp() that handles NULL pointers.

  Two NULLs (or the same pointer twice) compare equal.  A NULL pointer
  compares greater than any non-NULL string: -1 is returned when only s2
  is NULL and 1 when only s1 is NULL.  Otherwise the result is that of
  strcmp().
*/
int strcmp_safe(const char *s1, const char *s2)
{
	if (s1 != NULL && s2 != NULL) {
		/* identical pointers need no character comparison */
		return (s1 == s2) ? 0 : strcmp(s1, s2);
	}

	/* at least one side is NULL from here on */
	if (s1 == s2) {
		return 0;
	}

	return (s1 != NULL) ? -1 : 1;
}
1089
1090
/*******************************************************************
 Return the number of bytes occupied by an ASCII string held in a
 buffer of at most 'n' bytes.

 The count includes the terminating null when one is found within the
 first 'n' bytes; otherwise the result is 'n'.
********************************************************************/
size_t ascii_len_n(const char *src, size_t n)
{
	const size_t chars = strnlen(src, n);

	/* no terminator inside the limit: only the characters count */
	if (chars + 1 > n) {
		return chars;
	}

	return chars + 1;
}
1107
1108
1109 /*******************************************************************
1110  Return a string representing a CIFS attribute for a file.
1111 ********************************************************************/
1112 char *attrib_string(TALLOC_CTX *mem_ctx, uint32_t attrib)
1113 {
1114         int i, len;
1115         const struct {
1116                 char c;
1117                 uint16_t attr;
1118         } attr_strs[] = {
1119                 {'V', FILE_ATTRIBUTE_VOLUME},
1120                 {'D', FILE_ATTRIBUTE_DIRECTORY},
1121                 {'A', FILE_ATTRIBUTE_ARCHIVE},
1122                 {'H', FILE_ATTRIBUTE_HIDDEN},
1123                 {'S', FILE_ATTRIBUTE_SYSTEM},
1124                 {'N', FILE_ATTRIBUTE_NORMAL},
1125                 {'R', FILE_ATTRIBUTE_READONLY},
1126                 {'d', FILE_ATTRIBUTE_DEVICE},
1127                 {'t', FILE_ATTRIBUTE_TEMPORARY},
1128                 {'s', FILE_ATTRIBUTE_SPARSE},
1129                 {'r', FILE_ATTRIBUTE_REPARSE_POINT},
1130                 {'c', FILE_ATTRIBUTE_COMPRESSED},
1131                 {'o', FILE_ATTRIBUTE_OFFLINE},
1132                 {'n', FILE_ATTRIBUTE_NONINDEXED},
1133                 {'e', FILE_ATTRIBUTE_ENCRYPTED}
1134         };
1135         char *ret;
1136
1137         ret = talloc_size(mem_ctx, ARRAY_SIZE(attr_strs)+1);
1138         if (!ret) {
1139                 return NULL;
1140         }
1141
1142         for (len=i=0; i<ARRAY_SIZE(attr_strs); i++) {
1143                 if (attrib & attr_strs[i].attr) {
1144                         ret[len++] = attr_strs[i].c;
1145                 }
1146         }
1147
1148         ret[len] = 0;
1149
1150         return ret;
1151 }
1152