lib/ldb/common/attrib_handlers.c

   1 /*
   2    ldb database library
   3
   4    Copyright (C) Andrew Tridgell  2005
   5    Copyright (C) Andrew Bartlett <abartlet@samba.org> 2006-2009
   6
   7      ** NOTE! The following LGPL license applies to the ldb
   8      ** library. This does NOT imply that all of Samba is released
   9      ** under the LGPL
  10
  11    This library is free software; you can redistribute it and/or
  12    modify it under the terms of the GNU Lesser General Public
  13    License as published by the Free Software Foundation; either
  14    version 3 of the License, or (at your option) any later version.
  15
  16    This library is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License for more details.
  20
  21    You should have received a copy of the GNU Lesser General Public
  22    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  23 */
  24 /*
  25   attribute handlers for well known attribute types, selected by syntax OID
  26   see rfc2252
  27 */
  28
  29 #include "ldb_private.h"
  30 #include "system/locale.h"
  31 #include "ldb_handlers.h"
  32
  33 /*
  34   default handler that just copies a ldb_val.
  35 */
  36 int ldb_handler_copy(struct ldb_context *ldb, void *mem_ctx,
  37                      const struct ldb_val *in, struct ldb_val *out)
  38 {
  39         *out = ldb_val_dup(mem_ctx, in);
  40         if (in->length > 0 && out->data == NULL) {
  41                 ldb_oom(ldb);
  42                 return -1;
  43         }
  44         return 0;
  45 }
  46
  47 /*
  48   a case folding copy handler, removing leading and trailing spaces and
  49   multiple internal spaces
  50
  51   We exploit the fact that utf8 never uses the space octet except for
  52   the space itself
  53 */
  54 int ldb_handler_fold(struct ldb_context *ldb, void *mem_ctx,
  55                             const struct ldb_val *in, struct ldb_val *out)
  56 {
  57         char *s, *t, *start;
  58         bool in_space;
  59
  60         if (!in || !out || !(in->data)) {
  61                 return -1;
  62         }
  63
  64         out->data = (uint8_t *)ldb_casefold(ldb, mem_ctx, (const char *)(in->data), in->length);
  65         if (out->data == NULL) {
  66                 ldb_debug(ldb, LDB_DEBUG_ERROR, "ldb_handler_fold: unable to casefold string [%.*s]", (int)in->length, (const char *)in->data);
  67                 return -1;
  68         }
  69
  70         start = (char *)(out->data);
  71         in_space = true;
  72         t = start;
  73         for (s = start; *s != '\0'; s++) {
  74                 if (*s == ' ') {
  75                         if (in_space) {
  76                                 /*
  77                                  * We already have one (or this is the start)
  78                                  * and we don't want to add more
  79                                  */
  80                                 continue;
  81                         }
  82                         in_space = true;
  83                 } else {
  84                         in_space = false;
  85                 }
  86                 *t = *s;
  87                 t++;
  88         }
  89
  90         if (in_space && t != start) {
  91                 /* the loop will have left a single trailing space */
  92                 t--;
  93         }
  94         *t = '\0';
  95
  96         out->length = t - start;
  97         return 0;
  98 }
  99
 100 /* length limited conversion of a ldb_val to an int64_t */
 101 static int val_to_int64(const struct ldb_val *in, int64_t *v)
 102 {
 103         char *end;
 104         char buf[64];
 105
 106         /* make sure we don't read past the end of the data */
 107         if (in->length > sizeof(buf)-1) {
 108                 return LDB_ERR_INVALID_ATTRIBUTE_SYNTAX;
 109         }
 110         strncpy(buf, (char *)in->data, in->length);
 111         buf[in->length] = 0;
 112
 113         *v = (int64_t) strtoll(buf, &end, 0);
 114         if (*end != 0) {
 115                 return LDB_ERR_INVALID_ATTRIBUTE_SYNTAX;
 116         }
 117         return LDB_SUCCESS;
 118 }
 119
 120
 121 /*
 122   canonicalise a ldap Integer
 123   rfc2252 specifies it should be in decimal form
 124 */
 125 static int ldb_canonicalise_Integer(struct ldb_context *ldb, void *mem_ctx,
 126                                     const struct ldb_val *in, struct ldb_val *out)
 127 {
 128         int64_t i;
 129         int ret;
 130
 131         ret = val_to_int64(in, &i);
 132         if (ret != LDB_SUCCESS) {
 133                 return ret;
 134         }
 135         out->data = (uint8_t *) talloc_asprintf(mem_ctx, "%lld", (long long)i);
 136         if (out->data == NULL) {
 137                 ldb_oom(ldb);
 138                 return LDB_ERR_OPERATIONS_ERROR;
 139         }
 140         out->length = strlen((char *)out->data);
 141         return 0;
 142 }
 143
 144 /*
 145  * Lexicographically ordered format for a ldap Integer
 146  *
 147  * [ INT64_MIN ... -3, -2, -1 | 0 | +1, +2, +3 ... INT64_MAX ]
 148  *             n                o              p
 149  *
 * For the sake of human readability, we continue to format the key as a string
 * (as the canonicalise function does) rather than store it as a fixed binary
 * representation.
 152  *
 153  * In order to sort the integers in the correct string order, there are three
 154  * techniques we use:
 155  *
 156  * 1. Zero padding
 157  * 2. Negative integer inversion
 158  * 3. 1-byte prefixes: 'n' < 'o' < 'p'
 159  *
 160  * 1. To have a fixed-width representation so that 10 sorts after 2 rather than
 161  * after 1, we zero pad, like this 4-byte width example:
 162  *
 163  *     0001, 0002, 0010
 164  *
 165  * INT64_MAX = 2^63 - 1 = 9223372036854775807 (19 characters long)
 166  *
 167  * Meaning we need to pad to 19 characters.
 168  *
 169  * 2. This works for positive integers, but negative integers will still be
 170  * sorted backwards, for example:
 171  *
 172  *     -9223372036854775808 ..., -0000000000000000002, -0000000000000000001
 173  *          INT64_MIN                    -2                    -1
 174  *
 175  *   gets sorted based on string as:
 176  *
 177  *     -0000000000000000001, -0000000000000000002, ... -9223372036854775808
 178  *
 179  * In order to fix this, we invert the negative integer range, so that they
 180  * get sorted the same way as positive numbers. INT64_MIN becomes the lowest
 181  * possible non-negative number (zero), and -1 becomes the highest (INT64_MAX).
 182  *
 183  * The actual conversion applied to negative number 'x' is:
 184  *   INT64_MAX - abs(x) + 1
 185  * (The +1 is needed because abs(INT64_MIN) is one greater than INT64_MAX)
 186  *
 187  * 3. Finally, we now have two different numbers that map to the same key, e.g.
 188  * INT64_MIN maps to -0000000000000000000 and zero maps to 0000000000000000000.
 189  * In order to avoid confusion, we give every number a prefix representing its
 190  * sign: 'n' for negative numbers, 'o' for zero, and 'p' for positive. (Note
 191  * that '+' and '-' weren't used because they sort the wrong way).
 192  *
 193  * The result is a range of key values that look like this:
 194  *
 195  *     n0000000000000000000, ... n9223372036854775807,
 196  *          INT64_MIN                    -1
 197  *
 198  *     o0000000000000000000,
 199  *            ZERO
 200  *
 201  *     p0000000000000000001, ... p9223372036854775807
 202  *            +1                       INT64_MAX
 203  */
 204 static int ldb_index_format_Integer(struct ldb_context *ldb,
 205                                     void *mem_ctx,
 206                                     const struct ldb_val *in,
 207                                     struct ldb_val *out)
 208 {
 209         int64_t i;
 210         int ret;
 211         char prefix;
 212         size_t len;
 213
 214         ret = val_to_int64(in, &i);
 215         if (ret != LDB_SUCCESS) {
 216                 return ret;
 217         }
 218
 219         if (i < 0) {
 220                 /*
 221                  * i is negative, so this is subtraction rather than
 222                  * wrap-around.
 223                  */
 224                 prefix = 'n';
 225                 i = INT64_MAX + i + 1;
 226         } else if (i > 0) {
 227                 prefix = 'p';
 228         } else {
 229                 prefix = 'o';
 230         }
 231
 232         out->data = (uint8_t *) talloc_asprintf(mem_ctx, "%c%019lld", prefix, (long long)i);
 233         if (out->data == NULL) {
 234                 ldb_oom(ldb);
 235                 return LDB_ERR_OPERATIONS_ERROR;
 236         }
 237
 238         len = talloc_array_length(out->data) - 1;
 239         if (len != 20) {
 240                 ldb_debug(ldb, LDB_DEBUG_ERROR,
 241                           __location__ ": expected index format str %s to"
 242                           " have length 20 but got %zu",
 243                           (char*)out->data, len);
 244                 return LDB_ERR_OPERATIONS_ERROR;
 245         }
 246
 247         out->length = 20;
 248         return 0;
 249 }
 250
 251 /*
 252   compare two Integers
 253 */
 254 static int ldb_comparison_Integer(struct ldb_context *ldb, void *mem_ctx,
 255                                   const struct ldb_val *v1, const struct ldb_val *v2)
 256 {
 257         int64_t i1=0, i2=0;
 258         val_to_int64(v1, &i1);
 259         val_to_int64(v2, &i2);
 260         if (i1 == i2) return 0;
 261         return i1 > i2? 1 : -1;
 262 }
 263
 264 /*
 265   canonicalise a ldap Boolean
 266   rfc2252 specifies it should be either "TRUE" or "FALSE"
 267 */
 268 static int ldb_canonicalise_Boolean(struct ldb_context *ldb, void *mem_ctx,
 269                              const struct ldb_val *in, struct ldb_val *out)
 270 {
 271         if (in->length >= 4 && strncasecmp((char *)in->data, "TRUE", in->length) == 0) {
 272                 out->data = (uint8_t *)talloc_strdup(mem_ctx, "TRUE");
 273                 out->length = 4;
 274         } else if (in->length >= 5 && strncasecmp((char *)in->data, "FALSE", in->length) == 0) {
 275                 out->data = (uint8_t *)talloc_strdup(mem_ctx, "FALSE");
 276                 out->length = 5;
 277         } else {
 278                 return -1;
 279         }
 280         return 0;
 281 }
 282
 283 /*
 284   compare two Booleans
 285 */
 286 static int ldb_comparison_Boolean(struct ldb_context *ldb, void *mem_ctx,
 287                            const struct ldb_val *v1, const struct ldb_val *v2)
 288 {
 289         if (v1->length != v2->length) {
 290                 return NUMERIC_CMP(v1->length, v2->length);
 291         }
 292         return strncasecmp((char *)v1->data, (char *)v2->data, v1->length);
 293 }
 294
 295
 296 /*
 297   compare two binary blobs
 298 */
 299 int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx,
 300                           const struct ldb_val *v1, const struct ldb_val *v2)
 301 {
 302         if (v1->length != v2->length) {
 303                 return v1->length - v2->length;
 304         }
 305         return memcmp(v1->data, v2->data, v1->length);
 306 }
 307
 308 /*
 309   compare two case insensitive strings, ignoring multiple whitespaces
 310   and leading and trailing whitespaces
 311   see rfc2252 section 8.1
 312
 313   try to optimize for the ascii case,
 314   but if we find out an utf8 codepoint revert to slower but correct function
 315 */
 316 int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
 317                                const struct ldb_val *v1, const struct ldb_val *v2)
 318 {
 319         const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
 320         size_t n1 = v1->length, n2 = v2->length;
 321         char *b1, *b2;
 322         const char *u1, *u2;
 323         int ret;
 324
 325         while (n1 && *s1 == ' ') { s1++; n1--; };
 326         while (n2 && *s2 == ' ') { s2++; n2--; };
 327
 328         while (n1 && n2 && *s1 && *s2) {
 329                 /* the first 127 (0x7F) chars are ascii and utf8 guarantees they
 330                  * never appear in multibyte sequences */
 331                 if (((unsigned char)s1[0]) & 0x80) goto utf8str;
 332                 if (((unsigned char)s2[0]) & 0x80) goto utf8str;
 333                 if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2))
 334                         break;
 335                 if (*s1 == ' ') {
 336                         while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
 337                         while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
 338                 }
 339                 s1++; s2++;
 340                 n1--; n2--;
 341         }
 342
 343         /* check for trailing spaces only if the other pointers has
 344          * reached the end of the strings otherwise we can
 345          * mistakenly match.  ex. "domain users" <->
 346          * "domainUpdates"
 347          */
 348         if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
 349                 while (n1 && *s1 == ' ') { s1++; n1--; }
 350         }
 351         if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
 352                 while (n2 && *s2 == ' ') { s2++; n2--; }
 353         }
 354         if (n1 == 0 && n2 != 0) {
 355                 return -(int)ldb_ascii_toupper(*s2);
 356         }
 357         if (n2 == 0 && n1 != 0) {
 358                 return (int)ldb_ascii_toupper(*s1);
 359         }
 360         if (n1 == 0 && n2 == 0) {
 361                 return 0;
 362         }
 363         return (int)ldb_ascii_toupper(*s1) - (int)ldb_ascii_toupper(*s2);
 364
 365 utf8str:
 366         /*
 367          * No need to recheck from the start, just from the first utf8 charu
 368          * found. Note that the callback of ldb_casefold() needs to be ascii
 369          * compatible.
 370          */
 371         b1 = ldb_casefold(ldb, mem_ctx, s1, n1);
 372         b2 = ldb_casefold(ldb, mem_ctx, s2, n2);
 373
 374         if (!b1 || !b2) {
 375                 /* One of the strings was not UTF8, so we have no
 376                  * options but to do a binary compare */
 377                 talloc_free(b1);
 378                 talloc_free(b2);
 379                 ret = memcmp(s1, s2, MIN(n1, n2));
 380                 if (ret == 0) {
 381                         if (n1 == n2) return 0;
 382                         if (n1 > n2) {
 383                                 return (int)ldb_ascii_toupper(s1[n2]);
 384                         } else {
 385                                 return -(int)ldb_ascii_toupper(s2[n1]);
 386                         }
 387                 }
 388                 return ret;
 389         }
 390
 391         u1 = b1;
 392         u2 = b2;
 393
 394         while (*u1 & *u2) {
 395                 if (*u1 != *u2)
 396                         break;
 397                 if (*u1 == ' ') {
 398                         while (u1[0] == u1[1]) u1++;
 399                         while (u2[0] == u2[1]) u2++;
 400                 }
 401                 u1++; u2++;
 402         }
 403         if (! (*u1 && *u2)) {
 404                 while (*u1 == ' ') u1++;
 405                 while (*u2 == ' ') u2++;
 406         }
 407         ret = (int)(*u1 - *u2);
 408
 409         talloc_free(b1);
 410         talloc_free(b2);
 411
 412         return ret;
 413 }
 414
 415
 416 /*
 417   canonicalise a attribute in DN format
 418 */
 419 static int ldb_canonicalise_dn(struct ldb_context *ldb, void *mem_ctx,
 420                                const struct ldb_val *in, struct ldb_val *out)
 421 {
 422         struct ldb_dn *dn;
 423         int ret = -1;
 424
 425         out->length = 0;
 426         out->data = NULL;
 427
 428         dn = ldb_dn_from_ldb_val(mem_ctx, ldb, in);
 429         if ( ! ldb_dn_validate(dn)) {
 430                 return LDB_ERR_INVALID_DN_SYNTAX;
 431         }
 432
 433         out->data = (uint8_t *)ldb_dn_alloc_casefold(mem_ctx, dn);
 434         if (out->data == NULL) {
 435                 goto done;
 436         }
 437         out->length = strlen((char *)out->data);
 438
 439         ret = 0;
 440
 441 done:
 442         talloc_free(dn);
 443
 444         return ret;
 445 }
 446
 447 /*
 448   compare two dns
 449 */
 450 static int ldb_comparison_dn(struct ldb_context *ldb, void *mem_ctx,
 451                              const struct ldb_val *v1, const struct ldb_val *v2)
 452 {
 453         struct ldb_dn *dn1 = NULL, *dn2 = NULL;
 454         int ret;
 455
 456         dn1 = ldb_dn_from_ldb_val(mem_ctx, ldb, v1);
 457         if ( ! ldb_dn_validate(dn1)) return -1;
 458
 459         dn2 = ldb_dn_from_ldb_val(mem_ctx, ldb, v2);
 460         if ( ! ldb_dn_validate(dn2)) {
 461                 talloc_free(dn1);
 462                 return -1;
 463         }
 464
 465         ret = ldb_dn_compare(dn1, dn2);
 466
 467         talloc_free(dn1);
 468         talloc_free(dn2);
 469         return ret;
 470 }
 471
 472 /*
 473   compare two utc time values. 1 second resolution
 474 */
 475 static int ldb_comparison_utctime(struct ldb_context *ldb, void *mem_ctx,
 476                                   const struct ldb_val *v1, const struct ldb_val *v2)
 477 {
 478         time_t t1=0, t2=0;
 479         ldb_val_to_time(v1, &t1);
 480         ldb_val_to_time(v2, &t2);
 481         if (t1 == t2) return 0;
 482         return t1 > t2? 1 : -1;
 483 }
 484
 485 /*
 486   canonicalise a utc time
 487 */
 488 static int ldb_canonicalise_utctime(struct ldb_context *ldb, void *mem_ctx,
 489                                     const struct ldb_val *in, struct ldb_val *out)
 490 {
 491         time_t t;
 492         int ret;
 493         ret = ldb_val_to_time(in, &t);
 494         if (ret != LDB_SUCCESS) {
 495                 return ret;
 496         }
 497         out->data = (uint8_t *)ldb_timestring_utc(mem_ctx, t);
 498         if (out->data == NULL) {
 499                 ldb_oom(ldb);
 500                 return LDB_ERR_OPERATIONS_ERROR;
 501         }
 502         out->length = strlen((char *)out->data);
 503         return 0;
 504 }
 505
 506 /*
 507   canonicalise a generalized time
 508 */
 509 static int ldb_canonicalise_generalizedtime(struct ldb_context *ldb, void *mem_ctx,
 510                                         const struct ldb_val *in, struct ldb_val *out)
 511 {
 512         time_t t;
 513         int ret;
 514         ret = ldb_val_to_time(in, &t);
 515         if (ret != LDB_SUCCESS) {
 516                 return ret;
 517         }
 518         out->data = (uint8_t *)ldb_timestring(mem_ctx, t);
 519         if (out->data == NULL) {
 520                 ldb_oom(ldb);
 521                 return LDB_ERR_OPERATIONS_ERROR;
 522         }
 523         out->length = strlen((char *)out->data);
 524         return 0;
 525 }
 526
/*
  table of standard attribute handlers

  One entry per built-in syntax name. Each entry gives the handlers
  used to read and write the value in LDIF, to canonicalise it and to
  compare two values; only the ordered integer syntax also sets an
  index_format_fn. The table is searched linearly by
  ldb_standard_syntax_by_name().
*/
static const struct ldb_schema_syntax ldb_standard_syntaxes[] = {
        {
                .name            = LDB_SYNTAX_INTEGER,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_canonicalise_Integer,
                .comparison_fn   = ldb_comparison_Integer
        },
        {
                /* as the integer syntax, plus a sortable index key format */
                .name            = LDB_SYNTAX_ORDERED_INTEGER,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_canonicalise_Integer,
                .index_format_fn = ldb_index_format_Integer,
                .comparison_fn   = ldb_comparison_Integer
        },
        {
                /* opaque bytes: copied as is and compared bytewise */
                .name            = LDB_SYNTAX_OCTET_STRING,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_handler_copy,
                .comparison_fn   = ldb_comparison_binary
        },
        {
                /* case and space insensitive strings */
                .name            = LDB_SYNTAX_DIRECTORY_STRING,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_handler_fold,
                .comparison_fn   = ldb_comparison_fold
        },
        {
                .name            = LDB_SYNTAX_DN,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_canonicalise_dn,
                .comparison_fn   = ldb_comparison_dn
        },
        {
                /* uses the same fold handlers as the directory string syntax */
                .name            = LDB_SYNTAX_OBJECTCLASS,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_handler_fold,
                .comparison_fn   = ldb_comparison_fold
        },
        {
                .name            = LDB_SYNTAX_UTC_TIME,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_canonicalise_utctime,
                .comparison_fn   = ldb_comparison_utctime
        },
        {
                /* shares the utctime comparison; only canonicalisation differs */
                .name            = LDB_SYNTAX_GENERALIZED_TIME,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_canonicalise_generalizedtime,
                .comparison_fn   = ldb_comparison_utctime
        },
        {
                .name            = LDB_SYNTAX_BOOLEAN,
                .ldif_read_fn    = ldb_handler_copy,
                .ldif_write_fn   = ldb_handler_copy,
                .canonicalise_fn = ldb_canonicalise_Boolean,
                .comparison_fn   = ldb_comparison_Boolean
        },
};
 596
 597
 598 /*
 599   return the attribute handlers for a given syntax name
 600 */
 601 const struct ldb_schema_syntax *ldb_standard_syntax_by_name(struct ldb_context *ldb,
 602                                                             const char *syntax)
 603 {
 604         unsigned int i;
 605         unsigned num_handlers = sizeof(ldb_standard_syntaxes)/sizeof(ldb_standard_syntaxes[0]);
 606         /* TODO: should be replaced with a binary search */
 607         for (i=0;i<num_handlers;i++) {
 608                 if (strcmp(ldb_standard_syntaxes[i].name, syntax) == 0) {
 609                         return &ldb_standard_syntaxes[i];
 610                 }
 611         }
 612         return NULL;
 613 }
 614
 615 int ldb_any_comparison(struct ldb_context *ldb, void *mem_ctx,
 616                        ldb_attr_handler_t canonicalise_fn,
 617                        const struct ldb_val *v1,
 618                        const struct ldb_val *v2)
 619 {
 620         int ret, ret1, ret2;
 621         struct ldb_val v1_canon, v2_canon;
 622         TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
 623
 624         /* I could try and bail if tmp_ctx was NULL, but what return
 625          * value would I use?
 626          *
 627          * It seems easier to continue on the NULL context
 628          */
 629         ret1 = canonicalise_fn(ldb, tmp_ctx, v1, &v1_canon);
 630         ret2 = canonicalise_fn(ldb, tmp_ctx, v2, &v2_canon);
 631
 632         if (ret1 == LDB_SUCCESS && ret2 == LDB_SUCCESS) {
 633                 ret = ldb_comparison_binary(ldb, mem_ctx, &v1_canon, &v2_canon);
 634         } else {
 635                 ret = ldb_comparison_binary(ldb, mem_ctx, v1, v2);
 636         }
 637         talloc_free(tmp_ctx);
 638         return ret;
 639 }