r8585: add to ldb and ldap comparison functionality
[kamenim/samba.git] / source4 / lib / ldb / common / ldb_parse.c
1 /* 
2    ldb database library
3
4    Copyright (C) Andrew Tridgell  2004
5
6      ** NOTE! The following LGPL license applies to the ldb
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9    
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 2 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24
25 /*
26  *  Name: ldb
27  *
28  *  Component: ldb expression parsing
29  *
30  *  Description: parse LDAP-like search expressions
31  *
32  *  Author: Andrew Tridgell
33  */
34
35 /*
36   TODO:
37       - add RFC2254 binary string handling
38       - possibly add ~=, <= and >= handling
39       - expand the test suite
40       - add better parse error handling
41
42 */
43
44 #include "includes.h"
45 #include "ldb/include/ldb.h"
46 #include <ctype.h>
47
48
49 /*
50 a filter is defined by:
51                <filter> ::= '(' <filtercomp> ')'
52                <filtercomp> ::= <and> | <or> | <not> | <simple>
53                <and> ::= '&' <filterlist>
54                <or> ::= '|' <filterlist>
55                <not> ::= '!' <filter>
56                <filterlist> ::= <filter> | <filter> <filterlist>
57                <simple> ::= <attributetype> <filtertype> <attributevalue>
58                <filtertype> ::= '=' | '~=' | '<=' | '>='
59 */
60
61 /*
62    decode a RFC2254 binary string representation of a buffer.
63    Used in LDAP filters.
64 */
65 struct ldb_val ldb_binary_decode(void *mem_ctx, const char *str)
66 {
67         int i, j;
68         struct ldb_val ret;
69         int slen = str?strlen(str):0;
70
71         ret.data = talloc_size(mem_ctx, slen+1);
72         ret.length = 0;
73         if (ret.data == NULL) return ret;
74
75         for (i=j=0;i<slen;i++) {
76                 if (str[i] == '\\') {
77                         unsigned c;
78                         if (sscanf(&str[i+1], "%02X", &c) != 1) {
79                                 talloc_free(ret.data);
80                                 memset(&ret, 0, sizeof(ret));
81                                 return ret;
82                         }
83                         ((uint8_t *)ret.data)[j++] = c;
84                         i += 2;
85                 } else {
86                         ((uint8_t *)ret.data)[j++] = str[i];
87                 }
88         }
89         ret.length = j;
90         ((uint8_t *)ret.data)[j] = 0;
91
92         return ret;
93 }
94
95
96 /*
97    encode a blob as a RFC2254 binary string, escaping any
98    non-printable or '\' characters
99 */
100 char *ldb_binary_encode(void *mem_ctx, struct ldb_val val)
101 {
102         int i;
103         char *ret;
104         int len = val.length;
105         unsigned char *buf = val.data;
106
107         for (i=0;i<val.length;i++) {
108                 if (!isprint(buf[i]) || strchr(" *()\\&|!\"", buf[i])) {
109                         len += 2;
110                 }
111         }
112         ret = talloc_array(mem_ctx, char, len+1);
113         if (ret == NULL) return NULL;
114
115         len = 0;
116         for (i=0;i<val.length;i++) {
117                 if (!isprint(buf[i]) || strchr(" *()\\&|!\"", buf[i])) {
118                         snprintf(ret+len, 4, "\\%02X", buf[i]);
119                         len += 3;
120                 } else {
121                         ret[len++] = buf[i];
122                 }
123         }
124
125         ret[len] = 0;
126
127         return ret;     
128 }
129
130 /* find the first matching wildcard */
131 static char *ldb_parse_find_wildcard(char *value)
132 {
133         while (*value) {
134                 value = strpbrk(value, "\\*");
135                 if (value == NULL) return NULL;
136
137                 if (value[0] == '\\') {
138                         if (value[1] == '\0') return NULL;
139                         value += 2;
140                         continue;
141                 }
142
143                 if (value[0] == '*') return value;
144         }
145
146         return NULL;
147 }
148
149 /* return a NULL terminated list of binary strings representing the value
150    chunks separated by wildcards that makes the value portion of the filter
151 */
152 static struct ldb_val **ldb_wildcard_decode(void *mem_ctx, const char *string)
153 {
154         struct ldb_val **ret = NULL;
155         int val = 0;
156         char *wc, *str;
157
158         wc = talloc_strdup(mem_ctx, string);
159         if (wc == NULL) return NULL;
160
161         while (wc && *wc) {
162                 str = wc;
163                 wc = ldb_parse_find_wildcard(str);
164                 if (wc && *wc) {
165                         if (wc == str) {
166                                 wc++;
167                                 continue;
168                         }
169                         *wc = 0;
170                         wc++;
171                 }
172
173                 ret = talloc_realloc(mem_ctx, ret, struct ldb_val *, val + 2);
174                 if (ret == NULL) return NULL;
175
176                 ret[val] = talloc(mem_ctx, struct ldb_val);
177                 if (ret[val] == NULL) return NULL;
178
179                 *(ret[val]) = ldb_binary_decode(mem_ctx, str);
180                 if ((ret[val])->data == NULL) return NULL;
181
182                 val++;
183         }
184
185         ret[val] = NULL;
186
187         return ret;
188 }
189
190 static struct ldb_parse_tree *ldb_parse_filter(void *mem_ctx, const char **s);
191
192
193 /*
194   parse an extended match
195
196   possible forms:
197         (attr:oid:=value)
198         (attr:dn:oid:=value)
199         (attr:dn:=value)
200         (:dn:oid:=value)
201
202   the ':dn' part sets the dnAttributes boolean if present
203   the oid sets the rule_id string
204   
205 */
206 static struct ldb_parse_tree *ldb_parse_extended(struct ldb_parse_tree *ret, 
207                                                  char *attr, char *value)
208 {
209         char *p1, *p2;
210
211         ret->operation = LDB_OP_EXTENDED;
212         ret->u.extended.value = ldb_binary_decode(ret, value);
213         if (ret->u.extended.value.data == NULL) goto failed;
214
215         p1 = strchr(attr, ':');
216         if (p1 == NULL) goto failed;
217         p2 = strchr(p1+1, ':');
218
219         *p1 = 0;
220         if (p2) *p2 = 0;
221
222         ret->u.extended.attr = attr;
223         if (strcmp(p1+1, "dn") == 0) {
224                 ret->u.extended.dnAttributes = 1;
225                 if (p2) {
226                         ret->u.extended.rule_id = talloc_strdup(ret, p2+1);
227                         if (ret->u.extended.rule_id == NULL) goto failed;
228                 } else {
229                         ret->u.extended.rule_id = NULL;
230                 }
231         } else {
232                 ret->u.extended.dnAttributes = 0;
233                 ret->u.extended.rule_id = talloc_strdup(ret, p1+1);
234                 if (ret->u.extended.rule_id == NULL) goto failed;
235         }
236
237         return ret;
238
239 failed:
240         talloc_free(ret);
241         return NULL;
242 }
243
244 static enum ldb_parse_op ldb_parse_filtertype(void *mem_ctx, char **type, char **value, const char **s)
245 {
246         enum ldb_parse_op filter = 0;
247         char *name, *val, *k;
248         const char *p = *s;
249         const char *t, *t1;
250
251         /* retrieve attributetype name */
252         t = p;
253
254         while ((isascii(*p) && isalnum((unsigned char)*p)) || (*p == '-')) { /* attribute names can only be alphanums */
255                 p++;
256         }
257
258         if (*p == ':') { /* but extended searches have : and . chars too */
259                 p = strstr(p, ":=");
260                 if (p == NULL) { /* malformed attribute name */
261                         return 0;
262                 }
263         }
264
265         t1 = p;
266
267         while (isspace((unsigned char)*p)) p++;
268
269         if (!strchr("=<>~:", *p)) {
270                 return 0;
271         }
272
273         /* save name */
274         name = talloc_memdup(mem_ctx, t, t1 - t + 1);
275         if (name == NULL) return 0;
276         name[t1 - t] = '\0';
277
278         /* retrieve filtertype */
279
280         if (*p == '=') {
281                 filter = LDB_OP_EQUALITY;
282         } else if (*(p + 1) == '=') {
283                 switch (*p) {
284                 case '<':
285                         filter = LDB_OP_LESS;
286                         p++;
287                         break;
288                 case '>':
289                         filter = LDB_OP_GREATER;
290                         p++;
291                         break;
292                 case '~':
293                         filter = LDB_OP_APPROX;
294                         p++;
295                         break;
296                 case ':':
297                         filter = LDB_OP_EXTENDED;
298                         p++;
299                         break;
300                 }
301         }
302         if (!filter) {
303                 talloc_free(name);
304                 return filter;
305         }
306         p++;
307
308         while (isspace((unsigned char)*p)) p++;
309
310         /* retieve value */
311         t = p;
312
313         while (*p && ((*p != ')') || ((*p == ')') && (*(p - 1) == '\\')))) p++;
314
315         val = talloc_memdup(mem_ctx, t, p - t + 1);
316         if (val == NULL) {
317                 talloc_free(name);
318                 return 0;
319         }
320         val[p - t] = '\0';
321
322         k = &(val[p - t]);
323
324         /* remove trailing spaces from value */
325         while ((k > val) && (isspace((unsigned char)*(k - 1)))) k--;
326         *k = '\0';
327
328         *type = name;
329         *value = val;
330         *s = p;
331         return filter;
332 }
333
334 /*
335   <simple> ::= <attributetype> <filtertype> <attributevalue>
336 */
337 static struct ldb_parse_tree *ldb_parse_simple(void *mem_ctx, const char **s)
338 {
339         char *attr, *value;
340         struct ldb_parse_tree *ret;
341         enum ldb_parse_op filtertype;
342
343         ret = talloc(mem_ctx, struct ldb_parse_tree);
344         if (!ret) {
345                 errno = ENOMEM;
346                 return NULL;
347         }
348
349         filtertype = ldb_parse_filtertype(ret, &attr, &value, s);
350         if (!filtertype) {
351                 talloc_free(ret);
352                 return NULL;
353         }
354
355         switch (filtertype) {
356
357                 case LDB_OP_EQUALITY:
358
359                         if (strcmp(value, "*") == 0) {
360                                 ret->operation = LDB_OP_PRESENT;
361                                 ret->u.present.attr = attr;
362                                 break;
363                         }
364
365                         if (ldb_parse_find_wildcard(value) != NULL) {
366                                 ret->operation = LDB_OP_SUBSTRING;
367                                 ret->u.substring.attr = attr;
368                                 ret->u.substring.start_with_wildcard = 0;
369                                 ret->u.substring.end_with_wildcard = 0;
370                                 ret->u.substring.chunks = ldb_wildcard_decode(ret, value);
371                                 if (ret->u.substring.chunks == NULL){
372                                         talloc_free(ret);
373                                         return NULL;
374                                 }
375                                 if (value[0] == '*')
376                                         ret->u.substring.start_with_wildcard = 1;
377                                 if (value[strlen(value) - 1] == '*')
378                                         ret->u.substring.end_with_wildcard = 1;
379                                 talloc_free(value);
380
381                                 break;
382                         }
383
384                         ret->operation = LDB_OP_EQUALITY;
385                         ret->u.equality.attr = attr;
386                         ret->u.equality.value = ldb_binary_decode(ret, value);
387                         if (ret->u.equality.value.data == NULL) {
388                                 talloc_free(ret);
389                                 return NULL;
390                         }
391                         talloc_free(value);
392                         break;
393
394                 case LDB_OP_GREATER:
395                         ret->operation = LDB_OP_GREATER;
396                         ret->u.comparison.attr = attr;
397                         ret->u.comparison.value = ldb_binary_decode(ret, value);
398                         if (ret->u.comparison.value.data == NULL) {
399                                 talloc_free(ret);
400                                 return NULL;
401                         }
402                         talloc_free(value);
403                         break;
404
405                 case LDB_OP_LESS:
406                         ret->operation = LDB_OP_LESS;
407                         ret->u.comparison.attr = attr;
408                         ret->u.comparison.value = ldb_binary_decode(ret, value);
409                         if (ret->u.comparison.value.data == NULL) {
410                                 talloc_free(ret);
411                                 return NULL;
412                         }
413                         talloc_free(value);
414                         break;
415
416                 case LDB_OP_APPROX:
417                         ret->operation = LDB_OP_APPROX;
418                         ret->u.comparison.attr = attr;
419                         ret->u.comparison.value = ldb_binary_decode(ret, value);
420                         if (ret->u.comparison.value.data == NULL) {
421                                 talloc_free(ret);
422                                 return NULL;
423                         }
424                         talloc_free(value);
425                         break;
426
427                 case LDB_OP_EXTENDED:
428
429                         ret = ldb_parse_extended(ret, attr, value);
430                         break;
431
432                 default:
433                         talloc_free(ret);
434                         return NULL;
435         }
436
437         return ret;
438 }
439
440
441 /*
442   parse a filterlist
443   <and> ::= '&' <filterlist>
444   <or> ::= '|' <filterlist>
445   <filterlist> ::= <filter> | <filter> <filterlist>
446 */
447 static struct ldb_parse_tree *ldb_parse_filterlist(void *mem_ctx, const char **s)
448 {
449         struct ldb_parse_tree *ret, *next;
450         enum ldb_parse_op op;
451         const char *p = *s;
452
453         switch (*p) {
454                 case '&':
455                         op = LDB_OP_AND;
456                         break;
457                 case '|':
458                         op = LDB_OP_OR;
459                         break;
460                 default:
461                         return NULL;
462         }
463         p++;
464
465         while (isspace((unsigned char)*p)) p++;
466
467         ret = talloc(mem_ctx, struct ldb_parse_tree);
468         if (!ret) {
469                 errno = ENOMEM;
470                 return NULL;
471         }
472
473         ret->operation = op;
474         ret->u.list.num_elements = 1;
475         ret->u.list.elements = talloc(ret, struct ldb_parse_tree *);
476         if (!ret->u.list.elements) {
477                 errno = ENOMEM;
478                 talloc_free(ret);
479                 return NULL;
480         }
481
482         ret->u.list.elements[0] = ldb_parse_filter(ret->u.list.elements, &p);
483         if (!ret->u.list.elements[0]) {
484                 talloc_free(ret);
485                 return NULL;
486         }
487
488         while (isspace((unsigned char)*p)) p++;
489
490         while (*p && (next = ldb_parse_filter(ret->u.list.elements, &p))) {
491                 struct ldb_parse_tree **e;
492                 e = talloc_realloc(ret, ret->u.list.elements, 
493                                      struct ldb_parse_tree *, 
494                                      ret->u.list.num_elements + 1);
495                 if (!e) {
496                         errno = ENOMEM;
497                         talloc_free(ret);
498                         return NULL;
499                 }
500                 ret->u.list.elements = e;
501                 ret->u.list.elements[ret->u.list.num_elements] = next;
502                 ret->u.list.num_elements++;
503                 while (isspace((unsigned char)*p)) p++;
504         }
505
506         *s = p;
507
508         return ret;
509 }
510
511
512 /*
513   <not> ::= '!' <filter>
514 */
515 static struct ldb_parse_tree *ldb_parse_not(void *mem_ctx, const char **s)
516 {
517         struct ldb_parse_tree *ret;
518         const char *p = *s;
519
520         if (*p != '!') {
521                 return NULL;
522         }
523         p++;
524
525         ret = talloc(mem_ctx, struct ldb_parse_tree);
526         if (!ret) {
527                 errno = ENOMEM;
528                 return NULL;
529         }
530
531         ret->operation = LDB_OP_NOT;
532         ret->u.isnot.child = ldb_parse_filter(ret, &p);
533         if (!ret->u.isnot.child) {
534                 talloc_free(ret);
535                 return NULL;
536         }
537
538         *s = p;
539
540         return ret;
541 }
542
543 /*
544   parse a filtercomp
545   <filtercomp> ::= <and> | <or> | <not> | <simple>
546 */
547 static struct ldb_parse_tree *ldb_parse_filtercomp(void *mem_ctx, const char **s)
548 {
549         struct ldb_parse_tree *ret;
550         const char *p = *s;
551
552         while (isspace((unsigned char)*p)) p++;
553
554         switch (*p) {
555         case '&':
556                 ret = ldb_parse_filterlist(mem_ctx, &p);
557                 break;
558
559         case '|':
560                 ret = ldb_parse_filterlist(mem_ctx, &p);
561                 break;
562
563         case '!':
564                 ret = ldb_parse_not(mem_ctx, &p);
565                 break;
566
567         case '(':
568         case ')':
569                 return NULL;
570
571         default:
572                 ret = ldb_parse_simple(mem_ctx, &p);
573
574         }
575
576         *s = p;
577         return ret;
578 }
579
580
581 /*
582   <filter> ::= '(' <filtercomp> ')'
583 */
584 static struct ldb_parse_tree *ldb_parse_filter(void *mem_ctx, const char **s)
585 {
586         struct ldb_parse_tree *ret;
587         const char *p = *s;
588
589         if (*p != '(') {
590                 return NULL;
591         }
592         p++;
593
594         ret = ldb_parse_filtercomp(mem_ctx, &p);
595
596         if (*p != ')') {
597                 return NULL;
598         }
599         p++;
600
601         while (isspace((unsigned char)*p)) {
602                 p++;
603         }
604
605         *s = p;
606
607         return ret;
608 }
609
610
611 /*
612   main parser entry point. Takes a search string and returns a parse tree
613
614   expression ::= <simple> | <filter>
615 */
616 struct ldb_parse_tree *ldb_parse_tree(void *mem_ctx, const char *s)
617 {
618         while (isspace((unsigned char)*s)) s++;
619
620         if (*s == '(') {
621                 return ldb_parse_filter(mem_ctx, &s);
622         }
623
624         return ldb_parse_simple(mem_ctx, &s);
625 }
626
627
628 /*
629   construct a ldap parse filter given a parse tree
630 */
631 char *ldb_filter_from_tree(void *mem_ctx, struct ldb_parse_tree *tree)
632 {
633         char *s, *s2, *ret;
634         int i;
635
636         switch (tree->operation) {
637         case LDB_OP_AND:
638         case LDB_OP_OR:
639                 ret = talloc_asprintf(mem_ctx, "(%c", (char)tree->operation);
640                 if (ret == NULL) return NULL;
641                 for (i=0;i<tree->u.list.num_elements;i++) {
642                         s = ldb_filter_from_tree(mem_ctx, tree->u.list.elements[i]);
643                         if (s == NULL) {
644                                 talloc_free(ret);
645                                 return NULL;
646                         }
647                         s2 = talloc_asprintf_append(ret, "%s", s);
648                         talloc_free(s);
649                         if (s2 == NULL) {
650                                 talloc_free(ret);
651                                 return NULL;
652                         }
653                         ret = s2;
654                 }
655                 s = talloc_asprintf_append(ret, ")");
656                 if (s == NULL) {
657                         talloc_free(ret);
658                         return NULL;
659                 }
660                 return s;
661         case LDB_OP_NOT:
662                 s = ldb_filter_from_tree(mem_ctx, tree->u.isnot.child);
663                 if (s == NULL) return NULL;
664
665                 ret = talloc_asprintf(mem_ctx, "(!%s)", s);
666                 talloc_free(s);
667                 return ret;
668         case LDB_OP_EQUALITY:
669                 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
670                 if (s == NULL) return NULL;
671                 ret = talloc_asprintf(mem_ctx, "(%s=%s)", 
672                                       tree->u.equality.attr, s);
673                 talloc_free(s);
674                 return ret;
675         case LDB_OP_SUBSTRING:
676                 ret = talloc_strdup(mem_ctx, (tree->u.substring.start_with_wildcard)?"*":"");
677                 if (ret == NULL) return NULL;
678                 for (i = 0; tree->u.substring.chunks[i]; i++) {
679                         s2 = ldb_binary_encode(mem_ctx, *(tree->u.substring.chunks[i]));
680                         if (s2 == NULL) {
681                                 talloc_free(ret);
682                                 return NULL;
683                         }
684                         s = talloc_asprintf_append(ret, "%s*", s2);
685                         if (s == NULL) {
686                                 talloc_free(ret);
687                                 return NULL;
688                         }
689                         ret = s;
690                 }
691                 if ( ! tree->u.substring.end_with_wildcard ) {
692                         ret[strlen(ret) - 1] = '\0'; /* remove last wildcard */
693                 }
694                 return ret;
695         case LDB_OP_GREATER:
696                 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
697                 if (s == NULL) return NULL;
698                 ret = talloc_asprintf(mem_ctx, "(%s>=%s)", 
699                                       tree->u.equality.attr, s);
700                 talloc_free(s);
701                 return ret;
702         case LDB_OP_LESS:
703                 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
704                 if (s == NULL) return NULL;
705                 ret = talloc_asprintf(mem_ctx, "(%s<=%s)", 
706                                       tree->u.equality.attr, s);
707                 talloc_free(s);
708                 return ret;
709         case LDB_OP_PRESENT:
710                 ret = talloc_strdup(mem_ctx, "*");
711                 if (ret == NULL) return NULL;
712                 return ret;
713         case LDB_OP_APPROX:
714                 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
715                 if (s == NULL) return NULL;
716                 ret = talloc_asprintf(mem_ctx, "(%s~=%s)", 
717                                       tree->u.equality.attr, s);
718                 talloc_free(s);
719                 return ret;
720         case LDB_OP_EXTENDED:
721                 s = ldb_binary_encode(mem_ctx, tree->u.extended.value);
722                 if (s == NULL) return NULL;
723                 ret = talloc_asprintf(mem_ctx, "(%s%s%s%s:=%s)", 
724                                       tree->u.extended.attr?tree->u.extended.attr:"", 
725                                       tree->u.extended.dnAttributes?":dn":"",
726                                       tree->u.extended.rule_id?":":"", 
727                                       tree->u.extended.rule_id?tree->u.extended.rule_id:"", 
728                                       s);
729                 talloc_free(s);
730                 return ret;
731         }
732         
733         return NULL;
734 }