r2778: merged the new samba4 ms_fnmatch code to Samba3. Thanks to Rusty
authorAndrew Tridgell <tridge@samba.org>
Sat, 2 Oct 2004 09:14:43 +0000 (09:14 +0000)
committerGerald (Jerry) Carter <jerry@samba.org>
Wed, 10 Oct 2007 15:52:52 +0000 (10:52 -0500)
Russel for some help in designing the new algorithm.

source/lib/ms_fnmatch.c

index 42c91bd18df9067048f801c3e2bf3c351185649c..bdfb26d0ca575192fe1ef3df52915a0ad0286fa8 100644 (file)
@@ -1,7 +1,7 @@
 /* 
    Unix SMB/CIFS implementation.
    filename matching routine
-   Copyright (C) Andrew Tridgell 1992-1998 
+   Copyright (C) Andrew Tridgell 1992-2004
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
+*/
 
 /*
    This module was originally based on fnmatch.c copyright by the Free
-   Software Foundation. It bears little resemblence to that code now 
+   Software Foundation. It bears little (if any) resemblence to that
+   code now
 */  
 
 
-#if FNMATCH_TEST
-#include <stdio.h>
-#include <stdlib.h>
-#else
 #include "includes.h"
-#endif
 
-/* 
-   bugger. we need a separate wildcard routine for older versions
-   of the protocol. This is not yet perfect, but its a lot
-   better than what we had */
-static int ms_fnmatch_lanman_core(const smb_ucs2_t *pattern, 
-                                 const smb_ucs2_t *string,
-                                 BOOL case_sensitive)
+static int null_match(const smb_ucs2_t *p)
 {
-       const smb_ucs2_t *p = pattern, *n = string;
-       smb_ucs2_t c;
+       for (;*p;p++) {
+               if (*p != UCS2_CHAR('*') &&
+                   *p != UCS2_CHAR('<') &&
+                   *p != UCS2_CHAR('"') &&
+                   *p != UCS2_CHAR('>')) return -1;
+       }
+       return 0;
+}
+
+/*
+  the max_n structure is purely for efficiency, it doesn't contribute
+  to the matching algorithm except by ensuring that the algorithm does
+  not grow exponentially
+*/
+struct max_n {
+       const smb_ucs2_t *predot;
+       const smb_ucs2_t *postdot;
+};
 
-       if (strcmp_wa(p, "?")==0 && strcmp_wa(n, ".")) goto match;
+
+/*
+  p and n are the pattern and string being matched. The max_n array is
+  an optimisation only. The ldot pointer is NULL if the string does
+  not contain a '.', otherwise it points at the last dot in 'n'.
+*/
+static int ms_fnmatch_core(const smb_ucs2_t *p, const smb_ucs2_t *n, 
+                          struct max_n *max_n, const smb_ucs2_t *ldot)
+{
+       smb_ucs2_t c;
+       int i;
 
        while ((c = *p++)) {
                switch (c) {
-               case UCS2_CHAR('.'):
-                       if (! *n) goto next;
-                       if (*n != UCS2_CHAR('.')) goto nomatch;
-                       n++;
-                       break;
+                       /* a '*' matches zero or more characters of any type */
+               case UCS2_CHAR('*'):
+                       if (max_n->predot && max_n->predot <= n) {
+                               return null_match(p);
+                       }
+                       for (i=0; n[i]; i++) {
+                               if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) {
+                                       return 0;
+                               }
+                       }
+                       if (!max_n->predot || max_n->predot > n) max_n->predot = n;
+                       return null_match(p);
+
+                       /* a '<' matches zero or more characters of
+                          any type, but stops matching at the last
+                          '.' in the string. */
+               case UCS2_CHAR('<'):
+                       if (max_n->predot && max_n->predot <= n) {
+                               return null_match(p);
+                       }
+                       if (max_n->postdot && max_n->postdot <= n && n <= ldot) {
+                               return -1;
+                       }
+                       for (i=0; n[i]; i++) {
+                               if (ms_fnmatch_core(p, n+i, max_n+1, ldot) == 0) return 0;
+                               if (n+i == ldot) {
+                                       if (ms_fnmatch_core(p, n+i+1, max_n+1, ldot) == 0) return 0;
+                                       if (!max_n->postdot || max_n->postdot > n) max_n->postdot = n;
+                                       return -1;
+                               }
+                       }
+                       if (!max_n->predot || max_n->predot > n) max_n->predot = n;
+                       return null_match(p);
 
+                       /* a '?' matches any single character */
                case UCS2_CHAR('?'):
-                       if (! *n) goto next;
-                       if ((*n == UCS2_CHAR('.') && 
-                            n[1] != UCS2_CHAR('.')) || ! *n) 
-                               goto next;
+                       if (! *n) {
+                               return -1;
+                       }
                        n++;
                        break;
 
+                       /* a '?' matches any single character */
                case UCS2_CHAR('>'):
-                       if (! *n) goto next;
                        if (n[0] == UCS2_CHAR('.')) {
-                               if (! n[1] && ms_fnmatch_lanman_core(p, n+1, case_sensitive) == 0) goto match;
-                               if (ms_fnmatch_lanman_core(p, n, case_sensitive) == 0) goto match;
-                               goto nomatch;
-                       }
-                       n++;
-                       break;
-
-               case UCS2_CHAR('*'):
-                       if (! *n) goto next;
-                       if (! *p) goto match;
-                       for (; *n; n++) {
-                               if (ms_fnmatch_lanman_core(p, n, case_sensitive) == 0) goto match;
-                       }
-                       break;
-
-               case UCS2_CHAR('<'):
-                       for (; *n; n++) {
-                               if (ms_fnmatch_lanman_core(p, n, case_sensitive) == 0) goto match;
-                               if (*n == UCS2_CHAR('.') && 
-                                   !strchr_w(n+1,UCS2_CHAR('.'))) {
-                                       n++;
-                                       break;
+                               if (! n[1] && null_match(p) == 0) {
+                                       return 0;
                                }
+                               break;
                        }
+                       if (! *n) return null_match(p);
+                       n++;
                        break;
 
                case UCS2_CHAR('"'):
-                       if (*n == 0 && ms_fnmatch_lanman_core(p, n, case_sensitive) == 0) goto match;
-                       if (*n != UCS2_CHAR('.')) goto nomatch;
+                       if (*n == 0 && null_match(p) == 0) {
+                               return 0;
+                       }
+                       if (*n != UCS2_CHAR('.')) return -1;
                        n++;
                        break;
 
                default:
-                       if (case_sensitive) {
-                               if (c != *n) goto nomatch;
-                       } else {
-                               if (tolower_w(c) != tolower_w(*n)) goto nomatch;
+                       if (c != *n && toupper_w(c) != toupper_w(*n)) {
+                               return -1;
                        }
                        n++;
+                       break;
                }
        }
        
-       if (! *n) goto match;
+       if (! *n) {
+               return 0;
+       }
        
- nomatch:
-       /*
-       if (verbose) printf("NOMATCH pattern=[%s] string=[%s]\n", pattern, string);
-       */
        return -1;
-
-next:
-       if (ms_fnmatch_lanman_core(p, n, case_sensitive) == 0) goto match;
-        goto nomatch;
-
- match:
-       /*
-       if (verbose) printf("MATCH   pattern=[%s] string=[%s]\n", pattern, string);
-       */
-       return 0;
 }
 
-static int ms_fnmatch_lanman1(const smb_ucs2_t *pattern, 
-                             const smb_ucs2_t *string, BOOL case_sensitive)
+int ms_fnmatch(const char *pattern, const char *string, enum protocol_types protocol)
 {
-       if (!strpbrk_wa(pattern, "?*<>\"")) {
-               smb_ucs2_t s[] = {UCS2_CHAR('.'), 0};
-               if (strcmp_wa(string,"..") == 0) string = s;
-               return strcasecmp_w(pattern, string);
-       }
+       wpstring p, s;
+       int ret, count, i;
+       struct max_n *max_n = NULL;
 
-       if (strcmp_wa(string,"..") == 0 || strcmp_wa(string,".") == 0) {
-               smb_ucs2_t dot[] = {UCS2_CHAR('.'), 0};
-               smb_ucs2_t dotdot[] = {UCS2_CHAR('.'), UCS2_CHAR('.'), 0};
-               return ms_fnmatch_lanman_core(pattern, dotdot, case_sensitive) &&
-                       ms_fnmatch_lanman_core(pattern, dot, case_sensitive);
+       if (strcmp(string, "..") == 0) {
+               string = ".";
        }
 
-       return ms_fnmatch_lanman_core(pattern, string, case_sensitive);
-}
-
-
-/* the following function was derived using the masktest utility -
-   after years of effort we finally have a perfect MS wildcard
-   matching routine! 
-
-   NOTE: this matches only filenames with no directory component
-
-   Returns 0 on match, -1 on fail.
-*/
-static int ms_fnmatch_w(const smb_ucs2_t *pattern, const smb_ucs2_t *string, 
-                       int protocol, BOOL case_sensitive)
-{
-       const smb_ucs2_t *p = pattern, *n = string;
-       smb_ucs2_t c;
-
-       if (protocol <= PROTOCOL_LANMAN2) {
-               return ms_fnmatch_lanman1(pattern, string, case_sensitive);
+       if (strpbrk(pattern, "<>*?\"") == NULL) {
+               /* this is not just an optmisation - it is essential
+                  for LANMAN1 correctness */
+               return StrCaseCmp(pattern, string);
        }
 
-       while ((c = *p++)) {
-               switch (c) {
-               case UCS2_CHAR('?'):
-                       if (! *n) return -1;
-                       n++;
-                       break;
-
-               case UCS2_CHAR('>'):
-                       if (n[0] == UCS2_CHAR('.')) {
-                               if (! n[1] && ms_fnmatch_w(p, n+1, protocol, case_sensitive) == 0) return 0;
-                               if (ms_fnmatch_w(p, n, protocol, case_sensitive) == 0) return 0;
-                               return -1;
-                       }
-                       if (! *n) return ms_fnmatch_w(p, n, protocol, case_sensitive);
-                       n++;
-                       break;
-
-               case UCS2_CHAR('*'):
-                       while (*p == UCS2_CHAR('*')) {
-                               p++;
-                       }
-                       for (; *n; n++) {
-                               if (ms_fnmatch_w(p, n, protocol, case_sensitive) == 0) return 0;
-                       }
-                       break;
+       pstrcpy_wa(p, pattern);
+       pstrcpy_wa(s, string);
 
-               case UCS2_CHAR('<'):
-                       for (; *n; n++) {
-                               if (ms_fnmatch_w(p, n, protocol, case_sensitive) == 0) return 0;
-                               if (*n == UCS2_CHAR('.') && !strchr_wa(n+1,'.')) {
-                                       n++;
-                                       break;
-                               }
-                       }
-                       break;
-
-               case UCS2_CHAR('"'):
-                       if (*n == 0 && ms_fnmatch_w(p, n, protocol, case_sensitive) == 0) return 0;
-                       if (*n != UCS2_CHAR('.')) return -1;
-                       n++;
-                       break;
-
-               default:
-                       if (case_sensitive) {
-                               if (c != *n) return -1;
-                       } else {
-                               if (tolower_w(c) != tolower_w(*n)) return -1;
+       if (protocol <= PROTOCOL_LANMAN2) {
+               /*
+                 for older negotiated protocols it is possible to
+                 translate the pattern to produce a "new style"
+                 pattern that exactly matches w2k behaviour
+               */
+               for (i=0;p[i];i++) {
+                       if (p[i] == UCS2_CHAR('?')) {
+                               p[i] = UCS2_CHAR('>');
+                       } else if (p[i] == UCS2_CHAR('.') && 
+                                  (p[i+1] == UCS2_CHAR('?') || 
+                                   p[i+1] == UCS2_CHAR('*') ||
+                                   p[i+1] == 0)) {
+                               p[i] = UCS2_CHAR('"');
+                       } else if (p[i] == UCS2_CHAR('*') && p[i+1] == UCS2_CHAR('.')) {
+                               p[i] = UCS2_CHAR('<');
                        }
-                       n++;
                }
        }
-       
-       if (! *n) return 0;
-       
-       return -1;
-}
 
-int ms_fnmatch(const char *pattern, const char *string, int protocol, 
-              BOOL case_senstive)
-{
-       wpstring buffer_pattern, buffer_string;
-       int ret;
-       size_t size;
-
-       size = push_ucs2(NULL, buffer_pattern, pattern, sizeof(buffer_pattern), STR_TERMINATE);
-       if (size == (size_t)-1) {
-               return -1;
-               /* Not quite the right answer, but finding the right one
-                  under this failure case is expensive, and it's pretty close */
+       for (count=i=0;p[i];i++) {
+               if (p[i] == UCS2_CHAR('*') || p[i] == UCS2_CHAR('<')) count++;
        }
-       
-       size = push_ucs2(NULL, buffer_string, string, sizeof(buffer_string), STR_TERMINATE);
-       if (size == (size_t)-1) {
-               return -1;
-               /* Not quite the right answer, but finding the right one
-                  under this failure case is expensive, and it's pretty close */
+
+       if (count != 0) {
+               max_n = calloc(sizeof(struct max_n), count);
+               if (!max_n) {
+                       return -1;
+               }
        }
 
-       ret = ms_fnmatch_w(buffer_pattern, buffer_string, protocol, case_senstive);
-       DEBUG(10,("ms_fnmatch(%s,%s) -> %d\n", pattern, string, ret));
+       ret = ms_fnmatch_core(p, s, max_n, strrchr_w(s, UCS2_CHAR('.')));
+
+       if (max_n) {
+               free(max_n);
+       }
 
        return ret;
 }
 
+
 /* a generic fnmatch function - uses for non-CIFS pattern matching */
 int gen_fnmatch(const char *pattern, const char *string)
 {
-       return ms_fnmatch(pattern, string, PROTOCOL_NT1, True);
+       return ms_fnmatch(pattern, string, PROTOCOL_NT1);
 }