/* 
   Unix SMB/CIFS implementation.

   POSIX NTVFS backend - 8.3 name routines

   Copyright (C) Andrew Tridgell 2004

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "vfs_posix.h"
#include "system/iconv.h"

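/*
  this mangling scheme uses the following format

  AAAAnn~n.AAA

  where the n characters are the digits of a base 36 hash, and A
  represents characters from the original string (the layout is shown
  for the default of 4 leading characters; fewer leading characters
  leave room for more hash digits)

  The hash is taken of the leading part of the long filename, in uppercase

  for simplicity, we only allow ascii characters in 8.3 names
*/
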
/*
  ===============================================================================
  NOTE NOTE NOTE!!!

  This file deliberately uses non-multibyte string functions in many places. This
  is *not* a mistake. This code is multi-byte safe, but it gets this property
  through some very subtle knowledge of the way multi-byte strings are encoded 
  and the fact that this mangling algorithm only supports ascii characters in
  8.3 names.

  please don't convert this file to use the *_m() functions!!
  ===============================================================================
*/

/* change the 1 to 0 to compile the mangling debug messages out */
#if 1
#define M_DEBUG(level, x) DEBUG(level, x)
#else
#define M_DEBUG(level, x)
#endif

/* these flags are used to mark characters as having particular
   properties. They are stored per byte value in ctx->char_flags */
#define FLAG_BASECHAR 1
#define FLAG_ASCII 2
#define FLAG_ILLEGAL 4
#define FLAG_WILDCARD 8

/* the "possible" flags are used as a fast way to find possible DOS
   reserved filenames: FLAG_POSSIBLEn marks the characters that can
   appear at position n of a reserved name */
#define FLAG_POSSIBLE1 16
#define FLAG_POSSIBLE2 32
#define FLAG_POSSIBLE3 64
#define FLAG_POSSIBLE4 128

/* by default have a max of 512 entries in the cache. */
#ifndef MANGLE_CACHE_SIZE
#define MANGLE_CACHE_SIZE 512
#endif

/* number of leading characters of the long name that are kept in the
   mangled name when no usable value has been configured */
#define DEFAULT_MANGLE_PREFIX 4

/* the digits used to encode the hash, in value order */
#define MANGLE_BASECHARS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

/* test a character against the flags table. Note that this expects a
   variable called 'ctx' to be in scope at the point of use */
#define FLAG_CHECK(c, flag) (ctx->char_flags[(uint8_t)(c)] & (flag))

/* the DOS reserved device names, NULL terminated */
static const char *reserved_names[] = 
{ "AUX", "CON", "COM1", "COM2", "COM3", "COM4",
  "LPT1", "LPT2", "LPT3", "NUL", "PRN", NULL };

91 hash a string of the specified length. The string does not need to be
94 this hash needs to be fast with a low collision rate (what hash doesn't?)
96 static uint32_t mangle_hash(struct pvfs_mangle_context *ctx,
97 const char *key, size_t length)
99 return pvfs_name_hash(key, length) % ctx->mangle_modulus;
103 insert an entry into the prefix cache. The string might not be null
105 static void cache_insert(struct pvfs_mangle_context *ctx,
106 const char *prefix, int length, uint32_t hash)
108 int i = hash % MANGLE_CACHE_SIZE;
110 if (ctx->prefix_cache[i]) {
111 talloc_free(ctx->prefix_cache[i]);
114 ctx->prefix_cache[i] = talloc_strndup(ctx->prefix_cache, prefix, length);
115 ctx->prefix_cache_hashes[i] = hash;
119 lookup an entry in the prefix cache. Return NULL if not found.
121 static const char *cache_lookup(struct pvfs_mangle_context *ctx, uint32_t hash)
123 int i = hash % MANGLE_CACHE_SIZE;
126 if (!ctx->prefix_cache[i] || hash != ctx->prefix_cache_hashes[i]) {
130 /* yep, it matched */
131 return ctx->prefix_cache[i];
136 determine if a string is possibly in a mangled format, ignoring
139 In this algorithm, mangled names use only pure ascii characters (no
140 multi-byte) so we can avoid doing a UCS2 conversion
142 static BOOL is_mangled_component(struct pvfs_mangle_context *ctx,
143 const char *name, size_t len)
147 M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name, (unsigned int)len));
149 /* check the length */
150 if (len > 12 || len < 8)
153 /* the best distinguishing characteristic is the ~ */
157 /* check extension */
161 for (i=9; name[i] && i < len; i++) {
162 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
168 /* check lead characters */
169 for (i=0;i<ctx->mangle_prefix;i++) {
170 if (! FLAG_CHECK(name[i], FLAG_ASCII)) {
175 /* check rest of hash */
176 if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
179 for (i=ctx->mangle_prefix;i<6;i++) {
180 if (! FLAG_CHECK(name[i], FLAG_BASECHAR)) {
185 M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name, (unsigned int)len));
193 determine if a string is possibly in a mangled format, ignoring
196 In this algorithm, mangled names use only pure ascii characters (no
197 multi-byte) so we can avoid doing a UCS2 conversion
199 NOTE! This interface must be able to handle a path with unix
200 directory separators. It should return true if any component is
203 static BOOL is_mangled(struct pvfs_mangle_context *ctx, const char *name)
208 M_DEBUG(10,("is_mangled %s ?\n", name));
210 for (s=name; (p=strchr(s, '/')); s=p+1) {
211 if (is_mangled_component(ctx, s, PTR_DIFF(p, s))) {
216 /* and the last part ... */
217 return is_mangled_component(ctx, s, strlen(s));
222 see if a filename is an allowable 8.3 name.
224 we are only going to allow ascii characters in 8.3 names, as this
225 simplifies things greatly (it means that we know the string won't
226 get larger when converted from UNIX to DOS formats)
228 static BOOL is_8_3(struct pvfs_mangle_context *ctx,
229 const char *name, BOOL check_case, BOOL allow_wildcards)
234 /* as a special case, the names '.' and '..' are allowable 8.3 names */
235 if (name[0] == '.') {
236 if (!name[1] || (name[1] == '.' && !name[2])) {
241 /* the simplest test is on the overall length of the
242 filename. Note that we deliberately use the ascii string
243 length (not the multi-byte one) as it is faster, and gives us
244 the result we need in this case. Using strlen_m would not
245 only be slower, it would be incorrect */
250 /* find the '.'. Note that once again we use the non-multibyte
252 dot_p = strchr(name, '.');
255 /* if the name doesn't contain a '.' then its length
256 must be less than 8 */
261 int prefix_len, suffix_len;
263 /* if it does contain a dot then the prefix must be <=
264 8 and the suffix <= 3 in length */
265 prefix_len = PTR_DIFF(dot_p, name);
266 suffix_len = len - (prefix_len+1);
268 if (prefix_len > 8 || suffix_len > 3 || suffix_len == 0) {
272 /* a 8.3 name cannot contain more than 1 '.' */
273 if (strchr(dot_p+1, '.')) {
278 /* the length are all OK. Now check to see if the characters themselves are OK */
279 for (i=0; name[i]; i++) {
280 /* note that we may allow wildcard petterns! */
281 if (!FLAG_CHECK(name[i], FLAG_ASCII|(allow_wildcards ? FLAG_WILDCARD : 0)) &&
287 /* it is a good 8.3 name */
293 try to find a 8.3 name in the cache, and if found then
294 return the original long name.
296 static const char *check_cache(struct pvfs_mangle_context *ctx,
299 uint32_t hash, multiplier;
304 /* make sure that this is a mangled name from this cache */
305 if (!is_mangled(ctx, name)) {
306 M_DEBUG(10,("check_cache: %s -> not mangled\n", name));
310 /* we need to extract the hash from the 8.3 name */
311 hash = ctx->base_reverse[(unsigned char)name[7]];
312 for (multiplier=36, i=5;i>=ctx->mangle_prefix;i--) {
313 uint32_t v = ctx->base_reverse[(unsigned char)name[i]];
314 hash += multiplier * v;
318 /* now look in the prefix cache for that hash */
319 prefix = cache_lookup(ctx, hash);
321 M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
325 /* we found it - construct the full name */
326 if (name[8] == '.') {
327 strncpy(extension, name+9, 3);
334 return talloc_asprintf(ctx, "%s.%s", prefix, extension);
337 return talloc_strdup(ctx, prefix);
342 look for a DOS reserved name
344 static BOOL is_reserved_name(struct pvfs_mangle_context *ctx, const char *name)
346 if (FLAG_CHECK(name[0], FLAG_POSSIBLE1) &&
347 FLAG_CHECK(name[1], FLAG_POSSIBLE2) &&
348 FLAG_CHECK(name[2], FLAG_POSSIBLE3) &&
349 FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
350 /* a likely match, scan the lot */
352 for (i=0; reserved_names[i]; i++) {
353 if (strcasecmp(name, reserved_names[i]) == 0) {
364 See if a filename is a legal long filename.
365 A filename ending in a '.' is not legal unless it's "." or "..". JRA.
367 static BOOL is_legal_name(struct pvfs_mangle_context *ctx, const char *name)
371 codepoint_t c = next_codepoint(name, &c_size);
372 if (c == INVALID_CODEPOINT) {
375 /* all high chars are OK */
380 if (FLAG_CHECK(c, FLAG_ILLEGAL)) {
390 the main forward mapping function, which converts a long filename to
393 if need83 is not set then we only do the mangling if the name is illegal
396 if cache83 is not set then we don't cache the result
398 return NULL if we don't need to do any conversion
400 static char *name_map(struct pvfs_mangle_context *ctx,
401 const char *name, BOOL need83, BOOL cache83)
406 unsigned int extension_length, i;
407 unsigned int prefix_len;
410 const char *basechars = MANGLE_BASECHARS;
412 /* reserved names are handled specially */
413 if (!is_reserved_name(ctx, name)) {
414 /* if the name is already a valid 8.3 name then we don't need to
416 if (is_8_3(ctx, name, False, False)) {
420 /* if the caller doesn't strictly need 8.3 then just check for illegal
422 if (!need83 && is_legal_name(ctx, name)) {
427 /* find the '.' if any */
428 dot_p = strrchr(name, '.');
431 /* if the extension contains any illegal characters or
432 is too long or zero length then we treat it as part
434 for (i=0; i<4 && dot_p[i+1]; i++) {
435 if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
440 if (i == 0 || i == 4) dot_p = NULL;
443 /* the leading characters in the mangled name is taken from
444 the first characters of the name, if they are ascii otherwise
447 for (i=0;i<ctx->mangle_prefix && name[i];i++) {
448 lead_chars[i] = name[i];
449 if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
452 lead_chars[i] = toupper(lead_chars[i]);
454 for (;i<ctx->mangle_prefix;i++) {
458 /* the prefix is anything up to the first dot */
460 prefix_len = PTR_DIFF(dot_p, name);
462 prefix_len = strlen(name);
465 /* the extension of the mangled name is taken from the first 3
466 ascii chars after the dot */
467 extension_length = 0;
469 for (i=1; extension_length < 3 && dot_p[i]; i++) {
471 if (FLAG_CHECK(c, FLAG_ASCII)) {
472 extension[extension_length++] = toupper(c);
477 /* find the hash for this prefix */
478 v = hash = mangle_hash(ctx, name, prefix_len);
480 new_name = talloc_array_p(ctx, char, 13);
481 if (new_name == NULL) {
485 /* now form the mangled name. */
486 for (i=0;i<ctx->mangle_prefix;i++) {
487 new_name[i] = lead_chars[i];
489 new_name[7] = basechars[v % 36];
491 for (i=5; i>=ctx->mangle_prefix; i--) {
493 new_name[i] = basechars[v % 36];
496 /* add the extension */
497 if (extension_length) {
499 memcpy(&new_name[9], extension, extension_length);
500 new_name[9+extension_length] = 0;
506 /* put it in the cache */
507 cache_insert(ctx, name, prefix_len, hash);
510 M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
511 name, hash, new_name, cache83));
517 /* initialise the flags table
519 we allow only a very restricted set of characters as 'ascii' in this
520 mangling backend. This isn't a significant problem as modern clients
521 use the 'long' filenames anyway, and those don't have these
524 static void init_tables(struct pvfs_mangle_context *ctx)
526 const char *basechars = MANGLE_BASECHARS;
528 /* the list of reserved dos names - all of these are illegal */
530 ZERO_STRUCT(ctx->char_flags);
532 for (i=1;i<128;i++) {
533 if ((i >= '0' && i <= '9') ||
534 (i >= 'a' && i <= 'z') ||
535 (i >= 'A' && i <= 'Z')) {
536 ctx->char_flags[i] |= (FLAG_ASCII | FLAG_BASECHAR);
538 if (strchr("_-$~", i)) {
539 ctx->char_flags[i] |= FLAG_ASCII;
542 if (strchr("*\\/?<>|\":", i)) {
543 ctx->char_flags[i] |= FLAG_ILLEGAL;
546 if (strchr("*?\"<>", i)) {
547 ctx->char_flags[i] |= FLAG_WILDCARD;
551 ZERO_STRUCT(ctx->base_reverse);
553 ctx->base_reverse[(uint8_t)basechars[i]] = i;
556 /* fill in the reserved names flags. These are used as a very
557 fast filter for finding possible DOS reserved filenames */
558 for (i=0; reserved_names[i]; i++) {
559 unsigned char c1, c2, c3, c4;
561 c1 = (unsigned char)reserved_names[i][0];
562 c2 = (unsigned char)reserved_names[i][1];
563 c3 = (unsigned char)reserved_names[i][2];
564 c4 = (unsigned char)reserved_names[i][3];
566 ctx->char_flags[c1] |= FLAG_POSSIBLE1;
567 ctx->char_flags[c2] |= FLAG_POSSIBLE2;
568 ctx->char_flags[c3] |= FLAG_POSSIBLE3;
569 ctx->char_flags[c4] |= FLAG_POSSIBLE4;
570 ctx->char_flags[tolower(c1)] |= FLAG_POSSIBLE1;
571 ctx->char_flags[tolower(c2)] |= FLAG_POSSIBLE2;
572 ctx->char_flags[tolower(c3)] |= FLAG_POSSIBLE3;
573 ctx->char_flags[tolower(c4)] |= FLAG_POSSIBLE4;
575 ctx->char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
578 ctx->mangle_modulus = 1;
579 for (i=0;i<(7-ctx->mangle_prefix);i++) {
580 ctx->mangle_modulus *= 36;
585 initialise the mangling code
587 NTSTATUS pvfs_mangle_init(struct pvfs_state *pvfs)
589 struct pvfs_mangle_context *ctx;
591 ctx = talloc_p(pvfs, struct pvfs_mangle_context);
593 return NT_STATUS_NO_MEMORY;
595 ctx->prefix_cache = talloc_array_p(ctx, char *, MANGLE_CACHE_SIZE);
596 if (ctx->prefix_cache == NULL) {
597 return NT_STATUS_NO_MEMORY;
599 ctx->prefix_cache_hashes = talloc_array_p(ctx, uint32_t, MANGLE_CACHE_SIZE);
600 if (ctx->prefix_cache_hashes == NULL) {
601 return NT_STATUS_NO_MEMORY;
604 memset(ctx->prefix_cache, 0, sizeof(char *)*MANGLE_CACHE_SIZE);
605 memset(ctx->prefix_cache_hashes, 0, sizeof(uint32_t)*MANGLE_CACHE_SIZE);
607 ctx->mangle_prefix = lp_parm_int(-1, "mangle", "prefix", -1);
608 if (ctx->mangle_prefix < 0 || ctx->mangle_prefix > 6) {
609 ctx->mangle_prefix = DEFAULT_MANGLE_PREFIX;
614 pvfs->mangle_ctx = ctx;
621 return the short name for a component of a full name
623 char *pvfs_short_name_component(struct pvfs_state *pvfs, const char *name)
625 return name_map(pvfs->mangle_ctx, name, True, True);
630 return the short name for a given entry in a directory
632 const char *pvfs_short_name(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
633 struct pvfs_filename *name)
635 char *p = strrchr(name->full_name, '/');
636 char *ret = pvfs_short_name_component(pvfs, p+1);
640 talloc_steal(mem_ctx, ret);
645 lookup a mangled name, returning the original long name if present
648 char *pvfs_mangled_lookup(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
652 ret = check_cache(pvfs->mangle_ctx, name);
654 return talloc_steal(mem_ctx, ret);
661 look for a DOS reserved name
663 BOOL pvfs_is_reserved_name(struct pvfs_state *pvfs, const char *name)
665 return is_reserved_name(pvfs->mangle_ctx, name);
670 see if a component of a filename could be a mangled name from our
673 BOOL pvfs_is_mangled_component(struct pvfs_state *pvfs, const char *name)
675 return is_mangled_component(pvfs->mangle_ctx, name, strlen(name));