ntdb: create initial database to be multiple of NTDB_PGSIZE.
[ddiss/samba.git] / lib / ntdb / open.c
1  /*
2    Trivial Database 2: opening and closing TDBs
3    Copyright (C) Rusty Russell 2010
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/build_assert/build_assert.h>
20 #include <assert.h>
21
/*
 * Head of the list of open file-backed contexts.  It is searched on open so
 * that a second open of the same file can share the existing ntdb_file:
 * fcntl locks on a file do not nest.
 */
static struct ntdb_context *tdbs;
24
25 static struct ntdb_file *find_file(dev_t device, ino_t ino)
26 {
27         struct ntdb_context *i;
28
29         for (i = tdbs; i; i = i->next) {
30                 if (i->file->device == device && i->file->inode == ino) {
31                         i->file->refcnt++;
32                         return i->file;
33                 }
34         }
35         return NULL;
36 }
37
/*
 * Read exactly len bytes from fd into buf, looping over short reads.
 *
 * Returns true once the whole buffer has been filled (trivially so when
 * len is 0).  Returns false on failure: errno is whatever read(2) set, or
 * EWOULDBLOCK if end-of-file was reached before len bytes arrived.  Bytes
 * read before the failure are left in buf.
 */
static bool read_all(int fd, void *buf, size_t len)
{
	char *dst = buf;

	while (len > 0) {
		ssize_t got = read(fd, dst, len);

		if (got < 0) {
			/* Interrupted before any data arrived: just retry. */
			if (errno == EINTR)
				continue;
			return false;
		}
		if (got == 0) {
			/* ETOOSHORT? */
			errno = EWOULDBLOCK;
			return false;
		}
		dst += got;
		len -= (size_t)got;
	}
	return true;
}
55
56 static uint64_t random_number(struct ntdb_context *ntdb)
57 {
58         int fd;
59         uint64_t ret = 0;
60         struct timeval now;
61
62         fd = open("/dev/urandom", O_RDONLY);
63         if (fd >= 0) {
64                 if (read_all(fd, &ret, sizeof(ret))) {
65                         close(fd);
66                         return ret;
67                 }
68                 close(fd);
69         }
70         /* FIXME: Untested!  Based on Wikipedia protocol description! */
71         fd = open("/dev/egd-pool", O_RDWR);
72         if (fd >= 0) {
73                 /* Command is 1, next byte is size we want to read. */
74                 char cmd[2] = { 1, sizeof(uint64_t) };
75                 if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
76                         char reply[1 + sizeof(uint64_t)];
77                         int r = read(fd, reply, sizeof(reply));
78                         if (r > 1) {
79                                 /* Copy at least some bytes. */
80                                 memcpy(&ret, reply+1, r - 1);
81                                 if (reply[0] == sizeof(uint64_t)
82                                     && r == sizeof(reply)) {
83                                         close(fd);
84                                         return ret;
85                                 }
86                         }
87                 }
88                 close(fd);
89         }
90
91         /* Fallback: pid and time. */
92         gettimeofday(&now, NULL);
93         ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
94         ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
95                    "ntdb_open: random from getpid and time");
96         return ret;
97 }
98
99 static void ntdb_context_init(struct ntdb_context *ntdb)
100 {
101         /* Initialize the NTDB fields here */
102         ntdb_io_init(ntdb);
103         ntdb->direct_access = 0;
104         ntdb->transaction = NULL;
105         ntdb->access = NULL;
106 }
107
108 struct new_database {
109         struct ntdb_header hdr;
110         struct ntdb_freetable ftable;
111         struct ntdb_free_record remainder;
112 };
113
114 /* initialise a new database */
115 static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
116                                        struct ntdb_attribute_seed *seed,
117                                        struct ntdb_header *hdr)
118 {
119         /* We make it up in memory, then write it out if not internal */
120         struct new_database *newdb;
121         unsigned int magic_len;
122         ssize_t rlen;
123         size_t dbsize, remaindersize;
124         enum NTDB_ERROR ecode;
125
126         /* Always make db a multiple of NTDB_PGSIZE */
127         dbsize = (sizeof(*newdb) + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
128         remaindersize = dbsize - sizeof(*newdb);
129         newdb = malloc(dbsize);
130         if (!newdb) {
131                 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
132                                    "ntdb_new_database: failed to allocate");
133         }
134
135         /* Fill in the header */
136         newdb->hdr.version = NTDB_VERSION;
137         if (seed)
138                 newdb->hdr.hash_seed = seed->seed;
139         else
140                 newdb->hdr.hash_seed = random_number(ntdb);
141         newdb->hdr.hash_test = NTDB_HASH_MAGIC;
142         newdb->hdr.hash_test = ntdb->hash_fn(&newdb->hdr.hash_test,
143                                            sizeof(newdb->hdr.hash_test),
144                                            newdb->hdr.hash_seed,
145                                            ntdb->hash_data);
146         newdb->hdr.recovery = 0;
147         newdb->hdr.features_used = newdb->hdr.features_offered = NTDB_FEATURE_MASK;
148         newdb->hdr.seqnum = 0;
149         newdb->hdr.capabilities = 0;
150         memset(newdb->hdr.reserved, 0, sizeof(newdb->hdr.reserved));
151         /* Initial hashes are empty. */
152         memset(newdb->hdr.hashtable, 0, sizeof(newdb->hdr.hashtable));
153
154         /* Free is empty. */
155         newdb->hdr.free_table = offsetof(struct new_database, ftable);
156         memset(&newdb->ftable, 0, sizeof(newdb->ftable));
157         ecode = set_header(NULL, &newdb->ftable.hdr, NTDB_FTABLE_MAGIC, 0,
158                            sizeof(newdb->ftable) - sizeof(newdb->ftable.hdr),
159                            sizeof(newdb->ftable) - sizeof(newdb->ftable.hdr),
160                            0);
161         if (ecode != NTDB_SUCCESS) {
162                 goto out;
163         }
164
165         /* Rest of database is a free record, containing junk. */
166         newdb->remainder.ftable_and_len
167                 = (remaindersize + sizeof(newdb->remainder)
168                    - sizeof(struct ntdb_used_record));
169         newdb->remainder.next = 0;
170         newdb->remainder.magic_and_prev
171                 = (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL))
172                 | offsetof(struct new_database, remainder);
173         memset(&newdb->remainder + 1, 0x43, remaindersize);
174
175         /* Put in our single free entry. */
176         newdb->ftable.buckets[size_to_bucket(remaindersize)] =
177                 offsetof(struct new_database, remainder);
178
179         /* Magic food */
180         memset(newdb->hdr.magic_food, 0, sizeof(newdb->hdr.magic_food));
181         strcpy(newdb->hdr.magic_food, NTDB_MAGIC_FOOD);
182
183         /* This creates an endian-converted database, as if read from disk */
184         magic_len = sizeof(newdb->hdr.magic_food);
185         ntdb_convert(ntdb,
186                      (char *)&newdb->hdr + magic_len,
187                      sizeof(*newdb) - magic_len);
188
189         *hdr = newdb->hdr;
190
191         if (ntdb->flags & NTDB_INTERNAL) {
192                 ntdb->file->map_size = dbsize;
193                 ntdb->file->map_ptr = newdb;
194                 return NTDB_SUCCESS;
195         }
196         if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
197                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
198                                     "ntdb_new_database:"
199                                     " failed to seek: %s", strerror(errno));
200                 goto out;
201         }
202
203         if (ftruncate(ntdb->file->fd, 0) == -1) {
204                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
205                                     "ntdb_new_database:"
206                                     " failed to truncate: %s", strerror(errno));
207                 goto out;
208         }
209
210         rlen = write(ntdb->file->fd, newdb, dbsize);
211         if (rlen != dbsize) {
212                 if (rlen >= 0)
213                         errno = ENOSPC;
214                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
215                                     "ntdb_new_database: %zi writing header: %s",
216                                     rlen, strerror(errno));
217                 goto out;
218         }
219
220 out:
221         free(newdb);
222         return ecode;
223 }
224
225 static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb)
226 {
227         ntdb->file = malloc(sizeof(*ntdb->file));
228         if (!ntdb->file)
229                 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
230                                   "ntdb_open: cannot alloc ntdb_file structure");
231         ntdb->file->num_lockrecs = 0;
232         ntdb->file->lockrecs = NULL;
233         ntdb->file->allrecord_lock.count = 0;
234         ntdb->file->refcnt = 1;
235         ntdb->file->map_ptr = NULL;
236         return NTDB_SUCCESS;
237 }
238
239 _PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
240                                  const union ntdb_attribute *attr)
241 {
242         switch (attr->base.attr) {
243         case NTDB_ATTRIBUTE_LOG:
244                 ntdb->log_fn = attr->log.fn;
245                 ntdb->log_data = attr->log.data;
246                 break;
247         case NTDB_ATTRIBUTE_HASH:
248         case NTDB_ATTRIBUTE_SEED:
249         case NTDB_ATTRIBUTE_OPENHOOK:
250                 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
251                                    NTDB_LOG_USE_ERROR,
252                                    "ntdb_set_attribute:"
253                                    " cannot set %s after opening",
254                                    attr->base.attr == NTDB_ATTRIBUTE_HASH
255                                    ? "NTDB_ATTRIBUTE_HASH"
256                                    : attr->base.attr == NTDB_ATTRIBUTE_SEED
257                                    ? "NTDB_ATTRIBUTE_SEED"
258                                    : "NTDB_ATTRIBUTE_OPENHOOK");
259         case NTDB_ATTRIBUTE_STATS:
260                 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
261                                    NTDB_LOG_USE_ERROR,
262                                    "ntdb_set_attribute:"
263                                    " cannot set NTDB_ATTRIBUTE_STATS");
264         case NTDB_ATTRIBUTE_FLOCK:
265                 ntdb->lock_fn = attr->flock.lock;
266                 ntdb->unlock_fn = attr->flock.unlock;
267                 ntdb->lock_data = attr->flock.data;
268                 break;
269         default:
270                 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
271                                    NTDB_LOG_USE_ERROR,
272                                    "ntdb_set_attribute:"
273                                    " unknown attribute type %u",
274                                    attr->base.attr);
275         }
276         return NTDB_SUCCESS;
277 }
278
279 _PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
280                                  union ntdb_attribute *attr)
281 {
282         switch (attr->base.attr) {
283         case NTDB_ATTRIBUTE_LOG:
284                 if (!ntdb->log_fn)
285                         return NTDB_ERR_NOEXIST;
286                 attr->log.fn = ntdb->log_fn;
287                 attr->log.data = ntdb->log_data;
288                 break;
289         case NTDB_ATTRIBUTE_HASH:
290                 attr->hash.fn = ntdb->hash_fn;
291                 attr->hash.data = ntdb->hash_data;
292                 break;
293         case NTDB_ATTRIBUTE_SEED:
294                 attr->seed.seed = ntdb->hash_seed;
295                 break;
296         case NTDB_ATTRIBUTE_OPENHOOK:
297                 if (!ntdb->openhook)
298                         return NTDB_ERR_NOEXIST;
299                 attr->openhook.fn = ntdb->openhook;
300                 attr->openhook.data = ntdb->openhook_data;
301                 break;
302         case NTDB_ATTRIBUTE_STATS: {
303                 size_t size = attr->stats.size;
304                 if (size > ntdb->stats.size)
305                         size = ntdb->stats.size;
306                 memcpy(&attr->stats, &ntdb->stats, size);
307                 break;
308         }
309         case NTDB_ATTRIBUTE_FLOCK:
310                 attr->flock.lock = ntdb->lock_fn;
311                 attr->flock.unlock = ntdb->unlock_fn;
312                 attr->flock.data = ntdb->lock_data;
313                 break;
314         default:
315                 return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
316                                    NTDB_LOG_USE_ERROR,
317                                    "ntdb_get_attribute:"
318                                    " unknown attribute type %u",
319                                    attr->base.attr);
320         }
321         attr->base.next = NULL;
322         return NTDB_SUCCESS;
323 }
324
325 _PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb,
326                          enum ntdb_attribute_type type)
327 {
328         switch (type) {
329         case NTDB_ATTRIBUTE_LOG:
330                 ntdb->log_fn = NULL;
331                 break;
332         case NTDB_ATTRIBUTE_OPENHOOK:
333                 ntdb->openhook = NULL;
334                 break;
335         case NTDB_ATTRIBUTE_HASH:
336         case NTDB_ATTRIBUTE_SEED:
337                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
338                            "ntdb_unset_attribute: cannot unset %s after opening",
339                            type == NTDB_ATTRIBUTE_HASH
340                            ? "NTDB_ATTRIBUTE_HASH"
341                            : "NTDB_ATTRIBUTE_SEED");
342                 break;
343         case NTDB_ATTRIBUTE_STATS:
344                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
345                            NTDB_LOG_USE_ERROR,
346                            "ntdb_unset_attribute:"
347                            "cannot unset NTDB_ATTRIBUTE_STATS");
348                 break;
349         case NTDB_ATTRIBUTE_FLOCK:
350                 ntdb->lock_fn = ntdb_fcntl_lock;
351                 ntdb->unlock_fn = ntdb_fcntl_unlock;
352                 break;
353         default:
354                 ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
355                            NTDB_LOG_USE_ERROR,
356                            "ntdb_unset_attribute: unknown attribute type %u",
357                            type);
358         }
359 }
360
361 /* The top three bits of the capability tell us whether it matters. */
362 enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
363                                   ntdb_off_t type)
364 {
365         if (type & NTDB_CAP_NOOPEN) {
366                 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
367                                   "%s: file has unknown capability %llu",
368                                   caller, type & NTDB_CAP_NOOPEN);
369         }
370
371         if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) {
372                 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR,
373                                   "%s: file has unknown capability %llu"
374                                   " (cannot write to it)",
375                                   caller, type & NTDB_CAP_NOOPEN);
376         }
377
378         if (type & NTDB_CAP_NOCHECK) {
379                 ntdb->flags |= NTDB_CANT_CHECK;
380         }
381         return NTDB_SUCCESS;
382 }
383
384 static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb,
385                                       ntdb_off_t capabilities)
386 {
387         ntdb_off_t off, next;
388         enum NTDB_ERROR ecode = NTDB_SUCCESS;
389         const struct ntdb_capability *cap;
390
391         /* Check capability list. */
392         for (off = capabilities; off && ecode == NTDB_SUCCESS; off = next) {
393                 cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
394                 if (NTDB_PTR_IS_ERR(cap)) {
395                         return NTDB_PTR_ERR(cap);
396                 }
397
398                 switch (cap->type & NTDB_CAP_TYPE_MASK) {
399                 /* We don't understand any capabilities (yet). */
400                 default:
401                         ecode = unknown_capability(ntdb, "ntdb_open", cap->type);
402                 }
403                 next = cap->next;
404                 ntdb_access_release(ntdb, cap);
405         }
406         return ecode;
407 }
408
409 _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
410                              int open_flags, mode_t mode,
411                              union ntdb_attribute *attr)
412 {
413         struct ntdb_context *ntdb;
414         struct stat st;
415         int saved_errno = 0;
416         uint64_t hash_test;
417         unsigned v;
418         ssize_t rlen;
419         struct ntdb_header hdr;
420         struct ntdb_attribute_seed *seed = NULL;
421         ntdb_bool_err berr;
422         enum NTDB_ERROR ecode;
423         int openlock;
424
425         ntdb = malloc(sizeof(*ntdb) + (name ? strlen(name) + 1 : 0));
426         if (!ntdb) {
427                 /* Can't log this */
428                 errno = ENOMEM;
429                 return NULL;
430         }
431         /* Set name immediately for logging functions. */
432         if (name) {
433                 ntdb->name = strcpy((char *)(ntdb + 1), name);
434         } else {
435                 ntdb->name = NULL;
436         }
437         ntdb->flags = ntdb_flags;
438         ntdb->log_fn = NULL;
439         ntdb->open_flags = open_flags;
440         ntdb->file = NULL;
441         ntdb->openhook = NULL;
442         ntdb->lock_fn = ntdb_fcntl_lock;
443         ntdb->unlock_fn = ntdb_fcntl_unlock;
444         ntdb->hash_fn = ntdb_jenkins_hash;
445         memset(&ntdb->stats, 0, sizeof(ntdb->stats));
446         ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS;
447         ntdb->stats.size = sizeof(ntdb->stats);
448
449         while (attr) {
450                 switch (attr->base.attr) {
451                 case NTDB_ATTRIBUTE_HASH:
452                         ntdb->hash_fn = attr->hash.fn;
453                         ntdb->hash_data = attr->hash.data;
454                         break;
455                 case NTDB_ATTRIBUTE_SEED:
456                         seed = &attr->seed;
457                         break;
458                 case NTDB_ATTRIBUTE_OPENHOOK:
459                         ntdb->openhook = attr->openhook.fn;
460                         ntdb->openhook_data = attr->openhook.data;
461                         break;
462                 default:
463                         /* These are set as normal. */
464                         ecode = ntdb_set_attribute(ntdb, attr);
465                         if (ecode != NTDB_SUCCESS)
466                                 goto fail;
467                 }
468                 attr = attr->base.next;
469         }
470
471         if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT
472                           | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING
473                           | NTDB_RDONLY)) {
474                 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
475                                    "ntdb_open: unknown flags %u", ntdb_flags);
476                 goto fail;
477         }
478
479         if (seed) {
480                 if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) {
481                         ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
482                                            NTDB_LOG_USE_ERROR,
483                                            "ntdb_open:"
484                                            " cannot set NTDB_ATTRIBUTE_SEED"
485                                            " without O_CREAT.");
486                         goto fail;
487                 }
488         }
489
490         if ((open_flags & O_ACCMODE) == O_WRONLY) {
491                 ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
492                                    "ntdb_open: can't open ntdb %s write-only",
493                                    name);
494                 goto fail;
495         }
496
497         if ((open_flags & O_ACCMODE) == O_RDONLY) {
498                 openlock = F_RDLCK;
499                 ntdb->flags |= NTDB_RDONLY;
500         } else {
501                 if (ntdb_flags & NTDB_RDONLY) {
502                         ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
503                                            NTDB_LOG_USE_ERROR,
504                                            "ntdb_open: can't use NTDB_RDONLY"
505                                            " without O_RDONLY");
506                         goto fail;
507                 }
508                 openlock = F_WRLCK;
509         }
510
511         /* internal databases don't need any of the rest. */
512         if (ntdb->flags & NTDB_INTERNAL) {
513                 ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP);
514                 ecode = ntdb_new_file(ntdb);
515                 if (ecode != NTDB_SUCCESS) {
516                         goto fail;
517                 }
518                 ntdb->file->fd = -1;
519                 ecode = ntdb_new_database(ntdb, seed, &hdr);
520                 if (ecode == NTDB_SUCCESS) {
521                         ntdb_convert(ntdb, &hdr.hash_seed,
522                                     sizeof(hdr.hash_seed));
523                         ntdb->hash_seed = hdr.hash_seed;
524                         ntdb_context_init(ntdb);
525                         ntdb_ftable_init(ntdb);
526                 }
527                 if (ecode != NTDB_SUCCESS) {
528                         goto fail;
529                 }
530                 return ntdb;
531         }
532
533         if (stat(name, &st) != -1)
534                 ntdb->file = find_file(st.st_dev, st.st_ino);
535
536         if (!ntdb->file) {
537                 ecode = ntdb_new_file(ntdb);
538                 if (ecode != NTDB_SUCCESS) {
539                         goto fail;
540                 }
541
542                 /* Set this now, as ntdb_nest_lock examines it. */
543                 ntdb->file->map_size = 0;
544
545                 if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) {
546                         enum ntdb_log_level lvl;
547                         /* errno set by open(2) */
548                         saved_errno = errno;
549
550                         /* Probing for files like this is a common pattern. */
551                         if (!(open_flags & O_CREAT) && errno == ENOENT) {
552                                 lvl = NTDB_LOG_WARNING;
553                         } else {
554                                 lvl = NTDB_LOG_ERROR;
555                         }
556                         ntdb_logerr(ntdb, NTDB_ERR_IO, lvl,
557                                    "ntdb_open: could not open file %s: %s",
558                                    name, strerror(errno));
559
560                         goto fail_errno;
561                 }
562
563                 /* ensure there is only one process initialising at once:
564                  * do it immediately to reduce the create/openlock race. */
565                 ecode = ntdb_lock_open(ntdb, openlock,
566                                        NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
567                 if (ecode != NTDB_SUCCESS) {
568                         saved_errno = errno;
569                         goto fail_errno;
570                 }
571
572                 /* on exec, don't inherit the fd */
573                 v = fcntl(ntdb->file->fd, F_GETFD, 0);
574                 fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC);
575
576                 if (fstat(ntdb->file->fd, &st) == -1) {
577                         saved_errno = errno;
578                         ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
579                                    "ntdb_open: could not stat open %s: %s",
580                                    name, strerror(errno));
581                         goto fail_errno;
582                 }
583
584                 ntdb->file->device = st.st_dev;
585                 ntdb->file->inode = st.st_ino;
586         } else {
587                 /* ensure there is only one process initialising at once */
588                 ecode = ntdb_lock_open(ntdb, openlock,
589                                        NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
590                 if (ecode != NTDB_SUCCESS) {
591                         saved_errno = errno;
592                         goto fail_errno;
593                 }
594         }
595
596         /* call their open hook if they gave us one. */
597         if (ntdb->openhook) {
598                 ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data);
599                 if (ecode != NTDB_SUCCESS) {
600                         ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
601                                    "ntdb_open: open hook failed");
602                         goto fail;
603                 }
604                 open_flags |= O_CREAT;
605         }
606
607         /* If they used O_TRUNC, read will return 0. */
608         rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0);
609         if (rlen == 0 && (open_flags & O_CREAT)) {
610                 ecode = ntdb_new_database(ntdb, seed, &hdr);
611                 if (ecode != NTDB_SUCCESS) {
612                         goto fail;
613                 }
614         } else if (rlen < 0) {
615                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
616                                    "ntdb_open: error %s reading %s",
617                                    strerror(errno), name);
618                 goto fail;
619         } else if (rlen < sizeof(hdr)
620                    || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
621                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
622                                    "ntdb_open: %s is not a ntdb file", name);
623                 goto fail;
624         }
625
626         if (hdr.version != NTDB_VERSION) {
627                 if (hdr.version == bswap_64(NTDB_VERSION))
628                         ntdb->flags |= NTDB_CONVERT;
629                 else {
630                         /* wrong version */
631                         ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
632                                            "ntdb_open:"
633                                            " %s is unknown version 0x%llx",
634                                            name, (long long)hdr.version);
635                         goto fail;
636                 }
637         } else if (ntdb->flags & NTDB_CONVERT) {
638                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
639                                    "ntdb_open:"
640                                    " %s does not need NTDB_CONVERT",
641                                    name);
642                 goto fail;
643         }
644
645         ntdb_context_init(ntdb);
646
647         ntdb_convert(ntdb, &hdr, sizeof(hdr));
648         ntdb->hash_seed = hdr.hash_seed;
649         hash_test = NTDB_HASH_MAGIC;
650         hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
651         if (hdr.hash_test != hash_test) {
652                 /* wrong hash variant */
653                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
654                                    "ntdb_open:"
655                                    " %s uses a different hash function",
656                                    name);
657                 goto fail;
658         }
659
660         ecode = capabilities_ok(ntdb, hdr.capabilities);
661         if (ecode != NTDB_SUCCESS) {
662                 goto fail;
663         }
664
665         /* Clear any features we don't understand. */
666         if ((open_flags & O_ACCMODE) != O_RDONLY) {
667                 hdr.features_used &= NTDB_FEATURE_MASK;
668                 ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
669                                                         features_used),
670                                           &hdr.features_used,
671                                           sizeof(hdr.features_used));
672                 if (ecode != NTDB_SUCCESS)
673                         goto fail;
674         }
675
676         ntdb_unlock_open(ntdb, openlock);
677
678         /* This makes sure we have current map_size and mmap. */
679         ecode = ntdb->io->oob(ntdb, ntdb->file->map_size, 1, true);
680         if (unlikely(ecode != NTDB_SUCCESS))
681                 goto fail;
682
683         if (ntdb->file->map_size % NTDB_PGSIZE != 0) {
684                 ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
685                                     "ntdb_open:"
686                                     " %s size %llu isn't a multiple of %u",
687                                     name, (long long)ntdb->file->map_size,
688                                     NTDB_PGSIZE);
689                 goto fail;
690         }
691
692         /* Now it's fully formed, recover if necessary. */
693         berr = ntdb_needs_recovery(ntdb);
694         if (unlikely(berr != false)) {
695                 if (berr < 0) {
696                         ecode = NTDB_OFF_TO_ERR(berr);
697                         goto fail;
698                 }
699                 ecode = ntdb_lock_and_recover(ntdb);
700                 if (ecode != NTDB_SUCCESS) {
701                         goto fail;
702                 }
703         }
704
705         ecode = ntdb_ftable_init(ntdb);
706         if (ecode != NTDB_SUCCESS) {
707                 goto fail;
708         }
709
710         ntdb->next = tdbs;
711         tdbs = ntdb;
712         return ntdb;
713
714  fail:
715         /* Map ecode to some logical errno. */
716         switch (NTDB_ERR_TO_OFF(ecode)) {
717         case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
718         case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
719                 saved_errno = EIO;
720                 break;
721         case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
722                 saved_errno = EWOULDBLOCK;
723                 break;
724         case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
725                 saved_errno = ENOMEM;
726                 break;
727         case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
728                 saved_errno = EINVAL;
729                 break;
730         default:
731                 saved_errno = EINVAL;
732                 break;
733         }
734
735 fail_errno:
736 #ifdef NTDB_TRACE
737         close(ntdb->tracefd);
738 #endif
739         if (ntdb->file) {
740                 ntdb_lock_cleanup(ntdb);
741                 if (--ntdb->file->refcnt == 0) {
742                         assert(ntdb->file->num_lockrecs == 0);
743                         if (ntdb->file->map_ptr) {
744                                 if (ntdb->flags & NTDB_INTERNAL) {
745                                         free(ntdb->file->map_ptr);
746                                 } else
747                                         ntdb_munmap(ntdb->file);
748                         }
749                         if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0)
750                                 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
751                                            "ntdb_open: failed to close ntdb fd"
752                                            " on error: %s", strerror(errno));
753                         free(ntdb->file->lockrecs);
754                         free(ntdb->file);
755                 }
756         }
757
758         free(ntdb);
759         errno = saved_errno;
760         return NULL;
761 }
762
763 _PUBLIC_ int ntdb_close(struct ntdb_context *ntdb)
764 {
765         int ret = 0;
766         struct ntdb_context **i;
767
768         ntdb_trace(ntdb, "ntdb_close");
769
770         if (ntdb->transaction) {
771                 ntdb_transaction_cancel(ntdb);
772         }
773
774         if (ntdb->file->map_ptr) {
775                 if (ntdb->flags & NTDB_INTERNAL)
776                         free(ntdb->file->map_ptr);
777                 else
778                         ntdb_munmap(ntdb->file);
779         }
780         if (ntdb->file) {
781                 ntdb_lock_cleanup(ntdb);
782                 if (--ntdb->file->refcnt == 0) {
783                         ret = close(ntdb->file->fd);
784                         free(ntdb->file->lockrecs);
785                         free(ntdb->file);
786                 }
787         }
788
789         /* Remove from tdbs list */
790         for (i = &tdbs; *i; i = &(*i)->next) {
791                 if (*i == ntdb) {
792                         *i = ntdb->next;
793                         break;
794                 }
795         }
796
797 #ifdef NTDB_TRACE
798         close(ntdb->tracefd);
799 #endif
800         free(ntdb);
801
802         return ret;
803 }
804
805 _PUBLIC_ void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p)
806 {
807         struct ntdb_context *i;
808
809         for (i = tdbs; i; i = i->next) {
810                 if (fn(i, p) != 0)
811                         break;
812         }
813 }