023dc7fe7d5dc64e3f80047545a8e7c491ca0087
[ddiss/samba.git] / lib / tdb2 / open.c
1  /*
2    Trivial Database 2: opening and closing TDBs
3    Copyright (C) Rusty Russell 2010
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 3 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18 #include "private.h"
19 #include <ccan/build_assert/build_assert.h>
20 #include <assert.h>
21
/*
 * Head of the singly-linked list of open, file-backed tdb contexts.
 * It exists to detect double-opens of the same file (fcntl file locks
 * don't nest!).  As a file-scope static it starts out NULL.
 */
static struct tdb_context *tdbs;
24
25 static struct tdb_file *find_file(dev_t device, ino_t ino)
26 {
27         struct tdb_context *i;
28
29         for (i = tdbs; i; i = i->next) {
30                 if (i->file->device == device && i->file->inode == ino) {
31                         i->file->refcnt++;
32                         return i->file;
33                 }
34         }
35         return NULL;
36 }
37
/*
 * Read exactly len bytes from fd into buf, looping over short reads.
 *
 * Returns true once len bytes have been read (trivially true when
 * len == 0).  Returns false with errno as set by read(2) on error, or
 * with errno set to EWOULDBLOCK if end-of-file is hit first.  A read(2)
 * interrupted by a signal before any data was transferred (EINTR) is
 * retried instead of being reported as a failure.
 */
static bool read_all(int fd, void *buf, size_t len)
{
        char *dst = buf;

        while (len) {
                ssize_t ret = read(fd, dst, len);

                if (ret < 0) {
                        /* Interrupted before any data arrived: try again. */
                        if (errno == EINTR)
                                continue;
                        return false;
                }
                if (ret == 0) {
                        /* ETOOSHORT? */
                        errno = EWOULDBLOCK;
                        return false;
                }
                dst += ret;
                len -= ret;
        }
        return true;
}
55
56 static uint64_t random_number(struct tdb_context *tdb)
57 {
58         int fd;
59         uint64_t ret = 0;
60         struct timeval now;
61
62         fd = open("/dev/urandom", O_RDONLY);
63         if (fd >= 0) {
64                 if (read_all(fd, &ret, sizeof(ret))) {
65                         close(fd);
66                         return ret;
67                 }
68                 close(fd);
69         }
70         /* FIXME: Untested!  Based on Wikipedia protocol description! */
71         fd = open("/dev/egd-pool", O_RDWR);
72         if (fd >= 0) {
73                 /* Command is 1, next byte is size we want to read. */
74                 char cmd[2] = { 1, sizeof(uint64_t) };
75                 if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
76                         char reply[1 + sizeof(uint64_t)];
77                         int r = read(fd, reply, sizeof(reply));
78                         if (r > 1) {
79                                 /* Copy at least some bytes. */
80                                 memcpy(&ret, reply+1, r - 1);
81                                 if (reply[0] == sizeof(uint64_t)
82                                     && r == sizeof(reply)) {
83                                         close(fd);
84                                         return ret;
85                                 }
86                         }
87                 }
88                 close(fd);
89         }
90
91         /* Fallback: pid and time. */
92         gettimeofday(&now, NULL);
93         ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
94         tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
95                    "tdb_open: random from getpid and time");
96         return ret;
97 }
98
99 static void tdb2_context_init(struct tdb_context *tdb)
100 {
101         /* Initialize the TDB2 fields here */
102         tdb_io_init(tdb);
103         tdb->tdb2.direct_access = 0;
104         tdb->tdb2.transaction = NULL;
105         tdb->tdb2.access = NULL;
106 }
107
108 struct new_database {
109         struct tdb_header hdr;
110         struct tdb_freetable ftable;
111 };
112
113 /* initialise a new database */
114 static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb,
115                                        struct tdb_attribute_seed *seed,
116                                        struct tdb_header *hdr)
117 {
118         /* We make it up in memory, then write it out if not internal */
119         struct new_database newdb;
120         unsigned int magic_len;
121         ssize_t rlen;
122         enum TDB_ERROR ecode;
123
124         /* Fill in the header */
125         newdb.hdr.version = TDB_VERSION;
126         if (seed)
127                 newdb.hdr.hash_seed = seed->seed;
128         else
129                 newdb.hdr.hash_seed = random_number(tdb);
130         newdb.hdr.hash_test = TDB_HASH_MAGIC;
131         newdb.hdr.hash_test = tdb->hash_fn(&newdb.hdr.hash_test,
132                                            sizeof(newdb.hdr.hash_test),
133                                            newdb.hdr.hash_seed,
134                                            tdb->hash_data);
135         newdb.hdr.recovery = 0;
136         newdb.hdr.features_used = newdb.hdr.features_offered = TDB_FEATURE_MASK;
137         newdb.hdr.seqnum = 0;
138         newdb.hdr.capabilities = 0;
139         memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved));
140         /* Initial hashes are empty. */
141         memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable));
142
143         /* Free is empty. */
144         newdb.hdr.free_table = offsetof(struct new_database, ftable);
145         memset(&newdb.ftable, 0, sizeof(newdb.ftable));
146         ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0,
147                            sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
148                            sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr),
149                            0);
150         if (ecode != TDB_SUCCESS) {
151                 return ecode;
152         }
153
154         /* Magic food */
155         memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
156         strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
157
158         /* This creates an endian-converted database, as if read from disk */
159         magic_len = sizeof(newdb.hdr.magic_food);
160         tdb_convert(tdb,
161                     (char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len);
162
163         *hdr = newdb.hdr;
164
165         if (tdb->flags & TDB_INTERNAL) {
166                 tdb->file->map_size = sizeof(newdb);
167                 tdb->file->map_ptr = malloc(tdb->file->map_size);
168                 if (!tdb->file->map_ptr) {
169                         return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
170                                           "tdb_new_database:"
171                                           " failed to allocate");
172                 }
173                 memcpy(tdb->file->map_ptr, &newdb, tdb->file->map_size);
174                 return TDB_SUCCESS;
175         }
176         if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) {
177                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
178                                   "tdb_new_database:"
179                                   " failed to seek: %s", strerror(errno));
180         }
181
182         if (ftruncate(tdb->file->fd, 0) == -1) {
183                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
184                                   "tdb_new_database:"
185                                   " failed to truncate: %s", strerror(errno));
186         }
187
188         rlen = write(tdb->file->fd, &newdb, sizeof(newdb));
189         if (rlen != sizeof(newdb)) {
190                 if (rlen >= 0)
191                         errno = ENOSPC;
192                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
193                                   "tdb_new_database: %zi writing header: %s",
194                                   rlen, strerror(errno));
195         }
196         return TDB_SUCCESS;
197 }
198
199 static enum TDB_ERROR tdb_new_file(struct tdb_context *tdb)
200 {
201         tdb->file = malloc(sizeof(*tdb->file));
202         if (!tdb->file)
203                 return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
204                                   "tdb_open: cannot alloc tdb_file structure");
205         tdb->file->num_lockrecs = 0;
206         tdb->file->lockrecs = NULL;
207         tdb->file->allrecord_lock.count = 0;
208         tdb->file->refcnt = 1;
209         tdb->file->map_ptr = NULL;
210         return TDB_SUCCESS;
211 }
212
213 _PUBLIC_ enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb,
214                                  const union tdb_attribute *attr)
215 {
216         switch (attr->base.attr) {
217         case TDB_ATTRIBUTE_LOG:
218                 tdb->log_fn = attr->log.fn;
219                 tdb->log_data = attr->log.data;
220                 break;
221         case TDB_ATTRIBUTE_HASH:
222         case TDB_ATTRIBUTE_SEED:
223         case TDB_ATTRIBUTE_OPENHOOK:
224                 return tdb->last_error
225                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
226                                      TDB_LOG_USE_ERROR,
227                                      "tdb_set_attribute:"
228                                      " cannot set %s after opening",
229                                      attr->base.attr == TDB_ATTRIBUTE_HASH
230                                      ? "TDB_ATTRIBUTE_HASH"
231                                      : attr->base.attr == TDB_ATTRIBUTE_SEED
232                                      ? "TDB_ATTRIBUTE_SEED"
233                                      : "TDB_ATTRIBUTE_OPENHOOK");
234         case TDB_ATTRIBUTE_STATS:
235                 return tdb->last_error
236                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
237                                      TDB_LOG_USE_ERROR,
238                                      "tdb_set_attribute:"
239                                      " cannot set TDB_ATTRIBUTE_STATS");
240         case TDB_ATTRIBUTE_FLOCK:
241                 tdb->lock_fn = attr->flock.lock;
242                 tdb->unlock_fn = attr->flock.unlock;
243                 tdb->lock_data = attr->flock.data;
244                 break;
245         default:
246                 return tdb->last_error
247                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
248                                      TDB_LOG_USE_ERROR,
249                                      "tdb_set_attribute:"
250                                      " unknown attribute type %u",
251                                      attr->base.attr);
252         }
253         return TDB_SUCCESS;
254 }
255
256 _PUBLIC_ enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb,
257                                  union tdb_attribute *attr)
258 {
259         switch (attr->base.attr) {
260         case TDB_ATTRIBUTE_LOG:
261                 if (!tdb->log_fn)
262                         return tdb->last_error = TDB_ERR_NOEXIST;
263                 attr->log.fn = tdb->log_fn;
264                 attr->log.data = tdb->log_data;
265                 break;
266         case TDB_ATTRIBUTE_HASH:
267                 attr->hash.fn = tdb->hash_fn;
268                 attr->hash.data = tdb->hash_data;
269                 break;
270         case TDB_ATTRIBUTE_SEED:
271                 attr->seed.seed = tdb->hash_seed;
272                 break;
273         case TDB_ATTRIBUTE_OPENHOOK:
274                 if (!tdb->openhook)
275                         return tdb->last_error = TDB_ERR_NOEXIST;
276                 attr->openhook.fn = tdb->openhook;
277                 attr->openhook.data = tdb->openhook_data;
278                 break;
279         case TDB_ATTRIBUTE_STATS: {
280                 size_t size = attr->stats.size;
281                 if (size > tdb->stats.size)
282                         size = tdb->stats.size;
283                 memcpy(&attr->stats, &tdb->stats, size);
284                 break;
285         }
286         case TDB_ATTRIBUTE_FLOCK:
287                 attr->flock.lock = tdb->lock_fn;
288                 attr->flock.unlock = tdb->unlock_fn;
289                 attr->flock.data = tdb->lock_data;
290                 break;
291         default:
292                 return tdb->last_error
293                         = tdb_logerr(tdb, TDB_ERR_EINVAL,
294                                      TDB_LOG_USE_ERROR,
295                                      "tdb_get_attribute:"
296                                      " unknown attribute type %u",
297                                      attr->base.attr);
298         }
299         attr->base.next = NULL;
300         return TDB_SUCCESS;
301 }
302
303 _PUBLIC_ void tdb_unset_attribute(struct tdb_context *tdb,
304                          enum tdb_attribute_type type)
305 {
306         switch (type) {
307         case TDB_ATTRIBUTE_LOG:
308                 tdb->log_fn = NULL;
309                 break;
310         case TDB_ATTRIBUTE_OPENHOOK:
311                 tdb->openhook = NULL;
312                 break;
313         case TDB_ATTRIBUTE_HASH:
314         case TDB_ATTRIBUTE_SEED:
315                 tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
316                            "tdb_unset_attribute: cannot unset %s after opening",
317                            type == TDB_ATTRIBUTE_HASH
318                            ? "TDB_ATTRIBUTE_HASH"
319                            : "TDB_ATTRIBUTE_SEED");
320                 break;
321         case TDB_ATTRIBUTE_STATS:
322                 tdb_logerr(tdb, TDB_ERR_EINVAL,
323                            TDB_LOG_USE_ERROR,
324                            "tdb_unset_attribute:"
325                            "cannot unset TDB_ATTRIBUTE_STATS");
326                 break;
327         case TDB_ATTRIBUTE_FLOCK:
328                 tdb->lock_fn = tdb_fcntl_lock;
329                 tdb->unlock_fn = tdb_fcntl_unlock;
330                 break;
331         default:
332                 tdb_logerr(tdb, TDB_ERR_EINVAL,
333                            TDB_LOG_USE_ERROR,
334                            "tdb_unset_attribute: unknown attribute type %u",
335                            type);
336         }
337 }
338
339 /* The top three bits of the capability tell us whether it matters. */
340 enum TDB_ERROR unknown_capability(struct tdb_context *tdb, const char *caller,
341                                   tdb_off_t type)
342 {
343         if (type & TDB_CAP_NOOPEN) {
344                 return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
345                                   "%s: file has unknown capability %llu",
346                                   caller, type & TDB_CAP_NOOPEN);
347         }
348
349         if ((type & TDB_CAP_NOWRITE) && !(tdb->flags & TDB_RDONLY)) {
350                 return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_ERROR,
351                                   "%s: file has unknown capability %llu"
352                                   " (cannot write to it)",
353                                   caller, type & TDB_CAP_NOOPEN);
354         }
355
356         if (type & TDB_CAP_NOCHECK) {
357                 tdb->flags |= TDB_CANT_CHECK;
358         }
359         return TDB_SUCCESS;
360 }
361
362 static enum TDB_ERROR capabilities_ok(struct tdb_context *tdb,
363                                       tdb_off_t capabilities)
364 {
365         tdb_off_t off, next;
366         enum TDB_ERROR ecode = TDB_SUCCESS;
367         const struct tdb_capability *cap;
368
369         /* Check capability list. */
370         for (off = capabilities; off && ecode == TDB_SUCCESS; off = next) {
371                 cap = tdb_access_read(tdb, off, sizeof(*cap), true);
372                 if (TDB_PTR_IS_ERR(cap)) {
373                         return TDB_PTR_ERR(cap);
374                 }
375
376                 switch (cap->type & TDB_CAP_TYPE_MASK) {
377                 /* We don't understand any capabilities (yet). */
378                 default:
379                         ecode = unknown_capability(tdb, "tdb_open", cap->type);
380                 }
381                 next = cap->next;
382                 tdb_access_release(tdb, cap);
383         }
384         return ecode;
385 }
386
387 _PUBLIC_ struct tdb_context *tdb_open(const char *name, int tdb_flags,
388                              int open_flags, mode_t mode,
389                              union tdb_attribute *attr)
390 {
391         struct tdb_context *tdb;
392         struct stat st;
393         int saved_errno = 0;
394         uint64_t hash_test;
395         unsigned v;
396         ssize_t rlen;
397         struct tdb_header hdr;
398         struct tdb_attribute_seed *seed = NULL;
399         tdb_bool_err berr;
400         enum TDB_ERROR ecode;
401         int openlock;
402
403         tdb = malloc(sizeof(*tdb) + (name ? strlen(name) + 1 : 0));
404         if (!tdb) {
405                 /* Can't log this */
406                 errno = ENOMEM;
407                 return NULL;
408         }
409         /* Set name immediately for logging functions. */
410         if (name) {
411                 tdb->name = strcpy((char *)(tdb + 1), name);
412         } else {
413                 tdb->name = NULL;
414         }
415         tdb->flags = tdb_flags;
416         tdb->log_fn = NULL;
417         tdb->open_flags = open_flags;
418         tdb->last_error = TDB_SUCCESS;
419         tdb->file = NULL;
420         tdb->openhook = NULL;
421         tdb->lock_fn = tdb_fcntl_lock;
422         tdb->unlock_fn = tdb_fcntl_unlock;
423         tdb->hash_fn = tdb_jenkins_hash;
424         memset(&tdb->stats, 0, sizeof(tdb->stats));
425         tdb->stats.base.attr = TDB_ATTRIBUTE_STATS;
426         tdb->stats.size = sizeof(tdb->stats);
427
428         while (attr) {
429                 switch (attr->base.attr) {
430                 case TDB_ATTRIBUTE_HASH:
431                         tdb->hash_fn = attr->hash.fn;
432                         tdb->hash_data = attr->hash.data;
433                         break;
434                 case TDB_ATTRIBUTE_SEED:
435                         seed = &attr->seed;
436                         break;
437                 case TDB_ATTRIBUTE_OPENHOOK:
438                         tdb->openhook = attr->openhook.fn;
439                         tdb->openhook_data = attr->openhook.data;
440                         break;
441                 default:
442                         /* These are set as normal. */
443                         ecode = tdb_set_attribute(tdb, attr);
444                         if (ecode != TDB_SUCCESS)
445                                 goto fail;
446                 }
447                 attr = attr->base.next;
448         }
449
450         if (tdb_flags & ~(TDB_INTERNAL | TDB_NOLOCK | TDB_NOMMAP | TDB_CONVERT
451                           | TDB_NOSYNC | TDB_SEQNUM | TDB_ALLOW_NESTING
452                           | TDB_RDONLY)) {
453                 ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
454                                    "tdb_open: unknown flags %u", tdb_flags);
455                 goto fail;
456         }
457
458         if (seed) {
459                 if (!(tdb_flags & TDB_INTERNAL) && !(open_flags & O_CREAT)) {
460                         ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
461                                            TDB_LOG_USE_ERROR,
462                                            "tdb_open:"
463                                            " cannot set TDB_ATTRIBUTE_SEED"
464                                            " without O_CREAT.");
465                         goto fail;
466                 }
467         }
468
469         if ((open_flags & O_ACCMODE) == O_WRONLY) {
470                 ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR,
471                                    "tdb_open: can't open tdb %s write-only",
472                                    name);
473                 goto fail;
474         }
475
476         if ((open_flags & O_ACCMODE) == O_RDONLY) {
477                 openlock = F_RDLCK;
478                 tdb->flags |= TDB_RDONLY;
479         } else {
480                 if (tdb_flags & TDB_RDONLY) {
481                         ecode = tdb_logerr(tdb, TDB_ERR_EINVAL,
482                                            TDB_LOG_USE_ERROR,
483                                            "tdb_open: can't use TDB_RDONLY"
484                                            " without O_RDONLY");
485                         goto fail;
486                 }
487                 openlock = F_WRLCK;
488         }
489
490         /* internal databases don't need any of the rest. */
491         if (tdb->flags & TDB_INTERNAL) {
492                 tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP);
493                 ecode = tdb_new_file(tdb);
494                 if (ecode != TDB_SUCCESS) {
495                         goto fail;
496                 }
497                 tdb->file->fd = -1;
498                 ecode = tdb_new_database(tdb, seed, &hdr);
499                 if (ecode == TDB_SUCCESS) {
500                         tdb_convert(tdb, &hdr.hash_seed,
501                                     sizeof(hdr.hash_seed));
502                         tdb->hash_seed = hdr.hash_seed;
503                         tdb2_context_init(tdb);
504                         tdb_ftable_init(tdb);
505                 }
506                 if (ecode != TDB_SUCCESS) {
507                         goto fail;
508                 }
509                 return tdb;
510         }
511
512         if (stat(name, &st) != -1)
513                 tdb->file = find_file(st.st_dev, st.st_ino);
514
515         if (!tdb->file) {
516                 int fd;
517
518                 if ((fd = open(name, open_flags, mode)) == -1) {
519                         /* errno set by open(2) */
520                         saved_errno = errno;
521                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
522                                    "tdb_open: could not open file %s: %s",
523                                    name, strerror(errno));
524                         goto fail_errno;
525                 }
526
527                 /* on exec, don't inherit the fd */
528                 v = fcntl(fd, F_GETFD, 0);
529                 fcntl(fd, F_SETFD, v | FD_CLOEXEC);
530
531                 if (fstat(fd, &st) == -1) {
532                         saved_errno = errno;
533                         tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
534                                    "tdb_open: could not stat open %s: %s",
535                                    name, strerror(errno));
536                         close(fd);
537                         goto fail_errno;
538                 }
539
540                 ecode = tdb_new_file(tdb);
541                 if (ecode != TDB_SUCCESS) {
542                         close(fd);
543                         goto fail;
544                 }
545
546                 tdb->file->fd = fd;
547                 tdb->file->device = st.st_dev;
548                 tdb->file->inode = st.st_ino;
549                 tdb->file->map_ptr = NULL;
550                 tdb->file->map_size = 0;
551         }
552
553         /* ensure there is only one process initialising at once */
554         ecode = tdb_lock_open(tdb, openlock, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
555         if (ecode != TDB_SUCCESS) {
556                 saved_errno = errno;
557                 goto fail_errno;
558         }
559
560         /* call their open hook if they gave us one. */
561         if (tdb->openhook) {
562                 ecode = tdb->openhook(tdb->file->fd, tdb->openhook_data);
563                 if (ecode != TDB_SUCCESS) {
564                         tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
565                                    "tdb_open: open hook failed");
566                         goto fail;
567                 }
568                 open_flags |= O_CREAT;
569         }
570
571         /* If they used O_TRUNC, read will return 0. */
572         rlen = pread(tdb->file->fd, &hdr, sizeof(hdr), 0);
573         if (rlen == 0 && (open_flags & O_CREAT)) {
574                 ecode = tdb_new_database(tdb, seed, &hdr);
575                 if (ecode != TDB_SUCCESS) {
576                         goto fail;
577                 }
578         } else if (rlen < 0) {
579                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
580                                    "tdb_open: error %s reading %s",
581                                    strerror(errno), name);
582                 goto fail;
583         } else if (rlen < sizeof(hdr)
584                    || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
585                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
586                                    "tdb_open: %s is not a tdb2 file", name);
587                 goto fail;
588         }
589
590         if (hdr.version != TDB_VERSION) {
591                 if (hdr.version == bswap_64(TDB_VERSION))
592                         tdb->flags |= TDB_CONVERT;
593                 else {
594                         /* wrong version */
595                         ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
596                                            "tdb_open:"
597                                            " %s is unknown version 0x%llx",
598                                            name, (long long)hdr.version);
599                         goto fail;
600                 }
601         } else if (tdb->flags & TDB_CONVERT) {
602                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
603                                    "tdb_open:"
604                                    " %s does not need TDB_CONVERT",
605                                    name);
606                 goto fail;
607         }
608
609         tdb2_context_init(tdb);
610
611         tdb_convert(tdb, &hdr, sizeof(hdr));
612         tdb->hash_seed = hdr.hash_seed;
613         hash_test = TDB_HASH_MAGIC;
614         hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
615         if (hdr.hash_test != hash_test) {
616                 /* wrong hash variant */
617                 ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
618                                    "tdb_open:"
619                                    " %s uses a different hash function",
620                                    name);
621                 goto fail;
622         }
623
624         ecode = capabilities_ok(tdb, hdr.capabilities);
625         if (ecode != TDB_SUCCESS) {
626                 goto fail;
627         }
628
629         /* Clear any features we don't understand. */
630         if ((open_flags & O_ACCMODE) != O_RDONLY) {
631                 hdr.features_used &= TDB_FEATURE_MASK;
632                 ecode = tdb_write_convert(tdb, offsetof(struct tdb_header,
633                                                         features_used),
634                                           &hdr.features_used,
635                                           sizeof(hdr.features_used));
636                 if (ecode != TDB_SUCCESS)
637                         goto fail;
638         }
639
640         tdb_unlock_open(tdb, openlock);
641
642         /* This makes sure we have current map_size and mmap. */
643         ecode = tdb->tdb2.io->oob(tdb, tdb->file->map_size, 1, true);
644         if (unlikely(ecode != TDB_SUCCESS))
645                 goto fail;
646
647         /* Now it's fully formed, recover if necessary. */
648         berr = tdb_needs_recovery(tdb);
649         if (unlikely(berr != false)) {
650                 if (berr < 0) {
651                         ecode = TDB_OFF_TO_ERR(berr);
652                         goto fail;
653                 }
654                 ecode = tdb_lock_and_recover(tdb);
655                 if (ecode != TDB_SUCCESS) {
656                         goto fail;
657                 }
658         }
659
660         ecode = tdb_ftable_init(tdb);
661         if (ecode != TDB_SUCCESS) {
662                 goto fail;
663         }
664
665         tdb->next = tdbs;
666         tdbs = tdb;
667         return tdb;
668
669  fail:
670         /* Map ecode to some logical errno. */
671         switch (TDB_ERR_TO_OFF(ecode)) {
672         case TDB_ERR_TO_OFF(TDB_ERR_CORRUPT):
673         case TDB_ERR_TO_OFF(TDB_ERR_IO):
674                 saved_errno = EIO;
675                 break;
676         case TDB_ERR_TO_OFF(TDB_ERR_LOCK):
677                 saved_errno = EWOULDBLOCK;
678                 break;
679         case TDB_ERR_TO_OFF(TDB_ERR_OOM):
680                 saved_errno = ENOMEM;
681                 break;
682         case TDB_ERR_TO_OFF(TDB_ERR_EINVAL):
683                 saved_errno = EINVAL;
684                 break;
685         default:
686                 saved_errno = EINVAL;
687                 break;
688         }
689
690 fail_errno:
691 #ifdef TDB_TRACE
692         close(tdb->tracefd);
693 #endif
694         if (tdb->file) {
695                 tdb_lock_cleanup(tdb);
696                 if (--tdb->file->refcnt == 0) {
697                         assert(tdb->file->num_lockrecs == 0);
698                         if (tdb->file->map_ptr) {
699                                 if (tdb->flags & TDB_INTERNAL) {
700                                         free(tdb->file->map_ptr);
701                                 } else
702                                         tdb_munmap(tdb->file);
703                         }
704                         if (close(tdb->file->fd) != 0)
705                                 tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
706                                            "tdb_open: failed to close tdb fd"
707                                            " on error: %s", strerror(errno));
708                         free(tdb->file->lockrecs);
709                         free(tdb->file);
710                 }
711         }
712
713         free(tdb);
714         errno = saved_errno;
715         return NULL;
716 }
717
718 _PUBLIC_ int tdb_close(struct tdb_context *tdb)
719 {
720         int ret = 0;
721         struct tdb_context **i;
722
723         tdb_trace(tdb, "tdb_close");
724
725         if (tdb->tdb2.transaction) {
726                 tdb_transaction_cancel(tdb);
727         }
728
729         if (tdb->file->map_ptr) {
730                 if (tdb->flags & TDB_INTERNAL)
731                         free(tdb->file->map_ptr);
732                 else
733                         tdb_munmap(tdb->file);
734         }
735         if (tdb->file) {
736                 tdb_lock_cleanup(tdb);
737                 if (--tdb->file->refcnt == 0) {
738                         ret = close(tdb->file->fd);
739                         free(tdb->file->lockrecs);
740                         free(tdb->file);
741                 }
742         }
743
744         /* Remove from tdbs list */
745         for (i = &tdbs; *i; i = &(*i)->next) {
746                 if (*i == tdb) {
747                         *i = tdb->next;
748                         break;
749                 }
750         }
751
752 #ifdef TDB_TRACE
753         close(tdb->tracefd);
754 #endif
755         free(tdb);
756
757         return ret;
758 }
759
760 _PUBLIC_ void tdb_foreach_(int (*fn)(struct tdb_context *, void *), void *p)
761 {
762         struct tdb_context *i;
763
764         for (i = tdbs; i; i = i->next) {
765                 if (fn(i, p) != 0)
766                         break;
767         }
768 }