2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9 Copyright (C) Rusty Russell 2010
11 ** NOTE! The following LGPL license applies to the ntdb
12 ** library. This does NOT imply that all of Samba is released
15 This library is free software; you can redistribute it and/or
16 modify it under the terms of the GNU Lesser General Public
17 License as published by the Free Software Foundation; either
18 version 3 of the License, or (at your option) any later version.
20 This library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
25 You should have received a copy of the GNU Lesser General Public
26 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include <ccan/likely/likely.h>
/*
 * Unmap and free every stale mapping queued on ntdb->file->old_mmaps.
 *
 * Asserts that no direct accessors remain (direct_count == 0) before
 * touching the list.
 */
31 static void free_old_mmaps(struct ntdb_context *ntdb)
33 struct ntdb_old_mmap *i;
35 assert(ntdb->file->direct_count == 0);
/* Pop entries off the head of the list until it is empty. */
37 while ((i = ntdb->file->old_mmaps) != NULL) {
38 ntdb->file->old_mmaps = i->next;
39 munmap(i->map_ptr, i->map_size);
/* The list node itself is returned through the context free_fn hook. */
40 ntdb->free_fn(i, ntdb->alloc_data);
/*
 * Drop the current mapping of the database file.
 *
 * The file descriptor (fd == -1) and the mapping pointer are tested first;
 * the bodies of those two tests are not shown in this listing.
 *
 * While direct accessors are outstanding (direct_count != 0) the mapping
 * is not unmapped. Instead a struct ntdb_old_mmap recording map_ptr and
 * map_size is allocated with alloc_fn and pushed onto file->old_mmaps, the
 * list that free_old_mmaps() later walks. An allocation failure is reported
 * through ntdb_logerr() with NTDB_ERR_OOM.
 *
 * The final two statements munmap() the region and clear map_ptr.
 * NOTE(review): presumably they form the alternative branch taken when
 * there are no accessors - the separating line is not shown; confirm.
 */
44 enum NTDB_ERROR ntdb_munmap(struct ntdb_context *ntdb)
46 if (ntdb->file->fd == -1) {
50 if (!ntdb->file->map_ptr) {
54 /* We can't unmap now if there are accessors. */
55 if (ntdb->file->direct_count) {
56 struct ntdb_old_mmap *old
57 = ntdb->alloc_fn(ntdb, sizeof(*old), ntdb->alloc_data);
59 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
60 "ntdb_munmap alloc failed");
/* Remember the live mapping at the head of the deferred list. */
62 old->next = ntdb->file->old_mmaps;
63 old->map_ptr = ntdb->file->map_ptr;
64 old->map_size = ntdb->file->map_size;
65 ntdb->file->old_mmaps = old;
67 munmap(ntdb->file->map_ptr, ntdb->file->map_size);
68 ntdb->file->map_ptr = NULL;
/*
 * Map the database file into memory and store the result in
 * ntdb->file->map_ptr.
 *
 * Steps visible in this listing:
 *  - the NTDB_INTERNAL flag is tested first, and NTDB_NOMMAP is tested
 *    only when HAVE_INCOHERENT_MMAP is not defined (what the tests lead
 *    to is not shown);
 *  - the protection is PROT_READ when the file was opened O_RDONLY and
 *    PROT_READ | PROT_WRITE in the other visible assignment;
 *  - mmap() is called with MAP_SHARED at file offset 0, but only when
 *    map_size survives a round trip through size_t; otherwise the
 *    pointer is set to MAP_FAILED directly;
 *  - MAP_FAILED is normalised to NULL in map_ptr;
 *  - with HAVE_INCOHERENT_MMAP the failure is returned as NTDB_ERR_IO;
 *    the other logging call reports it at NTDB_LOG_WARNING level with
 *    code NTDB_SUCCESS.
 *
 * NOTE(review): the final return statement is not shown; presumably a
 * failed mmap is not fatal in the coherent case - confirm.
 */
73 enum NTDB_ERROR ntdb_mmap(struct ntdb_context *ntdb)
77 if (ntdb->flags & NTDB_INTERNAL)
80 #ifndef HAVE_INCOHERENT_MMAP
81 if (ntdb->flags & NTDB_NOMMAP)
85 if ((ntdb->open_flags & O_ACCMODE) == O_RDONLY)
86 mmap_flags = PROT_READ;
88 mmap_flags = PROT_READ | PROT_WRITE;
90 /* size_t can be smaller than off_t. */
91 if ((size_t)ntdb->file->map_size == ntdb->file->map_size) {
92 ntdb->file->map_ptr = mmap(NULL, ntdb->file->map_size,
94 MAP_SHARED, ntdb->file->fd, 0);
96 ntdb->file->map_ptr = MAP_FAILED;
99 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
101 if (ntdb->file->map_ptr == MAP_FAILED) {
102 ntdb->file->map_ptr = NULL;
103 #ifdef HAVE_INCOHERENT_MMAP
104 /* Incoherent mmap means everyone must mmap! */
105 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
106 "ntdb_mmap failed for size %lld (%s)",
107 (long long)ntdb->file->map_size,
110 ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
111 "ntdb_mmap failed for size %lld (%s)",
112 (long long)ntdb->file->map_size, strerror(errno));
/*
 * ntdb_normal_oob: bounds check for the range [off, off + len).
 *
 * Statements visible in this listing:
 *  - an off + len sum that wraps around is logged as NTDB_ERR_IO;
 *  - for NTDB_INTERNAL databases a beyond-internal error is logged;
 *  - otherwise the expand lock is taken with F_RDLCK, the file size is
 *    sampled with fstat() and the lock is released again (also on the
 *    fstat failure path);
 *  - when st_size is still smaller than off + len a beyond-eof error is
 *    logged;
 *  - finally the file is unmapped, map_size is set to st.st_size and the
 *    result of ntdb_mmap() is returned.
 *
 * NOTE(review): some messages use %llu while the matching arguments are
 * cast to (long long); conversion and cast signedness disagree.
 * NOTE(review): the conditions in front of the logging calls, the use of
 * the probe argument and several return statements are not shown here.
 */
118 /* check for an out of bounds access - if it is out of bounds then
119 see if the database has been expanded by someone else and expand
121 note that "len" is the minimum length needed for the db.
123 If probe is true, len being too large isn't a failure.
125 static enum NTDB_ERROR ntdb_normal_oob(struct ntdb_context *ntdb,
126 ntdb_off_t off, ntdb_len_t len,
130 enum NTDB_ERROR ecode;
132 if (len + off < len) {
136 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
137 "ntdb_oob off %llu len %llu wrap\n",
138 (long long)off, (long long)len);
141 if (ntdb->flags & NTDB_INTERNAL) {
145 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
146 "ntdb_oob len %lld beyond internal"
148 (long long)(off + len),
149 (long long)ntdb->file->map_size);
153 ecode = ntdb_lock_expand(ntdb, F_RDLCK);
154 if (ecode != NTDB_SUCCESS) {
158 if (fstat(ntdb->file->fd, &st) != 0) {
159 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
160 "Failed to fstat file: %s", strerror(errno));
161 ntdb_unlock_expand(ntdb, F_RDLCK);
165 ntdb_unlock_expand(ntdb, F_RDLCK);
167 if (st.st_size < off + len) {
171 ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
172 "ntdb_oob len %llu beyond eof at %llu",
173 (long long)(off + len), (long long)st.st_size);
177 /* Unmap, update size, remap */
178 ecode = ntdb_munmap(ntdb);
183 ntdb->file->map_size = st.st_size;
184 return ntdb_mmap(ntdb);
/*
 * Byte-swap a buffer of 64-bit words in place.
 *
 * size is a byte count and must be a multiple of 8 (asserted). The swap
 * only happens when the context has NTDB_CONVERT set and buf is non-NULL;
 * otherwise the buffer is left untouched.
 *
 * NOTE(review): the return statement is not shown; presumably buf is
 * returned so calls can be nested - confirm.
 */
187 /* Endian conversion: we only ever deal with 8 byte quantities */
188 void *ntdb_convert(const struct ntdb_context *ntdb, void *buf, ntdb_len_t size)
190 assert(size % 8 == 0);
191 if (unlikely((ntdb->flags & NTDB_CONVERT)) && buf) {
192 uint64_t i, *p = (uint64_t *)buf;
/* One bswap_64 per 8-byte word. */
193 for (i = 0; i < size / 8; i++)
194 p[i] = bswap_64(p[i]);
/*
 * Scan entries [start, end) of an array of ntdb_off_t located at file
 * offset base.
 *
 * The (end - start) entries are fetched with ntdb_access_read() with the
 * convert argument false, so values are inspected in their stored byte
 * order. A failed access is returned encoded with NTDB_ERR_TO_OFF(). The
 * access is released before returning.
 *
 * NOTE(review): the loop body and the final return are not shown here.
 */
199 /* Return first non-zero offset in offset array, or end, or -ve error. */
200 /* FIXME: Return the off? */
201 uint64_t ntdb_find_nonzero_off(struct ntdb_context *ntdb,
202 ntdb_off_t base, uint64_t start, uint64_t end)
207 /* Zero vs non-zero is the same unconverted: minor optimization. */
208 val = ntdb_access_read(ntdb, base + start * sizeof(ntdb_off_t),
209 (end - start) * sizeof(ntdb_off_t), false);
210 if (NTDB_PTR_IS_ERR(val)) {
211 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val));
/* i indexes the fetched window, i.e. it is relative to start. */
214 for (i = 0; i < (end - start); i++) {
218 ntdb_access_release(ntdb, val);
/*
 * Scan num entries of an array of ntdb_off_t located at file offset off.
 *
 * The entries are fetched with ntdb_access_read() with the convert
 * argument false, so values are inspected in their stored byte order. A
 * failed access is returned encoded with NTDB_ERR_TO_OFF(). The access is
 * released before returning.
 *
 * NOTE(review): the rest of the parameter list, the loop body and the
 * final return are not shown here.
 */
222 /* Return first zero offset in num offset array, or num, or -ve error. */
223 uint64_t ntdb_find_zero_off(struct ntdb_context *ntdb, ntdb_off_t off,
229 /* Zero vs non-zero is the same unconverted: minor optimization. */
230 val = ntdb_access_read(ntdb, off, num * sizeof(ntdb_off_t), false);
231 if (NTDB_PTR_IS_ERR(val)) {
232 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(val));
235 for (i = 0; i < num; i++) {
239 ntdb_access_release(ntdb, val);
/*
 * Overwrite len bytes of the database starting at off with zeroes.
 *
 * A writable direct pointer is requested through io->direct() first; an
 * error pointer is returned to the caller as its error code. The database
 * must not be read-only (asserted).
 *
 * The visible fallback writes from an 8192-byte zeroed stack buffer with
 * io->twrite(), at most sizeof(buf) bytes per call.
 *
 * NOTE(review): the use made of a valid direct pointer, the loop header
 * and the off/len bookkeeping are not shown in this listing.
 */
243 enum NTDB_ERROR zero_out(struct ntdb_context *ntdb, ntdb_off_t off, ntdb_len_t len)
245 char buf[8192] = { 0 };
246 void *p = ntdb->io->direct(ntdb, off, len, true);
247 enum NTDB_ERROR ecode = NTDB_SUCCESS;
249 assert(!(ntdb->flags & NTDB_RDONLY));
250 if (NTDB_PTR_IS_ERR(p)) {
251 return NTDB_PTR_ERR(p);
/* Chunk size: whatever is left, capped at the size of the zero buffer. */
258 unsigned todo = len < sizeof(buf) ? len : sizeof(buf);
259 ecode = ntdb->io->twrite(ntdb, off, buf, todo);
260 if (ecode != NTDB_SUCCESS) {
/*
 * ntdb_write: store len bytes from buf at file offset off.
 *
 * Read-only databases are rejected with NTDB_ERR_RDONLY. The range is
 * validated with ntdb_oob() (probe argument false) before any data moves.
 * When the file is mapped the data is copied straight into the mapping
 * with memcpy(); the other visible path uses pwrite() on the descriptor
 * and reports a failure as NTDB_ERR_IO.
 *
 * NOTE(review): the HAVE_INCOHERENT_MMAP section, the test applied to the
 * pwrite() result and the success return are not shown in this listing.
 */
269 /* write a lump of data at a specified offset */
270 static enum NTDB_ERROR ntdb_write(struct ntdb_context *ntdb, ntdb_off_t off,
271 const void *buf, ntdb_len_t len)
273 enum NTDB_ERROR ecode;
275 if (ntdb->flags & NTDB_RDONLY) {
276 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
277 "Write to read-only database");
280 ecode = ntdb_oob(ntdb, off, len, false);
281 if (ecode != NTDB_SUCCESS) {
285 if (ntdb->file->map_ptr) {
286 memcpy(off + (char *)ntdb->file->map_ptr, buf, len);
288 #ifdef HAVE_INCOHERENT_MMAP
292 ret = pwrite(ntdb->file->fd, buf, len, off);
294 /* This shouldn't happen: we avoid sparse files. */
298 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
299 "ntdb_write: %zi at %zu len=%zu (%s)",
300 ret, (size_t)off, (size_t)len,
/*
 * ntdb_read: fetch len bytes at file offset off into buf.
 *
 * The range is validated with ntdb_oob() (probe argument false) first.
 * When the file is mapped the bytes are copied out of the mapping with
 * memcpy(); the other visible path uses pread() on the descriptor and
 * reports a failure as NTDB_ERR_IO, including the current map_size in the
 * message.
 *
 * NOTE(review): the HAVE_INCOHERENT_MMAP section, the test applied to the
 * pread() result and the success return are not shown in this listing.
 */
308 /* read a lump of data at a specified offset */
309 static enum NTDB_ERROR ntdb_read(struct ntdb_context *ntdb, ntdb_off_t off,
310 void *buf, ntdb_len_t len)
312 enum NTDB_ERROR ecode;
314 ecode = ntdb_oob(ntdb, off, len, false);
315 if (ecode != NTDB_SUCCESS) {
319 if (ntdb->file->map_ptr) {
320 memcpy(buf, off + (char *)ntdb->file->map_ptr, len);
322 #ifdef HAVE_INCOHERENT_MMAP
325 ssize_t r = pread(ntdb->file->fd, buf, len, off);
327 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
328 "ntdb_read failed with %zi at %zu "
329 "len=%zu (%s) map_size=%zu",
330 r, (size_t)off, (size_t)len,
332 (size_t)ntdb->file->map_size);
/*
 * Write len bytes of rec at offset off, byte-swapping when required.
 *
 * With NTDB_CONVERT set the record is copied into a temporary buffer
 * obtained from alloc_fn, converted there with ntdb_convert() and written
 * with io->twrite(); the temporary is then released with free_fn. The
 * caller's buffer is never modified (it is const). An allocation failure
 * is reported as NTDB_ERR_OOM.
 *
 * Without NTDB_CONVERT the record is handed to io->twrite() unchanged.
 */
339 enum NTDB_ERROR ntdb_write_convert(struct ntdb_context *ntdb, ntdb_off_t off,
340 const void *rec, size_t len)
342 enum NTDB_ERROR ecode;
344 if (unlikely((ntdb->flags & NTDB_CONVERT))) {
345 void *conv = ntdb->alloc_fn(ntdb, len, ntdb->alloc_data);
347 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
348 "ntdb_write: no memory converting"
/* Convert a private copy so the source record stays intact. */
351 memcpy(conv, rec, len);
352 ecode = ntdb->io->twrite(ntdb, off,
353 ntdb_convert(ntdb, conv, len), len);
354 ntdb->free_fn(conv, ntdb->alloc_data);
356 ecode = ntdb->io->twrite(ntdb, off, rec, len);
/*
 * Read len bytes at offset off into rec with io->tread(), then pass the
 * buffer through ntdb_convert() in place.
 *
 * The conversion call is made regardless of the tread() result.
 * NOTE(review): the return statement is not shown; presumably the tread()
 * error code is returned - confirm.
 */
361 enum NTDB_ERROR ntdb_read_convert(struct ntdb_context *ntdb, ntdb_off_t off,
362 void *rec, size_t len)
364 enum NTDB_ERROR ecode = ntdb->io->tread(ntdb, off, rec, len);
365 ntdb_convert(ntdb, rec, len);
/*
 * Allocate a buffer of prefix + len bytes and read len bytes from the
 * database at offset into it, starting prefix bytes into the buffer.
 *
 * The allocation goes through alloc_fn and is never smaller than one
 * byte. Failures are returned as error pointers: NTDB_ERR_OOM when the
 * allocation fails (also logged), or the io->tread() error code, in which
 * case the buffer is released with free_fn first.
 *
 * NOTE(review): the success return is not shown; presumably the start of
 * the buffer (including the prefix area) is returned - confirm.
 */
369 static void *_ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset,
370 ntdb_len_t len, unsigned int prefix)
373 enum NTDB_ERROR ecode;
375 /* some systems don't like zero length malloc */
376 buf = ntdb->alloc_fn(ntdb, prefix + len ? prefix + len : 1,
379 ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
380 "ntdb_alloc_read alloc failed len=%zu",
381 (size_t)(prefix + len));
382 return NTDB_ERR_PTR(NTDB_ERR_OOM);
/* The payload lands after the caller-reserved prefix area. */
384 ecode = ntdb->io->tread(ntdb, offset, buf+prefix, len);
385 if (unlikely(ecode != NTDB_SUCCESS)) {
386 ntdb->free_fn(buf, ntdb->alloc_data);
387 return NTDB_ERR_PTR(ecode);
/*
 * Public wrapper around _ntdb_alloc_read() with no prefix area: the
 * result is whatever that helper returns for a prefix of 0.
 */
393 /* read a lump of data, allocating the space for it */
394 void *ntdb_alloc_read(struct ntdb_context *ntdb, ntdb_off_t offset, ntdb_len_t len)
396 return _ntdb_alloc_read(ntdb, offset, len, 0);
/*
 * Write the pattern held in buf (size bytes) to the file with pwrite(),
 * covering len bytes starting at off.
 *
 * Each write is limited to the smaller of len and size. A failed write is
 * reported through ntdb_logerr() as NTDB_ERR_IO.
 *
 * NOTE(review): the surrounding loop, the test on the pwrite() result and
 * the success return are not shown in this listing.
 */
399 static enum NTDB_ERROR fill(struct ntdb_context *ntdb,
400 const void *buf, size_t size,
401 ntdb_off_t off, ntdb_len_t len)
404 size_t n = len > size ? size : len;
405 ssize_t ret = pwrite(ntdb->file->fd, buf, n, off);
410 return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
412 " %zi at %zu len=%zu (%s)",
413 ret, (size_t)off, (size_t)len,
/*
 * ntdb_expand_file: grow the database by addition bytes.
 *
 * The resulting size must be a multiple of NTDB_PGSIZE (asserted) and the
 * database must not be read-only (NTDB_ERR_RDONLY otherwise).
 *
 * NTDB_INTERNAL databases are grown through the expand_fn hook; on
 * success map_ptr and map_size are updated, on failure NTDB_ERR_OOM is
 * reported.
 *
 * The remaining statements unmap the file, call ftruncate() for the new
 * size, write 0x43 filler bytes over the added region with fill(), bump
 * map_size and return the result of ntdb_mmap().
 *
 * NOTE(review): the second parameter line, the declaration of buf and
 * some error returns are not shown in this listing.
 */
422 /* expand a file. we prefer to use ftruncate, as that is what posix
423 says to use for mmap expansion */
424 static enum NTDB_ERROR ntdb_expand_file(struct ntdb_context *ntdb,
428 enum NTDB_ERROR ecode;
430 assert((ntdb->file->map_size + addition) % NTDB_PGSIZE == 0);
431 if (ntdb->flags & NTDB_RDONLY) {
432 return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
433 "Expand on read-only database");
436 if (ntdb->flags & NTDB_INTERNAL) {
437 char *new = ntdb->expand_fn(ntdb->file->map_ptr,
438 ntdb->file->map_size + addition,
441 return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
442 "No memory to expand database");
444 ntdb->file->map_ptr = new;
445 ntdb->file->map_size += addition;
448 /* Unmap before trying to write; old NTDB claimed OpenBSD had
449 * problem with this otherwise. */
450 ecode = ntdb_munmap(ntdb);
455 /* If this fails, we try to fill anyway. */
456 if (ftruncate(ntdb->file->fd, ntdb->file->map_size + addition))
459 /* now fill the file with something. This ensures that the
460 file isn't sparse, which would be very bad if we ran out of
461 disk. This must be done with write, not via mmap */
462 memset(buf, 0x43, sizeof(buf));
463 ecode = fill(ntdb, buf, sizeof(buf), ntdb->file->map_size,
465 if (ecode != NTDB_SUCCESS)
/* Only account for the new space once it has really been written. */
467 ntdb->file->map_size += addition;
468 return ntdb_mmap(ntdb);
/*
 * Obtain a read-only view of len bytes at offset off.
 *
 * Without NTDB_CONVERT a direct pointer is requested from io->direct()
 * (write argument false); an error pointer is detected with
 * NTDB_PTR_IS_ERR().
 *
 * The other visible path reads the data into a fresh allocation made by
 * _ntdb_alloc_read() with room for a struct ntdb_access_hdr in front, and
 * links that header into the ntdb->access list. ntdb_convert() is applied
 * to the data and file->direct_count is incremented in statements whose
 * guarding conditions are not shown.
 *
 * NOTE(review): presumably the conversion depends on the convert argument
 * and the counter is only bumped for direct pointers - confirm. Callers in
 * this file pair the call with ntdb_access_release().
 */
472 const void *ntdb_access_read(struct ntdb_context *ntdb,
473 ntdb_off_t off, ntdb_len_t len, bool convert)
477 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
478 ret = ntdb->io->direct(ntdb, off, len, false);
480 if (NTDB_PTR_IS_ERR(ret)) {
485 struct ntdb_access_hdr *hdr;
486 hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
487 if (NTDB_PTR_IS_ERR(hdr)) {
490 hdr->next = ntdb->access;
494 ntdb_convert(ntdb, (void *)ret, len);
497 ntdb->file->direct_count++;
/*
 * Obtain a writable view of len bytes at offset off.
 *
 * Read-only databases are refused: the error is logged and an
 * NTDB_ERR_RDONLY error pointer is returned.
 *
 * Without NTDB_CONVERT a direct pointer is requested from io->direct()
 * (write argument true); an error pointer is detected with
 * NTDB_PTR_IS_ERR().
 *
 * The other visible path reads the current contents into a fresh
 * allocation made by _ntdb_alloc_read() with room for a struct
 * ntdb_access_hdr in front, links that header into the ntdb->access list
 * and records the convert argument in it. ntdb_convert() is applied to
 * the data and file->direct_count is incremented in statements whose
 * guarding conditions are not shown.
 *
 * NOTE(review): presumably the conversion depends on the convert argument
 * and the counter is only bumped for direct pointers - confirm.
 */
503 void *ntdb_access_write(struct ntdb_context *ntdb,
504 ntdb_off_t off, ntdb_len_t len, bool convert)
508 if (ntdb->flags & NTDB_RDONLY) {
509 ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
510 "Write to read-only database");
511 return NTDB_ERR_PTR(NTDB_ERR_RDONLY);
514 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
515 ret = ntdb->io->direct(ntdb, off, len, true);
517 if (NTDB_PTR_IS_ERR(ret)) {
523 struct ntdb_access_hdr *hdr;
524 hdr = _ntdb_alloc_read(ntdb, off, len, sizeof(*hdr));
525 if (NTDB_PTR_IS_ERR(hdr)) {
528 hdr->next = ntdb->access;
532 hdr->convert = convert;
535 ntdb_convert(ntdb, (void *)ret, len);
537 ntdb->file->direct_count++;
/*
 * Walk the ntdb->access list looking for the header that belongs to p.
 *
 * The iteration uses a pointer to the link field, so the value handed
 * back can be used by the caller to unlink the entry.
 * NOTE(review): the match test and the return statements are not shown in
 * this listing.
 */
542 static struct ntdb_access_hdr **find_hdr(struct ntdb_context *ntdb, const void *p)
544 struct ntdb_access_hdr **hp;
546 for (hp = &ntdb->access; *hp; hp = &(*hp)->next) {
/*
 * End an access started with ntdb_access_read() or ntdb_access_write()
 * without writing anything back.
 *
 * The visible statements free an access header through free_fn, and
 * decrement file->direct_count, calling free_old_mmaps() once the count
 * reaches zero.
 * NOTE(review): presumably the first applies when find_hdr() locates a
 * header for p and the second when it does not - the branch lines are not
 * shown; confirm.
 */
553 void ntdb_access_release(struct ntdb_context *ntdb, const void *p)
555 struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p);
560 ntdb->free_fn(hdr, ntdb->alloc_data);
562 if (--ntdb->file->direct_count == 0) {
563 free_old_mmaps(ntdb);
/*
 * End a write access and store the data behind p in the database.
 *
 * For an access that has a header, hdr->len bytes are written at hdr->off
 * with either ntdb_write_convert() or ntdb_write(), and the header is
 * freed through free_fn. The other visible statements decrement
 * file->direct_count, call free_old_mmaps() once it reaches zero, and set
 * the result to NTDB_SUCCESS.
 *
 * NOTE(review): the test choosing between the two write calls (presumably
 * hdr->convert) and the branch structure are not shown; confirm.
 */
568 enum NTDB_ERROR ntdb_access_commit(struct ntdb_context *ntdb, void *p)
570 struct ntdb_access_hdr *hdr, **hp = find_hdr(ntdb, p);
571 enum NTDB_ERROR ecode;
576 ecode = ntdb_write_convert(ntdb, hdr->off, p, hdr->len);
578 ecode = ntdb_write(ntdb, hdr->off, p, hdr->len);
580 ntdb->free_fn(hdr, ntdb->alloc_data);
582 if (--ntdb->file->direct_count == 0) {
583 free_old_mmaps(ntdb);
585 ecode = NTDB_SUCCESS;
/*
 * Return a pointer into the mapping for len bytes at offset off.
 *
 * The range is validated with ntdb_oob() (probe argument false); a
 * failure is returned as an error pointer. On success the result is
 * map_ptr advanced by off bytes.
 *
 * NOTE(review): the last parameter and the statement executed when the
 * file is not mapped are not shown; presumably NULL is returned in that
 * case - confirm.
 */
591 static void *ntdb_direct(struct ntdb_context *ntdb, ntdb_off_t off, size_t len,
594 enum NTDB_ERROR ecode;
596 if (unlikely(!ntdb->file->map_ptr))
599 ecode = ntdb_oob(ntdb, off, len, false);
600 if (unlikely(ecode != NTDB_SUCCESS))
601 return NTDB_ERR_PTR(ecode);
602 return (char *)ntdb->file->map_ptr + off;
/*
 * Read one ntdb_off_t at offset off without byte-order conversion.
 *
 * A direct pointer is tried first through ntdb_direct(); an error pointer
 * is returned encoded with NTDB_ERR_TO_OFF(). The other visible path
 * copies the value into a local with ntdb_read() and encodes a failure
 * the same way.
 *
 * NOTE(review): the test separating the two paths and the success returns
 * are not shown in this listing.
 */
605 static ntdb_off_t ntdb_read_normal_off(struct ntdb_context *ntdb,
609 enum NTDB_ERROR ecode;
612 p = ntdb_direct(ntdb, off, sizeof(*p), false);
613 if (NTDB_PTR_IS_ERR(p)) {
614 return NTDB_ERR_TO_OFF(NTDB_PTR_ERR(p));
620 ecode = ntdb_read(ntdb, off, &ret, sizeof(ret));
621 if (ecode != NTDB_SUCCESS) {
622 return NTDB_ERR_TO_OFF(ecode);
/*
 * Read one ntdb_off_t at offset off through ntdb_read_convert(), i.e.
 * with byte-order conversion applied. A failure is returned encoded with
 * NTDB_ERR_TO_OFF().
 *
 * NOTE(review): the success return is not shown in this listing.
 */
627 static ntdb_off_t ntdb_read_convert_off(struct ntdb_context *ntdb,
631 enum NTDB_ERROR ecode;
633 ecode = ntdb_read_convert(ntdb, off, &ret, sizeof(ret));
634 if (ecode != NTDB_SUCCESS) {
635 return NTDB_ERR_TO_OFF(ecode);
/*
 * Store the offset val at file offset off without byte-order conversion.
 *
 * A writable direct pointer is tried first through ntdb_direct(); an
 * error pointer is returned as its error code. The last visible statement
 * falls back to ntdb_write() on a copy of val.
 *
 * NOTE(review): the store through the direct pointer and the test
 * separating the two paths are not shown in this listing.
 */
640 static enum NTDB_ERROR ntdb_write_normal_off(struct ntdb_context *ntdb,
641 ntdb_off_t off, ntdb_off_t val)
645 p = ntdb_direct(ntdb, off, sizeof(*p), true);
646 if (NTDB_PTR_IS_ERR(p)) {
647 return NTDB_PTR_ERR(p);
653 return ntdb_write(ntdb, off, &val, sizeof(val));
/*
 * Store the offset val at file offset off through ntdb_write_convert(),
 * which takes care of byte-order conversion. val is passed by value, so
 * the write works on this function's own copy.
 */
656 static enum NTDB_ERROR ntdb_write_convert_off(struct ntdb_context *ntdb,
657 ntdb_off_t off, ntdb_off_t val)
659 return ntdb_write_convert(ntdb, off, &val, sizeof(val));
662 void ntdb_inc_seqnum(struct ntdb_context *ntdb)
666 if (likely(!(ntdb->flags & NTDB_CONVERT))) {
669 direct = ntdb->io->direct(ntdb,
670 offsetof(struct ntdb_header, seqnum),
671 sizeof(*direct), true);
672 if (likely(direct)) {
673 /* Don't let it go negative, even briefly */
674 if (unlikely((*direct) + 1) < 0)
681 seq = ntdb_read_off(ntdb, offsetof(struct ntdb_header, seqnum));
682 if (!NTDB_OFF_IS_ERR(seq)) {
684 if (unlikely((int64_t)seq < 0))
686 ntdb_write_off(ntdb, offsetof(struct ntdb_header, seqnum), seq);
/*
 * Method table installed by ntdb_io_init() when NTDB_CONVERT is not set.
 * Of its entries only the two offset accessors are visible here: the
 * non-converting ntdb_read_normal_off and ntdb_write_normal_off.
 */
690 static const struct ntdb_methods io_methods = {
696 ntdb_read_normal_off,
697 ntdb_write_normal_off,
/*
 * Method table installed by ntdb_io_init() when NTDB_CONVERT is set.
 * Of its entries only the two offset accessors are visible here: the
 * converting ntdb_read_convert_off and ntdb_write_convert_off.
 */
700 static const struct ntdb_methods io_convert_methods = {
706 ntdb_read_convert_off,
707 ntdb_write_convert_off,
/*
 * Select the I/O method table for a context: io_convert_methods when the
 * NTDB_CONVERT flag is set, io_methods in the other visible assignment.
 * The choice is stored in ntdb->io.
 */
711 initialise the default methods table
713 void ntdb_io_init(struct ntdb_context *ntdb)
715 if (ntdb->flags & NTDB_CONVERT)
716 ntdb->io = &io_convert_methods;
718 ntdb->io = &io_methods;