2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb_private.h"
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb->transaction != NULL) {
63 tdb_increment_seqnum_nonblock(tdb);
67 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
68 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
72 tdb_increment_seqnum_nonblock(tdb);
74 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
77 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
79 return memcmp(data.dptr, key.dptr, data.dsize);
82 void tdb_chainwalk_init(struct tdb_chainwalk_ctx *ctx, tdb_off_t ptr)
84 *ctx = (struct tdb_chainwalk_ctx) { .slow_ptr = ptr };
87 bool tdb_chainwalk_check(struct tdb_context *tdb,
88 struct tdb_chainwalk_ctx *ctx,
93 if (ctx->slow_chase) {
94 ret = tdb_ofs_read(tdb, ctx->slow_ptr, &ctx->slow_ptr);
99 ctx->slow_chase = !ctx->slow_chase;
101 if (next_ptr == ctx->slow_ptr) {
102 tdb->ecode = TDB_ERR_CORRUPT;
103 TDB_LOG((tdb, TDB_DEBUG_ERROR,
104 "tdb_chainwalk_check: circular chain\n"));
111 /* Returns 0 on fail. On success, return offset of record, and fills
113 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
114 struct tdb_record *r)
117 struct tdb_chainwalk_ctx chainwalk;
119 /* read in the hash top */
120 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
123 tdb_chainwalk_init(&chainwalk, rec_ptr);
125 /* keep looking until we find the right record */
129 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
132 if (!TDB_DEAD(r) && hash==r->full_hash
133 && key.dsize==r->key_len
134 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
135 r->key_len, tdb_key_compare,
141 ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
146 tdb->ecode = TDB_ERR_NOEXIST;
150 /* As tdb_find, but if you succeed, keep the lock */
151 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
152 struct tdb_record *rec)
156 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
158 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
159 tdb_unlock(tdb, BUCKET(hash), locktype);
163 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
165 struct tdb_update_hash_state {
166 const TDB_DATA *dbufs;
171 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
173 struct tdb_update_hash_state *state = private_data;
174 unsigned char *dptr = data.dptr;
177 if (state->dbufs_len != data.dsize) {
181 for (i=0; i<state->num_dbufs; i++) {
182 TDB_DATA dbuf = state->dbufs[i];
184 ret = memcmp(dptr, dbuf.dptr, dbuf.dsize);
194 /* update an entry in place - this only works if the new data size
195 is <= the old data size and the key exists.
196 on failure return -1.
198 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key,
200 const TDB_DATA *dbufs, int num_dbufs,
203 struct tdb_record rec;
204 tdb_off_t rec_ptr, ofs;
208 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
211 /* it could be an exact duplicate of what is there - this is
212 * surprisingly common (eg. with a ldb re-index). */
213 if (rec.data_len == dbufs_len) {
214 struct tdb_update_hash_state state = {
215 .dbufs = dbufs, .num_dbufs = num_dbufs,
216 .dbufs_len = dbufs_len
220 ret = tdb_parse_record(tdb, key, tdb_update_hash_cmp, &state);
226 /* must be long enough key, data and tailer */
227 if (rec.rec_len < key.dsize + dbufs_len + sizeof(tdb_off_t)) {
228 tdb->ecode = TDB_SUCCESS; /* Not really an error */
232 ofs = rec_ptr + sizeof(rec) + rec.key_len;
234 for (i=0; i<num_dbufs; i++) {
235 TDB_DATA dbuf = dbufs[i];
238 ret = tdb->methods->tdb_write(tdb, ofs, dbuf.dptr, dbuf.dsize);
245 if (dbufs_len != rec.data_len) {
247 rec.data_len = dbufs_len;
248 return tdb_rec_write(tdb, rec_ptr, &rec);
254 /* find an entry in the database given a key */
255 /* If an entry doesn't exist tdb_err will be set to
256 * TDB_ERR_NOEXIST. If a key has no data attached
257 * then the TDB_DATA will have zero length but
260 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
263 struct tdb_record rec;
267 /* find which hash bucket it is in */
268 hash = tdb->hash_fn(&key);
269 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
272 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
274 ret.dsize = rec.data_len;
275 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
279 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
281 TDB_DATA ret = _tdb_fetch(tdb, key);
283 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
288 * Find an entry in the database and hand the record's data to a parsing
289 * function. The parsing function is executed under the chain read lock, so it
290 * should be fast and should not block on other syscalls.
292 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
294 * For mmapped tdb's that do not have a transaction open it points the parsing
295 * function directly at the mmap area, it avoids the malloc/memcpy in this
296 * case. If a transaction is open or no mmap is available, it has to do
297 * malloc/read/parse/free.
299 * This is interesting for all readers of potentially large data structures in
300 * the tdb records, ldb indexes being one example.
302 * Return -1 if the record was not found.
305 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
306 int (*parser)(TDB_DATA key, TDB_DATA data,
311 struct tdb_record rec;
315 /* find which hash bucket it is in */
316 hash = tdb->hash_fn(&key);
318 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
319 /* record not found */
320 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
321 tdb->ecode = TDB_ERR_NOEXIST;
324 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
326 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
327 rec.data_len, parser, private_data);
329 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
334 /* check if an entry in the database exists
336 note that 1 is returned if the key is found and 0 is returned if not found
337 this doesn't match the conventions in the rest of this module, but is
340 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
342 struct tdb_record rec;
344 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
346 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
350 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
352 uint32_t hash = tdb->hash_fn(&key);
355 ret = tdb_exists_hash(tdb, key, hash);
356 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
360 /* actually delete an entry in the database given the offset */
361 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
363 tdb_off_t last_ptr, i;
364 struct tdb_record lastrec;
366 if (tdb->read_only || tdb->traverse_read) return -1;
368 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
369 tdb_write_lock_record(tdb, rec_ptr) == -1) {
370 /* Someone traversing here: mark it as dead */
371 rec->magic = TDB_DEAD_MAGIC;
372 return tdb_rec_write(tdb, rec_ptr, rec);
374 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
377 /* find previous record in hash chain */
378 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
380 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
381 if (tdb_rec_read(tdb, i, &lastrec) == -1)
384 /* unlink it: next ptr is at start of record. */
386 last_ptr = TDB_HASH_TOP(rec->full_hash);
387 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
390 /* recover the space */
391 if (tdb_free(tdb, rec_ptr, rec) == -1)
396 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
400 struct tdb_record rec;
402 /* read in the hash top */
403 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
407 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
410 if (rec.magic == TDB_DEAD_MAGIC) {
419 * Purge all DEAD records from a hash chain
421 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
424 struct tdb_record rec;
427 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) {
429 * Don't block the freelist if not strictly necessary
434 /* read in the hash top */
435 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
441 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
447 if (rec.magic == TDB_DEAD_MAGIC
448 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
455 tdb_unlock(tdb, -1, F_WRLCK);
459 /* delete an entry in the database given a key */
460 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
463 struct tdb_record rec;
466 if (tdb->read_only || tdb->traverse_read) {
467 tdb->ecode = TDB_ERR_RDONLY;
471 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
476 if (tdb->max_dead_records != 0) {
478 uint32_t magic = TDB_DEAD_MAGIC;
481 * Allow for some dead records per hash chain, mainly for
482 * tdb's with a very high create/delete rate like locking.tdb.
485 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
487 * Don't let the per-chain freelist grow too large,
488 * delete all existing dead records
490 tdb_purge_dead(tdb, hash);
494 * Just mark the record as dead.
497 tdb, rec_ptr + offsetof(struct tdb_record, magic),
501 ret = tdb_do_delete(tdb, rec_ptr, &rec);
505 tdb_increment_seqnum(tdb);
508 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0)
509 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
513 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
515 uint32_t hash = tdb->hash_fn(&key);
518 ret = tdb_delete_hash(tdb, key, hash);
519 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
524 * See if we have a dead record around with enough space
526 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
527 struct tdb_record *r, tdb_len_t length,
528 tdb_off_t *p_last_ptr)
530 tdb_off_t rec_ptr, last_ptr;
531 struct tdb_chainwalk_ctx chainwalk;
532 tdb_off_t best_rec_ptr = 0;
533 tdb_off_t best_last_ptr = 0;
534 struct tdb_record best = { .rec_len = UINT32_MAX };
536 length += sizeof(tdb_off_t); /* tailer */
538 last_ptr = TDB_HASH_TOP(hash);
540 /* read in the hash top */
541 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1)
544 tdb_chainwalk_init(&chainwalk, rec_ptr);
546 /* keep looking until we find the right record */
550 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
553 if (TDB_DEAD(r) && (r->rec_len >= length) &&
554 (r->rec_len < best.rec_len)) {
555 best_rec_ptr = rec_ptr;
556 best_last_ptr = last_ptr;
562 ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr);
568 if (best.rec_len == UINT32_MAX) {
573 *p_last_ptr = best_last_ptr;
577 static int _tdb_storev(struct tdb_context *tdb, TDB_DATA key,
578 const TDB_DATA *dbufs, int num_dbufs,
579 int flag, uint32_t hash)
581 struct tdb_record rec;
582 tdb_off_t rec_ptr, ofs;
583 tdb_len_t rec_len, dbufs_len;
589 for (i=0; i<num_dbufs; i++) {
590 size_t dsize = dbufs[i].dsize;
592 if ((dsize != 0) && (dbufs[i].dptr == NULL)) {
593 tdb->ecode = TDB_ERR_EINVAL;
598 if (dbufs_len < dsize) {
599 tdb->ecode = TDB_ERR_OOM;
604 rec_len = key.dsize + dbufs_len;
605 if ((rec_len < key.dsize) || (rec_len < dbufs_len)) {
606 tdb->ecode = TDB_ERR_OOM;
610 /* check for it existing, on insert. */
611 if (flag == TDB_INSERT) {
612 if (tdb_exists_hash(tdb, key, hash)) {
613 tdb->ecode = TDB_ERR_EXISTS;
617 /* first try in-place update, on modify or replace. */
618 if (tdb_update_hash(tdb, key, hash, dbufs, num_dbufs,
622 if (tdb->ecode == TDB_ERR_NOEXIST &&
623 flag == TDB_MODIFY) {
624 /* if the record doesn't exist and we are in TDB_MODIFY mode then
625 we should fail the store */
629 /* reset the error code potentially set by the tdb_update_hash() */
630 tdb->ecode = TDB_SUCCESS;
632 /* delete any existing record - if it doesn't exist we don't
633 care. Doing this first reduces fragmentation, and avoids
634 coalescing with `allocated' block before it's updated. */
635 if (flag != TDB_INSERT)
636 tdb_delete_hash(tdb, key, hash);
638 /* we have to allocate some space */
639 rec_ptr = tdb_allocate(tdb, hash, rec_len, &rec);
645 /* Read hash top into next ptr */
646 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
649 rec.key_len = key.dsize;
650 rec.data_len = dbufs_len;
651 rec.full_hash = hash;
652 rec.magic = TDB_MAGIC;
656 /* write out and point the top of the hash chain at it */
657 ret = tdb_rec_write(tdb, ofs, &rec);
663 ret = tdb->methods->tdb_write(tdb, ofs, key.dptr, key.dsize);
669 for (i=0; i<num_dbufs; i++) {
670 if (dbufs[i].dsize == 0) {
674 ret = tdb->methods->tdb_write(tdb, ofs, dbufs[i].dptr,
679 ofs += dbufs[i].dsize;
682 ret = tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr);
684 /* Need to tdb_unallocate() here */
692 tdb_increment_seqnum(tdb);
697 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
698 TDB_DATA dbuf, int flag, uint32_t hash)
700 return _tdb_storev(tdb, key, &dbuf, 1, flag, hash);
703 /* store an element in the database, replacing any existing element
706 return 0 on success, -1 on failure
708 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
713 if (tdb->read_only || tdb->traverse_read) {
714 tdb->ecode = TDB_ERR_RDONLY;
715 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
719 /* find which hash bucket it is in */
720 hash = tdb->hash_fn(&key);
721 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
724 ret = _tdb_store(tdb, key, dbuf, flag, hash);
725 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
726 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
730 _PUBLIC_ int tdb_storev(struct tdb_context *tdb, TDB_DATA key,
731 const TDB_DATA *dbufs, int num_dbufs, int flag)
736 if (tdb->read_only || tdb->traverse_read) {
737 tdb->ecode = TDB_ERR_RDONLY;
738 tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key,
739 dbufs, num_dbufs, flag, -1);
743 /* find which hash bucket it is in */
744 hash = tdb->hash_fn(&key);
745 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
748 ret = _tdb_storev(tdb, key, dbufs, num_dbufs, flag, hash);
749 tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key,
750 dbufs, num_dbufs, flag, -1);
751 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
755 /* Append to an entry. Create if not exist. */
756 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
762 /* find which hash bucket it is in */
763 hash = tdb->hash_fn(&key);
764 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
767 dbufs[0] = _tdb_fetch(tdb, key);
770 ret = _tdb_storev(tdb, key, dbufs, 2, 0, hash);
771 tdb_trace_2rec_retrec(tdb, "tdb_append", key, dbufs[0], dbufs[1]);
773 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
774 SAFE_FREE(dbufs[0].dptr);
780 return the name of the current tdb file
781 useful for external logging functions
783 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
789 return the underlying file descriptor being used by tdb, or -1
790 useful for external routines that want to check the device/inode
793 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
799 return the current logging function
800 useful for external tdb routines that wish to log tdb errors
802 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
804 return tdb->log.log_fn;
809 get the tdb sequence number. Only makes sense if the writers opened
810 with TDB_SEQNUM set. Note that this sequence number will wrap quite
811 quickly, so it should only be used for a 'has something changed'
812 test, not for code that relies on the count of the number of changes
813 made. If you want a counter then use a tdb record.
815 The aim of this sequence number is to allow for a very lightweight
816 test of a possible tdb change.
818 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
822 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
826 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
828 return tdb->hash_size;
831 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
833 return tdb->map_size;
836 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
841 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
843 if ((flags & TDB_ALLOW_NESTING) &&
844 (flags & TDB_DISALLOW_NESTING)) {
845 tdb->ecode = TDB_ERR_NESTING;
846 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
847 "allow_nesting and disallow_nesting are not allowed together!"));
851 if (flags & TDB_ALLOW_NESTING) {
852 tdb->flags &= ~TDB_DISALLOW_NESTING;
854 if (flags & TDB_DISALLOW_NESTING) {
855 tdb->flags &= ~TDB_ALLOW_NESTING;
861 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
863 if ((flags & TDB_ALLOW_NESTING) &&
864 (flags & TDB_DISALLOW_NESTING)) {
865 tdb->ecode = TDB_ERR_NESTING;
866 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
867 "allow_nesting and disallow_nesting are not allowed together!"));
871 if ((flags & TDB_NOLOCK) &&
872 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) &&
873 (tdb->mutexes == NULL)) {
874 tdb->ecode = TDB_ERR_LOCK;
875 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
876 "Can not remove NOLOCK flag on mutexed databases"));
880 if (flags & TDB_ALLOW_NESTING) {
881 tdb->flags |= TDB_DISALLOW_NESTING;
883 if (flags & TDB_DISALLOW_NESTING) {
884 tdb->flags |= TDB_ALLOW_NESTING;
887 tdb->flags &= ~flags;
892 enable sequence number handling on an open tdb
894 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
896 tdb->flags |= TDB_SEQNUM;
901 add a region of the file to the freelist. Length is the size of the region in bytes,
902 which includes the free list header that needs to be added
904 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
906 struct tdb_record rec;
907 if (length <= sizeof(rec)) {
908 /* the region is not worth adding */
911 if (length + offset > tdb->map_size) {
912 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
915 memset(&rec,'\0',sizeof(rec));
916 rec.rec_len = length - sizeof(rec);
917 if (tdb_free(tdb, offset, &rec) == -1) {
918 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
925 wipe the entire database, deleting all records. This can be done
926 very fast by using a allrecord lock. The entire data portion of the
927 file becomes a single entry in the freelist.
929 This code carefully steps around the recovery area, leaving it alone
931 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
934 tdb_off_t offset = 0;
936 tdb_off_t recovery_head;
937 tdb_len_t recovery_size = 0;
939 if (tdb_lockall(tdb) != 0) {
943 tdb_trace(tdb, "tdb_wipe_all");
945 /* see if the tdb has a recovery area, and remember its size
946 if so. We don't want to lose this as otherwise each
947 tdb_wipe_all() in a transaction will increase the size of
948 the tdb by the size of the recovery area */
949 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
950 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
954 if (recovery_head != 0) {
955 struct tdb_record rec;
956 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
957 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
960 recovery_size = rec.rec_len + sizeof(rec);
963 /* wipe the hashes */
964 for (i=0;i<tdb->hash_size;i++) {
965 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
966 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
971 /* wipe the freelist */
972 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
973 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
977 /* add all the rest of the file to the freelist, possibly leaving a gap
978 for the recovery area */
979 if (recovery_size == 0) {
980 /* the simple case - the whole file can be used as a freelist */
981 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
982 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
986 /* we need to add two freelist entries - one on either
987 side of the recovery area
989 Note that we cannot shift the recovery area during
990 this operation. Only the transaction.c code may
991 move the recovery area or we risk subtle data
994 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
995 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
998 /* and the 2nd free list entry after the recovery area - if any */
999 data_len = tdb->map_size - (recovery_head+recovery_size);
1000 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
1005 tdb_increment_seqnum_nonblock(tdb);
1007 if (tdb_unlockall(tdb) != 0) {
1008 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
/* State shared between tdb_repack() and its traverse callback. */
struct traverse_state {
	bool error;			/* set when a store into dest_db failed */
	struct tdb_context *dest_db;	/* database the records are copied into */
};
1025 traverse function for repacking
1027 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
1029 struct traverse_state *state = (struct traverse_state *)private_data;
1030 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
1031 state->error = true;
1040 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
1042 struct tdb_context *tmp_db;
1043 struct traverse_state state;
1045 tdb_trace(tdb, "tdb_repack");
1047 if (tdb_transaction_start(tdb) != 0) {
1048 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
1052 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
1053 if (tmp_db == NULL) {
1054 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
1055 tdb_transaction_cancel(tdb);
1059 state.error = false;
1060 state.dest_db = tmp_db;
1062 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
1063 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
1064 tdb_transaction_cancel(tdb);
1070 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
1071 tdb_transaction_cancel(tdb);
1076 if (tdb_wipe_all(tdb) != 0) {
1077 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
1078 tdb_transaction_cancel(tdb);
1083 state.error = false;
1084 state.dest_db = tdb;
1086 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
1087 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
1088 tdb_transaction_cancel(tdb);
1094 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
1095 tdb_transaction_cancel(tdb);
1102 if (tdb_transaction_commit(tdb) != 0) {
1103 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
/* Even on files, we can get partial writes due to signals.

   Keeps calling write() until all "count" bytes of "buf" have been
   written to "fd". Returns true on success (including count == 0) and
   false as soon as write() reports an error. */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	while (count) {
		ssize_t ret;

		ret = write(fd, buf, count);
		if (ret < 0)
			return false;
		/* skip over what has been written and retry the rest */
		buf = (const char *)buf + ret;
		count -= ret;
	}
	return true;
}
1124 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret)
1126 tdb_off_t ret = a + b;
1128 if ((ret < a) || (ret < b)) {
1136 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
1138 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
1139 close(tdb->tracefd);
1144 static void tdb_trace_start(struct tdb_context *tdb)
1147 char msg[sizeof(tdb_off_t) * 4 + 1];
1149 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1150 snprintf(msg, sizeof(msg), "%u ", seqnum);
1151 tdb_trace_write(tdb, msg);
/* Terminate a trace line with a newline. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	tdb_trace_write(tdb, "\n");
}
/* Terminate a trace line with " = <ret>" and a newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char msg[sizeof(ret) * 4 + 4];

	snprintf(msg, sizeof(msg), " = %i\n", ret);
	tdb_trace_write(tdb, msg);
}
1166 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1168 char msg[20 + rec.dsize*2], *p;
1171 /* We differentiate zero-length records from non-existent ones. */
1172 if (rec.dptr == NULL) {
1173 tdb_trace_write(tdb, " NULL");
1177 /* snprintf here is purely cargo-cult programming. */
1179 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1180 for (i = 0; i < rec.dsize; i++)
1181 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1183 tdb_trace_write(tdb, msg);
/* Trace an operation that has no arguments and no result. */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end(tdb);
}
1193 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1195 char msg[sizeof(tdb_off_t) * 4 + 1];
1197 snprintf(msg, sizeof(msg), "%u ", seqnum);
1198 tdb_trace_write(tdb, msg);
1199 tdb_trace_write(tdb, op);
/*
  Trace an open call: operation name, hash size, tdb flags and open
  flags. The text is truncated if it does not fit the local buffer.
*/
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char msg[128];

	snprintf(msg, sizeof(msg),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, msg);
	tdb_trace_end(tdb);
}
/* Trace an operation without arguments together with its int result. */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end_ret(tdb, ret);
}
1222 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1224 tdb_trace_start(tdb);
1225 tdb_trace_write(tdb, op);
1226 tdb_trace_write(tdb, " =");
1227 tdb_trace_record(tdb, ret);
1231 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1234 tdb_trace_start(tdb);
1235 tdb_trace_write(tdb, op);
1236 tdb_trace_record(tdb, rec);
1240 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1241 TDB_DATA rec, int ret)
1243 tdb_trace_start(tdb);
1244 tdb_trace_write(tdb, op);
1245 tdb_trace_record(tdb, rec);
1246 tdb_trace_end_ret(tdb, ret);
1249 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1250 TDB_DATA rec, TDB_DATA ret)
1252 tdb_trace_start(tdb);
1253 tdb_trace_write(tdb, op);
1254 tdb_trace_record(tdb, rec);
1255 tdb_trace_write(tdb, " =");
1256 tdb_trace_record(tdb, ret);
1260 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1261 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1264 char msg[1 + sizeof(ret) * 4];
1266 snprintf(msg, sizeof(msg), " %#x", flag);
1267 tdb_trace_start(tdb);
1268 tdb_trace_write(tdb, op);
1269 tdb_trace_record(tdb, rec1);
1270 tdb_trace_record(tdb, rec2);
1271 tdb_trace_write(tdb, msg);
1272 tdb_trace_end_ret(tdb, ret);
1275 void tdb_trace_1plusn_rec_flag_ret(struct tdb_context *tdb, const char *op,
1277 const TDB_DATA *recs, int num_recs,
1278 unsigned flag, int ret)
1280 char msg[1 + sizeof(ret) * 4];
1283 snprintf(msg, sizeof(msg), " %#x", flag);
1284 tdb_trace_start(tdb);
1285 tdb_trace_write(tdb, op);
1286 tdb_trace_record(tdb, rec);
1287 for (i=0; i<num_recs; i++) {
1288 tdb_trace_record(tdb, recs[i]);
1290 tdb_trace_write(tdb, msg);
1291 tdb_trace_end_ret(tdb, ret);
1294 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1295 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1297 tdb_trace_start(tdb);
1298 tdb_trace_write(tdb, op);
1299 tdb_trace_record(tdb, rec1);
1300 tdb_trace_record(tdb, rec2);
1301 tdb_trace_write(tdb, " =");
1302 tdb_trace_record(tdb, ret);