/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell         2000
   Copyright (C) Jeremy Allison               2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
28 #include "tdb_private.h"
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb->transaction != NULL) {
63 tdb_increment_seqnum_nonblock(tdb);
67 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
68 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
72 tdb_increment_seqnum_nonblock(tdb);
74 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
77 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
79 return memcmp(data.dptr, key.dptr, data.dsize);
82 /* Returns 0 on fail. On success, return offset of record, and fills
84 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
89 /* read in the hash top */
90 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
93 /* keep looking until we find the right record */
95 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
98 if (!TDB_DEAD(r) && hash==r->full_hash
99 && key.dsize==r->key_len
100 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
101 r->key_len, tdb_key_compare,
105 /* detect tight infinite loop */
106 if (rec_ptr == r->next) {
107 tdb->ecode = TDB_ERR_CORRUPT;
108 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
113 tdb->ecode = TDB_ERR_NOEXIST;
117 /* As tdb_find, but if you succeed, keep the lock */
118 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
119 struct tdb_record *rec)
123 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
125 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
126 tdb_unlock(tdb, BUCKET(hash), locktype);
130 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
132 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
134 TDB_DATA *dbuf = (TDB_DATA *)private_data;
136 if (dbuf->dsize != data.dsize) {
139 if (memcmp(dbuf->dptr, data.dptr, data.dsize) != 0) {
145 /* update an entry in place - this only works if the new data size
146 is <= the old data size and the key exists.
147 on failure return -1.
149 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
151 struct tdb_record rec;
155 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
158 /* it could be an exact duplicate of what is there - this is
159 * surprisingly common (eg. with a ldb re-index). */
160 if (rec.key_len == key.dsize &&
161 rec.data_len == dbuf.dsize &&
162 rec.full_hash == hash &&
163 tdb_parse_record(tdb, key, tdb_update_hash_cmp, &dbuf) == 0) {
167 /* must be long enough key, data and tailer */
168 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
169 tdb->ecode = TDB_SUCCESS; /* Not really an error */
173 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
174 dbuf.dptr, dbuf.dsize) == -1)
177 if (dbuf.dsize != rec.data_len) {
179 rec.data_len = dbuf.dsize;
180 return tdb_rec_write(tdb, rec_ptr, &rec);
186 /* find an entry in the database given a key */
187 /* If an entry doesn't exist tdb_err will be set to
188 * TDB_ERR_NOEXIST. If a key has no data attached
189 * then the TDB_DATA will have zero length but
192 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
195 struct tdb_record rec;
199 /* find which hash bucket it is in */
200 hash = tdb->hash_fn(&key);
201 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
204 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
206 ret.dsize = rec.data_len;
207 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
211 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
213 TDB_DATA ret = _tdb_fetch(tdb, key);
215 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
220 * Find an entry in the database and hand the record's data to a parsing
221 * function. The parsing function is executed under the chain read lock, so it
222 * should be fast and should not block on other syscalls.
224 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
226 * For mmapped tdb's that do not have a transaction open it points the parsing
227 * function directly at the mmap area, it avoids the malloc/memcpy in this
228 * case. If a transaction is open or no mmap is available, it has to do
229 * malloc/read/parse/free.
231 * This is interesting for all readers of potentially large data structures in
232 * the tdb records, ldb indexes being one example.
234 * Return -1 if the record was not found.
237 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
238 int (*parser)(TDB_DATA key, TDB_DATA data,
243 struct tdb_record rec;
247 /* find which hash bucket it is in */
248 hash = tdb->hash_fn(&key);
250 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
251 /* record not found */
252 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
253 tdb->ecode = TDB_ERR_NOEXIST;
256 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
258 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
259 rec.data_len, parser, private_data);
261 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
266 /* check if an entry in the database exists
268 note that 1 is returned if the key is found and 0 is returned if not found
269 this doesn't match the conventions in the rest of this module, but is
272 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
274 struct tdb_record rec;
276 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
278 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
282 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
284 uint32_t hash = tdb->hash_fn(&key);
287 ret = tdb_exists_hash(tdb, key, hash);
288 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
292 /* actually delete an entry in the database given the offset */
293 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
295 tdb_off_t last_ptr, i;
296 struct tdb_record lastrec;
298 if (tdb->read_only || tdb->traverse_read) return -1;
300 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
301 tdb_write_lock_record(tdb, rec_ptr) == -1) {
302 /* Someone traversing here: mark it as dead */
303 rec->magic = TDB_DEAD_MAGIC;
304 return tdb_rec_write(tdb, rec_ptr, rec);
306 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
309 /* find previous record in hash chain */
310 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
312 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
313 if (tdb_rec_read(tdb, i, &lastrec) == -1)
316 /* unlink it: next ptr is at start of record. */
318 last_ptr = TDB_HASH_TOP(rec->full_hash);
319 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
322 /* recover the space */
323 if (tdb_free(tdb, rec_ptr, rec) == -1)
328 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
332 struct tdb_record rec;
334 /* read in the hash top */
335 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
339 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
342 if (rec.magic == TDB_DEAD_MAGIC) {
351 * Purge all DEAD records from a hash chain
353 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
356 struct tdb_record rec;
359 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) {
361 * Don't block the freelist if not strictly necessary
366 /* read in the hash top */
367 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
373 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
379 if (rec.magic == TDB_DEAD_MAGIC
380 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
387 tdb_unlock(tdb, -1, F_WRLCK);
391 /* delete an entry in the database given a key */
392 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
395 struct tdb_record rec;
398 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
403 if (tdb->max_dead_records != 0) {
405 uint32_t magic = TDB_DEAD_MAGIC;
408 * Allow for some dead records per hash chain, mainly for
409 * tdb's with a very high create/delete rate like locking.tdb.
412 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
414 * Don't let the per-chain freelist grow too large,
415 * delete all existing dead records
417 tdb_purge_dead(tdb, hash);
421 * Just mark the record as dead.
424 tdb, rec_ptr + offsetof(struct tdb_record, magic),
428 ret = tdb_do_delete(tdb, rec_ptr, &rec);
432 tdb_increment_seqnum(tdb);
435 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0)
436 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
440 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
442 uint32_t hash = tdb->hash_fn(&key);
445 ret = tdb_delete_hash(tdb, key, hash);
446 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
451 * See if we have a dead record around with enough space
453 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
454 struct tdb_record *r, tdb_len_t length,
455 tdb_off_t *p_last_ptr)
457 tdb_off_t rec_ptr, last_ptr;
458 tdb_off_t best_rec_ptr = 0;
459 tdb_off_t best_last_ptr = 0;
460 struct tdb_record best = { .rec_len = UINT32_MAX };
462 length += sizeof(tdb_off_t); /* tailer */
464 last_ptr = TDB_HASH_TOP(hash);
466 /* read in the hash top */
467 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1)
470 /* keep looking until we find the right record */
472 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
475 if (TDB_DEAD(r) && (r->rec_len >= length) &&
476 (r->rec_len < best.rec_len)) {
477 best_rec_ptr = rec_ptr;
478 best_last_ptr = last_ptr;
485 if (best.rec_len == UINT32_MAX) {
490 *p_last_ptr = best_last_ptr;
494 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
495 TDB_DATA dbuf, int flag, uint32_t hash)
497 struct tdb_record rec;
502 rec_len = key.dsize + dbuf.dsize;
503 if ((rec_len < key.dsize) || (rec_len < dbuf.dsize)) {
504 tdb->ecode = TDB_ERR_OOM;
508 /* check for it existing, on insert. */
509 if (flag == TDB_INSERT) {
510 if (tdb_exists_hash(tdb, key, hash)) {
511 tdb->ecode = TDB_ERR_EXISTS;
515 /* first try in-place update, on modify or replace. */
516 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
519 if (tdb->ecode == TDB_ERR_NOEXIST &&
520 flag == TDB_MODIFY) {
521 /* if the record doesn't exist and we are in TDB_MODIFY mode then
522 we should fail the store */
526 /* reset the error code potentially set by the tdb_update_hash() */
527 tdb->ecode = TDB_SUCCESS;
529 /* delete any existing record - if it doesn't exist we don't
530 care. Doing this first reduces fragmentation, and avoids
531 coalescing with `allocated' block before it's updated. */
532 if (flag != TDB_INSERT)
533 tdb_delete_hash(tdb, key, hash);
535 /* we have to allocate some space */
536 rec_ptr = tdb_allocate(tdb, hash, rec_len, &rec);
542 /* Read hash top into next ptr */
543 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
546 rec.key_len = key.dsize;
547 rec.data_len = dbuf.dsize;
548 rec.full_hash = hash;
549 rec.magic = TDB_MAGIC;
551 /* write out and point the top of the hash chain at it */
552 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
553 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec),
554 key.dptr, key.dsize) == -1
555 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize,
556 dbuf.dptr, dbuf.dsize) == -1
557 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
558 /* Need to tdb_unallocate() here */
566 tdb_increment_seqnum(tdb);
571 /* store an element in the database, replacing any existing element
574 return 0 on success, -1 on failure
576 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
581 if (tdb->read_only || tdb->traverse_read) {
582 tdb->ecode = TDB_ERR_RDONLY;
583 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
587 /* find which hash bucket it is in */
588 hash = tdb->hash_fn(&key);
589 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
592 ret = _tdb_store(tdb, key, dbuf, flag, hash);
593 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
594 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
598 /* Append to an entry. Create if not exist. */
599 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
605 /* find which hash bucket it is in */
606 hash = tdb->hash_fn(&key);
607 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
610 dbuf = _tdb_fetch(tdb, key);
612 if (dbuf.dptr == NULL) {
613 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
615 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
616 unsigned char *new_dptr;
618 /* realloc '0' is special: don't do that. */
621 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
622 if (new_dptr == NULL) {
625 dbuf.dptr = new_dptr;
628 if (dbuf.dptr == NULL) {
629 tdb->ecode = TDB_ERR_OOM;
633 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
634 dbuf.dsize += new_dbuf.dsize;
636 ret = _tdb_store(tdb, key, dbuf, 0, hash);
637 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
640 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
641 SAFE_FREE(dbuf.dptr);
647 return the name of the current tdb file
648 useful for external logging functions
650 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
656 return the underlying file descriptor being used by tdb, or -1
657 useful for external routines that want to check the device/inode
660 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
666 return the current logging function
667 useful for external tdb routines that wish to log tdb errors
669 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
671 return tdb->log.log_fn;
676 get the tdb sequence number. Only makes sense if the writers opened
677 with TDB_SEQNUM set. Note that this sequence number will wrap quite
678 quickly, so it should only be used for a 'has something changed'
679 test, not for code that relies on the count of the number of changes
680 made. If you want a counter then use a tdb record.
682 The aim of this sequence number is to allow for a very lightweight
683 test of a possible tdb change.
685 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
689 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
693 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
695 return tdb->hash_size;
698 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
700 return tdb->map_size;
703 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
708 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
710 if ((flags & TDB_ALLOW_NESTING) &&
711 (flags & TDB_DISALLOW_NESTING)) {
712 tdb->ecode = TDB_ERR_NESTING;
713 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
714 "allow_nesting and disallow_nesting are not allowed together!"));
718 if (flags & TDB_ALLOW_NESTING) {
719 tdb->flags &= ~TDB_DISALLOW_NESTING;
721 if (flags & TDB_DISALLOW_NESTING) {
722 tdb->flags &= ~TDB_ALLOW_NESTING;
728 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
730 if ((flags & TDB_ALLOW_NESTING) &&
731 (flags & TDB_DISALLOW_NESTING)) {
732 tdb->ecode = TDB_ERR_NESTING;
733 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
734 "allow_nesting and disallow_nesting are not allowed together!"));
738 if ((flags & TDB_NOLOCK) &&
739 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) &&
740 (tdb->mutexes == NULL)) {
741 tdb->ecode = TDB_ERR_LOCK;
742 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
743 "Can not remove NOLOCK flag on mutexed databases"));
747 if (flags & TDB_ALLOW_NESTING) {
748 tdb->flags |= TDB_DISALLOW_NESTING;
750 if (flags & TDB_DISALLOW_NESTING) {
751 tdb->flags |= TDB_ALLOW_NESTING;
754 tdb->flags &= ~flags;
759 enable sequence number handling on an open tdb
761 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
763 tdb->flags |= TDB_SEQNUM;
768 add a region of the file to the freelist. Length is the size of the region in bytes,
769 which includes the free list header that needs to be added
771 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
773 struct tdb_record rec;
774 if (length <= sizeof(rec)) {
775 /* the region is not worth adding */
778 if (length + offset > tdb->map_size) {
779 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
782 memset(&rec,'\0',sizeof(rec));
783 rec.rec_len = length - sizeof(rec);
784 if (tdb_free(tdb, offset, &rec) == -1) {
785 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
792 wipe the entire database, deleting all records. This can be done
793 very fast by using a allrecord lock. The entire data portion of the
794 file becomes a single entry in the freelist.
796 This code carefully steps around the recovery area, leaving it alone
798 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
801 tdb_off_t offset = 0;
803 tdb_off_t recovery_head;
804 tdb_len_t recovery_size = 0;
806 if (tdb_lockall(tdb) != 0) {
810 tdb_trace(tdb, "tdb_wipe_all");
812 /* see if the tdb has a recovery area, and remember its size
813 if so. We don't want to lose this as otherwise each
814 tdb_wipe_all() in a transaction will increase the size of
815 the tdb by the size of the recovery area */
816 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
817 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
821 if (recovery_head != 0) {
822 struct tdb_record rec;
823 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
824 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
827 recovery_size = rec.rec_len + sizeof(rec);
830 /* wipe the hashes */
831 for (i=0;i<tdb->hash_size;i++) {
832 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
833 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
838 /* wipe the freelist */
839 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
840 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
844 /* add all the rest of the file to the freelist, possibly leaving a gap
845 for the recovery area */
846 if (recovery_size == 0) {
847 /* the simple case - the whole file can be used as a freelist */
848 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
849 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
853 /* we need to add two freelist entries - one on either
854 side of the recovery area
856 Note that we cannot shift the recovery area during
857 this operation. Only the transaction.c code may
858 move the recovery area or we risk subtle data
861 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
862 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
865 /* and the 2nd free list entry after the recovery area - if any */
866 data_len = tdb->map_size - (recovery_head+recovery_size);
867 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
872 tdb_increment_seqnum_nonblock(tdb);
874 if (tdb_unlockall(tdb) != 0) {
875 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
/* State shared with the repack traversal callback. */
struct traverse_state {
	bool error;			/* set when a store into dest_db failed */
	struct tdb_context *dest_db;	/* database the records are copied into */
};
892 traverse function for repacking
894 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
896 struct traverse_state *state = (struct traverse_state *)private_data;
897 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
907 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
909 struct tdb_context *tmp_db;
910 struct traverse_state state;
912 tdb_trace(tdb, "tdb_repack");
914 if (tdb_transaction_start(tdb) != 0) {
915 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
919 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
920 if (tmp_db == NULL) {
921 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
922 tdb_transaction_cancel(tdb);
927 state.dest_db = tmp_db;
929 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
930 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
931 tdb_transaction_cancel(tdb);
937 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
938 tdb_transaction_cancel(tdb);
943 if (tdb_wipe_all(tdb) != 0) {
944 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
945 tdb_transaction_cancel(tdb);
953 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
954 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
955 tdb_transaction_cancel(tdb);
961 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
962 tdb_transaction_cancel(tdb);
969 if (tdb_transaction_commit(tdb) != 0) {
970 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
/* Even on files, we can get partial writes due to signals.

   Keeps calling write() until all "count" bytes of "buf" have gone to
   "fd".  Returns true when everything was written (trivially so for
   count == 0) and false as soon as write() reports an error. */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	while (count) {
		ssize_t ret;
		ret = write(fd, buf, count);
		if (ret < 0)
			return false;
		/* short write: continue behind what has been accepted */
		buf = (const char *)buf + ret;
		count -= ret;
	}
	return true;
}
991 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret)
993 tdb_off_t ret = a + b;
995 if ((ret < a) || (ret < b)) {
#ifdef TDB_TRACE
/*
  Append a string to the trace file.  If the write fails the trace
  descriptor is closed and set to -1.
  NOTE(review): guarded by TDB_TRACE on the assumption that tracing is
  a compile-time option - confirm against tdb_private.h.
*/
static void tdb_trace_write(struct tdb_context *tdb, const char *str)
{
	if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
		close(tdb->tracefd);
		tdb->tracefd = -1;
	}
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/*
  Begin a trace line: writes the current sequence number followed by a
  space.  A failed read leaves the number at 0.
*/
static void tdb_trace_start(struct tdb_context *tdb)
{
	tdb_off_t seqnum = 0;
	char msg[sizeof(tdb_off_t) * 4 + 1];

	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
	snprintf(msg, sizeof(msg), "%u ", seqnum);
	tdb_trace_write(tdb, msg);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Finish a trace line with a newline. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	tdb_trace_write(tdb, "\n");
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Finish a trace line with " = <ret>" and a newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char msg[sizeof(ret) * 4 + 4];
	snprintf(msg, sizeof(msg), " = %i\n", ret);
	tdb_trace_write(tdb, msg);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/*
  Write one TDB_DATA to the trace as " <size>:<hex bytes>", or " NULL"
  when dptr is NULL.
*/
static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
{
	/* ' ' + up to 20 size digits + ':' + two hex digits per byte + NUL */
	char msg[24 + rec.dsize*2], *p;
	size_t i;

	/* We differentiate zero-length records from non-existent ones. */
	if (rec.dptr == NULL) {
		tdb_trace_write(tdb, " NULL");
		return;
	}

	/* snprintf here is purely cargo-cult programming. */
	p = msg;
	p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
	for (i = 0; i < rec.dsize; i++) {
		/*
		 * The limit must be 3 (two hex digits plus the NUL).
		 * With a limit of 2 only one digit is stored while p
		 * still advances by 2, leaving a NUL in the middle of
		 * the string and cutting the trace short.
		 */
		p += snprintf(p, 3, "%02x", rec.dptr[i]);
	}

	tdb_trace_write(tdb, msg);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Trace an operation that has neither arguments nor a result. */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end(tdb);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/*
  Trace an operation using the sequence number supplied by the caller
  instead of reading the current one from the database.
*/
void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
{
	char msg[sizeof(tdb_off_t) * 4 + 1];

	snprintf(msg, sizeof(msg), "%u ", seqnum);
	tdb_trace_write(tdb, msg);
	tdb_trace_write(tdb, op);
	tdb_trace_end(tdb);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/*
  Trace an open call as "<op> <hash_size> 0x<tdb_flags> 0x<open_flags>".
*/
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char msg[128];

	snprintf(msg, sizeof(msg),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, msg);
	tdb_trace_end(tdb);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Trace an operation without arguments together with its int result. */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end_ret(tdb, ret);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Trace an operation without arguments whose result is a record. */
void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_write(tdb, " =");
	tdb_trace_record(tdb, ret);
	tdb_trace_end(tdb);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Trace an operation that takes one record and has no traced result. */
void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
		    TDB_DATA rec)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_record(tdb, rec);
	tdb_trace_end(tdb);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Trace an operation that takes one record and returns an int. */
void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
			TDB_DATA rec, int ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_record(tdb, rec);
	tdb_trace_end_ret(tdb, ret);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Trace an operation that takes one record and returns a record. */
void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
			   TDB_DATA rec, TDB_DATA ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_record(tdb, rec);
	tdb_trace_write(tdb, " =");
	tdb_trace_record(tdb, ret);
	tdb_trace_end(tdb);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/*
  Trace an operation that takes two records and a flag and returns an
  int.  The flag is written in "%#x" form after the two records.
*/
void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
			     TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
			     int ret)
{
	char msg[1 + sizeof(ret) * 4];

	snprintf(msg, sizeof(msg), " %#x", flag);
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_record(tdb, rec1);
	tdb_trace_record(tdb, rec2);
	tdb_trace_write(tdb, msg);
	tdb_trace_end_ret(tdb, ret);
}
#endif /* TDB_TRACE */
#ifdef TDB_TRACE
/* Trace an operation that takes two records and returns a record. */
void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
			   TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_record(tdb, rec1);
	tdb_trace_record(tdb, rec2);
	tdb_trace_write(tdb, " =");
	tdb_trace_record(tdb, ret);
	tdb_trace_end(tdb);
}
#endif /* TDB_TRACE */