2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
#include "tdb_private.h"
#include <errno.h>
/* Exported TDB_DATA object; having no initializer at file scope it is
   zero-initialized (NULL dptr, zero dsize). */
_PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb->transaction != NULL) {
63 tdb_increment_seqnum_nonblock(tdb);
67 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
68 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
72 tdb_increment_seqnum_nonblock(tdb);
74 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
77 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
79 return memcmp(data.dptr, key.dptr, data.dsize);
82 /* Returns 0 on fail. On success, return offset of record, and fills
84 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
89 /* read in the hash top */
90 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
93 /* keep looking until we find the right record */
95 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
98 if (!TDB_DEAD(r) && hash==r->full_hash
99 && key.dsize==r->key_len
100 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
101 r->key_len, tdb_key_compare,
105 /* detect tight infinite loop */
106 if (rec_ptr == r->next) {
107 tdb->ecode = TDB_ERR_CORRUPT;
108 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
113 tdb->ecode = TDB_ERR_NOEXIST;
117 /* As tdb_find, but if you succeed, keep the lock */
118 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
119 struct tdb_record *rec)
123 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
125 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
126 tdb_unlock(tdb, BUCKET(hash), locktype);
/* Forward declaration: the definition appears later in this file. */
static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
132 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data)
134 TDB_DATA *dbuf = (TDB_DATA *)private_data;
136 if (dbuf->dsize != data.dsize) {
139 if (memcmp(dbuf->dptr, data.dptr, data.dsize) != 0) {
145 /* update an entry in place - this only works if the new data size
146 is <= the old data size and the key exists.
147 on failure return -1.
149 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
151 struct tdb_record rec;
155 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
158 /* it could be an exact duplicate of what is there - this is
159 * surprisingly common (eg. with a ldb re-index). */
160 if (rec.key_len == key.dsize &&
161 rec.data_len == dbuf.dsize &&
162 rec.full_hash == hash &&
163 tdb_parse_record(tdb, key, tdb_update_hash_cmp, &dbuf) == 0) {
167 /* must be long enough key, data and tailer */
168 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
169 tdb->ecode = TDB_SUCCESS; /* Not really an error */
173 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
174 dbuf.dptr, dbuf.dsize) == -1)
177 if (dbuf.dsize != rec.data_len) {
179 rec.data_len = dbuf.dsize;
180 return tdb_rec_write(tdb, rec_ptr, &rec);
186 /* find an entry in the database given a key */
187 /* If an entry doesn't exist tdb_err will be set to
188 * TDB_ERR_NOEXIST. If a key has no data attached
189 * then the TDB_DATA will have zero length but
192 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
195 struct tdb_record rec;
199 /* find which hash bucket it is in */
200 hash = tdb->hash_fn(&key);
201 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
204 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
206 ret.dsize = rec.data_len;
207 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
211 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
213 TDB_DATA ret = _tdb_fetch(tdb, key);
215 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
220 * Find an entry in the database and hand the record's data to a parsing
221 * function. The parsing function is executed under the chain read lock, so it
222 * should be fast and should not block on other syscalls.
224 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
226 * For mmapped tdb's that do not have a transaction open it points the parsing
227 * function directly at the mmap area, it avoids the malloc/memcpy in this
228 * case. If a transaction is open or no mmap is available, it has to do
229 * malloc/read/parse/free.
231 * This is interesting for all readers of potentially large data structures in
232 * the tdb records, ldb indexes being one example.
234 * Return -1 if the record was not found.
237 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
238 int (*parser)(TDB_DATA key, TDB_DATA data,
243 struct tdb_record rec;
247 /* find which hash bucket it is in */
248 hash = tdb->hash_fn(&key);
250 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
251 /* record not found */
252 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
253 tdb->ecode = TDB_ERR_NOEXIST;
256 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
258 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
259 rec.data_len, parser, private_data);
261 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
266 /* check if an entry in the database exists
268 note that 1 is returned if the key is found and 0 is returned if not found
269 this doesn't match the conventions in the rest of this module, but is
272 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
274 struct tdb_record rec;
276 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
278 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
282 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
284 uint32_t hash = tdb->hash_fn(&key);
287 ret = tdb_exists_hash(tdb, key, hash);
288 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
292 /* actually delete an entry in the database given the offset */
293 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
295 tdb_off_t last_ptr, i;
296 struct tdb_record lastrec;
298 if (tdb->read_only || tdb->traverse_read) return -1;
300 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
301 tdb_write_lock_record(tdb, rec_ptr) == -1) {
302 /* Someone traversing here: mark it as dead */
303 rec->magic = TDB_DEAD_MAGIC;
304 return tdb_rec_write(tdb, rec_ptr, rec);
306 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
309 /* find previous record in hash chain */
310 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
312 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
313 if (tdb_rec_read(tdb, i, &lastrec) == -1)
316 /* unlink it: next ptr is at start of record. */
318 last_ptr = TDB_HASH_TOP(rec->full_hash);
319 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
322 /* recover the space */
323 if (tdb_free(tdb, rec_ptr, rec) == -1)
328 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
332 struct tdb_record rec;
334 /* read in the hash top */
335 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
339 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
342 if (rec.magic == TDB_DEAD_MAGIC) {
351 * Purge all DEAD records from a hash chain
353 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
356 struct tdb_record rec;
359 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) {
361 * Don't block the freelist if not strictly necessary
366 /* read in the hash top */
367 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
373 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
379 if (rec.magic == TDB_DEAD_MAGIC
380 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
387 tdb_unlock(tdb, -1, F_WRLCK);
391 /* delete an entry in the database given a key */
392 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
395 struct tdb_record rec;
398 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec);
403 if (tdb->max_dead_records != 0) {
405 uint32_t magic = TDB_DEAD_MAGIC;
408 * Allow for some dead records per hash chain, mainly for
409 * tdb's with a very high create/delete rate like locking.tdb.
412 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
414 * Don't let the per-chain freelist grow too large,
415 * delete all existing dead records
417 tdb_purge_dead(tdb, hash);
421 * Just mark the record as dead.
424 tdb, rec_ptr + offsetof(struct tdb_record, magic),
428 ret = tdb_do_delete(tdb, rec_ptr, &rec);
432 tdb_increment_seqnum(tdb);
435 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0)
436 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
440 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
442 uint32_t hash = tdb->hash_fn(&key);
445 ret = tdb_delete_hash(tdb, key, hash);
446 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
451 * See if we have a dead record around with enough space
453 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
454 struct tdb_record *r, tdb_len_t length,
455 tdb_off_t *p_last_ptr)
457 tdb_off_t rec_ptr, last_ptr;
458 tdb_off_t best_rec_ptr = 0;
459 tdb_off_t best_last_ptr = 0;
460 struct tdb_record best = { .rec_len = UINT32_MAX };
462 length += sizeof(tdb_off_t); /* tailer */
464 last_ptr = TDB_HASH_TOP(hash);
466 /* read in the hash top */
467 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1)
470 /* keep looking until we find the right record */
472 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
475 if (TDB_DEAD(r) && (r->rec_len >= length) &&
476 (r->rec_len < best.rec_len)) {
477 best_rec_ptr = rec_ptr;
478 best_last_ptr = last_ptr;
485 if (best.rec_len == UINT32_MAX) {
490 *p_last_ptr = best_last_ptr;
494 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
495 TDB_DATA dbuf, int flag, uint32_t hash)
497 struct tdb_record rec;
501 /* check for it existing, on insert. */
502 if (flag == TDB_INSERT) {
503 if (tdb_exists_hash(tdb, key, hash)) {
504 tdb->ecode = TDB_ERR_EXISTS;
508 /* first try in-place update, on modify or replace. */
509 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
512 if (tdb->ecode == TDB_ERR_NOEXIST &&
513 flag == TDB_MODIFY) {
514 /* if the record doesn't exist and we are in TDB_MODIFY mode then
515 we should fail the store */
519 /* reset the error code potentially set by the tdb_update_hash() */
520 tdb->ecode = TDB_SUCCESS;
522 /* delete any existing record - if it doesn't exist we don't
523 care. Doing this first reduces fragmentation, and avoids
524 coalescing with `allocated' block before it's updated. */
525 if (flag != TDB_INSERT)
526 tdb_delete_hash(tdb, key, hash);
528 /* we have to allocate some space */
529 rec_ptr = tdb_allocate(tdb, hash, key.dsize + dbuf.dsize, &rec);
535 /* Read hash top into next ptr */
536 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
539 rec.key_len = key.dsize;
540 rec.data_len = dbuf.dsize;
541 rec.full_hash = hash;
542 rec.magic = TDB_MAGIC;
544 /* write out and point the top of the hash chain at it */
545 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
546 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec),
547 key.dptr, key.dsize) == -1
548 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize,
549 dbuf.dptr, dbuf.dsize) == -1
550 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
551 /* Need to tdb_unallocate() here */
559 tdb_increment_seqnum(tdb);
564 /* store an element in the database, replacing any existing element
567 return 0 on success, -1 on failure
569 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
574 if (tdb->read_only || tdb->traverse_read) {
575 tdb->ecode = TDB_ERR_RDONLY;
576 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
580 /* find which hash bucket it is in */
581 hash = tdb->hash_fn(&key);
582 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
585 ret = _tdb_store(tdb, key, dbuf, flag, hash);
586 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
587 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
591 /* Append to an entry. Create if not exist. */
592 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
598 /* find which hash bucket it is in */
599 hash = tdb->hash_fn(&key);
600 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
603 dbuf = _tdb_fetch(tdb, key);
605 if (dbuf.dptr == NULL) {
606 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
608 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
609 unsigned char *new_dptr;
611 /* realloc '0' is special: don't do that. */
614 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
615 if (new_dptr == NULL) {
618 dbuf.dptr = new_dptr;
621 if (dbuf.dptr == NULL) {
622 tdb->ecode = TDB_ERR_OOM;
626 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
627 dbuf.dsize += new_dbuf.dsize;
629 ret = _tdb_store(tdb, key, dbuf, 0, hash);
630 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
633 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
634 SAFE_FREE(dbuf.dptr);
640 return the name of the current tdb file
641 useful for external logging functions
643 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
649 return the underlying file descriptor being used by tdb, or -1
650 useful for external routines that want to check the device/inode
653 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
659 return the current logging function
660 useful for external tdb routines that wish to log tdb errors
662 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
664 return tdb->log.log_fn;
669 get the tdb sequence number. Only makes sense if the writers opened
670 with TDB_SEQNUM set. Note that this sequence number will wrap quite
671 quickly, so it should only be used for a 'has something changed'
672 test, not for code that relies on the count of the number of changes
673 made. If you want a counter then use a tdb record.
675 The aim of this sequence number is to allow for a very lightweight
676 test of a possible tdb change.
678 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
682 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
686 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
688 return tdb->hash_size;
691 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
693 return tdb->map_size;
696 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
701 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
703 if ((flags & TDB_ALLOW_NESTING) &&
704 (flags & TDB_DISALLOW_NESTING)) {
705 tdb->ecode = TDB_ERR_NESTING;
706 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
707 "allow_nesting and disallow_nesting are not allowed together!"));
711 if (flags & TDB_ALLOW_NESTING) {
712 tdb->flags &= ~TDB_DISALLOW_NESTING;
714 if (flags & TDB_DISALLOW_NESTING) {
715 tdb->flags &= ~TDB_ALLOW_NESTING;
721 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
723 if ((flags & TDB_ALLOW_NESTING) &&
724 (flags & TDB_DISALLOW_NESTING)) {
725 tdb->ecode = TDB_ERR_NESTING;
726 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
727 "allow_nesting and disallow_nesting are not allowed together!"));
731 if ((flags & TDB_NOLOCK) &&
732 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) &&
733 (tdb->mutexes == NULL)) {
734 tdb->ecode = TDB_ERR_LOCK;
735 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
736 "Can not remove NOLOCK flag on mutexed databases"));
740 if (flags & TDB_ALLOW_NESTING) {
741 tdb->flags |= TDB_DISALLOW_NESTING;
743 if (flags & TDB_DISALLOW_NESTING) {
744 tdb->flags |= TDB_ALLOW_NESTING;
747 tdb->flags &= ~flags;
752 enable sequence number handling on an open tdb
754 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
756 tdb->flags |= TDB_SEQNUM;
761 add a region of the file to the freelist. Length is the size of the region in bytes,
762 which includes the free list header that needs to be added
764 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
766 struct tdb_record rec;
767 if (length <= sizeof(rec)) {
768 /* the region is not worth adding */
771 if (length + offset > tdb->map_size) {
772 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
775 memset(&rec,'\0',sizeof(rec));
776 rec.rec_len = length - sizeof(rec);
777 if (tdb_free(tdb, offset, &rec) == -1) {
778 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
785 wipe the entire database, deleting all records. This can be done
786 very fast by using a allrecord lock. The entire data portion of the
787 file becomes a single entry in the freelist.
789 This code carefully steps around the recovery area, leaving it alone
791 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
794 tdb_off_t offset = 0;
796 tdb_off_t recovery_head;
797 tdb_len_t recovery_size = 0;
799 if (tdb_lockall(tdb) != 0) {
803 tdb_trace(tdb, "tdb_wipe_all");
805 /* see if the tdb has a recovery area, and remember its size
806 if so. We don't want to lose this as otherwise each
807 tdb_wipe_all() in a transaction will increase the size of
808 the tdb by the size of the recovery area */
809 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
810 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
814 if (recovery_head != 0) {
815 struct tdb_record rec;
816 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
817 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
820 recovery_size = rec.rec_len + sizeof(rec);
823 /* wipe the hashes */
824 for (i=0;i<tdb->hash_size;i++) {
825 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
826 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
831 /* wipe the freelist */
832 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
833 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
837 /* add all the rest of the file to the freelist, possibly leaving a gap
838 for the recovery area */
839 if (recovery_size == 0) {
840 /* the simple case - the whole file can be used as a freelist */
841 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size));
842 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
846 /* we need to add two freelist entries - one on either
847 side of the recovery area
849 Note that we cannot shift the recovery area during
850 this operation. Only the transaction.c code may
851 move the recovery area or we risk subtle data
854 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size));
855 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) {
858 /* and the 2nd free list entry after the recovery area - if any */
859 data_len = tdb->map_size - (recovery_head+recovery_size);
860 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
865 tdb_increment_seqnum_nonblock(tdb);
867 if (tdb_unlockall(tdb) != 0) {
868 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
/* State shared between tdb_repack() and its traverse callback. */
struct traverse_state {
	bool error;                  /* set by the callback when a store fails */
	struct tdb_context *dest_db; /* database the records are copied into */
};
885 traverse function for repacking
887 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
889 struct traverse_state *state = (struct traverse_state *)private_data;
890 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
900 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
902 struct tdb_context *tmp_db;
903 struct traverse_state state;
905 tdb_trace(tdb, "tdb_repack");
907 if (tdb_transaction_start(tdb) != 0) {
908 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
912 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
913 if (tmp_db == NULL) {
914 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
915 tdb_transaction_cancel(tdb);
920 state.dest_db = tmp_db;
922 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
923 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
924 tdb_transaction_cancel(tdb);
930 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
931 tdb_transaction_cancel(tdb);
936 if (tdb_wipe_all(tdb) != 0) {
937 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
938 tdb_transaction_cancel(tdb);
946 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
947 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
948 tdb_transaction_cancel(tdb);
954 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
955 tdb_transaction_cancel(tdb);
962 if (tdb_transaction_commit(tdb) != 0) {
963 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
/* Write all 'count' bytes of 'buf' to 'fd'.

   Even on files, we can get partial writes due to signals, so write()
   is repeated until everything is out. A write() that was interrupted
   before transferring anything (EINTR) is simply retried.

   Returns true when every byte was written, false on any other write
   error. */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	while (count) {
		ssize_t ret;

		ret = write(fd, buf, count);
		if (ret < 0) {
			if (errno == EINTR) {
				/* nothing was written yet; try again */
				continue;
			}
			return false;
		}
		buf = (const char *)buf + ret;
		count -= ret;
	}
	return true;
}
984 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret)
986 tdb_off_t ret = a + b;
988 if ((ret < a) || (ret < b)) {
996 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
998 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) {
1004 static void tdb_trace_start(struct tdb_context *tdb)
1007 char msg[sizeof(tdb_off_t) * 4 + 1];
1009 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1010 snprintf(msg, sizeof(msg), "%u ", seqnum);
1011 tdb_trace_write(tdb, msg);
/* Finish a trace line with a newline. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	static const char newline[] = "\n";

	tdb_trace_write(tdb, newline);
}
/* Finish a trace line with " = <ret>" and a newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char suffix[sizeof(ret) * 4 + 4];

	snprintf(suffix, sizeof(suffix), " = %i\n", ret);

	tdb_trace_write(tdb, suffix);
}
1026 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1028 char msg[20 + rec.dsize*2], *p;
1031 /* We differentiate zero-length records from non-existent ones. */
1032 if (rec.dptr == NULL) {
1033 tdb_trace_write(tdb, " NULL");
1037 /* snprintf here is purely cargo-cult programming. */
1039 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1040 for (i = 0; i < rec.dsize; i++)
1041 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1043 tdb_trace_write(tdb, msg);
/* Trace line layout: "<seqnum> <op>\n". */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	tdb_trace_start(tdb);		/* "<seqnum> " */
	tdb_trace_write(tdb, op);	/* operation name */
	tdb_trace_end(tdb);		/* newline */
}
1053 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1055 char msg[sizeof(tdb_off_t) * 4 + 1];
1057 snprintf(msg, sizeof(msg), "%u ", seqnum);
1058 tdb_trace_write(tdb, msg);
1059 tdb_trace_write(tdb, op);
/* Trace an open call: "<seqnum> <op> <hash_size> 0x<tdb_flags> 0x<open_flags>\n". */
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char msg[128];

	/* format first, then emit the line in one go */
	snprintf(msg, sizeof(msg),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);

	tdb_trace_start(tdb);
	tdb_trace_write(tdb, msg);
	tdb_trace_end(tdb);
}
/* Trace line layout: "<seqnum> <op> = <ret>\n". */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	tdb_trace_start(tdb);		/* "<seqnum> " */
	tdb_trace_write(tdb, op);	/* operation name */
	tdb_trace_end_ret(tdb, ret);	/* " = <ret>" and newline */
}
1082 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1084 tdb_trace_start(tdb);
1085 tdb_trace_write(tdb, op);
1086 tdb_trace_write(tdb, " =");
1087 tdb_trace_record(tdb, ret);
1091 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1094 tdb_trace_start(tdb);
1095 tdb_trace_write(tdb, op);
1096 tdb_trace_record(tdb, rec);
1100 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1101 TDB_DATA rec, int ret)
1103 tdb_trace_start(tdb);
1104 tdb_trace_write(tdb, op);
1105 tdb_trace_record(tdb, rec);
1106 tdb_trace_end_ret(tdb, ret);
1109 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1110 TDB_DATA rec, TDB_DATA ret)
1112 tdb_trace_start(tdb);
1113 tdb_trace_write(tdb, op);
1114 tdb_trace_record(tdb, rec);
1115 tdb_trace_write(tdb, " =");
1116 tdb_trace_record(tdb, ret);
1120 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1121 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1124 char msg[1 + sizeof(ret) * 4];
1126 snprintf(msg, sizeof(msg), " %#x", flag);
1127 tdb_trace_start(tdb);
1128 tdb_trace_write(tdb, op);
1129 tdb_trace_record(tdb, rec1);
1130 tdb_trace_record(tdb, rec2);
1131 tdb_trace_write(tdb, msg);
1132 tdb_trace_end_ret(tdb, ret);
1135 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1136 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1138 tdb_trace_start(tdb);
1139 tdb_trace_write(tdb, op);
1140 tdb_trace_record(tdb, rec1);
1141 tdb_trace_record(tdb, rec2);
1142 tdb_trace_write(tdb, " =");
1143 tdb_trace_record(tdb, ret);