 /*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell		   2000
   Copyright (C) Jeremy Allison			   2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
28 #include "tdb_private.h"
30 _PUBLIC_ TDB_DATA tdb_null;
33 non-blocking increment of the tdb sequence number if the tdb has been opened using
36 _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
40 if (!(tdb->flags & TDB_SEQNUM)) {
44 /* we ignore errors from this, as we have no sane way of
47 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
49 tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
53 increment the tdb sequence number if the tdb has been opened using
56 static void tdb_increment_seqnum(struct tdb_context *tdb)
58 if (!(tdb->flags & TDB_SEQNUM)) {
62 if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
63 TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
67 tdb_increment_seqnum_nonblock(tdb);
69 tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
72 static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
74 return memcmp(data.dptr, key.dptr, data.dsize);
77 /* Returns 0 on fail. On success, return offset of record, and fills
79 static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
84 /* read in the hash top */
85 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
88 /* keep looking until we find the right record */
90 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
93 if (!TDB_DEAD(r) && hash==r->full_hash
94 && key.dsize==r->key_len
95 && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
96 r->key_len, tdb_key_compare,
100 /* detect tight infinite loop */
101 if (rec_ptr == r->next) {
102 tdb->ecode = TDB_ERR_CORRUPT;
103 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_find: loop detected.\n"));
108 tdb->ecode = TDB_ERR_NOEXIST;
112 /* As tdb_find, but if you succeed, keep the lock */
113 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
114 struct tdb_record *rec)
118 if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
120 if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
121 tdb_unlock(tdb, BUCKET(hash), locktype);
125 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
127 /* update an entry in place - this only works if the new data size
128 is <= the old data size and the key exists.
129 on failure return -1.
131 static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf)
133 struct tdb_record rec;
137 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
140 /* it could be an exact duplicate of what is there - this is
141 * surprisingly common (eg. with a ldb re-index). */
142 if (rec.key_len == key.dsize &&
143 rec.data_len == dbuf.dsize &&
144 rec.full_hash == hash) {
145 TDB_DATA data = _tdb_fetch(tdb, key);
146 if (data.dsize == dbuf.dsize &&
147 memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) {
158 /* must be long enough key, data and tailer */
159 if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
160 tdb->ecode = TDB_SUCCESS; /* Not really an error */
164 if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
165 dbuf.dptr, dbuf.dsize) == -1)
168 if (dbuf.dsize != rec.data_len) {
170 rec.data_len = dbuf.dsize;
171 return tdb_rec_write(tdb, rec_ptr, &rec);
177 /* find an entry in the database given a key */
178 /* If an entry doesn't exist tdb_err will be set to
179 * TDB_ERR_NOEXIST. If a key has no data attached
180 * then the TDB_DATA will have zero length but
183 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
186 struct tdb_record rec;
190 /* find which hash bucket it is in */
191 hash = tdb->hash_fn(&key);
192 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
195 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
197 ret.dsize = rec.data_len;
198 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
202 _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
204 TDB_DATA ret = _tdb_fetch(tdb, key);
206 tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret);
211 * Find an entry in the database and hand the record's data to a parsing
212 * function. The parsing function is executed under the chain read lock, so it
213 * should be fast and should not block on other syscalls.
215 * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
217 * For mmapped tdb's that do not have a transaction open it points the parsing
218 * function directly at the mmap area, it avoids the malloc/memcpy in this
219 * case. If a transaction is open or no mmap is available, it has to do
220 * malloc/read/parse/free.
222 * This is interesting for all readers of potentially large data structures in
223 * the tdb records, ldb indexes being one example.
225 * Return -1 if the record was not found.
228 _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
229 int (*parser)(TDB_DATA key, TDB_DATA data,
234 struct tdb_record rec;
238 /* find which hash bucket it is in */
239 hash = tdb->hash_fn(&key);
241 if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
242 /* record not found */
243 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
244 tdb->ecode = TDB_ERR_NOEXIST;
247 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
249 ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
250 rec.data_len, parser, private_data);
252 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
257 /* check if an entry in the database exists
259 note that 1 is returned if the key is found and 0 is returned if not found
260 this doesn't match the conventions in the rest of this module, but is
263 static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
265 struct tdb_record rec;
267 if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
269 tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
273 _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
275 uint32_t hash = tdb->hash_fn(&key);
278 ret = tdb_exists_hash(tdb, key, hash);
279 tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret);
283 /* actually delete an entry in the database given the offset */
284 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec)
286 tdb_off_t last_ptr, i;
287 struct tdb_record lastrec;
289 if (tdb->read_only || tdb->traverse_read) return -1;
291 if (((tdb->traverse_write != 0) && (!TDB_DEAD(rec))) ||
292 tdb_write_lock_record(tdb, rec_ptr) == -1) {
293 /* Someone traversing here: mark it as dead */
294 rec->magic = TDB_DEAD_MAGIC;
295 return tdb_rec_write(tdb, rec_ptr, rec);
297 if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
300 /* find previous record in hash chain */
301 if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
303 for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
304 if (tdb_rec_read(tdb, i, &lastrec) == -1)
307 /* unlink it: next ptr is at start of record. */
309 last_ptr = TDB_HASH_TOP(rec->full_hash);
310 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
313 /* recover the space */
314 if (tdb_free(tdb, rec_ptr, rec) == -1)
319 static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
323 struct tdb_record rec;
325 /* read in the hash top */
326 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
330 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
333 if (rec.magic == TDB_DEAD_MAGIC) {
342 * Purge all DEAD records from a hash chain
344 static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
347 struct tdb_record rec;
350 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
354 /* read in the hash top */
355 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
361 if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
367 if (rec.magic == TDB_DEAD_MAGIC
368 && tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
375 tdb_unlock(tdb, -1, F_WRLCK);
379 /* delete an entry in the database given a key */
380 static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
383 struct tdb_record rec;
386 if (tdb->max_dead_records != 0) {
389 * Allow for some dead records per hash chain, mainly for
390 * tdb's with a very high create/delete rate like locking.tdb.
393 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
396 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
398 * Don't let the per-chain freelist grow too large,
399 * delete all existing dead records
401 tdb_purge_dead(tdb, hash);
404 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
405 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
410 * Just mark the record as dead.
412 rec.magic = TDB_DEAD_MAGIC;
413 ret = tdb_rec_write(tdb, rec_ptr, &rec);
416 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
420 ret = tdb_do_delete(tdb, rec_ptr, &rec);
424 tdb_increment_seqnum(tdb);
427 if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
428 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
432 _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
434 uint32_t hash = tdb->hash_fn(&key);
437 ret = tdb_delete_hash(tdb, key, hash);
438 tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret);
443 * See if we have a dead record around with enough space
445 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
446 struct tdb_record *r, tdb_len_t length)
450 /* read in the hash top */
451 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
454 /* keep looking until we find the right record */
456 if (tdb_rec_read(tdb, rec_ptr, r) == -1)
459 if (TDB_DEAD(r) && r->rec_len >= length) {
461 * First fit for simple coding, TODO: change to best
471 static int _tdb_store(struct tdb_context *tdb, TDB_DATA key,
472 TDB_DATA dbuf, int flag, uint32_t hash)
474 struct tdb_record rec;
478 /* check for it existing, on insert. */
479 if (flag == TDB_INSERT) {
480 if (tdb_exists_hash(tdb, key, hash)) {
481 tdb->ecode = TDB_ERR_EXISTS;
485 /* first try in-place update, on modify or replace. */
486 if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
489 if (tdb->ecode == TDB_ERR_NOEXIST &&
490 flag == TDB_MODIFY) {
491 /* if the record doesn't exist and we are in TDB_MODIFY mode then
492 we should fail the store */
496 /* reset the error code potentially set by the tdb_update() */
497 tdb->ecode = TDB_SUCCESS;
499 /* delete any existing record - if it doesn't exist we don't
500 care. Doing this first reduces fragmentation, and avoids
501 coalescing with `allocated' block before it's updated. */
502 if (flag != TDB_INSERT)
503 tdb_delete_hash(tdb, key, hash);
505 if (tdb->max_dead_records != 0) {
507 * Allow for some dead records per hash chain, look if we can
508 * find one that can hold the new record. We need enough space
509 * for key, data and tailer. If we find one, we don't have to
510 * consult the central freelist.
512 rec_ptr = tdb_find_dead(
514 key.dsize + dbuf.dsize + sizeof(tdb_off_t));
517 rec.key_len = key.dsize;
518 rec.data_len = dbuf.dsize;
519 rec.full_hash = hash;
520 rec.magic = TDB_MAGIC;
521 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
522 || tdb->methods->tdb_write(
523 tdb, rec_ptr + sizeof(rec),
524 key.dptr, key.dsize) == -1
525 || tdb->methods->tdb_write(
526 tdb, rec_ptr + sizeof(rec) + key.dsize,
527 dbuf.dptr, dbuf.dsize) == -1) {
535 * We have to allocate some space from the freelist, so this means we
536 * have to lock it. Use the chance to purge all the DEAD records from
537 * the hash chain under the freelist lock.
540 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
544 if ((tdb->max_dead_records != 0)
545 && (tdb_purge_dead(tdb, hash) == -1)) {
546 tdb_unlock(tdb, -1, F_WRLCK);
550 /* we have to allocate some space */
551 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
553 tdb_unlock(tdb, -1, F_WRLCK);
559 /* Read hash top into next ptr */
560 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
563 rec.key_len = key.dsize;
564 rec.data_len = dbuf.dsize;
565 rec.full_hash = hash;
566 rec.magic = TDB_MAGIC;
568 /* write out and point the top of the hash chain at it */
569 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
570 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec),
571 key.dptr, key.dsize) == -1
572 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize,
573 dbuf.dptr, dbuf.dsize) == -1
574 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
575 /* Need to tdb_unallocate() here */
583 tdb_increment_seqnum(tdb);
588 /* store an element in the database, replacing any existing element
591 return 0 on success, -1 on failure
593 _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
598 if (tdb->read_only || tdb->traverse_read) {
599 tdb->ecode = TDB_ERR_RDONLY;
600 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1);
604 /* find which hash bucket it is in */
605 hash = tdb->hash_fn(&key);
606 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
609 ret = _tdb_store(tdb, key, dbuf, flag, hash);
610 tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret);
611 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
615 /* Append to an entry. Create if not exist. */
616 _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
622 /* find which hash bucket it is in */
623 hash = tdb->hash_fn(&key);
624 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
627 dbuf = _tdb_fetch(tdb, key);
629 if (dbuf.dptr == NULL) {
630 dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
632 unsigned int new_len = dbuf.dsize + new_dbuf.dsize;
633 unsigned char *new_dptr;
635 /* realloc '0' is special: don't do that. */
638 new_dptr = (unsigned char *)realloc(dbuf.dptr, new_len);
639 if (new_dptr == NULL) {
642 dbuf.dptr = new_dptr;
645 if (dbuf.dptr == NULL) {
646 tdb->ecode = TDB_ERR_OOM;
650 memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
651 dbuf.dsize += new_dbuf.dsize;
653 ret = _tdb_store(tdb, key, dbuf, 0, hash);
654 tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
657 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
658 SAFE_FREE(dbuf.dptr);
664 return the name of the current tdb file
665 useful for external logging functions
667 _PUBLIC_ const char *tdb_name(struct tdb_context *tdb)
673 return the underlying file descriptor being used by tdb, or -1
674 useful for external routines that want to check the device/inode
677 _PUBLIC_ int tdb_fd(struct tdb_context *tdb)
683 return the current logging function
684 useful for external tdb routines that wish to log tdb errors
686 _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb)
688 return tdb->log.log_fn;
693 get the tdb sequence number. Only makes sense if the writers opened
694 with TDB_SEQNUM set. Note that this sequence number will wrap quite
695 quickly, so it should only be used for a 'has something changed'
696 test, not for code that relies on the count of the number of changes
697 made. If you want a counter then use a tdb record.
699 The aim of this sequence number is to allow for a very lightweight
700 test of a possible tdb change.
702 _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb)
706 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
710 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb)
712 return tdb->header.hash_size;
715 _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb)
717 return tdb->map_size;
720 _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb)
725 _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags)
727 if ((flags & TDB_ALLOW_NESTING) &&
728 (flags & TDB_DISALLOW_NESTING)) {
729 tdb->ecode = TDB_ERR_NESTING;
730 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: "
731 "allow_nesting and disallow_nesting are not allowed together!"));
735 if (flags & TDB_ALLOW_NESTING) {
736 tdb->flags &= ~TDB_DISALLOW_NESTING;
738 if (flags & TDB_DISALLOW_NESTING) {
739 tdb->flags &= ~TDB_ALLOW_NESTING;
745 _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags)
747 if ((flags & TDB_ALLOW_NESTING) &&
748 (flags & TDB_DISALLOW_NESTING)) {
749 tdb->ecode = TDB_ERR_NESTING;
750 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: "
751 "allow_nesting and disallow_nesting are not allowed together!"));
755 if (flags & TDB_ALLOW_NESTING) {
756 tdb->flags |= TDB_DISALLOW_NESTING;
758 if (flags & TDB_DISALLOW_NESTING) {
759 tdb->flags |= TDB_ALLOW_NESTING;
762 tdb->flags &= ~flags;
767 enable sequence number handling on an open tdb
769 _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb)
771 tdb->flags |= TDB_SEQNUM;
776 add a region of the file to the freelist. Length is the size of the region in bytes,
777 which includes the free list header that needs to be added
779 static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length)
781 struct tdb_record rec;
782 if (length <= sizeof(rec)) {
783 /* the region is not worth adding */
786 if (length + offset > tdb->map_size) {
787 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n"));
790 memset(&rec,'\0',sizeof(rec));
791 rec.rec_len = length - sizeof(rec);
792 if (tdb_free(tdb, offset, &rec) == -1) {
793 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n"));
800 wipe the entire database, deleting all records. This can be done
801 very fast by using a allrecord lock. The entire data portion of the
802 file becomes a single entry in the freelist.
804 This code carefully steps around the recovery area, leaving it alone
806 _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb)
809 tdb_off_t offset = 0;
811 tdb_off_t recovery_head;
812 tdb_len_t recovery_size = 0;
814 if (tdb_lockall(tdb) != 0) {
818 tdb_trace(tdb, "tdb_wipe_all");
820 /* see if the tdb has a recovery area, and remember its size
821 if so. We don't want to lose this as otherwise each
822 tdb_wipe_all() in a transaction will increase the size of
823 the tdb by the size of the recovery area */
824 if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
825 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n"));
829 if (recovery_head != 0) {
830 struct tdb_record rec;
831 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
832 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n"));
835 recovery_size = rec.rec_len + sizeof(rec);
838 /* wipe the hashes */
839 for (i=0;i<tdb->header.hash_size;i++) {
840 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) {
841 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i));
846 /* wipe the freelist */
847 if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
848 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n"));
852 /* add all the rest of the file to the freelist, possibly leaving a gap
853 for the recovery area */
854 if (recovery_size == 0) {
855 /* the simple case - the whole file can be used as a freelist */
856 data_len = (tdb->map_size - TDB_DATA_START(tdb->header.hash_size));
857 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
861 /* we need to add two freelist entries - one on either
862 side of the recovery area
864 Note that we cannot shift the recovery area during
865 this operation. Only the transaction.c code may
866 move the recovery area or we risk subtle data
869 data_len = (recovery_head - TDB_DATA_START(tdb->header.hash_size));
870 if (tdb_free_region(tdb, TDB_DATA_START(tdb->header.hash_size), data_len) != 0) {
873 /* and the 2nd free list entry after the recovery area - if any */
874 data_len = tdb->map_size - (recovery_head+recovery_size);
875 if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) {
880 tdb_increment_seqnum_nonblock(tdb);
882 if (tdb_unlockall(tdb) != 0) {
883 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n"));
894 struct traverse_state {
896 struct tdb_context *dest_db;
900 traverse function for repacking
902 static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data)
904 struct traverse_state *state = (struct traverse_state *)private_data;
905 if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) {
915 _PUBLIC_ int tdb_repack(struct tdb_context *tdb)
917 struct tdb_context *tmp_db;
918 struct traverse_state state;
920 tdb_trace(tdb, "tdb_repack");
922 if (tdb_transaction_start(tdb) != 0) {
923 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n"));
927 tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0);
928 if (tmp_db == NULL) {
929 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n"));
930 tdb_transaction_cancel(tdb);
935 state.dest_db = tmp_db;
937 if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) {
938 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n"));
939 tdb_transaction_cancel(tdb);
945 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n"));
946 tdb_transaction_cancel(tdb);
951 if (tdb_wipe_all(tdb) != 0) {
952 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n"));
953 tdb_transaction_cancel(tdb);
961 if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) {
962 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n"));
963 tdb_transaction_cancel(tdb);
969 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n"));
970 tdb_transaction_cancel(tdb);
977 if (tdb_transaction_commit(tdb) != 0) {
978 TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n"));
/* Even on files, we can get partial writes due to signals.

   Keeps calling write() until all count bytes of buf have been written to
   fd. Returns true on success (including count == 0) and false as soon as
   write() fails or makes no progress. */
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	const char *p = (const char *)buf;

	while (count != 0) {
		ssize_t ret = write(fd, p, count);

		if (ret <= 0) {
			/* error, or a zero-byte write: give up rather than
			 * retry forever */
			return false;
		}
		p += ret;
		count -= (size_t)ret;
	}
	return true;
}
1000 static void tdb_trace_write(struct tdb_context *tdb, const char *str)
1002 if (!tdb_write_alltdb->tracefd, str, strlen(str)) {
1003 close(tdb->tracefd);
1008 static void tdb_trace_start(struct tdb_context *tdb)
1011 char msg[sizeof(tdb_off_t) * 4 + 1];
1013 tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
1014 snprintf(msg, sizeof(msg), "%u ", seqnum);
1015 tdb_trace_write(tdb, msg);
/* end a trace line with a newline */
static void tdb_trace_end(struct tdb_context *tdb)
{
	tdb_trace_write(tdb, "\n");
}
/* end a trace line with " = <ret>" and a newline */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char msg[sizeof(ret) * 4 + 4];

	snprintf(msg, sizeof(msg), " = %i\n", ret);
	tdb_trace_write(tdb, msg);
}
1030 static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec)
1032 char msg[20 + rec.dsize*2], *p;
1035 /* We differentiate zero-length records from non-existent ones. */
1036 if (rec.dptr == NULL) {
1037 tdb_trace_write(tdb, " NULL");
1041 /* snprintf here is purely cargo-cult programming. */
1043 p += snprintf(p, sizeof(msg), " %zu:", rec.dsize);
1044 for (i = 0; i < rec.dsize; i++)
1045 p += snprintf(p, 2, "%02x", rec.dptr[i]);
1047 tdb_trace_write(tdb, msg);
/* trace line: "<seqnum> <op>" */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end(tdb);
}
1057 void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op)
1059 char msg[sizeof(tdb_off_t) * 4 + 1];
1061 snprintf(msg, sizeof(msg), "%u ", seqnum);
1062 tdb_trace_write(tdb, msg);
1063 tdb_trace_write(tdb, op);
/* trace line: "<seqnum> <op> <hash_size> 0x<tdb_flags> 0x<open_flags>" */
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	/* NOTE(review): the declaration of msg is missing from this
	 * extract; the size is reconstructed - confirm. snprintf()
	 * truncates if op is too long for it. */
	char msg[128];

	snprintf(msg, sizeof(msg),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, msg);
	tdb_trace_end(tdb);
}
/* trace line: "<seqnum> <op> = <ret>" */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);
	tdb_trace_end_ret(tdb, ret);
}
1086 void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret)
1088 tdb_trace_start(tdb);
1089 tdb_trace_write(tdb, op);
1090 tdb_trace_write(tdb, " =");
1091 tdb_trace_record(tdb, ret);
1095 void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
1098 tdb_trace_start(tdb);
1099 tdb_trace_write(tdb, op);
1100 tdb_trace_record(tdb, rec);
1104 void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
1105 TDB_DATA rec, int ret)
1107 tdb_trace_start(tdb);
1108 tdb_trace_write(tdb, op);
1109 tdb_trace_record(tdb, rec);
1110 tdb_trace_end_ret(tdb, ret);
1113 void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
1114 TDB_DATA rec, TDB_DATA ret)
1116 tdb_trace_start(tdb);
1117 tdb_trace_write(tdb, op);
1118 tdb_trace_record(tdb, rec);
1119 tdb_trace_write(tdb, " =");
1120 tdb_trace_record(tdb, ret);
1124 void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
1125 TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
1128 char msg[1 + sizeof(ret) * 4];
1130 snprintf(msg, sizeof(msg), " %#x", flag);
1131 tdb_trace_start(tdb);
1132 tdb_trace_write(tdb, op);
1133 tdb_trace_record(tdb, rec1);
1134 tdb_trace_record(tdb, rec2);
1135 tdb_trace_write(tdb, msg);
1136 tdb_trace_end_ret(tdb, ret);
1139 void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
1140 TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret)
1142 tdb_trace_start(tdb);
1143 tdb_trace_write(tdb, op);
1144 tdb_trace_record(tdb, rec1);
1145 tdb_trace_record(tdb, rec2);
1146 tdb_trace_write(tdb, " =");
1147 tdb_trace_record(tdb, ret);