goto free;
break;
case TDB_RECOVERY_MAGIC:
- case 0: /* Used for invalid (or in-progress) recovery area. */
+ case TDB_RECOVERY_INVALID_MAGIC:
if (recovery_start != off) {
TDB_LOG((tdb, TDB_DEBUG_ERROR,
"Unexpected recovery record at offset %d\n",
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
}
/* Add an element into the freelist. Merge adjacent records if
- neccessary. */
+ necessary. */
int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
{
/* Allocation and tailer lock */
tdb_off_t left = offset - sizeof(tdb_off_t);
struct tdb_record l;
tdb_off_t leftsize;
-
+
/* Read in tailer and jump back to header */
if (tdb_ofs_read(tdb, left, &leftsize) == -1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left));
bestfit.rec_len < length * multiplier) {
break;
}
-
+
/* this multiplier means we only extremely rarely
search more than 50 or so records. At 50 records we
accept records up to 11 times larger than what we
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
tdb_next_hash_chain,
tdb_oob,
tdb_expand_file,
- tdb_brlock
};
/*
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
#include "tdb_private.h"
-#define TDB_MARK_LOCK 0x80000000
-
void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *ptr)
{
tdb->interrupt_sig_ptr = ptr;
}
+/* Request an fcntl byte-range lock of type rw on len bytes starting at
+ * off in tdb->fd.  A set waitflag selects the blocking F_SETLKW command,
+ * otherwise F_SETLK is used.  Returns whatever fcntl() returns. */
+static int fcntl_lock(struct tdb_context *tdb,
+		      int rw, off_t off, off_t len, bool waitflag)
+{
+	const int cmd = waitflag ? F_SETLKW : F_SETLK;
+	struct flock request;
+
+	request.l_type = rw;
+	request.l_whence = SEEK_SET;
+	request.l_start = off;
+	request.l_len = len;
+	request.l_pid = 0;
+
+	return fcntl(tdb->fd, cmd, &request);
+}
+
+/* Drop the fcntl byte-range lock on len bytes starting at off in tdb->fd.
+ * rw is the lock type the caller believes it holds; it is consulted only
+ * by the compiled-out /proc/locks cross-check below.  Returns whatever
+ * fcntl() returns. */
+static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
+{
+	struct flock fl;
+#if 0 /* Check they matched up locks and unlocks correctly. */
+	static const char posix_tag[] = " POSIX ADVISORY ";
+	char line[80];
+	FILE *locks;
+	bool found = false;
+
+	locks = fopen("/proc/locks", "r");
+	if (locks == NULL) {
+		/* Nothing can be verified without the lock table. */
+		fprintf(stderr, "Cannot open /proc/locks\n");
+		abort();
+	}
+
+	while (fgets(line, sizeof(line), locks)) {
+		char *p;
+		int type, start, l;
+
+		/* eg. 1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */
+		p = strchr(line, ':');
+		if (p == NULL)
+			continue;
+		p++;
+		/* Only POSIX (fcntl) locks are of interest here. */
+		if (strncmp(p, posix_tag, strlen(posix_tag)))
+			continue;
+		p += strlen(posix_tag);
+		if (strncmp(p, "READ ", strlen("READ ")) == 0)
+			type = F_RDLCK;
+		else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
+			type = F_WRLCK;
+		else
+			abort();
+		p += 6;
+		if (atoi(p) != getpid())
+			continue;
+		p = strchr(strchr(p, ' ') + 1, ' ') + 1;
+		start = atoi(p);
+		p = strchr(p, ' ') + 1;
+		if (strncmp(p, "EOF", 3) == 0)
+			l = 0;
+		else
+			l = atoi(p) - start + 1;
+
+		if (off == start) {
+			if (len != l) {
+				fprintf(stderr, "Len %d should be %d: %s",
+					(int)len, l, line);
+				abort();
+			}
+			if (type != rw) {
+				fprintf(stderr, "Type %s wrong: %s",
+					rw == F_RDLCK ? "READ" : "WRITE", line);
+				abort();
+			}
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		fprintf(stderr, "Unlock on %d@%d not found!\n",
+			(int)off, (int)len);
+		abort();
+	}
+
+	fclose(locks);
+#endif
+
+	fl.l_type = F_UNLCK;
+	fl.l_whence = SEEK_SET;
+	fl.l_start = off;
+	fl.l_len = len;
+	fl.l_pid = 0;
+
+	return fcntl(tdb->fd, F_SETLKW, &fl);
+}
+
+/* Map a list number to the file offset used to lock it.
+ * List -1 is the alloc list; any other value is a hash chain. */
+static tdb_off_t lock_offset(int list)
+{
+	tdb_off_t chain_off = FREELIST_TOP + 4*list;
+
+	return chain_off;
+}
+
/* a byte range locking function - return 0 on success
this functions locks/unlocks 1 byte at the specified offset.
note that a len of zero means lock to end of file
*/
-int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset,
- int rw_type, int lck_type, int probe, size_t len)
+int tdb_brlock(struct tdb_context *tdb,
+ int rw_type, tdb_off_t offset, size_t len,
+ enum tdb_lock_flags flags)
{
- struct flock fl;
int ret;
if (tdb->flags & TDB_NOLOCK) {
return 0;
}
+ if (flags & TDB_LOCK_MARK_ONLY) {
+ return 0;
+ }
+
if ((rw_type == F_WRLCK) && (tdb->read_only || tdb->traverse_read)) {
tdb->ecode = TDB_ERR_RDONLY;
return -1;
}
- fl.l_type = rw_type;
- fl.l_whence = SEEK_SET;
- fl.l_start = offset;
- fl.l_len = len;
- fl.l_pid = 0;
+ /* Sanity check */
+ if (tdb->transaction && offset >= lock_offset(-1) && len != 0) {
+ tdb->ecode = TDB_ERR_RDONLY;
+ TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_brlock attempted in transaction at offset %d rw_type=%d flags=%d len=%d\n",
+ offset, rw_type, flags, (int)len));
+ return -1;
+ }
do {
- ret = fcntl(tdb->fd,lck_type,&fl);
-
+ ret = fcntl_lock(tdb, rw_type, offset, len,
+ flags & TDB_LOCK_WAIT);
/* Check for a sigalarm break. */
if (ret == -1 && errno == EINTR &&
tdb->interrupt_sig_ptr &&
/* Generic lock error. errno set by fcntl.
* EAGAIN is an expected return from non-blocking
* locks. */
- if (!probe && lck_type != F_SETLK) {
- TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d len=%d\n",
- tdb->fd, offset, rw_type, lck_type, (int)len));
+ if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
+ TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d flags=%d len=%d\n",
+ tdb->fd, offset, rw_type, flags, (int)len));
}
return -1;
}
return 0;
}
+/* Release the byte-range lock on len bytes at offset.
+ * Returns 0 straight away when TDB_NOLOCK is set in tdb->flags.  The
+ * unlock is retried for as long as it fails with EINTR; any other
+ * failure is logged and -1 is returned. */
+int tdb_brunlock(struct tdb_context *tdb,
+		 int rw_type, tdb_off_t offset, size_t len)
+{
+	int rc;
+
+	if (tdb->flags & TDB_NOLOCK) {
+		return 0;
+	}
+
+	for (;;) {
+		rc = fcntl_unlock(tdb, rw_type, offset, len);
+		if (rc != -1 || errno != EINTR) {
+			break;
+		}
+	}
+
+	if (rc != -1) {
+		return rc;
+	}
+
+	TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %d rw_type=%d len=%d\n",
+		 tdb->fd, offset, rw_type, (int)len));
+	return rc;
+}
/*
upgrade a read lock to a write lock. This needs to be handled in a
deadlock detection and claim a deadlock when progress can be
made. For those OSes we may loop for a while.
*/
-int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len)
+int tdb_allrecord_upgrade(struct tdb_context *tdb)
{
int count = 1000;
+
+ if (tdb->allrecord_lock.count != 1) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR,
+ "tdb_allrecord_upgrade failed: count %u too high\n",
+ tdb->allrecord_lock.count));
+ return -1;
+ }
+
+ if (tdb->allrecord_lock.off != 1) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR,
+ "tdb_allrecord_upgrade failed: already upgraded?\n"));
+ return -1;
+ }
+
while (count--) {
struct timeval tv;
- if (tdb_brlock(tdb, offset, F_WRLCK, F_SETLKW, 1, len) == 0) {
+ if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0,
+ TDB_LOCK_WAIT|TDB_LOCK_PROBE) == 0) {
+ tdb->allrecord_lock.ltype = F_WRLCK;
+ tdb->allrecord_lock.off = 0;
return 0;
}
if (errno != EDEADLK) {
tv.tv_usec = 1;
select(0, NULL, NULL, NULL, &tv);
}
- TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock_upgrade failed at offset %d\n", offset));
+ TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n"));
return -1;
}
-
-/* lock a list in the database. list -1 is the alloc list */
-static int _tdb_lock(struct tdb_context *tdb, int list, int ltype, int op)
+static struct tdb_lock_type *find_nestlock(struct tdb_context *tdb,
+ tdb_off_t offset)
{
- struct tdb_lock_type *new_lck;
- int i;
- bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
+ unsigned int i;
- ltype &= ~TDB_MARK_LOCK;
-
- /* a global lock allows us to avoid per chain locks */
- if (tdb->global_lock.count &&
- (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) {
- return 0;
+ for (i=0; i<tdb->num_lockrecs; i++) {
+ if (tdb->lockrecs[i].off == offset) {
+ return &tdb->lockrecs[i];
+ }
}
+ return NULL;
+}
- if (tdb->global_lock.count) {
- tdb->ecode = TDB_ERR_LOCK;
- return -1;
- }
+/* lock an offset in the database. */
+int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
+ enum tdb_lock_flags flags)
+{
+ struct tdb_lock_type *new_lck;
- if (list < -1 || list >= (int)tdb->header.hash_size) {
+ if (offset >= lock_offset(tdb->header.hash_size)) {
tdb->ecode = TDB_ERR_LOCK;
- TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid list %d for ltype=%d\n",
- list, ltype));
+ TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid offset %u for ltype=%d\n",
+ offset, ltype));
return -1;
}
if (tdb->flags & TDB_NOLOCK)
return 0;
- for (i=0; i<tdb->num_lockrecs; i++) {
- if (tdb->lockrecs[i].list == list) {
- if (tdb->lockrecs[i].count == 0) {
- /*
- * Can't happen, see tdb_unlock(). It should
- * be an assert.
- */
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock: "
- "lck->count == 0 for list %d", list));
- }
- /*
- * Just increment the in-memory struct, posix locks
- * don't stack.
- */
- tdb->lockrecs[i].count++;
- return 0;
- }
+ new_lck = find_nestlock(tdb, offset);
+ if (new_lck) {
+ /*
+ * Just increment the in-memory struct, posix locks
+ * don't stack.
+ */
+ new_lck->count++;
+ return 0;
}
new_lck = (struct tdb_lock_type *)realloc(
/* Since fcntl locks don't nest, we do a lock for the first one,
and simply bump the count for future ones */
- if (!mark_lock &&
- tdb->methods->tdb_brlock(tdb,FREELIST_TOP+4*list, ltype, op,
- 0, 1)) {
+ if (tdb_brlock(tdb, ltype, offset, 1, flags)) {
return -1;
}
- tdb->num_locks++;
-
- tdb->lockrecs[tdb->num_lockrecs].list = list;
+ tdb->lockrecs[tdb->num_lockrecs].off = offset;
tdb->lockrecs[tdb->num_lockrecs].count = 1;
tdb->lockrecs[tdb->num_lockrecs].ltype = ltype;
- tdb->num_lockrecs += 1;
+ tdb->num_lockrecs++;
return 0;
}
+/* Run transaction recovery while holding write locks on everything from
+ * FREELIST_TOP onwards and on the OPEN_LOCK byte, releasing both
+ * afterwards.  Returns -1 if either lock cannot be taken, otherwise the
+ * result of tdb_transaction_recover(). */
+static int tdb_lock_and_recover(struct tdb_context *tdb)
+{
+	int result = -1;
+
+	/* We need to match locking order in transaction commit. */
+	if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT) != 0) {
+		return -1;
+	}
+
+	if (tdb_brlock(tdb, F_WRLCK, OPEN_LOCK, 1, TDB_LOCK_WAIT) == 0) {
+		result = tdb_transaction_recover(tdb);
+		tdb_brunlock(tdb, F_WRLCK, OPEN_LOCK, 1);
+	}
+
+	/* Taken first, so released last - on success and failure alike. */
+	tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+
+	return result;
+}
+
+/* Report whether any recorded nested lock sits at or beyond the alloc
+ * list's lock offset, i.e. lock_offset(-1). */
+static bool have_data_locks(const struct tdb_context *tdb)
+{
+	const tdb_off_t data_start = lock_offset(-1);
+	unsigned int idx = 0;
+
+	while (idx < tdb->num_lockrecs) {
+		if (tdb->lockrecs[idx].off >= data_start) {
+			return true;
+		}
+		idx++;
+	}
+	return false;
+}
+
+/* Lock one list (-1 is the alloc list, otherwise a hash chain).
+ * Returns 0 without locking when a compatible allrecord lock is held,
+ * and -1 with TDB_ERR_LOCK when an incompatible one is held.  When this
+ * is the first data lock and the database needs recovery, the lock is
+ * dropped, recovery is run and the lock attempt is repeated. */
+static int tdb_lock_list(struct tdb_context *tdb, int list, int ltype,
+			 enum tdb_lock_flags waitflag)
+{
+	bool first_data_lock;
+	int ret;
+
+	if (tdb->allrecord_lock.count) {
+		/* an allrecord lock allows us to avoid per chain locks */
+		if (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK) {
+			return 0;
+		}
+		tdb->ecode = TDB_ERR_LOCK;
+		return -1;
+	}
+
+	/* Only check when we grab first data lock. */
+	first_data_lock = !have_data_locks(tdb);
+	ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag);
+
+	if (ret != 0 || !first_data_lock || !tdb_needs_recovery(tdb)) {
+		return ret;
+	}
+
+	/* Recovery takes its own locks, so give this one back first. */
+	tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
+
+	if (tdb_lock_and_recover(tdb) == -1) {
+		return -1;
+	}
+	return tdb_lock_list(tdb, list, ltype, waitflag);
+}
+
/* lock a list in the database. list -1 is the alloc list */
int tdb_lock(struct tdb_context *tdb, int list, int ltype)
{
int ret;
- ret = _tdb_lock(tdb, list, ltype, F_SETLKW);
+
+ ret = tdb_lock_list(tdb, list, ltype, TDB_LOCK_WAIT);
if (ret) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock failed on list %d "
"ltype=%d (%s)\n", list, ltype, strerror(errno)));
/* lock a list in the database. list -1 is the alloc list. non-blocking lock */
int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype)
{
- return _tdb_lock(tdb, list, ltype, F_SETLK);
+ return tdb_lock_list(tdb, list, ltype, TDB_LOCK_NOWAIT);
}
-/* unlock the database: returns void because it's too late for errors. */
- /* changed to return int it may be interesting to know there
- has been an error --simo */
-int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
+int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype,
+ bool mark_lock)
{
int ret = -1;
- int i;
- struct tdb_lock_type *lck = NULL;
- bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
-
- ltype &= ~TDB_MARK_LOCK;
-
- /* a global lock allows us to avoid per chain locks */
- if (tdb->global_lock.count &&
- (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) {
- return 0;
- }
-
- if (tdb->global_lock.count) {
- tdb->ecode = TDB_ERR_LOCK;
- return -1;
- }
+ struct tdb_lock_type *lck;
if (tdb->flags & TDB_NOLOCK)
return 0;
/* Sanity checks */
- if (list < -1 || list >= (int)tdb->header.hash_size) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size));
+ if (offset >= lock_offset(tdb->header.hash_size)) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->header.hash_size));
return ret;
}
- for (i=0; i<tdb->num_lockrecs; i++) {
- if (tdb->lockrecs[i].list == list) {
- lck = &tdb->lockrecs[i];
- break;
- }
- }
-
+ lck = find_nestlock(tdb, offset);
if ((lck == NULL) || (lck->count == 0)) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: count is 0\n"));
return -1;
if (mark_lock) {
ret = 0;
} else {
- ret = tdb->methods->tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK,
- F_SETLKW, 0, 1);
+ ret = tdb_brunlock(tdb, ltype, offset, 1);
}
- tdb->num_locks--;
/*
* Shrink the array by overwriting the element just unlocked with the
* last array element.
*/
-
- if (tdb->num_lockrecs > 1) {
- *lck = tdb->lockrecs[tdb->num_lockrecs-1];
- }
- tdb->num_lockrecs -= 1;
+ *lck = tdb->lockrecs[--tdb->num_lockrecs];
/*
* We don't bother with realloc when the array shrinks, but if we have
return ret;
}
-/*
- get the transaction lock
- */
-int tdb_transaction_lock(struct tdb_context *tdb, int ltype)
+int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
{
- if (tdb->global_lock.count) {
- return 0;
- }
- if (tdb->transaction_lock_count > 0) {
- tdb->transaction_lock_count++;
+ /* an allrecord lock allows us to avoid per chain locks */
+ if (tdb->allrecord_lock.count &&
+ (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {
return 0;
}
- if (tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, ltype,
- F_SETLKW, 0, 1) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_lock: failed to get transaction lock\n"));
+ if (tdb->allrecord_lock.count) {
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
- tdb->transaction_lock_count++;
- return 0;
+
+ return tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
}
/*
- release the transaction lock
+ get the transaction lock
*/
-int tdb_transaction_unlock(struct tdb_context *tdb)
+int tdb_transaction_lock(struct tdb_context *tdb, int ltype,
+ enum tdb_lock_flags lockflags)
{
- int ret;
- if (tdb->global_lock.count) {
- return 0;
- }
- if (tdb->transaction_lock_count > 1) {
- tdb->transaction_lock_count--;
- return 0;
- }
- ret = tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1);
- if (ret == 0) {
- tdb->transaction_lock_count = 0;
- }
- return ret;
+ return tdb_nest_lock(tdb, TRANSACTION_LOCK, ltype, lockflags);
}
-
-
-
-/* lock/unlock entire database */
-static int _tdb_lockall(struct tdb_context *tdb, int ltype, int op)
+/*
+ release the transaction lock
+ */
+int tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
{
- bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
+ return tdb_nest_unlock(tdb, TRANSACTION_LOCK, ltype, false);
+}
- ltype &= ~TDB_MARK_LOCK;
+/* lock/unlock entire database. It can only be upgradable if you have some
+ * other way of guaranteeing exclusivity (ie. transaction write lock). */
+int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
+ enum tdb_lock_flags flags, bool upgradable)
+{
/* There are no locks on read-only dbs */
if (tdb->read_only || tdb->traverse_read) {
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
- if (tdb->global_lock.count && tdb->global_lock.ltype == ltype) {
- tdb->global_lock.count++;
+ if (tdb->allrecord_lock.count && tdb->allrecord_lock.ltype == ltype) {
+ tdb->allrecord_lock.count++;
return 0;
}
- if (tdb->global_lock.count) {
+ if (tdb->allrecord_lock.count) {
/* a global lock of a different type exists */
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
-
- if (tdb->num_locks != 0) {
+
+ if (tdb_have_extra_locks(tdb)) {
/* can't combine global and chain locks */
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
- if (!mark_lock &&
- tdb->methods->tdb_brlock(tdb, FREELIST_TOP, ltype, op,
- 0, 4*tdb->header.hash_size)) {
- if (op == F_SETLKW) {
+ if (upgradable && ltype != F_RDLCK) {
+ /* tdb error: you can't upgrade a write lock! */
+ tdb->ecode = TDB_ERR_LOCK;
+ return -1;
+ }
+
+ if (tdb_brlock(tdb, ltype, FREELIST_TOP, 0, flags)) {
+ if (flags & TDB_LOCK_WAIT) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lockall failed (%s)\n", strerror(errno)));
}
return -1;
}
- tdb->global_lock.count = 1;
- tdb->global_lock.ltype = ltype;
+ tdb->allrecord_lock.count = 1;
+ /* If it's upgradable, it's actually exclusive so we can treat
+ * it as a write lock. */
+ tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
+ tdb->allrecord_lock.off = upgradable;
+
+ if (tdb_needs_recovery(tdb)) {
+ bool mark = flags & TDB_LOCK_MARK_ONLY;
+ tdb_allrecord_unlock(tdb, ltype, mark);
+ if (mark) {
+ tdb->ecode = TDB_ERR_LOCK;
+ TDB_LOG((tdb, TDB_DEBUG_ERROR,
+ "tdb_lockall_mark cannot do recovery\n"));
+ return -1;
+ }
+ if (tdb_lock_and_recover(tdb) == -1) {
+ return -1;
+ }
+ return tdb_allrecord_lock(tdb, ltype, flags, upgradable);
+ }
return 0;
}
/* unlock entire db */
-static int _tdb_unlockall(struct tdb_context *tdb, int ltype)
+int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock)
{
- bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
-
- ltype &= ~TDB_MARK_LOCK;
-
/* There are no locks on read-only dbs */
if (tdb->read_only || tdb->traverse_read) {
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
- if (tdb->global_lock.ltype != ltype || tdb->global_lock.count == 0) {
+ if (tdb->allrecord_lock.count == 0) {
+ tdb->ecode = TDB_ERR_LOCK;
+ return -1;
+ }
+
+ /* Upgradable locks are marked as write locks. */
+ if (tdb->allrecord_lock.ltype != ltype
+ && (!tdb->allrecord_lock.off || ltype != F_RDLCK)) {
tdb->ecode = TDB_ERR_LOCK;
return -1;
}
- if (tdb->global_lock.count > 1) {
- tdb->global_lock.count--;
+ if (tdb->allrecord_lock.count > 1) {
+ tdb->allrecord_lock.count--;
return 0;
}
- if (!mark_lock &&
- tdb->methods->tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW,
- 0, 4*tdb->header.hash_size)) {
+ if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno)));
return -1;
}
- tdb->global_lock.count = 0;
- tdb->global_lock.ltype = 0;
+ tdb->allrecord_lock.count = 0;
+ tdb->allrecord_lock.ltype = 0;
return 0;
}
int tdb_lockall(struct tdb_context *tdb)
{
tdb_trace(tdb, "tdb_lockall");
- return _tdb_lockall(tdb, F_WRLCK, F_SETLKW);
+ return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
}
/* lock entire database with write lock - mark only */
int tdb_lockall_mark(struct tdb_context *tdb)
{
tdb_trace(tdb, "tdb_lockall_mark");
- return _tdb_lockall(tdb, F_WRLCK | TDB_MARK_LOCK, F_SETLKW);
+ return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_MARK_ONLY, false);
}
/* unlock entire database with write lock - unmark only */
int tdb_lockall_unmark(struct tdb_context *tdb)
{
tdb_trace(tdb, "tdb_lockall_unmark");
- return _tdb_unlockall(tdb, F_WRLCK | TDB_MARK_LOCK);
+ return tdb_allrecord_unlock(tdb, F_WRLCK, true);
}
/* lock entire database with write lock - nonblocking varient */
int tdb_lockall_nonblock(struct tdb_context *tdb)
{
- int ret = _tdb_lockall(tdb, F_WRLCK, F_SETLK);
+ int ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false);
tdb_trace_ret(tdb, "tdb_lockall_nonblock", ret);
return ret;
}
int tdb_unlockall(struct tdb_context *tdb)
{
tdb_trace(tdb, "tdb_unlockall");
- return _tdb_unlockall(tdb, F_WRLCK);
+ return tdb_allrecord_unlock(tdb, F_WRLCK, false);
}
/* lock entire database with read lock */
int tdb_lockall_read(struct tdb_context *tdb)
{
tdb_trace(tdb, "tdb_lockall_read");
- return _tdb_lockall(tdb, F_RDLCK, F_SETLKW);
+ return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
}
/* lock entire database with read lock - nonblock varient */
int tdb_lockall_read_nonblock(struct tdb_context *tdb)
{
- int ret = _tdb_lockall(tdb, F_RDLCK, F_SETLK);
+ int ret = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_NOWAIT, false);
tdb_trace_ret(tdb, "tdb_lockall_read_nonblock", ret);
return ret;
}
int tdb_unlockall_read(struct tdb_context *tdb)
{
tdb_trace(tdb, "tdb_unlockall_read");
- return _tdb_unlockall(tdb, F_RDLCK);
+ return tdb_allrecord_unlock(tdb, F_RDLCK, false);
}
/* lock/unlock one hash chain. This is meant to be used to reduce
/* mark a chain as locked without actually locking it. Warning! use with great caution! */
int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key)
{
- int ret = tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK);
+ int ret = tdb_nest_lock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
+ F_WRLCK, TDB_LOCK_MARK_ONLY);
tdb_trace_1rec(tdb, "tdb_chainlock_mark", key);
return ret;
}
int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key)
{
tdb_trace_1rec(tdb, "tdb_chainlock_unmark", key);
- return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK);
+ return tdb_nest_unlock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
+ F_WRLCK, true);
}
int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
/* record lock stops delete underneath */
int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off)
{
- if (tdb->global_lock.count) {
+ if (tdb->allrecord_lock.count) {
return 0;
}
- return off ? tdb->methods->tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0, 1) : 0;
+ return off ? tdb_brlock(tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT) : 0;
}
/*
for (i = &tdb->travlocks; i; i = i->next)
if (i->off == off)
return -1;
- return tdb->methods->tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1, 1);
+ if (tdb->allrecord_lock.count) {
+ if (tdb->allrecord_lock.ltype == F_WRLCK) {
+ return 0;
+ }
+ return -1;
+ }
+ return tdb_brlock(tdb, F_WRLCK, off, 1, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
}
-/*
- Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
- an error to fail to get the lock here.
-*/
int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off)
{
- return tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0, 1);
+ if (tdb->allrecord_lock.count) {
+ return 0;
+ }
+ return tdb_brunlock(tdb, F_WRLCK, off, 1);
}
/* fcntl locks don't stack: avoid unlocking someone else's */
struct tdb_traverse_lock *i;
uint32_t count = 0;
- if (tdb->global_lock.count) {
+ if (tdb->allrecord_lock.count) {
return 0;
}
for (i = &tdb->travlocks; i; i = i->next)
if (i->off == off)
count++;
- return (count == 1 ? tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0, 1) : 0);
+ return (count == 1 ? tdb_brunlock(tdb, F_RDLCK, off, 1) : 0);
+}
+
+/* Report whether locks are held beyond the ones that are expected:
+ * the ACTIVE_LOCK entry and, inside a transaction, the TRANSACTION_LOCK
+ * entry.  An allrecord lock held outside a transaction always counts
+ * as extra. */
+bool tdb_have_extra_locks(struct tdb_context *tdb)
+{
+	unsigned int unexpected;
+
+	/* A transaction holds the lock for all records. */
+	if (tdb->allrecord_lock.count && !tdb->transaction) {
+		return true;
+	}
+
+	unexpected = tdb->num_lockrecs;
+
+	/* We always hold the active lock if CLEAR_IF_FIRST. */
+	if (find_nestlock(tdb, ACTIVE_LOCK) != NULL) {
+		unexpected -= 1;
+	}
+
+	/* In a transaction, we expect to hold the transaction lock */
+	if (tdb->transaction && find_nestlock(tdb, TRANSACTION_LOCK) != NULL) {
+		unexpected -= 1;
+	}
+
+	return unexpected != 0;
+}
+
+/* The transaction code uses this to remove all locks.
+ * Releases the allrecord lock if one is held, then unlocks every nested
+ * lock record except ACTIVE_LOCK, which is kept and compacted to the
+ * front of the array.  The array is freed once it is empty. */
+void tdb_release_transaction_locks(struct tdb_context *tdb)
+{
+	unsigned int kept = 0;
+	unsigned int idx;
+
+	if (tdb->allrecord_lock.count != 0) {
+		tdb_brunlock(tdb, tdb->allrecord_lock.ltype, FREELIST_TOP, 0);
+		tdb->allrecord_lock.count = 0;
+	}
+
+	for (idx = 0; idx < tdb->num_lockrecs; idx++) {
+		struct tdb_lock_type *rec = &tdb->lockrecs[idx];
+
+		if (rec->off != ACTIVE_LOCK) {
+			tdb_brunlock(tdb, rec->ltype, rec->off, 1);
+			continue;
+		}
+
+		/* Don't release the active lock! Copy it to first entry. */
+		tdb->lockrecs[kept] = *rec;
+		kept++;
+	}
+
+	tdb->num_lockrecs = kept;
+	if (kept == 0) {
+		SAFE_FREE(tdb->lockrecs);
+	}
}
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
size -= written;
written = write(tdb->fd, newdb+written, size);
if (written == size) {
- ret = 0;
+ ret = 0;
} else if (written >= 0) {
/* a second incomplete write - we give up.
* guessing the errno... */
ino_t ino)
{
struct tdb_context *i;
-
+
for (i = tdbs; i; i = i->next) {
if (i->device == device && i->inode == ino) {
return 1;
errno = EINVAL;
goto fail;
}
-
+
if (hash_size == 0)
hash_size = DEFAULT_HASH_SIZE;
if ((open_flags & O_ACCMODE) == O_RDONLY) {
fcntl(tdb->fd, F_SETFD, v | FD_CLOEXEC);
/* ensure there is only one process initialising at once */
- if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n",
+ if (tdb_nest_lock(tdb, OPEN_LOCK, F_WRLCK, TDB_LOCK_WAIT) == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get open lock on %s: %s\n",
name, strerror(errno)));
goto fail; /* errno set by tdb_brlock */
}
/* we need to zero database if we are the only one with it open */
if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
(!tdb->read_only) &&
- (locked = (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0, 1) == 0))) {
+ (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) {
open_flags |= O_CREAT;
if (ftruncate(tdb->fd, 0) == -1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
tdb->inode = st.st_ino;
tdb_mmap(tdb);
if (locked) {
- if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) {
+ if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
- "failed to take ACTIVE_LOCK on %s: %s\n",
+ "failed to release ACTIVE_LOCK on %s: %s\n",
name, strerror(errno)));
goto fail;
}
if (tdb_flags & TDB_CLEAR_IF_FIRST) {
/* leave this lock in place to indicate it's in use */
- if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)
+ if (tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) {
goto fail;
+ }
}
/* if needed, run recovery */
internal:
/* Internal (memory-only) databases skip all the code above to
* do with disk files, and resume here by releasing their
- * global lock and hooking into the active list. */
- if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1) == -1)
+ * open lock and hooking into the active list. */
+ if (tdb_nest_unlock(tdb, OPEN_LOCK, F_WRLCK, false) == -1) {
goto fail;
+ }
tdb->next = tdbs;
tdbs = tdb;
return tdb;
if (tdb->fd != -1)
if (close(tdb->fd) != 0)
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to close tdb->fd on error!\n"));
+ SAFE_FREE(tdb->lockrecs);
SAFE_FREE(tdb);
errno = save_errno;
return NULL;
struct tdb_context **i;
int ret = 0;
- tdb_trace(tdb, "tdb_close");
if (tdb->transaction) {
- _tdb_transaction_cancel(tdb);
+ tdb_transaction_cancel(tdb);
}
+ tdb_trace(tdb, "tdb_close");
if (tdb->map_ptr) {
if (tdb->flags & TDB_INTERNAL)
return 0; /* Nothing to do. */
}
- if (tdb->num_locks != 0 || tdb->global_lock.count) {
+ if (tdb_have_extra_locks(tdb)) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed with locks held\n"));
goto fail;
}
tdb_mmap(tdb);
#endif /* fake pread or pwrite */
- if (active_lock &&
- (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)) {
+ /* We may still think we hold the active lock. */
+ tdb->num_lockrecs = 0;
+ SAFE_FREE(tdb->lockrecs);
+
+ if (active_lock && tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: failed to obtain active lock\n"));
goto fail;
}
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
{
tdb_off_t seqnum=0;
-
+
if (!(tdb->flags & TDB_SEQNUM)) {
return;
}
return;
}
- if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
+ if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
+ TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
return;
}
tdb_increment_seqnum_nonblock(tdb);
- tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
+ tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
}
static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
struct tdb_record *r)
{
tdb_off_t rec_ptr;
-
+
/* read in the hash top */
if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
return 0;
free(data.dptr);
}
}
-
/* must be long enough key, data and tailer */
if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
rec.data_len = dbuf.dsize;
return tdb_rec_write(tdb, rec_ptr, &rec);
}
-
+
return 0;
}
* function. The parsing function is executed under the chain read lock, so it
* should be fast and should not block on other syscalls.
*
- * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
+ * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
*
* For mmapped tdb's that do not have a transaction open it points the parsing
* function directly at the mmap area, it avoids the malloc/memcpy in this
*
* This is interesting for all readers of potentially large data structures in
* the tdb records, ldb indexes being one example.
+ *
+ * Return -1 if the record was not found.
*/
int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
hash = tdb->hash_fn(&key);
if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
+ /* record not found */
tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
tdb->ecode = TDB_ERR_NOEXIST;
- return 0;
+ return -1;
}
tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
{
struct tdb_record rec;
-
+
if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
return 0;
tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
int res = 0;
tdb_off_t rec_ptr;
struct tdb_record rec;
-
+
/* read in the hash top */
if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
return 0;
if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
return -1;
}
-
+
/* read in the hash top */
if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
goto fail;
struct tdb_record *r, tdb_len_t length)
{
tdb_off_t rec_ptr;
-
+
/* read in the hash top */
if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
return 0;
ret = _tdb_store(tdb, key, dbuf, 0, hash);
tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
-
+
failed:
tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
SAFE_FREE(dbuf.dptr);
/*
wipe the entire database, deleting all records. This can be done
- very fast by using a global lock. The entire data portion of the
+ very fast by using an allrecord lock. The entire data portion of the
file becomes a single entry in the freelist.
This code carefully steps around the recovery area, leaving it alone
trivial database library - private includes
Copyright (C) Andrew Tridgell 2005
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
#define TDB_FREE_MAGIC (~TDB_MAGIC)
#define TDB_DEAD_MAGIC (0xFEE1DEAD)
#define TDB_RECOVERY_MAGIC (0xf53bc0e7U)
+#define TDB_RECOVERY_INVALID_MAGIC (0x0)
#define TDB_ALIGNMENT 4
#define DEFAULT_HASH_SIZE 131
#define FREELIST_TOP (sizeof(struct tdb_header))
#endif /* !TDB_TRACE */
/* lock offsets */
-#define GLOBAL_LOCK 0
+#define OPEN_LOCK 0
#define ACTIVE_LOCK 4
#define TRANSACTION_LOCK 8
};
struct tdb_lock_type {
- int list;
+ uint32_t off;
uint32_t count;
uint32_t ltype;
};
int lock_rw;
};
+/*
+ * Flags describing how a lock request should be carried out. The values
+ * are distinct bits and may be OR-ed together, e.g.
+ * TDB_LOCK_WAIT|TDB_LOCK_PROBE or TDB_LOCK_NOWAIT|TDB_LOCK_PROBE.
+ */
+enum tdb_lock_flags {
+ /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
+ TDB_LOCK_NOWAIT = 0,
+ TDB_LOCK_WAIT = 1,
+ /* If set, don't log an error on failure. */
+ TDB_LOCK_PROBE = 2,
+ /* If set, don't actually lock at all. */
+ TDB_LOCK_MARK_ONLY = 4,
+};
struct tdb_methods {
int (*tdb_read)(struct tdb_context *, tdb_off_t , void *, tdb_len_t , int );
void (*next_hash_chain)(struct tdb_context *, uint32_t *);
int (*tdb_oob)(struct tdb_context *, tdb_off_t , int );
int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t );
- int (*tdb_brlock)(struct tdb_context *, tdb_off_t , int, int, int, size_t);
};
struct tdb_context {
int read_only; /* opened read-only */
int traverse_read; /* read-only traversal */
int traverse_write; /* read-write traversal */
- struct tdb_lock_type global_lock;
+ struct tdb_lock_type allrecord_lock; /* .off == upgradable */
int num_lockrecs;
struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */
enum TDB_ERROR ecode; /* error code for last tdb error */
struct tdb_logging_context log;
unsigned int (*hash_fn)(TDB_DATA *key);
int open_flags; /* flags used in the open - needed by reopen */
- unsigned int num_locks; /* number of chain locks held */
const struct tdb_methods *methods;
struct tdb_transaction *transaction;
int page_size;
int max_dead_records;
- int transaction_lock_count;
#ifdef TDB_TRACE
int tracefd;
#endif
void tdb_mmap(struct tdb_context *tdb);
int tdb_lock(struct tdb_context *tdb, int list, int ltype);
int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
+int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
+ enum tdb_lock_flags flags);
+int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype,
+ bool mark_lock);
int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
-int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, int rw_type, int lck_type, int probe, size_t len);
-int tdb_transaction_lock(struct tdb_context *tdb, int ltype);
-int tdb_transaction_unlock(struct tdb_context *tdb);
-int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len);
+int tdb_brlock(struct tdb_context *tdb,
+ int rw_type, tdb_off_t offset, size_t len,
+ enum tdb_lock_flags flags);
+int tdb_brunlock(struct tdb_context *tdb,
+ int rw_type, tdb_off_t offset, size_t len);
+bool tdb_have_extra_locks(struct tdb_context *tdb);
+void tdb_release_transaction_locks(struct tdb_context *tdb);
+int tdb_transaction_lock(struct tdb_context *tdb, int ltype,
+ enum tdb_lock_flags lockflags);
+int tdb_transaction_unlock(struct tdb_context *tdb, int ltype);
+int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
+ enum tdb_lock_flags flags, bool upgradable);
+int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock);
+int tdb_allrecord_upgrade(struct tdb_context *tdb);
int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off);
int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off);
int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off);
int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off);
-int _tdb_transaction_cancel(struct tdb_context *tdb);
+bool tdb_needs_recovery(struct tdb_context *tdb);
int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec);
int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off,
struct tdb_record *rec);
-
+int tdb_transaction_recover(struct tdb_context *tdb);
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
- allow for nested calls to tdb_transaction_start(), re-using the
existing transaction record. If the inner transaction is cancelled
then a subsequent commit will fail
-
+
- keep a mirrored copy of the tdb hash chain heads to allow for the
fast hash heads scan on traverse, updating the mirrored copy in
the transaction version of tdb_write
to reduce this to 3 or even 2 with some more work.
- check for a valid recovery record on open of the tdb, while the
- global lock is held. Automatically recover from the transaction
+ open lock is held. Automatically recover from the transaction
recovery area if needed, then continue with the open as
usual. This allows for smooth crash recovery with no administrator
intervention.
bool prepared;
tdb_off_t magic_offset;
- /* set when the GLOBAL_LOCK has been taken */
- bool global_lock_taken;
-
/* old file size before transaction */
tdb_len_t old_map_size;
goto fail;
}
}
-
+
/* now copy it out of this block */
memcpy(buf, tdb->transaction->blocks[blk] + (off % tdb->transaction->block_size), len);
if (cv) {
}
}
}
-
+
/* overwrite part of an existing block */
if (buf == NULL) {
memset(tdb->transaction->blocks[blk] + off, 0, len);
return 0;
}
-/*
- brlock during a transaction - ignore them
-*/
-static int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset,
- int rw_type, int lck_type, int probe, size_t len)
-{
- return 0;
-}
-
static const struct tdb_methods transaction_methods = {
transaction_read,
transaction_write,
transaction_next_hash_chain,
transaction_oob,
transaction_expand_file,
- transaction_brlock
};
start a tdb transaction. No token is returned, as only a single
transaction is allowed to be pending per tdb_context
*/
-int tdb_transaction_start(struct tdb_context *tdb)
+static int _tdb_transaction_start(struct tdb_context *tdb,
+ enum tdb_lock_flags lockflags)
{
/* some sanity checks */
if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) {
return 0;
}
- if (tdb->num_locks != 0 || tdb->global_lock.count) {
+ if (tdb_have_extra_locks(tdb)) {
/* the caller must not have any locks when starting a
transaction as otherwise we'll be screwed by lack
of nested locks in posix */
/* get the transaction write lock. This is a blocking lock. As
discussed with Volker, there are a number of ways we could
make this async, which we will probably do in the future */
- if (tdb_transaction_lock(tdb, F_WRLCK) == -1) {
+ if (tdb_transaction_lock(tdb, F_WRLCK, lockflags) == -1) {
SAFE_FREE(tdb->transaction->blocks);
SAFE_FREE(tdb->transaction);
+ if ((lockflags & TDB_LOCK_WAIT) == 0) {
+ tdb->ecode = TDB_ERR_NOLOCK;
+ }
return -1;
}
-
+
/* get a read lock from the freelist to the end of file. This
is upgraded to a write lock during the commit */
- if (tdb_brlock(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) {
+ if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, true) == -1) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get hash locks\n"));
- tdb->ecode = TDB_ERR_LOCK;
- goto fail;
+ goto fail_allrecord_lock;
}
/* setup a copy of the hash table heads so the hash scan in
/* Trace at the end, so we get sequence number correct. */
tdb_trace(tdb, "tdb_transaction_start");
return 0;
-
+
fail:
- tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
- tdb_transaction_unlock(tdb);
+ tdb_allrecord_unlock(tdb, F_RDLCK, false);
+fail_allrecord_lock:
+ tdb_transaction_unlock(tdb, F_WRLCK);
SAFE_FREE(tdb->transaction->blocks);
SAFE_FREE(tdb->transaction->hash_heads);
SAFE_FREE(tdb->transaction);
return -1;
}
+/*
+ * Start a transaction, requesting the transaction lock with TDB_LOCK_WAIT
+ * (i.e. waiting for it). Returns whatever _tdb_transaction_start() returns:
+ * 0 on success, -1 on failure.
+ */
+int tdb_transaction_start(struct tdb_context *tdb)
+{
+ return _tdb_transaction_start(tdb, TDB_LOCK_WAIT);
+}
+
+/*
+ * Like tdb_transaction_start(), but requests the transaction lock with
+ * TDB_LOCK_NOWAIT|TDB_LOCK_PROBE: do not wait for it, and do not log an
+ * error if it cannot be taken. When the transaction lock is unavailable,
+ * _tdb_transaction_start() sets tdb->ecode to TDB_ERR_NOLOCK and -1 is
+ * returned.
+ */
+int tdb_transaction_start_nonblock(struct tdb_context *tdb)
+{
+ return _tdb_transaction_start(tdb, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
+}
/*
sync to disk
return 0;
}
- if (fsync(tdb->fd) != 0) {
+ if (fdatasync(tdb->fd) != 0) {
tdb->ecode = TDB_ERR_IO;
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n"));
return -1;
}
-int _tdb_transaction_cancel(struct tdb_context *tdb)
+static int _tdb_transaction_cancel(struct tdb_context *tdb)
{
int i, ret = 0;
if (tdb->transaction->magic_offset) {
const struct tdb_methods *methods = tdb->transaction->io_methods;
- uint32_t zero = 0;
+ const uint32_t invalid = TDB_RECOVERY_INVALID_MAGIC;
/* remove the recovery marker */
- if (methods->tdb_write(tdb, tdb->transaction->magic_offset, &zero, 4) == -1 ||
+ if (methods->tdb_write(tdb, tdb->transaction->magic_offset, &invalid, 4) == -1 ||
transaction_sync(tdb, tdb->transaction->magic_offset, 4) == -1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_cancel: failed to remove recovery magic\n"));
ret = -1;
}
}
- if (tdb->transaction->global_lock_taken) {
- tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
- tdb->transaction->global_lock_taken = false;
- }
-
- /* remove any global lock created during the transaction */
- if (tdb->global_lock.count != 0) {
- tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size);
- tdb->global_lock.count = 0;
- }
-
- /* remove any locks created during the transaction */
- if (tdb->num_locks != 0) {
- for (i=0;i<tdb->num_lockrecs;i++) {
- tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list,
- F_UNLCK,F_SETLKW, 0, 1);
- }
- tdb->num_locks = 0;
- tdb->num_lockrecs = 0;
- SAFE_FREE(tdb->lockrecs);
- }
+ /* This also removes the OPEN_LOCK, if we have it. */
+ tdb_release_transaction_locks(tdb);
/* restore the normal io methods */
tdb->methods = tdb->transaction->io_methods;
- tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
- tdb_transaction_unlock(tdb);
SAFE_FREE(tdb->transaction->hash_heads);
SAFE_FREE(tdb->transaction);
-
+
return ret;
}
rec.rec_len = 0;
- if (recovery_head != 0 &&
- methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery record\n"));
- return -1;
+ if (recovery_head != 0) {
+ if (methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery record\n"));
+ return -1;
+ }
+ /* ignore invalid recovery regions: can happen in crash */
+ if (rec.magic != TDB_RECOVERY_MAGIC &&
+ rec.magic != TDB_RECOVERY_INVALID_MAGIC) {
+ recovery_head = 0;
+ }
}
*recovery_size = tdb_recovery_size(tdb);
rec = (struct tdb_record *)data;
memset(rec, 0, sizeof(*rec));
- rec->magic = 0;
+ rec->magic = TDB_RECOVERY_INVALID_MAGIC;
rec->data_len = recovery_size;
rec->rec_len = recovery_max_size;
rec->key_len = old_map_size;
if (i == tdb->transaction->num_blocks-1) {
length = tdb->transaction->last_block_size;
}
-
+
if (offset >= old_map_size) {
continue;
}
}
methods = tdb->transaction->io_methods;
-
+
/* if there are any locks pending then the caller has not
nested their locks properly, so fail the transaction */
- if (tdb->num_locks || tdb->global_lock.count) {
+ if (tdb_have_extra_locks(tdb)) {
tdb->ecode = TDB_ERR_LOCK;
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: locks pending on commit\n"));
_tdb_transaction_cancel(tdb);
}
/* upgrade the main transaction lock region to a write lock */
- if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == -1) {
+ if (tdb_allrecord_upgrade(tdb) == -1) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to upgrade hash locks\n"));
- tdb->ecode = TDB_ERR_LOCK;
_tdb_transaction_cancel(tdb);
return -1;
}
- /* get the global lock - this prevents new users attaching to the database
+ /* get the open lock - this prevents new users attaching to the database
during the commit */
- if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
- TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to get global lock\n"));
- tdb->ecode = TDB_ERR_LOCK;
+ if (tdb_nest_lock(tdb, OPEN_LOCK, F_WRLCK, TDB_LOCK_WAIT) == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to get open lock\n"));
_tdb_transaction_cancel(tdb);
return -1;
}
- tdb->transaction->global_lock_taken = true;
-
if (!(tdb->flags & TDB_NOSYNC)) {
/* write the recovery data to the end of the file */
if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) {
methods->tdb_oob(tdb, tdb->map_size + 1, 1);
}
- /* Keep the global lock until the actual commit */
+ /* Keep the open lock until the actual commit */
return 0;
}
if (methods->tdb_write(tdb, offset, tdb->transaction->blocks[i], length) == -1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n"));
-
+
/* we've overwritten part of the data and
possibly expanded the file, so we need to
run the crash recovery code */
/*
recover from an aborted transaction. Must be called with exclusive
- database write access already established (including the global
+ database write access already established (including the open
lock to prevent new processes attaching)
*/
int tdb_transaction_recover(struct tdb_context *tdb)
tdb->ecode = TDB_ERR_IO;
return -1;
}
-
- /* reduce the file size to the old size */
- tdb_munmap(tdb);
- if (ftruncate(tdb->fd, recovery_eof) != 0) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to reduce to recovery size\n"));
- tdb->ecode = TDB_ERR_IO;
- return -1;
- }
- tdb->map_size = recovery_eof;
- tdb_mmap(tdb);
if (transaction_sync(tdb, 0, recovery_eof) == -1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync2 recovery\n"));
/* all done */
return 0;
}
+
+/*
+ * Check whether the recovery area holds a record marked with
+ * TDB_RECOVERY_MAGIC.
+ *
+ * Returns false if no recovery area has ever been allocated (the recovery
+ * head offset is 0) or if the record's magic is anything other than
+ * TDB_RECOVERY_MAGIC. Any I/O failure while reading the recovery head or
+ * the record is reported as "needs recovery" (true).
+ */
+bool tdb_needs_recovery(struct tdb_context *tdb)
+{
+ tdb_off_t recovery_head;
+ struct tdb_record rec;
+
+ /* find the recovery area */
+ if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
+ return true;
+ }
+
+ if (recovery_head == 0) {
+ /* we have never allocated a recovery record */
+ return false;
+ }
+
+ /* read the recovery record */
+ if (tdb->methods->tdb_read(tdb, recovery_head, &rec,
+ sizeof(rec), DOCONV()) == -1) {
+ return true;
+ }
+
+ return (rec.magic == TDB_RECOVERY_MAGIC);
+}
Copyright (C) Andrew Tridgell 1999-2005
Copyright (C) Paul `Rusty' Russell 2000
Copyright (C) Jeremy Allison 2000-2003
-
+
** NOTE! The following LGPL license applies to the tdb
** library. This does NOT imply that all of Samba is released
** under the LGPL
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
common for the use of tdb with ldb, where large
hashes are used. In that case we spend most of our
time in tdb_brlock(), locking empty hash chains.
-
+
To avoid this, we do an unlocked pre-check to see
if the hash chain is empty before starting to look
inside it. If it is empty then we can avoid that
the value we get back, as we read it without a
lock, so instead we get the lock and re-fetch the
value below.
-
+
Notice that not doing this optimisation on the
first hash chain is critical. We must guarantee
that we have done at least one fcntl lock at the
could possibly miss those with this trick, but we
could miss them anyway without this trick, so the
semantics don't change.
-
+
With a non-indexed ldb search this trick gains us a
factor of around 80 in speed on a linux 2.6.x
system (testing using ldbtest).
/* we need to get a read lock on the transaction lock here to
cope with the lock ordering semantics of solaris10 */
- if (tdb_transaction_lock(tdb, F_RDLCK)) {
+ if (tdb_transaction_lock(tdb, F_RDLCK, TDB_LOCK_WAIT)) {
return -1;
}
ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
tdb->traverse_read--;
- tdb_transaction_unlock(tdb);
+ tdb_transaction_unlock(tdb, F_RDLCK);
return ret;
}
return tdb_traverse_read(tdb, fn, private_data);
}
- if (tdb_transaction_lock(tdb, F_WRLCK)) {
+ if (tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT)) {
return -1;
}
ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
tdb->traverse_write--;
- tdb_transaction_unlock(tdb);
+ tdb_transaction_unlock(tdb, F_WRLCK);
return ret;
}
AC_DEFUN([SMB_MODULE_DEFAULT], [echo -n ""])
AC_DEFUN([SMB_LIBRARY_ENABLE], [echo -n ""])
AC_DEFUN([SMB_ENABLE], [echo -n ""])
-AC_INIT(tdb, 1.2.0)
+AC_INIT(tdb, 1.2.2)
AC_CONFIG_SRCDIR([common/tdb.c])
AC_CONFIG_HEADER(include/config.h)
AC_LIBREPLACE_ALL_CHECKS
caller must free the resulting data
+----------------------------------------------------------------------
+int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
+ int (*parser)(TDB_DATA key, TDB_DATA data,
+ void *private_data),
+ void *private_data);
+
+ Hand a record to a parser function without allocating it.
+
+ This function is meant as a fast tdb_fetch alternative for large records
+ that are frequently read. The "key" and "data" arguments point directly
+ into the tdb shared memory, they are not aligned at any boundary.
+
+ WARNING: The parser is called while tdb holds a lock on the record. DO NOT
+ call other tdb routines from within the parser. Also, for good performance
+ you should make the parser fast to allow parallel operations.
+
+ tdb_parse_record returns -1 if the record was not found. If the record was
+ found, the return value of "parser" is passed up to the caller.
+
----------------------------------------------------------------------
int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key);
#include "signal.h"
+/* Samba sets hidden attribute when building libraries: we don't. */
+#ifndef _PUBLIC_
+#define _PUBLIC_
+#endif
+
/* flags to tdb_store() */
#define TDB_REPLACE 1 /* Unused */
#define TDB_INSERT 2 /* Don't overwrite an existing entry */
void *log_private;
};
-struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags,
+_PUBLIC_ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags,
int open_flags, mode_t mode);
-struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
+_PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
int open_flags, mode_t mode,
const struct tdb_logging_context *log_ctx,
tdb_hash_func hash_fn);
-void tdb_set_max_dead(struct tdb_context *tdb, int max_dead);
-
-int tdb_reopen(struct tdb_context *tdb);
-int tdb_reopen_all(int parent_longlived);
-void tdb_set_logging_function(struct tdb_context *tdb, const struct tdb_logging_context *log_ctx);
-enum TDB_ERROR tdb_error(struct tdb_context *tdb);
-const char *tdb_errorstr(struct tdb_context *tdb);
-TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
-int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
- int (*parser)(TDB_DATA key, TDB_DATA data,
- void *private_data),
- void *private_data);
-int tdb_delete(struct tdb_context *tdb, TDB_DATA key);
-int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag);
-int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf);
-int tdb_close(struct tdb_context *tdb);
-TDB_DATA tdb_firstkey(struct tdb_context *tdb);
-TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key);
-int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *);
-int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *);
-int tdb_exists(struct tdb_context *tdb, TDB_DATA key);
-int tdb_lockall(struct tdb_context *tdb);
-int tdb_lockall_nonblock(struct tdb_context *tdb);
-int tdb_unlockall(struct tdb_context *tdb);
-int tdb_lockall_read(struct tdb_context *tdb);
-int tdb_lockall_read_nonblock(struct tdb_context *tdb);
-int tdb_unlockall_read(struct tdb_context *tdb);
-int tdb_lockall_mark(struct tdb_context *tdb);
-int tdb_lockall_unmark(struct tdb_context *tdb);
-const char *tdb_name(struct tdb_context *tdb);
-int tdb_fd(struct tdb_context *tdb);
-tdb_log_func tdb_log_fn(struct tdb_context *tdb);
-void *tdb_get_logging_private(struct tdb_context *tdb);
-int tdb_transaction_start(struct tdb_context *tdb);
-int tdb_transaction_prepare_commit(struct tdb_context *tdb);
-int tdb_transaction_commit(struct tdb_context *tdb);
-int tdb_transaction_cancel(struct tdb_context *tdb);
-int tdb_transaction_recover(struct tdb_context *tdb);
-int tdb_get_seqnum(struct tdb_context *tdb);
-int tdb_hash_size(struct tdb_context *tdb);
-size_t tdb_map_size(struct tdb_context *tdb);
-int tdb_get_flags(struct tdb_context *tdb);
-void tdb_add_flags(struct tdb_context *tdb, unsigned flag);
-void tdb_remove_flags(struct tdb_context *tdb, unsigned flag);
-void tdb_enable_seqnum(struct tdb_context *tdb);
-void tdb_increment_seqnum_nonblock(struct tdb_context *tdb);
-int tdb_check(struct tdb_context *tdb,
+_PUBLIC_ void tdb_set_max_dead(struct tdb_context *tdb, int max_dead);
+
+_PUBLIC_ int tdb_reopen(struct tdb_context *tdb);
+_PUBLIC_ int tdb_reopen_all(int parent_longlived);
+_PUBLIC_ void tdb_set_logging_function(struct tdb_context *tdb, const struct tdb_logging_context *log_ctx);
+_PUBLIC_ enum TDB_ERROR tdb_error(struct tdb_context *tdb);
+_PUBLIC_ const char *tdb_errorstr(struct tdb_context *tdb);
+_PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
+ int (*parser)(TDB_DATA key, TDB_DATA data,
+ void *private_data),
+ void *private_data);
+_PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag);
+_PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf);
+_PUBLIC_ int tdb_close(struct tdb_context *tdb);
+_PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb);
+_PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *);
+_PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *);
+_PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_lockall(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_unlockall(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_read(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_read_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_unlockall_read(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_mark(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_unmark(struct tdb_context *tdb);
+_PUBLIC_ const char *tdb_name(struct tdb_context *tdb);
+_PUBLIC_ int tdb_fd(struct tdb_context *tdb);
+_PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb);
+_PUBLIC_ void *tdb_get_logging_private(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_start(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_start_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_prepare_commit(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_commit(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_cancel(struct tdb_context *tdb);
+_PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb);
+_PUBLIC_ int tdb_hash_size(struct tdb_context *tdb);
+_PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb);
+_PUBLIC_ int tdb_get_flags(struct tdb_context *tdb);
+_PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flag);
+_PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flag);
+_PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb);
+_PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_check(struct tdb_context *tdb,
int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
void *private_data);
/* Low level locking functions: use with care */
-int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key);
-void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *sigptr);
+_PUBLIC_ void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *sigptr);
/* wipe and repack */
-int tdb_wipe_all(struct tdb_context *tdb);
-int tdb_repack(struct tdb_context *tdb);
+_PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb);
+_PUBLIC_ int tdb_repack(struct tdb_context *tdb);
/* Debug functions. Not used in production. */
-void tdb_dump_all(struct tdb_context *tdb);
-int tdb_printfreelist(struct tdb_context *tdb);
-int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries);
-int tdb_freelist_size(struct tdb_context *tdb);
+_PUBLIC_ void tdb_dump_all(struct tdb_context *tdb);
+_PUBLIC_ int tdb_printfreelist(struct tdb_context *tdb);
+_PUBLIC_ int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries);
+_PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb);
-extern TDB_DATA tdb_null;
+_PUBLIC_ extern TDB_DATA tdb_null;
#ifdef __cplusplus
}
Py_RETURN_NONE;
}
-static PyObject *obj_transaction_recover(PyTdbObject *self)
-{
- int ret = tdb_transaction_recover(self->ctx);
- PyErr_TDB_ERROR_IS_ERR_RAISE(ret, self->ctx);
- Py_RETURN_NONE;
-}
-
static PyObject *obj_transaction_start(PyTdbObject *self)
{
int ret = tdb_transaction_start(self->ctx);
{ "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS,
"S.transaction_commit() -> None\n"
"Commit the currently active transaction." },
- { "transaction_recover", (PyCFunction)obj_transaction_recover, METH_NOARGS,
- "S.transaction_recover() -> None\n"
- "Recover the currently active transaction." },
{ "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS,
"S.transaction_start() -> None\n"
"Start a new transaction." },
-#!/usr/bin/python
+#!/usr/bin/env python
# Trivial reimplementation of tdbdump in Python
import tdb, sys
-#!/usr/bin/python
+#!/usr/bin/env python
# Some simple tests for the Python bindings for TDB
# Note that this tests the interface of the Python bindings
# It does not test tdb itself.
+++ /dev/null
-#!/bin/bash
-
-if [ "$1" = "" ]; then
- echo "Please provide version string, eg: 1.2.0"
- exit 1
-fi
-
-if [ ! -d "lib/tdb" ]; then
- echo "Run this script from the samba base directory."
- exit 1
-fi
-
-git clean -f -x -d lib/tdb
-git clean -f -x -d lib/replace
-
-curbranch=`git branch |grep "^*" | tr -d "* "`
-
-version=$1
-strver=`echo ${version} | tr "." "-"`
-
-# Checkout the release tag
-git branch -f tdb-release-script-${strver} tdb-${strver}
-if [ ! "$?" = "0" ]; then
- echo "Unable to checkout tdb-${strver} release"
- exit 1
-fi
-
-git checkout tdb-release-script-${strver}
-
-# Test configure agrees with us
-confver=`grep "^AC_INIT" lib/tdb/configure.ac | tr -d "AC_INIT(tdb, " | tr -d ")"`
-if [ ! "$confver" = "$version" ]; then
- echo "Wrong version, requested release for ${version}, found ${confver}"
- exit 1
-fi
-
-# Now build tarball
-cp -a lib/tdb tdb-${version}
-cp -a lib/replace tdb-${version}/libreplace
-pushd tdb-${version}
-./autogen.sh
-popd
-tar cvzf tdb-${version}.tar.gz tdb-${version}
-rm -fr tdb-${version}
-
-#Clean up
-git checkout $curbranch
-git branch -d tdb-release-script-${strver}
--- /dev/null
+#!/bin/bash
+
+LNAME=tdb
+LINCLUDE=include/tdb.h
+
+if [ "$1" = "" ]; then
+ echo "Please provide version string, eg: 1.2.0"
+ exit 1
+fi
+
+if [ ! -d "lib/${LNAME}" ]; then
+ echo "Run this script from the samba base directory."
+ exit 1
+fi
+
+curbranch=`git branch |grep "^*" | tr -d "* "`
+
+version=$1
+strver=`echo ${version} | tr "." "-"`
+
+# Checkout the release tag
+git branch -f ${LNAME}-release-script-${strver} ${LNAME}-${strver}
+if [ ! "$?" = "0" ]; then
+ echo "Unable to checkout ${LNAME}-${strver} release"
+ exit 1
+fi
+
+function cleanquit {
+ #Clean up
+ git checkout $curbranch
+ git branch -d ${LNAME}-release-script-${strver}
+ exit $1
+}
+
+# NOTE: use cleanquit after this point
+git checkout ${LNAME}-release-script-${strver}
+
+# Test configure agrees with us.
+# Note: "tr -d" removes a set of characters, not a literal string; this is
+# sufficient here because a version consists only of digits and dots.
+confver=`grep "^AC_INIT" lib/${LNAME}/configure.ac | tr -d "AC_INIT(${LNAME}, " | tr -d ")"`
+if [ ! "$confver" = "$version" ]; then
+ echo "Wrong version, requested release for ${version}, found ${confver}"
+ # The temporary release branch is already checked out at this point, so
+ # leave through cleanquit to restore the original branch and delete the
+ # temporary one instead of exiting directly.
+ cleanquit 1
+fi
+
+# Check exports and signatures are up to date
+pushd lib/${LNAME}
+./script/abi_checks.sh ${LNAME} ${LINCLUDE}
+abicheck=$?
+popd
+if [ ! "$abicheck" = "0" ]; then
+ echo "ERROR: ABI Checks produced warnings!"
+ cleanquit 1
+fi
+
+git clean -f -x -d lib/${LNAME}
+git clean -f -x -d lib/replace
+
+# Now build tarball
+cp -a lib/${LNAME} ${LNAME}-${version}
+cp -a lib/replace ${LNAME}-${version}/libreplace
+pushd ${LNAME}-${version}
+./autogen.sh
+popd
+tar cvzf ${LNAME}-${version}.tar.gz ${LNAME}-${version}
+rm -fr ${LNAME}-${version}
+
+cleanquit 0
tdb_transaction_cancel;
tdb_transaction_commit;
tdb_transaction_prepare_commit;
- tdb_transaction_recover;
tdb_transaction_start;
+ tdb_transaction_start_nonblock;
tdb_traverse;
tdb_traverse_read;
tdb_unlockall;
int tdb_transaction_prepare_commit (struct tdb_context *);
int tdb_transaction_recover (struct tdb_context *);
int tdb_transaction_start (struct tdb_context *);
+int tdb_transaction_start_nonblock (struct tdb_context *);
int tdb_traverse_read (struct tdb_context *, tdb_traverse_func, void *);
int tdb_traverse (struct tdb_context *, tdb_traverse_func, void *);
int tdb_unlockall_read (struct tdb_context *);
static int in_transaction;
static int error_count;
static int always_transaction = 0;
+static int hash_size = 2;
+static int loopnum;
+static int count_pipe;
+static struct tdb_logging_context log_ctx;
#ifdef PRINTF_ATTRIBUTE
static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4);
va_end(ap);
fflush(stdout);
#if 0
- {
+ if (level != TDB_DEBUG_TRACE) {
char *ptr;
+ signal(SIGUSR1, SIG_IGN);
asprintf(&ptr,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid());
system(ptr);
free(ptr);
static void usage(void)
{
- printf("Usage: tdbtorture [-t] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n");
+ printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n");
exit(0);
}
- int main(int argc, char * const *argv)
+/*
+ * SIGUSR1 handler installed by run_child(): report how far this child
+ * got, then terminate by signal.
+ *
+ * The current loopnum is written to count_pipe and the process kills
+ * itself with SIGUSR2.  The parent reads one count from the pipe only
+ * when a child died from SIGUSR2, so SIGUSR2 must be raised only when
+ * the count really was written; otherwise that read would block with
+ * nothing to receive.  If the write fails we die from plain SIGUSR1
+ * instead, which the parent handles by restarting the child without
+ * reading the pipe.
+ */
+static void send_count_and_suicide(int sig)
+{
+	(void)sig;
+
+	/* This ensures our successor can continue where we left off. */
+	if (write(count_pipe, &loopnum, sizeof(loopnum))
+	    != (ssize_t)sizeof(loopnum)) {
+		/*
+		 * Restore the default action so the SIGUSR1 we send
+		 * ourselves is fatal (delivered at the latest when this
+		 * handler returns).
+		 */
+		signal(SIGUSR1, SIG_DFL);
+		kill(getpid(), SIGUSR1);
+		return;
+	}
+	/* This gives a unique signature. */
+	kill(getpid(), SIGUSR2);
+}
+
+/*
+ * Run one torture worker against torture.tdb.
+ *
+ * i:         index of this worker; added to seed so each worker seeds
+ *            its random generators differently.
+ * seed:      base random seed.
+ * num_loops: loop count at which the worker stops.
+ * start:     initial value of the loop counter (non-zero when
+ *            continuing the work of a killed predecessor).
+ *
+ * Opens the database into the global db, calls addrec_db() until
+ * loopnum reaches num_loops or an error is counted, then (if no error
+ * occurred) traverses the database, and finally closes it.
+ *
+ * Returns error_count, capped at 100.
+ */
+static int run_child(int i, int seed, unsigned num_loops, unsigned start)
+{
+	int child_seed = seed + i;
+
+	db = tdb_open_ex("torture.tdb", hash_size, TDB_DEFAULT,
+			 O_RDWR | O_CREAT, 0600, &log_ctx, NULL);
+	if (db == NULL) {
+		fatal("db open failed");
+	}
+
+	srand(child_seed);
+	srandom(child_seed);
+
+	/*
+	 * The handler reports loopnum, so it has to hold the right value
+	 * before the handler is installed.
+	 */
+	loopnum = start;
+	signal(SIGUSR1, send_count_and_suicide);
+
+	while (loopnum < num_loops && error_count == 0) {
+		addrec_db();
+		loopnum++;
+	}
+
+	if (error_count == 0) {
+		tdb_traverse_read(db, NULL, NULL);
+		if (always_transaction) {
+			/* Cancel every transaction still counted as open. */
+			for (; in_transaction; in_transaction--) {
+				tdb_transaction_cancel(db);
+			}
+			if (tdb_transaction_start(db) != 0) {
+				fatal("tdb_transaction_start failed");
+			}
+		}
+		tdb_traverse(db, traverse_fn, NULL);
+		tdb_traverse(db, traverse_fn, NULL);
+		if (always_transaction && tdb_transaction_commit(db) != 0) {
+			fatal("tdb_transaction_commit failed");
+		}
+	}
+
+	tdb_close(db);
+
+	/* Callers pass this value to exit(); presumably capped to stay a valid exit status. */
+	if (error_count >= 100) {
+		return 100;
+	}
+	return error_count;
+}
+
+int main(int argc, char * const *argv)
{
int i, seed = -1;
- int num_procs = 3;
int num_loops = 5000;
- int hash_size = 2;
- int c;
+ int num_procs = 3;
+ int c, pfds[2];
extern char *optarg;
pid_t *pids;
+ int kill_random = 0;
+ int *done;
- struct tdb_logging_context log_ctx;
log_ctx.log_fn = tdb_log;
- while ((c = getopt(argc, argv, "n:l:s:H:th")) != -1) {
+ while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) {
switch (c) {
case 'n':
num_procs = strtol(optarg, NULL, 0);
case 't':
always_transaction = 1;
break;
+ case 'k':
+ kill_random = 1;
+ break;
default:
usage();
}
unlink("torture.tdb");
- pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
- pids[0] = getpid();
-
- for (i=0;i<num_procs-1;i++) {
- if ((pids[i+1]=fork()) == 0) break;
- }
-
- db = tdb_open_ex("torture.tdb", hash_size, TDB_CLEAR_IF_FIRST,
- O_RDWR | O_CREAT, 0600, &log_ctx, NULL);
- if (!db) {
- fatal("db open failed");
- }
-
if (seed == -1) {
seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
}
- if (i == 0) {
- printf("testing with %d processes, %d loops, %d hash_size, seed=%d%s\n",
- num_procs, num_loops, hash_size, seed, always_transaction ? " (all within transactions)" : "");
+ if (num_procs == 1 && !kill_random) {
+ /* Don't fork for this case, makes debugging easier. */
+ error_count = run_child(0, seed, num_loops, 0);
+ goto done;
}
- srand(seed + i);
- srandom(seed + i);
+ pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
+ done = (int *)calloc(sizeof(int), num_procs);
- for (i=0;i<num_loops && error_count == 0;i++) {
- addrec_db();
+ if (pipe(pfds) != 0) {
+ perror("Creating pipe");
+ exit(1);
}
-
- if (error_count == 0) {
- tdb_traverse_read(db, NULL, NULL);
- if (always_transaction) {
- while (in_transaction) {
- tdb_transaction_cancel(db);
- in_transaction--;
+ count_pipe = pfds[1];
+
+ for (i=0;i<num_procs;i++) {
+ if ((pids[i]=fork()) == 0) {
+ close(pfds[0]);
+ if (i == 0) {
+ printf("Testing with %d processes, %d loops, %d hash_size, seed=%d%s\n",
+ num_procs, num_loops, hash_size, seed, always_transaction ? " (all within transactions)" : "");
}
- if (tdb_transaction_start(db) != 0)
- fatal("tdb_transaction_start failed");
- }
- tdb_traverse(db, traverse_fn, NULL);
- tdb_traverse(db, traverse_fn, NULL);
- if (always_transaction) {
- if (tdb_transaction_commit(db) != 0)
- fatal("tdb_transaction_commit failed");
+ exit(run_child(i, seed, num_loops, 0));
}
}
- tdb_close(db);
-
- if (getpid() != pids[0]) {
- return error_count;
- }
-
- for (i=1;i<num_procs;i++) {
+ while (num_procs) {
int status, j;
pid_t pid;
+
if (error_count != 0) {
/* try and stop the test on any failure */
- for (j=1;j<num_procs;j++) {
+ for (j=0;j<num_procs;j++) {
if (pids[j] != 0) {
kill(pids[j], SIGTERM);
}
}
}
- pid = waitpid(-1, &status, 0);
+
+ pid = waitpid(-1, &status, kill_random ? WNOHANG : 0);
+ if (pid == 0) {
+ struct timeval tv;
+
+ /* Sleep for 1/10 second. */
+ tv.tv_sec = 0;
+ tv.tv_usec = 100000;
+ select(0, NULL, NULL, NULL, &tv);
+
+ /* Kill someone. */
+ kill(pids[random() % num_procs], SIGUSR1);
+ continue;
+ }
+
if (pid == -1) {
perror("failed to wait for child\n");
exit(1);
}
- for (j=1;j<num_procs;j++) {
+
+ for (j=0;j<num_procs;j++) {
if (pids[j] == pid) break;
}
if (j == num_procs) {
printf("unknown child %d exited!?\n", (int)pid);
exit(1);
}
- if (WEXITSTATUS(status) != 0) {
- printf("child %d exited with status %d\n",
- (int)pid, WEXITSTATUS(status));
+ if (WIFSIGNALED(status)) {
+ if (WTERMSIG(status) == SIGUSR2
+ || WTERMSIG(status) == SIGUSR1) {
+ /* SIGUSR2 means they wrote to pipe. */
+ if (WTERMSIG(status) == SIGUSR2) {
+ read(pfds[0], &done[j],
+ sizeof(done[j]));
+ }
+ pids[j] = fork();
+ if (pids[j] == 0)
+ exit(run_child(j, seed, num_loops,
+ done[j]));
+ printf("Restarting child %i for %u-%u\n",
+ j, done[j], num_loops);
+ continue;
+ }
+ printf("child %d exited with signal %d\n",
+ (int)pid, WTERMSIG(status));
error_count++;
+ } else {
+ if (WEXITSTATUS(status) != 0) {
+ printf("child %d exited with status %d\n",
+ (int)pid, WEXITSTATUS(status));
+ error_count++;
+ }
}
- pids[j] = 0;
+ memmove(&pids[j], &pids[j+1],
+ (num_procs - j - 1)*sizeof(pids[0]));
+ num_procs--;
}
free(pids);
+done:
if (error_count == 0) {
+ db = tdb_open_ex("torture.tdb", hash_size, TDB_DEFAULT,
+ O_RDWR, 0, &log_ctx, NULL);
+ if (!db) {
+ fatal("db open failed");
+ }
+ if (tdb_check(db, NULL, NULL) == -1) {
+ printf("db check failed");
+ exit(1);
+ }
+ tdb_close(db);
printf("OK\n");
}