Merge commit 'rusty/tdb-update'

author Ronnie sahlberg <ronniesahlberg@gmail.com>

Thu, 22 Apr 2010 23:25:25 +0000 (09:25 +1000)

committer Ronnie sahlberg <ronniesahlberg@gmail.com>

Thu, 22 Apr 2010 23:25:25 +0000 (09:25 +1000)
author Ronnie sahlberg <ronniesahlberg@gmail.com>
Thu, 22 Apr 2010 23:25:25 +0000 (09:25 +1000)
committer Ronnie sahlberg <ronniesahlberg@gmail.com>
Thu, 22 Apr 2010 23:25:25 +0000 (09:25 +1000)
diff --git a/lib/tdb/common/check.c b/lib/tdb/common/check.c

index f0a15f801b6e51c76cbff860ab007bfad4b0878c..6bbfd7d82aceecc4114395a2d61619e373525f2e 100644 (file)
--- a/lib/tdb/common/check.c
+++ b/lib/tdb/common/check.c
@@ -370,7 +370,7 @@ int tdb_check(struct tdb_context *tdb,
                                 goto free;
                         break;
                 case TDB_RECOVERY_MAGIC:
-               case 0: /* Used for invalid (or in-progress) recovery area. */
+               case TDB_RECOVERY_INVALID_MAGIC:
                         if (recovery_start != off) {
                                 TDB_LOG((tdb, TDB_DEBUG_ERROR,
                                          "Unexpected recovery record at offset %d\n",
diff --git a/lib/tdb/common/dump.c b/lib/tdb/common/dump.c

index bdcbfab139b3133acdae64c4e5dda0ad2c332cb7..9f770f81a52381d13d339bfff1b8701ca862cc80 100644 (file)
--- a/lib/tdb/common/dump.c
+++ b/lib/tdb/common/dump.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
diff --git a/lib/tdb/common/error.c b/lib/tdb/common/error.c

index 195ab238154df7b34f1631c08c2eaffa5a833e1e..9197918ddeaa048f56c1ebbaa611989add0a6bf4 100644 (file)
--- a/lib/tdb/common/error.c
+++ b/lib/tdb/common/error.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
diff --git a/lib/tdb/common/freelist.c b/lib/tdb/common/freelist.c

index 8113b54951cf83324a086deb24521a5bb2c572f8..79e3c344b8bb0f93a4dd85d4a42807afc1820f35 100644 (file)
--- a/lib/tdb/common/freelist.c
+++ b/lib/tdb/common/freelist.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -98,7 +98,7 @@ static int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
  }
  
  /* Add an element into the freelist. Merge adjacent records if
-   neccessary. */
+   necessary. */
  int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
  {
         /* Allocation and tailer lock */
@@ -143,7 +143,7 @@ left:
                 tdb_off_t left = offset - sizeof(tdb_off_t);
                 struct tdb_record l;
                 tdb_off_t leftsize;
-               
+
                 /* Read in tailer and jump back to header */
                 if (tdb_ofs_read(tdb, left, &leftsize) == -1) {
                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left));
@@ -334,7 +334,7 @@ tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_rec
                     bestfit.rec_len < length * multiplier) {
                         break;
                 }
-               
+
                 /* this multiplier means we only extremely rarely
                    search more than 50 or so records. At 50 records we
                    accept records up to 11 times larger than what we
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c

index d549715f831b414fc098b334a97f6572c74ffc76..058ca6c6b5194691fe872e3faf35fc0a92dffd97 100644 (file)
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -461,7 +461,6 @@ static const struct tdb_methods io_methods = {
         tdb_next_hash_chain,
         tdb_oob,
         tdb_expand_file,
-       tdb_brlock
  };
  
  /*
diff --git a/lib/tdb/common/lock.c b/lib/tdb/common/lock.c

index 0984e516ea14cd88530fa29d75fe05fbbf4bde39..285b7a34c3a12cb985b7799fbd9a2da8277ba1d7 100644 (file)
--- a/lib/tdb/common/lock.c
+++ b/lib/tdb/common/lock.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -27,13 +27,104 @@
  
  #include "tdb_private.h"
  
-#define TDB_MARK_LOCK 0x80000000
-
  void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *ptr)
  {
         tdb->interrupt_sig_ptr = ptr;
  }
  
+static int fcntl_lock(struct tdb_context *tdb,
+                     int rw, off_t off, off_t len, bool waitflag)
+{
+       struct flock fl;
+
+       fl.l_type = rw;
+       fl.l_whence = SEEK_SET;
+       fl.l_start = off;
+       fl.l_len = len;
+       fl.l_pid = 0;
+
+       if (waitflag)
+               return fcntl(tdb->fd, F_SETLKW, &fl);
+       else
+               return fcntl(tdb->fd, F_SETLK, &fl);
+}
+
+static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
+{
+       struct flock fl;
+#if 0 /* Check they matched up locks and unlocks correctly. */
+       char line[80];
+       FILE *locks;
+       bool found = false;
+
+       locks = fopen("/proc/locks", "r");
+
+       while (fgets(line, 80, locks)) {
+               char *p;
+               int type, start, l;
+
+               /* e.g. 1: POSIX  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
+               p = strchr(line, ':') + 1;
+               if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
+                       continue;
+               p += strlen(" FLOCK  ADVISORY  ");
+               if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
+                       type = F_RDLCK;
+               else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
+                       type = F_WRLCK;
+               else
+                       abort();
+               p += 6;
+               if (atoi(p) != getpid())
+                       continue;
+               p = strchr(strchr(p, ' ') + 1, ' ') + 1;
+               start = atoi(p);
+               p = strchr(p, ' ') + 1;
+               if (strncmp(p, "EOF", 3) == 0)
+                       l = 0;
+               else
+                       l = atoi(p) - start + 1;
+
+               if (off == start) {
+                       if (len != l) {
+                               fprintf(stderr, "Len %u should be %u: %s",
+                                       (int)len, l, line);
+                               abort();
+                       }
+                       if (type != rw) {
+                               fprintf(stderr, "Type %s wrong: %s",
+                                       rw == F_RDLCK ? "READ" : "WRITE", line);
+                               abort();
+                       }
+                       found = true;
+                       break;
+               }
+       }
+
+       if (!found) {
+               fprintf(stderr, "Unlock on %u@%u not found!\n",
+                       (int)off, (int)len);
+               abort();
+       }
+
+       fclose(locks);
+#endif
+
+       fl.l_type = F_UNLCK;
+       fl.l_whence = SEEK_SET;
+       fl.l_start = off;
+       fl.l_len = len;
+       fl.l_pid = 0;
+
+       return fcntl(tdb->fd, F_SETLKW, &fl);
+}
+
+/* list -1 is the alloc list, otherwise a hash chain. */
+static tdb_off_t lock_offset(int list)
+{
+       return FREELIST_TOP + 4*list;
+}
+
  /* a byte range locking function - return 0 on success
     this functions locks/unlocks 1 byte at the specified offset.
  
@@ -42,30 +133,36 @@ void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *ptr)
  
     note that a len of zero means lock to end of file
  */
-int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, 
-              int rw_type, int lck_type, int probe, size_t len)
+int tdb_brlock(struct tdb_context *tdb,
+              int rw_type, tdb_off_t offset, size_t len,
+              enum tdb_lock_flags flags)
  {
-       struct flock fl;
         int ret;
  
         if (tdb->flags & TDB_NOLOCK) {
                 return 0;
         }
  
+       if (flags & TDB_LOCK_MARK_ONLY) {
+               return 0;
+       }
+
         if ((rw_type == F_WRLCK) && (tdb->read_only || tdb->traverse_read)) {
                 tdb->ecode = TDB_ERR_RDONLY;
                 return -1;
         }
  
-       fl.l_type = rw_type;
-       fl.l_whence = SEEK_SET;
-       fl.l_start = offset;
-       fl.l_len = len;
-       fl.l_pid = 0;
+       /* Sanity check */
+       if (tdb->transaction && offset >= lock_offset(-1) && len != 0) {
+               tdb->ecode = TDB_ERR_RDONLY;
+               TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_brlock attempted in transaction at offset %d rw_type=%d flags=%d len=%d\n",
+                        offset, rw_type, flags, (int)len));
+               return -1;
+       }
  
         do {
-               ret = fcntl(tdb->fd,lck_type,&fl);
-
+               ret = fcntl_lock(tdb, rw_type, offset, len,
+                                flags & TDB_LOCK_WAIT);
                 /* Check for a sigalarm break. */
                 if (ret == -1 && errno == EINTR &&
                                 tdb->interrupt_sig_ptr &&
@@ -79,15 +176,34 @@ int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset,
                 /* Generic lock error. errno set by fcntl.
                  * EAGAIN is an expected return from non-blocking
                  * locks. */
-               if (!probe && lck_type != F_SETLK) {
-                       TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d len=%d\n", 
-                                tdb->fd, offset, rw_type, lck_type, (int)len));
+               if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
+                       TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d flags=%d len=%d\n",
+                                tdb->fd, offset, rw_type, flags, (int)len));
                 }
                 return -1;
         }
         return 0;
  }
  
+int tdb_brunlock(struct tdb_context *tdb,
+                int rw_type, tdb_off_t offset, size_t len)
+{
+       int ret;
+
+       if (tdb->flags & TDB_NOLOCK) {
+               return 0;
+       }
+
+       do {
+               ret = fcntl_unlock(tdb, rw_type, offset, len);
+       } while (ret == -1 && errno == EINTR);
+
+       if (ret == -1) {
+               TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %d rw_type=%d len=%d\n",
+                        tdb->fd, offset, rw_type, (int)len));
+       }
+       return ret;
+}
  
  /*
    upgrade a read lock to a write lock. This needs to be handled in a
@@ -95,12 +211,29 @@ int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset,
    deadlock detection and claim a deadlock when progress can be
    made. For those OSes we may loop for a while.  
  */
-int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len)
+int tdb_allrecord_upgrade(struct tdb_context *tdb)
  {
         int count = 1000;
+
+       if (tdb->allrecord_lock.count != 1) {
+               TDB_LOG((tdb, TDB_DEBUG_ERROR,
+                        "tdb_allrecord_upgrade failed: count %u too high\n",
+                        tdb->allrecord_lock.count));
+               return -1;
+       }
+
+       if (tdb->allrecord_lock.off != 1) {
+               TDB_LOG((tdb, TDB_DEBUG_ERROR,
+                        "tdb_allrecord_upgrade failed: already upgraded?\n"));
+               return -1;
+       }
+
         while (count--) {
                 struct timeval tv;
-               if (tdb_brlock(tdb, offset, F_WRLCK, F_SETLKW, 1, len) == 0) {
+               if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0,
+                              TDB_LOCK_WAIT|TDB_LOCK_PROBE) == 0) {
+                       tdb->allrecord_lock.ltype = F_WRLCK;
+                       tdb->allrecord_lock.off = 0;
                         return 0;
                 }
                 if (errno != EDEADLK) {
@@ -111,57 +244,46 @@ int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len)
                 tv.tv_usec = 1;
                 select(0, NULL, NULL, NULL, &tv);
         }
-       TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock_upgrade failed at offset %d\n", offset));
+       TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n"));
         return -1;
  }
  
-
-/* lock a list in the database. list -1 is the alloc list */
-static int _tdb_lock(struct tdb_context *tdb, int list, int ltype, int op)
+static struct tdb_lock_type *find_nestlock(struct tdb_context *tdb,
+                                          tdb_off_t offset)
  {
-       struct tdb_lock_type *new_lck;
-       int i;
-       bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
+       unsigned int i;
  
-       ltype &= ~TDB_MARK_LOCK;
-
-       /* a global lock allows us to avoid per chain locks */
-       if (tdb->global_lock.count && 
-           (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) {
-               return 0;
+       for (i=0; i<tdb->num_lockrecs; i++) {
+               if (tdb->lockrecs[i].off == offset) {
+                       return &tdb->lockrecs[i];
+               }
         }
+       return NULL;
+}
  
-       if (tdb->global_lock.count) {
-               tdb->ecode = TDB_ERR_LOCK;
-               return -1;
-       }
+/* lock an offset in the database. */
+int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
+                 enum tdb_lock_flags flags)
+{
+       struct tdb_lock_type *new_lck;
  
-       if (list < -1 || list >= (int)tdb->header.hash_size) {
+       if (offset >= lock_offset(tdb->header.hash_size)) {
                 tdb->ecode = TDB_ERR_LOCK;
-               TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid list %d for ltype=%d\n", 
-                          list, ltype));
+               TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid offset %u for ltype=%d\n",
+                        offset, ltype));
                 return -1;
         }
         if (tdb->flags & TDB_NOLOCK)
                 return 0;
  
-       for (i=0; i<tdb->num_lockrecs; i++) {
-               if (tdb->lockrecs[i].list == list) {
-                       if (tdb->lockrecs[i].count == 0) {
-                               /*
-                                * Can't happen, see tdb_unlock(). It should
-                                * be an assert.
-                                */
-                               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock: "
-                                        "lck->count == 0 for list %d", list));
-                       }
-                       /*
-                        * Just increment the in-memory struct, posix locks
-                        * don't stack.
-                        */
-                       tdb->lockrecs[i].count++;
-                       return 0;
-               }
+       new_lck = find_nestlock(tdb, offset);
+       if (new_lck) {
+               /*
+                * Just increment the in-memory struct, posix locks
+                * don't stack.
+                */
+               new_lck->count++;
+               return 0;
         }
  
         new_lck = (struct tdb_lock_type *)realloc(
@@ -175,27 +297,89 @@ static int _tdb_lock(struct tdb_context *tdb, int list, int ltype, int op)
  
         /* Since fcntl locks don't nest, we do a lock for the first one,
            and simply bump the count for future ones */
-       if (!mark_lock &&
-           tdb->methods->tdb_brlock(tdb,FREELIST_TOP+4*list, ltype, op,
-                                    0, 1)) {
+       if (tdb_brlock(tdb, ltype, offset, 1, flags)) {
                 return -1;
         }
  
-       tdb->num_locks++;
-
-       tdb->lockrecs[tdb->num_lockrecs].list = list;
+       tdb->lockrecs[tdb->num_lockrecs].off = offset;
         tdb->lockrecs[tdb->num_lockrecs].count = 1;
         tdb->lockrecs[tdb->num_lockrecs].ltype = ltype;
-       tdb->num_lockrecs += 1;
+       tdb->num_lockrecs++;
  
         return 0;
  }
  
+static int tdb_lock_and_recover(struct tdb_context *tdb)
+{
+       int ret;
+
+       /* We need to match locking order in transaction commit. */
+       if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT)) {
+               return -1;
+       }
+
+       if (tdb_brlock(tdb, F_WRLCK, OPEN_LOCK, 1, TDB_LOCK_WAIT)) {
+               tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+               return -1;
+       }
+
+       ret = tdb_transaction_recover(tdb);
+
+       tdb_brunlock(tdb, F_WRLCK, OPEN_LOCK, 1);
+       tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+
+       return ret;
+}
+
+static bool have_data_locks(const struct tdb_context *tdb)
+{
+       unsigned int i;
+
+       for (i = 0; i < tdb->num_lockrecs; i++) {
+               if (tdb->lockrecs[i].off >= lock_offset(-1))
+                       return true;
+       }
+       return false;
+}
+
+static int tdb_lock_list(struct tdb_context *tdb, int list, int ltype,
+                        enum tdb_lock_flags waitflag)
+{
+       int ret;
+       bool check = false;
+
+       /* an allrecord lock allows us to avoid per chain locks */
+       if (tdb->allrecord_lock.count &&
+           (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {
+               return 0;
+       }
+
+       if (tdb->allrecord_lock.count) {
+               tdb->ecode = TDB_ERR_LOCK;
+               ret = -1;
+       } else {
+               /* Only check when we grab first data lock. */
+               check = !have_data_locks(tdb);
+               ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag);
+
+               if (ret == 0 && check && tdb_needs_recovery(tdb)) {
+                       tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
+
+                       if (tdb_lock_and_recover(tdb) == -1) {
+                               return -1;
+                       }
+                       return tdb_lock_list(tdb, list, ltype, waitflag);
+               }
+       }
+       return ret;
+}
+
  /* lock a list in the database. list -1 is the alloc list */
  int tdb_lock(struct tdb_context *tdb, int list, int ltype)
  {
         int ret;
-       ret = _tdb_lock(tdb, list, ltype, F_SETLKW);
+
+       ret = tdb_lock_list(tdb, list, ltype, TDB_LOCK_WAIT);
         if (ret) {
                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lock failed on list %d "
                          "ltype=%d (%s)\n",  list, ltype, strerror(errno)));
@@ -206,49 +390,26 @@ int tdb_lock(struct tdb_context *tdb, int list, int ltype)
  /* lock a list in the database. list -1 is the alloc list. non-blocking lock */
  int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype)
  {
-       return _tdb_lock(tdb, list, ltype, F_SETLK);
+       return tdb_lock_list(tdb, list, ltype, TDB_LOCK_NOWAIT);
  }
  
  
-/* unlock the database: returns void because it's too late for errors. */
-       /* changed to return int it may be interesting to know there
-          has been an error  --simo */
-int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
+int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype,
+                   bool mark_lock)
  {
         int ret = -1;
-       int i;
-       struct tdb_lock_type *lck = NULL;
-       bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
-
-       ltype &= ~TDB_MARK_LOCK;
-
-       /* a global lock allows us to avoid per chain locks */
-       if (tdb->global_lock.count && 
-           (ltype == tdb->global_lock.ltype || ltype == F_RDLCK)) {
-               return 0;
-       }
-
-       if (tdb->global_lock.count) {
-               tdb->ecode = TDB_ERR_LOCK;
-               return -1;
-       }
+       struct tdb_lock_type *lck;
  
         if (tdb->flags & TDB_NOLOCK)
                 return 0;
  
         /* Sanity checks */
-       if (list < -1 || list >= (int)tdb->header.hash_size) {
-               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size));
+       if (offset >= lock_offset(tdb->header.hash_size)) {
+               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->header.hash_size));
                 return ret;
         }
  
-       for (i=0; i<tdb->num_lockrecs; i++) {
-               if (tdb->lockrecs[i].list == list) {
-                       lck = &tdb->lockrecs[i];
-                       break;
-               }
-       }
-
+       lck = find_nestlock(tdb, offset);
         if ((lck == NULL) || (lck->count == 0)) {
                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: count is 0\n"));
                 return -1;
@@ -269,20 +430,14 @@ int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
         if (mark_lock) {
                 ret = 0;
         } else {
-               ret = tdb->methods->tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK,
-                                              F_SETLKW, 0, 1);
+               ret = tdb_brunlock(tdb, ltype, offset, 1);
         }
-       tdb->num_locks--;
  
         /*
          * Shrink the array by overwriting the element just unlocked with the
          * last array element.
          */
-
-       if (tdb->num_lockrecs > 1) {
-               *lck = tdb->lockrecs[tdb->num_lockrecs-1];
-       }
-       tdb->num_lockrecs -= 1;
+       *lck = tdb->lockrecs[--tdb->num_lockrecs];
  
         /*
          * We don't bother with realloc when the array shrinks, but if we have
@@ -298,93 +453,101 @@ int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
         return ret;
  }
  
-/*
-  get the transaction lock
- */
-int tdb_transaction_lock(struct tdb_context *tdb, int ltype)
+int tdb_unlock(struct tdb_context *tdb, int list, int ltype)
  {
-       if (tdb->global_lock.count) {
-               return 0;
-       }
-       if (tdb->transaction_lock_count > 0) {
-               tdb->transaction_lock_count++;
+       /* an allrecord lock allows us to avoid per chain locks */
+       if (tdb->allrecord_lock.count &&
+           (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {
                 return 0;
         }
  
-       if (tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, ltype, 
-                                    F_SETLKW, 0, 1) == -1) {
-               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_lock: failed to get transaction lock\n"));
+       if (tdb->allrecord_lock.count) {
                 tdb->ecode = TDB_ERR_LOCK;
                 return -1;
         }
-       tdb->transaction_lock_count++;
-       return 0;
+
+       return tdb_nest_unlock(tdb, lock_offset(list), ltype, false);
  }
  
  /*
-  release the transaction lock
+  get the transaction lock
   */
-int tdb_transaction_unlock(struct tdb_context *tdb)
+int tdb_transaction_lock(struct tdb_context *tdb, int ltype,
+                        enum tdb_lock_flags lockflags)
  {
-       int ret;
-       if (tdb->global_lock.count) {
-               return 0;
-       }
-       if (tdb->transaction_lock_count > 1) {
-               tdb->transaction_lock_count--;
-               return 0;
-       }
-       ret = tdb->methods->tdb_brlock(tdb, TRANSACTION_LOCK, F_UNLCK, F_SETLKW, 0, 1);
-       if (ret == 0) {
-               tdb->transaction_lock_count = 0;
-       }
-       return ret;
+       return tdb_nest_lock(tdb, TRANSACTION_LOCK, ltype, lockflags);
  }
  
-
-
-
-/* lock/unlock entire database */
-static int _tdb_lockall(struct tdb_context *tdb, int ltype, int op)
+/*
+  release the transaction lock
+ */
+int tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
  {
-       bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
+       return tdb_nest_unlock(tdb, TRANSACTION_LOCK, ltype, false);
+}
  
-       ltype &= ~TDB_MARK_LOCK;
  
+/* lock/unlock entire database.  It can only be upgradable if you have some
+ * other way of guaranteeing exclusivity (i.e. transaction write lock). */
+int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
+                      enum tdb_lock_flags flags, bool upgradable)
+{
         /* There are no locks on read-only dbs */
         if (tdb->read_only || tdb->traverse_read) {
                 tdb->ecode = TDB_ERR_LOCK;
                 return -1;
         }
  
-       if (tdb->global_lock.count && tdb->global_lock.ltype == ltype) {
-               tdb->global_lock.count++;
+       if (tdb->allrecord_lock.count && tdb->allrecord_lock.ltype == ltype) {
+               tdb->allrecord_lock.count++;
                 return 0;
         }
  
-       if (tdb->global_lock.count) {
+       if (tdb->allrecord_lock.count) {
                 /* a global lock of a different type exists */
                 tdb->ecode = TDB_ERR_LOCK;
                 return -1;
         }
-       
-       if (tdb->num_locks != 0) {
+
+       if (tdb_have_extra_locks(tdb)) {
                 /* can't combine global and chain locks */
                 tdb->ecode = TDB_ERR_LOCK;
                 return -1;
         }
  
-       if (!mark_lock &&
-           tdb->methods->tdb_brlock(tdb, FREELIST_TOP, ltype, op,
-                                    0, 4*tdb->header.hash_size)) {
-               if (op == F_SETLKW) {
+       if (upgradable && ltype != F_RDLCK) {
+               /* tdb error: you can't upgrade a write lock! */
+               tdb->ecode = TDB_ERR_LOCK;
+               return -1;
+       }
+
+       if (tdb_brlock(tdb, ltype, FREELIST_TOP, 0, flags)) {
+               if (flags & TDB_LOCK_WAIT) {
                         TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_lockall failed (%s)\n", strerror(errno)));
                 }
                 return -1;
         }
  
-       tdb->global_lock.count = 1;
-       tdb->global_lock.ltype = ltype;
+       tdb->allrecord_lock.count = 1;
+       /* If it's upgradable, it's actually exclusive so we can treat
+        * it as a write lock. */
+       tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
+       tdb->allrecord_lock.off = upgradable;
+
+       if (tdb_needs_recovery(tdb)) {
+               bool mark = flags & TDB_LOCK_MARK_ONLY;
+               tdb_allrecord_unlock(tdb, ltype, mark);
+               if (mark) {
+                       tdb->ecode = TDB_ERR_LOCK;
+                       TDB_LOG((tdb, TDB_DEBUG_ERROR,
+                                "tdb_lockall_mark cannot do recovery\n"));
+                       return -1;
+               }
+               if (tdb_lock_and_recover(tdb) == -1) {
+                       return -1;
+               }
+               return tdb_allrecord_lock(tdb, ltype, flags, upgradable);
+       }
  
         return 0;
  }
@@ -392,37 +555,38 @@ static int _tdb_lockall(struct tdb_context *tdb, int ltype, int op)
  
  
  /* unlock entire db */
-static int _tdb_unlockall(struct tdb_context *tdb, int ltype)
+int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock)
  {
-       bool mark_lock = ((ltype & TDB_MARK_LOCK) == TDB_MARK_LOCK);
-
-       ltype &= ~TDB_MARK_LOCK;
-
         /* There are no locks on read-only dbs */
         if (tdb->read_only || tdb->traverse_read) {
                 tdb->ecode = TDB_ERR_LOCK;
                 return -1;
         }
  
-       if (tdb->global_lock.ltype != ltype || tdb->global_lock.count == 0) {
+       if (tdb->allrecord_lock.count == 0) {
+               tdb->ecode = TDB_ERR_LOCK;
+               return -1;
+       }
+
+       /* Upgradable locks are marked as write locks. */
+       if (tdb->allrecord_lock.ltype != ltype
+           && (!tdb->allrecord_lock.off || ltype != F_RDLCK)) {
                 tdb->ecode = TDB_ERR_LOCK;
                 return -1;
         }
  
-       if (tdb->global_lock.count > 1) {
-               tdb->global_lock.count--;
+       if (tdb->allrecord_lock.count > 1) {
+               tdb->allrecord_lock.count--;
                 return 0;
         }
  
-       if (!mark_lock &&
-           tdb->methods->tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 
-                                    0, 4*tdb->header.hash_size)) {
+       if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) {
                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno)));
                 return -1;
         }
  
-       tdb->global_lock.count = 0;
-       tdb->global_lock.ltype = 0;
+       tdb->allrecord_lock.count = 0;
+       tdb->allrecord_lock.ltype = 0;
  
         return 0;
  }
@@ -431,27 +595,27 @@ static int _tdb_unlockall(struct tdb_context *tdb, int ltype)
  int tdb_lockall(struct tdb_context *tdb)
  {
         tdb_trace(tdb, "tdb_lockall");
-       return _tdb_lockall(tdb, F_WRLCK, F_SETLKW);
+       return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
  }
  
  /* lock entire database with write lock - mark only */
  int tdb_lockall_mark(struct tdb_context *tdb)
  {
         tdb_trace(tdb, "tdb_lockall_mark");
-       return _tdb_lockall(tdb, F_WRLCK | TDB_MARK_LOCK, F_SETLKW);
+       return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_MARK_ONLY, false);
  }
  
  /* unlock entire database with write lock - unmark only */
  int tdb_lockall_unmark(struct tdb_context *tdb)
  {
         tdb_trace(tdb, "tdb_lockall_unmark");
-       return _tdb_unlockall(tdb, F_WRLCK | TDB_MARK_LOCK);
+       return tdb_allrecord_unlock(tdb, F_WRLCK, true);
  }
  
  /* lock entire database with write lock - nonblocking varient */
  int tdb_lockall_nonblock(struct tdb_context *tdb)
  {
-       int ret = _tdb_lockall(tdb, F_WRLCK, F_SETLK);
+       int ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false);
         tdb_trace_ret(tdb, "tdb_lockall_nonblock", ret);
         return ret;
  }
@@ -460,20 +624,20 @@ int tdb_lockall_nonblock(struct tdb_context *tdb)
  int tdb_unlockall(struct tdb_context *tdb)
  {
         tdb_trace(tdb, "tdb_unlockall");
-       return _tdb_unlockall(tdb, F_WRLCK);
+       return tdb_allrecord_unlock(tdb, F_WRLCK, false);
  }
  
  /* lock entire database with read lock */
  int tdb_lockall_read(struct tdb_context *tdb)
  {
         tdb_trace(tdb, "tdb_lockall_read");
-       return _tdb_lockall(tdb, F_RDLCK, F_SETLKW);
+       return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
  }
  
  /* lock entire database with read lock - nonblock varient */
  int tdb_lockall_read_nonblock(struct tdb_context *tdb)
  {
-       int ret = _tdb_lockall(tdb, F_RDLCK, F_SETLK);
+       int ret = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_NOWAIT, false);
         tdb_trace_ret(tdb, "tdb_lockall_read_nonblock", ret);
         return ret;
  }
@@ -482,7 +646,7 @@ int tdb_lockall_read_nonblock(struct tdb_context *tdb)
  int tdb_unlockall_read(struct tdb_context *tdb)
  {
         tdb_trace(tdb, "tdb_unlockall_read");
-       return _tdb_unlockall(tdb, F_RDLCK);
+       return tdb_allrecord_unlock(tdb, F_RDLCK, false);
  }
  
  /* lock/unlock one hash chain. This is meant to be used to reduce
@@ -507,7 +671,8 @@ int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key)
  /* mark a chain as locked without actually locking it. Warning! use with great caution! */
  int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key)
  {
-       int ret = tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK);
+       int ret = tdb_nest_lock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
+                               F_WRLCK, TDB_LOCK_MARK_ONLY);
         tdb_trace_1rec(tdb, "tdb_chainlock_mark", key);
         return ret;
  }
@@ -516,7 +681,8 @@ int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key)
  int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key)
  {
         tdb_trace_1rec(tdb, "tdb_chainlock_unmark", key);
-       return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK | TDB_MARK_LOCK);
+       return tdb_nest_unlock(tdb, lock_offset(BUCKET(tdb->hash_fn(&key))),
+                              F_WRLCK, true);
  }
  
  int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
@@ -544,10 +710,10 @@ int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
  /* record lock stops delete underneath */
  int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off)
  {
-       if (tdb->global_lock.count) {
+       if (tdb->allrecord_lock.count) {
                 return 0;
         }
-       return off ? tdb->methods->tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0, 1) : 0;
+       return off ? tdb_brlock(tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT) : 0;
  }
  
  /*
@@ -561,16 +727,21 @@ int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off)
         for (i = &tdb->travlocks; i; i = i->next)
                 if (i->off == off)
                         return -1;
-       return tdb->methods->tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1, 1);
+       if (tdb->allrecord_lock.count) {
+               if (tdb->allrecord_lock.ltype == F_WRLCK) {
+                       return 0;
+               }
+               return -1;
+       }
+       return tdb_brlock(tdb, F_WRLCK, off, 1, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
  }
  
-/*
-  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not
-  an error to fail to get the lock here.
-*/
  int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off)
  {
-       return tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0, 1);
+       if (tdb->allrecord_lock.count) {
+               return 0;
+       }
+       return tdb_brunlock(tdb, F_WRLCK, off, 1);
  }
  
  /* fcntl locks don't stack: avoid unlocking someone else's */
@@ -579,7 +750,7 @@ int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off)
         struct tdb_traverse_lock *i;
         uint32_t count = 0;
  
-       if (tdb->global_lock.count) {
+       if (tdb->allrecord_lock.count) {
                 return 0;
         }
  
@@ -588,5 +759,53 @@ int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off)
         for (i = &tdb->travlocks; i; i = i->next)
                 if (i->off == off)
                         count++;
-       return (count == 1 ? tdb->methods->tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0, 1) : 0);
+       return (count == 1 ? tdb_brunlock(tdb, F_RDLCK, off, 1) : 0);
+}
+
+bool tdb_have_extra_locks(struct tdb_context *tdb)
+{
+       unsigned int extra = tdb->num_lockrecs;
+
+       /* A transaction holds the lock for all records. */
+       if (!tdb->transaction && tdb->allrecord_lock.count) {
+               return true;
+       }
+
+       /* We always hold the active lock if CLEAR_IF_FIRST. */
+       if (find_nestlock(tdb, ACTIVE_LOCK)) {
+               extra--;
+       }
+
+       /* In a transaction, we expect to hold the transaction lock */
+       if (tdb->transaction && find_nestlock(tdb, TRANSACTION_LOCK)) {
+               extra--;
+       }
+
+       return extra;
+}
+
+/* The transaction code uses this to drop every lock except ACTIVE_LOCK. */
+void tdb_release_transaction_locks(struct tdb_context *tdb)
+{
+       unsigned int i, active = 0;
+
+       if (tdb->allrecord_lock.count != 0) {
+               tdb_brunlock(tdb, tdb->allrecord_lock.ltype, FREELIST_TOP, 0);
+               tdb->allrecord_lock.count = 0;
+       }
+
+       for (i=0;i<tdb->num_lockrecs;i++) {
+               struct tdb_lock_type *lck = &tdb->lockrecs[i];
+
+               /* Don't release the active lock!  Copy it to first entry. */
+               if (lck->off == ACTIVE_LOCK) {
+                       tdb->lockrecs[active++] = *lck;
+               } else {
+                       tdb_brunlock(tdb, lck->ltype, lck->off, 1);
+               }
+       }
+       tdb->num_lockrecs = active;
+       if (tdb->num_lockrecs == 0) {
+               SAFE_FREE(tdb->lockrecs);
+       }
  }
diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c

index 4d4f95a3daa7e7c57a1349467c7503a077ba8df4..dfe780d21b7d0d76f1484a28431ca4ed165c7fb1 100644 (file)
--- a/lib/tdb/common/open.c
+++ b/lib/tdb/common/open.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -92,7 +92,7 @@ static int tdb_new_database(struct tdb_context *tdb, int hash_size)
                 size -= written;
                 written = write(tdb->fd, newdb+written, size);
                 if (written == size) {
-               ret = 0;
+                       ret = 0;
                 } else if (written >= 0) {
                         /* a second incomplete write - we give up.
                          * guessing the errno... */
@@ -111,7 +111,7 @@ static int tdb_already_open(dev_t device,
                             ino_t ino)
  {
         struct tdb_context *i;
-       
+
         for (i = tdbs; i; i = i->next) {
                 if (i->device == device && i->inode == ino) {
                         return 1;
@@ -192,7 +192,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
                 errno = EINVAL;
                 goto fail;
         }
-       
+
         if (hash_size == 0)
                 hash_size = DEFAULT_HASH_SIZE;
         if ((open_flags & O_ACCMODE) == O_RDONLY) {
@@ -241,8 +241,8 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
          fcntl(tdb->fd, F_SETFD, v | FD_CLOEXEC);
  
         /* ensure there is only one process initialising at once */
-       if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
-               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n",
+       if (tdb_nest_lock(tdb, OPEN_LOCK, F_WRLCK, TDB_LOCK_WAIT) == -1) {
+               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get open lock on %s: %s\n",
                          name, strerror(errno)));
                 goto fail;      /* errno set by tdb_brlock */
         }
@@ -250,7 +250,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
         /* we need to zero database if we are the only one with it open */
         if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
             (!tdb->read_only) &&
-           (locked = (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0, 1) == 0))) {
+           (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) {
                 open_flags |= O_CREAT;
                 if (ftruncate(tdb->fd, 0) == -1) {
                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
@@ -313,9 +313,9 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
         tdb->inode = st.st_ino;
         tdb_mmap(tdb);
         if (locked) {
-               if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) {
+               if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) {
                         TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
-                                "failed to take ACTIVE_LOCK on %s: %s\n",
+                                "failed to release ACTIVE_LOCK on %s: %s\n",
                                  name, strerror(errno)));
                         goto fail;
                 }
@@ -328,8 +328,9 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
  
         if (tdb_flags & TDB_CLEAR_IF_FIRST) {
                 /* leave this lock in place to indicate it's in use */
-               if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)
+               if (tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) {
                         goto fail;
+               }
         }
  
         /* if needed, run recovery */
@@ -356,9 +357,10 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
   internal:
         /* Internal (memory-only) databases skip all the code above to
          * do with disk files, and resume here by releasing their
-        * global lock and hooking into the active list. */
-       if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1) == -1)
+        * open lock and hooking into the active list. */
+       if (tdb_nest_unlock(tdb, OPEN_LOCK, F_WRLCK, false) == -1) {
                 goto fail;
+       }
         tdb->next = tdbs;
         tdbs = tdb;
         return tdb;
@@ -382,6 +384,7 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
         if (tdb->fd != -1)
                 if (close(tdb->fd) != 0)
                         TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to close tdb->fd on error!\n"));
+       SAFE_FREE(tdb->lockrecs);
         SAFE_FREE(tdb);
         errno = save_errno;
         return NULL;
@@ -407,10 +410,10 @@ int tdb_close(struct tdb_context *tdb)
         struct tdb_context **i;
         int ret = 0;
  
-       tdb_trace(tdb, "tdb_close");
         if (tdb->transaction) {
-               _tdb_transaction_cancel(tdb);
+               tdb_transaction_cancel(tdb);
         }
+       tdb_trace(tdb, "tdb_close");
  
         if (tdb->map_ptr) {
                 if (tdb->flags & TDB_INTERNAL)
@@ -465,7 +468,7 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
                 return 0; /* Nothing to do. */
         }
  
-       if (tdb->num_locks != 0 || tdb->global_lock.count) {
+       if (tdb_have_extra_locks(tdb)) {
                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed with locks held\n"));
                 goto fail;
         }
@@ -500,8 +503,11 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
         tdb_mmap(tdb);
  #endif /* fake pread or pwrite */
  
-       if (active_lock &&
-           (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)) {
+       /* We may still think we hold the active lock. */
+       tdb->num_lockrecs = 0;
+       SAFE_FREE(tdb->lockrecs);
+
+       if (active_lock && tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) {
                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: failed to obtain active lock\n"));
                 goto fail;
         }
diff --git a/lib/tdb/common/tdb.c b/lib/tdb/common/tdb.c

index d2688def04743ae4e03cc84489cf251bec30dc8b..dac3f4e66661a2e53e086422671c03dad4a1b0e3 100644 (file)
--- a/lib/tdb/common/tdb.c
+++ b/lib/tdb/common/tdb.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -36,7 +36,7 @@ TDB_DATA tdb_null;
  void tdb_increment_seqnum_nonblock(struct tdb_context *tdb)
  {
         tdb_off_t seqnum=0;
-       
+
         if (!(tdb->flags & TDB_SEQNUM)) {
                 return;
         }
@@ -59,13 +59,14 @@ static void tdb_increment_seqnum(struct tdb_context *tdb)
                 return;
         }
  
-       if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
+       if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK,
+                         TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) {
                 return;
         }
  
         tdb_increment_seqnum_nonblock(tdb);
  
-       tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
+       tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false);
  }
  
  static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
@@ -79,7 +80,7 @@ static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
                         struct tdb_record *r)
  {
         tdb_off_t rec_ptr;
-       
+
         /* read in the hash top */
         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
                 return 0;
@@ -153,7 +154,6 @@ static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
                         free(data.dptr);
                 }
         }
-        
  
         /* must be long enough key, data and tailer */
         if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
@@ -170,7 +170,7 @@ static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash,
                 rec.data_len = dbuf.dsize;
                 return tdb_rec_write(tdb, rec_ptr, &rec);
         }
- 
+
         return 0;
  }
  
@@ -212,7 +212,7 @@ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
   * function. The parsing function is executed under the chain read lock, so it
   * should be fast and should not block on other syscalls.
   *
- * DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
+ * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
   *
   * For mmapped tdb's that do not have a transaction open it points the parsing
   * function directly at the mmap area, it avoids the malloc/memcpy in this
@@ -221,6 +221,8 @@ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
   *
   * This is interesting for all readers of potentially large data structures in
   * the tdb records, ldb indexes being one example.
+ *
+ * Return -1 if the record was not found.
   */
  
  int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
@@ -237,9 +239,10 @@ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
         hash = tdb->hash_fn(&key);
  
         if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
+               /* record not found */
                 tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1);
                 tdb->ecode = TDB_ERR_NOEXIST;
-               return 0;
+               return -1;
         }
         tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0);
  
@@ -260,7 +263,7 @@ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
  static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash)
  {
         struct tdb_record rec;
-       
+
         if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
                 return 0;
         tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
@@ -318,7 +321,7 @@ static int tdb_count_dead(struct tdb_context *tdb, uint32_t hash)
         int res = 0;
         tdb_off_t rec_ptr;
         struct tdb_record rec;
-       
+
         /* read in the hash top */
         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
                 return 0;
@@ -347,7 +350,7 @@ static int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)
         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
                 return -1;
         }
-       
+
         /* read in the hash top */
         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
                 goto fail;
@@ -443,7 +446,7 @@ static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
                                struct tdb_record *r, tdb_len_t length)
  {
         tdb_off_t rec_ptr;
-       
+
         /* read in the hash top */
         if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
                 return 0;
@@ -658,7 +661,7 @@ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
  
         ret = _tdb_store(tdb, key, dbuf, 0, hash);
         tdb_trace_2rec_retrec(tdb, "tdb_append", key, new_dbuf, dbuf);
-       
+
  failed:
         tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
         SAFE_FREE(dbuf.dptr);
@@ -804,7 +807,7 @@ static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t le
  
  /*
    wipe the entire database, deleting all records. This can be done
-  very fast by using a global lock. The entire data portion of the
+  very fast by using an allrecord lock. The entire data portion of the
    file becomes a single entry in the freelist.
  
    This code carefully steps around the recovery area, leaving it alone
diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h

index be9be72b156ab96f107bc21f0407ffeb198f2c11..e2167132b4844275472235db6d7f04de1bfc2c08 100644 (file)
--- a/lib/tdb/common/tdb_private.h
+++ b/lib/tdb/common/tdb_private.h
@@ -4,11 +4,11 @@
     trivial database library - private includes
  
     Copyright (C) Andrew Tridgell              2005
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -49,6 +49,7 @@ typedef uint32_t tdb_off_t;
  #define TDB_FREE_MAGIC (~TDB_MAGIC)
  #define TDB_DEAD_MAGIC (0xFEE1DEAD)
  #define TDB_RECOVERY_MAGIC (0xf53bc0e7U)
+#define TDB_RECOVERY_INVALID_MAGIC (0x0)
  #define TDB_ALIGNMENT 4
  #define DEFAULT_HASH_SIZE 131
  #define FREELIST_TOP (sizeof(struct tdb_header))
@@ -101,7 +102,7 @@ void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
  #endif /* !TDB_TRACE */
  
  /* lock offsets */
-#define GLOBAL_LOCK      0
+#define OPEN_LOCK        0
  #define ACTIVE_LOCK      4
  #define TRANSACTION_LOCK 8
  
@@ -150,7 +151,7 @@ struct tdb_header {
  };
  
  struct tdb_lock_type {
-       int list;
+       uint32_t off;
         uint32_t count;
         uint32_t ltype;
  };
@@ -162,6 +163,15 @@ struct tdb_traverse_lock {
         int lock_rw;
  };
  
+enum tdb_lock_flags {
+       /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
+       TDB_LOCK_NOWAIT = 0,
+       TDB_LOCK_WAIT = 1,
+       /* If set, don't log an error on failure. */
+       TDB_LOCK_PROBE = 2,
+       /* If set, don't actually lock at all. */
+       TDB_LOCK_MARK_ONLY = 4,
+};
  
  struct tdb_methods {
         int (*tdb_read)(struct tdb_context *, tdb_off_t , void *, tdb_len_t , int );
@@ -169,7 +179,6 @@ struct tdb_methods {
         void (*next_hash_chain)(struct tdb_context *, uint32_t *);
         int (*tdb_oob)(struct tdb_context *, tdb_off_t , int );
         int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t );
-       int (*tdb_brlock)(struct tdb_context *, tdb_off_t , int, int, int, size_t);
  };
  
  struct tdb_context {
@@ -180,7 +189,7 @@ struct tdb_context {
         int read_only; /* opened read-only */
         int traverse_read; /* read-only traversal */
         int traverse_write; /* read-write traversal */
-       struct tdb_lock_type global_lock;
+       struct tdb_lock_type allrecord_lock; /* .off == upgradable */
         int num_lockrecs;
         struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */
         enum TDB_ERROR ecode; /* error code for last tdb error */
@@ -193,12 +202,10 @@ struct tdb_context {
         struct tdb_logging_context log;
         unsigned int (*hash_fn)(TDB_DATA *key);
         int open_flags; /* flags used in the open - needed by reopen */
-       unsigned int num_locks; /* number of chain locks held */
         const struct tdb_methods *methods;
         struct tdb_transaction *transaction;
         int page_size;
         int max_dead_records;
-       int transaction_lock_count;
  #ifdef TDB_TRACE
         int tracefd;
  #endif
@@ -213,11 +220,25 @@ int tdb_munmap(struct tdb_context *tdb);
  void tdb_mmap(struct tdb_context *tdb);
  int tdb_lock(struct tdb_context *tdb, int list, int ltype);
  int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
+int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
+                 enum tdb_lock_flags flags);
+int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype,
+                   bool mark_lock);
  int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
-int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, int rw_type, int lck_type, int probe, size_t len);
-int tdb_transaction_lock(struct tdb_context *tdb, int ltype);
-int tdb_transaction_unlock(struct tdb_context *tdb);
-int tdb_brlock_upgrade(struct tdb_context *tdb, tdb_off_t offset, size_t len);
+int tdb_brlock(struct tdb_context *tdb,
+              int rw_type, tdb_off_t offset, size_t len,
+              enum tdb_lock_flags flags);
+int tdb_brunlock(struct tdb_context *tdb,
+                int rw_type, tdb_off_t offset, size_t len);
+bool tdb_have_extra_locks(struct tdb_context *tdb);
+void tdb_release_transaction_locks(struct tdb_context *tdb);
+int tdb_transaction_lock(struct tdb_context *tdb, int ltype,
+                        enum tdb_lock_flags lockflags);
+int tdb_transaction_unlock(struct tdb_context *tdb, int ltype);
+int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
+                      enum tdb_lock_flags flags, bool upgradable);
+int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock);
+int tdb_allrecord_upgrade(struct tdb_context *tdb);
  int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off);
  int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off);
  int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
@@ -229,7 +250,7 @@ int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
  int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
  int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off);
  int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off);
-int _tdb_transaction_cancel(struct tdb_context *tdb);
+bool tdb_needs_recovery(struct tdb_context *tdb);
  int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
  int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
  int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec);
@@ -246,4 +267,4 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size);
  int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off,
                       struct tdb_record *rec);
  
-
+int tdb_transaction_recover(struct tdb_context *tdb);
diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c

index b8988ea8301d807600f14925e756a9356848ea23..304a03fa3836940197c1319eeae5ddbcb3fce823 100644 (file)
--- a/lib/tdb/common/transaction.c
+++ b/lib/tdb/common/transaction.c
@@ -8,7 +8,7 @@
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -59,7 +59,7 @@
    - allow for nested calls to tdb_transaction_start(), re-using the
      existing transaction record. If the inner transaction is cancelled
      then a subsequent commit will fail
- 
+
    - keep a mirrored copy of the tdb hash chain heads to allow for the
      fast hash heads scan on traverse, updating the mirrored copy in
      the transaction version of tdb_write
@@ -76,7 +76,7 @@
      to reduce this to 3 or even 2 with some more work.
  
    - check for a valid recovery record on open of the tdb, while the
-    global lock is held. Automatically recover from the transaction
+    open lock is held. Automatically recover from the transaction
      recovery area if needed, then continue with the open as
      usual. This allows for smooth crash recovery with no administrator
      intervention.
@@ -135,9 +135,6 @@ struct tdb_transaction {
         bool prepared;
         tdb_off_t magic_offset;
  
-       /* set when the GLOBAL_LOCK has been taken */
-       bool global_lock_taken;
-
         /* old file size before transaction */
         tdb_len_t old_map_size;
  
@@ -188,7 +185,7 @@ static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
                         goto fail;
                 }
         }
-       
+
         /* now copy it out of this block */
         memcpy(buf, tdb->transaction->blocks[blk] + (off % tdb->transaction->block_size), len);
         if (cv) {
@@ -295,7 +292,7 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off,
                         }                       
                 }
         }
-       
+
         /* overwrite part of an existing block */
         if (buf == NULL) {
                 memset(tdb->transaction->blocks[blk] + off, 0, len);
@@ -411,22 +408,12 @@ static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size,
         return 0;
  }
  
-/*
-  brlock during a transaction - ignore them
-*/
-static int transaction_brlock(struct tdb_context *tdb, tdb_off_t offset, 
-                             int rw_type, int lck_type, int probe, size_t len)
-{
-       return 0;
-}
-
  static const struct tdb_methods transaction_methods = {
         transaction_read,
         transaction_write,
         transaction_next_hash_chain,
         transaction_oob,
         transaction_expand_file,
-       transaction_brlock
  };
  
  
@@ -434,7 +421,8 @@ static const struct tdb_methods transaction_methods = {
    start a tdb transaction. No token is returned, as only a single
    transaction is allowed to be pending per tdb_context
  */
-int tdb_transaction_start(struct tdb_context *tdb)
+static int _tdb_transaction_start(struct tdb_context *tdb,
+                                 enum tdb_lock_flags lockflags)
  {
         /* some sanity checks */
         if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) {
@@ -455,7 +443,7 @@ int tdb_transaction_start(struct tdb_context *tdb)
                 return 0;
         }
  
-       if (tdb->num_locks != 0 || tdb->global_lock.count) {
+       if (tdb_have_extra_locks(tdb)) {
                 /* the caller must not have any locks when starting a
                    transaction as otherwise we'll be screwed by lack
                    of nested locks in posix */
@@ -486,18 +474,20 @@ int tdb_transaction_start(struct tdb_context *tdb)
         /* get the transaction write lock. This is a blocking lock. As
            discussed with Volker, there are a number of ways we could
            make this async, which we will probably do in the future */
-       if (tdb_transaction_lock(tdb, F_WRLCK) == -1) {
+       if (tdb_transaction_lock(tdb, F_WRLCK, lockflags) == -1) {
                 SAFE_FREE(tdb->transaction->blocks);
                 SAFE_FREE(tdb->transaction);
+               if ((lockflags & TDB_LOCK_WAIT) == 0) {
+                       tdb->ecode = TDB_ERR_NOLOCK;
+               }
                 return -1;
         }
-       
+
         /* get a read lock from the freelist to the end of file. This
            is upgraded to a write lock during the commit */
-       if (tdb_brlock(tdb, FREELIST_TOP, F_RDLCK, F_SETLKW, 0, 0) == -1) {
+       if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, true) == -1) {
                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to get hash locks\n"));
-               tdb->ecode = TDB_ERR_LOCK;
-               goto fail;
+               goto fail_allrecord_lock;
         }
  
         /* setup a copy of the hash table heads so the hash scan in
@@ -528,16 +518,26 @@ int tdb_transaction_start(struct tdb_context *tdb)
         /* Trace at the end, so we get sequence number correct. */
         tdb_trace(tdb, "tdb_transaction_start");
         return 0;
-       
+
  fail:
-       tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
-       tdb_transaction_unlock(tdb);
+       tdb_allrecord_unlock(tdb, F_RDLCK, false);
+fail_allrecord_lock:
+       tdb_transaction_unlock(tdb, F_WRLCK);
         SAFE_FREE(tdb->transaction->blocks);
         SAFE_FREE(tdb->transaction->hash_heads);
         SAFE_FREE(tdb->transaction);
         return -1;
  }
  
+int tdb_transaction_start(struct tdb_context *tdb)
+{
+       return _tdb_transaction_start(tdb, TDB_LOCK_WAIT);
+}
+
+int tdb_transaction_start_nonblock(struct tdb_context *tdb)
+{
+       return _tdb_transaction_start(tdb, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE);
+}
  
  /*
    sync to disk
@@ -548,7 +548,7 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t
                 return 0;
         }
  
-       if (fsync(tdb->fd) != 0) {
+       if (fdatasync(tdb->fd) != 0) {
                 tdb->ecode = TDB_ERR_IO;
                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction: fsync failed\n"));
                 return -1;
@@ -569,7 +569,7 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t
  }
  
  
-int _tdb_transaction_cancel(struct tdb_context *tdb)
+static int _tdb_transaction_cancel(struct tdb_context *tdb)
  {      
         int i, ret = 0;
  
@@ -596,46 +596,25 @@ int _tdb_transaction_cancel(struct tdb_context *tdb)
  
         if (tdb->transaction->magic_offset) {
                 const struct tdb_methods *methods = tdb->transaction->io_methods;
-               uint32_t zero = 0;
+               const uint32_t invalid = TDB_RECOVERY_INVALID_MAGIC;
  
                 /* remove the recovery marker */
-               if (methods->tdb_write(tdb, tdb->transaction->magic_offset, &zero, 4) == -1 ||
+               if (methods->tdb_write(tdb, tdb->transaction->magic_offset, &invalid, 4) == -1 ||
                 transaction_sync(tdb, tdb->transaction->magic_offset, 4) == -1) {
                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_cancel: failed to remove recovery magic\n"));
                         ret = -1;
                 }
         }
  
-       if (tdb->transaction->global_lock_taken) {
-               tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
-               tdb->transaction->global_lock_taken = false;
-       }
-
-       /* remove any global lock created during the transaction */
-       if (tdb->global_lock.count != 0) {
-               tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size);
-               tdb->global_lock.count = 0;
-       }
-
-       /* remove any locks created during the transaction */
-       if (tdb->num_locks != 0) {
-               for (i=0;i<tdb->num_lockrecs;i++) {
-                       tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list,
-                                  F_UNLCK,F_SETLKW, 0, 1);
-               }
-               tdb->num_locks = 0;
-               tdb->num_lockrecs = 0;
-               SAFE_FREE(tdb->lockrecs);
-       }
+       /* This also removes the OPEN_LOCK, if we have it. */
+       tdb_release_transaction_locks(tdb);
  
         /* restore the normal io methods */
         tdb->methods = tdb->transaction->io_methods;
  
-       tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
-       tdb_transaction_unlock(tdb);
         SAFE_FREE(tdb->transaction->hash_heads);
         SAFE_FREE(tdb->transaction);
-       
+
         return ret;
  }
  
@@ -695,10 +674,16 @@ static int tdb_recovery_allocate(struct tdb_context *tdb,
  
         rec.rec_len = 0;
  
-       if (recovery_head != 0 && 
-           methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
-               TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery record\n"));
-               return -1;
+       if (recovery_head != 0) {
+               if (methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) {
+                       TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to read recovery record\n"));
+                       return -1;
+               }
+               /* ignore invalid recovery regions: can happen in crash */
+               if (rec.magic != TDB_RECOVERY_MAGIC &&
+                   rec.magic != TDB_RECOVERY_INVALID_MAGIC) {
+                       recovery_head = 0;
+               }
         }
  
         *recovery_size = tdb_recovery_size(tdb);
@@ -793,7 +778,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb,
         rec = (struct tdb_record *)data;
         memset(rec, 0, sizeof(*rec));
  
-       rec->magic    = 0;
+       rec->magic    = TDB_RECOVERY_INVALID_MAGIC;
         rec->data_len = recovery_size;
         rec->rec_len  = recovery_max_size;
         rec->key_len  = old_map_size;
@@ -815,7 +800,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb,
                 if (i == tdb->transaction->num_blocks-1) {
                         length = tdb->transaction->last_block_size;
                 }
-               
+
                 if (offset >= old_map_size) {
                         continue;
                 }
@@ -928,10 +913,10 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
         }
  
         methods = tdb->transaction->io_methods;
-       
+
         /* if there are any locks pending then the caller has not
            nested their locks properly, so fail the transaction */
-       if (tdb->num_locks || tdb->global_lock.count) {
+       if (tdb_have_extra_locks(tdb)) {
                 tdb->ecode = TDB_ERR_LOCK;
                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: locks pending on commit\n"));
                 _tdb_transaction_cancel(tdb);
@@ -939,24 +924,20 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
         }
  
         /* upgrade the main transaction lock region to a write lock */
-       if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == -1) {
+       if (tdb_allrecord_upgrade(tdb) == -1) {
                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to upgrade hash locks\n"));
-               tdb->ecode = TDB_ERR_LOCK;
                 _tdb_transaction_cancel(tdb);
                 return -1;
         }
  
-       /* get the global lock - this prevents new users attaching to the database
+       /* get the open lock - this prevents new users attaching to the database
            during the commit */
-       if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
-               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to get global lock\n"));
-               tdb->ecode = TDB_ERR_LOCK;
+       if (tdb_nest_lock(tdb, OPEN_LOCK, F_WRLCK, TDB_LOCK_WAIT) == -1) {
+               TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to get open lock\n"));
                 _tdb_transaction_cancel(tdb);
                 return -1;
         }
  
-       tdb->transaction->global_lock_taken = true;
-
         if (!(tdb->flags & TDB_NOSYNC)) {
                 /* write the recovery data to the end of the file */
                 if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) {
@@ -982,7 +963,7 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
                 methods->tdb_oob(tdb, tdb->map_size + 1, 1);
         }
  
-       /* Keep the global lock until the actual commit */
+       /* Keep the open lock until the actual commit */
  
         return 0;
  }
@@ -1056,7 +1037,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
  
                 if (methods->tdb_write(tdb, offset, tdb->transaction->blocks[i], length) == -1) {
                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n"));
-                       
+
                         /* we've overwritten part of the data and
                            possibly expanded the file, so we need to
                            run the crash recovery code */
@@ -1110,7 +1091,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
  
  /*
    recover from an aborted transaction. Must be called with exclusive
-  database write access already established (including the global
+  database write access already established (including the open
    lock to prevent new processes attaching)
  */
  int tdb_transaction_recover(struct tdb_context *tdb)
@@ -1211,16 +1192,6 @@ int tdb_transaction_recover(struct tdb_context *tdb)
                 tdb->ecode = TDB_ERR_IO;
                 return -1;                      
         }
-       
-       /* reduce the file size to the old size */
-       tdb_munmap(tdb);
-       if (ftruncate(tdb->fd, recovery_eof) != 0) {
-               TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to reduce to recovery size\n"));
-               tdb->ecode = TDB_ERR_IO;
-               return -1;                      
-       }
-       tdb->map_size = recovery_eof;
-       tdb_mmap(tdb);
  
         if (transaction_sync(tdb, 0, recovery_eof) == -1) {
                 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync2 recovery\n"));
@@ -1234,3 +1205,28 @@ int tdb_transaction_recover(struct tdb_context *tdb)
         /* all done */
         return 0;
  }
+
+/* Returns true if the recovery record carries TDB_RECOVERY_MAGIC; any I/O failure is also reported as "needs recovery". */
+bool tdb_needs_recovery(struct tdb_context *tdb)
+{
+       tdb_off_t recovery_head;
+       struct tdb_record rec;
+
+       /* find the recovery area; a failed read is treated as "needs recovery" */
+       if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
+               return true;
+       }
+
+       if (recovery_head == 0) {
+               /* we have never allocated a recovery record */
+               return false;
+       }
+
+       /* read the recovery record; a failed read is treated as "needs recovery" */
+       if (tdb->methods->tdb_read(tdb, recovery_head, &rec,
+                                  sizeof(rec), DOCONV()) == -1) {
+               return true;
+       }
+       /* any other magic value means there is nothing to recover */
+       return (rec.magic == TDB_RECOVERY_MAGIC);
+}
diff --git a/lib/tdb/common/traverse.c b/lib/tdb/common/traverse.c

index c340dd354b7ad6a239aa8ecc7d4c6d2a13e11b32..d77086a79aa6c270dd3a7f799fa978ad74ca8da2 100644 (file)
--- a/lib/tdb/common/traverse.c
+++ b/lib/tdb/common/traverse.c
@@ -6,11 +6,11 @@
     Copyright (C) Andrew Tridgell              1999-2005
     Copyright (C) Paul `Rusty' Russell             2000
     Copyright (C) Jeremy Allison                           2000-2003
-   
+
       ** NOTE! The following LGPL license applies to the tdb
       ** library. This does NOT imply that all of Samba is released
       ** under the LGPL
-   
+
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
@@ -44,7 +44,7 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock
                            common for the use of tdb with ldb, where large
                            hashes are used. In that case we spend most of our
                            time in tdb_brlock(), locking empty hash chains.
-                          
+
                            To avoid this, we do an unlocked pre-check to see
                            if the hash chain is empty before starting to look
                            inside it. If it is empty then we can avoid that
@@ -52,7 +52,7 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock
                            the value we get back, as we read it without a
                            lock, so instead we get the lock and re-fetch the
                            value below.
-                          
+
                            Notice that not doing this optimisation on the
                            first hash chain is critical. We must guarantee
                            that we have done at least one fcntl lock at the
@@ -62,7 +62,7 @@ static tdb_off_t tdb_next_lock(struct tdb_context *tdb, struct tdb_traverse_lock
                            could possibly miss those with this trick, but we
                            could miss them anyway without this trick, so the
                            semantics don't change.
-                          
+
                            With a non-indexed ldb search this trick gains us a
                            factor of around 80 in speed on a linux 2.6.x
                            system (testing using ldbtest).
@@ -220,7 +220,7 @@ int tdb_traverse_read(struct tdb_context *tdb,
  
         /* we need to get a read lock on the transaction lock here to
            cope with the lock ordering semantics of solaris10 */
-       if (tdb_transaction_lock(tdb, F_RDLCK)) {
+       if (tdb_transaction_lock(tdb, F_RDLCK, TDB_LOCK_WAIT)) {
                 return -1;
         }
  
@@ -229,7 +229,7 @@ int tdb_traverse_read(struct tdb_context *tdb,
         ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
         tdb->traverse_read--;
  
-       tdb_transaction_unlock(tdb);
+       tdb_transaction_unlock(tdb, F_RDLCK);
  
         return ret;
  }
@@ -251,7 +251,7 @@ int tdb_traverse(struct tdb_context *tdb,
                 return tdb_traverse_read(tdb, fn, private_data);
         }
  
-       if (tdb_transaction_lock(tdb, F_WRLCK)) {
+       if (tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT)) {
                 return -1;
         }
  
@@ -260,7 +260,7 @@ int tdb_traverse(struct tdb_context *tdb,
         ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
         tdb->traverse_write--;
  
-       tdb_transaction_unlock(tdb);
+       tdb_transaction_unlock(tdb, F_WRLCK);
  
         return ret;
  }
diff --git a/lib/tdb/configure.ac b/lib/tdb/configure.ac

index 779f596e1875f69e9aa77fef797a03f558024a04..686b0a6763379ed35969d81c04dd2e546ae7a8e3 100644 (file)
--- a/lib/tdb/configure.ac
+++ b/lib/tdb/configure.ac
@@ -2,7 +2,7 @@ AC_PREREQ(2.50)
  AC_DEFUN([SMB_MODULE_DEFAULT], [echo -n ""])
  AC_DEFUN([SMB_LIBRARY_ENABLE], [echo -n ""])
  AC_DEFUN([SMB_ENABLE], [echo -n ""])
-AC_INIT(tdb, 1.2.0)
+AC_INIT(tdb, 1.2.2)
  AC_CONFIG_SRCDIR([common/tdb.c])
  AC_CONFIG_HEADER(include/config.h)
  AC_LIBREPLACE_ALL_CHECKS
diff --git a/lib/tdb/docs/README b/lib/tdb/docs/README

index c02ee0e030ad9e1ae6b71947d18cbd5b23433705..fe0e2581838a99512d4b85806b37cd6679f496f1 100644 (file)
--- a/lib/tdb/docs/README
+++ b/lib/tdb/docs/README
@@ -104,6 +104,25 @@ TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key);
  
     caller must free the resulting data
  
+----------------------------------------------------------------------
+int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
+                    int (*parser)(TDB_DATA key, TDB_DATA data,
+                                  void *private_data),
+                    void *private_data);
+
+   Hand a record to a parser function without allocating it.
+
+   This function is meant as a fast tdb_fetch alternative for large records
+   that are frequently read. The "key" and "data" arguments point directly
+   into the tdb shared memory; they are not aligned at any boundary.
+
+   WARNING: The parser is called while tdb holds a lock on the record. DO NOT
+   call other tdb routines from within the parser. Also, for good performance
+   you should make the parser fast to allow parallel operations.
+
+   tdb_parse_record returns -1 if the record was not found.  If the record was
+   found, the return value of "parser" is passed up to the caller.
+
  ----------------------------------------------------------------------
  int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key);
  
diff --git a/lib/tdb/include/tdb.h b/lib/tdb/include/tdb.h

index db9ce4ad276f094c1ec421d783970f4efaabd428..cd17132fbed87cea9de8be1d982963b07af430ca 100644 (file)
--- a/lib/tdb/include/tdb.h
+++ b/lib/tdb/include/tdb.h
@@ -32,6 +32,11 @@ extern "C" {
  
  #include "signal.h"
  
+/* Samba sets hidden attribute when building libraries: we don't. */
+#ifndef _PUBLIC_
+#define _PUBLIC_
+#endif
+
  /* flags to tdb_store() */
  #define TDB_REPLACE 1          /* Unused */
  #define TDB_INSERT 2           /* Don't overwrite an existing entry */
@@ -90,84 +95,84 @@ struct tdb_logging_context {
          void *log_private;
  };
  
-struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags,
+_PUBLIC_ struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags,
                       int open_flags, mode_t mode);
-struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
+_PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
                          int open_flags, mode_t mode,
                          const struct tdb_logging_context *log_ctx,
                          tdb_hash_func hash_fn);
-void tdb_set_max_dead(struct tdb_context *tdb, int max_dead);
-
-int tdb_reopen(struct tdb_context *tdb);
-int tdb_reopen_all(int parent_longlived);
-void tdb_set_logging_function(struct tdb_context *tdb, const struct tdb_logging_context *log_ctx);
-enum TDB_ERROR tdb_error(struct tdb_context *tdb);
-const char *tdb_errorstr(struct tdb_context *tdb);
-TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
-int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
-                    int (*parser)(TDB_DATA key, TDB_DATA data,
-                                  void *private_data),
-                    void *private_data);
-int tdb_delete(struct tdb_context *tdb, TDB_DATA key);
-int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag);
-int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf);
-int tdb_close(struct tdb_context *tdb);
-TDB_DATA tdb_firstkey(struct tdb_context *tdb);
-TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key);
-int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *);
-int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *);
-int tdb_exists(struct tdb_context *tdb, TDB_DATA key);
-int tdb_lockall(struct tdb_context *tdb);
-int tdb_lockall_nonblock(struct tdb_context *tdb);
-int tdb_unlockall(struct tdb_context *tdb);
-int tdb_lockall_read(struct tdb_context *tdb);
-int tdb_lockall_read_nonblock(struct tdb_context *tdb);
-int tdb_unlockall_read(struct tdb_context *tdb);
-int tdb_lockall_mark(struct tdb_context *tdb);
-int tdb_lockall_unmark(struct tdb_context *tdb);
-const char *tdb_name(struct tdb_context *tdb);
-int tdb_fd(struct tdb_context *tdb);
-tdb_log_func tdb_log_fn(struct tdb_context *tdb);
-void *tdb_get_logging_private(struct tdb_context *tdb);
-int tdb_transaction_start(struct tdb_context *tdb);
-int tdb_transaction_prepare_commit(struct tdb_context *tdb);
-int tdb_transaction_commit(struct tdb_context *tdb);
-int tdb_transaction_cancel(struct tdb_context *tdb);
-int tdb_transaction_recover(struct tdb_context *tdb);
-int tdb_get_seqnum(struct tdb_context *tdb);
-int tdb_hash_size(struct tdb_context *tdb);
-size_t tdb_map_size(struct tdb_context *tdb);
-int tdb_get_flags(struct tdb_context *tdb);
-void tdb_add_flags(struct tdb_context *tdb, unsigned flag);
-void tdb_remove_flags(struct tdb_context *tdb, unsigned flag);
-void tdb_enable_seqnum(struct tdb_context *tdb);
-void tdb_increment_seqnum_nonblock(struct tdb_context *tdb);
-int tdb_check(struct tdb_context *tdb,
+_PUBLIC_ void tdb_set_max_dead(struct tdb_context *tdb, int max_dead);
+
+_PUBLIC_ int tdb_reopen(struct tdb_context *tdb);
+_PUBLIC_ int tdb_reopen_all(int parent_longlived);
+_PUBLIC_ void tdb_set_logging_function(struct tdb_context *tdb, const struct tdb_logging_context *log_ctx);
+_PUBLIC_ enum TDB_ERROR tdb_error(struct tdb_context *tdb);
+_PUBLIC_ const char *tdb_errorstr(struct tdb_context *tdb);
+_PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
+                             int (*parser)(TDB_DATA key, TDB_DATA data,
+                                           void *private_data),
+                             void *private_data);
+_PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag);
+_PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf);
+_PUBLIC_ int tdb_close(struct tdb_context *tdb);
+_PUBLIC_ TDB_DATA tdb_firstkey(struct tdb_context *tdb);
+_PUBLIC_ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_traverse(struct tdb_context *tdb, tdb_traverse_func fn, void *);
+_PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb, tdb_traverse_func fn, void *);
+_PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_lockall(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_unlockall(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_read(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_read_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_unlockall_read(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_mark(struct tdb_context *tdb);
+_PUBLIC_ int tdb_lockall_unmark(struct tdb_context *tdb);
+_PUBLIC_ const char *tdb_name(struct tdb_context *tdb);
+_PUBLIC_ int tdb_fd(struct tdb_context *tdb);
+_PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb);
+_PUBLIC_ void *tdb_get_logging_private(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_start(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_start_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_prepare_commit(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_commit(struct tdb_context *tdb);
+_PUBLIC_ int tdb_transaction_cancel(struct tdb_context *tdb);
+_PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb);
+_PUBLIC_ int tdb_hash_size(struct tdb_context *tdb);
+_PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb);
+_PUBLIC_ int tdb_get_flags(struct tdb_context *tdb);
+_PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flag);
+_PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flag);
+_PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb);
+_PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb);
+_PUBLIC_ int tdb_check(struct tdb_context *tdb,
               int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
               void *private_data);
  
  /* Low level locking functions: use with care */
-int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key);
-int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_mark(struct tdb_context *tdb, TDB_DATA key);
+_PUBLIC_ int tdb_chainlock_unmark(struct tdb_context *tdb, TDB_DATA key);
  
-void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *sigptr);
+_PUBLIC_ void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *sigptr);
  
  /* wipe and repack */
-int tdb_wipe_all(struct tdb_context *tdb);
-int tdb_repack(struct tdb_context *tdb);
+_PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb);
+_PUBLIC_ int tdb_repack(struct tdb_context *tdb);
  
  /* Debug functions. Not used in production. */
-void tdb_dump_all(struct tdb_context *tdb);
-int tdb_printfreelist(struct tdb_context *tdb);
-int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries);
-int tdb_freelist_size(struct tdb_context *tdb);
+_PUBLIC_ void tdb_dump_all(struct tdb_context *tdb);
+_PUBLIC_ int tdb_printfreelist(struct tdb_context *tdb);
+_PUBLIC_ int tdb_validate_freelist(struct tdb_context *tdb, int *pnum_entries);
+_PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb);
  
-extern TDB_DATA tdb_null;
+_PUBLIC_ extern TDB_DATA tdb_null;
  
  #ifdef  __cplusplus
  }
diff --git a/lib/tdb/pytdb.c b/lib/tdb/pytdb.c

index 202dca1571e495496de2944e6c829e5b07b9b112..7a9205b815cbaf8c580fcb3fb1d679bf600ce11e 100644 (file)
--- a/lib/tdb/pytdb.c
+++ b/lib/tdb/pytdb.c
@@ -112,13 +112,6 @@ static PyObject *obj_transaction_commit(PyTdbObject *self)
         Py_RETURN_NONE;
  }
  
-static PyObject *obj_transaction_recover(PyTdbObject *self)
-{
-       int ret = tdb_transaction_recover(self->ctx);
-       PyErr_TDB_ERROR_IS_ERR_RAISE(ret, self->ctx);
-       Py_RETURN_NONE;
-}
-
  static PyObject *obj_transaction_start(PyTdbObject *self)
  {
         int ret = tdb_transaction_start(self->ctx);
@@ -325,9 +318,6 @@ static PyMethodDef tdb_object_methods[] = {
         { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS,
                 "S.transaction_commit() -> None\n"
                 "Commit the currently active transaction." },
-       { "transaction_recover", (PyCFunction)obj_transaction_recover, METH_NOARGS,
-               "S.transaction_recover() -> None\n"
-               "Recover the currently active transaction." },
         { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS,
                 "S.transaction_start() -> None\n"
                 "Start a new transaction." },
diff --git a/lib/tdb/python/tdbdump.py b/lib/tdb/python/tdbdump.py

index d759d771c872661db207a161be7624b06577de4f..01859ebce267baa71edb52fba10bee081dd7c67c 100644 (file)
--- a/lib/tdb/python/tdbdump.py
+++ b/lib/tdb/python/tdbdump.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
  # Trivial reimplementation of tdbdump in Python
  
  import tdb, sys
diff --git a/lib/tdb/python/tests/simple.py b/lib/tdb/python/tests/simple.py

index c7443c0d433526054190c6ce80d66e4866196cb9..1c5982b1a42bab6c6c74622dd8e674fc77604b23 100644 (file)
--- a/lib/tdb/python/tests/simple.py
+++ b/lib/tdb/python/tests/simple.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
  # Some simple tests for the Python bindings for TDB
  # Note that this tests the interface of the Python bindings
  # It does not test tdb itself.
diff --git a/lib/tdb/release-script.sh b/lib/tdb/release-script.sh

deleted file mode 100755 (executable)

index 273ca30..0000000
--- a/lib/tdb/release-script.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-if [ "$1" = "" ]; then
-    echo "Please provide version string, eg: 1.2.0"
-    exit 1
-fi
-
-if [ ! -d "lib/tdb" ]; then
-    echo "Run this script from the samba base directory."
-    exit 1
-fi
-
-git clean -f -x -d lib/tdb
-git clean -f -x -d lib/replace
-
-curbranch=`git branch |grep "^*" | tr -d "* "`
-
-version=$1
-strver=`echo ${version} | tr "." "-"`
-
-# Checkout the release tag
-git branch -f tdb-release-script-${strver} tdb-${strver}
-if [ ! "$?" = "0" ];  then
-    echo "Unable to checkout tdb-${strver} release"
-    exit 1
-fi
-
-git checkout tdb-release-script-${strver}
-
-# Test configure agrees with us
-confver=`grep "^AC_INIT" lib/tdb/configure.ac | tr -d "AC_INIT(tdb, " | tr -d ")"`
-if [ ! "$confver" = "$version" ]; then
-    echo "Wrong version, requested release for ${version}, found ${confver}"
-    exit 1
-fi
-
-# Now build tarball
-cp -a lib/tdb tdb-${version}
-cp -a lib/replace tdb-${version}/libreplace
-pushd tdb-${version}
-./autogen.sh
-popd
-tar cvzf tdb-${version}.tar.gz tdb-${version}
-rm -fr tdb-${version}
-
-#Clean up
-git checkout $curbranch
-git branch -d tdb-release-script-${strver}
diff --git a/lib/tdb/script/release-script.sh b/lib/tdb/script/release-script.sh

new file mode 100644 (file)

index 0000000..e9a023d
--- /dev/null
+++ b/lib/tdb/script/release-script.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+LNAME=tdb
+LINCLUDE=include/tdb.h
+
+if [ "$1" = "" ]; then
+    echo "Please provide version string, eg: 1.2.0"
+    exit 1
+fi
+
+if [ ! -d "lib/${LNAME}" ]; then
+    echo "Run this script from the samba base directory."
+    exit 1
+fi
+
+curbranch=`git branch |grep "^*" | tr -d "* "`
+
+version=$1
+strver=`echo ${version} | tr "." "-"`
+
+# Checkout the release tag
+git branch -f ${LNAME}-release-script-${strver} ${LNAME}-${strver}
+if [ ! "$?" = "0" ];  then
+    echo "Unable to checkout ${LNAME}-${strver} release"
+    exit 1
+fi
+
+function cleanquit {
+    # Clean up: switch back to the branch we started on, delete the temporary release branch, then exit with status $1
+    git checkout $curbranch
+    git branch -d ${LNAME}-release-script-${strver}
+    exit $1
+}
+
+# NOTE: use cleanquit after this point
+git checkout ${LNAME}-release-script-${strver}
+
+# Test configure agrees with us
+confver=`grep "^AC_INIT" lib/${LNAME}/configure.ac | tr -d "AC_INIT(${LNAME}, " | tr -d ")"`
+if [ ! "$confver" = "$version" ]; then
+    echo "Wrong version, requested release for ${version}, found ${confver}"
+    exit 1
+fi
+
+# Check exports and signatures are up to date
+pushd lib/${LNAME}
+./script/abi_checks.sh ${LNAME} ${LINCLUDE}
+abicheck=$?
+popd
+if [ ! "$abicheck" = "0" ]; then
+    echo "ERROR: ABI Checks produced warnings!"
+    cleanquit 1
+fi
+
+git clean -f -x -d lib/${LNAME}
+git clean -f -x -d lib/replace
+
+# Now build tarball
+cp -a lib/${LNAME} ${LNAME}-${version}
+cp -a lib/replace ${LNAME}-${version}/libreplace
+pushd ${LNAME}-${version}
+./autogen.sh
+popd
+tar cvzf ${LNAME}-${version}.tar.gz ${LNAME}-${version}
+rm -fr ${LNAME}-${version}
+
+cleanquit 0
diff --git a/lib/tdb/tdb.exports b/lib/tdb/tdb.exports

index cf287d8f32c3f90a49ef7bc95c6158e83de74809..73b8fd634c3ca375b664641965ee7ec50df9bc5a 100644 (file)
--- a/lib/tdb/tdb.exports
+++ b/lib/tdb/tdb.exports
@@ -51,8 +51,8 @@
             tdb_transaction_cancel;
             tdb_transaction_commit;
             tdb_transaction_prepare_commit;
-           tdb_transaction_recover;
             tdb_transaction_start;
+           tdb_transaction_start_nonblock;
             tdb_traverse;
             tdb_traverse_read;
             tdb_unlockall;
diff --git a/lib/tdb/tdb.signatures b/lib/tdb/tdb.signatures

index 93edb071bedde851e2291b2f48efdf961ca63e8a..2148479667a1474aca86307617ac7d5ef8767466 100644 (file)
--- a/lib/tdb/tdb.signatures
+++ b/lib/tdb/tdb.signatures
@@ -34,6 +34,7 @@ int tdb_transaction_commit (struct tdb_context *);
  int tdb_transaction_prepare_commit (struct tdb_context *);
  int tdb_transaction_recover (struct tdb_context *);
  int tdb_transaction_start (struct tdb_context *);
+int tdb_transaction_start_nonblock (struct tdb_context *);
  int tdb_traverse_read (struct tdb_context *, tdb_traverse_func, void *);
  int tdb_traverse (struct tdb_context *, tdb_traverse_func, void *);
  int tdb_unlockall_read (struct tdb_context *);
diff --git a/lib/tdb/tools/tdbtorture.c b/lib/tdb/tools/tdbtorture.c

index b0221a2503227a718142738fda6ddbea5916b1a9..79fe3cd5e0e9ba6dd9c95b882770b5f2fde37f43 100644 (file)
--- a/lib/tdb/tools/tdbtorture.c
+++ b/lib/tdb/tools/tdbtorture.c
@@ -30,6 +30,10 @@ static struct tdb_context *db;
  static int in_transaction;
  static int error_count;
  static int always_transaction = 0;
+static int hash_size = 2;
+static int loopnum;
+static int count_pipe;
+static struct tdb_logging_context log_ctx;
  
  #ifdef PRINTF_ATTRIBUTE
  static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4);
@@ -48,8 +52,9 @@ static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const c
         va_end(ap);
         fflush(stdout);
  #if 0
-       {
+       if (level != TDB_DEBUG_TRACE) {
                 char *ptr;
+               signal(SIGUSR1, SIG_IGN);
                 asprintf(&ptr,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid());
                 system(ptr);
                 free(ptr);
@@ -211,24 +216,74 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
  
  static void usage(void)
  {
-       printf("Usage: tdbtorture [-t] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n");
+       printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n");
         exit(0);
  }
  
- int main(int argc, char * const *argv)
+static void send_count_and_suicide(int sig)
+{
+       /* This ensures our successor can continue where we left off. */
+       write(count_pipe, &loopnum, sizeof(loopnum));
+       /* This gives a unique signature. */
+       kill(getpid(), SIGUSR2);
+}
+
+static int run_child(int i, int seed, unsigned num_loops, unsigned start)
+{
+       db = tdb_open_ex("torture.tdb", hash_size, TDB_DEFAULT,
+                        O_RDWR | O_CREAT, 0600, &log_ctx, NULL);
+       if (!db) {
+               fatal("db open failed");
+       }
+
+       srand(seed + i);
+       srandom(seed + i);
+
+       /* Set global, then we're ready to handle being killed. */
+       loopnum = start;
+       signal(SIGUSR1, send_count_and_suicide);
+
+       for (;loopnum<num_loops && error_count == 0;loopnum++) {
+               addrec_db();
+       }
+
+       if (error_count == 0) {
+               tdb_traverse_read(db, NULL, NULL);
+               if (always_transaction) {
+                       while (in_transaction) {
+                               tdb_transaction_cancel(db);
+                               in_transaction--;
+                       }
+                       if (tdb_transaction_start(db) != 0)
+                               fatal("tdb_transaction_start failed");
+               }
+               tdb_traverse(db, traverse_fn, NULL);
+               tdb_traverse(db, traverse_fn, NULL);
+               if (always_transaction) {
+                       if (tdb_transaction_commit(db) != 0)
+                               fatal("tdb_transaction_commit failed");
+               }
+       }
+
+       tdb_close(db);
+
+       return (error_count < 100 ? error_count : 100);
+}
+
+int main(int argc, char * const *argv)
  {
         int i, seed = -1;
-       int num_procs = 3;
         int num_loops = 5000;
-       int hash_size = 2;
-       int c;
+       int num_procs = 3;
+       int c, pfds[2];
         extern char *optarg;
         pid_t *pids;
+       int kill_random = 0;
+       int *done;
  
-       struct tdb_logging_context log_ctx;
         log_ctx.log_fn = tdb_log;
  
-       while ((c = getopt(argc, argv, "n:l:s:H:th")) != -1) {
+       while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) {
                 switch (c) {
                 case 'n':
                         num_procs = strtol(optarg, NULL, 0);
@@ -245,6 +300,9 @@ static void usage(void)
                 case 't':
                         always_transaction = 1;
                         break;
+               case 'k':
+                       kill_random = 1;
+                       break;
                 default:
                         usage();
                 }
@@ -252,93 +310,120 @@ static void usage(void)
  
         unlink("torture.tdb");
  
-       pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
-       pids[0] = getpid();
-
-       for (i=0;i<num_procs-1;i++) {
-               if ((pids[i+1]=fork()) == 0) break;
-       }
-
-       db = tdb_open_ex("torture.tdb", hash_size, TDB_CLEAR_IF_FIRST, 
-                        O_RDWR | O_CREAT, 0600, &log_ctx, NULL);
-       if (!db) {
-               fatal("db open failed");
-       }
-
         if (seed == -1) {
                 seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
         }
  
-       if (i == 0) {
-               printf("testing with %d processes, %d loops, %d hash_size, seed=%d%s\n",
-                      num_procs, num_loops, hash_size, seed, always_transaction ? " (all within transactions)" : "");
+       if (num_procs == 1 && !kill_random) {
+               /* Don't fork for this case, makes debugging easier. */
+               error_count = run_child(0, seed, num_loops, 0);
+               goto done;
         }
  
-       srand(seed + i);
-       srandom(seed + i);
+       pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
+       done = (int *)calloc(sizeof(int), num_procs);
  
-       for (i=0;i<num_loops && error_count == 0;i++) {
-               addrec_db();
+       if (pipe(pfds) != 0) {
+               perror("Creating pipe");
+               exit(1);
         }
-
-       if (error_count == 0) {
-               tdb_traverse_read(db, NULL, NULL);
-               if (always_transaction) {
-                       while (in_transaction) {
-                               tdb_transaction_cancel(db);
-                               in_transaction--;
+       count_pipe = pfds[1];
+
+       for (i=0;i<num_procs;i++) {
+               if ((pids[i]=fork()) == 0) {
+                       close(pfds[0]);
+                       if (i == 0) {
+                               printf("Testing with %d processes, %d loops, %d hash_size, seed=%d%s\n",
+                                      num_procs, num_loops, hash_size, seed, always_transaction ? " (all within transactions)" : "");
                         }
-                       if (tdb_transaction_start(db) != 0)
-                               fatal("tdb_transaction_start failed");
-               }
-               tdb_traverse(db, traverse_fn, NULL);
-               tdb_traverse(db, traverse_fn, NULL);
-               if (always_transaction) {
-                       if (tdb_transaction_commit(db) != 0)
-                               fatal("tdb_transaction_commit failed");
+                       exit(run_child(i, seed, num_loops, 0));
                 }
         }
  
-       tdb_close(db);
-
-       if (getpid() != pids[0]) {
-               return error_count;
-       }
-
-       for (i=1;i<num_procs;i++) {
+       while (num_procs) {
                 int status, j;
                 pid_t pid;
+
                 if (error_count != 0) {
                         /* try and stop the test on any failure */
-                       for (j=1;j<num_procs;j++) {
+                       for (j=0;j<num_procs;j++) {
                                 if (pids[j] != 0) {
                                         kill(pids[j], SIGTERM);
                                 }
                         }
                 }
-               pid = waitpid(-1, &status, 0);
+
+               pid = waitpid(-1, &status, kill_random ? WNOHANG : 0);
+               if (pid == 0) {
+                       struct timeval tv;
+
+                       /* Sleep for 1/10 second. */
+                       tv.tv_sec = 0;
+                       tv.tv_usec = 100000;
+                       select(0, NULL, NULL, NULL, &tv);
+
+                       /* Kill someone. */
+                       kill(pids[random() % num_procs], SIGUSR1);
+                       continue;
+               }
+
                 if (pid == -1) {
                         perror("failed to wait for child\n");
                         exit(1);
                 }
-               for (j=1;j<num_procs;j++) {
+
+               for (j=0;j<num_procs;j++) {
                         if (pids[j] == pid) break;
                 }
                 if (j == num_procs) {
                         printf("unknown child %d exited!?\n", (int)pid);
                         exit(1);
                 }
-               if (WEXITSTATUS(status) != 0) {
-                       printf("child %d exited with status %d\n",
-                              (int)pid, WEXITSTATUS(status));
+               if (WIFSIGNALED(status)) {
+                       if (WTERMSIG(status) == SIGUSR2
+                           || WTERMSIG(status) == SIGUSR1) {
+                               /* SIGUSR2 means they wrote to pipe. */
+                               if (WTERMSIG(status) == SIGUSR2) {
+                                       read(pfds[0], &done[j],
+                                            sizeof(done[j]));
+                               }
+                               pids[j] = fork();
+                               if (pids[j] == 0)
+                                       exit(run_child(j, seed, num_loops,
+                                                      done[j]));
+                               printf("Restarting child %i for %u-%u\n",
+                                      j, done[j], num_loops);
+                               continue;
+                       }
+                       printf("child %d exited with signal %d\n",
+                              (int)pid, WTERMSIG(status));
                         error_count++;
+               } else {
+                       if (WEXITSTATUS(status) != 0) {
+                               printf("child %d exited with status %d\n",
+                                      (int)pid, WEXITSTATUS(status));
+                               error_count++;
+                       }
                 }
-               pids[j] = 0;
+               memmove(&pids[j], &pids[j+1],
+                       (num_procs - j - 1)*sizeof(pids[0]));
+               num_procs--;
         }
  
         free(pids);
  
+done:
         if (error_count == 0) {
+               db = tdb_open_ex("torture.tdb", hash_size, TDB_DEFAULT,
+                                O_RDWR, 0, &log_ctx, NULL);
+               if (!db) {
+                       fatal("db open failed");
+               }
+               if (tdb_check(db, NULL, NULL) == -1) {
+                       printf("db check failed");
+                       exit(1);
+               }
+               tdb_close(db);
                 printf("OK\n");
         }
author	Ronnie sahlberg <ronniesahlberg@gmail.com>
	Thu, 22 Apr 2010 23:25:25 +0000 (09:25 +1000)
committer	Ronnie sahlberg <ronniesahlberg@gmail.com>
	Thu, 22 Apr 2010 23:25:25 +0000 (09:25 +1000)
lib/tdb/common/check.c		patch \| blob \| history
lib/tdb/common/dump.c		patch \| blob \| history
lib/tdb/common/error.c		patch \| blob \| history
lib/tdb/common/freelist.c		patch \| blob \| history
lib/tdb/common/io.c		patch \| blob \| history
lib/tdb/common/lock.c		patch \| blob \| history
lib/tdb/common/open.c		patch \| blob \| history
lib/tdb/common/tdb.c		patch \| blob \| history
lib/tdb/common/tdb_private.h		patch \| blob \| history
lib/tdb/common/transaction.c		patch \| blob \| history
lib/tdb/common/traverse.c		patch \| blob \| history
lib/tdb/configure.ac		patch \| blob \| history
lib/tdb/docs/README		patch \| blob \| history
lib/tdb/include/tdb.h		patch \| blob \| history
lib/tdb/pytdb.c		patch \| blob \| history
lib/tdb/python/tdbdump.py		patch \| blob \| history
lib/tdb/python/tests/simple.py		patch \| blob \| history
lib/tdb/release-script.sh	[deleted file]	patch \| blob \| history
lib/tdb/script/release-script.sh	[new file with mode: 0644]	patch \| blob
lib/tdb/tdb.exports		patch \| blob \| history
lib/tdb/tdb.signatures		patch \| blob \| history
lib/tdb/tools/tdbtorture.c		patch \| blob \| history