tdb: Add a non-blocking version of tdb_transaction_start
[sahlberg/ctdb.git] / lib / tdb / common / transaction.c
index c9718a59b3b22bbf4badb1988a8f2655439188e0..304a03fa3836940197c1319eeae5ddbcb3fce823 100644 (file)
@@ -8,7 +8,7 @@
      ** NOTE! The following LGPL license applies to the tdb
      ** library. This does NOT imply that all of Samba is released
      ** under the LGPL
-   
+
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
@@ -59,7 +59,7 @@
   - allow for nested calls to tdb_transaction_start(), re-using the
     existing transaction record. If the inner transaction is cancelled
     then a subsequent commit will fail
+
   - keep a mirrored copy of the tdb hash chain heads to allow for the
     fast hash heads scan on traverse, updating the mirrored copy in
     the transaction version of tdb_write
@@ -185,7 +185,7 @@ static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
                        goto fail;
                }
        }
-       
+
        /* now copy it out of this block */
        memcpy(buf, tdb->transaction->blocks[blk] + (off % tdb->transaction->block_size), len);
        if (cv) {
@@ -292,7 +292,7 @@ static int transaction_write(struct tdb_context *tdb, tdb_off_t off,
                        }                       
                }
        }
-       
+
        /* overwrite part of an existing block */
        if (buf == NULL) {
                memset(tdb->transaction->blocks[blk] + off, 0, len);
@@ -408,33 +408,12 @@ static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size,
        return 0;
 }
 
-/*
-  brlock during a transaction - ignore them
-*/
-static int transaction_brlock(struct tdb_context *tdb,
-                             int rw_type, tdb_off_t offset, size_t len,
-                             enum tdb_lock_flags flags)
-{
-       /* FIXME: We actually grab the open lock during a transaction. */
-       if (offset == OPEN_LOCK)
-               return tdb_brlock(tdb, rw_type, offset, len, flags);
-       return 0;
-}
-
-static int transaction_brunlock(struct tdb_context *tdb,
-                               int rw_type, tdb_off_t offset, size_t len)
-{
-       return 0;
-}
-
 static const struct tdb_methods transaction_methods = {
        transaction_read,
        transaction_write,
        transaction_next_hash_chain,
        transaction_oob,
        transaction_expand_file,
-       transaction_brlock,
-       transaction_brunlock
 };
 
 
@@ -442,7 +421,8 @@ static const struct tdb_methods transaction_methods = {
   start a tdb transaction. No token is returned, as only a single
   transaction is allowed to be pending per tdb_context
 */
-int tdb_transaction_start(struct tdb_context *tdb)
+static int _tdb_transaction_start(struct tdb_context *tdb,
+                                 enum tdb_lock_flags lockflags)
 {
        /* some sanity checks */
        if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) {
@@ -494,12 +474,15 @@ int tdb_transaction_start(struct tdb_context *tdb)
        /* get the transaction write lock. This is a blocking lock. As
           discussed with Volker, there are a number of ways we could
           make this async, which we will probably do in the future */
-       if (tdb_transaction_lock(tdb, F_WRLCK) == -1) {
+       if (tdb_transaction_lock(tdb, F_WRLCK, lockflags) == -1) {
                SAFE_FREE(tdb->transaction->blocks);
                SAFE_FREE(tdb->transaction);
+               if ((lockflags & TDB_LOCK_WAIT) == 0) {
+                       tdb->ecode = TDB_ERR_NOLOCK;
+               }
                return -1;
        }
-       
+
        /* get a read lock from the freelist to the end of file. This
           is upgraded to a write lock during the commit */
        if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, true) == -1) {
@@ -535,7 +518,7 @@ int tdb_transaction_start(struct tdb_context *tdb)
        /* Trace at the end, so we get sequence number correct. */
        tdb_trace(tdb, "tdb_transaction_start");
        return 0;
-       
+
 fail:
        tdb_allrecord_unlock(tdb, F_RDLCK, false);
 fail_allrecord_lock:
@@ -546,6 +529,15 @@ fail_allrecord_lock:
        return -1;
 }
 
+int tdb_transaction_start(struct tdb_context *tdb)
+{
+       return _tdb_transaction_start(tdb, TDB_LOCK_WAIT); /* blocking variant: TDB_LOCK_WAIT is passed on to tdb_transaction_lock() */
+}
+
+int tdb_transaction_start_nonblock(struct tdb_context *tdb)
+{
+       return _tdb_transaction_start(tdb, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); /* flags go to tdb_transaction_lock(); without TDB_LOCK_WAIT a lock failure sets TDB_ERR_NOLOCK */
+}
 
 /*
   sync to disk
@@ -577,8 +569,7 @@ static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t
 }
 
 
-/* ltype is F_WRLCK after prepare. */
-static int _tdb_transaction_cancel(struct tdb_context *tdb, int ltype)
+static int _tdb_transaction_cancel(struct tdb_context *tdb)
 {      
        int i, ret = 0;
 
@@ -616,15 +607,14 @@ static int _tdb_transaction_cancel(struct tdb_context *tdb, int ltype)
        }
 
        /* This also removes the OPEN_LOCK, if we have it. */
-       tdb_release_extra_locks(tdb);
+       tdb_release_transaction_locks(tdb);
 
        /* restore the normal io methods */
        tdb->methods = tdb->transaction->io_methods;
 
-       tdb_transaction_unlock(tdb, F_WRLCK);
        SAFE_FREE(tdb->transaction->hash_heads);
        SAFE_FREE(tdb->transaction);
-       
+
        return ret;
 }
 
@@ -633,11 +623,8 @@ static int _tdb_transaction_cancel(struct tdb_context *tdb, int ltype)
 */
 int tdb_transaction_cancel(struct tdb_context *tdb)
 {
-       int ltype = F_RDLCK;
        tdb_trace(tdb, "tdb_transaction_cancel");
-       if (tdb->transaction && tdb->transaction->prepared)
-               ltype = F_WRLCK;
-       return _tdb_transaction_cancel(tdb, ltype);
+       return _tdb_transaction_cancel(tdb);
 }
 
 /*
@@ -813,7 +800,7 @@ static int transaction_setup_recovery(struct tdb_context *tdb,
                if (i == tdb->transaction->num_blocks-1) {
                        length = tdb->transaction->last_block_size;
                }
-               
+
                if (offset >= old_map_size) {
                        continue;
                }
@@ -903,14 +890,14 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
 
        if (tdb->transaction->prepared) {
                tdb->ecode = TDB_ERR_EINVAL;
-               _tdb_transaction_cancel(tdb, F_WRLCK);
+               _tdb_transaction_cancel(tdb);
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: transaction already prepared\n"));
                return -1;
        }
 
        if (tdb->transaction->transaction_error) {
                tdb->ecode = TDB_ERR_IO;
-               _tdb_transaction_cancel(tdb, F_RDLCK);
+               _tdb_transaction_cancel(tdb);
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: transaction error pending\n"));
                return -1;
        }
@@ -926,20 +913,20 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
        }
 
        methods = tdb->transaction->io_methods;
-       
+
        /* if there are any locks pending then the caller has not
           nested their locks properly, so fail the transaction */
        if (tdb_have_extra_locks(tdb)) {
                tdb->ecode = TDB_ERR_LOCK;
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: locks pending on commit\n"));
-               _tdb_transaction_cancel(tdb, F_RDLCK);
+               _tdb_transaction_cancel(tdb);
                return -1;
        }
 
        /* upgrade the main transaction lock region to a write lock */
        if (tdb_allrecord_upgrade(tdb) == -1) {
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to upgrade hash locks\n"));
-               _tdb_transaction_cancel(tdb, F_RDLCK);
+               _tdb_transaction_cancel(tdb);
                return -1;
        }
 
@@ -947,7 +934,7 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
           during the commit */
        if (tdb_nest_lock(tdb, OPEN_LOCK, F_WRLCK, TDB_LOCK_WAIT) == -1) {
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_prepare_commit: failed to get open lock\n"));
-               _tdb_transaction_cancel(tdb, F_WRLCK);
+               _tdb_transaction_cancel(tdb);
                return -1;
        }
 
@@ -955,7 +942,7 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
                /* write the recovery data to the end of the file */
                if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n"));
-                       _tdb_transaction_cancel(tdb, F_WRLCK);
+                       _tdb_transaction_cancel(tdb);
                        return -1;
                }
        }
@@ -969,7 +956,7 @@ static int _tdb_transaction_prepare_commit(struct tdb_context *tdb)
                                             tdb->transaction->old_map_size) == -1) {
                        tdb->ecode = TDB_ERR_IO;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: expansion failed\n"));
-                       _tdb_transaction_cancel(tdb, F_WRLCK);
+                       _tdb_transaction_cancel(tdb);
                        return -1;
                }
                tdb->map_size = tdb->transaction->old_map_size;
@@ -1008,7 +995,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 
        if (tdb->transaction->transaction_error) {
                tdb->ecode = TDB_ERR_IO;
-               _tdb_transaction_cancel(tdb, F_RDLCK);
+               _tdb_transaction_cancel(tdb);
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: transaction error pending\n"));
                return -1;
        }
@@ -1021,7 +1008,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 
        /* check for a null transaction */
        if (tdb->transaction->blocks == NULL) {
-               _tdb_transaction_cancel(tdb, F_RDLCK);
+               _tdb_transaction_cancel(tdb);
                return 0;
        }
 
@@ -1050,14 +1037,14 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 
                if (methods->tdb_write(tdb, offset, tdb->transaction->blocks[i], length) == -1) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed during commit\n"));
-                       
+
                        /* we've overwritten part of the data and
                           possibly expanded the file, so we need to
                           run the crash recovery code */
                        tdb->methods = methods;
                        tdb_transaction_recover(tdb); 
 
-                       _tdb_transaction_cancel(tdb, F_WRLCK);
+                       _tdb_transaction_cancel(tdb);
 
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n"));
                        return -1;
@@ -1092,7 +1079,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 
        /* use a transaction cancel to free memory and remove the
           transaction locks */
-       _tdb_transaction_cancel(tdb, F_WRLCK);
+       _tdb_transaction_cancel(tdb);
 
        if (need_repack) {
                return tdb_repack(tdb);
@@ -1205,16 +1192,6 @@ int tdb_transaction_recover(struct tdb_context *tdb)
                tdb->ecode = TDB_ERR_IO;
                return -1;                      
        }
-       
-       /* reduce the file size to the old size */
-       tdb_munmap(tdb);
-       if (ftruncate(tdb->fd, recovery_eof) != 0) {
-               TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to reduce to recovery size\n"));
-               tdb->ecode = TDB_ERR_IO;
-               return -1;                      
-       }
-       tdb->map_size = recovery_eof;
-       tdb_mmap(tdb);
 
        if (transaction_sync(tdb, 0, recovery_eof) == -1) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to sync2 recovery\n"));
@@ -1228,3 +1205,28 @@ int tdb_transaction_recover(struct tdb_context *tdb)
        /* all done */
        return 0;
 }
+
+/* True when the record located via TDB_RECOVERY_HEAD has magic TDB_RECOVERY_MAGIC; any read failure is also reported as "needs recovery". */
+bool tdb_needs_recovery(struct tdb_context *tdb)
+{
+       tdb_off_t recovery_head;
+       struct tdb_record rec;
+
+       /* find the recovery area: its offset is stored at TDB_RECOVERY_HEAD */
+       if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) {
+               return true; /* could not read the offset: report "needs recovery" */
+       }
+
+       if (recovery_head == 0) {
+               /* offset 0: we have never allocated a recovery record */
+               return false;
+       }
+
+       /* read the struct tdb_record found at recovery_head */
+       if (tdb->methods->tdb_read(tdb, recovery_head, &rec,
+                                  sizeof(rec), DOCONV()) == -1) {
+               return true; /* could not read the record: report "needs recovery" */
+       }
+
+       return (rec.magic == TDB_RECOVERY_MAGIC); /* recovery is needed only when the magic matches */
+}