tdb: make check more robust against recovery failures.
author Rusty Russell <rusty@rustcorp.com.au>
Mon, 13 Sep 2010 10:25:26 +0000 (19:55 +0930)
committer Rusty Russell <rusty@rustcorp.com.au>
Thu, 7 Oct 2010 04:35:55 +0000 (15:05 +1030)
We can end up with dead areas when we die during transaction commit;
tdb_check() fails on such a (valid) database.

This is particularly noticeable now we no longer truncate on recovery;
if the recovery area was at the end of the file we used to remove it
that way.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
lib/tdb/common/check.c

index 2c640434ee42360dd573f6706b134b55b26d340f..3be8a0c48b3b130b5687b718b534c8056113cb19 100644 (file)
@@ -301,6 +301,21 @@ static bool tdb_check_free_record(struct tdb_context *tdb,
        return true;
 }
 
+/* Count the run of bytes starting at off that are 0 or 0x42, stopping at the first other byte or at map_size; returns 0 if tdb_read returns nonzero.  Slow (one tdb_read call per byte), but should be very rare. */
+static size_t dead_space(struct tdb_context *tdb, tdb_off_t off)
+{
+       size_t len;
+
+       for (len = 0; off + len < tdb->map_size; len++) {
+               char c;
+               if (tdb->methods->tdb_read(tdb, off, &c, 1, 0))
+                       return 0;
+               if (c != 0 && c != 0x42)
+                       break;
+       }
+       return len;
+}
+
 int tdb_check(struct tdb_context *tdb,
              int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
              void *private_data)
@@ -310,6 +325,7 @@ int tdb_check(struct tdb_context *tdb,
        tdb_off_t off, recovery_start;
        struct tdb_record rec;
        bool found_recovery = false;
+       tdb_len_t dead;
 
        if (tdb_lockall_read(tdb) == -1)
                return -1;
@@ -369,8 +385,23 @@ int tdb_check(struct tdb_context *tdb,
                        if (!tdb_check_free_record(tdb, off, &rec, hashes))
                                goto free;
                        break;
-               case TDB_RECOVERY_MAGIC:
+               /* If we crash after ftruncate, we can get zeroes or fill. */
                case TDB_RECOVERY_INVALID_MAGIC:
+               case 0x42424242:
+                       if (recovery_start == off) {
+                               found_recovery = true;
+                               break;
+                       }
+                       dead = dead_space(tdb, off);
+                       if (dead < sizeof(rec))
+                               goto corrupt;
+
+                       TDB_LOG((tdb, TDB_DEBUG_ERROR,
+                                "Dead space at %d-%d (of %u)\n",
+                                off, off + dead, tdb->map_size));
+                       rec.rec_len = dead - sizeof(rec);
+                       break;
+               case TDB_RECOVERY_MAGIC:
                        if (recovery_start != off) {
                                TDB_LOG((tdb, TDB_DEBUG_ERROR,
                                         "Unexpected recovery record at offset %d\n",
@@ -379,7 +410,8 @@ int tdb_check(struct tdb_context *tdb,
                        }
                        found_recovery = true;
                        break;
-               default:
+               default: ;
+               corrupt:
                        tdb->ecode = TDB_ERR_CORRUPT;
                        TDB_LOG((tdb, TDB_DEBUG_ERROR,
                                 "Bad magic 0x%x at offset %d\n",
@@ -405,9 +437,8 @@ int tdb_check(struct tdb_context *tdb,
        /* We must have found recovery area if there was one. */
        if (recovery_start != 0 && !found_recovery) {
                TDB_LOG((tdb, TDB_DEBUG_ERROR,
-                        "Expected %s recovery area, got %s\n",
-                        recovery_start ? "a" : "no",
-                        found_recovery ? "one" : "none"));
+                        "Expected a recovery area at %u\n",
+                        recovery_start));
                goto free;
        }