vacuuming: Fix vacuuming bug where requests keep bouncing between nodes (part 2)
author: Amitay Isaacs <amitay@gmail.com>
Mon, 12 Aug 2013 05:50:30 +0000 (15:50 +1000)
committer: Michael Adam <obnox@samba.org>
Mon, 19 Aug 2013 15:14:26 +0000 (17:14 +0200)
This is caused by corruption of a record header such that the records
on two nodes point to each other as dmaster.  This makes a request for
that record bounce between nodes endlessly.

Signed-off-by: Amitay Isaacs <amitay@gmail.com>
(cherry picked from commit f0853013655ac3bedf1b793de128fb679c6db6c6)

Conflicts:

server/ctdb_recover.c

server/ctdb_recover.c

index 6f72b07aa5e50232e69beb24b7f2255d50239272..4794e638d1a52a4e66aec9c348d8a08ad9cc0cca 100644 (file)
@@ -783,7 +783,7 @@ bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep)
  */
 static int delete_tdb_record(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, struct ctdb_rec_data *rec)
 {
-       TDB_DATA key, data;
+       TDB_DATA key, data, data2;
        struct ctdb_ltdb_header *hdr, *hdr2;
        
        /* these are really internal tdb functions - but we need them here for
@@ -814,13 +814,13 @@ static int delete_tdb_record(struct ctdb_context *ctdb, struct ctdb_db_context *
                return -1;
        }
 
-       data = tdb_fetch(ctdb_db->ltdb->tdb, key);
-       if (data.dptr == NULL) {
+       data2 = tdb_fetch(ctdb_db->ltdb->tdb, key);
+       if (data2.dptr == NULL) {
                tdb_chainunlock(ctdb_db->ltdb->tdb, key);
                return 0;
        }
 
-       if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+       if (data2.dsize < sizeof(struct ctdb_ltdb_header)) {
                if (tdb_lock_nonblock(ctdb_db->ltdb->tdb, -1, F_WRLCK) == 0) {
                        if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) {
                                DEBUG(DEBUG_CRIT,(__location__ " Failed to delete corrupt record\n"));
@@ -829,45 +829,45 @@ static int delete_tdb_record(struct ctdb_context *ctdb, struct ctdb_db_context *
                        DEBUG(DEBUG_CRIT,(__location__ " Deleted corrupt record\n"));
                }
                tdb_chainunlock(ctdb_db->ltdb->tdb, key);
-               free(data.dptr);
+               free(data2.dptr);
                return 0;
        }
        
-       hdr2 = (struct ctdb_ltdb_header *)data.dptr;
+       hdr2 = (struct ctdb_ltdb_header *)data2.dptr;
 
        if (hdr2->rsn > hdr->rsn) {
                tdb_chainunlock(ctdb_db->ltdb->tdb, key);
                DEBUG(DEBUG_INFO,(__location__ " Skipping record with rsn=%llu - called with rsn=%llu\n",
                         (unsigned long long)hdr2->rsn, (unsigned long long)hdr->rsn));
-               free(data.dptr);
-               return -1;              
+               free(data2.dptr);
+               return -1;
        }
 
        if (hdr2->dmaster == ctdb->pnn) {
                tdb_chainunlock(ctdb_db->ltdb->tdb, key);
                DEBUG(DEBUG_INFO,(__location__ " Attempted delete record where we are the dmaster\n"));
-               free(data.dptr);
-               return -1;                              
+               free(data2.dptr);
+               return -1;
        }
 
        if (tdb_lock_nonblock(ctdb_db->ltdb->tdb, -1, F_WRLCK) != 0) {
                tdb_chainunlock(ctdb_db->ltdb->tdb, key);
-               free(data.dptr);
-               return -1;                              
+               free(data2.dptr);
+               return -1;
        }
 
        if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) {
                tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
                tdb_chainunlock(ctdb_db->ltdb->tdb, key);
                DEBUG(DEBUG_INFO,(__location__ " Failed to delete record\n"));
-               free(data.dptr);
-               return -1;                                              
+               free(data2.dptr);
+               return -1;
        }
 
        tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
        tdb_chainunlock(ctdb_db->ltdb->tdb, key);
-       free(data.dptr);
-       return 0;       
+       free(data2.dptr);
+       return 0;
 }