vacuum: correctly send TRY_DELETE_RECORDS ctrl to all active nodes
author: Michael Adam <obnox@samba.org>
Thu, 3 Feb 2011 11:26:45 +0000 (12:26 +0100)
committer: Michael Adam <obnox@samba.org>
Wed, 9 Mar 2011 22:21:23 +0000 (23:21 +0100)
Originally, the control was sent to all nodes in the vnn_map, but
this missed one case:
When a node cannot become lmaster (via CTDB_CAPABILITY_LMASTER=no)
then it will not be part of the vnn_map. So such a node would
be active but would never receive the TRY_DELETE_RECORDS control from a
vacuuming run.

This change fixes that by first building the list of active nodes,
in the same way that the recovery process does.

server/ctdb_vacuum.c

index d6a16f52d65ab87344f4ba4bc18fa9dc94c3cf0a..2c643f72cc3e9e319dba02f2d4e69ced96d6c967 100644 (file)
@@ -290,6 +290,9 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v
                struct delete_records_list *recs;
                TDB_DATA indata, outdata;
                int32_t res;
+               struct ctdb_node_map *nodemap;
+               uint32_t *active_nodes;
+               int num_active_nodes;
 
                recs = talloc_zero(vdata, struct delete_records_list);
                if (recs == NULL) {
@@ -315,24 +318,37 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v
                indata.dptr  = (void *)recs->records;
 
                /* 
-                * now tell all the other nodes to delete all these records
+                * now tell all the active nodes to delete all these records
                 * (if possible)
                 */
-               for (i = 0; i < ctdb->vnn_map->size; i++) {
+
+               ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(),
+                                          CTDB_CURRENT_NODE,
+                                          recs, /* talloc context */
+                                          &nodemap);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ERR,(__location__ " unable to get node map\n"));
+                       return -1;
+               }
+
+               active_nodes = list_of_active_nodes(ctdb, nodemap,
+                                                   nodemap, /* talloc context */
+                                                   false /* include self */);
+               /* yuck! ;-) */
+               num_active_nodes = talloc_get_size(active_nodes)/sizeof(*active_nodes);
+
+               for (i = 0; i < num_active_nodes; i++) {
                        struct ctdb_marshall_buffer *records;
                        struct ctdb_rec_data *rec;
 
-                       if (ctdb->vnn_map->map[i] == ctdb->pnn) {
-                               /* we dont delete the records on the local node just yet */
-                               continue;
-                       }
-
-                       ret = ctdb_control(ctdb, ctdb->vnn_map->map[i], 0,
+                       ret = ctdb_control(ctdb, active_nodes[i], 0,
                                        CTDB_CONTROL_TRY_DELETE_RECORDS, 0,
                                        indata, recs, &outdata, &res,
                                        NULL, NULL);
                        if (ret != 0 || res != 0) {
-                               DEBUG(DEBUG_ERR,("Failed to delete records on node %u\n", ctdb->vnn_map->map[i]));
+                               DEBUG(DEBUG_ERR, ("Failed to delete records on "
+                                                 "node %u: ret[%d] res[%d]\n",
+                                                 active_nodes[i], ret, res));
                                return -1;
                        }
 
@@ -369,6 +385,9 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v
                        }           
                }
 
+               /* free nodemap and active_nodes */
+               talloc_free(nodemap);
+
                /* 
                 * The only records remaining in the tree would be those
                 * records where all other nodes could successfully