We default to non-deterministic ip now, where ips are "sticky" and don't change
[sahlberg/ctdb.git] / tools / ctdb.c
index 076e6d3e7e98eef6d475a39b7c7557b837a976f4..674622a68afa129dabbf86a5a019f316964d4702 100644 (file)
@@ -157,7 +157,7 @@ static int control_process_exists(struct ctdb_context *ctdb, int argc, const cha
 /*
   display statistics structure
  */
-static void show_statistics(struct ctdb_statistics *s)
+static void show_statistics(struct ctdb_statistics *s, int show_header)
 {
        TALLOC_CTX *tmp_ctx = talloc_new(NULL);
        int i;
@@ -211,33 +211,100 @@ static void show_statistics(struct ctdb_statistics *s)
        tmp    /= 24;
        days    = tmp;
 
-       printf("CTDB version %u\n", CTDB_VERSION);
-       printf("Current time of statistics  :                %s", ctime(&s->statistics_current_time.tv_sec));
-       printf("Statistics collected since  : (%03d %02d:%02d:%02d) %s", days, hours, minutes, seconds, ctime(&s->statistics_start_time.tv_sec));
-
-       for (i=0;i<ARRAY_SIZE(fields);i++) {
-               if (strchr(fields[i].name, '.')) {
-                       preflen = strcspn(fields[i].name, ".")+1;
-                       if (!prefix || strncmp(prefix, fields[i].name, preflen) != 0) {
-                               prefix = fields[i].name;
-                               printf(" %*.*s\n", preflen-1, preflen-1, fields[i].name);
+       if (options.machinereadable){
+               if (show_header) {
+                       printf("CTDB version:");
+                       printf("Current time of statistics:");
+                       printf("Statistics collected since:");
+                       for (i=0;i<ARRAY_SIZE(fields);i++) {
+                               printf("%s:", fields[i].name);
                        }
-               } else {
-                       preflen = 0;
+                       printf("num_reclock_ctdbd_latency:");
+                       printf("min_reclock_ctdbd_latency:");
+                       printf("avg_reclock_ctdbd_latency:");
+                       printf("max_reclock_ctdbd_latency:");
+
+                       printf("num_reclock_recd_latency:");
+                       printf("min_reclock_recd_latency:");
+                       printf("avg_reclock_recd_latency:");
+                       printf("max_reclock_recd_latency:");
+
+                       printf("num_call_latency:");
+                       printf("min_call_latency:");
+                       printf("avg_call_latency:");
+                       printf("max_call_latency:");
+
+                       printf("num_lockwait_latency:");
+                       printf("min_lockwait_latency:");
+                       printf("avg_lockwait_latency:");
+                       printf("max_lockwait_latency:");
+
+                       printf("num_childwrite_latency:");
+                       printf("min_childwrite_latency:");
+                       printf("avg_childwrite_latency:");
+                       printf("max_childwrite_latency:");
+                       printf("\n");
                }
-               printf(" %*s%-22s%*s%10u\n", 
-                      preflen?4:0, "",
-                      fields[i].name+preflen, 
-                      preflen?0:4, "",
-                      *(uint32_t *)(fields[i].offset+(uint8_t *)s));
-       }
-       printf(" %-30s     %.6f sec\n", "max_reclock_ctdbd", s->reclock.ctdbd);
-       printf(" %-30s     %.6f sec\n", "max_reclock_recd", s->reclock.recd);
+               printf("%d:", CTDB_VERSION);
+               printf("%d:", (int)s->statistics_current_time.tv_sec);
+               printf("%d:", (int)s->statistics_start_time.tv_sec);
+               for (i=0;i<ARRAY_SIZE(fields);i++) {
+                       printf("%d:", *(uint32_t *)(fields[i].offset+(uint8_t *)s));
+               }
+               printf("%d:", s->reclock.ctdbd.num);
+               printf("%.6f:", s->reclock.ctdbd.min);
+               printf("%.6f:", s->reclock.ctdbd.num?s->reclock.ctdbd.total/s->reclock.ctdbd.num:0.0);
+               printf("%.6f:", s->reclock.ctdbd.max);
+
+               printf("%d:", s->reclock.recd.num);
+               printf("%.6f:", s->reclock.recd.min);
+               printf("%.6f:", s->reclock.recd.num?s->reclock.recd.total/s->reclock.recd.num:0.0);
+               printf("%.6f:", s->reclock.recd.max);
+
+               printf("%d:", s->call_latency.num);
+               printf("%.6f:", s->call_latency.min);
+               printf("%.6f:", s->call_latency.num?s->call_latency.total/s->call_latency.num:0.0);
+               printf("%.6f:", s->call_latency.max);
+
+               printf("%d:", s->lockwait_latency.num);
+               printf("%.6f:", s->lockwait_latency.min);
+               printf("%.6f:", s->lockwait_latency.num?s->lockwait_latency.total/s->lockwait_latency.num:0.0);
+               printf("%.6f:", s->lockwait_latency.max);
+
+               printf("%d:", s->childwrite_latency.num);
+               printf("%.6f:", s->childwrite_latency.min);
+               printf("%.6f:", s->childwrite_latency.num?s->childwrite_latency.total/s->childwrite_latency.num:0.0);
+               printf("%.6f:", s->childwrite_latency.max);
+               printf("\n");
+       } else {
+               printf("CTDB version %u\n", CTDB_VERSION);
+               printf("Current time of statistics  :                %s", ctime(&s->statistics_current_time.tv_sec));
+               printf("Statistics collected since  : (%03d %02d:%02d:%02d) %s", days, hours, minutes, seconds, ctime(&s->statistics_start_time.tv_sec));
+
+               for (i=0;i<ARRAY_SIZE(fields);i++) {
+                       if (strchr(fields[i].name, '.')) {
+                               preflen = strcspn(fields[i].name, ".")+1;
+                               if (!prefix || strncmp(prefix, fields[i].name, preflen) != 0) {
+                                       prefix = fields[i].name;
+                                       printf(" %*.*s\n", preflen-1, preflen-1, fields[i].name);
+                               }
+                       } else {
+                               preflen = 0;
+                       }
+                       printf(" %*s%-22s%*s%10u\n", 
+                              preflen?4:0, "",
+                              fields[i].name+preflen, 
+                              preflen?0:4, "",
+                              *(uint32_t *)(fields[i].offset+(uint8_t *)s));
+               }
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "reclock_ctdbd       MIN/AVG/MAX", s->reclock.ctdbd.min, s->reclock.ctdbd.num?s->reclock.ctdbd.total/s->reclock.ctdbd.num:0.0, s->reclock.ctdbd.max, s->reclock.ctdbd.num);
 
-       printf(" %-30s     %.6f sec\n", "max_call_latency", s->max_call_latency);
-       printf(" %-30s     %.6f sec\n", "max_lockwait_latency", s->max_lockwait_latency);
-       printf(" %-30s     %.6f sec\n", "max_childwrite_latency", s->max_childwrite_latency);
-       printf(" %-30s     %.6f sec\n", "max_childwrite_latency", s->max_childwrite_latency);
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "reclock_recd       MIN/AVG/MAX", s->reclock.recd.min, s->reclock.recd.num?s->reclock.recd.total/s->reclock.recd.num:0.0, s->reclock.recd.max, s->reclock.recd.num);
+
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "call_latency       MIN/AVG/MAX", s->call_latency.min, s->call_latency.num?s->call_latency.total/s->call_latency.num:0.0, s->call_latency.max, s->call_latency.num);
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "lockwait_latency   MIN/AVG/MAX", s->lockwait_latency.min, s->lockwait_latency.num?s->lockwait_latency.total/s->lockwait_latency.num:0.0, s->lockwait_latency.max, s->lockwait_latency.num);
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "childwrite_latency MIN/AVG/MAX", s->childwrite_latency.min, s->childwrite_latency.num?s->childwrite_latency.total/s->childwrite_latency.num:0.0, s->childwrite_latency.max, s->childwrite_latency.num);
+       }
 
        talloc_free(tmp_ctx);
 }
@@ -274,14 +341,14 @@ static int control_statistics_all(struct ctdb_context *ctdb)
                }
                statistics.max_hop_count = 
                        MAX(statistics.max_hop_count, s1.max_hop_count);
-               statistics.max_call_latency = 
-                       MAX(statistics.max_call_latency, s1.max_call_latency);
-               statistics.max_lockwait_latency = 
-                       MAX(statistics.max_lockwait_latency, s1.max_lockwait_latency);
+               statistics.call_latency.max = 
+                       MAX(statistics.call_latency.max, s1.call_latency.max);
+               statistics.lockwait_latency.max = 
+                       MAX(statistics.lockwait_latency.max, s1.lockwait_latency.max);
        }
        talloc_free(nodes);
        printf("Gathered statistics for %u nodes\n", num_nodes);
-       show_statistics(&statistics);
+       show_statistics(&statistics, 1);
        return 0;
 }
 
@@ -302,7 +369,7 @@ static int control_statistics(struct ctdb_context *ctdb, int argc, const char **
                DEBUG(DEBUG_ERR, ("Unable to get statistics from node %u\n", options.pnn));
                return ret;
        }
-       show_statistics(&statistics);
+       show_statistics(&statistics, 1);
        return 0;
 }
 
@@ -323,6 +390,37 @@ static int control_statistics_reset(struct ctdb_context *ctdb, int argc, const c
 }
 
 
+/*
+  display remote ctdb rolling statistics
+ */
+static int control_stats(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+       int ret;
+       struct ctdb_statistics_wire *stats;
+       int i, num_records = -1;
+
+       if (argc ==1) {
+               num_records = atoi(argv[0]) - 1;
+       }
+
+       ret = ctdb_ctrl_getstathistory(ctdb, TIMELIMIT(), options.pnn, ctdb, &stats);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, ("Unable to get rolling statistics from node %u\n", options.pnn));
+               return ret;
+       }
+       for (i=0;i<stats->num;i++) {
+               if (stats->stats[i].statistics_start_time.tv_sec == 0) {
+                       continue;
+               }
+               show_statistics(&stats->stats[i], i==0);
+               if (i == num_records) {
+                       break;
+               }
+       }
+       return 0;
+}
+
+
 /*
   display uptime of remote node
  */
@@ -524,40 +622,19 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
        }
 
        if(options.machinereadable){
-               printf(":Node:IP:Disconnected:Banned:Disabled:Unhealthy:Stopped:Inactive:PartiallyOnline:\n");
+               printf(":Node:IP:Disconnected:Banned:Disabled:Unhealthy:Stopped:Inactive:\n");
                for(i=0;i<nodemap->num;i++){
-                       int partially_online = 0;
-                       int j;
-
                        if (nodemap->nodes[i].flags & NODE_FLAGS_DELETED) {
                                continue;
                        }
-                       if (nodemap->nodes[i].flags == 0) {
-                               struct ctdb_control_get_ifaces *ifaces;
-
-                               ret = ctdb_ctrl_get_ifaces(ctdb, TIMELIMIT(),
-                                                          nodemap->nodes[i].pnn,
-                                                          ctdb, &ifaces);
-                               if (ret == 0) {
-                                       for (j=0; j < ifaces->num; j++) {
-                                               if (ifaces->ifaces[j].link_state != 0) {
-                                                       continue;
-                                               }
-                                               partially_online = 1;
-                                               break;
-                                       }
-                                       talloc_free(ifaces);
-                               }
-                       }
-                       printf(":%d:%s:%d:%d:%d:%d:%d:%d:%d:\n", nodemap->nodes[i].pnn,
+                       printf(":%d:%s:%d:%d:%d:%d:%d:%d:\n", nodemap->nodes[i].pnn,
                                ctdb_addr_to_str(&nodemap->nodes[i].addr),
                               !!(nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED),
                               !!(nodemap->nodes[i].flags&NODE_FLAGS_BANNED),
                               !!(nodemap->nodes[i].flags&NODE_FLAGS_PERMANENTLY_DISABLED),
                               !!(nodemap->nodes[i].flags&NODE_FLAGS_UNHEALTHY),
                               !!(nodemap->nodes[i].flags&NODE_FLAGS_STOPPED),
-                              !!(nodemap->nodes[i].flags&NODE_FLAGS_INACTIVE),
-                              partially_online);
+                              !!(nodemap->nodes[i].flags&NODE_FLAGS_INACTIVE));
                }
                return 0;
        }
@@ -582,23 +659,6 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
                if (nodemap->nodes[i].flags & NODE_FLAGS_DELETED) {
                        continue;
                }
-               if (nodemap->nodes[i].flags == 0) {
-                       struct ctdb_control_get_ifaces *ifaces;
-
-                       ret = ctdb_ctrl_get_ifaces(ctdb, TIMELIMIT(),
-                                                  nodemap->nodes[i].pnn,
-                                                  ctdb, &ifaces);
-                       if (ret == 0) {
-                               for (j=0; j < ifaces->num; j++) {
-                                       if (ifaces->ifaces[j].link_state != 0) {
-                                               continue;
-                                       }
-                                       flags_str = talloc_strdup(ctdb, "PARTIALLYONLINE");
-                                       break;
-                               }
-                               talloc_free(ifaces);
-                       }
-               }
                for (j=0;j<ARRAY_SIZE(flag_names);j++) {
                        if (nodemap->nodes[i].flags & flag_names[j].flag) {
                                if (flags_str == NULL) {
@@ -665,6 +725,7 @@ struct natgw_node {
 static int control_natgwlist(struct ctdb_context *ctdb, int argc, const char **argv)
 {
        int i, ret;
+       uint32_t capabilities;
        const char *natgw_list;
        int nlines;
        char **lines;
@@ -743,6 +804,14 @@ static int control_natgwlist(struct ctdb_context *ctdb, int argc, const char **a
         */
        for(i=0;i<nodemap->num;i++){
                if (!(nodemap->nodes[i].flags & (NODE_FLAGS_DISCONNECTED|NODE_FLAGS_STOPPED|NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_UNHEALTHY))) {
+                       ret = ctdb_ctrl_getcapabilities(ctdb, TIMELIMIT(), nodemap->nodes[i].pnn, &capabilities);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_ERR, ("Unable to get capabilities from node %u\n", nodemap->nodes[i].pnn));
+                               return ret;
+                       }
+                       if (!(capabilities&CTDB_CAP_NATGW)) {
+                               continue;
+                       }
                        printf("%d %s\n", nodemap->nodes[i].pnn,ctdb_addr_to_str(&nodemap->nodes[i].addr));
                        break;
                }
@@ -751,6 +820,14 @@ static int control_natgwlist(struct ctdb_context *ctdb, int argc, const char **a
        if (i == nodemap->num) {
                for(i=0;i<nodemap->num;i++){
                        if (!(nodemap->nodes[i].flags & (NODE_FLAGS_DISCONNECTED|NODE_FLAGS_STOPPED|NODE_FLAGS_DELETED))) {
+                               ret = ctdb_ctrl_getcapabilities(ctdb, TIMELIMIT(), nodemap->nodes[i].pnn, &capabilities);
+                               if (ret != 0) {
+                                       DEBUG(DEBUG_ERR, ("Unable to get capabilities from node %u\n", nodemap->nodes[i].pnn));
+                                       return ret;
+                               }
+                               if (!(capabilities&CTDB_CAP_NATGW)) {
+                                       continue;
+                               }
                                printf("%d %s\n", nodemap->nodes[i].pnn,ctdb_addr_to_str(&nodemap->nodes[i].addr));
                                break;
                        }
@@ -760,6 +837,14 @@ static int control_natgwlist(struct ctdb_context *ctdb, int argc, const char **a
        if (i == nodemap->num) {
                for(i=0;i<nodemap->num;i++){
                        if (!(nodemap->nodes[i].flags & (NODE_FLAGS_DISCONNECTED|NODE_FLAGS_DELETED))) {
+                               ret = ctdb_ctrl_getcapabilities(ctdb, TIMELIMIT(), nodemap->nodes[i].pnn, &capabilities);
+                               if (ret != 0) {
+                                       DEBUG(DEBUG_ERR, ("Unable to get capabilities from node %u\n", nodemap->nodes[i].pnn));
+                                       return ret;
+                               }
+                               if (!(capabilities&CTDB_CAP_NATGW)) {
+                                       continue;
+                               }
                                printf("%d %s\n", nodemap->nodes[i].pnn, ctdb_addr_to_str(&nodemap->nodes[i].addr));
                                break;
                        }
@@ -1205,6 +1290,7 @@ static int move_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr, uint32_t pnn
 static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv)
 {
        uint32_t pnn;
+       int ret, retries = 0;
        ctdb_sock_addr addr;
 
        if (argc < 2) {
@@ -1223,8 +1309,16 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
                return -1;
        }
 
-       if (move_ip(ctdb, &addr, pnn) != 0) {
-               DEBUG(DEBUG_ERR,("Failed to move ip to node %d\n", pnn));
+       do {
+               ret = move_ip(ctdb, &addr, pnn);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ERR,("Failed to move ip to node %d. Wait 3 second and try again.\n", pnn));
+                       sleep(3);
+                       retries++;
+               }
+       } while (retries < 5 && ret != 0);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR,("Failed to move ip to node %d. Giving up.\n", pnn));
                return -1;
        }
 
@@ -1388,14 +1482,150 @@ find_other_host_for_public_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
        return -1;
 }
 
+static uint32_t ipreallocate_finished;
+
+/*
+  handler for receiving the response to ipreallocate
+*/
+static void ip_reallocate_handler(struct ctdb_context *ctdb, uint64_t srvid, 
+                            TDB_DATA data, void *private_data)
+{
+       ipreallocate_finished = 1;
+}
+
+static void ctdb_every_second(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
+{
+       struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
+
+       event_add_timed(ctdb->ev, ctdb, 
+                               timeval_current_ofs(1, 0),
+                               ctdb_every_second, ctdb);
+}
+
+/*
+  ask the recovery daemon on the recovery master to perform an ip reallocation
+ */
+static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+       int i, ret;
+       TDB_DATA data;
+       struct takeover_run_reply rd;
+       uint32_t recmaster;
+       struct ctdb_node_map *nodemap=NULL;
+       int retries=0;
+       struct timeval tv = timeval_current();
+
+       /* we need some events to trigger so we can timeout and restart
+          the loop
+       */
+       event_add_timed(ctdb->ev, ctdb, 
+                               timeval_current_ofs(1, 0),
+                               ctdb_every_second, ctdb);
+
+       rd.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
+       if (rd.pnn == -1) {
+               DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
+               return -1;
+       }
+       rd.srvid = getpid();
+
+       /* register a message port for receiving the reply so that we
+          can receive the reply
+       */
+       ctdb_client_set_message_handler(ctdb, rd.srvid, ip_reallocate_handler, NULL);
+
+       data.dptr = (uint8_t *)&rd;
+       data.dsize = sizeof(rd);
+
+again:
+       /* check that there are valid nodes available */
+       if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap) != 0) {
+               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
+               return -1;
+       }
+       for (i=0; i<nodemap->num;i++) {
+               if ((nodemap->nodes[i].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) == 0) {
+                       break;
+               }
+       }
+       if (i==nodemap->num) {
+               DEBUG(DEBUG_ERR,("No recmaster available, no need to wait for cluster convergence\n"));
+               return 0;
+       }
+
+
+       ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.pnn, &recmaster);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, ("Unable to get recmaster from node %u\n", options.pnn));
+               return ret;
+       }
+
+       /* verify the node exists */
+       if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), recmaster, ctdb, &nodemap) != 0) {
+               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
+               return -1;
+       }
+
+
+       /* check that there are nodes available that can act as a recmaster */
+       for (i=0; i<nodemap->num; i++) {
+               if (nodemap->nodes[i].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
+                       continue;
+               }
+               break;
+       }
+       if (i == nodemap->num) {
+               DEBUG(DEBUG_ERR,("No possible nodes to host addresses.\n"));
+               return 0;
+       }
+
+       /* verify the recovery master is not STOPPED, nor BANNED */
+       if (nodemap->nodes[recmaster].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
+               DEBUG(DEBUG_ERR,("No suitable recmaster found. Try again\n"));
+               retries++;
+               sleep(1);
+               goto again;
+       } 
+       
+       /* verify the recovery master is not STOPPED, nor BANNED */
+       if (nodemap->nodes[recmaster].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
+               DEBUG(DEBUG_ERR,("No suitable recmaster found. Try again\n"));
+               retries++;
+               sleep(1);
+               goto again;
+       } 
+
+       ipreallocate_finished = 0;
+       ret = ctdb_client_send_message(ctdb, recmaster, CTDB_SRVID_TAKEOVER_RUN, data);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR,("Failed to send ip takeover run request message to %u\n", options.pnn));
+               return -1;
+       }
+
+       tv = timeval_current();
+       /* this loop will terminate when we have received the reply */
+       while (timeval_elapsed(&tv) < 5.0 && ipreallocate_finished == 0) {
+               event_loop_once(ctdb->ev);
+       }
+       if (ipreallocate_finished == 1) {
+               return 0;
+       }
+
+       retries++;
+       sleep(1);
+       goto again;
+
+       return 0;
+}
+
+
 /*
   add a public ip address to a node
  */
 static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
 {
        int i, ret;
-       int len;
-       uint32_t pnn;
+       int len, retries = 0;
        unsigned mask;
        ctdb_sock_addr addr;
        struct ctdb_control_ip_iface *pub;
@@ -1414,23 +1644,28 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
                return -1;
        }
 
-       ret = control_get_all_public_ips(ctdb, tmp_ctx, &ips);
+       /* read the public ip list from the node */
+       ret = ctdb_ctrl_get_public_ips(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &ips);
        if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get public ip list from cluster\n"));
+               DEBUG(DEBUG_ERR, ("Unable to get public ip list from node %u\n", options.pnn));
                talloc_free(tmp_ctx);
-               return ret;
+               return -1;
        }
-
-
-       /* check if some other node is already serving this ip, if not,
-        * we will claim it
-        */
        for (i=0;i<ips->num;i++) {
                if (ctdb_same_ip(&addr, &ips->ips[i].addr)) {
-                       break;
+                       DEBUG(DEBUG_ERR,("Can not add ip to node. Node already hosts this ip\n"));
+                       return 0;
                }
        }
 
+
+
+       /* Don't time out. This command waits for an ip reallocation
+          which sometimes can take quite a while if there has
+          been a recent recovery
+       */
+       alarm(0);
+
        len = offsetof(struct ctdb_control_ip_iface, iface) + strlen(argv[1]) + 1;
        pub = talloc_size(tmp_ctx, len); 
        CTDB_NO_MEMORY(ctdb, pub);
@@ -1440,23 +1675,32 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
        pub->len   = strlen(argv[1])+1;
        memcpy(&pub->iface[0], argv[1], strlen(argv[1])+1);
 
-       ret = ctdb_ctrl_add_public_ip(ctdb, TIMELIMIT(), options.pnn, pub);
+       do {
+               ret = ctdb_ctrl_add_public_ip(ctdb, TIMELIMIT(), options.pnn, pub);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ERR, ("Unable to add public ip to node %u. Wait 3 seconds and try again.\n", options.pnn));
+                       sleep(3);
+                       retries++;
+               }
+       } while (retries < 5 && ret != 0);
        if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to add public ip to node %u\n", options.pnn));
+               DEBUG(DEBUG_ERR, ("Unable to add public ip to node %u. Giving up.\n", options.pnn));
                talloc_free(tmp_ctx);
                return ret;
        }
 
-       if (i == ips->num) {
-               /* no one has this ip so we claim it */
-               pnn  = options.pnn;
-       } else {
-               pnn  = ips->ips[i].pnn;
-       }
-
-       if (move_ip(ctdb, &addr, pnn) != 0) {
-               DEBUG(DEBUG_ERR,("Failed to move ip to node %d\n", pnn));
-               return -1;
+       do {
+               ret = control_ipreallocate(ctdb, argc, argv);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u. Wait 3 seconds and try again.\n", options.pnn));
+                       sleep(3);
+                       retries++;
+               }
+       } while (retries < 5 && ret != 0);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u. Giving up.\n", options.pnn));
+               talloc_free(tmp_ctx);
+               return ret;
        }
 
        talloc_free(tmp_ctx);
@@ -1539,6 +1783,7 @@ static int control_delip_all(struct ctdb_context *ctdb, int argc, const char **a
 static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
 {
        int i, ret;
+       int retries = 0;
        ctdb_sock_addr addr;
        struct ctdb_control_ip_iface pub;
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
@@ -1585,8 +1830,16 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
        if (ips->ips[i].pnn == options.pnn) {
                ret = find_other_host_for_public_ip(ctdb, &addr);
                if (ret != -1) {
-                       if (move_ip(ctdb, &addr, ret) != 0) {
-                               DEBUG(DEBUG_ERR,("Failed to move ip to node %d\n", ret));
+                       do {
+                               ret = move_ip(ctdb, &addr, ret);
+                               if (ret != 0) {
+                                       DEBUG(DEBUG_ERR,("Failed to move ip to node %d. Wait 3 seconds and try again.\n", options.pnn));
+                                       sleep(3);
+                                       retries++;
+                               }
+                       } while (retries < 5 && ret != 0);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_ERR,("Failed to move ip to node %d. Giving up.\n", options.pnn));
                                return -1;
                        }
                }
@@ -2078,145 +2331,6 @@ static int control_getpid(struct ctdb_context *ctdb, int argc, const char **argv
        return 0;
 }
 
-static uint32_t ipreallocate_finished;
-
-/*
-  handler for receiving the response to ipreallocate
-*/
-static void ip_reallocate_handler(struct ctdb_context *ctdb, uint64_t srvid, 
-                            TDB_DATA data, void *private_data)
-{
-       ipreallocate_finished = 1;
-}
-
-static void ctdb_every_second(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
-{
-       struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
-
-       event_add_timed(ctdb->ev, ctdb, 
-                               timeval_current_ofs(1, 0),
-                               ctdb_every_second, ctdb);
-}
-
-/*
-  ask the recovery daemon on the recovery master to perform a ip reallocation
- */
-static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
-{
-       int i, ret;
-       TDB_DATA data;
-       struct takeover_run_reply rd;
-       uint32_t recmaster;
-       struct ctdb_node_map *nodemap=NULL;
-       int retries=0;
-       struct timeval tv = timeval_current();
-
-       /* we need some events to trigger so we can timeout and restart
-          the loop
-       */
-       event_add_timed(ctdb->ev, ctdb, 
-                               timeval_current_ofs(1, 0),
-                               ctdb_every_second, ctdb);
-
-       rd.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
-       if (rd.pnn == -1) {
-               DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
-               return -1;
-       }
-       rd.srvid = getpid();
-
-       /* register a message port for receiveing the reply so that we
-          can receive the reply
-       */
-       ctdb_client_set_message_handler(ctdb, rd.srvid, ip_reallocate_handler, NULL);
-
-       data.dptr = (uint8_t *)&rd;
-       data.dsize = sizeof(rd);
-
-again:
-       /* check that there are valid nodes available */
-       if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap) != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-               return -1;
-       }
-       for (i=0; i<nodemap->num;i++) {
-               if ((nodemap->nodes[i].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) == 0) {
-                       break;
-               }
-       }
-       if (i==nodemap->num) {
-               DEBUG(DEBUG_ERR,("No recmaster available, no need to wait for cluster convergence\n"));
-               return 0;
-       }
-
-
-       ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.pnn, &recmaster);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get recmaster from node %u\n", options.pnn));
-               return ret;
-       }
-
-       /* verify the node exists */
-       if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), recmaster, ctdb, &nodemap) != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-               return -1;
-       }
-
-
-       /* check tha there are nodes available that can act as a recmaster */
-       for (i=0; i<nodemap->num; i++) {
-               if (nodemap->nodes[i].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
-                       continue;
-               }
-               break;
-       }
-       if (i == nodemap->num) {
-               DEBUG(DEBUG_ERR,("No possible nodes to host addresses.\n"));
-               return 0;
-       }
-
-       /* verify the recovery master is not STOPPED, nor BANNED */
-       if (nodemap->nodes[recmaster].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
-               DEBUG(DEBUG_ERR,("No suitable recmaster found. Try again\n"));
-               retries++;
-               sleep(1);
-               goto again;
-       } 
-
-       
-       /* verify the recovery master is not STOPPED, nor BANNED */
-       if (nodemap->nodes[recmaster].flags & (NODE_FLAGS_DELETED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)) {
-               DEBUG(DEBUG_ERR,("No suitable recmaster found. Try again\n"));
-               retries++;
-               sleep(1);
-               goto again;
-       } 
-
-       ipreallocate_finished = 0;
-       ret = ctdb_client_send_message(ctdb, recmaster, CTDB_SRVID_TAKEOVER_RUN, data);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR,("Failed to send ip takeover run request message to %u\n", options.pnn));
-               return -1;
-       }
-
-       tv = timeval_current();
-       /* this loop will terminate when we have received the reply */
-       while (timeval_elapsed(&tv) < 3.0) {
-               event_loop_once(ctdb->ev);
-       }
-       if (ipreallocate_finished == 1) {
-               return 0;
-       }
-
-       DEBUG(DEBUG_ERR,("Timed out waiting for recmaster ipreallocate. Trying again\n"));
-       retries++;
-       sleep(1);
-       goto again;
-
-       return 0;
-}
-
-
 /*
   disable a remote node
  */
@@ -2855,6 +2969,107 @@ static int control_catdb(struct ctdb_context *ctdb, int argc, const char **argv)
        return 0;
 }
 
+/*
+  display the content of a database key
+  usage: readkey <dbname> <key>. Returns 0 on success and -1 on bad arguments,
+  unknown database or attach failure; exits with status 10 if the fetch fails.
+ */
+static int control_readkey(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+       const char *db_name;
+       struct ctdb_db_context *ctdb_db;
+       struct ctdb_record_handle *h;
+       TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+       TDB_DATA key, data;
+
+       if (argc < 2) {
+               talloc_free(tmp_ctx);
+               usage();
+               return -1;
+       }
+       db_name = argv[0];
+       if (db_exists(ctdb, db_name)) {
+               DEBUG(DEBUG_ERR,("Database '%s' does not exist\n", db_name));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       ctdb_db = ctdb_attach(ctdb, db_name, false, 0);
+       if (ctdb_db == NULL) {
+               DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", db_name));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       key.dptr  = discard_const(argv[1]);
+       key.dsize = strlen((char *)key.dptr);
+       h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
+       if (h == NULL) {
+               printf("Failed to fetch record '%s' on node %d\n", 
+                       (const char *)key.dptr, ctdb_get_pnn(ctdb));
+               talloc_free(tmp_ctx);
+               exit(10);
+       }
+       /* the record need not be NUL terminated, so bound the print by dsize */
+       printf("Data: size:%d ptr:[%.*s]\n", (int)data.dsize, (int)data.dsize, (const char *)data.dptr);
+       talloc_free(h); /* release the record handle before the database it was fetched from */
+       talloc_free(ctdb_db);
+       talloc_free(tmp_ctx);
+       return 0;
+}
+
+/*
+  write a value to a database key: writekey <dbname> <key> <value>
+  returns 0 on success, -1 on failure; exits with status 10 if the fetch fails
+ */
+static int control_writekey(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+       const char *db_name;
+       struct ctdb_db_context *ctdb_db;
+       struct ctdb_record_handle *h;
+       TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+       TDB_DATA key, data;
+       int ret = 0;
+
+       if (argc < 3) {
+               talloc_free(tmp_ctx);
+               usage();
+               return -1;
+       }
+       db_name = argv[0];
+       if (db_exists(ctdb, db_name)) {
+               DEBUG(DEBUG_ERR,("Database '%s' does not exist\n", db_name));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       ctdb_db = ctdb_attach(ctdb, db_name, false, 0);
+       if (ctdb_db == NULL) {
+               DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", db_name));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       key.dptr  = discard_const(argv[1]);
+       key.dsize = strlen((char *)key.dptr);
+       /* fetch the record handle that ctdb_record_store() below writes through */
+       h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
+       if (h == NULL) {
+               printf("Failed to fetch record '%s' on node %d\n", 
+                       (const char *)key.dptr, ctdb_get_pnn(ctdb));
+               talloc_free(tmp_ctx);
+               exit(10);
+       }
+       data.dptr  = discard_const(argv[2]);
+       data.dsize = strlen((char *)data.dptr);
+       if (ctdb_record_store(h, data) != 0) {
+               printf("Failed to store record\n");
+               ret = -1; /* report the failed store to the caller instead of returning success */
+       }
+       talloc_free(h);
+       talloc_free(ctdb_db);
+       talloc_free(tmp_ctx);
+       return ret;
+}
 
 /*
   fetch a record from a persistent database
@@ -4374,50 +4589,6 @@ static int control_wipedb(struct ctdb_context *ctdb, int argc,
        return 0;
 }
 
-/*
- * set flags of a node in the nodemap
- */
-static int control_setflags(struct ctdb_context *ctdb, int argc, const char **argv)
-{
-       int ret;
-       int32_t status;
-       int node;
-       int flags;
-       TDB_DATA data;
-       struct ctdb_node_flag_change c;
-
-       if (argc != 2) {
-               usage();
-               return -1;
-       }
-
-       if (sscanf(argv[0], "%d", &node) != 1) {
-               DEBUG(DEBUG_ERR, ("Badly formed node\n"));
-               usage();
-               return -1;
-       }
-       if (sscanf(argv[1], "0x%x", &flags) != 1) {
-               DEBUG(DEBUG_ERR, ("Badly formed flags\n"));
-               usage();
-               return -1;
-       }
-
-       c.pnn       = node;
-       c.old_flags = 0;
-       c.new_flags = flags;
-
-       data.dsize = sizeof(c);
-       data.dptr = (unsigned char *)&c;
-
-       ret = ctdb_control(ctdb, options.pnn, 0, CTDB_CONTROL_MODIFY_FLAGS, 0, 
-                          data, NULL, NULL, &status, NULL, NULL);
-       if (ret != 0 || status != 0) {
-               DEBUG(DEBUG_ERR,("Failed to modify flags\n"));
-               return -1;
-       }
-       return 0;
-}
-
 /*
   dump memory usage
  */
@@ -4554,60 +4725,35 @@ static int control_msglisten(struct ctdb_context *ctdb, int argc, const char **a
 
 /*
   list all nodes in the cluster
-  if the daemon is running, we read the data from the daemon.
-  if the daemon is not running we parse the nodes file directly
+  we parse the nodes file directly
  */
 static int control_listnodes(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-       int i, ret;
-       struct ctdb_node_map *nodemap=NULL;
+       TALLOC_CTX *mem_ctx = talloc_new(NULL);
+       struct pnn_node *pnn_nodes;
+       struct pnn_node *pnn_node;
 
-       if (ctdb != NULL) {
-               ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
-                       return ret;
-               }
+       pnn_nodes = read_nodes_file(mem_ctx);
+       if (pnn_nodes == NULL) {
+               DEBUG(DEBUG_ERR,("Failed to read nodes file\n"));
+               talloc_free(mem_ctx);
+               return -1;
+       }
 
-               for(i=0;i<nodemap->num;i++){
-                       if (nodemap->nodes[i].flags & NODE_FLAGS_DELETED) {
-                               continue;
-                       }
-                       if (options.machinereadable){
-                               printf(":%d:%s:\n", nodemap->nodes[i].pnn, ctdb_addr_to_str(&nodemap->nodes[i].addr));
-                       } else {
-                               printf("%s\n", ctdb_addr_to_str(&nodemap->nodes[i].addr));
-                       }
-               }
-       } else {
-               TALLOC_CTX *mem_ctx = talloc_new(NULL);
-               struct pnn_node *pnn_nodes;
-               struct pnn_node *pnn_node;
-       
-               pnn_nodes = read_nodes_file(mem_ctx);
-               if (pnn_nodes == NULL) {
-                       DEBUG(DEBUG_ERR,("Failed to read nodes file\n"));
+       for(pnn_node=pnn_nodes;pnn_node;pnn_node=pnn_node->next) {
+               ctdb_sock_addr addr;
+               if (parse_ip(pnn_node->addr, NULL, 63999, &addr) == 0) {
+                       DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s' in nodes file\n", pnn_node->addr));
                        talloc_free(mem_ctx);
                        return -1;
                }
-
-               for(pnn_node=pnn_nodes;pnn_node;pnn_node=pnn_node->next) {
-                       ctdb_sock_addr addr;
-
-                       if (parse_ip(pnn_node->addr, NULL, 63999, &addr) == 0) {
-                               DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s' in nodes file\n", pnn_node->addr));
-                               talloc_free(mem_ctx);
-                               return -1;
-                       }
-
-                       if (options.machinereadable){
-                               printf(":%d:%s:\n", pnn_node->pnn, pnn_node->addr);
-                       } else {
-                               printf("%s\n", pnn_node->addr);
-                       }
+               if (options.machinereadable){
+                       printf(":%d:%s:\n", pnn_node->pnn, pnn_node->addr);
+               } else {
+                       printf("%s\n", pnn_node->addr);
                }
-               talloc_free(mem_ctx);
        }
+       talloc_free(mem_ctx);
 
        return 0;
 }
@@ -4679,6 +4825,7 @@ static const struct {
        { "listvars",        control_listvars,          true,   false,  "list tunable variables"},
        { "statistics",      control_statistics,        false,  false, "show statistics" },
        { "statisticsreset", control_statistics_reset,  true,   false,  "reset statistics"},
+       { "stats",           control_stats,             false,  false,  "show rolling statistics", "[number of history records]" },
        { "ip",              control_ip,                false,  false,  "show which public ip's that ctdb manages" },
        { "ipinfo",          control_ipinfo,            true,   false,  "show details about a public ip that ctdb manages", "<ip>" },
        { "ifaces",          control_ifaces,            true,   false,  "show which interfaces that ctdb manages" },
@@ -4740,7 +4887,6 @@ static const struct {
        { "dumpdbbackup",    control_dumpdbbackup,      false,  true,  "dump database backup from a file.", "<file>"},
        { "wipedb",           control_wipedb,        false,     false, "wipe the contents of a database.", "<dbname>"},
        { "recmaster",        control_recmaster,        false,  false, "show the pnn for the recovery master."},
-       { "setflags",        control_setflags,          false,  false, "set flags for a node in the nodemap.", "<node> <flags>"},
        { "scriptstatus",    control_scriptstatus,  false,      false, "show the status of the monitoring scripts (or all scripts)", "[all]"},
        { "enablescript",     control_enablescript,  false,     false, "enable an eventscript", "<script>"},
        { "disablescript",    control_disablescript,  false,    false, "disable an eventscript", "<script>"},
@@ -4759,6 +4905,8 @@ static const struct {
        { "pfetch",          control_pfetch,            false,  false,  "fetch a record from a persistent database", "<db> <key> [<file>]" },
        { "pstore",          control_pstore,            false,  false,  "write a record to a persistent database", "<db> <key> <file containing record>" },
        { "tfetch",          control_tfetch,            false,  true,  "fetch a record from a [c]tdb-file", "<tdb-file> <key> [<file>]" },
+       { "readkey",         control_readkey,           true,   false,  "read the content off a database key", "<tdb-file> <key>" },
+       { "writekey",        control_writekey,          true,   false,  "write to a database key", "<tdb-file> <key> <value>" },
 };
 
 /*
@@ -4885,12 +5033,12 @@ int main(int argc, const char *argv[])
                                close(2);
                        }
 
-                       /* initialise ctdb */
-                       ctdb = ctdb_cmdline_client(ev);
-
                        if (ctdb_commands[i].without_daemon == false) {
                                const char *socket_name;
 
+                               /* initialise ctdb */
+                               ctdb = ctdb_cmdline_client(ev);
+
                                if (ctdb == NULL) {
                                        DEBUG(DEBUG_ERR, ("Failed to init ctdb\n"));
                                        exit(1);