Remove LACOUNT and LACCESSOR and migrate the records immediately.
[sahlberg/ctdb.git] / client / ctdb_client.c
index 2e7a0936ff583f548a6b62a0e8913e6538829074..1abea127a989cf9bc21b0afed89a340a33c365fb 100644 (file)
@@ -22,7 +22,7 @@
 #include "db_wrap.h"
 #include "lib/tdb/include/tdb.h"
 #include "lib/util/dlinklist.h"
-#include "lib/events/events.h"
+#include "lib/tevent/tevent.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/locale.h"
@@ -30,6 +30,8 @@
 #include "../include/ctdb_private.h"
 #include "lib/util/dlinklist.h"
 
+pid_t ctdbd_pid;
+
 /*
   allocate a packet for use in client<->daemon communication
  */
@@ -70,7 +72,7 @@ struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
 */
 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
                    struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
-                   TDB_DATA *data, uint32_t caller)
+                   TDB_DATA *data)
 {
        struct ctdb_call_info *c;
        struct ctdb_registered_call *fn;
@@ -103,15 +105,8 @@ int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
                return -1;
        }
 
-       if (header->laccessor != caller) {
-               header->lacount = 0;
-       }
-       header->laccessor = caller;
-       header->lacount++;
-
-       /* we need to force the record to be written out if this was a remote access,
-          so that the lacount is updated */
-       if (c->new_data == NULL && header->laccessor != ctdb->pnn) {
+       /* we need to force the record to be written out if this was a remote access */
+       if (c->new_data == NULL) {
                c->new_data = &c->record_data;
        }
 
@@ -279,7 +274,7 @@ int ctdb_socket_connect(struct ctdb_context *ctdb)
 
        ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd, 
                                              CTDB_DS_ALIGNMENT, 
-                                             ctdb_client_read_cb, ctdb);
+                                             ctdb_client_read_cb, ctdb, "to-ctdbd");
        return 0;
 }
 
@@ -366,7 +361,7 @@ static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db
        *(state->call) = *call;
        state->ctdb_db = ctdb_db;
 
-       ret = ctdb_call_local(ctdb_db, state->call, header, state, data, ctdb->pnn);
+       ret = ctdb_call_local(ctdb_db, state->call, header, state, data);
 
        return state;
 }
@@ -472,8 +467,8 @@ int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
   tell the daemon what messaging srvid we will use, and register the message
   handler function in the client
 */
-int ctdb_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid, 
-                            ctdb_message_fn_t handler,
+int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid, 
+                            ctdb_msg_fn_t handler,
                             void *private_data)
                                    
 {
@@ -494,7 +489,7 @@ int ctdb_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
 /*
   tell the daemon we no longer want a srvid
 */
-int ctdb_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
+int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
 {
        int res;
        int32_t status;
@@ -515,7 +510,7 @@ int ctdb_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void
 /*
   send a message - from client context
  */
-int ctdb_send_message(struct ctdb_context *ctdb, uint32_t pnn,
+int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
                      uint64_t srvid, TDB_DATA data)
 {
        struct ctdb_req_message *r;
@@ -764,7 +759,9 @@ static void control_timeout_func(struct event_context *ev, struct timed_event *t
 {
        struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
 
-       DEBUG(DEBUG_ERR,("control timed out. reqid:%d opcode:%d dstnode:%d\n", state->reqid, state->c->opcode, state->c->hdr.destnode));
+       DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
+                        "dstnode:%u\n", state->reqid, state->c->opcode,
+                        state->c->hdr.destnode));
 
        state->state = CTDB_CONTROL_TIMEOUT;
 
@@ -1493,6 +1490,44 @@ int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint3
        return 0;
 }
 
+/*
+  get the health status of a db: 0 with *reason set (NULL for an empty reply), else -1
+ */
+int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
+                         struct timeval timeout,
+                         uint32_t destnode,
+                         uint32_t dbid, TALLOC_CTX *mem_ctx,
+                         const char **reason)
+{
+       int ret;
+       int32_t res;
+       TDB_DATA data;
+
+       data.dptr = (uint8_t *)&dbid;
+       data.dsize = sizeof(dbid);
+
+       ret = ctdb_control(ctdb, destnode, 0,
+                          CTDB_CONTROL_DB_GET_HEALTH, 0, data,
+                          mem_ctx, &data, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               return -1;
+       }
+
+       if (data.dsize == 0) {
+               (*reason) = NULL;
+               return 0;
+       }
+
+       (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
+       /* the reply buffer is finished with whether or not the copy succeeded */
+       talloc_free(data.dptr);
+       if ((*reason) == NULL) {
+               return -1;
+       }
+
+       return 0;
+}
+
 /*
   create a database
  */
@@ -1679,9 +1714,10 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name,
        }
 
        tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
-       if (!ctdb->do_setsched) {
+       if (ctdb->valgrinding) {
                tdb_flags |= TDB_NOMMAP;
        }
+       tdb_flags |= TDB_DISALLOW_NESTING;
 
        ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
        if (ctdb_db->ltdb == NULL) {
@@ -1806,7 +1842,7 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
        state.private_data = private_data;
        state.fn = fn;
 
-       ret = ctdb_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
+       ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
                return -1;
@@ -1823,7 +1859,7 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
                           data, NULL, NULL, &status, NULL, NULL);
        if (ret != 0 || status != 0) {
                DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
-               ctdb_remove_message_handler(ctdb_db->ctdb, srvid, &state);
+               ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
                return -1;
        }
 
@@ -1831,7 +1867,7 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
                event_loop_once(ctdb_db->ctdb->ev);
        }
 
-       ret = ctdb_remove_message_handler(ctdb_db->ctdb, srvid, &state);
+       ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
                return -1;
@@ -1844,15 +1880,12 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
 /*
   called on each key during a catdb
  */
-static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
+int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
 {
        int i;
        FILE *f = (FILE *)p;
        struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
 
-       fprintf(f, "dmaster: %u\n", h->dmaster);
-       fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
-
        fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
        for (i=0;i<key.dsize;i++) {
                if (ISASCII(key.dptr[i])) {
@@ -1863,7 +1896,10 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi
        }
        fprintf(f, "\"\n");
 
-       fprintf(f, "data(%u) = \"", (unsigned)data.dsize);
+       fprintf(f, "dmaster: %u\n", h->dmaster);
+       fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
+
+       fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
        for (i=sizeof(*h);i<data.dsize;i++) {
                if (ISASCII(data.dptr[i])) {
                        fprintf(f, "%c", data.dptr[i]);
@@ -1873,6 +1909,8 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi
        }
        fprintf(f, "\"\n");
 
+       fprintf(f, "\n");
+
        return 0;
 }
 
@@ -1881,7 +1919,7 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi
  */
 int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
 {
-       return ctdb_traverse(ctdb_db, dumpdb_fn, f);
+       return ctdb_traverse(ctdb_db, ctdb_dumpdb_record, f);
 }
 
 /*
@@ -1910,9 +1948,9 @@ int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t
   async freeze send control
  */
 struct ctdb_client_control_state *
-ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
+ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
 {
-       return ctdb_control_send(ctdb, destnode, 0, 
+       return ctdb_control_send(ctdb, destnode, priority, 
                           CTDB_CONTROL_FREEZE, 0, tdb_null, 
                           mem_ctx, &timeout, NULL);
 }
@@ -1935,30 +1973,43 @@ int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct
 }
 
 /*
-  freeze a node
+  freeze databases of a certain priority
  */
-int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
 {
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
        struct ctdb_client_control_state *state;
        int ret;
 
-       state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode);
+       state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
        ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
        talloc_free(tmp_ctx);
 
        return ret;
 }
 
+/* Freeze all databases: each priority 1..NUM_DB_PRIORITIES in turn, -1 on first failure */
+int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+       int prio = 1;
+
+       while (prio <= NUM_DB_PRIORITIES) {
+               if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, prio++)) {
+                       return -1;
+               }
+       }
+       return 0;
+}
+
 /*
-  thaw a node
+  thaw databases of a certain priority
  */
-int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
 {
        int ret;
        int32_t res;
 
-       ret = ctdb_control(ctdb, destnode, 0, 
+       ret = ctdb_control(ctdb, destnode, priority, 
                           CTDB_CONTROL_THAW, 0, tdb_null, 
                           NULL, NULL, &res, &timeout, NULL);
        if (ret != 0 || res != 0) {
@@ -1969,6 +2020,12 @@ int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t d
        return 0;
 }
 
+/* thaw all databases: forwards to the per-priority call with priority 0 */
+int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
+{
+       return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, 0);
+}
+
 /*
   get pnn of a node, or -1
  */
@@ -2254,16 +2311,18 @@ int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
 }
 
 
-int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb, 
-                       struct timeval timeout, uint32_t destnode, 
-                       TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
+int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
+                                  struct timeval timeout, uint32_t destnode,
+                                  TALLOC_CTX *mem_ctx,
+                                  uint32_t flags,
+                                  struct ctdb_all_public_ips **ips)
 {
        int ret;
        TDB_DATA outdata;
        int32_t res;
 
        ret = ctdb_control(ctdb, destnode, 0, 
-                          CTDB_CONTROL_GET_PUBLIC_IPS, 0, tdb_null, 
+                          CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
                           mem_ctx, &outdata, &res, &timeout, NULL);
        if (ret == 0 && res == -1) {
                DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
@@ -2280,6 +2339,16 @@ int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
        return 0;
 }
 
+int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
+                            struct timeval timeout, uint32_t destnode,
+                            TALLOC_CTX *mem_ctx,
+                            struct ctdb_all_public_ips **ips)
+{
+       return ctdb_ctrl_get_public_ips_flags(ctdb, timeout,
+                                             destnode, mem_ctx,
+                                             0, ips); /* same call as the _flags variant, flags fixed at 0 */
+}
+
 int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb, 
                        struct timeval timeout, uint32_t destnode, 
                        TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
@@ -2313,6 +2382,162 @@ int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
        return 0;
 }
 
+int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
+                                struct timeval timeout, uint32_t destnode,
+                                TALLOC_CTX *mem_ctx,
+                                const ctdb_sock_addr *addr,
+                                struct ctdb_control_public_ip_info **_info)
+{
+       int ret;
+       TDB_DATA indata;
+       TDB_DATA outdata;
+       int32_t res;
+       struct ctdb_control_public_ip_info *info;
+       uint64_t len;
+       uint32_t i;
+       /* send addr to destnode, then validate the reply before copying it to *_info */
+       indata.dptr = discard_const_p(uint8_t, addr);
+       indata.dsize = sizeof(*addr);
+
+       ret = ctdb_control(ctdb, destnode, 0,
+                          CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
+                          mem_ctx, &outdata, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+                               "failed ret:%d res:%d\n",
+                               ret, res));
+               return -1;
+       }
+
+       len = offsetof(struct ctdb_control_public_ip_info, ifaces);
+       if (len > outdata.dsize) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+                               "returned invalid data with size %u > %u\n",
+                               (unsigned int)outdata.dsize,
+                               (unsigned int)len));
+               dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+               return -1;
+       }
+
+       info = (struct ctdb_control_public_ip_info *)outdata.dptr;
+       len += (uint64_t)info->num*sizeof(struct ctdb_control_iface_info);
+       /* len is 64-bit so that a bogus num cannot wrap it past the check below */
+       if (len > outdata.dsize) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+                               "returned invalid data with size %u > %u\n",
+                               (unsigned int)outdata.dsize,
+                               (unsigned int)len));
+               dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+               return -1;
+       }
+
+       /* make sure we null terminate the returned strings */
+       for (i=0; i < info->num; i++) {
+               info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
+       }
+
+       *_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
+                                                               outdata.dptr,
+                                                               outdata.dsize);
+       talloc_free(outdata.dptr);
+       if (*_info == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
+                               "talloc_memdup size %u failed\n",
+                               (unsigned int)outdata.dsize));
+               return -1;
+       }
+
+       return 0;
+}
+
+int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
+                        struct timeval timeout, uint32_t destnode,
+                        TALLOC_CTX *mem_ctx,
+                        struct ctdb_control_get_ifaces **_ifaces)
+{
+       int ret;
+       TDB_DATA outdata;
+       int32_t res;
+       struct ctdb_control_get_ifaces *ifaces;
+       uint64_t len;
+       uint32_t i;
+       /* fetch the interface list from destnode, validate it, then copy it to *_ifaces */
+       ret = ctdb_control(ctdb, destnode, 0,
+                          CTDB_CONTROL_GET_IFACES, 0, tdb_null,
+                          mem_ctx, &outdata, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+                               "failed ret:%d res:%d\n",
+                               ret, res));
+               return -1;
+       }
+
+       len = offsetof(struct ctdb_control_get_ifaces, ifaces);
+       if (len > outdata.dsize) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+                               "returned invalid data with size %u > %u\n",
+                               (unsigned int)outdata.dsize,
+                               (unsigned int)len));
+               dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+               return -1;
+       }
+
+       ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
+       len += (uint64_t)ifaces->num*sizeof(struct ctdb_control_iface_info);
+       /* len is 64-bit so that a bogus num cannot wrap it past the check below */
+       if (len > outdata.dsize) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+                               "returned invalid data with size %u > %u\n",
+                               (unsigned int)outdata.dsize,
+                               (unsigned int)len));
+               dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
+               return -1;
+       }
+
+       /* make sure we null terminate the returned strings */
+       for (i=0; i < ifaces->num; i++) {
+               ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
+       }
+
+       *_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
+                                                                 outdata.dptr,
+                                                                 outdata.dsize);
+       talloc_free(outdata.dptr);
+       if (*_ifaces == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
+                               "talloc_memdup size %u failed\n",
+                               (unsigned int)outdata.dsize));
+               return -1;
+       }
+
+       return 0;
+}
+
+int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
+                            struct timeval timeout, uint32_t destnode,
+                            TALLOC_CTX *mem_ctx,
+                            const struct ctdb_control_iface_info *info)
+{
+       TDB_DATA payload;
+       int32_t status;
+       int rc;
+
+       /* info is const, so the qualifier is dropped to fit it into the TDB_DATA */
+       payload.dsize = sizeof(*info);
+       payload.dptr = discard_const_p(uint8_t, info);
+       rc = ctdb_control(ctdb, destnode, 0,
+                         CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, payload,
+                         mem_ctx, NULL, &status, &timeout, NULL);
+       if (rc == 0 && status == 0) {
+               return 0;
+       }
+
+       DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
+                       "failed ret:%d res:%d\n",
+                       rc, status));
+       return -1;
+}
+
 /*
   set/clear the permanent disabled bit on a remote node
  */
@@ -2363,11 +2588,11 @@ int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32
        nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
 
        if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
-                                       nodes,
+                                       nodes, 0,
                                        timeout, false, data,
                                        NULL, NULL,
                                        NULL) != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " ctdb_control to disable node failed\n"));
+               DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
 
                talloc_free(tmp_ctx);
                return -1;
@@ -2684,6 +2909,8 @@ struct ctdb_context *ctdb_init(struct event_context *ev)
        }
        ctdb->ev  = ev;
        ctdb->idr = idr_init(ctdb);
+       /* Wrap early to exercise code. */
+       ctdb->lastid = INT_MAX-200;
        CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
 
        ret = ctdb_set_socketname(ctdb, CTDB_PATH);
@@ -2693,6 +2920,8 @@ struct ctdb_context *ctdb_init(struct event_context *ev)
                return NULL;
        }
 
+       ctdb->statistics.statistics_start_time = timeval_current();
+
        return ctdb;
 }
 
@@ -2716,6 +2945,11 @@ int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
        return 0;
 }
 
+const char *ctdb_get_socketname(struct ctdb_context *ctdb)
+{
+       return ctdb->daemon.name; /* the stored pointer is handed back as-is, not copied */
+}
+
 /*
   return the pnn of this node
 */
@@ -2868,6 +3102,7 @@ int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *
 int ctdb_client_async_control(struct ctdb_context *ctdb,
                                enum ctdb_controls opcode,
                                uint32_t *nodes,
+                               uint64_t srvid,
                                struct timeval timeout,
                                bool dont_log_errors,
                                TDB_DATA data,
@@ -2893,7 +3128,7 @@ int ctdb_client_async_control(struct ctdb_context *ctdb,
        for (j=0; j<num_nodes; j++) {
                uint32_t pnn = nodes[j];
 
-               state = ctdb_control_send(ctdb, pnn, 0, opcode, 
+               state = ctdb_control_send(ctdb, pnn, srvid, opcode, 
                                          0, data, async_data, &timeout, NULL);
                if (state == NULL) {
                        DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
@@ -2975,6 +3210,40 @@ uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
        return nodes;
 }
 
+uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
+                               struct ctdb_node_map *node_map,
+                               TALLOC_CTX *mem_ctx,
+                               uint32_t pnn)
+{
+       uint32_t *nodes;
+       int wanted = 0;
+       int idx, out;
+
+       /* pass 1: count the nodes that are not inactive and are not pnn */
+       for (idx=0;idx<node_map->num;idx++) {
+               if (!(node_map->nodes[idx].flags & NODE_FLAGS_INACTIVE) &&
+                   node_map->nodes[idx].pnn != pnn) {
+                       wanted++;
+               }
+       }
+
+       nodes = talloc_array(mem_ctx, uint32_t, wanted);
+       CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+       /* pass 2: same filter, this time recording each pnn that passes */
+       out = 0;
+       for (idx=0;idx<node_map->num;idx++) {
+               if (node_map->nodes[idx].flags & NODE_FLAGS_INACTIVE) {
+                       continue;
+               }
+               if (node_map->nodes[idx].pnn != pnn) {
+                       nodes[out++] = node_map->nodes[idx].pnn;
+               }
+       }
+
+       return nodes;
+}
+
 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
                                struct ctdb_node_map *node_map,
                                TALLOC_CTX *mem_ctx,
@@ -3085,12 +3354,42 @@ int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout,
        return ret;
 }
 
+/**
+ * ask destnode whether db_id has an active transaction: the control's status, or -1 if it failed
+ */
+int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
+                                    uint32_t destnode,
+                                    uint32_t db_id)
+{
+       TDB_DATA request;
+       int32_t active;
+       int rc;
+
+       request.dsize = sizeof(db_id);
+       request.dptr = (uint8_t *)&db_id;
+
+       rc = ctdb_control(ctdb, destnode, 0,
+                         CTDB_CONTROL_TRANS2_ACTIVE,
+                         0, request, NULL, NULL, &active,
+                         NULL, NULL);
+
+       if (rc == 0) {
+               return active;
+       }
+
+       DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
+       return -1;
+}
+
+
 struct ctdb_transaction_handle {
        struct ctdb_db_context *ctdb_db;
        bool in_replay;
-       /* we store the reads and writes done under a transaction one
-          list stores both reads and writes, the other just writes
-       */
+       /*
+        * we store the reads and writes done under a transaction:
+        * - one list stores both reads and writes (m_all),
+        * - the other just writes (m_write)
+        */
        struct ctdb_marshall_buffer *m_all;
        struct ctdb_marshall_buffer *m_write;
 };
@@ -3107,11 +3406,14 @@ static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
 {
        struct ctdb_record_handle *rh;
        TDB_DATA key;
+       TDB_DATA data;
        struct ctdb_ltdb_header header;
        TALLOC_CTX *tmp_ctx;
        const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
        int ret;
        struct ctdb_db_context *ctdb_db = h->ctdb_db;
+       pid_t pid;
+       int32_t status;
 
        key.dptr = discard_const(keyname);
        key.dsize = strlen(keyname);
@@ -3126,10 +3428,42 @@ again:
 
        rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
        if (rh == NULL) {
-               DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));             
+               DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
                talloc_free(tmp_ctx);
                return -1;
        }
+
+       status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
+                                             CTDB_CURRENT_NODE,
+                                             ctdb_db->db_id);
+       if (status == 1) {
+               unsigned long int usec = (1000 + random()) % 100000;
+               DEBUG(DEBUG_DEBUG, (__location__ " transaction is active "
+                                   "on db_id[0x%08x]. waiting for %lu "
+                                   "microseconds\n",
+                                   ctdb_db->db_id, usec));
+               talloc_free(tmp_ctx);
+               usleep(usec);
+               goto again;
+       }
+
+       /*
+        * store the pid in the database:
+        * it is not enough that the node is dmaster...
+        */
+       pid = getpid();
+       data.dptr = (unsigned char *)&pid;
+       data.dsize = sizeof(pid_t);
+       rh->header.rsn++;
+       rh->header.dmaster = ctdb_db->ctdb->pnn;
+       ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
+                                 "transaction record\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+
        talloc_free(rh);
 
        ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
@@ -3139,8 +3473,26 @@ again:
                return -1;
        }
 
-       ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, NULL);
-       if (ret != 0 || header.dmaster != ctdb_db->ctdb->pnn) {
+       ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " Failed to re-fetch transaction "
+                                "lock record inside transaction\n"));
+               tdb_transaction_cancel(ctdb_db->ltdb->tdb);
+               talloc_free(tmp_ctx);
+               goto again;
+       }
+
+       if (header.dmaster != ctdb_db->ctdb->pnn) {
+               DEBUG(DEBUG_DEBUG,(__location__ " not dmaster any more on "
+                                  "transaction lock record\n"));
+               tdb_transaction_cancel(ctdb_db->ltdb->tdb);
+               talloc_free(tmp_ctx);
+               goto again;
+       }
+
+       if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) {
+               DEBUG(DEBUG_DEBUG, (__location__ " my pid is not stored in "
+                                   "the transaction lock record\n"));
                tdb_transaction_cancel(ctdb_db->ltdb->tdb);
                talloc_free(tmp_ctx);
                goto again;
@@ -3374,6 +3726,9 @@ again:
                           &timeout, NULL);
        if (ret != 0 || status != 0) {
                tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+               DEBUG(DEBUG_NOTICE, (__location__ " transaction commit%s failed"
+                                    ", retrying after 1 second...\n",
+                                    (retries==0)?"":" retry")); /* leading space: "commit retry failed" */
                sleep(1);
 
                if (ret != 0) {
@@ -3393,7 +3748,7 @@ again:
                        }
                }
 
-               if (++retries == 10) {
+               if (++retries == 100) {
                        DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n", 
                                         h->ctdb_db->db_id, retries, (unsigned)failure_control));
                        ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
@@ -3404,7 +3759,11 @@ again:
                }               
 
                if (ctdb_replay_transaction(h) != 0) {
-                       DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n"));
+                       DEBUG(DEBUG_ERR, (__location__ " Failed to replay "
+                                         "transaction on db 0x%08x, "
+                                         "failure control =%u\n",
+                                         h->ctdb_db->db_id,
+                                         (unsigned)failure_control));
                        ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
                                     failure_control, CTDB_CTRL_FLAG_NOREPLY, 
                                     tdb_null, NULL, NULL, NULL, NULL, NULL);           
@@ -3419,7 +3778,11 @@ again:
        /* do the real commit locally */
        ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
        if (ret != 0) {
-               DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction\n"));
+               DEBUG(DEBUG_ERR, (__location__ " Failed to commit transaction "
+                                 "on db id 0x%08x locally, "
+                                 "failure_control=%u\n",
+                                 h->ctdb_db->db_id,
+                                 (unsigned)failure_control));
                ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
                             failure_control, CTDB_CTRL_FLAG_NOREPLY, 
                             tdb_null, NULL, NULL, NULL, NULL, NULL);           
@@ -3457,9 +3820,15 @@ int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
  * to the daemon as a client process, this function can be used to change
  * the ctdb context from daemon into client mode
  */
-int switch_from_server_to_client(struct ctdb_context *ctdb)
+int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...)
 {
        int ret;
+       va_list ap;
+
+       /* Add extra information so we can identify this in the logs */
+       va_start(ap, fmt);
+       debug_extra = talloc_strdup_append(talloc_vasprintf(NULL, fmt, ap), ":");
+       va_end(ap);
 
        /* shutdown the transport */
        if (ctdb->methods) {
@@ -3469,15 +3838,11 @@ int switch_from_server_to_client(struct ctdb_context *ctdb)
        /* get a new event context */
        talloc_free(ctdb->ev);
        ctdb->ev = event_context_init(ctdb);
+       tevent_loop_allow_nesting(ctdb->ev);
 
        close(ctdb->daemon.sd);
        ctdb->daemon.sd = -1;
 
-       /* the client does not need to be realtime */
-       if (ctdb->do_setsched) {
-               ctdb_restore_scheduler(ctdb);
-       }
-
        /* initialise ctdb */
        ret = ctdb_socket_connect(ctdb);
        if (ret != 0) {
@@ -3489,108 +3854,35 @@ int switch_from_server_to_client(struct ctdb_context *ctdb)
 }
 
 /*
-  tell the main daemon we are starting a new monitor event script
- */
-int ctdb_ctrl_event_script_init(struct ctdb_context *ctdb)
-{
-       int ret;
-       int32_t res;
-
-       ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_INIT, 0, tdb_null, 
-                          ctdb, NULL, &res, NULL, NULL);
-       if (ret != 0 || res != 0) {
-               DEBUG(DEBUG_ERR,("Failed to send event_script_init\n"));
-               return -1;
-       }
-
-       return 0;
-}
-
-/*
-  tell the main daemon we are starting a new monitor event script
- */
-int ctdb_ctrl_event_script_finished(struct ctdb_context *ctdb)
-{
-       int ret;
-       int32_t res;
-
-       ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_FINISHED, 0, tdb_null, 
-                          ctdb, NULL, &res, NULL, NULL);
-       if (ret != 0 || res != 0) {
-               DEBUG(DEBUG_ERR,("Failed to send event_script_init\n"));
-               return -1;
-       }
-
-       return 0;
-}
-
-/*
-  tell the main daemon we are starting to run an eventscript
- */
-int ctdb_ctrl_event_script_start(struct ctdb_context *ctdb, const char *name)
-{
-       int ret;
-       int32_t res;
-       TDB_DATA data;
-
-       data.dptr = discard_const(name);
-       data.dsize = strlen(name)+1;
-
-       ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_START, 0, data, 
-                          ctdb, NULL, &res, NULL, NULL);
-       if (ret != 0 || res != 0) {
-               DEBUG(DEBUG_ERR,("Failed to send event_script_start\n"));
-               return -1;
-       }
-
-       return 0;
-}
-
-/*
-  tell the main daemon the status of the script we ran
- */
-int ctdb_ctrl_event_script_stop(struct ctdb_context *ctdb, int32_t result)
-{
-       int ret;
-       int32_t res;
-       TDB_DATA data;
-
-       data.dptr = (uint8_t *)&result;
-       data.dsize = sizeof(result);
-
-       ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_STOP, 0, data, 
-                          ctdb, NULL, &res, NULL, NULL);
-       if (ret != 0 || res != 0) {
-               DEBUG(DEBUG_ERR,("Failed to send event_script_stop\n"));
-               return -1;
-       }
-
-       return 0;
-}
-
-
-/*
-  get the status of running the monitor eventscripts
+  get the status of running the monitor eventscripts: NULL means never run.
  */
 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb, 
                struct timeval timeout, uint32_t destnode, 
-               TALLOC_CTX *mem_ctx,
-               struct ctdb_monitoring_wire **script_status)
+               TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
+               struct ctdb_scripts_wire **script_status)
 {
        int ret;
-       TDB_DATA outdata;
+       TDB_DATA outdata, indata;
        int32_t res;
+       uint32_t uinttype = type;
+
+       indata.dptr = (uint8_t *)&uinttype;
+       indata.dsize = sizeof(uinttype);
 
        ret = ctdb_control(ctdb, destnode, 0, 
-                          CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, tdb_null, 
+                          CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
                           mem_ctx, &outdata, &res, &timeout, NULL);
-       if (ret != 0 || res != 0 || outdata.dsize == 0) {
+       if (ret != 0 || res != 0) {
                DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
                return -1;
        }
 
-       *script_status = (struct ctdb_monitoring_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
-       talloc_free(outdata.dptr);
+       if (outdata.dsize == 0) {
+               *script_status = NULL;
+       } else {
+               *script_status = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+               talloc_free(outdata.dptr);
+       }
                    
        return 0;
 }
@@ -3776,3 +4068,164 @@ int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout
 
        return 0;
 }
+
+/* enable an eventscript
+ */
+int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
+{
+       int ret;
+       TDB_DATA data;
+       int32_t res;
+
+       data.dsize = strlen(script) + 1;
+       data.dptr  = discard_const(script);
+
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_ENABLE_SCRIPT, 0, data, 
+                          NULL, NULL, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
+               return -1;
+       }
+
+       return 0;
+}
+
+/* disable an eventscript
+ */
+int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
+{
+       int ret;
+       TDB_DATA data;
+       int32_t res;
+
+       data.dsize = strlen(script) + 1;
+       data.dptr  = discard_const(script);
+
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_DISABLE_SCRIPT, 0, data, 
+                          NULL, NULL, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
+               return -1;
+       }
+
+       return 0;
+}
+
+
+int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
+{
+       int ret;
+       TDB_DATA data;
+       int32_t res;
+
+       data.dsize = sizeof(*bantime);
+       data.dptr  = (uint8_t *)bantime;
+
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_SET_BAN_STATE, 0, data, 
+                          NULL, NULL, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
+               return -1;
+       }
+
+       return 0;
+}
+
+/* fetch the ban state from destnode; on success *bantime is the reply data, handed over to mem_ctx. returns 0, or -1 on failure */
+int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
+{
+       int ret;
+       TDB_DATA outdata;
+       int32_t res;
+       TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
+                          tmp_ctx, &outdata, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ban state failed\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       /* reparent the reply onto mem_ctx (presumably it was allocated under tmp_ctx) before tmp_ctx is freed */
+       *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
+       talloc_free(tmp_ctx);
+
+       return 0;
+}
+
+
+int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
+{
+       int ret;
+       int32_t res;
+       TDB_DATA data;
+       TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+
+       data.dptr = (uint8_t*)db_prio;
+       data.dsize = sizeof(*db_prio);
+
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
+                          tmp_ctx, NULL, &res, &timeout, NULL);
+       if (ret != 0 || res != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+
+       talloc_free(tmp_ctx);
+
+       return 0;
+}
+/* ask destnode for the priority of database db_id; stored in *priority when priority is non-NULL. returns 0, or -1 on failure */
+int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
+{
+       int ret;
+       int32_t res;
+       TDB_DATA data;
+       TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+
+       data.dptr = (uint8_t*)&db_id;
+       data.dsize = sizeof(db_id);
+
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
+                          tmp_ctx, NULL, &res, &timeout, NULL);
+       if (ret != 0 || res < 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_db_priority failed\n"));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+       /* res is non-negative here and is itself the priority value, hence the "< 0" failure test above */
+       if (priority) {
+               *priority = res;
+       }
+
+       talloc_free(tmp_ctx);
+
+       return 0;
+}
+
+int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
+{
+       int ret;
+       TDB_DATA outdata;
+       int32_t res;
+
+       ret = ctdb_control(ctdb, destnode, 0, 
+                          CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null, 
+                          mem_ctx, &outdata, &res, &timeout, NULL);
+       if (ret != 0 || res != 0 || outdata.dsize == 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
+               return -1;
+       }
+
+       *stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+       talloc_free(outdata.dptr);
+                   
+       return 0;
+}