persistent_callback: ignore the update-record return code of remote node in recovery
[sahlberg/ctdb.git] / server / ctdb_persistent.c
index 3c51742b1c2b14e9cb2c13b6f6c34c463c1244d4..3f297fa22f8bde1664b730335099f6b1e4d2d6ee 100644 (file)
@@ -19,7 +19,7 @@
 */
 
 #include "includes.h"
-#include "lib/events/events.h"
+#include "lib/tevent/tevent.h"
 #include "system/filesys.h"
 #include "system/wait.h"
 #include "db_wrap.h"
@@ -53,6 +53,12 @@ static void ctdb_persistent_callback(struct ctdb_context *ctdb,
        struct ctdb_persistent_state *state = talloc_get_type(private_data, 
                                                              struct ctdb_persistent_state);
 
+       if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+               DEBUG(DEBUG_INFO, ("ctdb_persistent_callback: ignoring reply "
+                                  "during recovery\n"));
+               return;
+       }
+
        if (status != 0) {
                DEBUG(DEBUG_ERR,("ctdb_persistent_callback failed with status %d (%s)\n",
                         status, errormsg));
@@ -105,15 +111,10 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
        struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
        struct ctdb_db_context *ctdb_db;
 
-       if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
-               DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans2_commit when recovery active\n"));
-               return -1;
-       }
-
        ctdb_db = find_ctdb_db(ctdb, m->db_id);
        if (ctdb_db == NULL) {
                DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans2_commit: "
-                                "Unknown database 0x%08x\n", m->db_id));
+                                "Unknown database db_id[0x%08x]\n", m->db_id));
                return -1;
        }
 
@@ -122,6 +123,12 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_trans2_commit: %s\n",
+                                ctdb_db->db_name, ctdb_db->unhealthy_reason));
+               return -1;
+       }
+
        /* handling num_persistent_updates is a bit strange - 
           there are 3 cases
             1) very old clients, which never called CTDB_CONTROL_START_PERSISTENT_UPDATE
@@ -138,8 +145,11 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
        switch (c->opcode) {
        case CTDB_CONTROL_PERSISTENT_STORE:
                if (ctdb_db->transaction_active) {
-                       DEBUG(DEBUG_ERR, (__location__ " trans2_commit client db_id[%d] transaction active - refusing persistent store\n",
-                               client->db_id));
+                       DEBUG(DEBUG_ERR, (__location__ " trans2_commit: a "
+                                         "transaction is active on database "
+                                         "db_id[0x%08x] - refusing persistent "
+                                        " store for client id[0x%08x]\n",
+                                         ctdb_db->db_id, client->client_id));
                        return -1;
                }
                if (client->num_persistent_updates > 0) {
@@ -148,33 +158,49 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
                break;
        case CTDB_CONTROL_TRANS2_COMMIT:
                if (ctdb_db->transaction_active) {
-                       DEBUG(DEBUG_ERR,(__location__ " trans2_commit: client "
-                                        "already has a transaction commit "
-                                        "active on db_id[%d]\n",
-                                        client->db_id));
+                       DEBUG(DEBUG_ERR,(__location__ " trans2_commit: there is"
+                                        " already a transaction commit "
+                                        "active on db_id[0x%08x] - forbidding "
+                                        "client_id[0x%08x] to commit\n",
+                                        ctdb_db->db_id, client->client_id));
                        return -1;
                }
                if (client->db_id != 0) {
                        DEBUG(DEBUG_ERR,(__location__ " ERROR: trans2_commit: "
-                                        "client-db_id[%d] != 0\n",
-                                        client->db_id));
+                                        "client-db_id[0x%08x] != 0 "
+                                        "(client_id[0x%08x])\n",
+                                        client->db_id, client->client_id));
                        return -1;
                }
                client->num_persistent_updates++;
                ctdb_db->transaction_active = true;
                client->db_id = m->db_id;
+               DEBUG(DEBUG_DEBUG, (__location__ " client id[0x%08x] started to"
+                                 " commit transaction on db id[0x%08x]\n",
+                                 client->client_id, client->db_id));
                break;
        case CTDB_CONTROL_TRANS2_COMMIT_RETRY:
                /* already updated from the first commit */
                if (client->db_id != m->db_id) {
                        DEBUG(DEBUG_ERR,(__location__ " ERROR: trans2_commit "
-                                        "retry: client-db_id[%d] != db_id[%d]"
-                                        "\n", client->db_id, m->db_id));
+                                        "retry: client-db_id[0x%08x] != "
+                                        "db_id[0x%08x] (client_id[0x%08x])\n",
+                                        client->db_id,
+                                        m->db_id, client->client_id));
                        return -1;
                }
+               DEBUG(DEBUG_DEBUG, (__location__ " client id[0x%08x] started "
+                                   "transaction commit retry on "
+                                   "db_id[0x%08x]\n",
+                                   client->client_id, client->db_id));
                break;
        }
 
+       if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+               DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans2_commit when recovery active\n"));
+               return -1;
+       }
+
        state = talloc_zero(ctdb, struct ctdb_persistent_state);
        CTDB_NO_MEMORY(ctdb, state);
 
@@ -228,6 +254,91 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
 }
 
 
+/*
+ * Roll out a set of persistent records (one transaction) to the cluster.
+ *
+ * The request is refused (-1) while recovery is active, when the database
+ * id found in the marshall buffer is unknown, or when the client id of the
+ * control cannot be resolved to a client.
+ *
+ * Otherwise recdata is passed on as a CTDB_CONTROL_UPDATE_RECORD control to
+ * every node of the vnn map that is not flagged NODE_FLAGS_INACTIVE, with
+ * ctdb_persistent_callback as reply callback and the freshly allocated
+ * ctdb_persistent_state as its private data.
+ *
+ * Returns 0 on success and -1 on failure.  When at least one reply is still
+ * outstanding, *async_reply is set to true, the control structure is
+ * re-parented onto the state and a timer for ctdb_persistent_store_timeout
+ * is armed using the control_timeout tunable.
+ */
+int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
+                                  struct ctdb_req_control *c,
+                                  TDB_DATA recdata, bool *async_reply)
+{
+       struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
+       struct ctdb_persistent_state *state;
+       struct ctdb_db_context *db;
+       struct ctdb_client *requester;
+       int idx;
+
+       /* no persistent updates while a recovery is running */
+       if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
+               DEBUG(DEBUG_INFO,("rejecting ctdb_control_trans3_commit when recovery active\n"));
+               return -1;
+       }
+
+       /* the target database has to be attached on this node */
+       db = find_ctdb_db(ctdb, m->db_id);
+       if (db == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control_trans3_commit: "
+                                "Unknown database db_id[0x%08x]\n", m->db_id));
+               return -1;
+       }
+
+       /* the control has to originate from a client we know about */
+       requester = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
+       if (requester == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store "
+                                "to a client. Returning error\n"));
+               return -1;
+       }
+
+       state = talloc_zero(ctdb, struct ctdb_persistent_state);
+       CTDB_NO_MEMORY(ctdb, state);
+
+       state->c    = c;
+       state->ctdb = ctdb;
+
+       for (idx = 0; idx < ctdb->vnn_map->size; idx++) {
+               struct ctdb_node *target = ctdb->nodes[ctdb->vnn_map->map[idx]];
+               int rc;
+
+               /* inactive nodes do not take part in the roll out */
+               if (target->flags & NODE_FLAGS_INACTIVE) {
+                       continue;
+               }
+
+               rc = ctdb_daemon_send_control(ctdb, target->pnn, 0,
+                                             CTDB_CONTROL_UPDATE_RECORD,
+                                             c->client_id, 0, recdata,
+                                             ctdb_persistent_callback,
+                                             state);
+               if (rc == -1) {
+                       /* give up on the first node we cannot reach */
+                       DEBUG(DEBUG_ERR,("Unable to send "
+                                        "CTDB_CONTROL_UPDATE_RECORD "
+                                        "to pnn %u\n", target->pnn));
+                       talloc_free(state);
+                       return -1;
+               }
+
+               state->num_pending++;
+               state->num_sent++;
+       }
+
+       if (state->num_pending != 0) {
+               /* replies are outstanding: answer the client later */
+               *async_reply = true;
+
+               /* the control structure must live as long as the state */
+               talloc_steal(state, c);
+
+               /* bound the wait for the replies */
+               event_add_timed(ctdb->ev, state,
+                               timeval_current_ofs(ctdb->tunable.control_timeout, 0),
+                               ctdb_persistent_store_timeout, state);
+       } else {
+               /* nothing is pending, so the state is not needed */
+               talloc_free(state);
+       }
+
+       return 0;
+}
+
+
 struct ctdb_persistent_write_state {
        struct ctdb_db_context *ctdb_db;
        struct ctdb_marshall_buffer *m;
@@ -350,7 +461,7 @@ struct childwrite_handle {
 
 static int childwrite_destructor(struct childwrite_handle *h)
 {
-       h->ctdb->statistics.pending_childwrite_calls--;
+       CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
        kill(h->child, SIGKILL);
        return 0;
 }
@@ -370,8 +481,8 @@ static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
        int ret;
        char c;
 
-       ctdb_latency(h->ctdb_db, "persistent", &h->ctdb->statistics.max_childwrite_latency, h->start_time);
-       h->ctdb->statistics.pending_childwrite_calls--;
+       CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
+       CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
 
        /* the handle needs to go away when the context is gone - when
           the handle goes away this implicitly closes the pipe, which
@@ -403,11 +514,11 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
        int ret;
        pid_t parent = getpid();
 
-       ctdb_db->ctdb->statistics.childwrite_calls++;
-       ctdb_db->ctdb->statistics.pending_childwrite_calls++;
+       CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
+       CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
 
        if (!(result = talloc_zero(state, struct childwrite_handle))) {
-               ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+               CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
                return NULL;
        }
 
@@ -415,17 +526,17 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
 
        if (ret != 0) {
                talloc_free(result);
-               ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+               CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
                return NULL;
        }
 
-       result->child = fork();
+       result->child = ctdb_fork(ctdb_db->ctdb);
 
        if (result->child == (pid_t)-1) {
                close(result->fd[0]);
                close(result->fd[1]);
                talloc_free(result);
-               ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+               CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
                return NULL;
        }
 
@@ -438,6 +549,7 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
                char c = 0;
 
                close(result->fd[0]);
+               debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
                ret = ctdb_persistent_store(state);
                if (ret != 0) {
                        DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
@@ -458,16 +570,17 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
 
        talloc_set_destructor(result, childwrite_destructor);
 
-       DEBUG(DEBUG_NOTICE, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
+       DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
 
        result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
-                                  EVENT_FD_READ|EVENT_FD_AUTOCLOSE, childwrite_handler,
+                                  EVENT_FD_READ, childwrite_handler,
                                   (void *)result);
        if (result->fde == NULL) {
                talloc_free(result);
-               ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+               CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
                return NULL;
        }
+       tevent_fd_set_auto_close(result->fde);
 
        result->start_time = timeval_current();
 
@@ -498,6 +611,12 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
+                                ctdb_db->db_name, ctdb_db->unhealthy_reason));
+               return -1;
+       }
+
        state = talloc(ctdb, struct ctdb_persistent_write_state);
        CTDB_NO_MEMORY(ctdb, state);
 
@@ -563,6 +682,10 @@ int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb,
        }
        client->num_persistent_updates--;
 
+       DEBUG(DEBUG_DEBUG, (__location__ " client id[0x%08x] finished "
+                           "transaction commit db_id[0x%08x]\n",
+                           client->client_id, ctdb_db->db_id));
+
        return 0;
 }
 
@@ -598,7 +721,9 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
                client->num_persistent_updates--;
        }
 
-       DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n"));
+       DEBUG(DEBUG_ERR,(__location__ " An error occurred during transaction on"
+                        " db_id[0x%08x] - forcing recovery\n",
+                        ctdb_db->db_id));
        client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 
        return 0;
@@ -608,9 +733,11 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
  * Tell whether a transaction is active on this node on the give DB.
  */
 int32_t ctdb_control_trans2_active(struct ctdb_context *ctdb,
+                                  struct ctdb_req_control *c,
                                   uint32_t db_id)
 {
        struct ctdb_db_context *ctdb_db;
+       struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
 
        ctdb_db = find_ctdb_db(ctdb, db_id);
        if (!ctdb_db) {
@@ -618,6 +745,10 @@ int32_t ctdb_control_trans2_active(struct ctdb_context *ctdb,
                return -1;
        }
 
+       if (client->db_id == db_id) {
+               return 0;
+       }
+
        if (ctdb_db->transaction_active) {
                return 1;
        } else {
@@ -704,4 +835,70 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
        return ctdb_control_trans2_commit(ctdb, c, ctdb_marshall_finish(m), async_reply);
 }
 
+/*
+ * Look up the sequence number record of the database identified by db_id.
+ *
+ * The record stored under CTDB_DB_SEQNUM_KEY (key length includes the
+ * terminating NUL) is fetched from the local tdb.  If the stored value is
+ * exactly sizeof(uint64_t) bytes it is copied to *seqnum, any other size
+ * yields a sequence number of 0.
+ *
+ * Returns -1 for an unknown database, otherwise the result of
+ * ctdb_ltdb_fetch(); *seqnum is only written when that result is 0.
+ */
+static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
+                                 uint32_t db_id,
+                                 uint64_t *seqnum)
+{
+       const char *seqnum_key = CTDB_DB_SEQNUM_KEY;
+       TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+       struct ctdb_db_context *db = find_ctdb_db(ctdb, db_id);
+       TDB_DATA key;
+       TDB_DATA value;
+       int32_t rc;
+
+       if (db == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
+               talloc_free(tmp_ctx);
+               return -1;
+       }
+
+       key.dsize = strlen(seqnum_key) + 1;
+       key.dptr = (uint8_t *)discard_const(seqnum_key);
+
+       /* the fetched value is allocated on tmp_ctx and dies with it */
+       rc = (int32_t)ctdb_ltdb_fetch(db, key, NULL, tmp_ctx, &value);
+       if (rc == 0) {
+               if (value.dsize == sizeof(uint64_t)) {
+                       *seqnum = *(uint64_t *)value.dptr;
+               } else {
+                       /* a value of unexpected size counts as seqnum 0 */
+                       *seqnum = 0;
+               }
+       }
+
+       talloc_free(tmp_ctx);
+       return rc;
+}
+
+/**
+ * Get the sequence number of a persistent database.
+ *
+ * indata:  the database id, read as a uint32_t from the start of the
+ *          buffer (longer buffers are accepted, trailing bytes are ignored).
+ * outdata: on success receives a uint64_t, allocated with outdata as
+ *          talloc parent, holding the sequence number.  It is left
+ *          untouched on failure.
+ *
+ * Returns 0 on success, -1 when indata is too short or the allocation
+ * fails, otherwise the error returned by ctdb_get_db_seqnum().
+ */
+int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
+                                  TDB_DATA indata,
+                                  TDB_DATA *outdata)
+{
+       uint32_t db_id;
+       int32_t ret;
+       uint64_t seqnum;
+       uint64_t *result;
+
+       /* never read the db id from a missing or truncated buffer */
+       if (indata.dptr == NULL || indata.dsize < sizeof(uint32_t)) {
+               DEBUG(DEBUG_ERR,(__location__ " Invalid data size %u\n",
+                                (unsigned)indata.dsize));
+               return -1;
+       }
+
+       db_id = *(uint32_t *)indata.dptr;
+       ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
+       if (ret != 0) {
+               return ret;
+       }
 
+       /* NOTE(review): assumes outdata is a talloc'ed TDB_DATA, as the
+        * original code did - confirm against the control dispatcher */
+       result = talloc_zero(outdata, uint64_t);
+       if (result == NULL) {
+               return -1;
+       }
+
+       /*
+        * Store through a uint64_t pointer.  Assigning via the uint8_t
+        * dptr would truncate the sequence number to its lowest byte.
+        */
+       *result = seqnum;
+
+       /* publish the buffer only once it is completely filled in */
+       outdata->dptr = (uint8_t *)result;
+       outdata->dsize = sizeof(uint64_t);
+
+       return 0;
+}