Dont allow client processes to attach to databases while we are still in recovery...
[sahlberg/ctdb.git] / server / ctdb_ltdb_server.c
index 79405467d99bf1e5a67eeeb6c0e87b3f67767757..8340c37fbdef9ed0bd693d0b38ce859a3d7d0c87 100644 (file)
 */
 
 #include "includes.h"
-#include "lib/events/events.h"
+#include "lib/tevent/tevent.h"
 #include "lib/tdb/include/tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/dir.h"
+#include "system/time.h"
 #include "../include/ctdb_private.h"
 #include "db_wrap.h"
 #include "lib/util/dlinklist.h"
+#include <ctype.h>
+
+#define PERSISTENT_HEALTH_TDB "persistent_health.tdb"
 
 /*
   this is the dummy null procedure that all databases support
@@ -137,7 +141,6 @@ int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db,
        /* now the contended path */
        h = ctdb_lockwait(ctdb_db, key, lock_fetch_callback, state);
        if (h == NULL) {
-               tdb_chainunlock(tdb, key);
                return -1;
        }
 
@@ -166,7 +169,11 @@ int ctdb_ltdb_lock_fetch_requeue(struct ctdb_db_context *ctdb_db,
        if (ret == 0) {
                ret = ctdb_ltdb_fetch(ctdb_db, key, header, hdr, data);
                if (ret != 0) {
-                       ctdb_ltdb_unlock(ctdb_db, key);
+                       int uret;
+                       uret = ctdb_ltdb_unlock(ctdb_db, key);
+                       if (uret != 0) {
+                               DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", uret));
+                       }
                }
        }
        return ret;
@@ -187,21 +194,339 @@ static void ctdb_check_db_empty(struct ctdb_db_context *ctdb_db)
        }
 }
 
+/*
+  reload the health state of a database from the persistent health tdb
+
+  The record is looked up under the database name. A non-empty record is
+  copied into ctdb_db->unhealthy_reason (replacing and freeing the previous
+  string); a missing or empty record leaves ctdb_db->unhealthy_reason NULL.
+
+  return 0 on success, -1 if the reason string could not be duplicated
+  (ctdb_db->unhealthy_reason keeps its previous value in that case)
+ */
+int ctdb_load_persistent_health(struct ctdb_context *ctdb,
+                               struct ctdb_db_context *ctdb_db)
+{
+       struct tdb_context *health_tdb = ctdb->db_persistent_health->tdb;
+       char *previous = ctdb_db->unhealthy_reason;
+       char *loaded = NULL;
+       TDB_DATA name;
+       TDB_DATA rec;
+
+       name.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+       name.dsize = strlen(ctdb_db->db_name);
+
+       rec = tdb_fetch(health_tdb, name);
+       if (rec.dsize > 0) {
+               loaded = talloc_strndup(ctdb_db,
+                                       (const char *)rec.dptr,
+                                       rec.dsize);
+       }
+
+       /* the fetched buffer is no longer needed, free(NULL) is a no-op */
+       free(rec.dptr);
+
+       if (rec.dsize > 0 && loaded == NULL) {
+               DEBUG(DEBUG_ALERT,(__location__ " talloc_strndup(%d) failed\n",
+                                  (int)rec.dsize));
+               return -1;
+       }
+
+       talloc_free(previous);
+       ctdb_db->unhealthy_reason = loaded;
+       return 0;
+}
+
+/*
+  update the health record of a database in the persistent health tdb
+
+  given_reason:       new unhealthy reason, NULL means healthy
+  num_healthy_nodes:  if given_reason is NULL and this is 0, an already
+                      stored reason is kept and prefixed with
+                      "NO-HEALTHY-NODES - " (unless it already starts with
+                      that prefix) instead of being removed
+
+  The stored record is replaced, or deleted when the database becomes
+  healthy, inside a tdb transaction. Every failure between
+  tdb_transaction_start() and tdb_transaction_commit() cancels the
+  transaction before returning. On success ctdb_db->unhealthy_reason is
+  set to the new reason (NULL when healthy).
+
+  return 0 on success, -1 on failure
+ */
+int ctdb_update_persistent_health(struct ctdb_context *ctdb,
+                                 struct ctdb_db_context *ctdb_db,
+                                 const char *given_reason,/* NULL means healthy */
+                                 int num_healthy_nodes)
+{
+       static const char no_healthy_prefix[] = "NO-HEALTHY-NODES - ";
+       struct tdb_context *tdb = ctdb->db_persistent_health->tdb;
+       int ret;
+       TDB_DATA key;
+       TDB_DATA val;
+       char *new_reason = NULL;
+       char *old_reason = NULL;
+
+       ret = tdb_transaction_start(tdb);
+       if (ret != 0) {
+               DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_start('%s') failed: %d - %s\n",
+                                  tdb_name(tdb), ret, tdb_errorstr(tdb)));
+               return -1;
+       }
+
+       /* refresh the in-memory reason from what is currently stored */
+       ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+       if (ret != 0) {
+               DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+                                  ctdb_db->db_name, ret));
+               goto fail;
+       }
+       old_reason = ctdb_db->unhealthy_reason;
+
+       key.dptr = discard_const_p(uint8_t, ctdb_db->db_name);
+       key.dsize = strlen(ctdb_db->db_name);
+
+       if (given_reason) {
+               new_reason = talloc_strdup(ctdb_db, given_reason);
+               if (new_reason == NULL) {
+                       DEBUG(DEBUG_ALERT,(__location__ " talloc_strdup(%s) failed\n",
+                                         given_reason));
+                       goto fail;
+               }
+       } else if (old_reason && num_healthy_nodes == 0) {
+               /*
+                * If the reason indicates ok, but there were no healthy
+                * nodes available, it means we have not recovered valid
+                * content of the db. So if there's an old reason, prefix
+                * it with "NO-HEALTHY-NODES - "
+                */
+               const char *prefix;
+
+               ret = strncmp(no_healthy_prefix, old_reason,
+                             sizeof(no_healthy_prefix) - 1);
+               if (ret != 0) {
+                       prefix = no_healthy_prefix;
+               } else {
+                       /* already prefixed, do not stack the prefix */
+                       prefix = "";
+               }
+               new_reason = talloc_asprintf(ctdb_db, "%s%s",
+                                        prefix, old_reason);
+               if (new_reason == NULL) {
+                       DEBUG(DEBUG_ALERT,(__location__ " talloc_asprintf(%s%s) failed\n",
+                                         prefix, old_reason));
+                       goto fail;
+               }
+       }
+
+       if (new_reason) {
+               val.dptr = discard_const_p(uint8_t, new_reason);
+               val.dsize = strlen(new_reason);
+
+               ret = tdb_store(tdb, key, val, TDB_REPLACE);
+               if (ret != 0) {
+                       /*
+                        * log first: the cancel below may overwrite the
+                        * error that tdb_errorstr() reports
+                        */
+                       DEBUG(DEBUG_ALERT,(__location__ " tdb_store('%s', %s, %s) failed: %d - %s\n",
+                                          tdb_name(tdb), ctdb_db->db_name, new_reason,
+                                          ret, tdb_errorstr(tdb)));
+                       goto fail;
+               }
+               DEBUG(DEBUG_ALERT,("Updated db health for db(%s) to: %s\n",
+                                  ctdb_db->db_name, new_reason));
+       } else if (old_reason) {
+               ret = tdb_delete(tdb, key);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ALERT,(__location__ " tdb_delete('%s', %s) failed: %d - %s\n",
+                                          tdb_name(tdb), ctdb_db->db_name,
+                                          ret, tdb_errorstr(tdb)));
+                       goto fail;
+               }
+               DEBUG(DEBUG_NOTICE,("Updated db health for db(%s): OK\n",
+                                  ctdb_db->db_name));
+       }
+
+       ret = tdb_transaction_commit(tdb);
+       if (ret != TDB_SUCCESS) {
+               DEBUG(DEBUG_ALERT,(__location__ " tdb_transaction_commit('%s') failed: %d - %s\n",
+                                  tdb_name(tdb), ret, tdb_errorstr(tdb)));
+               talloc_free(new_reason);
+               return -1;
+       }
+
+       talloc_free(old_reason);
+       ctdb_db->unhealthy_reason = new_reason;
+
+       return 0;
+
+fail:
+       /* never leave the transaction open on an error path */
+       tdb_transaction_cancel(tdb);
+       talloc_free(new_reason);
+       return -1;
+}
+
+/*
+  move a corrupted persistent tdb out of the way
+
+  The database is first marked unhealthy in the persistent health tdb,
+  with a reason naming the backup file, and is then renamed to
+  <db_path>.corrupted.<YYYYMMDDhhmmss>.0Z (timestamp taken from gmtime()).
+
+  return 0 on success, -1 on failure (nothing is renamed if the health
+  update fails)
+ */
+static int ctdb_backup_corrupted_tdb(struct ctdb_context *ctdb,
+                                    struct ctdb_db_context *ctdb_db)
+{
+       time_t now = time(NULL);
+       char *new_path = NULL;
+       char *new_reason;
+       int ret;
+       int result = -1;
+       struct tm *tm;
+
+       tm = gmtime(&now);
+       if (tm == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " gmtime() failed\n"));
+               return -1;
+       }
+
+       /* formatted like: foo.tdb.0.corrupted.20091204160825.0Z */
+       new_path = talloc_asprintf(ctdb_db, "%s.corrupted."
+                                  "%04u%02u%02u%02u%02u%02u.0Z",
+                                  ctdb_db->db_path,
+                                  (unsigned)(tm->tm_year+1900),
+                                  (unsigned)(tm->tm_mon+1),
+                                  (unsigned)tm->tm_mday,
+                                  (unsigned)tm->tm_hour,
+                                  (unsigned)tm->tm_min,
+                                  (unsigned)tm->tm_sec);
+       if (new_path == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+               return -1;
+       }
+
+       new_reason = talloc_asprintf(ctdb_db,
+                                    "ERROR - Backup of corrupted TDB in '%s'",
+                                    new_path);
+       if (new_reason == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+               goto done;
+       }
+       /* record the reason before touching the file */
+       ret = ctdb_update_persistent_health(ctdb, ctdb_db, new_reason, 0);
+       talloc_free(new_reason);
+       if (ret != 0) {
+               DEBUG(DEBUG_CRIT,(__location__
+                                ": ctdb_backup_corrupted_tdb(%s) failed to update persistent health\n",
+                                ctdb_db->db_path));
+               goto done;
+       }
+
+       ret = rename(ctdb_db->db_path, new_path);
+       if (ret != 0) {
+               DEBUG(DEBUG_CRIT,(__location__
+                                 ": ctdb_backup_corrupted_tdb(%s) rename to %s failed: %d - %s\n",
+                                 ctdb_db->db_path, new_path,
+                                 errno, strerror(errno)));
+               goto done;
+       }
+
+       DEBUG(DEBUG_CRIT,(__location__
+                        ": ctdb_backup_corrupted_tdb(%s) renamed to %s\n",
+                        ctdb_db->db_path, new_path));
+       result = 0;
+
+done:
+       /* new_path is only needed inside this function, free it on all paths */
+       talloc_free(new_path);
+       return result;
+}
+
+/*
+  reload the health state of every persistent database in ctdb->db_list
+  and log a summary of healthy and unhealthy databases
+
+  return 0 if all persistent databases are healthy, -1 if at least one is
+  unhealthy or if loading a health record failed
+ */
+int ctdb_recheck_persistent_health(struct ctdb_context *ctdb)
+{
+       struct ctdb_db_context *db;
+       int healthy = 0;
+       int unhealthy = 0;
+
+       for (db = ctdb->db_list; db != NULL; db = db->next) {
+               /* non-persistent databases have no health record */
+               if (!db->persistent) {
+                       continue;
+               }
+
+               if (ctdb_load_persistent_health(ctdb, db) != 0) {
+                       DEBUG(DEBUG_ALERT,(__location__
+                                          " load persistent health for '%s' failed\n",
+                                          db->db_path));
+                       return -1;
+               }
+
+               if (db->unhealthy_reason != NULL) {
+                       unhealthy++;
+                       DEBUG(DEBUG_ALERT,(__location__
+                                          " persistent db '%s' unhealthy: %s\n",
+                                          db->db_path,
+                                          db->unhealthy_reason));
+               } else {
+                       healthy++;
+                       DEBUG(DEBUG_INFO,(__location__
+                                  " persistent db '%s' healthy\n",
+                                  db->db_path));
+               }
+       }
+
+       /* the summary is raised to ALERT as soon as one database failed */
+       DEBUG((unhealthy!=0)?DEBUG_ALERT:DEBUG_NOTICE,
+             ("ctdb_recheck_presistent_health: OK[%d] FAIL[%d]\n",
+              healthy, unhealthy));
+
+       return (unhealthy != 0) ? -1 : 0;
+}
+
+
+/*
+  control handler: mark a database as healthy
+
+  indata.dptr is read as the 32-bit database id. The stored unhealthy
+  reason is removed through ctdb_update_persistent_health(). If the
+  database had an unhealthy reason set before the update and startup has
+  not completed yet, the recovery mode is switched to active.
+
+  return 0 on success, -1 for an unknown database or a failed update
+ */
+int32_t ctdb_control_db_set_healthy(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+       uint32_t db_id = *(uint32_t *)indata.dptr;
+       struct ctdb_db_context *db = find_ctdb_db(ctdb, db_id);
+       bool was_unhealthy;
+
+       if (db == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
+               return -1;
+       }
+
+       /* remember the state before the update clears it */
+       was_unhealthy = (db->unhealthy_reason != NULL);
+
+       if (ctdb_update_persistent_health(ctdb, db, NULL, 1) != 0) {
+               DEBUG(DEBUG_ERR,(__location__
+                                " ctdb_update_persistent_health(%s) failed\n",
+                                db->db_name));
+               return -1;
+       }
+
+       if (was_unhealthy && !ctdb->done_startup) {
+               DEBUG(DEBUG_ERR, (__location__ " db %s become healthy  - force recovery for startup\n",
+                                 db->db_name));
+               ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
+       }
+
+       return 0;
+}
+
+/*
+  control handler: report the health of a database
+
+  indata.dptr is read as the 32-bit database id. The health record is
+  reloaded from the persistent health tdb first. For a healthy database
+  *outdata is tdb_null, otherwise it points directly at
+  ctdb_db->unhealthy_reason (no copy is made) and its size includes the
+  terminating NUL.
+
+  return 0 on success, -1 for an unknown database or a failed reload
+ */
+int32_t ctdb_control_db_get_health(struct ctdb_context *ctdb,
+                                  TDB_DATA indata,
+                                  TDB_DATA *outdata)
+{
+       uint32_t db_id = *(uint32_t *)indata.dptr;
+       struct ctdb_db_context *db = find_ctdb_db(ctdb, db_id);
+       const char *reason;
+
+       if (db == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", db_id));
+               return -1;
+       }
+
+       if (ctdb_load_persistent_health(ctdb, db) != 0) {
+               DEBUG(DEBUG_ERR,(__location__
+                                " ctdb_load_persistent_health(%s) failed\n",
+                                db->db_name));
+               return -1;
+       }
+
+       *outdata = tdb_null;
+
+       reason = db->unhealthy_reason;
+       if (reason != NULL) {
+               outdata->dptr = (uint8_t *)db->unhealthy_reason;
+               outdata->dsize = strlen(reason)+1;
+       }
+
+       return 0;
+}
 
 /*
   attach to a database, handling both persistent and non-persistent databases
   return 0 on success, -1 on failure
  */
-static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, bool persistent)
+static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name,
+                            bool persistent, const char *unhealthy_reason,
+                            bool jenkinshash)
 {
        struct ctdb_db_context *ctdb_db, *tmp_db;
        int ret;
        struct TDB_DATA key;
        unsigned tdb_flags;
+       int mode = 0600;
+       int remaining_tries = 0;
 
        ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
        CTDB_NO_MEMORY(ctdb, ctdb_db);
 
+       ctdb_db->priority = 1;
        ctdb_db->ctdb = ctdb;
        ctdb_db->db_name = talloc_strdup(ctdb_db, db_name);
        CTDB_NO_MEMORY(ctdb, ctdb_db->db_name);
@@ -221,23 +546,45 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, boo
                }
        }
 
-       if (ctdb->db_directory == NULL) {
-               ctdb->db_directory = VARDIR "/ctdb";
+       if (persistent) {
+               if (unhealthy_reason) {
+                       ret = ctdb_update_persistent_health(ctdb, ctdb_db,
+                                                           unhealthy_reason, 0);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_ALERT,(__location__ " ctdb_update_persistent_health('%s','%s') failed: %d\n",
+                                                  ctdb_db->db_name, unhealthy_reason, ret));
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+               }
+
+               if (ctdb->max_persistent_check_errors > 0) {
+                       remaining_tries = 1;
+               }
+               if (ctdb->done_startup) {
+                       remaining_tries = 0;
+               }
+
+               ret = ctdb_load_persistent_health(ctdb, ctdb_db);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ALERT,(__location__ " ctdb_load_persistent_health('%s') failed: %d\n",
+                                  ctdb_db->db_name, ret));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
        }
 
-       /* make sure the db directory exists */
-       if (mkdir(ctdb->db_directory, 0700) == -1 && errno != EEXIST) {
-               DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb directory '%s'\n", 
-                        ctdb->db_directory));
+       if (ctdb_db->unhealthy_reason && remaining_tries == 0) {
+               DEBUG(DEBUG_ALERT,(__location__ "ERROR: tdb %s is marked as unhealthy: %s\n",
+                                  ctdb_db->db_name, ctdb_db->unhealthy_reason));
                talloc_free(ctdb_db);
                return -1;
        }
 
-       if (persistent && mkdir(ctdb->db_directory_persistent, 0700) == -1 && errno != EEXIST) {
-               DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb persistent directory '%s'\n", 
-                        ctdb->db_directory_persistent));
-               talloc_free(ctdb_db);
-               return -1;
+       if (ctdb_db->unhealthy_reason) {
+               /* this is just a warning, but we want that in the log file! */
+               DEBUG(DEBUG_ALERT,(__location__ "Warning: tdb %s is marked as unhealthy: %s\n",
+                                  ctdb_db->db_name, ctdb_db->unhealthy_reason));
        }
 
        /* open the database */
@@ -246,22 +593,113 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, boo
                                           db_name, ctdb->pnn);
 
        tdb_flags = persistent? TDB_DEFAULT : TDB_CLEAR_IF_FIRST | TDB_NOSYNC;
-       if (!ctdb->do_setsched) {
+       if (ctdb->valgrinding) {
                tdb_flags |= TDB_NOMMAP;
        }
+       tdb_flags |= TDB_DISALLOW_NESTING;
+       if (jenkinshash) {
+               tdb_flags |= TDB_INCOMPATIBLE_HASH;
+       }
 
+again:
        ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 
                                      ctdb->tunable.database_hash_size, 
                                      tdb_flags, 
-                                     O_CREAT|O_RDWR, 0666);
+                                     O_CREAT|O_RDWR, mode);
        if (ctdb_db->ltdb == NULL) {
-               DEBUG(DEBUG_CRIT,("Failed to open tdb '%s'\n", ctdb_db->db_path));
-               talloc_free(ctdb_db);
-               return -1;
+               struct stat st;
+               int saved_errno = errno;
+
+               if (!persistent) {
+                       DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               if (remaining_tries == 0) {
+                       DEBUG(DEBUG_CRIT,(__location__
+                                         "Failed to open persistent tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               ret = stat(ctdb_db->db_path, &st);
+               if (ret != 0) {
+                       DEBUG(DEBUG_CRIT,(__location__
+                                         "Failed to open persistent tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+               if (ret != 0) {
+                       DEBUG(DEBUG_CRIT,(__location__
+                                         "Failed to open persistent tdb '%s': %d - %s\n",
+                                         ctdb_db->db_path,
+                                         saved_errno,
+                                         strerror(saved_errno)));
+                       talloc_free(ctdb_db);
+                       return -1;
+               }
+
+               remaining_tries--;
+               mode = st.st_mode;
+               goto again;
        }
 
        if (!persistent) {
                ctdb_check_db_empty(ctdb_db);
+       } else {
+               ret = tdb_check(ctdb_db->ltdb->tdb, NULL, NULL);
+               if (ret != 0) {
+                       int fd;
+                       struct stat st;
+
+                       DEBUG(DEBUG_CRIT,("tdb_check(%s) failed: %d - %s\n",
+                                         ctdb_db->db_path, ret,
+                                         tdb_errorstr(ctdb_db->ltdb->tdb)));
+                       if (remaining_tries == 0) {
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+
+                       fd = tdb_fd(ctdb_db->ltdb->tdb);
+                       ret = fstat(fd, &st);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_CRIT,(__location__
+                                                 "Failed to fstat() persistent tdb '%s': %d - %s\n",
+                                                 ctdb_db->db_path,
+                                                 errno,
+                                                 strerror(errno)));
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+
+                       /* close the TDB */
+                       talloc_free(ctdb_db->ltdb);
+                       ctdb_db->ltdb = NULL;
+
+                       ret = ctdb_backup_corrupted_tdb(ctdb, ctdb_db);
+                       if (ret != 0) {
+                               DEBUG(DEBUG_CRIT,("Failed to backup corrupted tdb '%s'\n",
+                                                 ctdb_db->db_path));
+                               talloc_free(ctdb_db);
+                               return -1;
+                       }
+
+                       remaining_tries--;
+                       mode = st.st_mode;
+                       goto again;
+               }
        }
 
        DLIST_ADD(ctdb->db_list, ctdb_db);
@@ -293,7 +731,8 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, boo
 
        ret = ctdb_vacuum_init(ctdb_db);
        if (ret != 0) {
-               DEBUG(DEBUG_CRIT,("Failed to setup vacuuming for database '%s'\n", ctdb_db->db_name));
+               DEBUG(DEBUG_CRIT,("Failed to setup vacuuming for "
+                                 "database '%s'\n", ctdb_db->db_name));
                talloc_free(ctdb_db);
                return -1;
        }
@@ -311,17 +750,33 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, boo
  */
 int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
                               TDB_DATA *outdata, uint64_t tdb_flags, 
-                              bool persistent)
+                              bool persistent, uint32_t client_id)
 {
        const char *db_name = (const char *)indata.dptr;
        struct ctdb_db_context *db;
-       struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
+       struct ctdb_node *node;
+
+       /* dont allow any local clients to attach while we are in recovery mode
+        * except for the recovery daemon.
+        * allow all attach from the network since these are always from remote
+        * recovery daemons.
+        */
+       if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE && client_id != 0) {
+               struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
+
+               if (client != NULL && client->pid != ctdb->recoverd_pid) {
+                       DEBUG(DEBUG_ERR,("DB Attach to database %s refused for client with pid:%d since node is in recovery mode.\n", db_name, client->pid));
+                       return -1;
+               }
+       }
+
+       node = ctdb->nodes[ctdb->pnn];
 
        /* the client can optionally pass additional tdb flags, but we
           only allow a subset of those on the database in ctdb. Note
           that tdb_flags is passed in via the (otherwise unused)
           srvid to the attach control */
-       tdb_flags &= TDB_NOSYNC;
+       tdb_flags &= (TDB_NOSYNC|TDB_INCOMPATIBLE_HASH);
 
        /* If the node is inactive it is not part of the cluster
           and we should not allow clients to attach to any
@@ -342,7 +797,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
                return 0;
        }
 
-       if (ctdb_local_attach(ctdb, db_name, persistent) != 0) {
+       if (ctdb_local_attach(ctdb, db_name, persistent, NULL, (tdb_flags&TDB_INCOMPATIBLE_HASH)?true:false) != 0) {
                return -1;
        }
 
@@ -358,8 +813,11 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
        outdata->dptr  = (uint8_t *)&db->db_id;
        outdata->dsize = sizeof(db->db_id);
 
+       /* Try to ensure it's locked in mem */
+       ctdb_lockdown_memory(ctdb);
+
        /* tell all the other nodes about this database */
-       ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
+       ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, tdb_flags,
                                 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:
                                                CTDB_CONTROL_DB_ATTACH,
                                 0, CTDB_CTRL_FLAG_NOREPLY,
@@ -373,7 +831,8 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
 /*
   attach to all existing persistent databases
  */
-int ctdb_attach_persistent(struct ctdb_context *ctdb)
+static int ctdb_attach_persistent(struct ctdb_context *ctdb,
+                                 const char *unhealthy_reason)
 {
        DIR *d;
        struct dirent *de;
@@ -385,38 +844,43 @@ int ctdb_attach_persistent(struct ctdb_context *ctdb)
        }
 
        while ((de=readdir(d))) {
-               char *p, *s;
+               char *p, *s, *q;
                size_t len = strlen(de->d_name);
                uint32_t node;
+               int invalid_name = 0;
                
                s = talloc_strdup(ctdb, de->d_name);
                CTDB_NO_MEMORY(ctdb, s);
 
-               /* ignore names ending in .bak */
-               p = strstr(s, ".bak");
-               if (p != NULL) {
-                       continue;
-               }
-
                /* only accept names ending in .tdb */
                p = strstr(s, ".tdb.");
                if (len < 7 || p == NULL) {
                        talloc_free(s);
                        continue;
                }
-               if (sscanf(p+5, "%u", &node) != 1 || node != ctdb->pnn) {
+
+               /* only accept names ending with .tdb. and any number of digits */
+               q = p+5;
+               while (*q != 0 && invalid_name == 0) {
+                       if (!isdigit(*q++)) {
+                               invalid_name = 1;
+                       }
+               }
+               if (invalid_name == 1 || sscanf(p+5, "%u", &node) != 1 || node != ctdb->pnn) {
+                       DEBUG(DEBUG_ERR,("Ignoring persistent database '%s'\n", de->d_name));
                        talloc_free(s);
                        continue;
                }
                p[4] = 0;
 
-               if (ctdb_local_attach(ctdb, s, true) != 0) {
+               if (ctdb_local_attach(ctdb, s, true, unhealthy_reason, 0) != 0) {
                        DEBUG(DEBUG_ERR,("Failed to attach to persistent database '%s'\n", de->d_name));
                        closedir(d);
                        talloc_free(s);
                        return -1;
                }
-               DEBUG(DEBUG_NOTICE,("Attached to persistent database %s\n", s));
+
+               DEBUG(DEBUG_INFO,("Attached to persistent database %s\n", s));
 
                talloc_free(s);
        }
@@ -424,6 +888,158 @@ int ctdb_attach_persistent(struct ctdb_context *ctdb)
        return 0;
 }
 
+/*
+  reset the persistent health tdb after it could not be opened or failed
+  tdb_check(): open it once with TDB_CLEAR_IF_FIRST and close it again, so
+  that the caller can retry its normal open
+
+  return a warning text allocated on ctdb that the caller records as
+  unhealthy reason for the persistent databases, or NULL on failure
+ */
+static char *ctdb_clear_persistent_health_tdb(struct ctdb_context *ctdb,
+                                             const char *path)
+{
+       struct tdb_wrap *tdb;
+       char *reason;
+
+       reason = talloc_asprintf(ctdb, "WARNING - '%s' %s - %s",
+                                path,
+                                "was cleared after a failure",
+                                "manual verification needed");
+       if (reason == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+               return NULL;
+       }
+
+       tdb = tdb_wrap_open(ctdb, path,
+                           0, TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING,
+                           O_CREAT | O_RDWR, 0600);
+       if (tdb == NULL) {
+               DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s\n",
+                                 path,
+                                 errno,
+                                 strerror(errno)));
+               talloc_free(reason);
+               return NULL;
+       }
+
+       /* only opened to reset the file, the caller reopens it */
+       talloc_free(tdb);
+       return reason;
+}
+
+/*
+  set up the database directories, open the persistent health tdb and
+  attach to all existing persistent databases
+
+  Unset directory settings get their defaults below VARDIR "/ctdb". If
+  the health tdb cannot be opened or fails tdb_check(), it is reset once
+  and the open is retried; the persistent databases are then attached
+  with a warning as unhealthy reason. A second failure is fatal.
+
+  return 0 on success, -1 on failure (or the non-zero result of
+  ctdb_attach_persistent())
+ */
+int ctdb_attach_databases(struct ctdb_context *ctdb)
+{
+       int ret;
+       char *persistent_health_path = NULL;
+       char *unhealthy_reason = NULL;
+       bool first_try = true;
+
+       if (ctdb->db_directory == NULL) {
+               ctdb->db_directory = VARDIR "/ctdb";
+       }
+       if (ctdb->db_directory_persistent == NULL) {
+               ctdb->db_directory_persistent = VARDIR "/ctdb/persistent";
+       }
+       if (ctdb->db_directory_state == NULL) {
+               ctdb->db_directory_state = VARDIR "/ctdb/state";
+       }
+
+       /* make sure the db directory exists */
+       ret = mkdir(ctdb->db_directory, 0700);
+       if (ret == -1 && errno != EEXIST) {
+               DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb directory '%s'\n",
+                        ctdb->db_directory));
+               return -1;
+       }
+
+       /* make sure the persistent db directory exists */
+       ret = mkdir(ctdb->db_directory_persistent, 0700);
+       if (ret == -1 && errno != EEXIST) {
+               DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb persistent directory '%s'\n",
+                        ctdb->db_directory_persistent));
+               return -1;
+       }
+
+       /* make sure the internal state db directory exists */
+       ret = mkdir(ctdb->db_directory_state, 0700);
+       if (ret == -1 && errno != EEXIST) {
+               DEBUG(DEBUG_CRIT,(__location__ " Unable to create ctdb state directory '%s'\n",
+                        ctdb->db_directory_state));
+               return -1;
+       }
+
+       persistent_health_path = talloc_asprintf(ctdb, "%s/%s.%u",
+                                                ctdb->db_directory_state,
+                                                PERSISTENT_HEALTH_TDB,
+                                                ctdb->pnn);
+       if (persistent_health_path == NULL) {
+               DEBUG(DEBUG_CRIT,(__location__ " talloc_asprintf() failed\n"));
+               return -1;
+       }
+
+again:
+
+       ctdb->db_persistent_health = tdb_wrap_open(ctdb, persistent_health_path,
+                                                  0, TDB_DISALLOW_NESTING,
+                                                  O_CREAT | O_RDWR, 0600);
+       if (ctdb->db_persistent_health == NULL) {
+               if (!first_try) {
+                       DEBUG(DEBUG_CRIT,("Failed to open tdb '%s': %d - %s\n",
+                                         persistent_health_path,
+                                         errno,
+                                         strerror(errno)));
+                       talloc_free(persistent_health_path);
+                       talloc_free(unhealthy_reason);
+                       return -1;
+               }
+               first_try = false;
+
+               DEBUG(DEBUG_CRIT,("Failed to open tdb '%s' - retrying after CLEAR_IF_FIRST\n",
+                                 persistent_health_path));
+               unhealthy_reason = ctdb_clear_persistent_health_tdb(ctdb,
+                                               persistent_health_path);
+               if (unhealthy_reason == NULL) {
+                       talloc_free(persistent_health_path);
+                       return -1;
+               }
+
+               goto again;
+       }
+       ret = tdb_check(ctdb->db_persistent_health->tdb, NULL, NULL);
+       if (ret != 0) {
+               /* close the damaged tdb before resetting it */
+               talloc_free(ctdb->db_persistent_health);
+               ctdb->db_persistent_health = NULL;
+
+               if (!first_try) {
+                       DEBUG(DEBUG_CRIT,("tdb_check('%s') failed\n",
+                                         persistent_health_path));
+                       talloc_free(persistent_health_path);
+                       talloc_free(unhealthy_reason);
+                       return -1;
+               }
+               first_try = false;
+
+               DEBUG(DEBUG_CRIT,("tdb_check('%s') failed - retrying after CLEAR_IF_FIRST\n",
+                                 persistent_health_path));
+               unhealthy_reason = ctdb_clear_persistent_health_tdb(ctdb,
+                                               persistent_health_path);
+               if (unhealthy_reason == NULL) {
+                       talloc_free(persistent_health_path);
+                       return -1;
+               }
+
+               goto again;
+       }
+       talloc_free(persistent_health_path);
+
+       /* unhealthy_reason is NULL unless the health tdb had to be reset */
+       ret = ctdb_attach_persistent(ctdb, unhealthy_reason);
+       talloc_free(unhealthy_reason);
+
+       return ret;
+}
+
 /*
   called when a broadcast seqnum update comes in
  */
@@ -441,6 +1057,12 @@ int32_t ctdb_ltdb_update_seqnum(struct ctdb_context *ctdb, uint32_t db_id, uint3
                return -1;
        }
 
+       if (ctdb_db->unhealthy_reason) {
+               DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_ltdb_update_seqnum: %s\n",
+                                ctdb_db->db_name, ctdb_db->unhealthy_reason));
+               return -1;
+       }
+
        tdb_increment_seqnum_nonblock(ctdb_db->ltdb->tdb);
        ctdb_db->seqnum = tdb_get_seqnum(ctdb_db->ltdb->tdb);
        return 0;
@@ -467,7 +1089,7 @@ static void ctdb_ltdb_seqnum_check(struct event_context *ev, struct timed_event
        ctdb_db->seqnum = new_seqnum;
 
        /* setup a new timer */
-       ctdb_db->te = 
+       ctdb_db->seqnum_update =
                event_add_timed(ctdb->ev, ctdb_db, 
                                timeval_current_ofs(ctdb->tunable.seqnum_interval/1000, (ctdb->tunable.seqnum_interval%1000)*1000),
                                ctdb_ltdb_seqnum_check, ctdb_db);
@@ -485,8 +1107,8 @@ int32_t ctdb_ltdb_enable_seqnum(struct ctdb_context *ctdb, uint32_t db_id)
                return -1;
        }
 
-       if (ctdb_db->te == NULL) {
-               ctdb_db->te = 
+       if (ctdb_db->seqnum_update == NULL) {
+               ctdb_db->seqnum_update =
                        event_add_timed(ctdb->ev, ctdb_db, 
                                        timeval_current_ofs(ctdb->tunable.seqnum_interval/1000, (ctdb->tunable.seqnum_interval%1000)*1000),
                                        ctdb_ltdb_seqnum_check, ctdb_db);
@@ -497,3 +1119,26 @@ int32_t ctdb_ltdb_enable_seqnum(struct ctdb_context *ctdb, uint32_t db_id)
        return 0;
 }
 
+/*
+  set the priority of a database
+
+  indata.dptr is read as a struct ctdb_db_priority carrying the db_id of
+  the target database and the priority to assign to it.
+
+  returns 0 once the priority has been stored on the database context,
+  or -1 when the db_id is not known or the priority lies outside the
+  range 1..NUM_DB_PRIORITIES
+ */
+int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+       struct ctdb_db_priority *req;
+       struct ctdb_db_context *db;
+
+       req = (struct ctdb_db_priority *)indata.dptr;
+
+       /* look up the database this request refers to */
+       db = find_ctdb_db(ctdb, req->db_id);
+       if (db == NULL) {
+               DEBUG(DEBUG_ERR,("Unknown db_id 0x%x in ctdb_set_db_priority\n", req->db_id));
+               return -1;
+       }
+
+       /* only priorities in 1..NUM_DB_PRIORITIES are accepted */
+       if (!((req->priority >= 1) && (req->priority <= NUM_DB_PRIORITIES))) {
+               DEBUG(DEBUG_ERR,("Trying to set invalid priority : %u\n", req->priority));
+               return -1;
+       }
+
+       db->priority = req->priority;
+       DEBUG(DEBUG_INFO,("Setting DB priority to %u for db 0x%08x\n", req->priority, req->db_id));
+
+       return 0;
+}
+
+