recoverd: Fix tunable NoIPTakeoverOnDisabled, rename to NoIPHostOnAllDisabled
authorMartin Schwenke <martin@meltin.net>
Fri, 3 May 2013 06:59:20 +0000 (16:59 +1000)
committerMartin Schwenke <martin@meltin.net>
Tue, 7 May 2013 06:20:46 +0000 (16:20 +1000)
This really needs to be per-node.  The rename is because nodes with
this tunable switched on should drop IPs if they become unhealthy (or
disabled in some other way).

* Add new flag NODE_FLAGS_NOIPHOST, only used in recovery daemon.

* Enhance set_ipflags_internal() and set_ipflags() to setup
  NODE_FLAGS_NOIPHOST depending on setting of NoIPHostOnAllDisabled
  and/or whether nodes are disabled/inactive.

* Replace can_node_servce_ip() with functions can_node_host_ip() and
  can_node_takeover_ip().  These functions are the only ones that need
  to look at NODE_FLAGS_NOIPTAKEOVER and NODE_FLAGS_NOIPHOST.  They
  can make the decision without looking at any other flags due to
  previous setup.

* Remove explicit flag checking in IP allocation functions (including
  unassign_unsuitable_ips()) and just call can_node_host_ip() and
  can_node_takeover_ip() as appropriate.

* Update test code to handle CTDB_SET_NoIPHostOnAllDisabled.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Pair-programmed-with: Amitay Isaacs <amitay@gmail.com>

doc/ctdbd.1.xml
include/ctdb_private.h
include/ctdb_protocol.h
server/ctdb_takeover.c
server/ctdb_tunables.c
tests/src/ctdb_takeover_tests.c
tests/takeover/lcp2.017.sh
tests/takeover/lcp2.018.sh
tests/takeover/lcp2.019.sh

index 18fddc2258f5dab9bc67649390e2c2d8c25c7702..9516164e171ecc8a97b5322a75a3ec4f6d3cb378 100644 (file)
     </para>
     </refsect2>
 
-    <refsect2><title>NoIPTakeoverOnDisabled</title>
+    <refsect2><title>NoIPHostOnAllDisabled</title>
     <para>Default: 0</para>
     <para>
     If no nodes are healthy then by default ctdb will happily host
     public IPs on disabled (unhealthy or administratively disabled)
     nodes.  This can cause problems, for example if the underlying
-    cluster filesystem is not mounted.  When set to 1 this behaviour
-    is switched off and disabled nodes will not be able to takeover
-    IPs.
+    cluster filesystem is not mounted.  When set to 1 on a node and
+    that node is disabled it, any IPs hosted by this node will be
+    released and the node will not takeover any IPs until it is no
+    longer disabled.
     </para>
     </refsect2>
 
index e6cfa7fdebb964f1bf67094aea8c7493024eff99..813a5a661c258c63b9de400947e5e57a6972a071 100644 (file)
@@ -135,7 +135,7 @@ struct ctdb_tunable {
        uint32_t db_record_size_warn;
        uint32_t db_size_warn;
        uint32_t pulldb_preallocation_size;
-       uint32_t no_ip_takeover_on_disabled;
+       uint32_t no_ip_host_on_all_disabled;
        uint32_t deadlock_timeout;
        uint32_t samba3_hack;
 };
index f3234ed6280242a3dbb28a44fb5149949816dda2..3133b4e6d916c0916a6008267a6656bbedb5cfa3 100644 (file)
@@ -584,7 +584,9 @@ struct ctdb_node_map {
 #define NODE_FLAGS_DISABLED            (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED)
 #define NODE_FLAGS_INACTIVE            (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)
 
-#define NODE_FLAGS_NOIPTAKEOVER                0x01000000 /* this node can takeover any new ip addresses, this flag is ONLY valid within the recovery daemon */
+/* These flags are ONLY valid within the recovery daemon */
+#define NODE_FLAGS_NOIPTAKEOVER                0x01000000 /* can not takeover additional IPs */
+#define NODE_FLAGS_NOIPHOST            0x02000000 /* can not host IPs */
 
 
 /*
index ac4c6542ed5132f93c8ef70fcdf7d567d78142c1..4cb6a6d64a70003c4343706ca9d3a5f8e4a882c0 100644 (file)
@@ -1299,31 +1299,46 @@ static int node_ip_coverage(struct ctdb_context *ctdb,
 }
 
 
-/* Check if this is a public ip known to the node, i.e. can that
  node takeover this ip ?
+/* Can the given node host the given IP: is the public IP known to the
* node and is NOIPHOST unset?
 */
-static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
-               struct ctdb_public_ip_list *ip)
+static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn, 
+                            struct ctdb_node_map *nodemap,
+                            struct ctdb_public_ip_list *ip)
 {
        struct ctdb_all_public_ips *public_ips;
        int i;
 
+       if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPHOST) {
+               return false;
+       }
+
        public_ips = ctdb->nodes[pnn]->available_public_ips;
 
        if (public_ips == NULL) {
-               return -1;
+               return false;
        }
 
        for (i=0;i<public_ips->num;i++) {
                if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
                        /* yes, this node can serve this public ip */
-                       return 0;
+                       return true;
                }
        }
 
-       return -1;
+       return false;
 }
 
+static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn, 
+                                struct ctdb_node_map *nodemap,
+                                struct ctdb_public_ip_list *ip)
+{
+       if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPTAKEOVER) {
+               return false;
+       }
+
+       return can_node_host_ip(ctdb, pnn, nodemap, ip);
+}
 
 /* search the node lists list for a node to takeover this ip.
    pick the node that currently are serving the least number of ips
@@ -1339,21 +1354,8 @@ static int find_takeover_node(struct ctdb_context *ctdb,
 
        pnn    = -1;
        for (i=0;i<nodemap->num;i++) {
-               if (nodemap->nodes[i].flags & NODE_FLAGS_NOIPTAKEOVER) {
-                       /* This node is not allowed to takeover any addresses
-                       */
-                       continue;
-               }
-
-               if (nodemap->nodes[i].flags & mask) {
-                       /* This node is not healty and can not be used to serve
-                          a public address 
-                       */
-                       continue;
-               }
-
                /* verify that this node can serve this ip */
-               if (can_node_serve_ip(ctdb, i, ip)) {
+               if (!can_node_takeover_ip(ctdb, i, nodemap, ip)) {
                        /* no it couldnt   so skip to the next node */
                        continue;
                }
@@ -1642,17 +1644,8 @@ try_again:
                maxnode = -1;
                minnode = -1;
                for (i=0;i<nodemap->num;i++) {
-                       if (nodemap->nodes[i].flags & mask) {
-                               continue;
-                       }
-
-                       /* Only check nodes that are allowed to takeover an ip */
-                       if (nodemap->nodes[i].flags & NODE_FLAGS_NOIPTAKEOVER) {
-                               continue;
-                       }
-
                        /* only check nodes that can actually serve this ip */
-                       if (can_node_serve_ip(ctdb, i, tmp_ip)) {
+                       if (!can_node_takeover_ip(ctdb, i, nodemap, tmp_ip)) {
                                /* no it couldnt   so skip to the next node */
                                continue;
                        }
@@ -1815,19 +1808,12 @@ static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
                        }
 
                        for (dstnode=0; dstnode < nodemap->num; dstnode++) {
-                               /* Only check nodes that are allowed to takeover an ip */
-                               if (nodemap->nodes[dstnode].flags & NODE_FLAGS_NOIPTAKEOVER) {
-                                       continue;
-                               }
-
-                               /* only check nodes that can actually serve this ip */
-                               if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
+                               /* only check nodes that can actually takeover this ip */
+                               if (!can_node_takeover_ip(ctdb, dstnode,
+                                                         nodemap, tmp_ip)) {
                                        /* no it couldnt   so skip to the next node */
                                        continue;
                                }
-                               if (nodemap->nodes[dstnode].flags & mask) {
-                                       continue;
-                               }
 
                                dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
                                dstimbl = lcp2_imbalances[dstnode] + dstdsum;
@@ -1929,13 +1915,9 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
                                continue;
                        }
 
-                       /* Only check nodes that are allowed to takeover an ip */
-                       if (nodemap->nodes[dstnode].flags & NODE_FLAGS_NOIPTAKEOVER) {
-                               continue;
-                       }
-
-                       /* only check nodes that can actually serve this ip */
-                       if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
+                       /* only check nodes that can actually takeover this ip */
+                       if (!can_node_takeover_ip(ctdb, dstnode,
+                                                 nodemap, tmp_ip)) {
                                /* no it couldnt   so skip to the next node */
                                continue;
                        }
@@ -2075,21 +2057,6 @@ static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
 {
        struct ctdb_public_ip_list *tmp_ip;
 
-       /* mark all public addresses with a masked node as being served by
-          node -1
-       */
-       for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
-               if (tmp_ip->pnn == -1) {
-                       continue;
-               }
-               if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
-                       DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
-                                          ctdb_addr_to_str(&(tmp_ip->addr)),
-                                          tmp_ip->pnn));
-                       tmp_ip->pnn = -1;
-               }
-       }
-
        /* verify that the assigned nodes can serve that public ip
           and set it to -1 if not
        */
@@ -2097,7 +2064,8 @@ static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
                if (tmp_ip->pnn == -1) {
                        continue;
                }
-               if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
+               if (!can_node_host_ip(ctdb, tmp_ip->pnn,
+                                     nodemap, tmp_ip) != 0) {
                        /* this node can not serve this ip. */
                        DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
                                           ctdb_addr_to_str(&(tmp_ip->addr)),
@@ -2224,7 +2192,7 @@ static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
        */
        mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
        if (all_nodes_are_disabled(nodemap) &&
-           (ctdb->tunable.no_ip_takeover_on_disabled == 0)) {
+           (ctdb->tunable.no_ip_host_on_all_disabled == 0)) {
                /* We didnt have any completely healthy nodes so
                   use "disabled" nodes as a fallback
                */
@@ -2332,22 +2300,54 @@ static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
 /* Set internal flags for IP allocation:
  *   Clear ip flags
  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
+ *   Set NOIPHOST ip flag for each INACTIVE node
+ *   if all nodes are disabled:
+ *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
+ *   else
+ *     Set NOIPHOST ip flags for disabled nodes
  */
 static void set_ipflags_internal(struct ctdb_node_map *nodemap,
-                                uint32_t *tval_noiptakeover)
+                                uint32_t *tval_noiptakeover,
+                                uint32_t *tval_noiphostonalldisabled)
 {
        int i;
 
        /* Clear IP flags */
        for (i=0;i<nodemap->num;i++) {
-               nodemap->nodes[i].flags &= ~NODE_FLAGS_NOIPTAKEOVER;
+               nodemap->nodes[i].flags &=
+                       ~(NODE_FLAGS_NOIPTAKEOVER|NODE_FLAGS_NOIPHOST);
        }
 
-       /* Can not take IPs on node with NoIPTakeover set */
        for (i=0;i<nodemap->num;i++) {
+               /* Can not take IPs on node with NoIPTakeover set */
                if (tval_noiptakeover[i] != 0) {
                        nodemap->nodes[i].flags |= NODE_FLAGS_NOIPTAKEOVER;
                }
+
+               /* Can not host IPs on INACTIVE node */
+               if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
+                       nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
+               }
+       }
+
+       if (all_nodes_are_disabled(nodemap)) {
+               /* If all nodes are disabled, can not host IPs on node
+                * with NoIPHostOnAllDisabled set
+                */
+               for (i=0;i<nodemap->num;i++) {
+                       if (tval_noiphostonalldisabled[i] != 0) {
+                               nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
+                       }
+               }
+       } else {
+               /* If some nodes are not disabled, then can not host
+                * IPs on DISABLED node
+                */
+               for (i=0;i<nodemap->num;i++) {
+                       if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
+                               nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
+                       }
+               }
        }
 }
 
@@ -2356,6 +2356,7 @@ static bool set_ipflags(struct ctdb_context *ctdb,
                        struct ctdb_node_map *nodemap)
 {
        uint32_t *tval_noiptakeover;
+       uint32_t *tval_noiphostonalldisabled;
 
        tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
                                                   "NoIPTakeover");
@@ -2363,9 +2364,18 @@ static bool set_ipflags(struct ctdb_context *ctdb,
                return false;
        }
 
-       set_ipflags_internal(nodemap, tval_noiptakeover);
+       tval_noiphostonalldisabled =
+               get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
+                                      "NoIPHostOnAllDisabled");
+       if (tval_noiphostonalldisabled == NULL) {
+               return false;
+       }
+
+       set_ipflags_internal(nodemap,
+                            tval_noiptakeover, tval_noiphostonalldisabled);
 
        talloc_free(tval_noiptakeover);
+       talloc_free(tval_noiphostonalldisabled);
 
        return true;
 }
@@ -2396,7 +2406,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
                goto ipreallocated;
        }
 
-
        if (!set_ipflags(ctdb, tmp_ctx, nodemap)) {
                DEBUG(DEBUG_ERR,("Failed to set IP flags from tunables\n"));
                return -1;
index aa3115d37bb09f1b160a161716fcab6f62a499fa..84fb9490070c69e8ed3e3d61301b4142a2735549 100644 (file)
@@ -84,7 +84,7 @@ static const struct {
        { "DBRecordSizeWarn",   10000000,  offsetof(struct ctdb_tunable, db_record_size_warn), false },
        { "DBSizeWarn",        100000000,  offsetof(struct ctdb_tunable, db_size_warn), false },
        { "PullDBPreallocation", 10*1024*1024,  offsetof(struct ctdb_tunable, pulldb_preallocation_size), false },
-       { "NoIPTakeoverOnDisabled",    0,  offsetof(struct ctdb_tunable, no_ip_takeover_on_disabled), false },
+       { "NoIPHostOnAllDisabled",    0,  offsetof(struct ctdb_tunable, no_ip_host_on_all_disabled), false },
        { "DeadlockTimeout",    300, offsetof(struct ctdb_tunable, deadlock_timeout), false },
        { "Samba3AvoidDeadlocks", 0, offsetof(struct ctdb_tunable, samba3_hack), false },
 };
index 9083af8f7924ab8d3ca995d6edfef1648f075514..89894f8ea4b73c84666fbcbae9d72a65ff5e1804 100644 (file)
@@ -363,6 +363,7 @@ void ctdb_test_init(const char nodestates[],
        uint32_t nodeflags[CTDB_TEST_MAX_NODES];
        char *tok, *ns, *t;
        uint32_t *tval_noiptakeover;
+       uint32_t *tval_noiptakeoverondisabled;
 
        *ctdb = talloc_zero(NULL, struct ctdb_context);
 
@@ -400,13 +401,11 @@ void ctdb_test_init(const char nodestates[],
                }
        }
 
-       (*ctdb)->tunable.no_ip_takeover_on_disabled = 0;
-       if (getenv("CTDB_SET_NoIPTakeoverOnDisabled")) {
-               (*ctdb)->tunable.no_ip_takeover_on_disabled = (uint32_t) strtoul(getenv("CTDB_SET_NoIPTakeoverOnDisabled"), NULL, 0);
-       }
-              
        tval_noiptakeover = get_tunable_values(*ctdb, numnodes,
                                               "CTDB_SET_NoIPTakeover");
+       tval_noiptakeoverondisabled =
+               get_tunable_values(*ctdb, numnodes,
+                                  "CTDB_SET_NoIPHostOnAllDisabled");
 
        *nodemap =  talloc_array(*ctdb, struct ctdb_node_map, numnodes);
        (*nodemap)->num = numnodes;
@@ -427,7 +426,7 @@ void ctdb_test_init(const char nodestates[],
                (*ctdb)->nodes[i]->known_public_ips = avail[i];
        }
 
-       set_ipflags_internal(*nodemap, tval_noiptakeover);
+       set_ipflags_internal(*nodemap, tval_noiptakeover, tval_noiptakeoverondisabled);
 }
 
 /* IP layout is read from stdin. */
index b09cb008ad75414c2ad64908d6e91f8cd526e2de..07b22fb9f99e9854ca620b22070d682c85075b48 100755 (executable)
@@ -2,10 +2,10 @@
 
 . "${TEST_SCRIPTS_DIR}/unit.sh"
 
-define_test "3 nodes, no IPs assigned, all unhealthy, NoIPTakeoverOnDisabled"
+define_test "3 nodes, no IPs assigned, all unhealthy, NoIPHostOnAllDisabled"
 
 export CTDB_TEST_LOGLEVEL=0
-export CTDB_SET_NoIPTakeoverOnDisabled=1
+export CTDB_SET_NoIPHostOnAllDisabled=1
 
 required_result <<EOF
 192.168.21.254 -1
index 6bc448b7e5c758a15720aa2a1ddce06b5d9a5eb8..4a797f78c62e1d5a179e9aacea56dc5519db00db 100755 (executable)
@@ -2,10 +2,10 @@
 
 . "${TEST_SCRIPTS_DIR}/unit.sh"
 
-define_test "3 nodes, all IPs assigned, all unhealthy, NoIPTakeoverOnDisabled"
+define_test "3 nodes, all IPs assigned, all unhealthy, NoIPHostOnAllDisabled"
 
 export CTDB_TEST_LOGLEVEL=0
-export CTDB_SET_NoIPTakeoverOnDisabled=1
+export CTDB_SET_NoIPHostOnAllDisabled=1
 
 required_result <<EOF
 192.168.21.254 -1
index 5080e07b385dec2a419db3dd9eb5ba7abbabd166..0d8937cdc54e4805e0b7a20e1ffaf50dc6c4bf79 100755 (executable)
@@ -2,10 +2,10 @@
 
 . "${TEST_SCRIPTS_DIR}/unit.sh"
 
-define_test "3 nodes, all IPs assigned, 2->3 unhealthy, NoIPTakeoverOnDisabled"
+define_test "3 nodes, all IPs assigned, 2->3 unhealthy, NoIPHostOnAllDisabled"
 
 export CTDB_TEST_LOGLEVEL=0
-export CTDB_SET_NoIPTakeoverOnDisabled=1
+export CTDB_SET_NoIPHostOnAllDisabled=1
 
 required_result <<EOF
 192.168.21.254 -1