}
+struct lcp2_imbalance_pnn { /* one sortable (imbalance, node index) record */
+ uint32_t imbalance; /* imbalance value copied from lcp2_imbalances[] for this node */
+ int pnn; /* index of the node that the imbalance value was read from */
+};
+
+int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
+{ /* qsort() comparator for struct lcp2_imbalance_pnn: largest imbalance sorts first */
+ const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
+ const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
+
+ if (lipa->imbalance > lipb->imbalance) { /* a is more imbalanced: place it before b */
+ return -1;
+ } else if (lipa->imbalance == lipb->imbalance) { /* tie: pnn is not used as a tie-breaker */
+ return 0;
+ } else { /* a is less imbalanced: place it after b */
+ return 1;
+ }
+}
+
/* LCP2 algorithm for rebalancing the cluster. This finds the source
* node with the highest LCP2 imbalance, and then determines the best
* IP/destination node combination to move from the source node.
uint32_t *lcp2_imbalances,
bool *newly_healthy)
{
- int srcnode, i, num_newly_healthy;
- uint32_t maximbl, b;
+ int i, num_newly_healthy;
+ struct lcp2_imbalance_pnn * lips;
+ bool ret;
/* It is only worth continuing if we have suitable target
* nodes to transfer IPs to. This check is much cheaper than
return false;
}
- /* Get the node with the highest imbalance metric. */
- srcnode = -1;
- maximbl = 0;
- for (i=0; i < nodemap->num; i++) {
- b = lcp2_imbalances[i];
- if ((srcnode == -1) || (b > maximbl)) {
- srcnode = i;
- maximbl = b;
- }
+ /* Put the imbalances and nodes into an array, sort them and
+ * iterate through candidates. Usually the 1st one will be
+ * used, so this doesn't cost much...
+ */
+ lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num);
+ for (i = 0; i < nodemap->num; i++) {
+ lips[i].imbalance = lcp2_imbalances[i];
+ lips[i].pnn = i;
}
+ qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn),
+ lcp2_cmp_imbalance_pnn);
- /* This means that all nodes had 0 or 1 addresses, so can't be
- * imbalanced.
- */
- if (maximbl == 0) {
- return false;
+ ret = false;
+ for (i = 0; i < nodemap->num; i++) {
+ /* This means that all nodes had 0 or 1 addresses, so
+ * can't be imbalanced.
+ */
+ if (lips[i].imbalance == 0) {
+ break;
+ }
+
+ if (lcp2_failback_candidate(ctdb,
+ nodemap,
+ all_ips,
+ lips[i].pnn,
+ lips[i].imbalance,
+ lcp2_imbalances,
+ newly_healthy)) {
+ ret = true;
+ break;
+ }
}
- return lcp2_failback_candidate(ctdb,
- nodemap,
- all_ips,
- srcnode,
- maximbl,
- lcp2_imbalances,
- newly_healthy);
+ talloc_free(lips);
+ return ret;
}
/* The calculation part of the IP allocation algorithm.
/*
make any IP alias changes for public addresses that are necessary
*/
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
+int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
+ client_async_callback fail_callback, void *callback_data)
{
int i;
struct ctdb_public_ip ip;
async_data = talloc_zero(tmp_ctx, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+ async_data->fail_callback = fail_callback;
+ async_data->callback_data = callback_data;
+
for (i=0;i<nodemap->num;i++) {
/* don't talk to unconnected nodes, but do talk to banned nodes */
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
/* tell all nodes to get their own IPs */
async_data = talloc_zero(tmp_ctx, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+
+ async_data->fail_callback = fail_callback;
+ async_data->callback_data = callback_data;
+
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
/* this IP won't be taken over */
}
ipreallocated:
- /* tell all nodes to update natwg */
- /* send the flags update natgw on all connected nodes */
+ /*
+ * Tell all connected, but not stopped (since they are in
+ * recovery and will reject the event), nodes to run
+ * eventscripts to process the "ipreallocated" event. This
+ * can do a lot of things, including restarting services to
+ * reconfigure them if public IPs have moved. Once upon a
+ * time this event was only used to update natgw.
+ */
data.dptr = discard_const("ipreallocated");
data.dsize = strlen((char *)data.dptr) + 1;
- nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+ nodes = list_of_nodes(ctdb, nodemap, tmp_ctx,
+ NODE_FLAGS_DISCONNECTED|NODE_FLAGS_STOPPED, -1);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
nodes, 0, TAKEOVER_TIMEOUT(),
false, data,
- NULL, NULL,
- NULL) != 0) {
+ NULL, fail_callback,
+ callback_data) != 0) {
DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
}
*/
static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
{
- if (killtcp->vnn) {
- killtcp->vnn->killtcp = NULL;
+ struct ctdb_vnn *tmpvnn; /* cursor used to walk the killtcp->ctdb->vnn list */
+
+ /* verify that this vnn is still active, i.e. still linked on the ctdb->vnn list, before dereferencing it */
+ for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
+ if (tmpvnn == killtcp->vnn) {
+ break;
+ }
+ }
+
+ if (tmpvnn == NULL) { /* vnn was not found on the list: do not touch it */
+ return 0;
}
+
+ if (killtcp->vnn->killtcp != killtcp) { /* vnn does not point back at this object: leave its pointer alone */
+ return 0;
+ }
+
+ killtcp->vnn->killtcp = NULL; /* clear the vnn's back-pointer to this killtcp */
+
return 0;
}
a new structure
*/
if (killtcp == NULL) {
- killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
+ killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
CTDB_NO_MEMORY(ctdb, killtcp);
killtcp->vnn = vnn;