ctdb/tests/takeover/ctdb_takeover.py

   1 #!/usr/bin/env python
   2
   3 # ctdb ip takeover code
   4
   5 # Copyright (C) Martin Schwenke, Ronnie Sahlberg 2010, 2011
   6
   7 # Based on original CTDB C code:
   8 #
   9 # Copyright (C) Ronnie Sahlberg  2007
  10 # Copyright (C) Andrew Tridgell  2007
  11
  12 # This program is free software; you can redistribute it and/or modify
  13 # it under the terms of the GNU General Public License as published by
  14 # the Free Software Foundation; either version 3 of the License, or
  15 # (at your option) any later version.
  16
  17 # This program is distributed in the hope that it will be useful,
  18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 # GNU General Public License for more details.
  21
  22 # You should have received a copy of the GNU General Public License
  23 # along with this program; if not, see <http://www.gnu.org/licenses/>.
  24
  25
  26 import os
  27 import sys
  28 # Use optparse since newer argparse not available in RHEL5/EPEL.
  29 from optparse import OptionParser
  30 import copy
  31 import random
  32 import itertools
  33
  34 # For parsing IP addresses
  35 import socket
  36 import struct
  37
# Parsed command-line options.  This stays None until process_args()
# assigns the optparse result to it; the output helpers and the
# simulation classes below read their settings from this object.
options = None
  39
  40 def process_args(extra_options=[]):
  41     global options
  42
  43     parser = OptionParser(option_list=extra_options)
  44
  45     parser.add_option("--nd",
  46                       action="store_false", dest="deterministic_public_ips",
  47                       default=True,
  48                       help="turn off deterministic_public_ips")
  49     parser.add_option("--ni",
  50                       action="store_true", dest="no_ip_failback", default=False,
  51                       help="turn on no_ip_failback")
  52     parser.add_option("-L", "--lcp2",
  53                       action="store_true", dest="lcp2", default=False,
  54                       help="use LCP2 IP rebalancing algorithm [default: %default]")
  55     parser.add_option("-b", "--balance",
  56                       action="store_true", dest="balance", default=False,
  57                       help="show (im)balance information after each event")
  58     parser.add_option("-d", "--diff",
  59                       action="store_true", dest="diff", default=False,
  60                       help="show IP address movements for each event")
  61     parser.add_option("-n", "--no-print",
  62                       action="store_false", dest="show", default=True,
  63                       help="don't show IP address layout after each event")
  64     parser.add_option("-v", "--verbose",
  65                       action="count", dest="verbose", default=0,
  66                       help="print information and actions taken to stdout")
  67     parser.add_option("-r", "--retries",
  68                       action="store", type="int", dest="retries", default=5,
  69                       help="number of retry loops for rebalancing non-deterministic failback [default: %default]")
  70     parser.add_option("-i", "--iterations",
  71                       action="store", type="int", dest="iterations",
  72                       default=1000,
  73                       help="number of iterations to run in test [default: %default]")
  74     parser.add_option("-o", "--odds",
  75                       action="store", type="int", dest="odds", default=4,
  76                       help="make the chances of a failover 1 in ODDS [default: %default]")
  77     parser.add_option("-A", "--aggressive",
  78                       action="store_true", dest="aggressive", default=False,
  79                       help="apply ODDS to try to flip each node [default: %default]")
  80
  81     def seed_callback(option, opt, value, parser):
  82         random.seed(value)
  83     parser.add_option("-s", "--seed",
  84                       action="callback", type="int", callback=seed_callback,
  85                       help="initial random number seed for random events")
  86
  87     parser.add_option("-x", "--exit",
  88                       action="store_true", dest="exit", default=False,
  89                       help="exit on the 1st gratuitous IP move or IP imbalance")
  90     parser.add_option("-H", "--hard-imbalance-limit",
  91                       action="store", type="int", dest="hard_limit", default=1,
  92                       help="exceeding this limit causes termination  [default: %default]")
  93     parser.add_option("-S", "--soft-imbalance-limit",
  94                       action="store", type="int", dest="soft_limit", default=1,
  95                       help="exceeding this limit increments a counter [default: %default]")
  96
  97     (options, args) = parser.parse_args()
  98
  99     if len(args) != 0:
 100         parser.error("too many argumentss")
 101
 102 def print_begin(t, delim='='):
 103     print delim * 40
 104     print "%s:" % (t)
 105
 106 def print_end():
 107     print "-" * 40
 108
 109 def verbose_begin(t):
 110     if options.verbose > 0:
 111         print_begin(t)
 112
 113 def verbose_end():
 114     if options.verbose > 0:
 115         print_end()
 116
 117 def verbose_print(t):
 118     if options.verbose > 0:
 119         if not type(t) == list:
 120             t = [t]
 121         if t != []:
 122             print "\n".join([str(i) for i in t])
 123
 124 # more than this and we switch to the logging module...  :-)
 125 def debug_begin(t):
 126     if options.verbose > 1:
 127         print_begin(t, '-')
 128
 129 def debug_end():
 130     if options.verbose > 1:
 131         print_end()
 132
 133 def debug_print(t):
 134     if options.verbose > 1:
 135         if not type(t) == list:
 136             t = [t]
 137         if t != []:
 138             print "\n".join([str(i) for i in t])
 139
 140 def ip_to_list_of_ints(ip):
 141     # Be lazy... but only expose errors in IPv4 addresses, since
 142     # they'll be more commonly used.  :-)
 143     try:
 144         l = socket.inet_pton(socket.AF_INET6, ip)
 145     except:
 146         # Pad with leading 0s.  This makes IPv4 addresses comparable
 147         # with IPv6 but reduces the overall effectiveness of the
 148         # algorithm.  The alternative would be to treat these
 149         # addresses separately while trying to keep all the IPs in
 150         # overall balance.
 151         l = "".join(itertools.repeat("\0", 12)) + \
 152             socket.inet_pton(socket.AF_INET, ip)
 153
 154     return map(lambda x: struct.unpack('B', x)[0], l)
 155
 156 def ip_distance(ip1, ip2):
 157     """Calculate the distance between 2 IPs.
 158
 159     This is the length of the longtest common prefix between the IPs.
 160     It is calculated by XOR-ing the 2 IPs together and counting the
 161     number of leading zeroes."""
 162
 163     distance = 0
 164     for (o1, o2) in zip(ip_to_list_of_ints(ip1), ip_to_list_of_ints(ip2)):
 165         # XOR this pair of octets
 166         x = o1 ^ o2
 167         # count number leading zeroes
 168         if x == 0:
 169             distance += 8
 170         else:
 171             # bin() gives minimal length '0bNNN' string
 172             distance += (8 - (len(bin(x)) - 2))
 173             break
 174
 175     return distance
 176
 177 def ip_distance_2_sum(ip, ips):
 178     """Calculate the IP distance for the given IP relative to IPs.
 179
 180     This could be made more efficient by insering ip_distance_2 into
 181     the loop in this function.  However, that would result in some
 182     loss of clarity and also will not be necessary in a C
 183     implemntation."""
 184
 185     sum = 0
 186     for i in ips:
 187         sum += ip_distance(ip, i) ** 2
 188
 189     return sum
 190
 191 def imbalance_metric(ips):
 192     """Return the imbalance metric for a group of IPs.
 193
 194     This is the sum of squares of the IP distances between each pair of IPs."""
 195     if len(ips) > 1:
 196         (h, t) = (ips[0], ips[1:])
 197         return ip_distance_2_sum(h, t) + imbalance_metric(t)
 198     else:
 199         return 0
 200
 201 def mean(l):
 202     return float(sum(l))/len(l)
 203
 204 class Node(object):
 205     def __init__(self, public_addresses):
 206         # List of list allows groups of IPs to be passed in.  They're
 207         # not actually used in the algorithm but are just used by
 208         # calculate_imbalance() for checking the simulation.  Note
 209         # that people can pass in garbage and make this code
 210         # fail... but we're all friends here in simulation world...
 211         # :-)
 212         if type(public_addresses[0]) is str:
 213             self.public_addresses = set(public_addresses)
 214             self.ip_groups = []
 215         else:
 216             # flatten
 217             self.public_addresses = set([i for s in public_addresses for i in s])
 218             self.ip_groups = public_addresses
 219
 220         self.current_addresses = set()
 221         self.healthy = True
 222         self.imbalance = -1
 223
 224     def __str__(self):
 225         return "%s %s%s" % \
 226             ("*" if len(self.public_addresses) == 0 else \
 227                  (" " if self.healthy else "#"),
 228              sorted(list(self.current_addresses)),
 229              " %d" % self.imbalance if options.lcp2 else "")
 230
 231     def can_node_serve_ip(self, ip):
 232         return ip in self.public_addresses
 233
 234     def node_ip_coverage(self, ips=None):
 235         return len([a for a in self.current_addresses if ips == None or a in ips])
 236
 237     def set_imbalance(self, imbalance=-1):
 238         """Set the imbalance metric to the given value.  If none given
 239         then calculate it."""
 240
 241         if imbalance != -1:
 242             self.imbalance = imbalance
 243         else:
 244             self.imbalance = imbalance_metric(list(self.current_addresses))
 245
 246     def get_imbalance(self):
 247         return self.imbalance
 248
 249 class Cluster(object):
 250     def __init__(self):
 251         self.nodes = []
 252         self.deterministic_public_ips = options.deterministic_public_ips
 253         self.no_ip_failback = options.no_ip_failback
 254         self.all_public_ips = set()
 255
 256         # Statistics
 257         self.ip_moves = []
 258         self.grat_ip_moves = []
 259         self.imbalance = []
 260         self.imbalance_groups = []
 261         self.imbalance_count = 0
 262         self.imbalance_groups_count = itertools.repeat(0)
 263         self.imbalance_metric = []
 264         self.events = -1
 265         self.num_unhealthy = []
 266
 267         self.prev = None
 268
 269     def __str__(self):
 270         return "\n".join(["%2d %s" % (i, n) \
 271                               for (i, n) in enumerate(self.nodes)])
 272
 273     # This is naive.  It assumes that IP groups are indicated by the
 274     # 1st node having IP groups.
 275     def have_ip_groups(self):
 276         return (len(self.nodes[0].ip_groups) > 0)
 277
 278     def print_statistics(self):
 279         print_begin("STATISTICS")
 280         print "Events:                      %6d" % self.events
 281         print "Total IP moves:              %6d" % sum(self.ip_moves)
 282         print "Gratuitous IP moves:         %6d" % sum(self.grat_ip_moves)
 283         print "Max imbalance:               %6d" % max(self.imbalance)
 284         if self.have_ip_groups():
 285             print "Max group imbalance counts:    ", map(max, zip(*self.imbalance_groups))
 286         print "Mean imbalance:              %f" % mean(self.imbalance)
 287         if self.have_ip_groups():
 288             print "Mean group imbalances counts:   ", map(mean, zip(*self.imbalance_groups))
 289         print "Final imbalance:             %6d" % self.imbalance[-1]
 290         if self.have_ip_groups():
 291             print "Final group imbalances:         ", self.imbalance_groups[-1]
 292         if options.lcp2:
 293             print "Max LCP2 imbalance  :        %6d" % max(self.imbalance_metric)
 294         print "Soft imbalance count:        %6d" % self.imbalance_count
 295         if self.have_ip_groups():
 296             print "Soft imbalance group counts:    ", self.imbalance_groups_count
 297         if options.lcp2:
 298             print "Final LCP2 imbalance  :      %6d" % self.imbalance_metric[-1]
 299         print "Maximum unhealthy:           %6d" % max(self.num_unhealthy)
 300         print_end()
 301
 302     def find_pnn_with_ip(self, ip):
 303         for (i, n) in enumerate(self.nodes):
 304             if ip in n.current_addresses:
 305                 return i
 306         return -1
 307
 308     def quietly_remove_ip(self, ip):
 309         # Remove address from old node.
 310         old = self.find_pnn_with_ip(ip)
 311         if old != -1:
 312             self.nodes[old].current_addresses.remove(ip)
 313
 314     def add_node(self, node):
 315         self.nodes.append(node)
 316         self.all_public_ips |= node.public_addresses
 317
 318     def healthy(self, *pnns):
 319         verbose_begin("HEALTHY")
 320
 321         for pnn in pnns:
 322             self.nodes[pnn].healthy = True
 323             verbose_print(pnn)
 324
 325         verbose_end()
 326
 327     def unhealthy(self, *pnns):
 328
 329         verbose_begin("UNHEALTHY")
 330
 331         for pnn in pnns:
 332             self.nodes[pnn].healthy = False
 333             verbose_print(pnn)
 334
 335         verbose_end()
 336
 337     def do_something_random(self):
 338
 339         """Make random node(s) healthy or unhealthy.
 340
 341         If options.aggressive is False then: If all nodes are healthy
 342         or unhealthy, then invert one of them; otherwise, there's a 1
 343         in options.odds chance of making another node unhealthy.
 344
 345         If options.aggressive is True then: For each node there is a 1
 346         in options.odds chance of flipping the state of that node
 347         between healthy and unhealthy."""
 348
 349         if not options.aggressive:
 350             num_nodes = len(self.nodes)
 351             healthy_pnns = [i for (i,n) in enumerate(self.nodes) if n.healthy]
 352             num_healthy = len(healthy_pnns)
 353
 354             if num_nodes == num_healthy:
 355                 self.unhealthy(random.randint(0, num_nodes-1))
 356             elif num_healthy == 0:
 357                 self.healthy(random.randint(0, num_nodes-1))
 358             elif random.randint(1, options.odds) == 1:
 359                 self.unhealthy(random.choice(healthy_pnns))
 360             else:
 361                 all_pnns = range(num_nodes)
 362                 unhealthy_pnns = sorted(list(set(all_pnns) - set(healthy_pnns)))
 363                 self.healthy(random.choice(unhealthy_pnns))
 364         else:
 365             # We need to make at least one change or we retry...x
 366             changed = False
 367             while not changed:
 368                 for (pnn, n) in enumerate(self.nodes):
 369                     if random.randint(1, options.odds) == 1:
 370                         changed = True
 371                         if n.healthy:
 372                             self.unhealthy(pnn)
 373                         else:
 374                             self.healthy(pnn)
 375
 376     def random_iterations(self):
 377         i = 1
 378         while i <= options.iterations:
 379             verbose_begin("EVENT %d" % i)
 380             verbose_end()
 381             self.do_something_random()
 382             if self.recover() and options.exit:
 383                 break
 384             i += 1
 385
 386         self.print_statistics()
 387
 388     def imbalance_for_ips(self, ips):
 389
 390         imbalance = 0
 391
 392         maxnode = -1
 393         minnode = -1
 394
 395         for ip in ips:
 396             for (i, n) in enumerate(self.nodes):
 397
 398                 if not n.healthy or not n.can_node_serve_ip(ip):
 399                     continue
 400
 401                 num = n.node_ip_coverage(ips)
 402
 403                 if maxnode == -1 or num > maxnum:
 404                     maxnode = i
 405                     maxnum = num
 406
 407                 if minnode == -1 or num < minnum:
 408                     minnode = i
 409                     minnum = num
 410
 411             if maxnode == -1 or minnode == -1:
 412                 continue
 413
 414             i = maxnum - minnum
 415             #if i < 2:
 416             #    i = 0
 417             imbalance = max([imbalance, i])
 418
 419         return imbalance
 420
 421
 422     def calculate_imbalance(self):
 423
 424         # First, do all the assigned IPs.
 425         assigned = sorted([ip
 426                            for n in self.nodes
 427                            for ip in n.current_addresses])
 428
 429         i = self.imbalance_for_ips(assigned)
 430
 431         ig = []
 432         # FIXME?  If dealing with IP groups, assume the nodes are all
 433         # the same.
 434         for ips in self.nodes[0].ip_groups:
 435             gi = self.imbalance_for_ips(ips)
 436             ig.append(gi)
 437
 438         return (i, ig)
 439
 440
 441     def diff(self):
 442         """Calculate differences in IP assignments between self and prev.
 443
 444         Gratuitous IP moves (from a healthy node to a healthy node)
 445         are prefixed by !!."""
 446
 447         ip_moves = 0
 448         grat_ip_moves = 0
 449         details = []
 450
 451         for (new, n) in enumerate(self.nodes):
 452             for ip in n.current_addresses:
 453                 old = self.prev.find_pnn_with_ip(ip)
 454                 if old != new:
 455                     ip_moves += 1
 456                     if old != -1 and \
 457                             self.prev.nodes[new].healthy and \
 458                             self.nodes[new].healthy and \
 459                             self.nodes[old].healthy and \
 460                             self.prev.nodes[old].healthy:
 461                         prefix = "!!"
 462                         grat_ip_moves += 1
 463                     else:
 464                         prefix = "  "
 465                     details.append("%s %s: %d -> %d" %
 466                                    (prefix, ip, old, new))
 467
 468         return (ip_moves, grat_ip_moves, details)
 469
 470     def find_takeover_node(self, ip):
 471
 472         pnn = -1
 473         min = 0
 474         for (i, n) in enumerate(self.nodes):
 475             if not n.healthy:
 476                 continue
 477
 478             if not n.can_node_serve_ip(ip):
 479                 continue
 480
 481             num = n.node_ip_coverage()
 482
 483             if (pnn == -1):
 484                 pnn = i
 485                 min = num
 486             else:
 487                 if num < min:
 488                     pnn = i
 489                     min = num
 490
 491         if pnn == -1:
 492             verbose_print("Could not find node to take over public address %s" % ip)
 493             return False
 494
 495         self.nodes[pnn].current_addresses.add(ip)
 496
 497         verbose_print("%s -> %d" % (ip, pnn))
 498         return True
 499
 500     def basic_allocate_unassigned(self):
 501
 502         assigned = set([ip for n in self.nodes for ip in n.current_addresses])
 503         unassigned = sorted(list(self.all_public_ips - assigned))
 504
 505         for ip in unassigned:
 506             self.find_takeover_node(ip)
 507
 508     def basic_failback(self, retries_l):
 509
 510         assigned = sorted([ip
 511                            for n in self.nodes
 512                            for ip in n.current_addresses])
 513         for ip in assigned:
 514
 515             maxnode = -1
 516             minnode = -1
 517             for (i, n) in enumerate(self.nodes):
 518                 if not n.healthy:
 519                     continue
 520
 521                 if not n.can_node_serve_ip(ip):
 522                     continue
 523
 524                 num = n.node_ip_coverage()
 525
 526                 if maxnode == -1:
 527                     maxnode = i
 528                     maxnum = num
 529                 else:
 530                     if num > maxnum:
 531                         maxnode = i
 532                         maxnum = num
 533                 if minnode == -1:
 534                     minnode = i
 535                     minnum = num
 536                 else:
 537                     if num < minnum:
 538                         minnode = i
 539                         minnum = num
 540
 541             if maxnode == -1:
 542                 print "Could not find maxnode. May not be able to serve ip", ip
 543                 continue
 544
 545             #if self.deterministic_public_ips:
 546             #    continue
 547
 548             if maxnum > minnum + 1 and retries_l[0] < options.retries:
 549                 # Remove the 1st ip from maxnode
 550                 t = sorted(list(self.nodes[maxnode].current_addresses))
 551                 realloc = t[0]
 552                 verbose_print("%s <- %d" % (realloc, maxnode))
 553                 self.nodes[maxnode].current_addresses.remove(realloc)
 554                 # Redo the outer loop.
 555                 retries_l[0] += 1
 556                 return True
 557
 558         return False
 559
 560
 561     def lcp2_allocate_unassigned(self):
 562
 563         # Assign as many unassigned addresses as possible.  Keep
 564         # selecting the optimal assignment until we don't manage to
 565         # assign anything.
 566         assigned = set([ip for n in self.nodes for ip in n.current_addresses])
 567         unassigned = sorted(list(self.all_public_ips - assigned))
 568
 569         should_loop = True
 570         while len(unassigned) > 0 and should_loop:
 571             should_loop = False
 572
 573             debug_begin(" CONSIDERING MOVES (UNASSIGNED)")
 574
 575             minnode = -1
 576             mindsum = 0
 577             minip = None
 578
 579             for ip in unassigned:
 580                 for dstnode in range(len(self.nodes)):
 581                     if self.nodes[dstnode].can_node_serve_ip(ip) and \
 582                             self.nodes[dstnode].healthy:
 583                         dstdsum = ip_distance_2_sum(ip, self.nodes[dstnode].current_addresses)
 584                         dstimbl = self.nodes[dstnode].get_imbalance() + dstdsum
 585                         debug_print(" %s -> %d [+%d]" % \
 586                                         (ip,
 587                                          dstnode,
 588                                          dstimbl - self.nodes[dstnode].get_imbalance()))
 589
 590                         if (minnode == -1) or (dstdsum < mindsum):
 591                             minnode = dstnode
 592                             minimbl = dstimbl
 593                             mindsum = dstdsum
 594                             minip = ip
 595                             should_loop = True
 596             debug_end()
 597
 598             if minnode != -1:
 599                 self.nodes[minnode].current_addresses.add(minip)
 600                 self.nodes[minnode].set_imbalance(self.nodes[minnode].get_imbalance() + mindsum)
 601                 verbose_print("%s -> %d [+%d]" % (minip, minnode, mindsum))
 602                 unassigned.remove(minip)
 603
 604         for ip in unassigned:
 605             verbose_print("Could not find node to take over public address %s" % ip)
 606
 607     def lcp2_failback(self, targets):
 608
 609         # Get the node with the highest imbalance metric.
 610         srcnode = -1
 611         maximbl = 0
 612         for (pnn, n) in enumerate(self.nodes):
 613             b = n.get_imbalance()
 614             if (srcnode == -1) or (b > maximbl):
 615                 srcnode = pnn
 616                 maximbl = b
 617
 618         # This means that all nodes had 0 or 1 addresses, so can't
 619         # be imbalanced.
 620         if maximbl == 0:
 621             return False
 622
 623         # We'll need this a few times...
 624         ips = self.nodes[srcnode].current_addresses
 625
 626         # Find an IP and destination node that best reduces imbalance.
 627         optimum = None
 628         debug_begin(" CONSIDERING MOVES FROM %d [%d]" % (srcnode, maximbl))
 629         for ip in ips:
 630             # What is this IP address costing the source node?
 631             srcdsum = ip_distance_2_sum(ip, ips - set([ip]))
 632             srcimbl = maximbl - srcdsum
 633
 634             # Consider this IP address would cost each potential
 635             # destination node.  Destination nodes are limited to
 636             # those that are newly healthy, since we don't want to
 637             # do gratuitous failover of IPs just to make minor
 638             # balance improvements.
 639             for dstnode in targets:
 640                 if self.nodes[dstnode].can_node_serve_ip(ip) and \
 641                         self.nodes[dstnode].healthy:
 642                     dstdsum = ip_distance_2_sum(ip, self.nodes[dstnode].current_addresses)
 643                     dstimbl = self.nodes[dstnode].get_imbalance() + dstdsum
 644                     debug_print(" %d [%d] -> %s -> %d [+%d]" % \
 645                                     (srcnode,
 646                                      srcimbl - self.nodes[srcnode].get_imbalance(),
 647                                      ip,
 648                                      dstnode,
 649                                      dstimbl - self.nodes[dstnode].get_imbalance()))
 650
 651                     if (dstimbl < maximbl) and (dstdsum < srcdsum):
 652                         if optimum is None:
 653                             optimum = (ip, srcnode, srcimbl, dstnode, dstimbl)
 654                         else:
 655                             (x, sn, si, dn, di) = optimum
 656                             if (srcimbl + dstimbl) < (si + di):
 657                                 optimum = (ip, srcnode, srcimbl, dstnode, dstimbl)
 658         debug_end()
 659
 660         if optimum is not None:
 661             # We found a move that makes things better...
 662             (ip, srcnode, srcimbl, dstnode, dstimbl) = optimum
 663             ini_srcimbl = self.nodes[srcnode].get_imbalance()
 664             ini_dstimbl = self.nodes[dstnode].get_imbalance()
 665
 666             self.nodes[srcnode].current_addresses.remove(ip)
 667             self.nodes[srcnode].set_imbalance(srcimbl)
 668
 669             self.nodes[dstnode].current_addresses.add(ip)
 670             self.nodes[dstnode].set_imbalance(dstimbl)
 671
 672             verbose_print("%d [%d] -> %s -> %d [+%d]" % \
 673                               (srcnode,
 674                                srcimbl - ini_srcimbl,
 675                                ip,
 676                                dstnode,
 677                                dstimbl - ini_dstimbl))
 678
 679             return True
 680
 681         return False
 682
 683
 684     def ctdb_takeover_run(self):
 685
 686         self.events += 1
 687
 688         # Don't bother with the num_healthy stuff.  It is an
 689         # irrelevant detail.
 690
 691         # We just keep the allocate IPs in the current_addresses field
 692         # of the node.  This needs to readable, not efficient!
 693
 694         if self.deterministic_public_ips:
 695             # Remap everything.
 696             addr_list = sorted(list(self.all_public_ips))
 697             for (i, ip) in enumerate(addr_list):
 698                 self.quietly_remove_ip(ip)
 699                 # Add addresses to new node.
 700                 pnn = i % len(self.nodes)
 701                 self.nodes[pnn].current_addresses.add(ip)
 702                 verbose_print("%s -> %d" % (ip, pnn))
 703
 704         # Remove public addresses from unhealthy nodes.
 705         for (pnn, n) in enumerate(self.nodes):
 706             if not n.healthy:
 707                 verbose_print(["%s <- %d" % (ip, pnn)
 708                                for ip in n.current_addresses])
 709                 n.current_addresses = set()
 710
 711         # If a node can't serve an assigned address then remove it.
 712         for n in self.nodes:
 713             verbose_print(["%s <- %d" % (ip, pnn)
 714                            for ip in n.current_addresses - n.public_addresses])
 715             n.current_addresses &= n.public_addresses
 716
 717         if options.lcp2:
 718             newly_healthy = [pnn for (pnn, n) in enumerate(self.nodes)
 719                              if len(n.current_addresses) == 0 and n.healthy]
 720             for n in self.nodes:
 721                 n.set_imbalance()
 722
 723         # We'll only retry the balancing act up to options.retries
 724         # times (for the basic non-deterministic algorithm).  This
 725         # nonsense gives us a reference on the retries count in
 726         # Python.  It will be easier in C.  :-)
 727         # For LCP2 we reassignas many IPs from heavily "loaded" nodes
 728         # to nodes that are newly healthy, looping until we fail to
 729         # reassign an IP.
 730         retries_l = [0]
 731         should_loop = True
 732         while should_loop:
 733             should_loop = False
 734
 735             if options.lcp2:
 736                 self.lcp2_allocate_unassigned()
 737             else:
 738                 self.basic_allocate_unassigned()
 739
 740             if self.no_ip_failback or self.deterministic_public_ips:
 741                 break
 742
 743             if options.lcp2:
 744                 if len(newly_healthy) == 0:
 745                     break
 746                 should_loop = self.lcp2_failback(newly_healthy)
 747             else:
 748                 should_loop = self.basic_failback(retries_l)
 749
 750
 751     def recover(self):
 752         verbose_begin("TAKEOVER")
 753
 754         self.ctdb_takeover_run()
 755
 756         verbose_end()
 757
 758         grat_ip_moves = 0
 759
 760         if self.prev is not None:
 761             (ip_moves, grat_ip_moves, details) = self.diff()
 762             self.ip_moves.append(ip_moves)
 763             self.grat_ip_moves.append(grat_ip_moves)
 764
 765             if options.diff:
 766                 print_begin("DIFF")
 767                 print "\n".join(details)
 768                 print_end()
 769
 770         (imbalance, imbalance_groups) = self.calculate_imbalance()
 771         self.imbalance.append(imbalance)
 772         self.imbalance_groups.append(imbalance_groups)
 773
 774         if imbalance > options.soft_limit:
 775             self.imbalance_count += 1
 776
 777         # There must be a cleaner way...
 778         t = []
 779         for (c, i) in zip(self.imbalance_groups_count, imbalance_groups):
 780             if i > options.soft_limit:
 781                 t.append(c + i)
 782             else:
 783                 t.append(c)
 784         self.imbalance_groups_count = t
 785
 786         imbalance_metric = max([n.get_imbalance() for n in self.nodes])
 787         self.imbalance_metric.append(imbalance_metric)
 788         if options.balance:
 789             print_begin("IMBALANCE")
 790             print "ALL IPS:", imbalance
 791             if self.have_ip_groups():
 792                 print "IP GROUPS:", imbalance_groups
 793             if options.lcp2:
 794                 print "LCP2 IMBALANCE:", imbalance_metric
 795             print_end()
 796
 797         num_unhealthy = len(self.nodes) - \
 798             len([n for n in self.nodes if n.healthy])
 799         self.num_unhealthy.append(num_unhealthy)
 800
 801         if options.show:
 802             print_begin("STATE")
 803             print self
 804             print_end()
 805
 806         self.prev = None
 807         self.prev = copy.deepcopy(self)
 808
 809         # True is bad!
 810         return (grat_ip_moves > 0) or \
 811             (not self.have_ip_groups() and imbalance > options.hard_limit) or \
 812             (self.have_ip_groups() and (max(imbalance_groups) > options.hard_limit))